[
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\r\n__pycache__/\r\n*.py[cod]\r\n*$py.class\r\n*.ipynb\r\n\r\n# C extensions\r\n*.so\r\n\r\n# Distribution / packaging\r\n.Python\r\nbuild/\r\ndevelop-eggs/\r\ndist/\r\ndownloads/\r\neggs/\r\n.eggs/\r\nlib/\r\nlib64/\r\nparts/\r\nsdist/\r\nvar/\r\nwheels/\r\n*.egg-info/\r\n.installed.cfg\r\n*.egg\r\nMANIFEST\r\n\r\n# PyInstaller\r\n#  Usually these files are written by a python script from a template\r\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\r\n*.manifest\r\n*.spec\r\n\r\n# Installer logs\r\npip-log.txt\r\npip-delete-this-directory.txt\r\n\r\n# Unit test / coverage reports\r\nhtmlcov/\r\n.tox/\r\n.coverage\r\n.coverage.*\r\n.cache\r\nnosetests.xml\r\ncoverage.xml\r\n*.cover\r\n.hypothesis/\r\n.pytest_cache/\r\n\r\n# Translations\r\n*.mo\r\n*.pot\r\n\r\n# Django stuff:\r\n*.log\r\nlocal_settings.py\r\ndb.sqlite3\r\n\r\n# Flask stuff:\r\ninstance/\r\n.webassets-cache\r\n\r\n# Scrapy stuff:\r\n.scrapy\r\n\r\n# Sphinx documentation\r\ndocs/_build/\r\n\r\n# PyBuilder\r\ntarget/\r\n\r\n# Jupyter Notebook\r\n.ipynb_checkpoints\r\n\r\n# pyenv\r\n.python-version\r\n\r\n# celery beat schedule file\r\ncelerybeat-schedule\r\n\r\n# SageMath parsed files\r\n*.sage.py\r\n\r\n# Environments\r\n.env\r\n.venv\r\nenv/\r\nvenv/\r\nENV/\r\nenv.bak/\r\nvenv.bak/\r\n\r\n# Spyder project settings\r\n.spyderproject\r\n.spyproject\r\n\r\n# Rope project settings\r\n.ropeproject\r\n\r\n# mkdocs documentation\r\n/site\r\n\r\n# mypy\r\n.mypy_cache/\r\n\r\n# cython generated cpp\r\ndata\r\n.vscode\r\n.idea\r\n\r\n# custom\r\n*.pkl\r\n*.pkl.json\r\n*.log.json\r\nwork_dirs/\r\nexps/\r\n*~\r\n\r\n# Pytorch\r\n*.pth\r\n\r\n# demo\r\n*.jpg\r\n*.png\r\n/data/scannet/scans/\r\n/data/sunrgbd/OFFICIAL_SUNRGBD/\r\n*.obj\r\n*.ply\r\n\n"
  },
  {
    "path": "LICENSE",
    "content": "Copyright 2018-2019 Open-MMLab. All rights reserved.\n\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. 
For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. 
Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. (Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright 2018-2019 Open-MMLab.\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "include requirements/*.txt\ninclude mmdet3d/ops/**/*.cpp mmdet3d/ops/**/*.cu\ninclude mmdet3d/ops/**/*.h mmdet3d/ops/**/*.cc\ninclude mmdet3d/VERSION\n"
  },
  {
    "path": "README.md",
    "content": "# [ICCV 2023] SparseFusion: Fusing Multi-Modal Sparse Representations for Multi-Sensor 3D Object Detection\n![video](video.gif)\n\n## Abstract\nWe propose SparseFusion, a novel multi-sensor 3D detection method that exclusively uses sparse candidates and sparse representations. Specifically, SparseFusion utilizes the outputs of parallel detectors in the LiDAR and camera modalities as sparse candidates for fusion. We transform the camera candidates into the LiDAR coordinate space by disentangling the object representations. Then, we can fuse the multi-modality candidates in a unified 3D space by a lightweight self-attention module. To mitigate negative transfer between modalities, we propose novel semantic and geometric cross-modality transfer modules that are applied prior to the modality-specific detectors. SparseFusion achieves state-of-the-art performance on the nuScenes benchmark while also running at the fastest speed.\n\n[[paper link]](https://openaccess.thecvf.com/content/ICCV2023/papers/Xie_SparseFusion_Fusing_Multi-Modal_Sparse_Representations_for_Multi-Sensor_3D_Object_Detection_ICCV_2023_paper.pdf) [[Chinese summary (自动驾驶之心)]](https://zhuanlan.zhihu.com/p/671293323)\n\n## Updates\n[2023-8-21] Much better training GPU memory efficiency (45GB -> 29GB) with no hurt to the performance and speed!\n\n[2023-7-13] 🔥SparseFusion has been accepted to ICCV 2023!🔥\n\n[2023-3-21] We release the first version code of SparseFusion. \n\n## Overview\n![teaser](teaser.png)\n\nCompared to existing fusion algorithms, SparseFusion achieves state-of-the-art performance as well as the fastest inference speed on nuScenes test set. †: Official [repository](https://github.com/zehuichen123/AutoAlignV2) of AutoAlignV2 uses flip as test-time augmentation. ‡: We use BEVFusion-base results in the official [repository](https://github.com/mit-han-lab/bevfusion) of BEVFusion to match the input resolutions of other methods. $\\S:$ Swin-T is adopted as image backbone.\n## NuScene Performance\nWe do not use any test-time augmentations or model ensembles to get these results. We have released the configure files and pretrained checkpoints to reproduce our results.\n#### Validation Set\n\n| Image Backbone | Point Cloud Backbone | mAP | NDS | Link |\n| --------- | ------ | ------ | --------- | --------- |\n| ResNet50  | VoxelNet             | 70.5 | 72.8 | [config](configs/sparsefusion_nusc_voxel_LC_r50.py)/[ckpt](https://drive.google.com/file/d/1NZIrg7s-VwxkwuPHTTWSQQO7T7IILBGC/view?usp=share_link) |\n| Swin-T  | VoxelNet             | 71.0 | 73.1 | [config](configs/sparsefusion_nusc_voxel_LC_SwinT.py)/[ckpt](https://drive.google.com/file/d/1dAhOKtbLd1e3I5jwk_3E1gzbl61P24qy/view?usp=share_link) |\n\n#### Test Set\n\n| Image Backbone | Point Cloud Backbone | mAP  | NDS |\n| --------- | ------ | ------ | --------- |\n| ResNet50  | VoxelNet             | 72.0 | 73.8 |\n\n## Usage \n\n#### Installation\n+ We test our code on an environment with CUDA 11.5, python 3.7, PyTorch 1.7.1, TorchVision 0.8.2, NumPy 1.20.0, and numba 0.48.0.\n\n+ We use `mmdet==2.10.0, mmcv==1.2.7` for our code. Please refer to their official instructions for installation.\n\n+ You can install `mmdet3d==0.11.0` directly from our repo by\n  ```\n  cd SparseFusion\n  pip install -e .\n  ```\n\n\n+ We use `spconv==2.3.3`. Please follow the [official instruction](https://github.com/traveller59/spconv) to install it based on your CUDA version.\n\n  ```\n  pip install spconv-cuxxx \n  # e.g. 
pip install spconv-cu114\n  ```\n\n+ You also need to install the deformable attention module with the following command.\n\n  ```\n  pip install ./mmdet3d/models/utils/ops\n  ```\n\n#### Data Preparation\n\nDownload the full nuScenes dataset from the [official website](https://www.nuscenes.org/download). You should have a folder structure like this:\n\n```\nSparseFusion\n├── mmdet3d\n├── tools\n├── configs\n├── data\n│   ├── nuscenes\n│   │   ├── maps\n│   │   ├── samples\n│   │   ├── sweeps\n│   │   ├── v1.0-test\n│   │   ├── v1.0-trainval\n```\n\nThen, you can select **either** of the two ways to preprocess the data.\n\n1. Run the following two commands sequentially.\n\n   ```\n   python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes\n   python tools/combine_view_info.py\n   ```\n\n2. Alternatively, you may directly download our preprocessed data from [Google Drive](https://drive.google.com/drive/folders/1L5lvLsNWBA0vfTlNSMa4OXXBLoZgJbg4?usp=share_link), and put these files in `data/nuscenes`.\n\n#### Initial Weights\n\nPlease download the [initial weights](https://drive.google.com/drive/folders/1wmYBi3PBprdcegF843AU-22q2OwDgoZk?usp=share_link) for model training, and put them in `checkpoints/`.\n\n#### Train & Test\n\nIn our default setting, we train the model with 4 GPUs.\n\n```\n# training\nbash tools/dist_train.sh configs/sparsefusion_nusc_voxel_LC_r50.py 4 --work-dir work_dirs/sparsefusion_nusc_voxel_LC_r50\n\n# test\nbash tools/dist_test.sh configs/sparsefusion_nusc_voxel_LC_r50.py ${CHECKPOINT_FILE} 4 --eval=bbox\n```\n\nNote: We use A6000 GPUs (48 GB per-GPU memory) for model training. Training the SparseFusion model (ResNet50 backbone) requires ~29 GB of per-GPU memory.\n\n## Contact\nIf you have any questions, feel free to open an issue or contact us at yichen_xie@berkeley.edu.\n\n## Acknowledgments\n\nWe sincerely thank the authors of [mmdetection3d](https://github.com/open-mmlab/mmdetection3d), [TransFusion](https://github.com/XuyangBai/TransFusion), [BEVFusion](https://github.com/mit-han-lab/bevfusion), [MSMDFusion](https://github.com/SxJyJay/MSMDFusion), and [DeepInteraction](https://github.com/fudan-zvg/DeepInteraction) for providing their code or pretrained weights.\n\n## Reference\n\nIf you find our work useful, please consider citing the following paper:\n\n```\n@article{xie2023sparsefusion,\n  title={SparseFusion: Fusing Multi-Modal Sparse Representations for Multi-Sensor 3D Object Detection},\n  author={Xie, Yichen and Xu, Chenfeng and Rakotosaona, Marie-Julie and Rim, Patrick and Tombari, Federico and Keutzer, Kurt and Tomizuka, Masayoshi and Zhan, Wei},\n  journal={arXiv preprint arXiv:2304.14340},\n  year={2023}\n}\n```\n\n"
  },
  {
    "path": "README_zh-CN.md",
    "content": "<div align=\"center\">\n  <img src=\"resources/mmdet3d-logo.png\" width=\"600\"/>\n</div>\n\n[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmdetection3d.readthedocs.io/en/latest/)\n[![badge](https://github.com/open-mmlab/mmdetection3d/workflows/build/badge.svg)](https://github.com/open-mmlab/mmdetection3d/actions)\n[![codecov](https://codecov.io/gh/open-mmlab/mmdetection3d/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmdetection3d)\n[![license](https://img.shields.io/github/license/open-mmlab/mmdetection3d.svg)](https://github.com/open-mmlab/mmdetection3d/blob/master/LICENSE)\n\n\n**新闻**: 我们发布了版本v0.11.0.\n\n在第三届[ nuScenes 3D 检测挑战赛](https://www.nuscenes.org/object-detection?externalData=all&mapData=all&modalities=Any)（第五届 AI Driving Olympics, NeurIPS 2020）中，我们获得了最佳 PKL 奖、第三名和最好的纯视觉的结果，相关的代码和模型将会在不久后发布。\n\n文档: https://mmdetection3d.readthedocs.io/\n\n## 简介\n\n[English](README.md) | 简体中文\n\n主分支代码目前支持 PyTorch 1.3 以上的版本。\n\nMMDetection3D 是一个基于 PyTorch 的目标检测开源工具箱, 下一代面向3D检测的平台. 它是 OpenMMlab 项目的一部分，这个项目由香港中文大学多媒体实验室和商汤科技联合发起.\n\n![demo image](resources/mmdet3d_outdoor_demo.gif)\n\n### 主要特性\n\n- **支持多模态/单模态的检测器**\n\n  支持多模态/单模态检测器，包括 MVXNet，VoteNet，PointPillars 等。\n\n- **支持户内/户外的数据集**\n\n  支持室内/室外的3D检测数据集，包括 ScanNet, SUNRGB-D, Waymo, nuScenes, Lyft, KITTI.\n\n  对于 nuScenes 数据集, 我们也支持 [nuImages 数据集](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/nuimages).\n\n- **与 2D 检测器的自然整合**\n\n   [MMDetection](https://github.com/open-mmlab/mmdetection/blob/master/docs/model_zoo.md) 支持的**300+个模型 , 40+的论文算法**, 和相关模块都可以在此代码库中训练或使用。\n\n- **性能高**\n\n   训练速度比其他代码库更快。下表可见主要的对比结果。更多的细节可见[基准测评文档](./docs/benchmarks.md)。我们对比了每秒训练的样本数（值越高越好）。其他代码库不支持的模型被标记为 `×`。\n\n  | Methods | MMDetection3D | [OpenPCDet](https://github.com/open-mmlab/OpenPCDet) |[votenet](https://github.com/facebookresearch/votenet)| [Det3D](https://github.com/poodarchu/Det3D) |\n  |:-------:|:-------------:|:---------:|:-----:|:-----:|\n  | VoteNet | 358           | ×         |   77  | ×     |\n  | PointPillars-car| 141           | ×         |   ×  | 140     |\n  | PointPillars-3class| 107           |44     |   ×      | ×    |\n  | SECOND| 40           |30     |   ×      | ×    |\n  | Part-A2| 17           |14     |   ×      | ×    |\n\n和 [MMDetection](https://github.com/open-mmlab/mmdetection)，[MMCV](https://github.com/open-mmlab/mmcv) 一样, MMDetection3D 也可以作为一个库去支持各式各样的项目.\n\n## 开源许可证\n\n该项目采用 [Apache 2.0 开源许可证](LICENSE)。\n\n## 更新日志\n\n最新的版本 v0.11.0 在 2021.03.01发布。\n如果想了解更多版本更新细节和历史信息，请阅读[更新日志](docs/changelog.md)。\n\n## 基准测试和模型库\n\n测试结果和模型可以在[模型库](docs/model_zoo.md)中找到。\n\n已支持的骨干网络：\n\n- [x] PointNet (CVPR'2017)\n- [x] PointNet++ (NeurIPS'2017)\n- [x] RegNet (CVPR'2020)\n\n已支持的算法：\n\n- [x] [SECOND (Sensor'2018)](configs/second/README.md)\n- [x] [PointPillars (CVPR'2019)](configs/pointpillars/README.md)\n- [x] [FreeAnchor (NeurIPS'2019)](configs/free_anchor/README.md)\n- [x] [VoteNet (ICCV'2019)](configs/votenet/README.md)\n- [x] [H3DNet (ECCV'2020)](configs/h3dnet/README.md)\n- [x] [3DSSD (CVPR'2020)](configs/3dssd/README.md)\n- [x] [Part-A2 (TPAMI'2020)](configs/parta2/README.md)\n- [x] [MVXNet (ICRA'2019)](configs/mvxnet/README.md)\n- [x] [CenterPoint (CVPR'2021)](configs/centerpoint/README.md)\n- [x] [SSN (ECCV'2020)](configs/ssn/README.md)\n- [x] [ImVoteNet (CVPR'2020)](configs/imvotenet/README.md)\n\n|                    | ResNet   | ResNeXt  | SENet    |PointNet++ | HRNet | RegNetX | Res2Net 
|\n|--------------------|:--------:|:--------:|:--------:|:---------:|:-----:|:--------:|:-----:|\n| SECOND             | ☐        | ☐        | ☐        | ✗         | ☐     | ✓        | ☐     |\n| PointPillars       | ☐        | ☐        | ☐        | ✗         | ☐     | ✓        | ☐     |\n| FreeAnchor         | ☐        | ☐        | ☐        | ✗         | ☐     | ✓        | ☐     |\n| VoteNet            | ✗        | ✗        | ✗        | ✓         | ✗     | ✗        | ✗     |\n| H3DNet            | ✗        | ✗        | ✗        | ✓         | ✗     | ✗        | ✗     |\n| 3DSSD            | ✗        | ✗        | ✗        | ✓         | ✗     | ✗        | ✗     |\n| Part-A2            | ☐        | ☐        | ☐        | ✗         | ☐     | ✓        | ☐     |\n| MVXNet             | ☐        | ☐        | ☐        | ✗         | ☐     | ✓        | ☐     |\n| CenterPoint        | ☐        | ☐        | ☐        | ✗         | ☐     | ✓        | ☐     |\n| SSN                | ☐        | ☐        | ☐        | ✗         | ☐     | ✓        | ☐     |\n| ImVoteNet            | ✗        | ✗        | ✗        | ✓         | ✗     | ✗        | ✗     |\n\n其他特性\n- [x] [Dynamic Voxelization](configs/dynamic_voxelization/README.md)\n\n**注意：** [MMDetection](https://github.com/open-mmlab/mmdetection/blob/master/docs/model_zoo.md) 支持的基于2D检测的**300+个模型 , 40+的论文算法**在 MMDetection3D 中都可以被训练或使用。\n\n## 安装\n\n请参考[快速入门文档](docs/get_started.md)进行安装。\n\n## 快速入门\n\n请参考[快速入门文档](docs/get_started.md)学习 MMDetection3D 的基本使用。 我们为新手提供了分别针对[已有数据集](docs/1_exist_data_model.md)和[新数据集](docs/2_new_data_model.md)的使用指南。我们也提供了一些进阶教程，内容覆盖了[学习配置文件](docs/tutorials/config.md), [增加数据集支持](docs/tutorials/customize_dataset.md), [设计新的数据预处理流程](docs/tutorials/data_pipeline.md), [增加自定义模型](docs/tutorials/customize_models.md), [增加自定义的运行时配置](docs/tutorials/customize_runtime.md)和 [Waymo 数据集](docs/tutorials/waymo.md).\n\n## 引用\n\n如果你觉得本项目对你的研究工作有所帮助，请参考如下 bibtex 引用 MMdetection3D\n\n```latex\n@misc{mmdet3d2020,\n    title={{MMDetection3D: OpenMMLab} next-generation platform for general {3D} object detection},\n    author={MMDetection3D Contributors},\n    howpublished = {\\url{https://github.com/open-mmlab/mmdetection3d}},\n    year={2020}\n}\n```\n\n## 贡献指南\n\n我们感谢所有的贡献者为改进和提升 MMDetection3D 所作出的努力。请参考[贡献指南](.github/CONTRIBUTING.md)来了解参与项目贡献的相关指引。\n\n## 致谢\n\nMMDetection3D 是一款由来自不同高校和企业的研发人员共同参与贡献的开源项目。我们感谢所有为项目提供算法复现和新功能支持的贡献者，以及提供宝贵反馈的用户。我们希望这个工具箱和基准测试可以为社区提供灵活的代码工具，供用户复现已有算法并开发自己的新的 3D 检测模型。\n\n## OpenMMLab 的其他项目\n\n- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab 计算机视觉基础库\n- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图像分类工具箱\n- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱\n- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D 目标检测平台\n- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱\n- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱\n- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台\n- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱\n- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图像视频编辑工具箱\n"
  },
  {
    "path": "configs/3dssd/3dssd_kitti-3d-car.py",
    "content": "_base_ = [\n    '../_base_/models/3dssd.py', '../_base_/datasets/kitti-3d-car.py',\n    '../_base_/default_runtime.py'\n]\n\n# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Car']\npoint_cloud_range = [0, -40, -5, 70, 40, 3]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),\n    classes=class_names,\n    sample_groups=dict(Car=15))\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[1.0, 1.0, 0],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-1.0471975511965976, 1.0471975511965976]),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.9, 1.1]),\n    dict(type='BackgroundPointsFilter', bbox_enlarge_range=(0.5, 2.0, 0.5)),\n    dict(type='IndoorPointSample', num_points=16384),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\n\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(type='IndoorPointSample', num_points=16384),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=4,\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\nevaluation = dict(interval=2)\n\n# model settings\nmodel = dict(\n    bbox_head=dict(\n        num_classes=1,\n        bbox_coder=dict(\n            type='AnchorFreeBBoxCoder', num_dir_bins=12, with_rot=True)))\n\n# optimizer\nlr = 0.002  # max learning rate\noptimizer = dict(type='AdamW', lr=lr, 
weight_decay=0)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(policy='step', warmup=None, step=[80, 120])\n# runtime settings\ntotal_epochs = 150\n\n# yapf:disable\nlog_config = dict(\n    interval=30,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n"
  },
  {
    "path": "configs/3dssd/README.md",
    "content": "# 3DSSD: Point-based 3D Single Stage Object Detector\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement 3DSSD and provide the results and checkpoints on KITTI datasets.\n\n```\n@inproceedings{yang20203dssd,\n    author = {Zetong Yang and Yanan Sun and Shu Liu and Jiaya Jia},\n    title = {3DSSD: Point-based 3D Single Stage Object Detector},\n    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},\n    year = {2020}\n}\n```\n\n### Experiment details on KITTI datasets\nSome settings in our implementation are different from the [official implementation](https://github.com/Jia-Research-Lab/3DSSD), which bring marginal differences to the performance on KITTI datasets in our experiments. To simplify and unify the models of our implementation, we skip them in our models. These differences are listed as below:\n1. We keep the scenes without any object while the official code skips these scenes in training. In the official implementation, only 3229 and 3394 samples are used as training and validation sets, respectively. In our implementation, we keep using 3712 and 3769 samples as training and validation sets, respectively, as those used for all the other models in our implementation on KITTI datasets.\n2. We do not modify the decay of `batch normalization` during training.\n3. While using [`DataBaseSampler`](https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/datasets/pipelines/dbsampler.py#L80) for data augmentation, the official code uses road planes as reference to place the sampled objects while we do not.\n4. We perform detection using LIDAR coordinates while the official code uses camera coordinates.\n\n## Results\n\n### KITTI\n|  Backbone   |Class| Lr schd | Mem (GB) | Inf time (fps) | mAP |Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|    [PointNet2SAMSG](./3dssd_kitti-3d-car.py)| Car |72e|4.7||78.39(81.00)<sup>1</sup>|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/3dssd/3dssd_kitti-3d-car_20210324_122002-07e9a19b.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/3dssd/3dssd_kitti-3d-car_20210324_122002.log.json)|\n\n[1]: We report two different 3D object detection performance here. 78.39mAP is evaluated by our evaluation code and 81.00mAP is evaluated by the official development kit （so as that used in the paper and official code of 3DSSD ）. We found that the commonly used Python implementation of [`rotate_iou`](https://github.com/traveller59/second.pytorch/blob/e42e4a0e17262ab7d180ee96a0a36427f2c20a44/second/core/non_max_suppression/nms_gpu.py#L605) which is used in our KITTI dataset evaluation, is different from the official implemention in [KITTI benchmark](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d).\n"
  },
  {
    "path": "configs/_base_/datasets/coco_instance.py",
    "content": "dataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "configs/_base_/datasets/kitti-3d-3class.py",
    "content": "# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car']\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),\n    classes=class_names,\n    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6))\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[1.0, 1.0, 0.5],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-0.78539816, 0.78539816]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=6,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'kitti_infos_train.pkl',\n            split='training',\n            pts_prefix='velodyne_reduced',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR')),\n    val=dict(\n        
type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=1)\n"
  },
  {
    "path": "configs/_base_/datasets/kitti-3d-car.py",
    "content": "# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Car']\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),\n    classes=class_names,\n    sample_groups=dict(Car=15))\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[1.0, 1.0, 0.5],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-0.78539816, 0.78539816]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=6,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'kitti_infos_train.pkl',\n            split='training',\n            pts_prefix='velodyne_reduced',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        
split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=1)\n"
  },
  {
    "path": "configs/_base_/datasets/lyft-3d.py",
    "content": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-80, -80, -5, 80, 80, 3]\n# For Lyft we usually do 9-class detection\nclass_names = [\n    'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',\n    'bicycle', 'pedestrian', 'animal'\n]\ndataset_type = 'LyftDataset'\ndata_root = 'data/lyft/'\n# Input modality for Lyft dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/lyft/': 's3://lyft/lyft/',\n#         'data/lyft/': 's3://lyft/lyft/'\n#    }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=False),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True),\n  
  test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_test.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True))\n# For Lyft dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 24. Please change the interval accordingly if you do not\n# use a default schedule.\nevaluation = dict(interval=24)\n"
  },
  {
    "path": "configs/_base_/datasets/nuim_instance.py",
    "content": "dataset_type = 'CocoDataset'\ndata_root = 'data/nuimages/'\nclass_names = [\n    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n]\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1280, 720), (1920, 1080)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1600, 900),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/nuimages_v1.0-train.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/nuimages_v1.0-val.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/nuimages_v1.0-val.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "configs/_base_/datasets/nus-3d.py",
    "content": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-50, -50, -5, 50, 50, 3]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n]\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\n# Input modality for nuScenes dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/nuscenes/': 's3://nuscenes/nuscenes/',\n#         'data/nuscenes/': 's3://nuscenes/nuscenes/'\n#     }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=False,\n        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet 
dataset.\n        box_type_3d='LiDAR'),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n# For nuScenes dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 24. Please change the interval accordingly if you do not\n# use a default schedule.\nevaluation = dict(interval=24)\n"
  },
  {
    "path": "configs/_base_/datasets/range100_lyft-3d.py",
    "content": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-100, -100, -5, 100, 100, 3]\n# For Lyft we usually do 9-class detection\nclass_names = [\n    'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',\n    'bicycle', 'pedestrian', 'animal'\n]\ndataset_type = 'LyftDataset'\ndata_root = 'data/lyft/'\n# Input modality for Lyft dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/lyft/': 's3://lyft/lyft/',\n#         'data/lyft/': 's3://lyft/lyft/'\n#    }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=False),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        
test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_test.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True))\n# For Lyft dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 24. Please change the interval accordingly if you do not\n# use a default schedule.\nevaluation = dict(interval=24)\n"
  },
  {
    "path": "configs/_base_/datasets/scannet-3d-18class.py",
    "content": "# dataset settings\ndataset_type = 'ScanNetDataset'\ndata_root = './data/scannet/'\nclass_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',\n               'bookshelf', 'picture', 'counter', 'desk', 'curtain',\n               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',\n               'garbagebin')\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        with_mask_3d=True,\n        with_seg_3d=True),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,\n                       36, 39)),\n    dict(type='IndoorPointSample', num_points=40000),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.087266, 0.087266],\n        scale_ratio_range=[1.0, 1.0],\n        shift_height=True),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D',\n        keys=[\n            'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',\n            'pts_instance_mask'\n        ])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.5,\n                flip_ratio_bev_vertical=0.5),\n            dict(type='IndoorPointSample', num_points=40000),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'scannet_infos_train.pkl',\n            pipeline=train_pipeline,\n            filter_empty_gt=False,\n            classes=class_names,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='Depth')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth'))\n"
  },
  {
    "path": "configs/_base_/datasets/sunrgbd-3d-10class.py",
    "content": "dataset_type = 'SUNRGBDDataset'\ndata_root = 'data/sunrgbd/'\nclass_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',\n               'night_stand', 'bookshelf', 'bathtub')\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(type='LoadAnnotations3D'),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n    ),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.523599, 0.523599],\n        scale_ratio_range=[0.85, 1.15],\n        shift_height=True),\n    dict(type='IndoorPointSample', num_points=20000),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.5,\n            ),\n            dict(type='IndoorPointSample', num_points=20000),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=16,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'sunrgbd_infos_train.pkl',\n            pipeline=train_pipeline,\n            classes=class_names,\n            filter_empty_gt=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='Depth')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'sunrgbd_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'sunrgbd_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth'))\n"
  },
  {
    "path": "configs/_base_/datasets/waymoD5-3d-3class.py",
    "content": "# dataset settings\n# D5 in the config name means the whole dataset is divided into 5 folds\n# We only use one fold for efficient experiments\ndataset_type = 'WaymoDataset'\ndata_root = 'data/waymo/kitti_format/'\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://waymo_data/'))\n\nclass_names = ['Car', 'Pedestrian', 'Cyclist']\npoint_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'waymo_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),\n    classes=class_names,\n    sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),\n    points_loader=dict(\n        type='LoadPointsFromFile',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'waymo_infos_train.pkl',\n            split='training',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and 
nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR',\n            # load one frame every five frames\n            load_interval=5)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=24)\n"
  },
  {
    "path": "configs/_base_/datasets/waymoD5-3d-car.py",
    "content": "# dataset settings\n# D5 in the config name means the whole dataset is divided into 5 folds\n# We only use one fold for efficient experiments\ndataset_type = 'WaymoDataset'\ndata_root = 'data/waymo/kitti_format/'\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://waymo_data/'))\n\nclass_names = ['Car']\npoint_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'waymo_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),\n    classes=class_names,\n    sample_groups=dict(Car=15),\n    points_loader=dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'waymo_infos_train.pkl',\n            split='training',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and 
scannet dataset.\n            box_type_3d='LiDAR',\n            # load one frame every five frames\n            load_interval=5)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=24)\n"
  },
  {
    "path": "configs/_base_/default_runtime.py",
    "content": "checkpoint_config = dict(interval=1)\n# yapf:disable push\n# By default we use textlogger hook and tensorboard\n# For more loggers see\n# https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = None\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "configs/_base_/models/3dssd.py",
    "content": "model = dict(\n    type='SSD3DNet',\n    backbone=dict(\n        type='PointNet2SAMSG',\n        in_channels=4,\n        num_points=(4096, 512, (256, 256)),\n        radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),\n        num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),\n        sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),\n                     ((64, 64, 128), (64, 64, 128), (64, 96, 128)),\n                     ((128, 128, 256), (128, 192, 256), (128, 256, 256))),\n        aggregation_channels=(64, 128, 256),\n        fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),\n        fps_sample_range_lists=((-1), (-1), (512, -1)),\n        norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),\n        sa_cfg=dict(\n            type='PointSAModuleMSG',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False)),\n    bbox_head=dict(\n        type='SSD3DHead',\n        in_channels=256,\n        vote_module_cfg=dict(\n            in_channels=256,\n            num_points=256,\n            gt_per_seed=1,\n            conv_channels=(128, ),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),\n            with_res_feat=False,\n            vote_xyz_range=(3.0, 3.0, 2.0)),\n        vote_aggregation_cfg=dict(\n            type='PointSAModuleMSG',\n            num_point=256,\n            radii=(4.8, 6.4),\n            sample_nums=(16, 32),\n            mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),\n            norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),\n            use_xyz=True,\n            normalize_xyz=False,\n            bias=True),\n        pred_layer_cfg=dict(\n            in_channels=1536,\n            shared_conv_channels=(512, 128),\n            cls_conv_channels=(128, ),\n            reg_conv_channels=(128, ),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),\n            bias=True),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),\n        objectness_loss=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=True,\n            reduction='sum',\n            loss_weight=1.0),\n        center_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        corner_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05),\n    test_cfg=dict(\n        nms_cfg=dict(type='nms', iou_thr=0.1),\n        sample_mod='spec',\n        score_thr=0.0,\n        per_class_proposal=True,\n        max_output_num=100))\n\n# optimizer\n# This schedule is mainly used by models on indoor dataset,\n# e.g., VoteNet on SUNRGBD and ScanNet\nlr = 0.002  # max learning rate\noptimizer = dict(type='AdamW', lr=lr, weight_decay=0)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(policy='step', warmup=None, step=[80, 120])\n# runtime settings\ntotal_epochs = 150\n"
  },
  {
    "path": "configs/_base_/models/cascade_mask_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    pretrained='torchvision://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        type='CascadeRoIHead',\n        num_stages=3,\n        stage_loss_weights=[1, 0.5, 0.25],\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 
loss_weight=1.0))\n        ],\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=dict(\n            type='FCNMaskHead',\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=80,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_across_levels=False,\n            nms_pre=2000,\n            nms_post=2000,\n            max_num=2000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        rcnn=[\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.5,\n                    min_pos_iou=0.5,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.6,\n                    min_pos_iou=0.6,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.7,\n                    neg_iou_thr=0.7,\n                    min_pos_iou=0.7,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False)\n        ]),\n    test_cfg=dict(\n        rpn=dict(\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_post=1000,\n            max_num=1000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', 
iou_threshold=0.5),\n            max_per_img=100,\n            mask_thr_binary=0.5)))\n"
  },
  {
    "path": "configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py",
    "content": "voxel_size = [0.1, 0.1, 0.2]\nmodel = dict(\n    type='CenterPoint',\n    pts_voxel_layer=dict(\n        max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),\n    pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),\n    pts_middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=5,\n        sparse_shape=[41, 1024, 1024],\n        output_channels=128,\n        order=('conv', 'norm', 'act'),\n        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,\n                                                                      128)),\n        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),\n        block_type='basicblock'),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        out_channels=[128, 256],\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        out_channels=[256, 256],\n        upsample_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='CenterHead',\n        in_channels=sum([256, 256]),\n        tasks=[\n            dict(num_class=1, class_names=['car']),\n            dict(num_class=2, class_names=['truck', 'construction_vehicle']),\n            dict(num_class=2, class_names=['bus', 'trailer']),\n            dict(num_class=1, class_names=['barrier']),\n            dict(num_class=2, class_names=['motorcycle', 'bicycle']),\n            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),\n        ],\n        common_heads=dict(\n            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        share_conv_channel=64,\n        bbox_coder=dict(\n            type='CenterPointBBoxCoder',\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_num=500,\n            score_threshold=0.1,\n            out_size_factor=8,\n            voxel_size=voxel_size[:2],\n            code_size=9),\n        separate_head=dict(\n            type='SeparateHead', init_bias=-2.19, final_kernel=3),\n        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\n        norm_bbox=True),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            grid_size=[1024, 1024, 40],\n            voxel_size=voxel_size,\n            out_size_factor=8,\n            dense_reg=1,\n            gaussian_overlap=0.1,\n            max_objs=500,\n            min_radius=2,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),\n    test_cfg=dict(\n        pts=dict(\n            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_per_img=500,\n            max_pool_nms=False,\n            min_radius=[4, 12, 10, 1, 0.85, 0.175],\n            score_threshold=0.1,\n            out_size_factor=8,\n            voxel_size=voxel_size[:2],\n            nms_type='rotate',\n            pre_max_size=1000,\n            post_max_size=83,\n            nms_thr=0.2)))\n"
  },
  {
    "path": "configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py",
    "content": "voxel_size = [0.2, 0.2, 8]\nmodel = dict(\n    type='CenterPoint',\n    pts_voxel_layer=dict(\n        max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),\n    pts_voxel_encoder=dict(\n        type='PillarFeatureNet',\n        in_channels=5,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=(0.2, 0.2, 8),\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n        legacy=False),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        out_channels=[64, 128, 256],\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        out_channels=[128, 128, 128],\n        upsample_strides=[0.5, 1, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='CenterHead',\n        in_channels=sum([128, 128, 128]),\n        tasks=[\n            dict(num_class=1, class_names=['car']),\n            dict(num_class=2, class_names=['truck', 'construction_vehicle']),\n            dict(num_class=2, class_names=['bus', 'trailer']),\n            dict(num_class=1, class_names=['barrier']),\n            dict(num_class=2, class_names=['motorcycle', 'bicycle']),\n            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),\n        ],\n        common_heads=dict(\n            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        share_conv_channel=64,\n        bbox_coder=dict(\n            type='CenterPointBBoxCoder',\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_num=500,\n            score_threshold=0.1,\n            out_size_factor=4,\n            voxel_size=voxel_size[:2],\n            code_size=9),\n        separate_head=dict(\n            type='SeparateHead', init_bias=-2.19, final_kernel=3),\n        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\n        norm_bbox=True),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            grid_size=[512, 512, 1],\n            voxel_size=voxel_size,\n            out_size_factor=4,\n            dense_reg=1,\n            gaussian_overlap=0.1,\n            max_objs=500,\n            min_radius=2,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),\n    test_cfg=dict(\n        pts=dict(\n            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_per_img=500,\n            max_pool_nms=False,\n            min_radius=[4, 12, 10, 1, 0.85, 0.175],\n            score_threshold=0.1,\n            pc_range=[-51.2, -51.2],\n            out_size_factor=4,\n            voxel_size=voxel_size[:2],\n            nms_type='rotate',\n            pre_max_size=1000,\n            post_max_size=83,\n            nms_thr=0.2)))\n"
  },
  {
    "path": "configs/_base_/models/h3dnet.py",
    "content": "primitive_z_cfg = dict(\n    type='PrimitiveHead',\n    num_dims=2,\n    num_classes=18,\n    primitive_mode='z',\n    upper_thresh=100.0,\n    surface_thresh=0.5,\n    vote_module_cfg=dict(\n        in_channels=256,\n        vote_per_seed=1,\n        gt_per_seed=1,\n        conv_channels=(256, 256),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        norm_feats=True,\n        vote_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='none',\n            loss_dst_weight=10.0)),\n    vote_aggregation_cfg=dict(\n        type='PointSAModule',\n        num_point=1024,\n        radius=0.3,\n        num_sample=16,\n        mlp_channels=[256, 128, 128, 128],\n        use_xyz=True,\n        normalize_xyz=True),\n    feat_channels=(128, 128),\n    conv_cfg=dict(type='Conv1d'),\n    norm_cfg=dict(type='BN1d'),\n    objectness_loss=dict(\n        type='CrossEntropyLoss',\n        class_weight=[0.4, 0.6],\n        reduction='mean',\n        loss_weight=30.0),\n    center_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_reg_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_cls_loss=dict(\n        type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n    train_cfg=dict(\n        dist_thresh=0.2,\n        var_thresh=1e-2,\n        lower_thresh=1e-6,\n        num_point=100,\n        num_point_line=10,\n        line_thresh=0.2))\n\nprimitive_xy_cfg = dict(\n    type='PrimitiveHead',\n    num_dims=1,\n    num_classes=18,\n    primitive_mode='xy',\n    upper_thresh=100.0,\n    surface_thresh=0.5,\n    vote_module_cfg=dict(\n        in_channels=256,\n        vote_per_seed=1,\n        gt_per_seed=1,\n        conv_channels=(256, 256),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        norm_feats=True,\n        vote_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='none',\n            loss_dst_weight=10.0)),\n    vote_aggregation_cfg=dict(\n        type='PointSAModule',\n        num_point=1024,\n        radius=0.3,\n        num_sample=16,\n        mlp_channels=[256, 128, 128, 128],\n        use_xyz=True,\n        normalize_xyz=True),\n    feat_channels=(128, 128),\n    conv_cfg=dict(type='Conv1d'),\n    norm_cfg=dict(type='BN1d'),\n    objectness_loss=dict(\n        type='CrossEntropyLoss',\n        class_weight=[0.4, 0.6],\n        reduction='mean',\n        loss_weight=30.0),\n    center_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_reg_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_cls_loss=dict(\n        type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n    train_cfg=dict(\n        dist_thresh=0.2,\n        var_thresh=1e-2,\n        lower_thresh=1e-6,\n        num_point=100,\n        num_point_line=10,\n        line_thresh=0.2))\n\nprimitive_line_cfg = dict(\n    type='PrimitiveHead',\n    num_dims=0,\n    num_classes=18,\n    primitive_mode='line',\n    upper_thresh=100.0,\n    surface_thresh=0.5,\n    vote_module_cfg=dict(\n        in_channels=256,\n        
vote_per_seed=1,\n        gt_per_seed=1,\n        conv_channels=(256, 256),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        norm_feats=True,\n        vote_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='none',\n            loss_dst_weight=10.0)),\n    vote_aggregation_cfg=dict(\n        type='PointSAModule',\n        num_point=1024,\n        radius=0.3,\n        num_sample=16,\n        mlp_channels=[256, 128, 128, 128],\n        use_xyz=True,\n        normalize_xyz=True),\n    feat_channels=(128, 128),\n    conv_cfg=dict(type='Conv1d'),\n    norm_cfg=dict(type='BN1d'),\n    objectness_loss=dict(\n        type='CrossEntropyLoss',\n        class_weight=[0.4, 0.6],\n        reduction='mean',\n        loss_weight=30.0),\n    center_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=1.0,\n        loss_dst_weight=1.0),\n    semantic_reg_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=1.0,\n        loss_dst_weight=1.0),\n    semantic_cls_loss=dict(\n        type='CrossEntropyLoss', reduction='sum', loss_weight=2.0),\n    train_cfg=dict(\n        dist_thresh=0.2,\n        var_thresh=1e-2,\n        lower_thresh=1e-6,\n        num_point=100,\n        num_point_line=10,\n        line_thresh=0.2))\n\nmodel = dict(\n    type='H3DNet',\n    backbone=dict(\n        type='MultiBackbone',\n        num_streams=4,\n        suffixes=['net0', 'net1', 'net2', 'net3'],\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),\n        act_cfg=dict(type='ReLU'),\n        backbones=dict(\n            type='PointNet2SASSG',\n            in_channels=4,\n            num_points=(2048, 1024, 512, 256),\n            radius=(0.2, 0.4, 0.8, 1.2),\n            num_samples=(64, 32, 16, 16),\n            sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                         (128, 128, 256)),\n            fp_channels=((256, 256), (256, 256)),\n            norm_cfg=dict(type='BN2d'),\n            sa_cfg=dict(\n                type='PointSAModule',\n                pool_mod='max',\n                use_xyz=True,\n                normalize_xyz=True))),\n    rpn_head=dict(\n        type='VoteHead',\n        vote_module_cfg=dict(\n            in_channels=256,\n            vote_per_seed=1,\n            gt_per_seed=3,\n            conv_channels=(256, 256),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            norm_feats=True,\n            vote_loss=dict(\n                type='ChamferDistance',\n                mode='l1',\n                reduction='none',\n                loss_dst_weight=10.0)),\n        vote_aggregation_cfg=dict(\n            type='PointSAModule',\n            num_point=256,\n            radius=0.3,\n            num_sample=16,\n            mlp_channels=[256, 128, 128, 128],\n            use_xyz=True,\n            normalize_xyz=True),\n        pred_layer_cfg=dict(\n            in_channels=128, shared_conv_channels=(128, 128), bias=True),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        objectness_loss=dict(\n            type='CrossEntropyLoss',\n            class_weight=[0.2, 0.8],\n            reduction='sum',\n            loss_weight=5.0),\n        center_loss=dict(\n            type='ChamferDistance',\n            mode='l2',\n            reduction='sum',\n          
  loss_src_weight=10.0,\n            loss_dst_weight=10.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        size_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        semantic_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),\n    roi_head=dict(\n        type='H3DRoIHead',\n        primitive_list=[primitive_z_cfg, primitive_xy_cfg, primitive_line_cfg],\n        bbox_head=dict(\n            type='H3DBboxHead',\n            gt_per_seed=3,\n            num_proposal=256,\n            suface_matching_cfg=dict(\n                type='PointSAModule',\n                num_point=256 * 6,\n                radius=0.5,\n                num_sample=32,\n                mlp_channels=[128 + 6, 128, 64, 32],\n                use_xyz=True,\n                normalize_xyz=True),\n            line_matching_cfg=dict(\n                type='PointSAModule',\n                num_point=256 * 12,\n                radius=0.5,\n                num_sample=32,\n                mlp_channels=[128 + 12, 128, 64, 32],\n                use_xyz=True,\n                normalize_xyz=True),\n            feat_channels=(128, 128),\n            primitive_refine_channels=[128, 128, 128],\n            upper_thresh=100.0,\n            surface_thresh=0.5,\n            line_thresh=0.5,\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            objectness_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.2, 0.8],\n                reduction='sum',\n                loss_weight=5.0),\n            center_loss=dict(\n                type='ChamferDistance',\n                mode='l2',\n                reduction='sum',\n                loss_src_weight=10.0,\n                loss_dst_weight=10.0),\n            dir_class_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),\n            dir_res_loss=dict(\n                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n            size_class_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),\n            size_res_loss=dict(\n                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n            semantic_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),\n            cues_objectness_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.3, 0.7],\n                reduction='mean',\n                loss_weight=5.0),\n            cues_semantic_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.3, 0.7],\n                reduction='mean',\n                loss_weight=5.0),\n            proposal_objectness_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.2, 0.8],\n                reduction='none',\n                loss_weight=5.0),\n            primitive_center_loss=dict(\n                type='MSELoss', reduction='none', loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),\n        rpn_proposal=dict(use_nms=False),\n        rcnn=dict(\n    
        pos_distance_thr=0.3,\n            neg_distance_thr=0.6,\n            sample_mod='vote',\n            far_threshold=0.6,\n            near_threshold=0.3,\n            mask_surface_threshold=0.3,\n            label_surface_threshold=0.3,\n            mask_line_threshold=0.3,\n            label_line_threshold=0.3)),\n    test_cfg=dict(\n        rpn=dict(\n            sample_mod='seed',\n            nms_thr=0.25,\n            score_thr=0.05,\n            per_class_proposal=True,\n            use_nms=False),\n        rcnn=dict(\n            sample_mod='seed',\n            nms_thr=0.25,\n            score_thr=0.05,\n            per_class_proposal=True)))\n"
  },
  {
    "path": "configs/_base_/models/hv_pointpillars_fpn_lyft.py",
    "content": "_base_ = './hv_pointpillars_fpn_nus.py'\n\n# model settings (based on nuScenes model settings)\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nmodel = dict(\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        point_cloud_range=[-80, -80, -5, 80, 80, 3],\n        max_voxels=(60000, 60000)),\n    pts_voxel_encoder=dict(\n        feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]),\n    pts_middle_encoder=dict(output_shape=[640, 640]),\n    pts_bbox_head=dict(\n        num_classes=9,\n        anchor_generator=dict(\n            ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]),\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),\n    # model training settings (based on nuScenes model settings)\n    train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))\n"
  },
  {
    "path": "configs/_base_/models/hv_pointpillars_fpn_nus.py",
    "content": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nvoxel_size = [0.25, 0.25, 8]\nmodel = dict(\n    type='MVXFasterRCNN',\n    pts_voxel_layer=dict(\n        max_num_points=64,\n        point_cloud_range=[-50, -50, -5, 50, 50, 3],\n        voxel_size=voxel_size,\n        max_voxels=(30000, 40000)),\n    pts_voxel_encoder=dict(\n        type='HardVFE',\n        in_channels=4,\n        feat_channels=[64, 64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        with_cluster_center=True,\n        with_voxel_center=True,\n        point_cloud_range=[-50, -50, -5, 50, 50, 3],\n        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        out_channels=[64, 128, 256]),\n    pts_neck=dict(\n        type='FPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        act_cfg=dict(type='ReLU'),\n        in_channels=[64, 128, 256],\n        out_channels=256,\n        start_level=0,\n        num_outs=3),\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=10,\n        in_channels=256,\n        feat_channels=256,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],\n            scales=[1, 2, 4],\n            sizes=[\n                [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)\n                [0.5774, 1.7321, 1.],  # 1/sqrt(3)\n                [1., 1., 1.],\n                [0.4, 0.4, 1],\n            ],\n            custom_values=[0, 0],\n            rotations=[0, 1.57],\n            reshape_out=True),\n        assigner_per_size=False,\n        diff_rad_by_sin=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            allowed_border=0,\n            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        pts=dict(\n            use_rotate_nms=True,\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_thr=0.2,\n            score_thr=0.05,\n            min_bbox_size=0,\n            max_num=500)))\n"
  },
  {
    "path": "configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py",
    "content": "_base_ = './hv_pointpillars_fpn_nus.py'\n\n# model settings (based on nuScenes model settings)\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nmodel = dict(\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        point_cloud_range=[-100, -100, -5, 100, 100, 3],\n        max_voxels=(60000, 60000)),\n    pts_voxel_encoder=dict(\n        feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]),\n    pts_middle_encoder=dict(output_shape=[800, 800]),\n    pts_bbox_head=dict(\n        num_classes=9,\n        anchor_generator=dict(\n            ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]),\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),\n    # model training settings (based on nuScenes model settings)\n    train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))\n"
  },
  {
    "path": "configs/_base_/models/hv_pointpillars_secfpn_kitti.py",
    "content": "voxel_size = [0.16, 0.16, 4]\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=32,\n        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)),\n    voxel_encoder=dict(\n        type='PillarFeatureNet',\n        in_channels=4,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]),\n    middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),\n    backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        out_channels=[64, 128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=384,\n        feat_channels=384,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[\n                [0, -39.68, -0.6, 70.4, 39.68, -0.6],\n                [0, -39.68, -0.6, 70.4, 39.68, -0.6],\n                [0, -39.68, -1.78, 70.4, 39.68, -1.78],\n            ],\n            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # for Pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.35,\n                min_pos_iou=0.35,\n                ignore_iof_thr=-1),\n            dict(  # for Cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.35,\n                min_pos_iou=0.35,\n                ignore_iof_thr=-1),\n            dict(  # for Car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n        ],\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_thr=0.01,\n        score_thr=0.1,\n        min_bbox_size=0,\n        nms_pre=100,\n        max_num=50))\n"
  },
  {
    "path": "configs/_base_/models/hv_pointpillars_secfpn_waymo.py",
    "content": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nvoxel_size = [0.32, 0.32, 6]\nmodel = dict(\n    type='MVXFasterRCNN',\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],\n        voxel_size=voxel_size,\n        max_voxels=(32000, 32000)),\n    pts_voxel_encoder=dict(\n        type='HardVFE',\n        in_channels=5,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        with_cluster_center=True,\n        with_voxel_center=True,\n        point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],\n        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        layer_nums=[3, 5, 5],\n        layer_strides=[1, 2, 2],\n        out_channels=[64, 128, 256]),\n    pts_neck=dict(\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=384,\n        feat_channels=384,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],\n                    [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188],\n                    [-74.88, -74.88, 0, 74.88, 74.88, 0]],\n            sizes=[\n                [2.08, 4.73, 1.77],  # car\n                [0.84, 1.81, 1.77],  # cyclist\n                [0.84, 0.91, 1.74]  # pedestrian\n            ],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            assigner=[\n                dict(  # car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.4,\n                    min_pos_iou=0.4,\n                    ignore_iof_thr=-1),\n                dict(  # cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.3,\n                    min_pos_iou=0.3,\n                    ignore_iof_thr=-1),\n                dict(  # pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n               
     pos_iou_thr=0.5,\n                    neg_iou_thr=0.3,\n                    min_pos_iou=0.3,\n                    ignore_iof_thr=-1),\n            ],\n            allowed_border=0,\n            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        pts=dict(\n            use_rotate_nms=True,\n            nms_across_levels=False,\n            nms_pre=4096,\n            nms_thr=0.25,\n            score_thr=0.1,\n            min_bbox_size=0,\n            max_num=500)))\n"
  },
  {
    "path": "configs/_base_/models/hv_second_secfpn_kitti.py",
    "content": "model = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=5,\n        point_cloud_range=[0, -40, -3, 70.4, 40, 1],\n        voxel_size=[0.05, 0.05, 0.1],\n        max_voxels=(16000, 40000)),\n    voxel_encoder=dict(type='HardSimpleVFE'),\n    middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=4,\n        sparse_shape=[41, 1600, 1408],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -1.78, 70.4, 40.0, -1.78],\n            ],\n            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # for Pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.35,\n                neg_iou_thr=0.2,\n                min_pos_iou=0.2,\n                ignore_iof_thr=-1),\n            dict(  # for Cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.35,\n                neg_iou_thr=0.2,\n                min_pos_iou=0.2,\n                ignore_iof_thr=-1),\n            dict(  # for Car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n        ],\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_thr=0.01,\n        score_thr=0.1,\n        min_bbox_size=0,\n        nms_pre=100,\n        max_num=50))\n"
  },
  {
    "path": "configs/_base_/models/hv_second_secfpn_waymo.py",
    "content": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nvoxel_size = [0.08, 0.08, 0.1]\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=10,\n        point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4],\n        voxel_size=voxel_size,\n        max_voxels=(80000, 90000)),\n    voxel_encoder=dict(type='HardSimpleVFE', num_features=5),\n    middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=5,\n        sparse_shape=[61, 1280, 1920],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=384,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345],\n                    [-76.8, -51.2, 0, 76.8, 51.2, 0],\n                    [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]],\n            sizes=[\n                [2.08, 4.73, 1.77],  # car\n                [0.84, 0.91, 1.74],  # pedestrian\n                [0.84, 1.81, 1.77]  # cyclist\n            ],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            dict(  # pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            dict(  # cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1)\n        ],\n        allowed_border=0,\n        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_pre=4096,\n        nms_thr=0.25,\n        
score_thr=0.1,\n        min_bbox_size=0,\n        max_num=500))\n"
  },
  {
    "path": "configs/_base_/models/imvotenet_image.py",
    "content": "model = dict(\n    type='ImVoteNet',\n    img_backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe'),\n    img_neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    img_rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    img_roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=10,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),\n\n    # model training and testing settings\n    train_cfg=dict(\n        img_rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        img_rpn_proposal=dict(\n            nms_across_levels=False,\n            nms_pre=2000,\n            nms_post=1000,\n            max_num=1000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        img_rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=False,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        img_rpn=dict(\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_post=1000,\n            max_num=1000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        img_rcnn=dict(\n            score_thr=0.05,\n            
nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100)))\n"
  },
  {
    "path": "configs/_base_/models/mask_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='torchvision://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=dict(\n            type='FCNMaskHead',\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=80,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_across_levels=False,\n            nms_pre=2000,\n            nms_post=1000,\n            max_num=1000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n       
     sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_post=1000,\n            max_num=1000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100,\n            mask_thr_binary=0.5)))\n"
  },
  {
    "path": "configs/_base_/models/votenet.py",
    "content": "model = dict(\n    type='VoteNet',\n    backbone=dict(\n        type='PointNet2SASSG',\n        in_channels=4,\n        num_points=(2048, 1024, 512, 256),\n        radius=(0.2, 0.4, 0.8, 1.2),\n        num_samples=(64, 32, 16, 16),\n        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                     (128, 128, 256)),\n        fp_channels=((256, 256), (256, 256)),\n        norm_cfg=dict(type='BN2d'),\n        sa_cfg=dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=True)),\n    bbox_head=dict(\n        type='VoteHead',\n        vote_module_cfg=dict(\n            in_channels=256,\n            vote_per_seed=1,\n            gt_per_seed=3,\n            conv_channels=(256, 256),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            norm_feats=True,\n            vote_loss=dict(\n                type='ChamferDistance',\n                mode='l1',\n                reduction='none',\n                loss_dst_weight=10.0)),\n        vote_aggregation_cfg=dict(\n            type='PointSAModule',\n            num_point=256,\n            radius=0.3,\n            num_sample=16,\n            mlp_channels=[256, 128, 128, 128],\n            use_xyz=True,\n            normalize_xyz=True),\n        pred_layer_cfg=dict(\n            in_channels=128, shared_conv_channels=(128, 128), bias=True),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        objectness_loss=dict(\n            type='CrossEntropyLoss',\n            class_weight=[0.2, 0.8],\n            reduction='sum',\n            loss_weight=5.0),\n        center_loss=dict(\n            type='ChamferDistance',\n            mode='l2',\n            reduction='sum',\n            loss_src_weight=10.0,\n            loss_dst_weight=10.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        size_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),\n        semantic_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),\n    test_cfg=dict(\n        sample_mod='seed',\n        nms_thr=0.25,\n        score_thr=0.05,\n        per_class_proposal=True))\n"
  },
  {
    "path": "configs/_base_/schedules/cyclic_20e.py",
    "content": "# For nuScenes dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 20. Please change the interval accordingly if you do not\n# use a default schedule.\n# optimizer\n# This schedule is mainly used by models on nuScenes dataset\noptimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)\n# max_norm=10 is better for SECOND\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 1e-4),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.85 / 0.95, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\n\n# runtime settings\ntotal_epochs = 20\n"
  },
  {
    "path": "configs/_base_/schedules/cyclic_40e.py",
    "content": "# The schedule is usually used by models trained on KITTI dataset\n\n# The learning rate set in the cyclic schedule is the initial learning rate\n# rather than the max learning rate. Since the target_ratio is (10, 1e-4),\n# the learning rate will change from 0.0018 to 0.018, than go to 0.0018*1e-4\nlr = 0.0018\n# The optimizer follows the setting in SECOND.Pytorch, but here we use\n# the offcial AdamW optimizer implemented by PyTorch.\noptimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\n# We use cyclic learning rate and momentum schedule following SECOND.Pytorch\n# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69  # noqa\n# We implement them in mmcv, for more details, please refer to\n# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327  # noqa\n# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130  # noqa\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 1e-4),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.85 / 0.95, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\n# Although the total_epochs is 40, this schedule is usually used we\n# RepeatDataset with repeat ratio N, thus the actual total epoch\n# number could be Nx40\ntotal_epochs = 40\n"
  },
  {
    "path": "configs/_base_/schedules/mmdet_schedule_1x.py",
    "content": "# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[8, 11])\ntotal_epochs = 12\n"
  },
  {
    "path": "configs/_base_/schedules/schedule_2x.py",
    "content": "# optimizer\n# This schedule is mainly used by models on nuScenes dataset\noptimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)\n# max_norm=10 is better for SECOND\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=1.0 / 1000,\n    step=[20, 23])\nmomentum_config = None\n# runtime settings\ntotal_epochs = 24\n"
  },
  {
    "path": "configs/_base_/schedules/schedule_3x.py",
    "content": "# optimizer\n# This schedule is mainly used by models on indoor dataset,\n# e.g., VoteNet on SUNRGBD and ScanNet\nlr = 0.008  # max learning rate\noptimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\nlr_config = dict(policy='step', warmup=None, step=[24, 32])\n# runtime settings\ntotal_epochs = 36\n"
  },
  {
    "path": "configs/benchmark/hv_PartA2_secfpn_4x8_cyclic_80e_pcdet_kitti-3d-3class.py",
    "content": "# model settings\nvoxel_size = [0.05, 0.05, 0.1]\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]  # velodyne coordinates, x, y, z\n\nmodel = dict(\n    type='PartA2',\n    voxel_layer=dict(\n        max_num_points=5,  # max_points_per_voxel\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)  # (training, testing) max_coxels\n    ),\n    voxel_encoder=dict(type='HardSimpleVFE'),\n    middle_encoder=dict(\n        type='SparseUNet',\n        in_channels=4,\n        sparse_shape=[41, 1600, 1408],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    rpn_head=dict(\n        type='PartA2RPNHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                    [0, -40.0, -1.78, 70.4, 40.0, -1.78]],\n            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        assigner_per_size=True,\n        assign_per_class=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    roi_head=dict(\n        type='PartAggregationROIHead',\n        num_classes=3,\n        semantic_head=dict(\n            type='PointwiseSemanticHead',\n            in_channels=16,\n            extra_width=0.2,\n            seg_score_thr=0.3,\n            num_classes=3,\n            loss_seg=dict(\n                type='FocalLoss',\n                use_sigmoid=True,\n                reduction='sum',\n                gamma=2.0,\n                alpha=0.25,\n                loss_weight=1.0),\n            loss_part=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n        seg_roi_extractor=dict(\n            type='Single3DRoIAwareExtractor',\n            roi_layer=dict(\n                type='RoIAwarePool3d',\n                out_size=14,\n                max_pts_per_voxel=128,\n                mode='max')),\n        part_roi_extractor=dict(\n            type='Single3DRoIAwareExtractor',\n            roi_layer=dict(\n                type='RoIAwarePool3d',\n                out_size=14,\n                max_pts_per_voxel=128,\n                mode='avg')),\n        bbox_head=dict(\n            type='PartA2BboxHead',\n            num_classes=3,\n            seg_in_channels=16,\n            part_in_channels=4,\n            seg_conv_channels=[64, 64],\n            part_conv_channels=[64, 64],\n            merge_conv_channels=[128, 128],\n            down_conv_channels=[128, 256],\n            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n            shared_fc_channels=[256, 512, 512, 512],\n            
cls_channels=[256, 256],\n            reg_channels=[256, 256],\n            dropout_ratio=0.1,\n            roi_feat_size=14,\n            with_corner_loss=True,\n            loss_bbox=dict(\n                type='SmoothL1Loss',\n                beta=1.0 / 9.0,\n                reduction='sum',\n                loss_weight=1.0),\n            loss_cls=dict(\n                type='CrossEntropyLoss',\n                use_sigmoid=True,\n                reduction='sum',\n                loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=[\n                dict(  # for Pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1),\n                dict(  # for Cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1),\n                dict(  # for Car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.45,\n                    min_pos_iou=0.45,\n                    ignore_iof_thr=-1)\n            ],\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=9000,\n            nms_post=512,\n            max_num=512,\n            nms_thr=0.8,\n            score_thr=0,\n            use_rotate_nms=False),\n        rcnn=dict(\n            assigner=[\n                dict(  # for Pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1),\n                dict(  # for Cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1),\n                dict(  # for Car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1)\n            ],\n            sampler=dict(\n                type='IoUNegPiecewiseSampler',\n                num=128,\n                pos_fraction=0.55,\n                neg_piece_fractions=[0.8, 0.2],\n                neg_iou_piece_thrs=[0.55, 0.1],\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False,\n                return_iou=True),\n            cls_pos_thr=0.75,\n            cls_neg_thr=0.25)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1024,\n            nms_post=100,\n            max_num=100,\n            nms_thr=0.7,\n            score_thr=0,\n            
use_rotate_nms=True),\n        rcnn=dict(\n            use_rotate_nms=True,\n            use_raw_score=True,\n            nms_thr=0.01,\n            score_thr=0.3)))\n\n# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car']\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),\n    classes=class_names,\n    sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15))\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_train.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=train_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=False),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True))\n# optimizer\nlr = 0.001  # max learning rate\noptimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    
target_ratio=(10, 1e-4),\n    cyclic_times=1,\n    step_ratio_up=0.4)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.85 / 0.95, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4)\ncheckpoint_config = dict(interval=1)\nevaluation = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 80\ndist_params = dict(backend='nccl', port=29506)\nlog_level = 'INFO'\nfind_unused_parameters = True\nwork_dir = './work_dirs/parta2_secfpn_80e'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "configs/benchmark/hv_pointpillars_secfpn_3x8_100e_det3d_kitti-3d-car.py",
    "content": "# model settings\nvoxel_size = [0.16, 0.16, 4]\npoint_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=64,\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(12000, 20000)),\n    voxel_encoder=dict(\n        type='PillarFeatureNet',\n        in_channels=4,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        point_cloud_range=point_cloud_range),\n    middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),\n    backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        out_channels=[64, 128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=1,\n        in_channels=384,\n        feat_channels=384,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],\n            sizes=[[1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=True),\n        diff_rad_by_sin=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            iou_calculator=dict(type='BboxOverlapsNearest3D'),\n            pos_iou_thr=0.6,\n            neg_iou_thr=0.45,\n            min_pos_iou=0.45,\n            ignore_iof_thr=-1),\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_thr=0.01,\n        score_thr=0.1,\n        min_bbox_size=0,\n        nms_pre=100,\n        max_num=50))\n\n# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Car']\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),\n    sample_groups=dict(Car=15),\n    classes=class_names)\n\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        loc_noise_std=[0.25, 0.25, 0.25],\n        global_rot_range=[0.0, 0.0],\n        rot_uniform_noise=[-0.15707963267, 0.15707963267]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScale',\n        rot_uniform_noise=[-0.78539816, 0.78539816],\n        scaling_uniform_noise=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=3,\n    workers_per_gpu=3,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'kitti_infos_train.pkl',\n            split='training',\n            pts_prefix='velodyne_reduced',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True))\n# optimizer\nlr = 0.001  # max learning rate\noptimizer = dict(\n    type='AdamW',\n    lr=lr,\n    betas=(0.95, 0.99),  # the momentum is change during training\n    weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 1e-4),\n    cyclic_times=1,\n    step_ratio_up=0.4)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.85 / 0.95, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4)\ncheckpoint_config = dict(interval=1)\nevaluation = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 50\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/pp_secfpn_100e'\nload_from = None\nresume_from = None\nworkflow = [('train', 50)]\n"
  },
  {
    "path": "configs/benchmark/hv_pointpillars_secfpn_4x8_80e_pcdet_kitti-3d-3class.py",
    "content": "# model settings\npoint_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]\nvoxel_size = [0.16, 0.16, 4]\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=32,  # max_points_per_voxel\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)  # (training, testing) max_coxels\n    ),\n    voxel_encoder=dict(\n        type='PillarFeatureNet',\n        in_channels=4,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        point_cloud_range=point_cloud_range,\n    ),\n    middle_encoder=dict(\n        type='PointPillarsScatter',\n        in_channels=64,\n        output_shape=[496, 432],\n    ),\n    backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        out_channels=[64, 128, 256],\n    ),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128],\n    ),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=384,\n        feat_channels=384,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -1.78, 70.4, 40.0, -1.78],\n            ],\n            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),\n    ),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # for Pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.35,\n                min_pos_iou=0.35,\n                ignore_iof_thr=-1),\n            dict(  # for Cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.35,\n                min_pos_iou=0.35,\n                ignore_iof_thr=-1),\n            dict(  # for Car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n        ],\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_thr=0.01,\n        score_thr=0.1,\n        min_bbox_size=0,\n        nms_pre=100,\n        max_num=50))\n\n# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car']\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    
data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(\n            Car=5,\n            Pedestrian=5,\n            Cyclist=5,\n        )),\n    classes=class_names,\n    sample_groups=dict(\n        Car=15,\n        Pedestrian=15,\n        Cyclist=15,\n    ))\n\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_train.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=train_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=False),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True))\n# optimizer\nlr = 0.0003  # max learning rate\noptimizer = dict(\n    type='AdamW',\n    lr=lr,\n    betas=(0.95, 0.99),  # the momentum is change during training\n    weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 1e-4),\n    cyclic_times=1,\n    step_ratio_up=0.4)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.85 / 0.95, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4)\ncheckpoint_config = dict(interval=1)\nevaluation = dict(interval=2)\n# 
yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 80\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/pp_secfpn_80e'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "configs/benchmark/hv_second_secfpn_4x8_80e_pcdet_kitti-3d-3class.py",
    "content": "# model settings\nvoxel_size = [0.05, 0.05, 0.1]\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\n\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=5,\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)),\n    voxel_encoder=dict(type='HardSimpleVFE'),\n    middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=4,\n        sparse_shape=[41, 1600, 1408],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -1.78, 70.4, 40.0, -1.78],\n            ],\n            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # for Pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.35,\n                min_pos_iou=0.35,\n                ignore_iof_thr=-1),\n            dict(  # for Cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.35,\n                min_pos_iou=0.35,\n                ignore_iof_thr=-1),\n            dict(  # for Car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n        ],\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_thr=0.01,\n        score_thr=0.1,\n        min_bbox_size=0,\n        nms_pre=100,\n        max_num=50))\n\n# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car']\ninput_modality = dict(use_lidar=False, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(\n            Car=5,\n            Pedestrian=5,\n            Cyclist=5,\n        )),\n    
classes=class_names,\n    sample_groups=dict(\n        Car=20,\n        Pedestrian=15,\n        Cyclist=15,\n    ))\nfile_client_args = dict(backend='disk')\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_train.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=train_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=False),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True))\n# optimizer\nlr = 0.0003  # max learning rate\noptimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 1e-4),\n    cyclic_times=1,\n    step_ratio_up=0.4)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.85 / 0.95, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4)\ncheckpoint_config = dict(interval=1)\nevaluation = 
dict(interval=2)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\ntotal_epochs = 80\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/sec_secfpn_80e'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "configs/centerpoint/README.md",
    "content": "# Center-based 3D Object Detection and Tracking\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement CenterPoint and provide the result and checkpoints on nuScenes dataset.\n\nWe follow the below style to name config files. Contributors are advised to follow the same style.\n`{xxx}` is required field and `[yyy]` is optional.\n\n`{model}`: model type like `centerpoint`.\n\n`{model setting}`: voxel size and voxel type like `01voxel`, `02pillar`.\n\n`{backbone}`: backbone type like `second`.\n\n`{neck}`: neck type like `secfpn`.\n\n`[dcn]`: Whether to use deformable convolution.\n\n`[circle]`: Whether to use circular nms.\n\n`[batch_per_gpu x gpu]`: GPUs and samples per GPU, 4x8 is used by default.\n\n`{schedule}`: training schedule, options are 1x, 2x, 20e, etc. 1x and 2x means 12 epochs and 24 epochs respectively. 20e is adopted in cascade models, which denotes 20 epochs. For 1x/2x, initial learning rate decays by a factor of 10 at the 8/16th and 11/22th epochs. For 20e, initial learning rate decays by a factor of 10 at the 16th and 19th epochs.\n\n`{dataset}`: dataset like nus-3d, kitti-3d, lyft-3d, scannet-3d, sunrgbd-3d. We also indicate the number of classes we are using if there exist multiple settings, e.g., kitti-3d-3class and kitti-3d-car means training on KITTI dataset with 3 classes and single class, respectively.\n```\n@article{yin2021center,\n  title={Center-based 3D Object Detection and Tracking},\n  author={Yin, Tianwei and Zhou, Xingyi and Kr{\\\"a}henb{\\\"u}hl, Philipp},\n  journal={CVPR},\n  year={2021},\n}\n```\n\n## Usage\n\n### Test time augmentation\n\nWe have supported double-flip and scale augmentation during test time. To use test time augmentation, users need to modify the\n`test_pipeline` and `test_cfg` in the config.\nFor example, we change `centerpoint_0075voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus.py` to the following.\n\n```python\n_base_ = './centerpoint_0075voxel_second_secfpn_circlenms' \\\n         '_4x8_cyclic_20e_nus.py'\n\nmodel = dict(\n    test_cfg=dict(\n        pts=dict(\n            use_rotate_nms=True,\n            max_num=83)))\n\npoint_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]\nfile_client_args = dict(backend='disk')\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=[0.95, 1.0, 1.05],\n        flip=True,\n        pcd_horizontal_flip=True,\n        pcd_vertical_flip=True,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D', sync_2d=False),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    
val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))\n\n```\n\n## Results\n\n### CenterPoint\n\n|Backbone|  Voxel type (voxel size)   |Dcn|Circular nms| Mem (GB) | Inf time (fps) | mAP |NDS| Download |\n| :---------: |:-----: |:-----: | :------: | :------------: | :----: |:----: | :------: |:------: |\n|[SECFPN](./centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus.py)|voxel (0.1)|✗|✓|4.9| |56.19|64.43|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus_20201001_135205-5db91e00.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus_20201001_135205.log.json)|\n|above w/o circle nms|voxel (0.1)|✗|✗| | |56.56|64.46||\n|[SECFPN](./centerpoint_01voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus.py)|voxel (0.1)|✓|✓|5.2| |56.34|64.81|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_01voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus/centerpoint_01voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus_20201004_075317-26d8176c.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_01voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus/centerpoint_01voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus_20201004_075317.log.json)|\n|above w/o circle nms|voxel (0.1)|✓|✗| | |56.60|64.90||\n|[SECFPN](./centerpoint_0075voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus.py)|voxel (0.075)|✗|✓|7.8| |57.34|65.23|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_0075voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_0075voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus_20200925_230905-358fbe3b.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_0075voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_0075voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus_20200925_230905.log.json)|\n|above w/o circle nms|voxel (0.075)|✗|✗| | |57.63|65.39| |\n|[SECFPN](./centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus.py)|voxel (0.075)|✓|✓|8.5| |57.27|65.58|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus_20200930_201619-67c8496f.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus_20200930_201619.log.json)|\n|above w/o circle nms|voxel (0.075)|✓|✗| | |57.43|65.63||\n|above w/ double flip|voxel (0.075)|✓|✗| | |59.73|67.39||\n|above w/ scale tta|voxel (0.075)|✓|✗| | |60.43|67.65||\n|above w/ circle nms w/o scale tta|voxel (0.075)|✓|✗| | |59.52|67.24||\n|[SECFPN](./centerpoint_02pillar_second_secfpn_circlenms_4x8_cyclic_20e_nus.py)|pillar (0.2)|✗|✓|4.4| |49.07|59.66|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_02pillar_second_secfpn_circlenms_4x8_cyclic_20e_nus_20201004_170716-a134a233.pth) &#124; 
[log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_02pillar_second_secfpn_circlenms_4x8_cyclic_20e_nus_20201004_170716.log.json)|\n|above w/o circle nms|pillar (0.2)|✗|✗| | |49.12|59.66||\n|[SECFPN](./centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus.py)|pillar (0.2)|✓|✗| 4.6| |48.8 |59.67 |[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus_20200930_103722-3bb135f2.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/centerpoint/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus_20200930_103722.log.json)|\n|above w/ circle nms|pillar (0.2)|✓|✓| | |48.79|59.65||\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_0075voxel_second_secfpn_4x8_cyclic_20e_nus.py",
    "content": "_base_ = ['./centerpoint_01voxel_second_secfpn_4x8_cyclic_20e_nus.py']\n\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\nvoxel_size = [0.075, 0.075, 0.2]\npoint_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\nmodel = dict(\n    pts_voxel_layer=dict(\n        voxel_size=voxel_size, point_cloud_range=point_cloud_range),\n    pts_middle_encoder=dict(sparse_shape=[41, 1440, 1440]),\n    pts_bbox_head=dict(\n        bbox_coder=dict(\n            voxel_size=voxel_size[:2], pc_range=point_cloud_range[:2])),\n    train_cfg=dict(\n        pts=dict(\n            grid_size=[1440, 1440, 40],\n            voxel_size=voxel_size,\n            point_cloud_range=point_cloud_range)),\n    test_cfg=dict(\n        pts=dict(voxel_size=voxel_size[:2], pc_range=point_cloud_range[:2])))\n\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\n\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'nuscenes_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(\n            car=5,\n            truck=5,\n            bus=5,\n            trailer=5,\n            construction_vehicle=5,\n            traffic_cone=5,\n            barrier=5,\n            motorcycle=5,\n            bicycle=5,\n            pedestrian=5)),\n    classes=class_names,\n    sample_groups=dict(\n        car=2,\n        truck=3,\n        construction_vehicle=7,\n        bus=4,\n        trailer=6,\n        barrier=2,\n        motorcycle=6,\n        bicycle=6,\n        pedestrian=2,\n        traffic_cone=2),\n    points_loader=dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        
sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_0075voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus.py",
    "content": "_base_ = ['./centerpoint_0075voxel_second_secfpn_4x8_cyclic_20e_nus.py']\n\nmodel = dict(test_cfg=dict(pts=dict(nms_type='circle')))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_0075voxel_second_secfpn_dcn_4x8_cyclic_20e_nus.py",
    "content": "_base_ = ['./centerpoint_0075voxel_second_secfpn_4x8_cyclic_20e_nus.py']\n\nmodel = dict(\n    pts_bbox_head=dict(\n        separate_head=dict(\n            type='DCNSeparateHead',\n            dcn_config=dict(\n                type='DCN',\n                in_channels=64,\n                out_channels=64,\n                kernel_size=3,\n                padding=1,\n                groups=4),\n            init_bias=-2.19,\n            final_kernel=3)))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_0075voxel_second_secfpn_dcn_4x8_cyclic_flip-tta_20e_nus.py",
    "content": "_base_ = './centerpoint_0075voxel_second_secfpn_dcn_4x8_cyclic_20e_nus.py'\n\npoint_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]\nfile_client_args = dict(backend='disk')\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        # Add double-flip augmentation\n        flip=True,\n        pcd_horizontal_flip=True,\n        pcd_vertical_flip=True,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D', sync_2d=False),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_0075voxel_second_secfpn_dcn_4x8_cyclic_tta_20e_nus.py",
    "content": "_base_ = './centerpoint_0075voxel_second_secfpn_dcn_4x8_cyclic_20e_nus.py'\n\ntest_cfg = dict(pts=dict(use_rotate_nms=True, max_num=500))\n\npoint_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]\nfile_client_args = dict(backend='disk')\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=[0.95, 1.0, 1.05],\n        # Add double-flip augmentation\n        flip=True,\n        pcd_horizontal_flip=True,\n        pcd_vertical_flip=True,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D', sync_2d=False),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus.py",
    "content": "_base_ = ['./centerpoint_0075voxel_second_secfpn_4x8_cyclic_20e_nus.py']\n\nmodel = dict(\n    pts_bbox_head=dict(\n        separate_head=dict(\n            type='DCNSeparateHead',\n            dcn_config=dict(\n                type='DCN',\n                in_channels=64,\n                out_channels=64,\n                kernel_size=3,\n                padding=1,\n                groups=4),\n            init_bias=-2.19,\n            final_kernel=3)),\n    test_cfg=dict(pts=dict(nms_type='circle')))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_flip-tta_20e_nus.py",
    "content": "_base_ = './centerpoint_0075voxel_second_secfpn_dcn_' \\\n         'circlenms_4x8_cyclic_20e_nus.py'\n\npoint_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]\nfile_client_args = dict(backend='disk')\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        # Add double-flip augmentation\n        flip=True,\n        pcd_horizontal_flip=True,\n        pcd_vertical_flip=True,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D', sync_2d=False),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_01voxel_second_secfpn_4x8_cyclic_20e_nus.py",
    "content": "_base_ = [\n    '../_base_/datasets/nus-3d.py',\n    '../_base_/models/centerpoint_01voxel_second_secfpn_nus.py',\n    '../_base_/schedules/cyclic_20e.py', '../_base_/default_runtime.py'\n]\n\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\nmodel = dict(\n    pts_voxel_layer=dict(point_cloud_range=point_cloud_range),\n    pts_bbox_head=dict(bbox_coder=dict(pc_range=point_cloud_range[:2])),\n    # model training and testing settings\n    train_cfg=dict(pts=dict(point_cloud_range=point_cloud_range)),\n    test_cfg=dict(pts=dict(pc_range=point_cloud_range[:2])))\n\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\n\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'nuscenes_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(\n            car=5,\n            truck=5,\n            bus=5,\n            trailer=5,\n            construction_vehicle=5,\n            traffic_cone=5,\n            barrier=5,\n            motorcycle=5,\n            bicycle=5,\n            pedestrian=5)),\n    classes=class_names,\n    sample_groups=dict(\n        car=2,\n        truck=3,\n        construction_vehicle=7,\n        bus=4,\n        trailer=6,\n        barrier=2,\n        motorcycle=6,\n        bicycle=6,\n        pedestrian=2,\n        traffic_cone=2),\n    points_loader=dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True),\n   
 dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    train=dict(\n        type='CBGSDataset',\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'nuscenes_infos_train.pkl',\n            pipeline=train_pipeline,\n            classes=class_names,\n            test_mode=False,\n            use_valid_flag=True,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR')),\n    val=dict(pipeline=test_pipeline, classes=class_names),\n    test=dict(pipeline=test_pipeline, classes=class_names))\n\nevaluation = dict(interval=20)\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus.py",
    "content": "_base_ = ['./centerpoint_01voxel_second_secfpn_4x8_cyclic_20e_nus.py']\n\nmodel = dict(test_cfg=dict(pts=dict(nms_type='circle')))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_01voxel_second_secfpn_dcn_4x8_cyclic_20e_nus.py",
    "content": "_base_ = ['./centerpoint_01voxel_second_secfpn_4x8_cyclic_20e_nus.py']\n\nmodel = dict(\n    pts_bbox_head=dict(\n        separate_head=dict(\n            type='DCNSeparateHead',\n            dcn_config=dict(\n                type='DCN',\n                in_channels=64,\n                out_channels=64,\n                kernel_size=3,\n                padding=1,\n                groups=4),\n            init_bias=-2.19,\n            final_kernel=3)))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_01voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus.py",
    "content": "_base_ = ['./centerpoint_01voxel_second_secfpn_4x8_cyclic_20e_nus.py']\n\nmodel = dict(\n    pts_bbox_head=dict(\n        separate_head=dict(\n            type='DCNSeparateHead',\n            dcn_config=dict(\n                type='DCN',\n                in_channels=64,\n                out_channels=64,\n                kernel_size=3,\n                padding=1,\n                groups=4),\n            init_bias=-2.19,\n            final_kernel=3)),\n    test_cfg=dict(pts=dict(nms_type='circle')))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_02pillar_second_secfpn_4x8_cyclic_20e_nus.py",
    "content": "_base_ = [\n    '../_base_/datasets/nus-3d.py',\n    '../_base_/models/centerpoint_02pillar_second_secfpn_nus.py',\n    '../_base_/schedules/cyclic_20e.py', '../_base_/default_runtime.py'\n]\n\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\n\nmodel = dict(\n    pts_voxel_layer=dict(point_cloud_range=point_cloud_range),\n    pts_voxel_encoder=dict(point_cloud_range=point_cloud_range),\n    pts_bbox_head=dict(bbox_coder=dict(pc_range=point_cloud_range[:2])),\n    # model training and testing settings\n    train_cfg=dict(pts=dict(point_cloud_range=point_cloud_range)),\n    test_cfg=dict(pts=dict(pc_range=point_cloud_range[:2])))\n\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\nfile_client_args = dict(backend='disk')\n\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'nuscenes_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(\n            car=5,\n            truck=5,\n            bus=5,\n            trailer=5,\n            construction_vehicle=5,\n            traffic_cone=5,\n            barrier=5,\n            motorcycle=5,\n            bicycle=5,\n            pedestrian=5)),\n    classes=class_names,\n    sample_groups=dict(\n        car=2,\n        truck=3,\n        construction_vehicle=7,\n        bus=4,\n        trailer=6,\n        barrier=2,\n        motorcycle=6,\n        bicycle=6,\n        pedestrian=2,\n        traffic_cone=2),\n    points_loader=dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        
file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    train=dict(\n        type='CBGSDataset',\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'nuscenes_infos_train.pkl',\n            pipeline=train_pipeline,\n            classes=class_names,\n            test_mode=False,\n            use_valid_flag=True,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR')),\n    val=dict(pipeline=test_pipeline, classes=class_names),\n    test=dict(pipeline=test_pipeline, classes=class_names))\n\nevaluation = dict(interval=20)\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_02pillar_second_secfpn_circlenms_4x8_cyclic_20e_nus.py",
    "content": "_base_ = ['./centerpoint_02pillar_second_secfpn_4x8_cyclic_20e_nus.py']\n\nmodel = dict(test_cfg=dict(pts=dict(nms_type='circle')))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus.py",
    "content": "_base_ = ['./centerpoint_02pillar_second_secfpn_4x8_cyclic_20e_nus.py']\n\nmodel = dict(\n    pts_bbox_head=dict(\n        separate_head=dict(\n            type='DCNSeparateHead',\n            dcn_config=dict(\n                type='DCN',\n                in_channels=64,\n                out_channels=64,\n                kernel_size=3,\n                padding=1,\n                groups=4),\n            init_bias=-2.19,\n            final_kernel=3)))\n"
  },
  {
    "path": "configs/centerpoint/centerpoint_02pillar_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus.py",
    "content": "_base_ = ['./centerpoint_02pillar_second_secfpn_4x8_cyclic_20e_nus.py']\n\nmodel = dict(\n    pts_bbox_head=dict(\n        separate_head=dict(\n            type='DCNSeparateHead',\n            dcn_config=dict(\n                type='DCN',\n                in_channels=64,\n                out_channels=64,\n                kernel_size=3,\n                padding=1,\n                groups=4),\n            init_bias=-2.19,\n            final_kernel=3)),\n    test_cfg=dict(pts=dict(nms_type='circle')))\n"
  },
  {
    "path": "configs/dynamic_voxelization/README.md",
    "content": "# Dynamic Voxelization\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement Dynamic Voxelization proposed in  and provide its results and models on KITTI dataset.\n```\n@article{zhou2019endtoend,\n    title={End-to-End Multi-View Fusion for 3D Object Detection in LiDAR Point Clouds},\n    author={Yin Zhou and Pei Sun and Yu Zhang and Dragomir Anguelov and Jiyang Gao and Tom Ouyang and James Guo and Jiquan Ngiam and Vijay Vasudevan},\n    year={2019},\n    eprint={1910.06528},\n    archivePrefix={arXiv},\n    primaryClass={cs.CV}\n}\n\n```\n\n## Results\n\n### KITTI\n\n|  Model   |Class| Lr schd | Mem (GB) | Inf time (fps) | mAP | Download |\n| :---------: | :-----: |:-----: | :------: | :------------: | :----: | :------: |\n|[SECOND](./dv_second_secfpn_6x8_80e_kitti-3d-car.py)|Car    |cyclic 80e|5.5||78.83|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_second_secfpn_6x8_80e_kitti-3d-car/dv_second_secfpn_6x8_80e_kitti-3d-car_20200620_235228-ac2c1c0c.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_second_secfpn_6x8_80e_kitti-3d-car/dv_second_secfpn_6x8_80e_kitti-3d-car_20200620_235228.log.json)|\n|[SECOND](./dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py)| 3 Class|cosine 80e|5.5||65.10|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class_20200620_231010-6aa607d3.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class_20200620_231010.log.json)|\n|[PointPillars](./dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py)| Car|cyclic 80e|4.7||77.76|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20200620_230844-ee7b75c9.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20200620_230844.log.json)|\n"
  },
  {
    "path": "configs/dynamic_voxelization/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py",
    "content": "_base_ = '../pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py'\n\nvoxel_size = [0.16, 0.16, 4]\npoint_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]\n\nmodel = dict(\n    type='DynamicVoxelNet',\n    voxel_layer=dict(\n        max_num_points=-1,\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(-1, -1)),\n    voxel_encoder=dict(\n        type='DynamicPillarFeatureNet',\n        in_channels=4,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        point_cloud_range=point_cloud_range))\n"
  },
  {
    "path": "configs/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py",
    "content": "_base_ = '../second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py'\n\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\nvoxel_size = [0.05, 0.05, 0.1]\n\nmodel = dict(\n    type='DynamicVoxelNet',\n    voxel_layer=dict(\n        _delete_=True,\n        max_num_points=-1,\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(-1, -1)),\n    voxel_encoder=dict(\n        _delete_=True,\n        type='DynamicSimpleVFE',\n        voxel_size=voxel_size,\n        point_cloud_range=point_cloud_range))\n\n# optimizer\nlr = 0.003  # max learning rate\noptimizer = dict(\n    _delete_=True,\n    type='AdamW',\n    lr=lr,\n    betas=(0.95, 0.99),  # the momentum is change during training\n    weight_decay=0.001)\nlr_config = dict(\n    _delete_=True,\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=1.0 / 10,\n    min_lr_ratio=1e-5)\nmomentum_config = None\n"
  },
  {
    "path": "configs/dynamic_voxelization/dv_second_secfpn_6x8_80e_kitti-3d-car.py",
    "content": "_base_ = '../second/hv_second_secfpn_6x8_80e_kitti-3d-car.py'\n\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\nvoxel_size = [0.05, 0.05, 0.1]\n\nmodel = dict(\n    type='DynamicVoxelNet',\n    voxel_layer=dict(\n        _delete_=True,\n        max_num_points=-1,\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(-1, -1)),\n    voxel_encoder=dict(\n        _delete_=True,\n        type='DynamicSimpleVFE',\n        voxel_size=voxel_size,\n        point_cloud_range=point_cloud_range))\n"
  },
  {
    "path": "configs/fp16/README.md",
    "content": "# Mixed Precision Training\n\n## Introduction\n\n[OTHERS]\n\nWe implement mixed precision training and apply it to VoxelNets (e.g., SECOND and PointPillars).\nThe results are in the following tables.\n\n**Note**: For mixed precision training, we currently do not support PointNet-based methods (e.g., VoteNet).\nMixed precision training for PointNet-based methods will be supported in the future release.\n\n## Results\n\n### SECOND on KITTI dataset\n|  Backbone   |Class| Lr schd | FP32 Mem (GB) | FP16 Mem (GB) | FP32 mAP | FP16 mAP |Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: | :------: |\n|    [SECFPN](./hv_second_secfpn_fp16_6x8_80e_kitti-3d-car.py)| Car |cyclic 80e|5.4|2.9|79.07|78.72|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fp16/hv_second_secfpn_fp16_6x8_80e_kitti-3d-car/hv_second_secfpn_fp16_6x8_80e_kitti-3d-car_20200924_211301-1f5ad833.pth)&#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fp16/hv_second_secfpn_fp16_6x8_80e_kitti-3d-car/hv_second_secfpn_fp16_6x8_80e_kitti-3d-car_20200924_211301.log.json)|\n|    [SECFPN](./hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py)| 3 Class |cyclic 80e|5.4|2.9|64.41|67.4|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fp16/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class_20200925_110059-05f67bdf.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fp16/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class_20200925_110059.log.json)|\n\n### PointPillars on nuScenes dataset\n|  Backbone   | Lr schd | FP32 Mem (GB) | FP16 Mem (GB) | FP32 mAP | FP32 NDS| FP16 mAP | FP16 NDS| Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :----: |:----: | :------: |\n|[SECFPN](./hv_pointpillars_secfpn_sbn-all_fp16_2x8_2x_nus-3d.py)|2x|16.4|8.37|35.17|49.7|35.19|50.27|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fp16/hv_pointpillars_secfpn_sbn-all_fp16_2x8_2x_nus-3d/hv_pointpillars_secfpn_sbn-all_fp16_2x8_2x_nus-3d_20201020_222626-c3f0483e.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fp16/hv_pointpillars_secfpn_sbn-all_fp16_2x8_2x_nus-3d/hv_pointpillars_secfpn_sbn-all_fp16_2x8_2x_nus-3d_20201020_222626.log.json)|\n|[FPN](./hv_pointpillars_fpn_sbn-all_fp16_2x8_2x_nus-3d.py)|2x|16.4|8.40|40.0|53.3|39.26|53.26|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fp16/hv_pointpillars_fpn_sbn-all_fp16_2x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_fp16_2x8_2x_nus-3d_20201021_120719-269f9dd6.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fp16/hv_pointpillars_fpn_sbn-all_fp16_2x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_fp16_2x8_2x_nus-3d_20201021_120719.log.json)|\n\n**Note**:\n1. With mixed precision training, we can train PointPillars with nuScenes dataset on 8 Titan XP GPUS with batch size of 2.\nThis will cause OOM error without mixed precision training.\n2. The loss scale for PointPillars on nuScenes dataset is specifically tuned to avoid the loss to be Nan. We find 32 is more stable than 512, though loss scale 32 still cause Nan sometimes.\n"
  },
  {
    "path": "configs/fp16/hv_pointpillars_fpn_sbn-all_fp16_2x8_2x_nus-3d.py",
    "content": "_base_ = '../pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py'\ndata = dict(samples_per_gpu=2, workers_per_gpu=2)\n# fp16 settings, the loss scale is specifically tuned to avoid Nan\nfp16 = dict(loss_scale=32.)\n"
  },
  {
    "path": "configs/fp16/hv_pointpillars_regnet-400mf_fpn_sbn-all_fp16_2x8_2x_nus-3d.py",
    "content": "_base_ = '../regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py'\ndata = dict(samples_per_gpu=2, workers_per_gpu=2)\n# fp16 settings, the loss scale is specifically tuned to avoid Nan\nfp16 = dict(loss_scale=32.)\n"
  },
  {
    "path": "configs/fp16/hv_pointpillars_secfpn_sbn-all_fp16_2x8_2x_nus-3d.py",
    "content": "_base_ = '../pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py'\ndata = dict(samples_per_gpu=2, workers_per_gpu=2)\n# fp16 settings, the loss scale is specifically tuned to avoid Nan\nfp16 = dict(loss_scale=32.)\n"
  },
  {
    "path": "configs/fp16/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py",
    "content": "_base_ = '../second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py'\n# fp16 settings\nfp16 = dict(loss_scale=512.)\n"
  },
  {
    "path": "configs/fp16/hv_second_secfpn_fp16_6x8_80e_kitti-3d-car.py",
    "content": "_base_ = '../second/hv_second_secfpn_6x8_80e_kitti-3d-car.py'\n# fp16 settings\nfp16 = dict(loss_scale=512.)\n"
  },
  {
    "path": "configs/free_anchor/README.md",
    "content": "# FreeAnchor for 3D Object Detection\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement FreeAnchor in 3D detection systems and provide their first results with PointPillars on nuScenes dataset.\nWith the implemented `FreeAnchor3DHead`, a PointPillar detector with a big backbone (e.g., RegNet-3.2GF) achieves top performance\non the nuScenes benchmark.\n\n```\n@inproceedings{zhang2019freeanchor,\n  title   =  {{FreeAnchor}: Learning to Match Anchors for Visual Object Detection},\n  author  =  {Zhang, Xiaosong and Wan, Fang and Liu, Chang and Ji, Rongrong and Ye, Qixiang},\n  booktitle =  {Neural Information Processing Systems},\n  year    =  {2019}\n}\n```\n\n## Usage\n\n### Modify config\n\nAs in the [baseline config](hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py), we only need to replace the head of an existing one-stage detector to use FreeAnchor head.\nSince the config is inherit from a common detector head, `_delete_=True` is necessary to avoid conflicts.\nThe hyperparameters are specifically tuned according to the original paper.\n\n```python\n_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_lyft.py',\n    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    pts_bbox_head=dict(\n        _delete_=True,\n        type='FreeAnchor3DHead',\n        num_classes=10,\n        in_channels=256,\n        feat_channels=256,\n        use_direction_classifier=True,\n        pre_anchor_topk=25,\n        bbox_thr=0.5,\n        gamma=2.0,\n        alpha=0.5,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],\n            scales=[1, 2, 4],\n            sizes=[\n                [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)\n                [0.5774, 1.7321, 1.],  # 1/sqrt(3)\n                [1., 1., 1.],\n                [0.4, 0.4, 1],\n            ],\n            custom_values=[0, 0],\n            rotations=[0, 1.57],\n            reshape_out=True),\n        assigner_per_size=False,\n        diff_rad_by_sin=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=0.8),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg = dict(\n        pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.25, 0.25])))\n```\n\n## Results\n\n### PointPillars\n\n|  Backbone   |FreeAnchor|Lr schd | Mem (GB) | Inf time (fps) | mAP |NDS| Download |\n| :---------: |:-----: |:-----: | :------: | :------------: | :----: |:----: | :------: |\n|[FPN](../pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py)|✗|2x|17.1||40.0|53.3|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405-2fa62f3d.pth) &#124; 
[log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405.log.json)|\n|[FPN](./hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py)|✓|2x|16.2||43.7|55.3|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20200628_210537-09d359fc.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20200628_210537.log.json)|\n|[RegNetX-400MF-FPN](../regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py)|✗|2x|17.3||44.8|56.4|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239-c694dce7.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239.log.json)|\n|[RegNetX-400MF-FPN](./hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py)|✓|2x|17.7||47.9|58.6|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20200629_050311-a334765d.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20200629_050311.log.json)|\n|[RegNetX-1.6GF-FPN](./hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py)|✓|2x|24.3||51.2|60.8|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20200629_105446-6ffa59cb.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20200629_105446.log.json)|\n|[RegNetX-1.6GF-FPN](./hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d.py)*|✓|3x|24.3||53.0|62.2|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d_20200701_201531-036f7de3.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d_20200701_201531.log.json)|\n|[RegNetX-3.2GF-FPN](./hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py)|✓|2x|29.5||52.2|62.0|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20200629_055854-658125b0.pth) &#124; 
[log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20200629_055854.log.json)|\n|[RegNetX-3.2GF-FPN](./hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d.py)*|✓|3x|29.5||55.09|63.5|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d_20200629_181452-297fdc66.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d_20200629_181452.log.json)|\n\n**Note**: Models marked with `*` are trained with stronger augmentation: vertical flip in bird's-eye view, global translation, and a larger range of global rotation.\n"
  },
  {
    "path": "configs/free_anchor/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_nus.py',\n    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    pts_bbox_head=dict(\n        _delete_=True,\n        type='FreeAnchor3DHead',\n        num_classes=10,\n        in_channels=256,\n        feat_channels=256,\n        use_direction_classifier=True,\n        pre_anchor_topk=25,\n        bbox_thr=0.5,\n        gamma=2.0,\n        alpha=0.5,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],\n            scales=[1, 2, 4],\n            sizes=[\n                [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)\n                [0.5774, 1.7321, 1.],  # 1/sqrt(3)\n                [1., 1., 1.],\n                [0.4, 0.4, 1],\n            ],\n            custom_values=[0, 0],\n            rotations=[0, 1.57],\n            reshape_out=True),\n        assigner_per_size=False,\n        diff_rad_by_sin=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=0.8),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.25, 0.25])))\n"
  },
  {
    "path": "configs/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py",
    "content": "_base_ = './hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py'\n\nmodel = dict(\n    pretrained=dict(pts='open-mmlab://regnetx_1.6gf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch='regnetx_1.6gf',\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[168, 408, 912]))\n"
  },
  {
    "path": "configs/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d.py",
    "content": "_base_ = './hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py'\n\nmodel = dict(\n    pretrained=dict(pts='open-mmlab://regnetx_1.6gf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch='regnetx_1.6gf',\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[168, 408, 912]))\n\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-50, -50, -5, 50, 50, 3]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n]\n# file_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\nfile_client_args = dict(\n    backend='petrel',\n    path_mapping=dict({\n        './data/nuscenes/': 's3://nuscenes/nuscenes/',\n        'data/nuscenes/': 's3://nuscenes/nuscenes/'\n    }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.7854, 0.7854],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0.2, 0.2, 0.2]),\n    dict(\n        type='RandomFlip3D',\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n\nlr_config = dict(step=[28, 34])\nevaluation = dict(interval=36)\ntotal_epochs = 36\n"
  },
  {
    "path": "configs/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py",
    "content": "_base_ = './hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py'\n\nmodel = dict(\n    pretrained=dict(pts='open-mmlab://regnetx_3.2gf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch='regnetx_3.2gf',\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[192, 432, 1008]))\n"
  },
  {
    "path": "configs/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d.py",
    "content": "_base_ = './hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py'\n\nmodel = dict(\n    pretrained=dict(pts='open-mmlab://regnetx_3.2gf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch='regnetx_3.2gf',\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[192, 432, 1008]))\n\n# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-50, -50, -5, 50, 50, 3]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n]\n# file_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\nfile_client_args = dict(\n    backend='petrel',\n    path_mapping=dict({\n        './data/nuscenes/': 's3://nuscenes/nuscenes/',\n        'data/nuscenes/': 's3://nuscenes/nuscenes/'\n    }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.7854, 0.7854],\n        scale_ratio_range=[0.9, 1.1],\n        translation_std=[0.2, 0.2, 0.2]),\n    dict(\n        type='RandomFlip3D',\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\n\ndata = dict(train=dict(pipeline=train_pipeline))\nlr_config = dict(step=[28, 34])\nevaluation = dict(interval=36)\ntotal_epochs = 36\n"
  },
  {
    "path": "configs/free_anchor/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py",
    "content": "_base_ = './hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d.py'\n\nmodel = dict(\n    pretrained=dict(pts='open-mmlab://regnetx_400mf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch='regnetx_400mf',\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[64, 160, 384]))\n"
  },
  {
    "path": "configs/h3dnet/README.md",
    "content": "# H3DNet: 3D Object Detection Using Hybrid Geometric Primitives\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement H3DNet and provide the result and checkpoints on ScanNet datasets.\n```\n@inproceedings{zhang2020h3dnet,\n    author = {Zhang, Zaiwei and Sun, Bo and Yang, Haitao and Huang, Qixing},\n    title = {H3DNet: 3D Object Detection Using Hybrid Geometric Primitives},\n    booktitle = {Proceedings of the European Conference on Computer Vision},\n    year = {2020}\n}\n```\n\n## Results\n\n### ScanNet\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 |AP@0.5| Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|    [MultiBackbone](./h3dnet_3x8_scannet-3d-18class.py)     |  3x    |7.9||66.43|48.01|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/h3dnet/h3dnet_scannet-3d-18class/h3dnet_scannet-3d-18class_20200830_000136-02e36246.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/h3dnet/h3dnet_scannet-3d-18class/h3dnet_scannet-3d-18class_20200830_000136.log.json) |\n"
  },
  {
    "path": "configs/h3dnet/h3dnet_3x8_scannet-3d-18class.py",
    "content": "_base_ = [\n    '../_base_/datasets/scannet-3d-18class.py', '../_base_/models/h3dnet.py',\n    '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py'\n]\n\n# model settings\nmodel = dict(\n    rpn_head=dict(\n        num_classes=18,\n        bbox_coder=dict(\n            type='PartialBinBasedBBoxCoder',\n            num_sizes=18,\n            num_dir_bins=24,\n            with_rot=False,\n            mean_sizes=[[0.76966727, 0.8116021, 0.92573744],\n                        [1.876858, 1.8425595, 1.1931566],\n                        [0.61328, 0.6148609, 0.7182701],\n                        [1.3955007, 1.5121545, 0.83443564],\n                        [0.97949594, 1.0675149, 0.6329687],\n                        [0.531663, 0.5955577, 1.7500148],\n                        [0.9624706, 0.72462326, 1.1481868],\n                        [0.83221924, 1.0490936, 1.6875663],\n                        [0.21132214, 0.4206159, 0.5372846],\n                        [1.4440073, 1.8970833, 0.26985747],\n                        [1.0294262, 1.4040797, 0.87554324],\n                        [1.3766412, 0.65521795, 1.6813129],\n                        [0.6650819, 0.71111923, 1.298853],\n                        [0.41999173, 0.37906948, 1.7513971],\n                        [0.59359556, 0.5912492, 0.73919016],\n                        [0.50867593, 0.50656086, 0.30136237],\n                        [1.1511526, 1.0546296, 0.49706793],\n                        [0.47535285, 0.49249494, 0.5802117]])),\n    roi_head=dict(\n        bbox_head=dict(\n            num_classes=18,\n            bbox_coder=dict(\n                type='PartialBinBasedBBoxCoder',\n                num_sizes=18,\n                num_dir_bins=24,\n                with_rot=False,\n                mean_sizes=[[0.76966727, 0.8116021, 0.92573744],\n                            [1.876858, 1.8425595, 1.1931566],\n                            [0.61328, 0.6148609, 0.7182701],\n                            [1.3955007, 1.5121545, 0.83443564],\n                            [0.97949594, 1.0675149, 0.6329687],\n                            [0.531663, 0.5955577, 1.7500148],\n                            [0.9624706, 0.72462326, 1.1481868],\n                            [0.83221924, 1.0490936, 1.6875663],\n                            [0.21132214, 0.4206159, 0.5372846],\n                            [1.4440073, 1.8970833, 0.26985747],\n                            [1.0294262, 1.4040797, 0.87554324],\n                            [1.3766412, 0.65521795, 1.6813129],\n                            [0.6650819, 0.71111923, 1.298853],\n                            [0.41999173, 0.37906948, 1.7513971],\n                            [0.59359556, 0.5912492, 0.73919016],\n                            [0.50867593, 0.50656086, 0.30136237],\n                            [1.1511526, 1.0546296, 0.49706793],\n                            [0.47535285, 0.49249494, 0.5802117]]))))\n\ndata = dict(samples_per_gpu=3, workers_per_gpu=2)\n\n# optimizer\n# yapf:disable\nlog_config = dict(\n    interval=30,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n"
  },
  {
    "path": "configs/imvotenet/README.md",
    "content": "# ImVoteNet: Boosting 3D Object Detection in Point Clouds with Image Votes\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement ImVoteNet and provide the result and checkpoints on SUNRGBD.\n\n```\n@inproceedings{qi2020imvotenet,\n  title={Imvotenet: Boosting 3D object detection in point clouds with image votes},\n  author={Qi, Charles R and Chen, Xinlei and Litany, Or and Guibas, Leonidas J},\n  booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},\n  pages={4404--4413},\n  year={2020}\n}\n```\n\n## Results\n\n### SUNRGBD-2D (Stage 1, image branch pre-train)\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 |AP@0.5| Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|    [PointNet++](./imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py)     |   |2.1| ||62.70|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class_20210323_173222-cad62aeb.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class_20210323_173222.log.json)|\n\n### SUNRGBD-3D (Stage 2)\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 |AP@0.5| Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|    [PointNet++](./imvotenet_stage2_16x8_sunrgbd-3d-10class.py)     |  3x    |9.4| |64.04||[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class/imvotenet_stage2_16x8_sunrgbd-3d-10class_20210323_184021-d44dcb66.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class/imvotenet_stage2_16x8_sunrgbd-3d-10class_20210323_184021.log.json)|\n"
  },
  {
    "path": "configs/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py",
    "content": "_base_ = [\n    '../_base_/datasets/sunrgbd-3d-10class.py', '../_base_/default_runtime.py',\n    '../_base_/models/imvotenet_image.py'\n]\n\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 480), (1333, 504), (1333, 528), (1333, 552),\n                   (1333, 576), (1333, 600)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 600),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(times=1, dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[6])\ntotal_epochs = 8\n\nload_from = 'http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'  # noqa\n"
  },
  {
    "path": "configs/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py",
    "content": "_base_ = [\n    '../_base_/datasets/sunrgbd-3d-10class.py',\n    '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py',\n    '../_base_/models/imvotenet_image.py'\n]\n\nclass_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',\n               'night_stand', 'bookshelf', 'bathtub')\n\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nmodel = dict(\n    pts_backbone=dict(\n        type='PointNet2SASSG',\n        in_channels=4,\n        num_points=(2048, 1024, 512, 256),\n        radius=(0.2, 0.4, 0.8, 1.2),\n        num_samples=(64, 32, 16, 16),\n        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                     (128, 128, 256)),\n        fp_channels=((256, 256), (256, 256)),\n        norm_cfg=dict(type='BN2d'),\n        sa_cfg=dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=True)),\n    pts_bbox_heads=dict(\n        common=dict(\n            type='VoteHead',\n            num_classes=10,\n            bbox_coder=dict(\n                type='PartialBinBasedBBoxCoder',\n                num_sizes=10,\n                num_dir_bins=12,\n                with_rot=True,\n                mean_sizes=[[2.114256, 1.620300, 0.927272],\n                            [0.791118, 1.279516, 0.718182],\n                            [0.923508, 1.867419, 0.845495],\n                            [0.591958, 0.552978, 0.827272],\n                            [0.699104, 0.454178, 0.75625],\n                            [0.69519, 1.346299, 0.736364],\n                            [0.528526, 1.002642, 1.172878],\n                            [0.500618, 0.632163, 0.683424],\n                            [0.404671, 1.071108, 1.688889],\n                            [0.76584, 1.398258, 0.472728]]),\n            pred_layer_cfg=dict(\n                in_channels=128, shared_conv_channels=(128, 128), bias=True),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            objectness_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.2, 0.8],\n                reduction='sum',\n                loss_weight=5.0),\n            center_loss=dict(\n                type='ChamferDistance',\n                mode='l2',\n                reduction='sum',\n                loss_src_weight=10.0,\n                loss_dst_weight=10.0),\n            dir_class_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n            dir_res_loss=dict(\n                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n            size_class_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n            size_res_loss=dict(\n                type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),\n            semantic_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),\n        joint=dict(\n            vote_module_cfg=dict(\n                in_channels=512,\n                vote_per_seed=1,\n                gt_per_seed=3,\n                conv_channels=(512, 256),\n                conv_cfg=dict(type='Conv1d'),\n                norm_cfg=dict(type='BN1d'),\n                norm_feats=True,\n                vote_loss=dict(\n                    type='ChamferDistance',\n                    mode='l1',\n                    reduction='none',\n                    
loss_dst_weight=10.0)),\n            vote_aggregation_cfg=dict(\n                type='PointSAModule',\n                num_point=256,\n                radius=0.3,\n                num_sample=16,\n                mlp_channels=[512, 128, 128, 128],\n                use_xyz=True,\n                normalize_xyz=True)),\n        pts=dict(\n            vote_module_cfg=dict(\n                in_channels=256,\n                vote_per_seed=1,\n                gt_per_seed=3,\n                conv_channels=(256, 256),\n                conv_cfg=dict(type='Conv1d'),\n                norm_cfg=dict(type='BN1d'),\n                norm_feats=True,\n                vote_loss=dict(\n                    type='ChamferDistance',\n                    mode='l1',\n                    reduction='none',\n                    loss_dst_weight=10.0)),\n            vote_aggregation_cfg=dict(\n                type='PointSAModule',\n                num_point=256,\n                radius=0.3,\n                num_sample=16,\n                mlp_channels=[256, 128, 128, 128],\n                use_xyz=True,\n                normalize_xyz=True)),\n        img=dict(\n            vote_module_cfg=dict(\n                in_channels=256,\n                vote_per_seed=1,\n                gt_per_seed=3,\n                conv_channels=(256, 256),\n                conv_cfg=dict(type='Conv1d'),\n                norm_cfg=dict(type='BN1d'),\n                norm_feats=True,\n                vote_loss=dict(\n                    type='ChamferDistance',\n                    mode='l1',\n                    reduction='none',\n                    loss_dst_weight=10.0)),\n            vote_aggregation_cfg=dict(\n                type='PointSAModule',\n                num_point=256,\n                radius=0.3,\n                num_sample=16,\n                mlp_channels=[256, 128, 128, 128],\n                use_xyz=True,\n                normalize_xyz=True)),\n        loss_weights=[0.4, 0.3, 0.3]),\n    img_mlp=dict(\n        in_channel=18,\n        conv_channels=(256, 256),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        act_cfg=dict(type='ReLU')),\n    fusion_layer=dict(\n        type='VoteFusion',\n        num_classes=len(class_names),\n        max_imvote_per_pixel=3),\n    num_sampled_seed=1024,\n    freeze_img_branch=True,\n\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')),\n    test_cfg=dict(\n        img_rcnn=dict(score_thr=0.1),\n        pts=dict(\n            sample_mod='seed',\n            nms_thr=0.25,\n            score_thr=0.05,\n            per_class_proposal=True)))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations3D'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 600), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.0),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n    ),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.523599, 0.523599],\n        scale_ratio_range=[0.85, 1.15],\n        shift_height=True),\n    dict(type='IndoorPointSample', num_points=20000),\n    
dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D',\n        keys=[\n            'img', 'gt_bboxes', 'gt_labels', 'points', 'gt_bboxes_3d',\n            'gt_labels_3d', 'calib'\n        ])\n]\n\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 600),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip', flip_ratio=0.0),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.5,\n            ),\n            dict(type='IndoorPointSample', num_points=20000),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['img', 'points', 'calib'])\n        ]),\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\n# may also use your own pre-trained image branch\nload_from = 'https://download.openmmlab.com/mmdetection3d/v0.1.0_models/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class_20210323_173222-cad62aeb.pth'  # noqa\n"
  },
  {
    "path": "configs/mvxnet/README.md",
    "content": "# MVX-Net: Multimodal VoxelNet for 3D Object Detection\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement MVX-Net and provide its results and models on KITTI dataset.\n```\n@inproceedings{sindagi2019mvx,\n  title={MVX-Net: Multimodal voxelnet for 3D object detection},\n  author={Sindagi, Vishwanath A and Zhou, Yin and Tuzel, Oncel},\n  booktitle={2019 International Conference on Robotics and Automation (ICRA)},\n  pages={7276--7282},\n  year={2019},\n  organization={IEEE}\n}\n\n```\n\n## Results\n\n### KITTI\n\n|  Backbone   |Class| Lr schd | Mem (GB) | Inf time (fps) | mAP | Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|    [SECFPN](./dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py)|3 Class|cosine 80e|6.7||63.0|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class_20200621_003904-10140f2d.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class_20200621_003904.log.json)|\n"
  },
  {
    "path": "configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py",
    "content": "# model settings\nvoxel_size = [0.05, 0.05, 0.1]\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\n\nmodel = dict(\n    type='DynamicMVXFasterRCNN',\n    img_backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe'),\n    img_neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    pts_voxel_layer=dict(\n        max_num_points=-1,\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(-1, -1),\n    ),\n    pts_voxel_encoder=dict(\n        type='DynamicVFE',\n        in_channels=4,\n        feat_channels=[64, 64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        with_cluster_center=True,\n        with_voxel_center=True,\n        point_cloud_range=point_cloud_range,\n        fusion_layer=dict(\n            type='PointFusion',\n            img_channels=256,\n            pts_channels=64,\n            mid_channels=128,\n            out_channels=128,\n            img_levels=[0, 1, 2, 3, 4],\n            align_corners=False,\n            activate_out=True,\n            fuse_out=False)),\n    pts_middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=128,\n        sparse_shape=[41, 1600, 1408],\n        order=('conv', 'norm', 'act')),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -1.78, 70.4, 40.0, -1.78],\n            ],\n            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        assigner_per_size=True,\n        diff_rad_by_sin=True,\n        assign_per_class=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            assigner=[\n                dict(  # for Pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.35,\n                    neg_iou_thr=0.2,\n                    min_pos_iou=0.2,\n                    ignore_iof_thr=-1),\n                dict(  # for Cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.35,\n                    
neg_iou_thr=0.2,\n                    min_pos_iou=0.2,\n                    ignore_iof_thr=-1),\n                dict(  # for Car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.45,\n                    min_pos_iou=0.45,\n                    ignore_iof_thr=-1),\n            ],\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        pts=dict(\n            use_rotate_nms=True,\n            nms_across_levels=False,\n            nms_thr=0.01,\n            score_thr=0.1,\n            min_bbox_size=0,\n            nms_pre=100,\n            max_num=50)))\n\n# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car']\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ninput_modality = dict(use_lidar=True, use_camera=True)\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='Resize',\n        img_scale=[(640, 192), (2560, 768)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0.2, 0.2, 0.2]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D',\n        keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d']),\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1280, 384),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(type='Resize', multiscale_mode='value', keep_ratio=True),\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'kitti_infos_train.pkl',\n            split='training',\n            pts_prefix='velodyne_reduced',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n        
    test_mode=False)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True))\n# Training settings\noptimizer = dict(type='AdamW', lr=0.003, betas=(0.95, 0.99), weight_decay=0.01)\n# max_norm=10 is better for SECOND\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=1.0 / 10,\n    min_lr_ratio=1e-5)\nmomentum_config = None\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\nevaluation = dict(interval=1)\n# runtime settings\ntotal_epochs = 40\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = None\n# You may need to download the model first if the network is unstable\nload_from = 'https://download.openmmlab.com/mmdetection3d/pretrain_models/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth'  # noqa\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "configs/nuimages/README.md",
    "content": "# NuImages Results\n\n## Introduction\n\n[DATASET]\n\nWe support and provide some baseline results on [nuImages dataset](https://www.nuscenes.org/nuimages).\nWe follow the class mapping in nuScenes dataset, which maps the original categories into 10 foreground categories.\nThe convert script can be found [here](https://github.com/open-mmlab/mmdetection3d/blob/master/tools/data_converter/nuimage_converter.py).\nThe baseline results include instance segmentation models, e.g., Mask R-CNN, Cascade Mask R-CNN, and HTC.\nWe will support panoptic segmentation models in the future.\n\n![demo image](../../resources/nuimages_demo.gif)\n\nThe dataset converted by the script of v0.6.0 only supports instance segmentation. Since v0.7.0, we also support to produce semantic segmentation mask of each image; thus, we can train HTC or semantic segmentation models using the dataset. To convert the nuImages dataset into COCO format, please use the command below:\n\n```shell\npython -u tools/data_converter/nuimage_converter.py --data-root ${DATA_ROOT} --version ${VERIONS} \\\n                                                    --out-dir ${OUT_DIR} --nproc ${NUM_WORKERS} --extra-tag ${TAG}\n```\n\n- `--data-root`: the root of the dataset, defaults to `./data/nuimages`.\n- `--version`: the version of the dataset, defaults to `v1.0-mini`. To get the full dataset, please use `--version v1.0-train v1.0-val v1.0-mini`\n- `--out-dir`: the output directory of annotations and semantic masks, defaults to `./data/nuimages/annotations/`.\n- `--nproc`: number of workers for data preparation, defaults to `4`. Larger number could reduce the preparation time as images are processed in parallel.\n- `--extra-tag`: extra tag of the annotations, defaults to `nuimages`. This can be used to separate different annotations processed in different time for study.\n\n## Results\n\n### Instance Segmentation\n\nWe report Mask R-CNN and Cascade Mask R-CNN results on nuimages.\n\n|Method | Backbone|Pretraining | Lr schd | Mem (GB) | Box AP  | Mask AP  |Download |\n| :---------: |:---------: | :---------: | :-----: |:-----: | :------: | :------------: | :----: |\n| Mask R-CNN| [R-50](./mask_rcnn_r50_fpn_1x_nuim.py) |IN|1x|7.4|47.8 |38.4|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_fpn_1x_nuim/mask_rcnn_r50_fpn_1x_nuim_20201008_195238-e99f5182.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_fpn_1x_nuim/mask_rcnn_r50_fpn_1x_nuim_20201008_195238.log.json)|\n| Mask R-CNN| [R-50](./mask_rcnn_r50_fpn_coco-2x_1x_nuim.py) |IN+COCO-2x|1x|7.4|49.7|40.5|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_fpn_coco-2x_1x_nuim/mask_rcnn_r50_fpn_coco-2x_1x_nuim_20201008_195238-b1742a60.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_fpn_coco-2x_1x_nuim/mask_rcnn_r50_fpn_coco-2x_1x_nuim_20201008_195238.log.json)|\n| Mask R-CNN| [R-50-CAFFE](./mask_rcnn_r50_caffe_fpn_1x_nuim.py) |IN|1x|7.0|47.7|38.2|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_caffe_fpn_1x_nuim/) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_caffe_fpn_1x_nuim/)|\n| Mask R-CNN| [R-50-CAFFE](./mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim.py) 
|IN+COCO-3x|1x|7.0|49.9|40.8|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim/mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim_20201008_195305-661a992e.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim/mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim_20201008_195305.log.json)|\n| Mask R-CNN| [R-50-CAFFE](./mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim.py) |IN+COCO-3x|20e|7.0|50.6|41.3|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_caffe_fpn_coco-3x_20e_nuim/mask_rcnn_r50_caffe_fpn_coco-3x_20e_nuim_20201009_125002-5529442c.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r50_caffe_fpn_coco-3x_20e_nuim/mask_rcnn_r50_caffe_fpn_coco-3x_20e_nuim_20201009_125002.log.json)|\n| Mask R-CNN| [R-101](./mask_rcnn_r101_fpn_1x_nuim.py) |IN|1x|10.9|48.9|39.1|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r101_fpn_1x_nuim/mask_rcnn_r101_fpn_1x_nuim_20201024_134803-65c7623a.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_r101_fpn_1x_nuim/mask_rcnn_r101_fpn_1x_nuim_20201024_134803.log.json)|\n| Mask R-CNN| [X-101_32x4d](./mask_rcnn_x101_32x4d_fpn_1x_nuim.py) |IN|1x|13.3|50.4|40.5|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_x101_32x4d_fpn_1x_nuim/mask_rcnn_x101_32x4d_fpn_1x_nuim_20201024_135741-b699ab37.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/mask_rcnn_x101_32x4d_fpn_1x_nuim/mask_rcnn_x101_32x4d_fpn_1x_nuim_20201024_135741.log.json)|\n| Cascade Mask R-CNN| [R-50](./cascade_mask_rcnn_r50_fpn_1x_nuim.py) |IN|1x|8.9|50.8|40.4|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r50_fpn_1x_nuim/cascade_mask_rcnn_r50_fpn_1x_nuim_20201008_195342-1147c036.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r50_fpn_1x_nuim/cascade_mask_rcnn_r50_fpn_1x_nuim_20201008_195342.log.json)|\n| Cascade Mask R-CNN| [R-50](./cascade_mask_rcnn_r50_fpn_coco-20e_1x_nuim.py) |IN+COCO-20e|1x|8.9|52.8|42.2|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r50_fpn_coco-20e_1x_nuim/cascade_mask_rcnn_r50_fpn_coco-20e_1x_nuim_20201009_124158-ad0540e3.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r50_fpn_coco-20e_1x_nuim/cascade_mask_rcnn_r50_fpn_coco-20e_1x_nuim_20201009_124158.log.json)|\n| Cascade Mask R-CNN| [R-50](./cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim.py) |IN+COCO-20e|20e|8.9|52.8|42.2|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim_20201009_124951-40963960.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim_20201009_124951.log.json)|\n| Cascade Mask R-CNN| [R-101](./cascade_mask_rcnn_r101_fpn_1x_nuim.py) |IN|1x|12.5|51.5|40.7|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r101_fpn_1x_nuim/cascade_mask_rcnn_r101_fpn_1x_nuim_20201024_134804-45215b1e.pth) &#124; 
[log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_r101_fpn_1x_nuim/cascade_mask_rcnn_r101_fpn_1x_nuim_20201024_134804.log.json)|\n| Cascade Mask R-CNN| [X-101_32x4d](./cascade_mask_rcnn_x101_32x4d_fpn_1x_nuim.py) |IN|1x|14.9|52.8|41.6|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_x101_32x4d_fpn_1x_nuim/cascade_mask_rcnn_x101_32x4d_fpn_1x_nuim_20201024_135753-e0e49778.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/cascade_mask_rcnn_x101_32x4d_fpn_1x_nuim/cascade_mask_rcnn_x101_32x4d_fpn_1x_nuim_20201024_135753.log.json)|\n| HTC w/o semantic|[R-50](./htc_without_semantic_r50_fpn_1x_nuim.py) |IN|1x||[model]() &#124; [log]()|\n| HTC|[R-50](./htc_r50_fpn_1x_nuim.py) |IN|1x||[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/)|\n| HTC|[R-50](./htc_r50_fpn_coco-20e_1x_nuim.py) |IN+COCO-20e|1x|11.6|53.8|43.8|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/htc_r50_fpn_coco-20e_1x_nuim/htc_r50_fpn_coco-20e_1x_nuim_20201010_070203-0b53a65e.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/htc_r50_fpn_coco-20e_1x_nuim/htc_r50_fpn_coco-20e_1x_nuim_20201010_070203.log.json)|\n| HTC|[R-50](./htc_r50_fpn_coco-20e_20e_nuim.py) |IN+COCO-20e|20e|11.6|54.8|44.4|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/htc_r50_fpn_coco-20e_20e_nuim/htc_r50_fpn_coco-20e_20e_nuim_20201008_211415-d6c60a2c.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/htc_r50_fpn_coco-20e_20e_nuim/htc_r50_fpn_coco-20e_20e_nuim_20201008_211415.log.json)|\n| HTC|[X-101_64x4d + DCN_c3-c5](./htc_x101_64x4d_fpn_dconv_c3-c5_coco-20e_16x1_20e_nuim.py) |IN+COCO-20e|20e|13.3|57.3|46.4|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/htc_x101_64x4d_fpn_dconv_c3-c5_coco-20e_16x1_20e_nuim/htc_x101_64x4d_fpn_dconv_c3-c5_coco-20e_16x1_20e_nuim_20201008_211222-0b16ac4b.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/nuimages_semseg/htc_x101_64x4d_fpn_dconv_c3-c5_coco-20e_16x1_20e_nuim/htc_x101_64x4d_fpn_dconv_c3-c5_coco-20e_16x1_20e_nuim_20201008_211222.log.json)|\n\n**Note**:\n1. `IN` means only using an ImageNet pre-trained backbone. `IN+COCO-Nx` and `IN+COCO-Ne` mean that the backbone is first pre-trained on ImageNet and the detector is then pre-trained on the COCO train2017 dataset with `Nx` and `N`-epoch schedules, respectively.\n2. All the training hyper-parameters follow the standard schedules on the COCO dataset, except that the images are resized from\n1280 x 720 to 1920 x 1080 (relative ratio 0.8 to 1.2) since the original images are of size 1600 x 900.\n3. The class order in the detectors released in v0.6.0 is different from the order in the configs because of a bug in the conversion script. This bug has been fixed since v0.7.0, and models trained with the correct class order have also been released. If you have used nuImages since v0.6.0, please re-convert the data with the conversion script using the above-mentioned command.\n"
  },
  {
    "path": "configs/nuimages/cascade_mask_rcnn_r101_fpn_1x_nuim.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_1x_nuim.py'\nmodel = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101))\n"
  },
  {
    "path": "configs/nuimages/cascade_mask_rcnn_r50_fpn_1x_nuim.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/nuim_instance.py',\n    '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    roi_head=dict(\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=10,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=10,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=10,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n        ],\n        mask_head=dict(num_classes=10)))\n"
  },
  {
    "path": "configs/nuimages/cascade_mask_rcnn_r50_fpn_coco-20e_1x_nuim.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_1x_nuim.py'\n\nload_from = 'http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth'  # noqa\n"
  },
  {
    "path": "configs/nuimages/cascade_mask_rcnn_r50_fpn_coco-20e_20e_nuim.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_1x_nuim.py'\n\n# learning policy\nlr_config = dict(step=[16, 19])\ntotal_epochs = 20\n\nload_from = 'http://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth'  # noqa\n"
  },
  {
    "path": "configs/nuimages/cascade_mask_rcnn_x101_32x4d_fpn_1x_nuim.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_1x_nuim.py'\nmodel = dict(\n    pretrained='open-mmlab://resnext101_32x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch'))\n"
  },
  {
    "path": "configs/nuimages/htc_r50_fpn_1x_nuim.py",
    "content": "_base_ = './htc_without_semantic_r50_fpn_1x_nuim.py'\nmodel = dict(\n    roi_head=dict(\n        semantic_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[8]),\n        semantic_head=dict(\n            type='FusedSemanticHead',\n            num_ins=5,\n            fusion_level=1,\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=32,\n            ignore_label=0,\n            loss_weight=0.2)))\n\ndata_root = 'data/nuimages/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),\n    dict(\n        type='Resize',\n        img_scale=[(1280, 720), (1920, 1080)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='SegRescale', scale_factor=1 / 8),\n    dict(type='DefaultFormatBundle'),\n    dict(\n        type='Collect',\n        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg'])\n]\ndata = dict(\n    train=dict(\n        seg_prefix=data_root + 'annotations/semantic_masks/',\n        pipeline=train_pipeline))\n"
  },
  {
    "path": "configs/nuimages/htc_r50_fpn_coco-20e_1x_nuim.py",
    "content": "_base_ = './htc_r50_fpn_1x_nuim.py'\n\nload_from = 'http://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319-fe28c577.pth'  # noqa\n"
  },
  {
    "path": "configs/nuimages/htc_r50_fpn_coco-20e_20e_nuim.py",
    "content": "_base_ = './htc_r50_fpn_coco-20e_1x_nuim.py'\n# learning policy\nlr_config = dict(step=[16, 19])\ntotal_epochs = 20\n"
  },
  {
    "path": "configs/nuimages/htc_without_semantic_r50_fpn_1x_nuim.py",
    "content": "_base_ = [\n    '../_base_/datasets/nuim_instance.py',\n    '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    type='HybridTaskCascade',\n    pretrained='torchvision://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        type='HybridTaskCascadeRoIHead',\n        interleaved=True,\n        mask_info_flow=True,\n        num_stages=3,\n        stage_loss_weights=[1, 0.5, 0.25],\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=10,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=10,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=10,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n          
      loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n        ],\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=[\n            dict(\n                type='HTCMaskHead',\n                with_conv_res=False,\n                num_convs=4,\n                in_channels=256,\n                conv_out_channels=256,\n                num_classes=10,\n                loss_mask=dict(\n                    type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),\n            dict(\n                type='HTCMaskHead',\n                num_convs=4,\n                in_channels=256,\n                conv_out_channels=256,\n                num_classes=10,\n                loss_mask=dict(\n                    type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),\n            dict(\n                type='HTCMaskHead',\n                num_convs=4,\n                in_channels=256,\n                conv_out_channels=256,\n                num_classes=10,\n                loss_mask=dict(\n                    type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))\n        ]),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_across_levels=False,\n            nms_pre=2000,\n            nms_post=2000,\n            max_num=2000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        rcnn=[\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.5,\n                    min_pos_iou=0.5,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.6,\n                    min_pos_iou=0.6,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.7,\n                 
   neg_iou_thr=0.7,\n                    min_pos_iou=0.7,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False)\n        ]),\n    test_cfg=dict(\n        rpn=dict(\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_post=1000,\n            max_num=1000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.001,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100,\n            mask_thr_binary=0.5)))\n"
  },
  {
    "path": "configs/nuimages/htc_x101_64x4d_fpn_dconv_c3-c5_coco-20e_16x1_20e_nuim.py",
    "content": "_base_ = './htc_r50_fpn_1x_nuim.py'\nmodel = dict(\n    pretrained='open-mmlab://resnext101_64x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n\ndata = dict(samples_per_gpu=1, workers_per_gpu=1)\n# learning policy\nlr_config = dict(step=[16, 19])\ntotal_epochs = 20\n\nload_from = 'http://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312-946fd751.pth'  # noqa\n"
  },
  {
    "path": "configs/nuimages/mask_rcnn_r101_fpn_1x_nuim.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_1x_nuim.py'\nmodel = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101))\n"
  },
  {
    "path": "configs/nuimages/mask_rcnn_r50_caffe_fpn_1x_nuim.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/nuim_instance.py',\n    '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    pretrained='open-mmlab://detectron2/resnet50_caffe',\n    backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe'),\n    roi_head=dict(\n        bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1280, 720), (1920, 1080)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1600, 900),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "configs/nuimages/mask_rcnn_r50_caffe_fpn_coco-3x_1x_nuim.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/nuim_instance.py',\n    '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    pretrained='open-mmlab://detectron2/resnet50_caffe',\n    backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe'),\n    roi_head=dict(\n        bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1280, 720), (1920, 1080)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1600, 900),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\nload_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'  # noqa\n"
  },
  {
    "path": "configs/nuimages/mask_rcnn_r50_caffe_fpn_coco-3x_20e_nuim.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/nuim_instance.py',\n    '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    pretrained='open-mmlab://detectron2/resnet50_caffe',\n    backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe'),\n    roi_head=dict(\n        bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1280, 720), (1920, 1080)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1600, 900),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\n# learning policy\nlr_config = dict(step=[16, 19])\ntotal_epochs = 20\n\nload_from = 'http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'  # noqa\n"
  },
  {
    "path": "configs/nuimages/mask_rcnn_r50_fpn_1x_nuim.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/nuim_instance.py',\n    '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))\n"
  },
  {
    "path": "configs/nuimages/mask_rcnn_r50_fpn_coco-2x_1x_nuim.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/nuim_instance.py',\n    '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))\nload_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392__segm_mAP-0.354_20200505_003907-3e542a40.pth'  # noqa\n"
  },
  {
    "path": "configs/nuimages/mask_rcnn_r50_fpn_coco-2x_1x_nus-2d.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/nuim_instance.py',\n    '../_base_/schedules/mmdet_schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(num_classes=10), mask_head=dict(num_classes=10)))\n\nfile_client_args = dict(\n    backend='petrel',\n    path_mapping=dict({\n        './data/nuscenes/': 's3://nuscenes/nuscenes/',\n        'data/nuscenes/': 's3://nuscenes/nuscenes/'\n    }))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\ntest_pipeline = [\n    dict(type='LoadImageFromFile', file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1600, 900),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata_root = 'data/nuimages/'\n# data = dict(\n#     val=dict(\n#         ann_file=data_root + 'annotations/nuimages_v1.0-mini.json'),\n#     test=dict(\n#         ann_file=data_root + 'annotations/nuimages_v1.0-mini.json'))\n"
  },
  {
    "path": "configs/nuimages/mask_rcnn_swinT_coco-2x_1x_nuim.py",
    "content": "_base_ = [\r\n    '../_base_/datasets/nuim_instance.py', '../_base_/default_runtime.py'\r\n]\r\n\r\nmodel = dict(\r\n    type='MaskRCNN',\r\n    backbone=dict(\r\n        type='SwinTransformer',\r\n        embed_dims=96,\r\n        depths=[2, 2, 6, 2],\r\n        num_heads=[3, 6, 12, 24],\r\n        window_size=7,\r\n        mlp_ratio=4,\r\n        qkv_bias=True,\r\n        qk_scale=None,\r\n        drop_rate=0.,\r\n        attn_drop_rate=0.,\r\n        drop_path_rate=0.2,\r\n        patch_norm=True,\r\n        out_indices=(0, 1, 2, 3),\r\n        with_cp=False,\r\n        convert_weights=True,\r\n    ),\r\n    neck=dict(\r\n        type='FPN',\r\n        in_channels=[96, 192, 384, 768],\r\n        out_channels=256,\r\n        num_outs=5),\r\n    rpn_head=dict(\r\n        type='RPNHead',\r\n        in_channels=256,\r\n        feat_channels=256,\r\n        anchor_generator=dict(\r\n            type='AnchorGenerator',\r\n            scales=[8],\r\n            ratios=[0.5, 1.0, 2.0],\r\n            strides=[4, 8, 16, 32, 64]),\r\n        bbox_coder=dict(\r\n            type='DeltaXYWHBBoxCoder',\r\n            target_means=[.0, .0, .0, .0],\r\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\r\n        loss_cls=dict(\r\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\r\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\r\n    roi_head=dict(\r\n        type='StandardRoIHead',\r\n        bbox_roi_extractor=dict(\r\n            type='SingleRoIExtractor',\r\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\r\n            out_channels=256,\r\n            featmap_strides=[4, 8, 16, 32]),\r\n        bbox_head=dict(\r\n            type='Shared2FCBBoxHead',\r\n            in_channels=256,\r\n            fc_out_channels=1024,\r\n            roi_feat_size=7,\r\n            num_classes=10,\r\n            bbox_coder=dict(\r\n                type='DeltaXYWHBBoxCoder',\r\n                target_means=[0., 0., 0., 0.],\r\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\r\n            reg_class_agnostic=False,\r\n            loss_cls=dict(\r\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\r\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\r\n        mask_roi_extractor=dict(\r\n            type='SingleRoIExtractor',\r\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\r\n            out_channels=256,\r\n            featmap_strides=[4, 8, 16, 32]),\r\n        mask_head=dict(\r\n            type='FCNMaskHead',\r\n            num_convs=4,\r\n            in_channels=256,\r\n            conv_out_channels=256,\r\n            num_classes=10,\r\n            loss_mask=dict(\r\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\r\n    # model training and testing settings\r\n    train_cfg=dict(\r\n        rpn=dict(\r\n            assigner=dict(\r\n                type='MaxIoUAssigner',\r\n                pos_iou_thr=0.7,\r\n                neg_iou_thr=0.3,\r\n                min_pos_iou=0.3,\r\n                match_low_quality=True,\r\n                ignore_iof_thr=-1),\r\n            sampler=dict(\r\n                type='RandomSampler',\r\n                num=256,\r\n                pos_fraction=0.5,\r\n                neg_pos_ub=-1,\r\n                add_gt_as_proposals=False),\r\n            allowed_border=-1,\r\n            pos_weight=-1,\r\n            debug=False),\r\n        rpn_proposal=dict(\r\n            nms_across_levels=False,\r\n 
           nms_pre=2000,\r\n            nms_post=1000,\r\n            max_num=1000,\r\n            nms_thr=0.7,\r\n            min_bbox_size=0),\r\n        rcnn=dict(\r\n            assigner=dict(\r\n                type='MaxIoUAssigner',\r\n                pos_iou_thr=0.5,\r\n                neg_iou_thr=0.5,\r\n                min_pos_iou=0.5,\r\n                match_low_quality=True,\r\n                ignore_iof_thr=-1),\r\n            sampler=dict(\r\n                type='RandomSampler',\r\n                num=512,\r\n                pos_fraction=0.25,\r\n                neg_pos_ub=-1,\r\n                add_gt_as_proposals=True),\r\n            mask_size=28,\r\n            pos_weight=-1,\r\n            debug=False)),\r\n    test_cfg=dict(\r\n        rpn=dict(\r\n            nms_across_levels=False,\r\n            nms_pre=1000,\r\n            nms_post=1000,\r\n            max_num=1000,\r\n            nms_thr=0.7,\r\n            min_bbox_size=0),\r\n        rcnn=dict(\r\n            score_thr=0.05,\r\n            nms=dict(type='nms', iou_threshold=0.5),\r\n            max_per_img=100,\r\n            mask_thr_binary=0.5)))\r\n\r\nload_from = '/data/yc_code/ImplicitFusion/checkpoints/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco_20210906_131725-bacf6f7b.pth'  # noqa\r\n\r\n\r\ndata = dict(\r\n    samples_per_gpu=2,\r\n    workers_per_gpu=4\r\n)\r\n\r\n# optimizer\r\noptimizer = dict(\r\n    type='AdamW',\r\n    lr=0.000025,\r\n    betas=(0.9, 0.999),\r\n    weight_decay=0.05,\r\n    paramwise_cfg=dict(\r\n        custom_keys={\r\n            'absolute_pos_embed': dict(decay_mult=0.),\r\n            'relative_position_bias_table': dict(decay_mult=0.),\r\n            'norm': dict(decay_mult=0.)\r\n        }))\r\nlr_config = dict(policy='step', warmup='linear', warmup_iters=1000, warmup_ratio=0.001, step=[8, 11])\r\noptimizer_config = dict(grad_clip=None)\r\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\r\n"
  },
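The AdamW settings in the Swin config above use `paramwise_cfg` so that the absolute position embedding, the relative position bias tables, and all normalization layers are excluded from weight decay (`decay_mult=0.`). Below is a minimal sketch, not the MMCV optimizer constructor, of how such name-based grouping can be reproduced in plain PyTorch; the tiny module in the last line is only a stand-in.

```python
import torch
import torch.nn as nn

# Parameters whose names contain one of these substrings get zero weight
# decay, mirroring the custom_keys with decay_mult=0. in the config above.
NO_DECAY_KEYS = ('absolute_pos_embed', 'relative_position_bias_table', 'norm')

def build_adamw(model: nn.Module, lr=2.5e-5, weight_decay=0.05):
    decay, no_decay = [], []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        (no_decay if any(k in name for k in NO_DECAY_KEYS) else decay).append(param)
    return torch.optim.AdamW(
        [{'params': decay, 'weight_decay': weight_decay},
         {'params': no_decay, 'weight_decay': 0.0}],
        lr=lr, betas=(0.9, 0.999))

# Stand-in module just to show the call; the real model is the Swin Mask R-CNN.
optimizer = build_adamw(nn.Sequential(nn.Linear(8, 8), nn.LayerNorm(8)))
```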
  {
    "path": "configs/nuimages/mask_rcnn_x101_32x4d_fpn_1x_nuim.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_1x_nuim.py'\nmodel = dict(\n    pretrained='open-mmlab://resnext101_32x4d',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch'))\n"
  },
  {
    "path": "configs/nuscenes.md",
    "content": "# MODEL ZOO\n\n## Common settings and notes\n\n- The experiments are run with PyTorch 1.7.0, CUDA 10.1 and CUDNN 7.6\n- The training is conducted on 8 Tesla V100 GPUs\n- For the *fade strategy* proposed by PointAugmenting(disenable the copy-and-paste augmentation for the last 5 epochs), we currently implement this strategy by manually stop training at 15 epoch and resume the training without copy-and-paste augmentation. If you find more elegant ways to implement such strategy, please let we know and we really appreciate it. The fade strategy reduces lots of false positive, improving the mAP remarkably especially for TransFusion-L while having less influence on TransFusion.  \n\n## Pretrained 2D Backbones\n- DLA34: Following PointAugmenting, we directly reuse the checkpoints pretrained on monocular 3D detection task provided by [CenterNet]((https://github.com/xingyizhou/CenterTrack/blob/master/readme/MODEL_ZOO.md#monocular-3d-detection-tracking)).\n- ResNet50 on instance segmentation:  We acquire the model pretrained on nuImages from [MMDetection3D](https://github.com/open-mmlab/mmdetection3d/blob/v0.12.0/configs/nuimages/README.md).\n- ResNet50 on 2D detection: We train a model using the [config](https://github.com/open-mmlab/mmdetection3d/blob/v0.12.0/configs/nuimages/mask_rcnn_r50_fpn_1x_nuim.py) of instance segmentation but remove the mask head.\n\n\n## nuScenes 3D Detection\n\nAll the LiDAR-only models are trained in 20 epochs, the fusion-based models are further trained for 6 epochs from the pretrained LiDAR backbone. We freeze the weight of LiDAR backbone to save GPU memory.\n\n| Model   | Backbone | mAP | NDS  |\n|---------|--------|--------|---------|\n| [TransFusion-L](configs/transfusion_nusc_pillar_L.py) | PointPillars | 54.51 | 62.66 |\n| [TransFusion](configs/transfusion_nusc_pillar_LC.py) | PointPillars | 60.21 | 65.50 |\n| [TransFusion-L](configs/transfusion_nusc_voxel_L.py) | VoxelNet | 65.06 | 70.10 |\n| [TransFusion](configs/transfusion_nusc_voxel_LC.py) | VoxelNet | 67.49 | 71.28 |\n\n## nuScenes 3D Tracking\n\nWe perform tracking-by-detection with the same tracking algorithms proposed by CenterPoint. \n\n| Model   | Backbone | AMOTA | AMOTP  |\n|---------|--------|--------|---------|\n| [TransFusion-L](configs/transfusion_nusc_voxel_L.py) | VoxelNet | 0.703 | 0.553 |\n| [TransFusion](configs/transfusion_nusc_voxel_LC.py) | VoxelNet | 0.725 | 0.561 |\n\n\n## nuScenes Leaderboard\n\n\n### Detection\n\nWe use 300 object queries during inference for online submission for a slightly better performance. 
We do not use any test-time augmentation or model ensembling.\n\n| Model   | Backbone | Test mAP | Test NDS  | Link  |\n|---------|--------|--------|---------|---------|\n| TransFusion-L | VoxelNet | 65.52 | 70.23 | [Detection](https://drive.google.com/file/d/1Wk8p2LJEhwfKfhsKzlU9vDBOd0zn38dN/view?usp=sharing)\n| TransFusion | VoxelNet | 68.90 | 71.68 | [Detection](https://drive.google.com/file/d/1X7_ig4v5A2vKsiHtUGtgeMN-0RJKsM6W/view?usp=sharing)\n\n### Tracking\n\n| Model | Backbone | Test AMOTA |  Test AMOTP   | Link  |\n|---------|--------|--------|---------|---------|\n| TransFusion-L | VoxelNet | 0.686 | 0.529 | [Detection](https://drive.google.com/file/d/1Wk8p2LJEhwfKfhsKzlU9vDBOd0zn38dN/view?usp=sharing) / [Tracking](https://drive.google.com/file/d/1pKvRBUsM9h1Xgturd0Ae_bnGt0m_j3hk/view?usp=sharing) |\n| TransFusion | VoxelNet | 0.718 | 0.551 | [Detection](https://drive.google.com/file/d/1X7_ig4v5A2vKsiHtUGtgeMN-0RJKsM6W/view?usp=sharing) / [Tracking](https://drive.google.com/file/d/1EVuS-MAg_HSXUVqMrXEs4-RpZp0p5cfv/view?usp=sharing) |\n"
  },
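The *fade strategy* in the notes above is applied manually: stop at epoch 15, then resume without the copy-and-paste (GT-sampling) augmentation for the last 5 epochs. Below is a minimal sketch of that resume step, assuming the augmentation appears as an `ObjectSample` transform in the top-level `train_pipeline`; the config name, checkpoint path, output file, and the `total_epochs` field are placeholders, and the actual configs may nest the pipeline under a dataset wrapper.

```python
from mmcv import Config

# Load the LiDAR-only config and strip the copy-and-paste augmentation.
cfg = Config.fromfile('configs/transfusion_nusc_voxel_L.py')
cfg.data.train.pipeline = [
    step for step in cfg.data.train.pipeline
    if step['type'] != 'ObjectSample'  # the copy-and-paste / GT-sampling step
]

# Resume from the checkpoint saved at epoch 15 and finish the last 5 epochs.
cfg.resume_from = 'work_dirs/transfusion_nusc_voxel_L/epoch_15.pth'
cfg.total_epochs = 20

# Save the modified config and launch training from it as usual.
cfg.dump('configs/transfusion_nusc_voxel_L_fade.py')
```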
  {
    "path": "configs/parta2/README.md",
    "content": "# From Points to Parts: 3D Object Detection from Point Cloud with Part-aware and Part-aggregation Network\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement Part-A^2 and provide its results and checkpoints on KITTI dataset.\n\n```\n@article{shi2020points,\n  title={From points to parts: 3d object detection from point cloud with part-aware and part-aggregation network},\n  author={Shi, Shaoshuai and Wang, Zhe and Shi, Jianping and Wang, Xiaogang and Li, Hongsheng},\n  journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},\n  year={2020},\n  publisher={IEEE}\n}\n\n```\n## Results\n\n### KITTI\n\n|  Backbone   |Class| Lr schd | Mem (GB) | Inf time (fps) | mAP | Download |\n| :---------: | :-----: |:-----: | :------: | :------------: | :----: |:----: |\n|    [SECFPN](./hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py) |3 Class|cyclic 80e|4.1||67.9|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class_20200620_230724-a2672098.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class_20200620_230724.log.json)|\n|    [SECFPN](./hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py) |Car |cyclic 80e|4.0||79.16|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car_20200620_230755-f2a38b9a.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car_20200620_230755.log.json)|\n"
  },
  {
    "path": "configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py",
    "content": "_base_ = ['../_base_/schedules/cyclic_40e.py', '../_base_/default_runtime.py']\n\n# model settings\nvoxel_size = [0.05, 0.05, 0.1]\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\n\nmodel = dict(\n    type='PartA2',\n    voxel_layer=dict(\n        max_num_points=5,\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)),\n    voxel_encoder=dict(type='HardSimpleVFE'),\n    middle_encoder=dict(\n        type='SparseUNet',\n        in_channels=4,\n        sparse_shape=[41, 1600, 1408],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    rpn_head=dict(\n        type='PartA2RPNHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                    [0, -40.0, -1.78, 70.4, 40.0, -1.78]],\n            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        assigner_per_size=True,\n        assign_per_class=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    roi_head=dict(\n        type='PartAggregationROIHead',\n        num_classes=3,\n        semantic_head=dict(\n            type='PointwiseSemanticHead',\n            in_channels=16,\n            extra_width=0.2,\n            seg_score_thr=0.3,\n            num_classes=3,\n            loss_seg=dict(\n                type='FocalLoss',\n                use_sigmoid=True,\n                reduction='sum',\n                gamma=2.0,\n                alpha=0.25,\n                loss_weight=1.0),\n            loss_part=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n        seg_roi_extractor=dict(\n            type='Single3DRoIAwareExtractor',\n            roi_layer=dict(\n                type='RoIAwarePool3d',\n                out_size=14,\n                max_pts_per_voxel=128,\n                mode='max')),\n        part_roi_extractor=dict(\n            type='Single3DRoIAwareExtractor',\n            roi_layer=dict(\n                type='RoIAwarePool3d',\n                out_size=14,\n                max_pts_per_voxel=128,\n                mode='avg')),\n        bbox_head=dict(\n            type='PartA2BboxHead',\n            num_classes=3,\n            seg_in_channels=16,\n            part_in_channels=4,\n            seg_conv_channels=[64, 64],\n            part_conv_channels=[64, 64],\n            merge_conv_channels=[128, 128],\n            down_conv_channels=[128, 256],\n            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n            shared_fc_channels=[256, 512, 512, 512],\n            cls_channels=[256, 
256],\n            reg_channels=[256, 256],\n            dropout_ratio=0.1,\n            roi_feat_size=14,\n            with_corner_loss=True,\n            loss_bbox=dict(\n                type='SmoothL1Loss',\n                beta=1.0 / 9.0,\n                reduction='sum',\n                loss_weight=1.0),\n            loss_cls=dict(\n                type='CrossEntropyLoss',\n                use_sigmoid=True,\n                reduction='sum',\n                loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=[\n                dict(  # for Pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1),\n                dict(  # for Cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1),\n                dict(  # for Car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.45,\n                    min_pos_iou=0.45,\n                    ignore_iof_thr=-1)\n            ],\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=9000,\n            nms_post=512,\n            max_num=512,\n            nms_thr=0.8,\n            score_thr=0,\n            use_rotate_nms=False),\n        rcnn=dict(\n            assigner=[\n                dict(  # for Pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1),\n                dict(  # for Cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1),\n                dict(  # for Car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1)\n            ],\n            sampler=dict(\n                type='IoUNegPiecewiseSampler',\n                num=128,\n                pos_fraction=0.55,\n                neg_piece_fractions=[0.8, 0.2],\n                neg_iou_piece_thrs=[0.55, 0.1],\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False,\n                return_iou=True),\n            cls_pos_thr=0.75,\n            cls_neg_thr=0.25)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1024,\n            nms_post=100,\n            max_num=100,\n            nms_thr=0.7,\n            score_thr=0,\n            use_rotate_nms=True),\n   
     rcnn=dict(\n            use_rotate_nms=True,\n            use_raw_score=True,\n            nms_thr=0.01,\n            score_thr=0.1)))\n\n# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car']\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),\n    classes=class_names,\n    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6))\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[1.0, 1.0, 0.5],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-0.78539816, 0.78539816]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'kitti_infos_train.pkl',\n            split='training',\n            pts_prefix='velodyne_reduced',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n       
 test_mode=True))\n\n# Part-A2 uses a different learning rate from what SECOND uses.\nlr = 0.001\noptimizer = dict(lr=lr)\nfind_unused_parameters = True\n"
  },
  {
    "path": "configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py",
    "content": "_base_ = './hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py'\n\nvoxel_size = [0.05, 0.05, 0.1]\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]  # velodyne coordinates, x, y, z\n\nmodel = dict(\n    rpn_head=dict(\n        type='PartA2RPNHead',\n        num_classes=1,\n        anchor_generator=dict(\n            _delete_=True,\n            type='Anchor3DRangeGenerator',\n            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],\n            sizes=[[1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False)),\n    roi_head=dict(\n        num_classes=1,\n        semantic_head=dict(num_classes=1),\n        bbox_head=dict(num_classes=1)),\n    # model training and testing settings\n    train_cfg=dict(\n        _delete_=True,\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=9000,\n            nms_post=512,\n            max_num=512,\n            nms_thr=0.8,\n            score_thr=0,\n            use_rotate_nms=False),\n        rcnn=dict(\n            assigner=dict(  # for Car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.55,\n                min_pos_iou=0.55,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='IoUNegPiecewiseSampler',\n                num=128,\n                pos_fraction=0.55,\n                neg_piece_fractions=[0.8, 0.2],\n                neg_iou_piece_thrs=[0.55, 0.1],\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False,\n                return_iou=True),\n            cls_pos_thr=0.75,\n            cls_neg_thr=0.25)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1024,\n            nms_post=100,\n            max_num=100,\n            nms_thr=0.7,\n            score_thr=0,\n            use_rotate_nms=True),\n        rcnn=dict(\n            use_rotate_nms=True,\n            use_raw_score=True,\n            nms_thr=0.01,\n            score_thr=0.1)))\n\n# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Car']\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),\n    classes=class_names,\n    sample_groups=dict(Car=15))\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[1.0, 1.0, 0.5],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-0.78539816, 0.78539816]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline, classes=class_names)),\n    val=dict(pipeline=test_pipeline, classes=class_names),\n    test=dict(pipeline=test_pipeline, classes=class_names))\n\nfind_unused_parameters = True\n"
  },
  {
    "path": "configs/pointpillars/README.md",
    "content": "# PointPillars: Fast Encoders for Object Detection from Point Clouds\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement PointPillars and provide the results and checkpoints on KITTI, nuScenes, Lyft and Waymo datasets.\n\n```\n@inproceedings{lang2019pointpillars,\n  title={Pointpillars: Fast encoders for object detection from point clouds},\n  author={Lang, Alex H and Vora, Sourabh and Caesar, Holger and Zhou, Lubing and Yang, Jiong and Beijbom, Oscar},\n  booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},\n  pages={12697--12705},\n  year={2019}\n}\n\n```\n\n## Results\n\n### KITTI\n\n|  Backbone|Class   | Lr schd | Mem (GB) | Inf time (fps) | AP  |Download |\n| :---------: | :-----: |:-----: | :------: | :------------: | :----: | :------: |\n|    [SECFPN](./hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py)|Car|cyclic 160e|5.4||77.1|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20200620_230614-77663cd6.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20200620_230614.log.json)|\n|    [SECFPN](./hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py)|3 Class|cyclic 160e|5.5||59.5|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20200620_230421-aa0f3adb.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20200620_230421.log.json)|\n\n### nuScenes\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | mAP |NDS| Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|[SECFPN](./hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py)|2x|16.4||35.17|49.7|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230725-0817d270.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230725.log.json)|\n|[FPN](./hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py)|2x|16.4||40.0|53.3|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405-2fa62f3d.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405.log.json)|\n\n### Lyft\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | Private Score | Public Score | Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|[SECFPN](./hv_pointpillars_secfpn_sbn-all_4x8_2x_lyft-3d.py)|2x|||13.4|13.4||\n|[FPN](./hv_pointpillars_fpn_sbn-all_4x8_2x_lyft-3d.py)|2x|||14.0|14.2||\n\n### Waymo\n\n|  Backbone | Load Interval | Class | Lr schd | Mem (GB) | Inf time (fps) | mAP@L1 | mAPH@L1 |  mAP@L2 | **mAPH@L2** | Download |\n| :-------: | :-----------: |:-----:| :------:| :------: | :------------: | :----: | :-----: | :-----: | :-----: | 
:------: |\n| [SECFPN](./hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car.py)|5|Car|2x|7.76||70.2|69.6|62.6|62.1|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car_20200901_204315-302fc3e7.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car_20200901_204315.log.json)|\n| [SECFPN](./hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py)|5|3 Class|2x|8.12||64.7|57.6|58.4|52.1|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class_20200831_204144-d1a706b1.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class_20200831_204144.log.json)|\n| above @ Car|||2x|8.12||68.5|67.9|60.1|59.6| |\n| above @ Pedestrian|||2x|8.12||67.8|50.6|59.6|44.3| |\n| above @ Cyclist|||2x|8.12||57.7|54.4|55.5|52.4| |\n| [SECFPN](./hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-car.py)|1|Car|2x|7.76||72.1|71.5|63.6|63.1|[log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-car/hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-car.log.json)|\n| [SECFPN](./hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-3class.py)|1|3 Class|2x|8.12||68.8|63.3|62.6|57.6|[log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-3class/hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-3class.log.json)|\n| above @ Car|||2x|8.12||71.6|71.0|63.1|62.5| |\n| above @ Pedestrian|||2x|8.12||70.6|56.7|62.9|50.2| |\n| above @ Cyclist|||2x|8.12||64.4|62.3|61.9|59.9| |\n\n#### Note:\n\n- **Metric**: For models trained with 3 classes, the average APH@L2 (mAPH@L2) over all categories is reported and used to rank the model. For models trained with only 1 class, the APH@L2 of that class is reported and used to rank the model.\n- **Data Split**: Here we provide several baselines for the Waymo dataset, where D5 means that we divide the dataset into 5 folds and only use one fold for efficient experiments. Using the complete dataset can boost the performance a lot, especially for the detection of cyclists and pedestrians, where an improvement of more than 5 mAP or mAPH can be expected.\n- **Implementation Details**: We basically follow the implementation in the [paper](https://arxiv.org/pdf/1912.04838.pdf) in terms of the network architecture (having a\nstride of 1 for the first convolutional block). Different settings of voxelization, data augmentation and hyperparameters make these baselines outperform those in the paper by about 7 mAP for car and 4 mAP for pedestrian with only a subset of the whole dataset. All of these results are achieved without bells and whistles, e.g. ensembling, multi-scale training and test augmentation.\n- **License Agreement**: To comply with the [license agreement of the Waymo dataset](https://waymo.com/open/terms/), the pre-trained models on the Waymo dataset are not released. We still release the training logs as a reference to ease future research.\n"
  },
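The "D5" split described in the notes above is controlled by the dataset's `load_interval`: the `waymoD5-*` dataset bases keep every 5th frame, and the full-dataset configs in this directory override it back to 1. A short sketch of that override, following the pattern of `hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-3class.py` shown later in this directory; the nesting of the `data` dict comes from the base dataset config.

```python
_base_ = [
    '../_base_/models/hv_pointpillars_secfpn_waymo.py',
    '../_base_/datasets/waymoD5-3d-3class.py',
    '../_base_/schedules/schedule_2x.py',
    '../_base_/default_runtime.py',
]

# load_interval=5 (the base "D5" setting) keeps one frame in five;
# overriding it to 1 trains on the complete dataset.
data = dict(train=dict(dataset=dict(load_interval=1)))
```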
  {
    "path": "configs/pointpillars/hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_lyft.py',\n    '../_base_/datasets/lyft-3d.py', '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "configs/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_nus.py',\n    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "configs/pointpillars/hv_pointpillars_fpn_sbn-all_range100_2x8_2x_lyft-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_range100_lyft.py',\n    '../_base_/datasets/range100_lyft-3d.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_secfpn_kitti.py',\n    '../_base_/datasets/kitti-3d-3class.py',\n    '../_base_/schedules/cyclic_40e.py', '../_base_/default_runtime.py'\n]\n\npoint_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]\n# dataset settings\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car']\n# PointPillars adopted a different sampling strategies among classes\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),\n    classes=class_names,\n    sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10))\n\n# PointPillars uses different augmentation hyper parameters\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[0.25, 0.25, 0.25],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-0.15707963267, 0.15707963267]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline, classes=class_names)),\n    val=dict(pipeline=test_pipeline, classes=class_names),\n    test=dict(pipeline=test_pipeline, classes=class_names))\n\n# In practice PointPillars also uses a different schedule\n# optimizer\nlr = 0.001\noptimizer = dict(lr=lr)\n# max_norm=35 is slightly better than 10 for PointPillars in the earlier\n# development of the codebase thus we keep the setting. But we does not\n# specifically tune this parameter.\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# Use evaluation interval=2 reduce the number of evaluation timese\nevaluation = dict(interval=2)\n# PointPillars usually need longer schedule than second, we simply double\n# the training schedule. Do remind that since we use RepeatDataset and\n# repeat factor is 2, so we actually train 160 epochs.\ntotal_epochs = 80\n"
  },
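As the closing comments of the config above note, the KITTI training split is wrapped in a `RepeatDataset` with a repeat factor of 2, so the 80 runner epochs correspond to 160 passes over the raw data (hence the `160e` tag in the file name). A small illustrative check of that arithmetic, not part of the config:

```python
repeat_times = 2      # RepeatDataset(times=2) in the base KITTI dataset config
total_epochs = 80     # set at the end of this config
effective_passes = repeat_times * total_epochs
assert effective_passes == 160  # matches the "160e" tag in the config file name
print(effective_passes)
```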
  {
    "path": "configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py",
    "content": "# model settings\n_base_ = './hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py'\n\npoint_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]\nmodel = dict(\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=1,\n        anchor_generator=dict(\n            _delete_=True,\n            type='Anchor3DRangeGenerator',\n            ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],\n            sizes=[[1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=True)),\n    # model training and testing settings\n    train_cfg=dict(\n        _delete_=True,\n        assigner=dict(\n            type='MaxIoUAssigner',\n            iou_calculator=dict(type='BboxOverlapsNearest3D'),\n            pos_iou_thr=0.6,\n            neg_iou_thr=0.45,\n            min_pos_iou=0.45,\n            ignore_iof_thr=-1),\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False))\n\n# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Car']\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),\n    sample_groups=dict(Car=15),\n    classes=class_names)\n\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[0.25, 0.25, 0.25],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-0.15707963267, 0.15707963267]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(pipeline=train_pipeline, classes=class_names)),\n    val=dict(pipeline=test_pipeline, classes=class_names),\n    test=dict(pipeline=test_pipeline, classes=class_names))\n"
  },
  {
    "path": "configs/pointpillars/hv_pointpillars_secfpn_sbn-all_2x8_2x_lyft-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_lyft.py',\n    '../_base_/datasets/lyft-3d.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n# model settings\nmodel = dict(\n    pts_neck=dict(\n        _delete_=True,\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        in_channels=384,\n        feat_channels=384,\n        anchor_generator=dict(\n            _delete_=True,\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-80, -80, -1.0715024, 80, 80, -1.0715024],\n                    [-80, -80, -0.3033737, 80, 80, -0.3033737],\n                    [-80, -80, -0.3519405, 80, 80, -0.3519405],\n                    [-80, -80, -0.8871424, 80, 80, -0.8871424],\n                    [-80, -80, -0.6276341, 80, 80, -0.6276341],\n                    [-80, -80, -1.3220503, 80, 80, -1.3220503],\n                    [-80, -80, -1.0709302, 80, 80, -1.0709302],\n                    [-80, -80, -0.9122268, 80, 80, -0.9122268],\n                    [-80, -80, -1.8012227, 80, 80, -1.8012227]],\n            sizes=[\n                [1.92, 4.75, 1.71],  # car\n                [2.84, 10.24, 3.44],  # truck\n                [2.92, 12.70, 3.42],  # bus\n                [2.42, 6.52, 2.34],  # emergency vehicle\n                [2.75, 8.17, 3.20],  # other vehicle\n                [0.96, 2.35, 1.59],  # motorcycle\n                [0.63, 1.76, 1.44],  # bicycle\n                [0.76, 0.80, 1.76],  # pedestrian\n                [0.35, 0.73, 0.50]  # animal\n            ],\n            rotations=[0, 1.57],\n            reshape_out=True)))\n"
  },
  {
    "path": "configs/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_nus.py',\n    '../_base_/datasets/nus-3d.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n# model settings\nmodel = dict(\n    pts_neck=dict(\n        _delete_=True,\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        in_channels=384,\n        feat_channels=384,\n        anchor_generator=dict(\n            _delete_=True,\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[\n                [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],\n                [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],\n                [-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],\n                [-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],\n                [-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],\n                [-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],\n                [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],\n            ],\n            sizes=[\n                [1.95017717, 4.60718145, 1.72270761],  # car\n                [2.4560939, 6.73778078, 2.73004906],  # truck\n                [2.87427237, 12.01320693, 3.81509561],  # trailer\n                [0.60058911, 1.68452161, 1.27192197],  # bicycle\n                [0.66344886, 0.7256437, 1.75748069],  # pedestrian\n                [0.39694519, 0.40359262, 1.06232151],  # traffic_cone\n                [2.49008838, 0.48578221, 0.98297065],  # barrier\n            ],\n            custom_values=[0, 0],\n            rotations=[0, 1.57],\n            reshape_out=True)))\n"
  },
  {
    "path": "configs/pointpillars/hv_pointpillars_secfpn_sbn-all_range100_2x8_2x_lyft-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_range100_lyft.py',\n    '../_base_/datasets/range100_lyft-3d.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    pts_neck=dict(\n        _delete_=True,\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        in_channels=384,\n        feat_channels=384,\n        anchor_generator=dict(\n            _delete_=True,\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-100, -100, -1.0715024, 100, 100, -1.0715024],\n                    [-100, -100, -0.3033737, 100, 100, -0.3033737],\n                    [-100, -100, -0.3519405, 100, 100, -0.3519405],\n                    [-100, -100, -0.8871424, 100, 100, -0.8871424],\n                    [-100, -100, -0.6276341, 100, 100, -0.6276341],\n                    [-100, -100, -1.3220503, 100, 100, -1.3220503],\n                    [-100, -100, -1.0709302, 100, 100, -1.0709302],\n                    [-100, -100, -0.9122268, 100, 100, -0.9122268],\n                    [-100, -100, -1.8012227, 100, 100, -1.8012227]],\n            sizes=[\n                [1.92, 4.75, 1.71],  # car\n                [2.84, 10.24, 3.44],  # truck\n                [2.92, 12.70, 3.42],  # bus\n                [2.42, 6.52, 2.34],  # emergency vehicle\n                [2.75, 8.17, 3.20],  # other vehicle\n                [0.96, 2.35, 1.59],  # motorcycle\n                [0.63, 1.76, 1.44],  # bicycle\n                [0.76, 0.80, 1.76],  # pedestrian\n                [0.35, 0.73, 0.50]  # animal\n            ],\n            rotations=[0, 1.57],\n            reshape_out=True)))\n"
  },
  {
    "path": "configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-3class.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_secfpn_waymo.py',\n    '../_base_/datasets/waymoD5-3d-3class.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n\n# data settings\ndata = dict(train=dict(dataset=dict(load_interval=1)))\n"
  },
  {
    "path": "configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymo-3d-car.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_secfpn_waymo.py',\n    '../_base_/datasets/waymoD5-3d-car.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n\n# data settings\ndata = dict(train=dict(dataset=dict(load_interval=1)))\n\n# model settings\nmodel = dict(\n    type='MVXFasterRCNN',\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=1,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345]],\n            sizes=[[2.08, 4.73, 1.77]],\n            rotations=[0, 1.57],\n            reshape_out=True)),\n    # model training and testing settings\n    train_cfg=dict(\n        _delete_=True,\n        pts=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            allowed_border=0,\n            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n            pos_weight=-1,\n            debug=False)))\n"
  },
  {
    "path": "configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_secfpn_waymo.py',\n    '../_base_/datasets/waymoD5-3d-3class.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n"
  },
  {
    "path": "configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_secfpn_waymo.py',\n    '../_base_/datasets/waymoD5-3d-car.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n\n# model settings\nmodel = dict(\n    type='MVXFasterRCNN',\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=1,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345]],\n            sizes=[[2.08, 4.73, 1.77]],\n            rotations=[0, 1.57],\n            reshape_out=True)),\n    # model training and testing settings\n    train_cfg=dict(\n        _delete_=True,\n        pts=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            allowed_border=0,\n            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n            pos_weight=-1,\n            debug=False)))\n"
  },
  {
    "path": "configs/regnet/README.md",
    "content": "# Designing Network Design Spaces\n\n## Introduction\n\n[BACKBONE]\n\nWe implement RegNetX models in 3D detection systems and provide their first results with PointPillars on nuScenes dataset.\n\nThe pre-trained models are converted from [model zoo of pycls](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md) and maintained in [mmcv](https://github.com/open-mmlab/mmcv).\n\n```\n@article{radosavovic2020designing,\n    title={Designing Network Design Spaces},\n    author={Ilija Radosavovic and Raj Prateek Kosaraju and Ross Girshick and Kaiming He and Piotr Dollár},\n    year={2020},\n    eprint={2003.13678},\n    archivePrefix={arXiv},\n    primaryClass={cs.CV}\n}\n```\n\n## Usage\n\nTo use a regnet model, there are two steps to do:\n1. Convert the model to ResNet-style supported by MMDetection\n2. Modify backbone and neck in config accordingly\n\n### Convert model\n\nWe already prepare models of FLOPs from 800M to 12G in our model zoo.\n\nFor more general usage, we also provide script `regnet2mmdet.py` in the tools directory to convert the key of models pretrained by [pycls](https://github.com/facebookresearch/pycls/) to\nResNet-style checkpoints used in MMDetection.\n\n```bash\npython -u tools/model_converters/regnet2mmdet.py ${PRETRAIN_PATH} ${STORE_PATH}\n```\nThis script convert model from `PRETRAIN_PATH` and store the converted model in `STORE_PATH`.\n\n\n### Modify config\n\nThe users can modify the config's `depth` of backbone and corresponding keys in `arch` according to the configs in the [pycls model zoo](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md).\nThe parameter `in_channels` in FPN can be found in the Figure 15 & 16 of the paper (`wi` in the legend).\nThis directory already provides some configs with their performance, using RegNetX from 800MF to 12GF level.\nFor other pre-trained models or self-implemented regnet models, the users are responsible to check these parameters by themselves.\n\n**Note**: Although Fig. 
15 & 16 also provide `w0`, `wa`, `wm`, `group_w`, and `bot_mul` for `arch`, they are quantized thus inaccurate, using them sometimes produces different backbone that does not match the key in the pre-trained model.\n\n## Results\n\n### nuScenes\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | mAP |NDS| Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|[SECFPN](../pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py)|2x|16.4||35.17|49.7|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230725-0817d270.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230725.log.json)|\n|[RegNetX-400MF-SECFPN](./hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py)|  2x    |16.4||41.2|55.2|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230334-53044f32.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230334.log.json)|\n|[FPN](../pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py)|2x|17.1||40.0|53.3|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405-2fa62f3d.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405.log.json)|\n|[RegNetX-400MF-FPN](./hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py)|2x|17.3||44.8|56.4|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239-c694dce7.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239.log.json)|\n|[RegNetX-1.6gF-FPN](./hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py)|2x|24.0||48.2|59.3|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d_20200629_050311-dcd4e090.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d_20200629_050311.log.json)|\n\n### Lyft\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | Private Score | Public Score | Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|[SECFPN](../pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_lyft-3d.py)|2x|||13.4|13.4||\n|[RegNetX-400MF-SECFPN](./hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_lyft-3d.py)|  2x  
||||||\n|[FPN](../pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_lyft-3d.py)|2x|||14.0|14.2||\n|[RegNetX-400MF-FPN](./hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_lyft-3d.py)|2x|||15.5|15.6||\n"
  },
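The note in the RegNet README above explains that specifying `arch` as a dict of `w0`, `wa`, `wm`, `group_w`, `depth`, `bot_mul` can regenerate a backbone that differs from the pre-trained one, because those published values are quantized. The sketch below illustrates why, following the width-generation rule from the RegNet paper and the `generate_regnet`-style routine in pycls/mmcv (an illustration, not the exact mmdet3d code path): per-block widths are snapped to powers of `wm` and rounded to multiples of 8 and of `group_w`, so slightly perturbed parameters can fall into different width buckets and change the stage layout.

```python
import numpy as np

def regnet_stage_widths(w0, wa, wm, depth, group_w, quant=8):
    """Sketch of RegNet width generation (Radosavovic et al., 2020)."""
    u = w0 + wa * np.arange(depth)                        # continuous per-block widths
    k = np.round(np.log(u / w0) / np.log(wm))             # snap to powers of wm
    w = (np.round(w0 * np.power(wm, k) / quant) * quant).astype(int)
    stage_w, stage_d = np.unique(w, return_counts=True)   # equal widths form one stage
    # make stage widths compatible with the group width (bot_mul = 1.0 here)
    stage_w = [int(round(wi / group_w) * group_w) for wi in stage_w]
    return stage_w, stage_d.tolist()

# RegNetX-400MF parameters as written in these configs
print(regnet_stage_widths(w0=24, wa=24.48, wm=2.54, depth=22, group_w=16))
# -> ([32, 64, 160, 384], [1, 2, 7, 12]); stages 2-4 match pts_neck in_channels=[64, 160, 384]
```

Because `wa`/`wm` are rounded, re-running this generation can shift a block across a bucket boundary, yielding widths (and hence state-dict shapes) that no longer match the `open-mmlab://regnetx_400mf` checkpoint; the string form of `arch` avoids this.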
  {
    "path": "configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_nus.py',\n    '../_base_/datasets/nus-3d.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n# model settings\nmodel = dict(\n    type='MVXFasterRCNN',\n    pretrained=dict(pts='open-mmlab://regnetx_1.6gf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch='regnetx_1.6gf',\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[168, 408, 912]))\n"
  },
  {
    "path": "configs/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_2x8_2x_lyft-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_lyft.py',\n    '../_base_/datasets/lyft-3d.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n# model settings\nmodel = dict(\n    type='MVXFasterRCNN',\n    pretrained=dict(pts='open-mmlab://regnetx_400mf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[64, 160, 384]))\n"
  },
  {
    "path": "configs/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_nus.py',\n    '../_base_/datasets/nus-3d.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n# model settings\nmodel = dict(\n    type='MVXFasterRCNN',\n    pretrained=dict(pts='open-mmlab://regnetx_400mf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[64, 160, 384]))\n"
  },
  {
    "path": "configs/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_range100_2x8_2x_lyft-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_range100_lyft.py',\n    '../_base_/datasets/range100_lyft-3d.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n# model settings\nmodel = dict(\n    type='MVXFasterRCNN',\n    pretrained=dict(pts='open-mmlab://regnetx_400mf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[64, 160, 384]))\n"
  },
  {
    "path": "configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d.py",
    "content": "_base_ = './hv_pointpillars_regnet-400mf_fpn_sbn-all_2x8_2x_lyft-3d.py'\n# model settings\nmodel = dict(\n    pts_neck=dict(\n        type='SECONDFPN',\n        _delete_=True,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 160, 384],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        in_channels=384,\n        feat_channels=384,\n        anchor_generator=dict(\n            _delete_=True,\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-80, -80, -1.0715024, 80, 80, -1.0715024],\n                    [-80, -80, -0.3033737, 80, 80, -0.3033737],\n                    [-80, -80, -0.3519405, 80, 80, -0.3519405],\n                    [-80, -80, -0.8871424, 80, 80, -0.8871424],\n                    [-80, -80, -0.6276341, 80, 80, -0.6276341],\n                    [-80, -80, -1.3220503, 80, 80, -1.3220503],\n                    [-80, -80, -1.0709302, 80, 80, -1.0709302],\n                    [-80, -80, -0.9122268, 80, 80, -0.9122268],\n                    [-80, -80, -1.8012227, 80, 80, -1.8012227]],\n            sizes=[\n                [1.92, 4.75, 1.71],  # car\n                [2.84, 10.24, 3.44],  # truck\n                [2.92, 12.70, 3.42],  # bus\n                [2.42, 6.52, 2.34],  # emergency vehicle\n                [2.75, 8.17, 3.20],  # other vehicle\n                [0.96, 2.35, 1.59],  # motorcycle\n                [0.63, 1.76, 1.44],  # bicycle\n                [0.76, 0.80, 1.76],  # pedestrian\n                [0.35, 0.73, 0.50]  # animal\n            ],\n            rotations=[0, 1.57],\n            reshape_out=True)))\n"
  },
  {
    "path": "configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py",
    "content": "_base_ = './hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py'\n# model settings\nmodel = dict(\n    pts_neck=dict(\n        type='SECONDFPN',\n        _delete_=True,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 160, 384],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        in_channels=384,\n        feat_channels=384,\n        anchor_generator=dict(\n            _delete_=True,\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[\n                [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],\n                [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],\n                [-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],\n                [-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],\n                [-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],\n                [-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],\n                [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],\n            ],\n            sizes=[\n                [1.95017717, 4.60718145, 1.72270761],  # car\n                [2.4560939, 6.73778078, 2.73004906],  # truck\n                [2.87427237, 12.01320693, 3.81509561],  # trailer\n                [0.60058911, 1.68452161, 1.27192197],  # bicycle\n                [0.66344886, 0.7256437, 1.75748069],  # pedestrian\n                [0.39694519, 0.40359262, 1.06232151],  # traffic_cone\n                [2.49008838, 0.48578221, 0.98297065],  # barrier\n            ],\n            custom_values=[0, 0],\n            rotations=[0, 1.57],\n            reshape_out=True)))\n"
  },
  {
    "path": "configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_range100_2x8_2x_lyft-3d.py",
    "content": "_base_ = \\\n    './hv_pointpillars_regnet-400mf_fpn_sbn-all_range100_2x8_2x_lyft-3d.py'\n# model settings\nmodel = dict(\n    pts_neck=dict(\n        type='SECONDFPN',\n        _delete_=True,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 160, 384],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        in_channels=384,\n        feat_channels=384,\n        anchor_generator=dict(\n            _delete_=True,\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-100, -100, -1.0715024, 100, 100, -1.0715024],\n                    [-100, -100, -0.3033737, 100, 100, -0.3033737],\n                    [-100, -100, -0.3519405, 100, 100, -0.3519405],\n                    [-100, -100, -0.8871424, 100, 100, -0.8871424],\n                    [-100, -100, -0.6276341, 100, 100, -0.6276341],\n                    [-100, -100, -1.3220503, 100, 100, -1.3220503],\n                    [-100, -100, -1.0709302, 100, 100, -1.0709302],\n                    [-100, -100, -0.9122268, 100, 100, -0.9122268],\n                    [-100, -100, -1.8012227, 100, 100, -1.8012227]],\n            sizes=[\n                [1.92, 4.75, 1.71],  # car\n                [2.84, 10.24, 3.44],  # truck\n                [2.92, 12.70, 3.42],  # bus\n                [2.42, 6.52, 2.34],  # emergency vehicle\n                [2.75, 8.17, 3.20],  # other vehicle\n                [0.96, 2.35, 1.59],  # motorcycle\n                [0.63, 1.76, 1.44],  # bicycle\n                [0.76, 0.80, 1.76],  # pedestrian\n                [0.35, 0.73, 0.50]  # animal\n            ],\n            rotations=[0, 1.57],\n            reshape_out=True)))\n"
  },
  {
    "path": "configs/second/README.md",
    "content": "# Second: Sparsely embedded convolutional detection\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement SECOND and provide the results and checkpoints on KITTI dataset.\n```\n@article{yan2018second,\n  title={Second: Sparsely embedded convolutional detection},\n  author={Yan, Yan and Mao, Yuxing and Li, Bo},\n  journal={Sensors},\n  year={2018},\n  publisher={Multidisciplinary Digital Publishing Institute}\n}\n\n```\n## Results\n\n### KITTI\n|  Backbone   |Class| Lr schd | Mem (GB) | Inf time (fps) | mAP |Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|    [SECFPN](./hv_second_secfpn_6x8_80e_kitti-3d-car.py)| Car |cyclic 80e|5.4||79.07|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_6x8_80e_kitti-3d-car/hv_second_secfpn_6x8_80e_kitti-3d-car_20200620_230238-393f000c.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_6x8_80e_kitti-3d-car/hv_second_secfpn_6x8_80e_kitti-3d-car_20200620_230238.log.json)|\n|    [SECFPN](./hv_second_secfpn_6x8_80e_kitti-3d-3class.py)| 3 Class |cyclic 80e|5.4||64.41|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_6x8_80e_kitti-3d-3class/hv_second_secfpn_6x8_80e_kitti-3d-3class_20200620_230238-9208083a.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_6x8_80e_kitti-3d-3class/hv_second_secfpn_6x8_80e_kitti-3d-3class_20200620_230238.log.json)|\n\n### Waymo\n\n|  Backbone | Load Interval | Class | Lr schd | Mem (GB) | Inf time (fps) | mAP@L1 | mAPH@L1 |  mAP@L2 | **mAPH@L2** | Download |\n| :-------: | :-----------: |:-----:| :------:| :------: | :------------: | :----: | :-----: | :-----: | :-----: | :------: |\n| [SECFPN](./hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py)|5|3 Class|2x|8.12||65.3|61.7|58.9|55.7|[log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_sbn_4x8_2x_waymoD5-3d-3class/hv_second_secfpn_sbn_4x8_2x_waymoD5-3d-3class_20201115_112448.log.json)|\n| above @ Car|||2x|8.12||67.1|66.6|58.7|58.2| |\n| above @ Pedestrian|||2x|8.12||68.1|59.1|59.5|51.5| |\n| above @ Cyclist|||2x|8.12||60.7|59.5|58.4|57.3| |\n\nNote: See more details about metrics and data split on Waymo [HERE](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/pointpillars). For implementation details, we basically follow the original settings. All of these results are achieved without bells-and-whistles, e.g. ensemble, multi-scale training and test augmentation.\n"
  },
  {
    "path": "configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py",
    "content": "_base_ = [\n    '../_base_/models/hv_second_secfpn_kitti.py',\n    '../_base_/datasets/kitti-3d-3class.py',\n    '../_base_/schedules/cyclic_40e.py', '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py",
    "content": "_base_ = [\n    '../_base_/models/hv_second_secfpn_kitti.py',\n    '../_base_/datasets/kitti-3d-car.py', '../_base_/schedules/cyclic_40e.py',\n    '../_base_/default_runtime.py'\n]\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\nmodel = dict(\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=1,\n        anchor_generator=dict(\n            _delete_=True,\n            type='Anchor3DRangeGenerator',\n            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],\n            sizes=[[1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=True)),\n    # model training and testing settings\n    train_cfg=dict(\n        _delete_=True,\n        assigner=dict(\n            type='MaxIoUAssigner',\n            iou_calculator=dict(type='BboxOverlapsNearest3D'),\n            pos_iou_thr=0.6,\n            neg_iou_thr=0.45,\n            min_pos_iou=0.45,\n            ignore_iof_thr=-1),\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False))\n"
  },
  {
    "path": "configs/second/hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py",
    "content": "_base_ = [\n    '../_base_/models/hv_second_secfpn_waymo.py',\n    '../_base_/datasets/waymoD5-3d-3class.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n\ndataset_type = 'WaymoDataset'\ndata_root = 'data/waymo/kitti_format/'\nclass_names = ['Car', 'Pedestrian', 'Cyclist']\npoint_cloud_range = [-76.8, -51.2, -2, 76.8, 51.2, 4]\ninput_modality = dict(use_lidar=True, use_camera=False)\n\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'waymo_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),\n    classes=class_names,\n    sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),\n    points_loader=dict(\n        type='LoadPointsFromFile', load_dim=5, use_dim=[0, 1, 2, 3, 4]))\n\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\n\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'waymo_infos_train.pkl',\n            split='training',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR',\n            # load one frame every five frames\n            load_interval=5)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        
type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n"
  },
  {
    "path": "configs/sparsefusion_nusc_voxel_LC_SwinT.py",
    "content": "point_cloud_range = [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0]\r\nclass_names = [\r\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\r\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\r\n]\r\nvoxel_size = [0.075, 0.075, 0.2]\r\nout_size_factor = 8\r\nevaluation = dict(interval=1)\r\ndataset_type = 'NuScenesDataset_ViewInfo'\r\ndata_root = 'data/nuscenes/'\r\ninput_modality = dict(\r\n    use_lidar=True,\r\n    use_camera=True,\r\n    use_radar=False,\r\n    use_map=False,\r\n    use_external=False)\r\nimg_scale = (800, 448)\r\nnum_views = 6\r\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\r\ntrain_pipeline = [\r\n    dict(\r\n        type='LoadPointsFromFile',\r\n        coord_type='LIDAR',\r\n        load_dim=5,\r\n        use_dim=[0, 1, 2, 3, 4],\r\n    ),\r\n    dict(\r\n        type='LoadPointsFromMultiSweeps',\r\n        sweeps_num=10,\r\n        use_dim=[0, 1, 2, 3, 4],\r\n    ),\r\n    dict(type='MyLoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_bbox=True, with_label=True, with_centers=True, with_cam_bbox=True, with_visible=True),\r\n    dict(type='LoadMultiViewImageFromFiles'),\r\n    dict(\r\n        type='OurGlobalRotScaleTrans',\r\n        rot_range=[-0.3925 * 2, 0.3925 * 2],\r\n        scale_ratio_range=[0.9, 1.1],\r\n        translation_std=[0.5, 0.5, 0.5],\r\n    ),\r\n    dict(\r\n        type='OurRandomFlip3D',\r\n        sync_2d=False,\r\n        flip_ratio_bev_horizontal=0.5,\r\n        flip_ratio_bev_vertical=0.5),\r\n    # dict(type='PhotoMetricDistortionMultiViewImage', swap_channel=False),  # color augmentation cannot improve the performance\r\n    dict(type='OurRandomAffine', scaling_ratio_range=(0.9, 1.1), flip_ratio=0.5, flip_sync_3d=True),\r\n    dict(type='MyResize', img_scale=img_scale, keep_ratio=True),\r\n    dict(type='MyNormalize', **img_norm_cfg),\r\n    dict(type='MyPad', size_divisor=32),\r\n    dict(type='SparseDepth', scale_factors=[4], exp_time=0),\r\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\r\n    dict(type='OurObjectRangeFilter', point_cloud_range=point_cloud_range),\r\n    dict(type='ObjectNameFilter', classes=class_names),\r\n    dict(type='PointShuffle'),\r\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\r\n    dict(type='Collect3D', keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_bboxes', 'gt_labels', 'gt_pts_centers_view', 'gt_img_centers_view', 'gt_bboxes_cam_view', 'gt_bboxes_lidar_view', 'sparse_depth', 'gt_visible_3d'])\r\n]\r\ntest_pipeline = [\r\n    dict(\r\n        type='LoadPointsFromFile',\r\n        coord_type='LIDAR',\r\n        load_dim=5,\r\n        use_dim=[0, 1, 2, 3, 4],\r\n    ),\r\n    dict(\r\n        type='LoadPointsFromMultiSweeps',\r\n        sweeps_num=10,\r\n        use_dim=[0, 1, 2, 3, 4],\r\n    ),\r\n    dict(type='LoadMultiViewImageFromFiles'),\r\n    dict(\r\n        type='MultiScaleFlipAug3D',\r\n        img_scale=img_scale,\r\n        pts_scale_ratio=1,\r\n        flip=False,\r\n        transforms=[\r\n            dict(\r\n                type='GlobalRotScaleTrans',\r\n                rot_range=[0, 0],\r\n                scale_ratio_range=[1.0, 1.0],\r\n                translation_std=[0, 0, 0]),\r\n            dict(type='RandomFlip3D'),\r\n            dict(type='MyResize', img_scale=img_scale, keep_ratio=True),\r\n            dict(type='MyNormalize', **img_norm_cfg),\r\n            dict(type='MyPad', size_divisor=32),\r\n            
dict(type='SparseDepth', scale_factors=[4]),\r\n            dict(\r\n                type='DefaultFormatBundle3D',\r\n                class_names=class_names,\r\n                with_label=False),\r\n            dict(type='Collect3D', keys=['points', 'img', 'sparse_depth'])\r\n        ])\r\n]\r\n\r\n# our default setting uses 4 GPUs with 3 samples per-GPU, please ensure the LR consistent with your batch size\r\ndata = dict(\r\n    samples_per_gpu=3,\r\n    workers_per_gpu=4,\r\n    train=dict(\r\n        type='CBGSDataset',\r\n        dataset=dict(\r\n            type=dataset_type,\r\n            data_root=data_root,\r\n            num_views=num_views,\r\n            ann_file=data_root + '/nuscenes_infos_w_views_train.pkl',\r\n            load_interval=1,\r\n            pipeline=train_pipeline,\r\n            classes=class_names,\r\n            modality=input_modality,\r\n            test_mode=False,\r\n            box_type_3d='LiDAR')),\r\n    val=dict(\r\n        type=dataset_type,\r\n        data_root=data_root,\r\n        num_views=num_views,\r\n        ann_file=data_root + '/nuscenes_infos_w_views_val.pkl',\r\n        load_interval=1,\r\n        pipeline=test_pipeline,\r\n        classes=class_names,\r\n        modality=input_modality,\r\n        test_mode=True,\r\n        box_type_3d='LiDAR'),\r\n    test=dict(\r\n        type=dataset_type,\r\n        data_root=data_root,\r\n        num_views=num_views,\r\n        ann_file=data_root + '/nuscenes_infos_w_views_val.pkl',\r\n        load_interval=1,\r\n        pipeline=test_pipeline,\r\n        classes=class_names,\r\n        modality=input_modality,\r\n        test_mode=True,\r\n        box_type_3d='LiDAR'))\r\nmodel = dict(\r\n    type='SparseFusionDetector',\r\n    freeze_img=False,\r\n    img_backbone=dict(\r\n        type='SwinTransformer',\r\n        embed_dims=96,\r\n        depths=[2, 2, 6, 2],\r\n        num_heads=[3, 6, 12, 24],\r\n        window_size=7,\r\n        mlp_ratio=4,\r\n        qkv_bias=True,\r\n        qk_scale=None,\r\n        drop_rate=0.,\r\n        attn_drop_rate=0.,\r\n        drop_path_rate=0.2,\r\n        patch_norm=True,\r\n        out_indices=(0, 1, 2, 3),\r\n        with_cp=False,\r\n        convert_weights=True,\r\n    ),\r\n    img_neck=dict(\r\n        type='FPN',\r\n        in_channels=[96, 192, 384, 768],\r\n        out_channels=256,\r\n        num_outs=5),\r\n    pts_voxel_layer=dict(\r\n        max_num_points=10,\r\n        voxel_size=voxel_size,\r\n        max_voxels=(120000, 160000),\r\n        point_cloud_range=point_cloud_range),\r\n    pts_voxel_encoder=dict(\r\n        type='HardSimpleVFE',\r\n        num_features=5,\r\n    ),\r\n    pts_middle_encoder=dict(\r\n        type='SparseEncoder',\r\n        in_channels=5,\r\n        sparse_shape=[41, 1440, 1440],\r\n        output_channels=128,\r\n        order=('conv', 'norm', 'act'),\r\n        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),\r\n        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),\r\n        block_type='basicblock'),\r\n    pts_backbone=dict(\r\n        type='SECOND',\r\n        in_channels=256,\r\n        out_channels=[128, 256],\r\n        layer_nums=[5, 5],\r\n        layer_strides=[1, 2],\r\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\r\n        conv_cfg=dict(type='Conv2d', bias=False)),\r\n    pts_neck=dict(\r\n        type='SECONDFPN',\r\n        in_channels=[128, 256],\r\n        out_channels=[256, 256],\r\n        upsample_strides=[1, 2],\r\n        
norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\r\n        upsample_cfg=dict(type='deconv', bias=False),\r\n        use_conv_for_no_stride=True),\r\n    pts_bbox_head=dict(\r\n        type='SparseFusionHead2D_Deform',\r\n        num_views=num_views,\r\n        in_channels_img=256,\r\n        out_size_factor_img=4,\r\n        in_channels=256 * 2,\r\n        hidden_channel=128,\r\n        num_heads=8,\r\n        num_classes=len(class_names),\r\n        ffn_channel=256,\r\n        dropout=0.1,\r\n        bn_momentum=0.1,\r\n        activation='relu',\r\n        img_reg_bn=False,\r\n        img_reg_layer=3,\r\n        common_heads=dict(center=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\r\n\r\n        num_proposals=200,  # query number in the LiDAR branch\r\n        num_img_proposals=200,  # query number in the camera branch\r\n\r\n        level_num=4,\r\n\r\n        num_pts_decoder_layers=1,  # number of transformer layers in the point detector (if you set it >1, ensure it is consistent with your pretrained LiDAR-only model or set \"freeze_lidar_detector=False\")\r\n        num_img_decoder_layers=1,  # number of transformer layers in the image detector\r\n        num_fusion_decoder_layers=1,  # number of the transformer layers in the fusion stage\r\n\r\n        initialize_by_heatmap=True,  # initialize the queries based on the heatmap (we never set it as False)\r\n\r\n        semantic_transfer=True,  # whether to use semantic transfer (camera to LiDAR)\r\n        cross_only=True,  # if false, output heatmap would be the average of semantic transfer and the LiDAR-only heatmap of TransFusion-L\r\n        cross_heatmap_layer=1,\r\n        nms_kernel_size=3,  # suppress nearby proposals when initializing queries for the LiDAR branch\r\n\r\n        geometric_transfer=True,  # whether to use geometric transfer\r\n        depth_input_channel=2,  # channel number of depth features. 
Do not change it unless you modify the SparseDepth class in \"mmdet3d/datasets/pipelines/loading.py\"\r\n        img_heatmap_layer=2,\r\n        img_nms_kernel_size=3, # suppress nearby proposals when initializing queries for the camera branch\r\n\r\n        view_transform=True,  # whether to transform the coordinate for the output bboxes of the camera branch\r\n        use_camera='se',  # \"se\" or None: whether to encode the camera parameters in the view transformation\r\n\r\n        bbox_coder=dict(\r\n            type='TransFusionBBoxCoder',\r\n            pc_range=point_cloud_range[:2],\r\n            voxel_size=voxel_size[:2],\r\n            out_size_factor=out_size_factor,\r\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\r\n            score_threshold=0.0,\r\n            code_size=10,\r\n        ),\r\n        bbox_2d_coder=dict(\r\n            type='CameraBBoxCoder',\r\n            code_size=10,\r\n        ),\r\n        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2, alpha=0.25, reduction='mean', loss_weight=1.0),\r\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\r\n        loss_heatmap=dict(type='GaussianFocalLoss', reduction='mean', loss_weight=0.1),\r\n        loss_heatmap_2d=dict(type='GaussianFocalLoss', reduction='mean', loss_weight=0.1),\r\n        loss_center_2d=dict(type='L1Loss', reduction='mean', loss_weight=5.0),\r\n    ),\r\n    train_cfg=dict(\r\n        pts=dict(\r\n            dataset='nuScenes',\r\n            assigner=dict(\r\n                type='HungarianAssigner3D',\r\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),\r\n                cls_cost=dict(type='FocalLossCost', gamma=2, alpha=0.25, weight=0.15),\r\n                reg_cost=dict(type='BBoxBEVL1Cost', weight=0.25),\r\n                iou_cost=dict(type='IoU3DCost', weight=0.25)\r\n            ),\r\n            assigner_2d=dict(\r\n                type='HungarianAssignerCameraBox',\r\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='camera'),\r\n                cls_cost=dict(type='FocalLossCost', gamma=2, alpha=0.25, weight=0.15),\r\n                reg_cost=dict(type='BBoxBEVL1Cost', weight=0.25),\r\n                iou_cost=dict(type='IoU3DCost', weight=0.25),\r\n            ),\r\n            pos_weight=-1,\r\n            gaussian_overlap=0.1,\r\n            gaussian_overlap_2d=0.1,\r\n            min_radius=2,\r\n            max_radius=999,\r\n            grid_size=[1440, 1440, 40],  # [x_len, y_len, 1]\r\n            voxel_size=voxel_size,\r\n            out_size_factor=out_size_factor,\r\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\r\n            img_code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\r\n            point_cloud_range=point_cloud_range)),\r\n    test_cfg=dict(\r\n        pts=dict(\r\n            dataset='nuScenes',\r\n            grid_size=[1440, 1440, 40],\r\n            img_scale=img_scale,\r\n            out_size_factor=out_size_factor,\r\n            pc_range=point_cloud_range,\r\n            voxel_size=voxel_size,\r\n            nms_type='circle',\r\n        )))\r\noptimizer = dict(\r\n    type='AdamW',\r\n    lr=0.000075,\r\n    weight_decay=0.01,\r\n    paramwise_cfg=dict(\r\n        custom_keys={\r\n            'img_backbone': dict(lr_mult=0.1, decay_mult=5),\r\n            'img_neck': dict(lr_mult=0.1),\r\n            'pts_voxel_layer': dict(lr_mult=0.1),\r\n            'pts_voxel_encoder': dict(lr_mult=0.1),\r\n   
         'pts_middle_encoder': dict(lr_mult=0.1),\r\n            'pts_backbone': dict(lr_mult=0.1),\r\n            'pts_neck': dict(lr_mult=0.1),\r\n            'pts_bbox_head.point_transformer': dict(lr_mult=0.1),\r\n            'pts_bbox_head.class_encoding': dict(lr_mult=0.1),\r\n            'pts_bbox_head.heatmap_head': dict(lr_mult=0.1),\r\n            'pts_bbox_head.shared_conv': dict(lr_mult=0.1),\r\n            'absolute_pos_embed': dict(decay_mult=0.),\r\n            'relative_position_bias_table': dict(decay_mult=0.),\r\n            'norm': dict(decay_mult=0.)\r\n        }),\r\n)  # for 4gpu * 3sample_per_gpu\r\noptimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))\r\n\r\nlr_config = dict(\r\n    policy='cyclic',\r\n    target_ratio=(8, 0.0001),\r\n    cyclic_times=1,\r\n    step_ratio_up=0.4,\r\n    )\r\nmomentum_config = dict(\r\n    policy='cyclic',\r\n    target_ratio=(0.8947368421052632, 1),\r\n    cyclic_times=1,\r\n    step_ratio_up=0.4)\r\ntotal_epochs = 6\r\ncheckpoint_config = dict(interval=1)\r\nlog_config = dict(\r\n    interval=50,\r\n    hooks=[dict(type='TextLoggerHook'),\r\n           dict(type='TensorboardLoggerHook')])\r\ndist_params = dict(backend='nccl')\r\nlog_level = 'INFO'\r\nwork_dir = None\r\nload_from = 'checkpoints/sparsefusion_voxel0075_SwinT_initial.pth'\r\nresume_from = None\r\nworkflow = [('train', 1)]\r\ngpu_ids = range(0, 8)\r\n\r\nfreeze_lidar_components = True  # freeze the LiDAR backbone\r\nfreeze_lidar_detector = True  # freeze the LiDAR detector\r\n\r\nfind_unused_parameters = True\r\n\r\n\r\n# Evaluating bboxes of pts_bbox\r\n# mAP: 0.7102\r\n# mATE: 0.2778\r\n# mASE: 0.2477\r\n# mAOE: 0.2701\r\n# mAVE: 0.2529\r\n# mAAE: 0.1881\r\n# NDS: 0.7314\r\n# Eval time: 133.6s\r\n#\r\n# Per-class results:\r\n# Object Class    AP      ATE     ASE     AOE     AVE     AAE\r\n# car     0.883   0.171   0.147   0.067   0.263   0.184\r\n# truck   0.651   0.306   0.176   0.078   0.230   0.216\r\n# bus     0.777   0.306   0.178   0.043   0.396   0.256\r\n# trailer 0.453   0.527   0.211   0.466   0.184   0.164\r\n# construction_vehicle    0.308   0.686   0.420   0.857   0.124   0.316\r\n# pedestrian      0.897   0.128   0.280   0.328   0.215   0.099\r\n# motorcycle      0.823   0.188   0.236   0.216   0.421   0.254\r\n# bicycle 0.727   0.164   0.262   0.314   0.189   0.016\r\n# traffic_cone    0.803   0.118   0.298   nan     nan     nan\r\n# barrier 0.779   0.185   0.269   0.060   nan     nan\r\n\r\n\r\n"
  },
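Both SparseFusion configs state their default batch layout in a comment (4 GPUs x 3 samples per GPU above, 4 x 4 in the R50 config below) and ask that the learning rate stay consistent with the batch size. The two defaults happen to follow plain linear scaling (lr 7.5e-5 for 12 total samples, 1e-4 for 16), so a hypothetical helper like the one below is one reasonable way to pick the LR when changing the GPU count or samples per GPU; linear scaling is an assumption here, not a rule stated by the configs.

```python
def scale_lr(base_lr, base_total_batch, gpus, samples_per_gpu):
    """Linearly rescale the learning rate with the effective batch size.

    Hypothetical helper under an assumed linear-scaling convention; the
    configs' own defaults are consistent with it (12 samples -> 7.5e-5,
    16 samples -> 1e-4).
    """
    return base_lr * (gpus * samples_per_gpu) / base_total_batch

# SwinT config default: lr=7.5e-5 at 4 GPUs x 3 samples/GPU (12 total)
print(scale_lr(7.5e-5, base_total_batch=12, gpus=8, samples_per_gpu=3))  # 0.00015
```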
  {
    "path": "configs/sparsefusion_nusc_voxel_LC_r50.py",
    "content": "point_cloud_range = [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0]\r\nclass_names = [\r\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\r\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\r\n]\r\nvoxel_size = [0.075, 0.075, 0.2]\r\nout_size_factor = 8\r\nevaluation = dict(interval=1)\r\ndataset_type = 'NuScenesDataset_ViewInfo'\r\ndata_root = 'data/nuscenes/'\r\ninput_modality = dict(\r\n    use_lidar=True,\r\n    use_camera=True,\r\n    use_radar=False,\r\n    use_map=False,\r\n    use_external=False)\r\nimg_scale = (800, 448)\r\nnum_views = 6\r\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\r\ntrain_pipeline = [\r\n    dict(\r\n        type='LoadPointsFromFile',\r\n        coord_type='LIDAR',\r\n        load_dim=5,\r\n        use_dim=[0, 1, 2, 3, 4],\r\n    ),\r\n    dict(\r\n        type='LoadPointsFromMultiSweeps',\r\n        sweeps_num=10,\r\n        use_dim=[0, 1, 2, 3, 4],\r\n    ),\r\n    dict(type='MyLoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_bbox=True, with_label=True, with_centers=True, with_cam_bbox=True, with_visible=True),\r\n    dict(type='LoadMultiViewImageFromFiles'),\r\n    dict(\r\n        type='OurGlobalRotScaleTrans',\r\n        rot_range=[-0.3925 * 2, 0.3925 * 2],\r\n        scale_ratio_range=[0.9, 1.1],\r\n        translation_std=[0.5, 0.5, 0.5],\r\n    ),\r\n    dict(\r\n        type='OurRandomFlip3D',\r\n        sync_2d=False,\r\n        flip_ratio_bev_horizontal=0.5,\r\n        flip_ratio_bev_vertical=0.5),\r\n    # dict(type='PhotoMetricDistortionMultiViewImage', swap_channel=False),  # color augmentation cannot improve the performance\r\n    dict(type='OurRandomAffine', scaling_ratio_range=(0.9, 1.1), flip_ratio=0.5, flip_sync_3d=True),\r\n    dict(type='MyResize', img_scale=img_scale, keep_ratio=True),\r\n    dict(type='MyNormalize', **img_norm_cfg),\r\n    dict(type='MyPad', size_divisor=32),\r\n    dict(type='SparseDepth', scale_factors=[4], exp_time=0),\r\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\r\n    dict(type='OurObjectRangeFilter', point_cloud_range=point_cloud_range),\r\n    dict(type='ObjectNameFilter', classes=class_names),\r\n    dict(type='PointShuffle'),\r\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\r\n    dict(type='Collect3D', keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_bboxes', 'gt_labels', 'gt_pts_centers_view', 'gt_img_centers_view', 'gt_bboxes_cam_view', 'gt_bboxes_lidar_view', 'sparse_depth', 'gt_visible_3d'])\r\n]\r\ntest_pipeline = [\r\n    dict(\r\n        type='LoadPointsFromFile',\r\n        coord_type='LIDAR',\r\n        load_dim=5,\r\n        use_dim=[0, 1, 2, 3, 4],\r\n    ),\r\n    dict(\r\n        type='LoadPointsFromMultiSweeps',\r\n        sweeps_num=10,\r\n        use_dim=[0, 1, 2, 3, 4],\r\n    ),\r\n    dict(type='LoadMultiViewImageFromFiles'),\r\n    dict(\r\n        type='MultiScaleFlipAug3D',\r\n        img_scale=img_scale,\r\n        pts_scale_ratio=1,\r\n        flip=False,\r\n        transforms=[\r\n            dict(\r\n                type='GlobalRotScaleTrans',\r\n                rot_range=[0, 0],\r\n                scale_ratio_range=[1.0, 1.0],\r\n                translation_std=[0, 0, 0]),\r\n            dict(type='RandomFlip3D'),\r\n            dict(type='MyResize', img_scale=img_scale, keep_ratio=True),\r\n            dict(type='MyNormalize', **img_norm_cfg),\r\n            dict(type='MyPad', size_divisor=32),\r\n            
dict(type='SparseDepth', scale_factors=[4]),\r\n            dict(\r\n                type='DefaultFormatBundle3D',\r\n                class_names=class_names,\r\n                with_label=False),\r\n            dict(type='Collect3D', keys=['points', 'img', 'sparse_depth'])\r\n        ])\r\n]\r\n\r\n# our default setting uses 4 GPUs with 4 samples per-GPU, please ensure the LR consistent with your batch size\r\ndata = dict(\r\n    samples_per_gpu=4,\r\n    workers_per_gpu=4,\r\n    train=dict(\r\n        type='CBGSDataset',\r\n        dataset=dict(\r\n            type=dataset_type,\r\n            data_root=data_root,\r\n            num_views=num_views,\r\n            ann_file=data_root + '/nuscenes_infos_w_views_train.pkl',\r\n            load_interval=1,\r\n            pipeline=train_pipeline,\r\n            classes=class_names,\r\n            modality=input_modality,\r\n            test_mode=False,\r\n            box_type_3d='LiDAR')),\r\n    val=dict(\r\n        type=dataset_type,\r\n        data_root=data_root,\r\n        num_views=num_views,\r\n        ann_file=data_root + '/nuscenes_infos_w_views_val.pkl',\r\n        load_interval=1,\r\n        pipeline=test_pipeline,\r\n        classes=class_names,\r\n        modality=input_modality,\r\n        test_mode=True,\r\n        box_type_3d='LiDAR'),\r\n    test=dict(\r\n        type=dataset_type,\r\n        data_root=data_root,\r\n        num_views=num_views,\r\n        ann_file=data_root + '/nuscenes_infos_w_views_val.pkl',\r\n        load_interval=1,\r\n        pipeline=test_pipeline,\r\n        classes=class_names,\r\n        modality=input_modality,\r\n        test_mode=True,\r\n        box_type_3d='LiDAR'))\r\nmodel = dict(\r\n    type='SparseFusionDetector',\r\n    freeze_img=False,\r\n    img_backbone=dict(\r\n        type='ResNet',\r\n        depth=50,\r\n        num_stages=4,\r\n        out_indices=(0, 1, 2, 3),\r\n        frozen_stages=1,\r\n        in_channels=3,\r\n        norm_cfg=dict(type='BN', requires_grad=True),\r\n        norm_eval=True,\r\n        style='pytorch',\r\n    ),\r\n    img_neck=dict(\r\n        type='FPN',\r\n        in_channels=[256, 512, 1024, 2048],\r\n        out_channels=256,\r\n        num_outs=5),\r\n    pts_voxel_layer=dict(\r\n        max_num_points=10,\r\n        voxel_size=voxel_size,\r\n        max_voxels=(120000, 160000),\r\n        point_cloud_range=point_cloud_range),\r\n    pts_voxel_encoder=dict(\r\n        type='HardSimpleVFE',\r\n        num_features=5,\r\n    ),\r\n    pts_middle_encoder=dict(\r\n        type='SparseEncoder',\r\n        in_channels=5,\r\n        sparse_shape=[41, 1440, 1440],\r\n        output_channels=128,\r\n        order=('conv', 'norm', 'act'),\r\n        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),\r\n        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),\r\n        block_type='basicblock'),\r\n    pts_backbone=dict(\r\n        type='SECOND',\r\n        in_channels=256,\r\n        out_channels=[128, 256],\r\n        layer_nums=[5, 5],\r\n        layer_strides=[1, 2],\r\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\r\n        conv_cfg=dict(type='Conv2d', bias=False)),\r\n    pts_neck=dict(\r\n        type='SECONDFPN',\r\n        in_channels=[128, 256],\r\n        out_channels=[256, 256],\r\n        upsample_strides=[1, 2],\r\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\r\n        upsample_cfg=dict(type='deconv', bias=False),\r\n        use_conv_for_no_stride=True),\r\n    
pts_bbox_head=dict(\r\n        type='SparseFusionHead2D_Deform',\r\n        num_views=num_views,\r\n        in_channels_img=256,\r\n        out_size_factor_img=4,\r\n        in_channels=256 * 2,\r\n        hidden_channel=128,\r\n        num_heads=8,\r\n        num_classes=len(class_names),\r\n        ffn_channel=256,\r\n        dropout=0.1,\r\n        bn_momentum=0.1,\r\n        activation='relu',\r\n        img_reg_bn=False,\r\n        img_reg_layer=3,\r\n        common_heads=dict(center=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\r\n\r\n        num_proposals=200,  # query number in the LiDAR branch\r\n        num_img_proposals=200,  # query number in the camera branch\r\n\r\n        level_num=4,\r\n\r\n        num_pts_decoder_layers=1,  # number of transformer layers in the point detector (if you set it >1, ensure it is consistent with your pretrained LiDAR-only model or set \"freeze_lidar_detector=False\")\r\n        num_img_decoder_layers=1,  # number of transformer layers in the image detector\r\n        num_fusion_decoder_layers=1,  # number of the transformer layers in the fusion stage\r\n\r\n        initialize_by_heatmap=True,  # initialize the queries based on the heatmap (we never set it as False)\r\n\r\n        semantic_transfer=True,  # whether to use semantic transfer (camera to LiDAR)\r\n        cross_only=True,  # if false, output heatmap would be the average of semantic transfer and the LiDAR-only heatmap of TransFusion-L\r\n        cross_heatmap_layer=1,\r\n        nms_kernel_size=3,  # suppress nearby proposals when initializing queries for the LiDAR branch\r\n\r\n        geometric_transfer=True,  # whether to use geometric transfer\r\n        depth_input_channel=2,  # channel number of depth features. Do not change it unless you modify the SparseDepth class in \"mmdet3d/datasets/pipelines/loading.py\"\r\n        img_heatmap_layer=2,\r\n        img_nms_kernel_size=3, # suppress nearby proposals when initializing queries for the camera branch\r\n\r\n        view_transform=True,  # whether to transform the coordinate for the output bboxes of the camera branch\r\n        use_camera='se',  # \"se\" or None: whether to encode the camera parameters in the view transformation\r\n\r\n        bbox_coder=dict(\r\n            type='TransFusionBBoxCoder',\r\n            pc_range=point_cloud_range[:2],\r\n            voxel_size=voxel_size[:2],\r\n            out_size_factor=out_size_factor,\r\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\r\n            score_threshold=0.0,\r\n            code_size=10,\r\n        ),\r\n        bbox_2d_coder=dict(\r\n            type='CameraBBoxCoder',\r\n            code_size=10,\r\n        ),\r\n        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2, alpha=0.25, reduction='mean', loss_weight=1.0),\r\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\r\n        loss_heatmap=dict(type='GaussianFocalLoss', reduction='mean', loss_weight=0.1),\r\n        loss_heatmap_2d=dict(type='GaussianFocalLoss', reduction='mean', loss_weight=0.1),\r\n        loss_center_2d=dict(type='L1Loss', reduction='mean', loss_weight=5.0),\r\n    ),\r\n    train_cfg=dict(\r\n        pts=dict(\r\n            dataset='nuScenes',\r\n            assigner=dict(\r\n                type='HungarianAssigner3D',\r\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),\r\n                cls_cost=dict(type='FocalLossCost', gamma=2, alpha=0.25, weight=0.15),\r\n                
reg_cost=dict(type='BBoxBEVL1Cost', weight=0.25),\r\n                iou_cost=dict(type='IoU3DCost', weight=0.25)\r\n            ),\r\n            assigner_2d=dict(\r\n                type='HungarianAssignerCameraBox',\r\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='camera'),\r\n                cls_cost=dict(type='FocalLossCost', gamma=2, alpha=0.25, weight=0.15),\r\n                reg_cost=dict(type='BBoxBEVL1Cost', weight=0.25),\r\n                iou_cost=dict(type='IoU3DCost', weight=0.25),\r\n            ),\r\n            pos_weight=-1,\r\n            gaussian_overlap=0.1,\r\n            gaussian_overlap_2d=0.1,\r\n            min_radius=2,\r\n            max_radius=999,\r\n            grid_size=[1440, 1440, 40],  # [x_len, y_len, 1]\r\n            voxel_size=voxel_size,\r\n            out_size_factor=out_size_factor,\r\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\r\n            img_code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\r\n            point_cloud_range=point_cloud_range)),\r\n    test_cfg=dict(\r\n        pts=dict(\r\n            dataset='nuScenes',\r\n            grid_size=[1440, 1440, 40],\r\n            img_scale=img_scale,\r\n            out_size_factor=out_size_factor,\r\n            pc_range=point_cloud_range,\r\n            voxel_size=voxel_size,\r\n            nms_type='circle',\r\n        )))\r\noptimizer = dict(\r\n    type='AdamW',\r\n    lr=0.0001,\r\n    weight_decay=0.01,\r\n    paramwise_cfg=dict(\r\n        custom_keys={\r\n            'img_backbone': dict(lr_mult=0.1),\r\n            'img_neck': dict(lr_mult=0.1),\r\n            'pts_voxel_layer': dict(lr_mult=0.1),\r\n            'pts_voxel_encoder': dict(lr_mult=0.1),\r\n            'pts_middle_encoder': dict(lr_mult=0.1),\r\n            'pts_backbone': dict(lr_mult=0.1),\r\n            'pts_neck': dict(lr_mult=0.1),\r\n            'pts_bbox_head.point_transformer': dict(lr_mult=0.1),\r\n            'pts_bbox_head.class_encoding': dict(lr_mult=0.1),\r\n            'pts_bbox_head.heatmap_head': dict(lr_mult=0.1),\r\n            'pts_bbox_head.shared_conv': dict(lr_mult=0.1),\r\n        }),\r\n)  # for 4gpu * 4sample_per_gpu\r\noptimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))\r\n\r\nlr_config = dict(\r\n    policy='cyclic',\r\n    target_ratio=(8, 0.0001),\r\n    cyclic_times=1,\r\n    step_ratio_up=0.4,\r\n    )\r\nmomentum_config = dict(\r\n    policy='cyclic',\r\n    target_ratio=(0.8947368421052632, 1),\r\n    cyclic_times=1,\r\n    step_ratio_up=0.4)\r\ntotal_epochs = 6\r\ncheckpoint_config = dict(interval=1)\r\nlog_config = dict(\r\n    interval=50,\r\n    hooks=[dict(type='TextLoggerHook'),\r\n           dict(type='TensorboardLoggerHook')])\r\ndist_params = dict(backend='nccl')\r\nlog_level = 'INFO'\r\nwork_dir = None\r\nload_from = 'checkpoints/sparsefusion_voxel0075_R50_initial.pth'\r\nresume_from = None\r\nworkflow = [('train', 1)]\r\ngpu_ids = range(0, 8)\r\n\r\nfreeze_lidar_components = True  # freeze the LiDAR backbone\r\nfreeze_lidar_detector = True  # freeze the LiDAR detector\r\n\r\nfind_unused_parameters = True\r\n\r\n\r\n# Evaluating bboxes of pts_bbox\r\n# mAP: 0.7051\r\n# mATE: 0.2757\r\n# mASE: 0.2506\r\n# mAOE: 0.2767\r\n# mAVE: 0.2562\r\n# mAAE: 0.1869\r\n# NDS: 0.7279\r\n# Eval time: 137.2s\r\n#\r\n# Per-class results:\r\n# Object Class    AP      ATE     ASE     AOE     AVE     AAE\r\n# car     0.883   0.171   0.146   0.066   0.262   0.187\r\n# truck   0.643   0.305   0.177   0.071   0.235  
 0.211\r\n# bus     0.775   0.304   0.177   0.044   0.411   0.250\r\n# trailer 0.447   0.522   0.214   0.432   0.179   0.159\r\n# construction_vehicle    0.303   0.669   0.424   0.842   0.127   0.326\r\n# pedestrian      0.898   0.127   0.282   0.329   0.216   0.104\r\n# motorcycle      0.810   0.189   0.241   0.215   0.426   0.249\r\n# bicycle 0.712   0.164   0.263   0.422   0.193   0.010\r\n# traffic_cone    0.808   0.118   0.309   nan     nan     nan\r\n# barrier 0.772   0.188   0.273   0.068   nan     nan\r\n\r\n\r\n"
  },
  {
    "path": "configs/ssn/README.md",
    "content": "# SSN: Shape Signature Networks for Multi-class Object Detection from Point Clouds\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement PointPillars with Shape-aware grouping heads used in the SSN and provide the results and checkpoints on the nuScenes and Lyft dataset.\n\n```\n@inproceedings{zhu2020ssn,\n  title={SSN: Shape Signature Networks for Multi-class Object Detection from Point Clouds},\n  author={Zhu, Xinge and Ma, Yuexin and Wang, Tai and Xu, Yan and Shi, Jianping and Lin, Dahua},\n  booktitle={Proceedings of the European Conference on Computer Vision},\n  year={2020}\n}\n\n```\n\n## Results\n\n### NuScenes\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | mAP | NDS | Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|[SECFPN](../pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py)|2x|16.4||35.17|49.76|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230725-0817d270.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230725.log.json)|\n|[SSN](./hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py)|2x|9.62||41.56|54.83|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d_20201023_193737-5fda3f00.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d_20201023_193737.log.json)|\n[RegNetX-400MF-SECFPN](../regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py)|2x|16.4||41.15|55.20|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230334-53044f32.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230334.log.json)|\n|[RegNetX-400MF-SSN](./hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_nus-3d.py)|2x|10.26||46.95|58.24|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/ssn/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_nus-3d/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_nus-3d_20201024_232447-7af3d8c8.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/ssn/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_nus-3d/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_nus-3d_20201024_232447.log.json)|\n\n### Lyft\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | Private Score | Public Score | Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|[SECFPN](../pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_lyft-3d.py)|2x|||13.4|13.4||\n|[SSN](./hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py)|2x|8.30||17.4|17.5|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d_20201016_220844-3058d9fc.pth) &#124; 
[log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d_20201016_220844.log.json)|\n|[RegNetX-400MF-SSN](./hv_ssn_regnet-400mf_secfpn_sbn-all_1x16_2x_lyft-3d.py)|2x|9.98||18.1|18.3|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/ssn/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_lyft-3d/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_lyft-3d_20201025_213155-4532096c.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/ssn/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_lyft-3d/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_lyft-3d_20201025_213155.log.json)|\n\nNote:\n\nThe main difference between the shape-aware grouping heads and the original SECOND FPN heads is that the former group objects with similar sizes and shapes together and design a shape-specific head for each group. Heavier heads (with more convolutions and larger strides) are used for large objects, while lighter heads are used for small objects. Note that the outputs may have different feature map sizes, so an anchor generator tailored to these feature maps is also needed in the implementation.\n\nUsers could try other settings for the head design. Here we basically follow the implementation [HERE](https://github.com/xinge008/SSN).\n"
  },
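The note in the SSN README above says that classes are grouped by size and that heavier heads use larger strides, which makes the groups predict on feature maps of different sizes and forces a per-group anchor generator. A minimal PyTorch sketch of that effect (not mmdet3d's `ShapeAwareHead`; the channels and strides mirror the `tasks` in `hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py`, and the 200x200 input is only an assumed example):

```python
import torch
import torch.nn as nn

def group_head(in_channels, channels, strides):
    """Shared conv tower for one class group; the product of its strides
    sets the size of the feature map this group predicts on."""
    layers, c = [], in_channels
    for out_c, s in zip(channels, strides):
        layers += [
            nn.Conv2d(c, out_c, kernel_size=3, stride=s, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True),
        ]
        c = out_c
    return nn.Sequential(*layers)

bev = torch.rand(1, 384, 200, 200)                   # assumed BEV map from the neck
small = group_head(384, (64, 64), (1, 1))            # e.g. bicycle/motorcycle group
large = group_head(384, (64, 64, 64), (2, 1, 1))     # e.g. bus/other_vehicle/truck group
print(small(bev).shape)  # torch.Size([1, 64, 200, 200])
print(large(bev).shape)  # torch.Size([1, 64, 100, 100]) -> needs its own anchor grid
```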
  {
    "path": "configs/ssn/hv_ssn_regnet-400mf_secfpn_sbn-all_1x16_2x_lyft-3d.py",
    "content": "_base_ = './hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py'\n# model settings\nmodel = dict(\n    type='MVXFasterRCNN',\n    pretrained=dict(pts='open-mmlab://regnetx_400mf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[64, 160, 384]))\n# dataset settings\ndata = dict(samples_per_gpu=1, workers_per_gpu=2)\n"
  },
  {
    "path": "configs/ssn/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_nus-3d.py",
    "content": "_base_ = './hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py'\n# model settings\nmodel = dict(\n    type='MVXFasterRCNN',\n    pretrained=dict(pts='open-mmlab://regnetx_400mf'),\n    pts_backbone=dict(\n        _delete_=True,\n        type='NoStemRegNet',\n        arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),\n        out_indices=(1, 2, 3),\n        frozen_stages=-1,\n        strides=(1, 2, 2, 2),\n        base_channels=64,\n        stem_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        style='pytorch'),\n    pts_neck=dict(in_channels=[64, 160, 384]))\n"
  },
  {
    "path": "configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_lyft.py',\n    '../_base_/datasets/lyft-3d.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\npoint_cloud_range = [-100, -100, -5, 100, 100, 3]\n# Note that the order of class names should be consistent with\n# the following anchors' order\nclass_names = [\n    'bicycle', 'motorcycle', 'pedestrian', 'animal', 'car',\n    'emergency_vehicle', 'bus', 'other_vehicle', 'truck'\n]\n\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5),\n    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5),\n    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=4,\n    train=dict(pipeline=train_pipeline, classes=class_names),\n    val=dict(pipeline=test_pipeline, classes=class_names),\n    test=dict(pipeline=test_pipeline, classes=class_names))\n\n# model settings\nmodel = dict(\n    pts_voxel_layer=dict(point_cloud_range=[-100, -100, -5, 100, 100, 3]),\n    pts_voxel_encoder=dict(\n        feat_channels=[32, 64],\n        point_cloud_range=[-100, -100, -5, 100, 100, 3]),\n    pts_middle_encoder=dict(output_shape=[800, 800]),\n    pts_neck=dict(\n        _delete_=True,\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        _delete_=True,\n        type='ShapeAwareHead',\n        num_classes=9,\n        in_channels=384,\n        feat_channels=384,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGeneratorPerCls',\n            ranges=[[-100, -100, -1.0709302, 100, 100, -1.0709302],\n                    [-100, -100, -1.3220503, 100, 100, -1.3220503],\n                    [-100, -100, -0.9122268, 100, 100, -0.9122268],\n                    [-100, -100, -1.8012227, 100, 100, 
-1.8012227],\n                    [-100, -100, -1.0715024, 100, 100, -1.0715024],\n                    [-100, -100, -0.8871424, 100, 100, -0.8871424],\n                    [-100, -100, -0.3519405, 100, 100, -0.3519405],\n                    [-100, -100, -0.6276341, 100, 100, -0.6276341],\n                    [-100, -100, -0.3033737, 100, 100, -0.3033737]],\n            sizes=[\n                [0.63, 1.76, 1.44],  # bicycle\n                [0.96, 2.35, 1.59],  # motorcycle\n                [0.76, 0.80, 1.76],  # pedestrian\n                [0.35, 0.73, 0.50],  # animal\n                [1.92, 4.75, 1.71],  # car\n                [2.42, 6.52, 2.34],  # emergency vehicle\n                [2.92, 12.70, 3.42],  # bus\n                [2.75, 8.17, 3.20],  # other vehicle\n                [2.84, 10.24, 3.44]  # truck\n            ],\n            custom_values=[],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        tasks=[\n            dict(\n                num_class=2,\n                class_names=['bicycle', 'motorcycle'],\n                shared_conv_channels=(64, 64),\n                shared_conv_strides=(1, 1),\n                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),\n            dict(\n                num_class=2,\n                class_names=['pedestrian', 'animal'],\n                shared_conv_channels=(64, 64),\n                shared_conv_strides=(1, 1),\n                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),\n            dict(\n                num_class=2,\n                class_names=['car', 'emergency_vehicle'],\n                shared_conv_channels=(64, 64, 64),\n                shared_conv_strides=(2, 1, 1),\n                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),\n            dict(\n                num_class=3,\n                class_names=['bus', 'other_vehicle', 'truck'],\n                shared_conv_channels=(64, 64, 64),\n                shared_conv_strides=(2, 1, 1),\n                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01))\n        ],\n        assign_per_class=True,\n        diff_rad_by_sin=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))\n\n# model training and testing settings\ntrain_cfg = dict(\n    _delete_=True,\n    pts=dict(\n        assigner=[\n            dict(  # bicycle\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            dict(  # motorcycle\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            dict(  # pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                
min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            dict(  # animal\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            dict(  # car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n            dict(  # emergency vehicle\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            dict(  # bus\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n            dict(  # other vehicle\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            dict(  # truck\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1)\n        ],\n        allowed_border=0,\n        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n        pos_weight=-1,\n        debug=False))\n"
  },
  {
    "path": "configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py",
    "content": "_base_ = [\n    '../_base_/models/hv_pointpillars_fpn_nus.py',\n    '../_base_/datasets/nus-3d.py',\n    '../_base_/schedules/schedule_2x.py',\n    '../_base_/default_runtime.py',\n]\n# Note that the order of class names should be consistent with\n# the following anchors' order\npoint_cloud_range = [-50, -50, -5, 50, 50, 3]\nclass_names = [\n    'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier', 'car',\n    'truck', 'trailer', 'bus', 'construction_vehicle'\n]\n\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5),\n    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5),\n    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=4,\n    train=dict(pipeline=train_pipeline, classes=class_names),\n    val=dict(pipeline=test_pipeline, classes=class_names),\n    test=dict(pipeline=test_pipeline, classes=class_names))\n\n# model settings\nmodel = dict(\n    pts_voxel_layer=dict(max_num_points=20),\n    pts_voxel_encoder=dict(feat_channels=[64, 64]),\n    pts_neck=dict(\n        _delete_=True,\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        _delete_=True,\n        type='ShapeAwareHead',\n        num_classes=10,\n        in_channels=384,\n        feat_channels=384,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGeneratorPerCls',\n            ranges=[[-50, -50, -1.67339111, 50, 50, -1.67339111],\n                    [-50, -50, -1.71396371, 50, 50, -1.71396371],\n                    [-50, -50, -1.61785072, 50, 50, -1.61785072],\n                    [-50, -50, -1.80984986, 50, 50, -1.80984986],\n                    [-50, -50, -1.76396500, 50, 50, -1.76396500],\n                    [-50, -50, -1.80032795, 50, 50, -1.80032795],\n       
             [-50, -50, -1.74440365, 50, 50, -1.74440365],\n                    [-50, -50, -1.68526504, 50, 50, -1.68526504],\n                    [-50, -50, -1.80673031, 50, 50, -1.80673031],\n                    [-50, -50, -1.64824291, 50, 50, -1.64824291]],\n            sizes=[\n                [0.60058911, 1.68452161, 1.27192197],  # bicycle\n                [0.76279481, 2.09973778, 1.44403034],  # motorcycle\n                [0.66344886, 0.72564370, 1.75748069],  # pedestrian\n                [0.39694519, 0.40359262, 1.06232151],  # traffic cone\n                [2.49008838, 0.48578221, 0.98297065],  # barrier\n                [1.95017717, 4.60718145, 1.72270761],  # car\n                [2.45609390, 6.73778078, 2.73004906],  # truck\n                [2.87427237, 12.01320693, 3.81509561],  # trailer\n                [2.94046906, 11.1885991, 3.47030982],  # bus\n                [2.73050468, 6.38352896, 3.13312415]  # construction vehicle\n            ],\n            custom_values=[0, 0],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        tasks=[\n            dict(\n                num_class=2,\n                class_names=['bicycle', 'motorcycle'],\n                shared_conv_channels=(64, 64),\n                shared_conv_strides=(1, 1),\n                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),\n            dict(\n                num_class=1,\n                class_names=['pedestrian'],\n                shared_conv_channels=(64, 64),\n                shared_conv_strides=(1, 1),\n                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),\n            dict(\n                num_class=2,\n                class_names=['traffic_cone', 'barrier'],\n                shared_conv_channels=(64, 64),\n                shared_conv_strides=(1, 1),\n                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),\n            dict(\n                num_class=1,\n                class_names=['car'],\n                shared_conv_channels=(64, 64, 64),\n                shared_conv_strides=(2, 1, 1),\n                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),\n            dict(\n                num_class=4,\n                class_names=[\n                    'truck', 'trailer', 'bus', 'construction_vehicle'\n                ],\n                shared_conv_channels=(64, 64, 64),\n                shared_conv_strides=(2, 1, 1),\n                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01))\n        ],\n        assign_per_class=True,\n        diff_rad_by_sin=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        _delete_=True,\n        pts=dict(\n            assigner=[\n                dict(  # bicycle\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1),\n                dict(  # 
motorcycle\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.3,\n                    min_pos_iou=0.3,\n                    ignore_iof_thr=-1),\n                dict(  # pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.4,\n                    min_pos_iou=0.4,\n                    ignore_iof_thr=-1),\n                dict(  # traffic cone\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.4,\n                    min_pos_iou=0.4,\n                    ignore_iof_thr=-1),\n                dict(  # barrier\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.4,\n                    min_pos_iou=0.4,\n                    ignore_iof_thr=-1),\n                dict(  # car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.45,\n                    min_pos_iou=0.45,\n                    ignore_iof_thr=-1),\n                dict(  # truck\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.4,\n                    min_pos_iou=0.4,\n                    ignore_iof_thr=-1),\n                dict(  # trailer\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1),\n                dict(  # bus\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.4,\n                    min_pos_iou=0.4,\n                    ignore_iof_thr=-1),\n                dict(  # construction vehicle\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1)\n            ],\n            allowed_border=0,\n            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n            pos_weight=-1,\n            debug=False)))\n"
  },
  {
    "path": "configs/transfusion_nusc_pillar_L.py",
    "content": "point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\nvoxel_size = [0.2, 0.2, 8]\nout_size_factor = 4\nevaluation = dict(interval=1)\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='ObjectSample',\n        db_sampler=dict(\n            data_root=None,\n            info_path=data_root + 'nuscenes_dbinfos_train.pkl',\n            rate=1.0,\n            prepare=dict(\n                filter_by_difficulty=[-1],\n                filter_by_min_points=dict(\n                    car=5,\n                    truck=5,\n                    bus=5,\n                    trailer=5,\n                    construction_vehicle=5,\n                    traffic_cone=5,\n                    barrier=5,\n                    motorcycle=5,\n                    bicycle=5,\n                    pedestrian=5)),\n            classes=class_names,\n            sample_groups=dict(\n                car=2,\n                truck=3,\n                construction_vehicle=7,\n                bus=4,\n                trailer=6,\n                barrier=2,\n                motorcycle=6,\n                bicycle=6,\n                pedestrian=2,\n                traffic_cone=2),\n            points_loader=dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=[0, 1, 2, 3, 4],\n            ))),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925 * 2, 0.3925 * 2],\n        scale_ratio_range=[0.9, 1.1],\n        translation_std=[0.5, 0.5, 0.5]),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1.0, 1.0],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                
with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=6,\n    train=dict(\n        type='CBGSDataset',\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + '/nuscenes_infos_train.pkl',\n            load_interval=1,\n            pipeline=train_pipeline,\n            classes=class_names,\n            modality=input_modality,\n            test_mode=False,\n            box_type_3d='LiDAR')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + '/nuscenes_infos_val.pkl',\n        load_interval=1,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + '/nuscenes_infos_val.pkl',\n        load_interval=1,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'))\nmodel = dict(\n    type='TransFusionDetector',\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        voxel_size=voxel_size,\n        max_voxels=(30000, 60000),\n        point_cloud_range=point_cloud_range),\n    pts_voxel_encoder=dict(\n        type='PillarFeatureNet',\n        in_channels=5,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        norm_cfg=dict(type='BN1d', eps=0.001, momentum=0.01),\n        point_cloud_range=point_cloud_range,\n    ),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)\n    ),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        out_channels=[64, 128, 256],\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        out_channels=[128, 128, 128],\n        upsample_strides=[0.5, 1, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='TransFusionHead',\n        num_proposals=200,\n        auxiliary=True,\n        in_channels=128 * 3,\n        hidden_channel=128,\n        num_classes=len(class_names),\n        num_decoder_layers=1,\n        num_heads=8,\n        learnable_query_pos=False,\n        initialize_by_heatmap=True,\n        nms_kernel_size=3,\n        ffn_channel=256,\n        dropout=0.1,\n        bn_momentum=0.1,\n        activation='relu',\n        common_heads=dict(center=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        bbox_coder=dict(\n            type='TransFusionBBoxCoder',\n            pc_range=point_cloud_range[:2],\n            voxel_size=voxel_size[:2],\n            out_size_factor=out_size_factor,\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            score_threshold=0.0,\n            code_size=10,\n        ),\n        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2, alpha=0.25, reduction='mean', loss_weight=1.0),\n        # loss_iou=dict(type='CrossEntropyLoss', use_sigmoid=True, reduction='mean', loss_weight=0.0),\n        
loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\n        loss_heatmap=dict(type='GaussianFocalLoss', reduction='mean', loss_weight=1.0),\n    ),\n    train_cfg=dict(\n        pts=dict(\n            dataset='nuScenes',\n            assigner=dict(\n                type='HungarianAssigner3D',\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),\n                cls_cost=dict(type='FocalLossCost', gamma=2, alpha=0.25, weight=0.15),\n                reg_cost=dict(type='BBoxBEVL1Cost', weight=0.25),\n                iou_cost=dict(type='IoU3DCost', weight=0.25)\n            ),\n            pos_weight=-1,\n            gaussian_overlap=0.1,\n            min_radius=2,\n            grid_size=[512, 512, 1],  # [x_len, y_len, 1]\n            voxel_size=voxel_size,\n            out_size_factor=out_size_factor,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n            point_cloud_range=point_cloud_range)),\n    test_cfg=dict(\n        pts=dict(\n            dataset='nuScenes',\n            grid_size=[512, 512, 1],\n            out_size_factor=out_size_factor,\n            pc_range=point_cloud_range[0:2],\n            voxel_size=voxel_size[:2],\n            nms_type=None,\n        )))\noptimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)  # for 8gpu * 2sample_per_gpu\noptimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 0.0001),\n    cyclic_times=1,\n    step_ratio_up=0.4)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.8947368421052632, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4)\ntotal_epochs = 20\ncheckpoint_config = dict(interval=1)\nlog_config = dict(\n    interval=50,\n    hooks=[dict(type='TextLoggerHook'),\n           dict(type='TensorboardLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = None\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ngpu_ids = range(0, 8)\n"
  },
  {
    "path": "configs/transfusion_nusc_pillar_LC.py",
    "content": "point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\nvoxel_size = [0.2, 0.2, 8]\nout_size_factor = 4\nevaluation = dict(interval=1)\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nimg_scale = (800, 448)\nnum_views = 6\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='LoadMultiViewImageFromFiles'),\n    # dict(\n    #     type='GlobalRotScaleTrans',\n    #     rot_range=[-0.3925 * 2, 0.3925 * 2],\n    #     scale_ratio_range=[0.9, 1.1],\n    #     translation_std=[0.5, 0.5, 0.5]),\n    # dict(\n    #     type='RandomFlip3D',\n    #     sync_2d=True,\n    #     flip_ratio_bev_horizontal=0.5,\n    #     flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='MyResize', img_scale=img_scale, keep_ratio=True),\n    dict(type='MyNormalize', **img_norm_cfg),\n    dict(type='MyPad', size_divisor=32),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(type='LoadMultiViewImageFromFiles'),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=img_scale,\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1.0, 1.0],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(type='MyResize', img_scale=img_scale, keep_ratio=True),\n            dict(type='MyNormalize', **img_norm_cfg),\n            dict(type='MyPad', size_divisor=32),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=6,\n    train=dict(\n        type='CBGSDataset',\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            num_views=num_views,\n            ann_file=data_root + '/nuscenes_infos_train.pkl',\n            load_interval=1,\n            pipeline=train_pipeline,\n            classes=class_names,\n            modality=input_modality,\n            test_mode=False,\n            box_type_3d='LiDAR')),\n    val=dict(\n        
type=dataset_type,\n        data_root=data_root,\n        num_views=num_views,\n        ann_file=data_root + '/nuscenes_infos_val.pkl',\n        load_interval=1,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        num_views=num_views,\n        ann_file=data_root + '/nuscenes_infos_val.pkl',\n        load_interval=1,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'))\nmodel = dict(\n    type='TransFusionDetector',\n    freeze_img=True,\n    # img_backbone=dict(\n    #     type='DLASeg',\n    #     num_layers=34,\n    #     heads={},\n    #     head_convs=-1,\n    #     ),\n    img_backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    img_neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        voxel_size=voxel_size,\n        max_voxels=(30000, 60000),\n        point_cloud_range=point_cloud_range),\n    pts_voxel_encoder=dict(\n        type='PillarFeatureNet',\n        in_channels=5,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        norm_cfg=dict(type='BN1d', eps=0.001, momentum=0.01),\n        point_cloud_range=point_cloud_range,\n    ),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)\n    ),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        out_channels=[64, 128, 256],\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        out_channels=[128, 128, 128],\n        upsample_strides=[0.5, 1, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='TransFusionHead',\n        fuse_img=True,\n        num_views=num_views,\n        in_channels_img=256,\n        out_size_factor_img=4,\n        num_proposals=200,\n        auxiliary=True,\n        in_channels=128 * 3,\n        hidden_channel=128,\n        num_classes=len(class_names),\n        num_decoder_layers=1,\n        num_heads=8,\n        learnable_query_pos=False,\n        initialize_by_heatmap=True,\n        nms_kernel_size=3,\n        ffn_channel=256,\n        dropout=0.1,\n        bn_momentum=0.1,\n        activation='relu',\n        common_heads=dict(center=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        bbox_coder=dict(\n            type='TransFusionBBoxCoder',\n            pc_range=point_cloud_range[:2],\n            voxel_size=voxel_size[:2],\n            out_size_factor=out_size_factor,\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            score_threshold=0.0,\n            code_size=10,\n        ),\n        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2, alpha=0.25, 
reduction='mean', loss_weight=1.0),\n        # loss_iou=dict(type='CrossEntropyLoss', use_sigmoid=True, reduction='mean', loss_weight=0.0),\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\n        loss_heatmap=dict(type='GaussianFocalLoss', reduction='mean', loss_weight=1.0),\n    ),\n    train_cfg=dict(\n        pts=dict(\n            dataset='nuScenes',\n            assigner=dict(\n                type='HungarianAssigner3D',\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),\n                cls_cost=dict(type='FocalLossCost', gamma=2, alpha=0.25, weight=0.15),\n                reg_cost=dict(type='BBoxBEVL1Cost', weight=0.25),\n                iou_cost=dict(type='IoU3DCost', weight=0.25)\n            ),\n            pos_weight=-1,\n            gaussian_overlap=0.1,\n            min_radius=2,\n            grid_size=[512, 512, 1],  # [x_len, y_len, 1]\n            voxel_size=voxel_size,\n            out_size_factor=out_size_factor,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n            point_cloud_range=point_cloud_range)),\n    test_cfg=dict(\n        pts=dict(\n            dataset='nuScenes',\n            grid_size=[512, 512, 1],\n            out_size_factor=out_size_factor,\n            pc_range=point_cloud_range[0:2],\n            voxel_size=voxel_size[:2],\n            nms_type=None,\n        )))\noptimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)  # for 8gpu * 2sample_per_gpu\noptimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 0.0001),\n    cyclic_times=1,\n    step_ratio_up=0.4)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.8947368421052632, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4)\ntotal_epochs = 6\ncheckpoint_config = dict(interval=1)\nlog_config = dict(\n    interval=50,\n    hooks=[dict(type='TextLoggerHook'),\n           dict(type='TensorboardLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = None\nload_from = 'checkpoints/fusion_pillar02_R50.pth'\nresume_from = None\nworkflow = [('train', 1)]\ngpu_ids = range(0, 8)\nfreeze_lidar_components = True\nfind_unused_parameters = True\n"
  },
  {
    "path": "configs/transfusion_nusc_voxel_L.py",
    "content": "point_cloud_range = [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0]\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\nvoxel_size = [0.075, 0.075, 0.2]\nout_size_factor = 8\nevaluation = dict(interval=1)\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='ObjectSample',\n        db_sampler=dict(\n            data_root=None,\n            info_path=data_root + 'nuscenes_dbinfos_train.pkl',\n            rate=1.0,\n            prepare=dict(\n                filter_by_difficulty=[-1],\n                filter_by_min_points=dict(\n                    car=5,\n                    truck=5,\n                    bus=5,\n                    trailer=5,\n                    construction_vehicle=5,\n                    traffic_cone=5,\n                    barrier=5,\n                    motorcycle=5,\n                    bicycle=5,\n                    pedestrian=5)),\n            classes=class_names,\n            sample_groups=dict(\n                car=2,\n                truck=3,\n                construction_vehicle=7,\n                bus=4,\n                trailer=6,\n                barrier=2,\n                motorcycle=6,\n                bicycle=6,\n                pedestrian=2,\n                traffic_cone=2),\n            points_loader=dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=[0, 1, 2, 3, 4],\n            ))),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925 * 2, 0.3925 * 2],\n        scale_ratio_range=[0.9, 1.1],\n        translation_std=[0.5, 0.5, 0.5]),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1.0, 1.0],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                
with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=6,\n    train=dict(\n        type='CBGSDataset',\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + '/nuscenes_infos_train.pkl',\n            load_interval=1,\n            pipeline=train_pipeline,\n            classes=class_names,\n            modality=input_modality,\n            test_mode=False,\n            box_type_3d='LiDAR')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + '/nuscenes_infos_val.pkl',\n        load_interval=1,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + '/nuscenes_infos_val.pkl',\n        load_interval=1,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'))\nmodel = dict(\n    type='TransFusionDetector',\n    pts_voxel_layer=dict(\n        max_num_points=10,\n        voxel_size=voxel_size,\n        max_voxels=(120000, 160000),\n        point_cloud_range=point_cloud_range),\n    pts_voxel_encoder=dict(\n        type='HardSimpleVFE',\n        num_features=5,\n    ),\n    pts_middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=5,\n        sparse_shape=[41, 1440, 1440],\n        output_channels=128,\n        order=('conv', 'norm', 'act'),\n        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),\n        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),\n        block_type='basicblock'),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        out_channels=[128, 256],\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        out_channels=[256, 256],\n        upsample_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='TransFusionHead',\n        num_proposals=200,\n        auxiliary=True,\n        in_channels=256 * 2,\n        hidden_channel=128,\n        num_classes=len(class_names),\n        num_decoder_layers=1,\n        num_heads=8,\n        learnable_query_pos=False,\n        initialize_by_heatmap=True,\n        nms_kernel_size=3,\n        ffn_channel=256,\n        dropout=0.1,\n        bn_momentum=0.1,\n        activation='relu',\n        common_heads=dict(center=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        bbox_coder=dict(\n            type='TransFusionBBoxCoder',\n            pc_range=point_cloud_range[:2],\n            voxel_size=voxel_size[:2],\n            out_size_factor=out_size_factor,\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            score_threshold=0.0,\n            code_size=10,\n        ),\n        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2, alpha=0.25, reduction='mean', loss_weight=1.0),\n        # loss_iou=dict(type='CrossEntropyLoss', use_sigmoid=True, 
reduction='mean', loss_weight=0.0),\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\n        loss_heatmap=dict(type='GaussianFocalLoss', reduction='mean', loss_weight=1.0),\n    ),\n    train_cfg=dict(\n        pts=dict(\n            dataset='nuScenes',\n            assigner=dict(\n                type='HungarianAssigner3D',\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),\n                cls_cost=dict(type='FocalLossCost', gamma=2, alpha=0.25, weight=0.15),\n                reg_cost=dict(type='BBoxBEVL1Cost', weight=0.25),\n                iou_cost=dict(type='IoU3DCost', weight=0.25)\n            ),\n            pos_weight=-1,\n            gaussian_overlap=0.1,\n            min_radius=2,\n            grid_size=[1440, 1440, 40],  # [x_len, y_len, 1]\n            voxel_size=voxel_size,\n            out_size_factor=out_size_factor,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n            point_cloud_range=point_cloud_range)),\n    test_cfg=dict(\n        pts=dict(\n            dataset='nuScenes',\n            grid_size=[1440, 1440, 40],\n            out_size_factor=out_size_factor,\n            pc_range=point_cloud_range[0:2],\n            voxel_size=voxel_size[:2],\n            nms_type=None,\n        )))\noptimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)  # for 8gpu * 2sample_per_gpu\noptimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 0.0001),\n    cyclic_times=1,\n    step_ratio_up=0.4)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.8947368421052632, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4)\ntotal_epochs = 20\ncheckpoint_config = dict(interval=1)\nlog_config = dict(\n    interval=50,\n    hooks=[dict(type='TextLoggerHook'),\n           dict(type='TensorboardLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = None\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ngpu_ids = range(0, 8)\n"
  },
  {
    "path": "configs/transfusion_nusc_voxel_LC.py",
    "content": "point_cloud_range = [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0]\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\nvoxel_size = [0.075, 0.075, 0.2]\nout_size_factor = 8\nevaluation = dict(interval=1)\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nimg_scale = (800, 448)\nnum_views = 6\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='LoadMultiViewImageFromFiles'),\n    # dict(\n    #     type='GlobalRotScaleTrans',\n    #     rot_range=[-0.3925 * 2, 0.3925 * 2],\n    #     scale_ratio_range=[0.9, 1.1],\n    #     translation_std=[0.5, 0.5, 0.5]),\n    # dict(\n    #     type='RandomFlip3D',\n    #     sync_2d=True,\n    #     flip_ratio_bev_horizontal=0.5,\n    #     flip_ratio_bev_vertical=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='MyResize', img_scale=img_scale, keep_ratio=True),\n    dict(type='MyNormalize', **img_norm_cfg),\n    dict(type='MyPad', size_divisor=32),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        use_dim=[0, 1, 2, 3, 4],\n    ),\n    dict(type='LoadMultiViewImageFromFiles'),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=img_scale,\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1.0, 1.0],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(type='MyResize', img_scale=img_scale, keep_ratio=True),\n            dict(type='MyNormalize', **img_norm_cfg),\n            dict(type='MyPad', size_divisor=32),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=6,\n    train=dict(\n        type='CBGSDataset',\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            num_views=num_views,\n            ann_file=data_root + '/nuscenes_infos_train_20pc.pkl',\n            load_interval=1,\n            pipeline=train_pipeline,\n            classes=class_names,\n            modality=input_modality,\n            test_mode=False,\n            box_type_3d='LiDAR')),\n    
val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        num_views=num_views,\n        ann_file=data_root + '/nuscenes_infos_val.pkl',\n        load_interval=1,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        num_views=num_views,\n        ann_file=data_root + '/nuscenes_infos_val.pkl',\n        load_interval=1,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'))\nmodel = dict(\n    type='TransFusionDetector',\n    freeze_img=True,\n    # img_backbone=dict(\n    #     type='DLASeg',\n    #     num_layers=34,\n    #     heads={},\n    #     head_convs=-1,\n    #     ),\n    img_backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    img_neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    pts_voxel_layer=dict(\n        max_num_points=10,\n        voxel_size=voxel_size,\n        max_voxels=(120000, 160000),\n        point_cloud_range=point_cloud_range),\n    pts_voxel_encoder=dict(\n        type='HardSimpleVFE',\n        num_features=5,\n    ),\n    pts_middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=5,\n        sparse_shape=[41, 1440, 1440],\n        output_channels=128,\n        order=('conv', 'norm', 'act'),\n        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),\n        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),\n        block_type='basicblock'),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        out_channels=[128, 256],\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        out_channels=[256, 256],\n        upsample_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='TransFusionHead',\n        fuse_img=True,\n        num_views=num_views,\n        in_channels_img=256,\n        out_size_factor_img=4,\n        num_proposals=200,\n        auxiliary=True,\n        in_channels=256 * 2,\n        hidden_channel=128,\n        num_classes=len(class_names),\n        num_decoder_layers=1,\n        num_heads=8,\n        learnable_query_pos=False,\n        initialize_by_heatmap=True,\n        nms_kernel_size=3,\n        ffn_channel=256,\n        dropout=0.1,\n        bn_momentum=0.1,\n        activation='relu',\n        common_heads=dict(center=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        bbox_coder=dict(\n            type='TransFusionBBoxCoder',\n            pc_range=point_cloud_range[:2],\n            voxel_size=voxel_size[:2],\n            out_size_factor=out_size_factor,\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            score_threshold=0.0,\n            code_size=10,\n        ),\n        
loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2, alpha=0.25, reduction='mean', loss_weight=1.0),\n        # loss_iou=dict(type='CrossEntropyLoss', use_sigmoid=True, reduction='mean', loss_weight=0.0),\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\n        loss_heatmap=dict(type='GaussianFocalLoss', reduction='mean', loss_weight=1.0),\n    ),\n    train_cfg=dict(\n        pts=dict(\n            dataset='nuScenes',\n            assigner=dict(\n                type='HungarianAssigner3D',\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),\n                cls_cost=dict(type='FocalLossCost', gamma=2, alpha=0.25, weight=0.15),\n                reg_cost=dict(type='BBoxBEVL1Cost', weight=0.25),\n                iou_cost=dict(type='IoU3DCost', weight=0.25)\n            ),\n            pos_weight=-1,\n            gaussian_overlap=0.1,\n            min_radius=2,\n            grid_size=[1440, 1440, 40],  # [x_len, y_len, 1]\n            voxel_size=voxel_size,\n            out_size_factor=out_size_factor,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n            point_cloud_range=point_cloud_range)),\n    test_cfg=dict(\n        pts=dict(\n            dataset='nuScenes',\n            grid_size=[1440, 1440, 40],\n            out_size_factor=out_size_factor,\n            pc_range=point_cloud_range[0:2],\n            voxel_size=voxel_size[:2],\n            nms_type=None,\n        )))\noptimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)  # for 8gpu * 2sample_per_gpu\noptimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 0.0001),\n    cyclic_times=1,\n    step_ratio_up=0.4)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.8947368421052632, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4)\ntotal_epochs = 6\ncheckpoint_config = dict(interval=1)\nlog_config = dict(\n    interval=50,\n    hooks=[dict(type='TextLoggerHook'),\n           dict(type='TensorboardLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = None\nload_from = 'checkpoints/fusion_voxel0075_R50.pth'\nresume_from = None\nworkflow = [('train', 1)]\ngpu_ids = range(0, 8)\nfreeze_lidar_components = True\nfind_unused_parameters = True\n"
  },
  {
    "path": "configs/transfusion_waymo_voxel_L.py",
    "content": "point_cloud_range = [-75.2, -75.2, -2, 75.2, 75.2, 4]\nclass_names = ['Car', 'Pedestrian', 'Cyclist']\nvoxel_size = [0.1, 0.1, 0.15]\nout_size_factor = 8\nevaluation = dict(interval=1)\ndataset_type = 'WaymoDataset'\ndata_root = 'data/waymo/kitti_format'\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    # dict(type='ObjectSample',\n    #      db_sampler=dict(\n    #          data_root=data_root,\n    #          info_path=data_root + '/waymo_dbinfos_train.pkl',\n    #          rate=1.0,\n    #          prepare=dict(\n    #              filter_by_difficulty=[-1],\n    #              filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),\n    #          classes=class_names,\n    #          sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),\n    #          points_loader=dict(\n    #              type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=[0, 1, 2, 3, 4]))\n    #      ),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05],\n    ),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(800, 1333),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1.0, 1.0],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=6,\n    train=dict(\n        type='RepeatDataset',\n        times=1,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            load_interval=1,\n            ann_file=data_root + '/waymo_infos_train.pkl',\n            split='training',\n            pipeline=train_pipeline,\n            classes=class_names,\n            modality=input_modality,\n            test_mode=False,\n            box_type_3d='LiDAR')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + '/waymo_infos_val.pkl',\n        split='training',\n        load_interval=10,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 
'/waymo_infos_val.pkl',\n        split='training',\n        load_interval=10,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'))\nmodel = dict(\n    type='TransFusionDetector',\n    pts_voxel_layer=dict(\n        max_num_points=5,\n        voxel_size=voxel_size,\n        max_voxels=150000,\n        point_cloud_range=point_cloud_range),\n    pts_voxel_encoder=dict(\n        type='HardVFE',\n        in_channels=5,\n        # num_features=5,\n        feat_channels=[64],\n        with_distance=False,\n        with_cluster_center=False,\n        with_voxel_center=False,\n        voxel_size=voxel_size,\n        norm_cfg=dict(type='BN1d', eps=0.001, momentum=0.01),\n        point_cloud_range=point_cloud_range,\n    ),\n    pts_middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=64,\n        sparse_shape=[41, 1504, 1504],\n        output_channels=128,\n        order=('conv', 'norm', 'act'),\n        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),\n        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),\n        block_type='basicblock'),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        out_channels=[128, 256],\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        out_channels=[256, 256],\n        upsample_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='TransFusionHead',\n        num_proposals=300,\n        auxiliary=True,\n        in_channels=256 * 2,\n        hidden_channel=128,\n        num_classes=len(class_names),\n        num_decoder_layers=1,\n        num_heads=8,\n        learnable_query_pos=False,\n        initialize_by_heatmap=True,\n        nms_kernel_size=3,\n        ffn_channel=256,\n        dropout=0.1,\n        bn_momentum=0.1,\n        activation='relu',\n        common_heads=dict(center=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2)),\n        bbox_coder=dict(\n            type='TransFusionBBoxCoder',\n            pc_range=point_cloud_range[:2],\n            voxel_size=voxel_size[:2],\n            out_size_factor=out_size_factor,\n            post_center_range=[-80, -80, -10.0, 80, 80, 10.0],\n            score_threshold=0.0,\n            code_size=8,\n        ),\n        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2, alpha=0.25, reduction='mean', loss_weight=1.0),\n        # loss_iou=dict(type='CrossEntropyLoss', use_sigmoid=True, reduction='mean', loss_weight=0.0),\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=2.0),\n        loss_heatmap=dict(type='GaussianFocalLoss', reduction='mean', loss_weight=1.0),\n    ),\n    train_cfg=dict(\n        pts=dict(\n            dataset='Waymo',\n            assigner=dict(\n                type='HungarianAssigner3D',\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),\n                cls_cost=dict(type='FocalLossCost', gamma=2, alpha=0.25, weight=0.6),\n                reg_cost=dict(type='BBoxBEVL1Cost', weight=2.0),\n                iou_cost=dict(type='IoU3DCost', weight=2.0)\n            ),\n            pos_weight=-1,\n 
           gaussian_overlap=0.1,\n            min_radius=2,\n            grid_size=[1504, 1504, 40],  # [x_len, y_len, 1]\n            voxel_size=voxel_size,\n            out_size_factor=out_size_factor,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n            point_cloud_range=point_cloud_range)),\n    test_cfg=dict(\n        pts=dict(\n            dataset='Waymo',\n            grid_size=[1504, 1504, 40],\n            out_size_factor=out_size_factor,\n            voxel_size=voxel_size[:2],\n            nms_type=None,\n        )))\noptimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)  # for 8gpu * 4sample_per_gpu\noptimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 0.0001),\n    cyclic_times=1,\n    step_ratio_up=0.4)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.8947368421052632, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4)\ntotal_epochs = 36\ncheckpoint_config = dict(interval=1)\nlog_config = dict(\n    interval=50,\n    hooks=[dict(type='TextLoggerHook'),\n           dict(type='TensorboardLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = None\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ngpu_ids = range(0, 8)"
  },
  {
    "path": "configs/transfusion_waymo_voxel_LC.py",
    "content": "point_cloud_range = [-75.2, -75.2, -2, 75.2, 75.2, 4]\nclass_names = ['Car', 'Pedestrian', 'Cyclist']\nvoxel_size = [0.1, 0.1, 0.15]\nout_size_factor = 8\nevaluation = dict(interval=1)\ndataset_type = 'WaymoDataset'\ndata_root = 'data/waymo/kitti_format'\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nimg_scale = (640, 960)\nnum_views = 5\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(type='LoadMultiViewImageFromFiles', img_scale=(1280, 1920)),\n    dict(type='MyResize', img_scale=img_scale, keep_ratio=True),\n    dict(type='MyNormalize', **img_norm_cfg),\n    dict(type='MyPad', size_divisor=32),\n    # dict(\n    #     type='RandomFlip3D',\n    #     sync_2d=True,\n    #     flip_ratio_bev_horizontal=0.5,\n    #     flip_ratio_bev_vertical=0.5),\n    # dict(\n    #     type='GlobalRotScaleTrans',\n    #     rot_range=[-0.78539816, 0.78539816],\n    #     scale_ratio_range=[0.95, 1.05],\n    # ),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),\n    dict(type='LoadMultiViewImageFromFiles', img_scale=(1280, 1920)),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=img_scale,\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1.0, 1.0],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(type='MyNormalize', **img_norm_cfg),\n            dict(type='MyResize', img_scale=img_scale, keep_ratio=True),\n            dict(type='MyPad', size_divisor=32),\n            dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points', 'img'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=6,\n    train=dict(\n        type='RepeatDataset',\n        times=1,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            load_interval=1,\n            num_views=num_views,\n            ann_file=data_root + '/waymo_infos_train.pkl',\n            split='training',\n            pipeline=train_pipeline,\n            classes=class_names,\n            modality=input_modality,\n            test_mode=False,\n            box_type_3d='LiDAR')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        num_views=num_views,\n        ann_file=data_root + '/waymo_infos_val.pkl',\n        split='training',\n        load_interval=10,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        
type=dataset_type,\n        data_root=data_root,\n        num_views=num_views,\n        ann_file=data_root + '/waymo_infos_val.pkl',\n        split='training',\n        load_interval=10,\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'))\nmodel = dict(\n    type='TransFusionDetector',\n    freeze_img=True,\n    img_backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    img_neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    pts_voxel_layer=dict(\n        max_num_points=5,\n        voxel_size=voxel_size,\n        max_voxels=150000,\n        point_cloud_range=point_cloud_range),\n    pts_voxel_encoder=dict(\n        type='HardVFE',\n        in_channels=5,\n        feat_channels=[64],\n        with_distance=False,\n        with_cluster_center=False,\n        with_voxel_center=False,\n        voxel_size=voxel_size,\n        norm_cfg=dict(type='BN1d', eps=0.001, momentum=0.01),\n        point_cloud_range=point_cloud_range,\n    ),\n    pts_middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=64,\n        sparse_shape=[41, 1504, 1504],\n        output_channels=128,\n        order=('conv', 'norm', 'act'),\n        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 128)),\n        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),\n        block_type='basicblock'),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        out_channels=[128, 256],\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        out_channels=[256, 256],\n        upsample_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='TransFusionHead',\n        fuse_img=True,\n        num_views=num_views,\n        in_channels_img=256,\n        out_size_factor_img=4,\n        num_proposals=300,\n        auxiliary=True,\n        in_channels=256 * 2,\n        hidden_channel=128,\n        num_classes=len(class_names),\n        num_decoder_layers=1,\n        num_heads=8,\n        learnable_query_pos=False,\n        initialize_by_heatmap=True,\n        nms_kernel_size=3,\n        ffn_channel=256,\n        dropout=0.1,\n        bn_momentum=0.1,\n        activation='relu',\n        common_heads=dict(center=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2)),\n        bbox_coder=dict(\n            type='TransFusionBBoxCoder',\n            pc_range=point_cloud_range[:2],\n            voxel_size=voxel_size[:2],\n            out_size_factor=out_size_factor,\n            post_center_range=[-80, -80, -10.0, 80, 80, 10.0],\n            score_threshold=0.0,\n            code_size=8,\n        ),\n        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2, alpha=0.25, reduction='mean', loss_weight=1.0),\n        # loss_iou=dict(type='CrossEntropyLoss', use_sigmoid=True, reduction='mean', loss_weight=0.0),\n        loss_bbox=dict(type='L1Loss', 
reduction='mean', loss_weight=2.0),\n        loss_heatmap=dict(type='GaussianFocalLoss', reduction='mean', loss_weight=1.0),\n    ),\n    train_cfg=dict(\n        pts=dict(\n            dataset='Waymo',\n            assigner=dict(\n                type='HungarianAssigner3D',\n                iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),\n                cls_cost=dict(type='FocalLossCost', gamma=2, alpha=0.25, weight=0.6),\n                reg_cost=dict(type='BBoxBEVL1Cost', weight=2.0),\n                iou_cost=dict(type='IoU3DCost', weight=2.0)\n            ),\n            pos_weight=-1,\n            gaussian_overlap=0.1,\n            min_radius=2,\n            grid_size=[1504, 1504, 40],  # [x_len, y_len, 1]\n            voxel_size=voxel_size,\n            out_size_factor=out_size_factor,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n            point_cloud_range=point_cloud_range)),\n    test_cfg=dict(\n        pts=dict(\n            dataset='Waymo',\n            pc_range=point_cloud_range[:2],\n            grid_size=[1504, 1504, 40],\n            out_size_factor=out_size_factor,\n            voxel_size=voxel_size[:2],\n            nms_type=None,\n        )))\noptimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)  # for 8gpu * 2sample_per_gpu\noptimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 0.0001),\n    cyclic_times=1,\n    step_ratio_up=0.4)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.8947368421052632, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4)\ntotal_epochs = 12\ncheckpoint_config = dict(interval=1)\nlog_config = dict(\n    interval=50,\n    hooks=[dict(type='TextLoggerHook'),\n           dict(type='TensorboardLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = None\nload_from = 'checkpoints/waymo_36e_R50.pth'\nresume_from = None\nworkflow = [('train', 1)]\nfreeze_lidar_components = True\nfind_unused_parameters = True\ngpu_ids = range(0, 8)\n"
  },
  {
    "path": "configs/votenet/README.md",
    "content": "# Deep Hough Voting for 3D Object Detection in Point Clouds\n\n## Introduction\n\n[ALGORITHM]\n\nWe implement VoteNet and provide the result and checkpoints on ScanNet and SUNRGBD datasets.\n\n```\n@inproceedings{qi2019deep,\n    author = {Qi, Charles R and Litany, Or and He, Kaiming and Guibas, Leonidas J},\n    title = {Deep Hough Voting for 3D Object Detection in Point Clouds},\n    booktitle = {Proceedings of the IEEE International Conference on Computer Vision},\n    year = {2019}\n}\n```\n\n## Results\n\n### ScanNet\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 |AP@0.5| Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|    [PointNet++](./votenet_8x8_scannet-3d-18class.py)     |  3x    |4.1||62.90|39.91|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/votenet/votenet_8x8_scannet-3d-18class/votenet_8x8_scannet-3d-18class_20200620_230238-2cea9c3a.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/votenet/votenet_8x8_scannet-3d-18class/votenet_8x8_scannet-3d-18class_20200620_230238.log.json)|\n\n### SUNRGBD\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 |AP@0.5| Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|    [PointNet++](./votenet_16x8_sunrgbd-3d-10class.py)     |  3x    |8.1||59.07|35.77|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/votenet/votenet_16x8_sunrgbd-3d-10class/votenet_16x8_sunrgbd-3d-10class_20200620_230238-4483c0c0.pth) &#124; [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/votenet/votenet_16x8_sunrgbd-3d-10class/votenet_16x8_sunrgbd-3d-10class_20200620_230238.log.json)|\n\n**Notice**: If your current mmdetection3d version >= 0.6.0, and you are using the checkpoints downloaded from the above links or using checkpoints trained with mmdetection3d version < 0.6.0, the checkpoints have to be first converted via [tools/model_converters/convert_votenet_checkpoints.py](../../tools/model_converters/convert_votenet_checkpoints.py):\n\n```\npython ./tools/model_converters/convert_votenet_checkpoints.py ${ORIGINAL_CHECKPOINT_PATH} --out=${NEW_CHECKPOINT_PATH}\n```\n\nThen you can use the converted checkpoints following [getting_started.md](../../docs/getting_started.md).\n\n## Indeterminism\n\nSince test data preparation randomly downsamples the points, and the test script uses fixed random seeds while the random seeds of validation in training are not fixed, the test results may be slightly different from the results reported above.\n\n## IoU loss\n\nAdding IoU loss (simply = 1-IoU) boosts VoteNet's performance. To use IoU loss, add this loss term to the config file:\n\n```python\niou_loss=dict(type='AxisAlignedIoULoss', reduction='sum', loss_weight=10.0 / 3.0)\n```\n\n|  Backbone   | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 |AP@0.5| Download |\n| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |\n|    [PointNet++](./votenet_iouloss_8x8_scannet-3d-18class.py)     |  3x    |4.1||63.81|44.21|/|\n\nFor now, we only support calculating IoU loss for axis-aligned bounding boxes since the CUDA op of general 3D IoU calculation does not implement the backward method. Therefore, IoU loss can only be used for ScanNet dataset for now.\n"
  },
  {
    "path": "configs/votenet/votenet_16x8_sunrgbd-3d-10class.py",
    "content": "_base_ = [\n    '../_base_/datasets/sunrgbd-3d-10class.py', '../_base_/models/votenet.py',\n    '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    bbox_head=dict(\n        num_classes=10,\n        bbox_coder=dict(\n            type='PartialBinBasedBBoxCoder',\n            num_sizes=10,\n            num_dir_bins=12,\n            with_rot=True,\n            mean_sizes=[\n                [2.114256, 1.620300, 0.927272], [0.791118, 1.279516, 0.718182],\n                [0.923508, 1.867419, 0.845495], [0.591958, 0.552978, 0.827272],\n                [0.699104, 0.454178, 0.75625], [0.69519, 1.346299, 0.736364],\n                [0.528526, 1.002642, 1.172878], [0.500618, 0.632163, 0.683424],\n                [0.404671, 1.071108, 1.688889], [0.76584, 1.398258, 0.472728]\n            ]),\n    ))\n"
  },
  {
    "path": "configs/votenet/votenet_8x8_scannet-3d-18class.py",
    "content": "_base_ = [\n    '../_base_/datasets/scannet-3d-18class.py', '../_base_/models/votenet.py',\n    '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py'\n]\n\n# model settings\nmodel = dict(\n    bbox_head=dict(\n        num_classes=18,\n        bbox_coder=dict(\n            type='PartialBinBasedBBoxCoder',\n            num_sizes=18,\n            num_dir_bins=1,\n            with_rot=False,\n            mean_sizes=[[0.76966727, 0.8116021, 0.92573744],\n                        [1.876858, 1.8425595, 1.1931566],\n                        [0.61328, 0.6148609, 0.7182701],\n                        [1.3955007, 1.5121545, 0.83443564],\n                        [0.97949594, 1.0675149, 0.6329687],\n                        [0.531663, 0.5955577, 1.7500148],\n                        [0.9624706, 0.72462326, 1.1481868],\n                        [0.83221924, 1.0490936, 1.6875663],\n                        [0.21132214, 0.4206159, 0.5372846],\n                        [1.4440073, 1.8970833, 0.26985747],\n                        [1.0294262, 1.4040797, 0.87554324],\n                        [1.3766412, 0.65521795, 1.6813129],\n                        [0.6650819, 0.71111923, 1.298853],\n                        [0.41999173, 0.37906948, 1.7513971],\n                        [0.59359556, 0.5912492, 0.73919016],\n                        [0.50867593, 0.50656086, 0.30136237],\n                        [1.1511526, 1.0546296, 0.49706793],\n                        [0.47535285, 0.49249494, 0.5802117]])))\n\n# optimizer\n# yapf:disable\nlog_config = dict(\n    interval=30,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n"
  },
  {
    "path": "configs/votenet/votenet_iouloss_8x8_scannet-3d-18class.py",
    "content": "_base_ = ['./votenet_8x8_scannet-3d-18class.py']\n\n# model settings, add iou loss\nmodel = dict(\n    bbox_head=dict(\n        iou_loss=dict(\n            type='AxisAlignedIoULoss', reduction='sum', loss_weight=10.0 /\n            3.0)))\n"
  },
  {
    "path": "configs/waymo.md",
    "content": "# MODEL ZOO\n\n## Common settings and notes\n\n- The experiments are run with PyTorch 1.7.0, CUDA 10.1 and CUDNN 7.6\n- The training is conducted on 8 Tesla V100 GPUs\n\n## Waymo 3D Detection\n\nWe try a few training schedules for TransFusion-L and list the performance below. The fusion-based models are further trained for 6 epochs from the pretrained LiDAR backbone. We freeze the weight of LiDAR backbone to save GPU memory.\n\n| Model   | Backbone | epoch | Veh_L2 | Ped_L2 | Cyc_L2  | MAPH   |\n|---------|--------|--------|---------|---------|---------|---------|\n| [TransFusion-L](configs/transfusion_waymo_voxel_L.py) | VoxelNet | 12 | 63.86 | 62.84 | 67.17 | 64.63\n| [TransFusion-L](configs/transfusion_waymo_voxel_L.py) | VoxelNet | 24 | 64.54 | 63.39 | 66.43 | 64.78\n| [TransFusion-L](configs/transfusion_waymo_voxel_L.py) | VoxelNet | 36 | 65.07 | 63.70 | 65.97 | 64.91\n| [TransFusion](configs/transfusion_waymo_voxel_LC.py) | VoxelNet | 36 + 6| 65.11 | 64.02 | 67.40 | 65.51\n\n"
  },
  {
    "path": "demo/pcd_demo.py",
    "content": "from argparse import ArgumentParser\n\nfrom mmdet3d.apis import inference_detector, init_detector, show_result_meshlab\n\n\ndef main():\n    parser = ArgumentParser()\n    parser.add_argument('pcd', help='Point cloud file')\n    parser.add_argument('config', help='Config file')\n    parser.add_argument('checkpoint', help='Checkpoint file')\n    parser.add_argument(\n        '--device', default='cuda:0', help='Device used for inference')\n    parser.add_argument(\n        '--score-thr', type=float, default=0.6, help='bbox score threshold')\n    parser.add_argument(\n        '--out-dir', type=str, default='demo', help='dir to save results')\n    args = parser.parse_args()\n\n    # build the model from a config file and a checkpoint file\n    model = init_detector(args.config, args.checkpoint, device=args.device)\n    # test a single image\n    result, data = inference_detector(model, args.pcd)\n    # show the results\n    show_result_meshlab(data, result, args.out_dir)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "docker/Dockerfile",
    "content": "ARG PYTORCH=\"1.6.0\"\nARG CUDA=\"10.1\"\nARG CUDNN=\"7\"\n\nFROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel\n\nENV TORCH_CUDA_ARCH_LIST=\"6.0 6.1 7.0+PTX\"\nENV TORCH_NVCC_FLAGS=\"-Xfatbin -compress-all\"\nENV CMAKE_PREFIX_PATH=\"$(dirname $(which conda))/../\"\n\nRUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \\\n    && apt-get clean \\\n    && rm -rf /var/lib/apt/lists/*\n\n# Install MMCV\nRUN pip install mmcv-full==latest+torch1.6.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html\nRUN pip install mmdet\n\n# Install MMDetection\nRUN conda clean --all\nRUN git clone https://github.com/open-mmlab/mmdetection3d.git /mmdetection3d\nWORKDIR /mmdetection3d\nENV FORCE_CUDA=\"1\"\nRUN pip install -r requirements/build.txt\nRUN pip install --no-cache-dir -e .\n\n# uninstall pycocotools installed by nuscenes-devkit and reinstall mmpycocotools\nRUN pip uninstall pycocotools --no-cache-dir -y\nRUN pip install mmpycocotools --no-cache-dir --force --no-deps\n"
  },
  {
    "path": "mmdet3d/__init__.py",
    "content": "import mmcv\n\nimport mmdet\nfrom .version import __version__, short_version\n\n\ndef digit_version(version_str):\n    digit_version = []\n    for x in version_str.split('.'):\n        if x.isdigit():\n            digit_version.append(int(x))\n        elif x.find('rc') != -1:\n            patch_version = x.split('rc')\n            digit_version.append(int(patch_version[0]) - 1)\n            digit_version.append(int(patch_version[1]))\n    return digit_version\n\n\nmmcv_minimum_version = '1.2.4'\nmmcv_maximum_version = '1.4.0'\nmmcv_version = digit_version(mmcv.__version__)\n\n\nassert (mmcv_version >= digit_version(mmcv_minimum_version)\n        and mmcv_version <= digit_version(mmcv_maximum_version)), \\\n    f'MMCV=={mmcv.__version__} is used but incompatible. ' \\\n    f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'\n\nmmdet_minimum_version = '2.5.0'\nmmdet_maximum_version = '3.0.0'\nmmdet_version = digit_version(mmdet.__version__)\nassert (mmdet_version >= digit_version(mmdet_minimum_version)\n        and mmdet_version <= digit_version(mmdet_maximum_version)), \\\n    f'MMDET=={mmdet.__version__} is used but incompatible. ' \\\n    f'Please install mmdet>={mmdet_minimum_version}, ' \\\n    f'<={mmdet_maximum_version}.'\n\n__all__ = ['__version__', 'short_version']\n"
  },
  {
    "path": "mmdet3d/apis/__init__.py",
    "content": "from .inference import (convert_SyncBN, inference_detector, init_detector,\n                        show_result_meshlab)\nfrom .test import single_gpu_test\n\n__all__ = [\n    'inference_detector', 'init_detector', 'single_gpu_test',\n    'show_result_meshlab', 'convert_SyncBN'\n]\n"
  },
  {
    "path": "mmdet3d/apis/inference.py",
    "content": "import mmcv\nimport torch\nfrom copy import deepcopy\nfrom mmcv.parallel import collate, scatter\nfrom mmcv.runner import load_checkpoint\nfrom os import path as osp\n\nfrom mmdet3d.core import Box3DMode, show_result\nfrom mmdet3d.core.bbox import get_box_type\nfrom mmdet3d.datasets.pipelines import Compose\nfrom mmdet3d.models import build_detector\n\n\ndef convert_SyncBN(config):\n    \"\"\"Convert config's naiveSyncBN to BN.\n\n    Args:\n         config (str or :obj:`mmcv.Config`): Config file path or the config\n            object.\n    \"\"\"\n    if isinstance(config, dict):\n        for item in config:\n            if item == 'norm_cfg':\n                config[item]['type'] = config[item]['type']. \\\n                                    replace('naiveSyncBN', 'BN')\n            else:\n                convert_SyncBN(config[item])\n\n\ndef init_detector(config, checkpoint=None, device='cuda:0'):\n    \"\"\"Initialize a detector from config file.\n\n    Args:\n        config (str or :obj:`mmcv.Config`): Config file path or the config\n            object.\n        checkpoint (str, optional): Checkpoint path. If left as None, the model\n            will not load any weights.\n        device (str): Device to use.\n\n    Returns:\n        nn.Module: The constructed detector.\n    \"\"\"\n    if isinstance(config, str):\n        config = mmcv.Config.fromfile(config)\n    elif not isinstance(config, mmcv.Config):\n        raise TypeError('config must be a filename or Config object, '\n                        f'but got {type(config)}')\n    config.model.pretrained = None\n    convert_SyncBN(config.model)\n    config.model.train_cfg = None\n    model = build_detector(config.model, test_cfg=config.get('test_cfg'))\n    if checkpoint is not None:\n        checkpoint = load_checkpoint(model, checkpoint)\n        if 'CLASSES' in checkpoint['meta']:\n            model.CLASSES = checkpoint['meta']['CLASSES']\n        else:\n            model.CLASSES = config.class_names\n    model.cfg = config  # save the config in the model for convenience\n    model.to(device)\n    model.eval()\n    return model\n\n\ndef inference_detector(model, pcd):\n    \"\"\"Inference point cloud with the detector.\n\n    Args:\n        model (nn.Module): The loaded detector.\n        pcd (str): Point cloud files.\n\n    Returns:\n        tuple: Predicted results and data from pipeline.\n    \"\"\"\n    cfg = model.cfg\n    device = next(model.parameters()).device  # model device\n    # build the data pipeline\n    test_pipeline = deepcopy(cfg.data.test.pipeline)\n    test_pipeline = Compose(test_pipeline)\n    box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)\n    data = dict(\n        pts_filename=pcd,\n        box_type_3d=box_type_3d,\n        box_mode_3d=box_mode_3d,\n        sweeps=[],\n        # set timestamp = 0\n        timestamp=[0],\n        img_fields=[],\n        bbox3d_fields=[],\n        pts_mask_fields=[],\n        pts_seg_fields=[],\n        bbox_fields=[],\n        mask_fields=[],\n        seg_fields=[])\n    data = test_pipeline(data)\n    data = collate([data], samples_per_gpu=1)\n    if next(model.parameters()).is_cuda:\n        # scatter to specified GPU\n        data = scatter(data, [device.index])[0]\n    else:\n        # this is a workaround to avoid the bug of MMDataParallel\n        data['img_metas'] = data['img_metas'][0].data\n        data['points'] = data['points'][0].data\n    # forward the model\n    with torch.no_grad():\n        result = 
model(return_loss=False, rescale=True, **data)\n    return result, data\n\n\ndef show_result_meshlab(data, result, out_dir):\n    \"\"\"Show result by meshlab.\n\n    Args:\n        data (dict): Contain data from pipeline.\n        result (dict): Predicted result from model.\n        out_dir (str): Directory to save visualized result.\n    \"\"\"\n    points = data['points'][0][0].cpu().numpy()\n    pts_filename = data['img_metas'][0][0]['pts_filename']\n    file_name = osp.split(pts_filename)[-1].split('.')[0]\n\n    assert out_dir is not None, 'Expect out_dir, got none.'\n\n    if 'pts_bbox' in result[0].keys():\n        pred_bboxes = result[0]['pts_bbox']['boxes_3d'].tensor.numpy()\n    else:\n        pred_bboxes = result[0]['boxes_3d'].tensor.numpy()\n    # for now we convert points into depth mode\n    if data['img_metas'][0][0]['box_mode_3d'] != Box3DMode.DEPTH:\n        points = points[..., [1, 0, 2]]\n        points[..., 0] *= -1\n        pred_bboxes = Box3DMode.convert(pred_bboxes,\n                                        data['img_metas'][0][0]['box_mode_3d'],\n                                        Box3DMode.DEPTH)\n    show_result(points, None, pred_bboxes, out_dir, file_name, show=False)\n    return out_dir, file_name\n"
  },
  {
    "path": "mmdet3d/apis/test.py",
    "content": "import mmcv\nimport torch\n\n\ndef single_gpu_test(model, data_loader, show=False, out_dir=None):\n    \"\"\"Test model with single gpu.\n\n    This method tests model with single gpu and gives the 'show' option.\n    By setting ``show=True``, it saves the visualization results under\n    ``out_dir``.\n\n    Args:\n        model (nn.Module): Model to be tested.\n        data_loader (nn.Dataloader): Pytorch data loader.\n        show (bool): Whether to save viualization results.\n            Default: True.\n        out_dir (str): The path to save visualization results.\n            Default: None.\n\n    Returns:\n        list[dict]: The prediction results.\n    \"\"\"\n    model.eval()\n    results = []\n    dataset = data_loader.dataset\n    prog_bar = mmcv.ProgressBar(len(dataset))\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=True, **data)\n\n        if show:\n            model.module.show_results(data, result, out_dir)\n\n        results.extend(result)\n\n        batch_size = len(result)\n        for _ in range(batch_size):\n            prog_bar.update()\n    return results\n"
  },
  {
    "path": "mmdet3d/core/__init__.py",
    "content": "from .anchor import *  # noqa: F401, F403\nfrom .bbox import *  # noqa: F401, F403\nfrom .evaluation import *  # noqa: F401, F403\nfrom .points import *  # noqa: F401, F403\nfrom .post_processing import *  # noqa: F401, F403\nfrom .utils import *  # noqa: F401, F403\nfrom .visualizer import *  # noqa: F401, F403\nfrom .voxel import *  # noqa: F401, F403\n"
  },
  {
    "path": "mmdet3d/core/anchor/__init__.py",
    "content": "from mmdet.core.anchor import build_anchor_generator\nfrom .anchor_3d_generator import (AlignedAnchor3DRangeGenerator,\n                                  AlignedAnchor3DRangeGeneratorPerCls,\n                                  Anchor3DRangeGenerator)\n\n__all__ = [\n    'AlignedAnchor3DRangeGenerator', 'Anchor3DRangeGenerator',\n    'build_anchor_generator', 'AlignedAnchor3DRangeGeneratorPerCls'\n]\n"
  },
  {
    "path": "mmdet3d/core/anchor/anchor_3d_generator.py",
    "content": "import mmcv\nimport torch\n\nfrom mmdet.core.anchor import ANCHOR_GENERATORS\n\n\n@ANCHOR_GENERATORS.register_module()\nclass Anchor3DRangeGenerator(object):\n    \"\"\"3D Anchor Generator by range.\n\n    This anchor generator generates anchors by the given range in different\n    feature levels.\n    Due the convention in 3D detection, different anchor sizes are related to\n    different ranges for different categories. However we find this setting\n    does not effect the performance much in some datasets, e.g., nuScenes.\n\n    Args:\n        ranges (list[list[float]]): Ranges of different anchors.\n            The ranges are the same across different feature levels. But may\n            vary for different anchor sizes if size_per_range is True.\n        sizes (list[list[float]]): 3D sizes of anchors.\n        scales (list[int]): Scales of anchors in different feature levels.\n        rotations (list[float]): Rotations of anchors in a feature grid.\n        custom_values (tuple[float]): Customized values of that anchor. For\n            example, in nuScenes the anchors have velocities.\n        reshape_out (bool): Whether to reshape the output into (N x 4).\n        size_per_range: Whether to use separate ranges for different sizes.\n            If size_per_range is True, the ranges should have the same length\n            as the sizes, if not, it will be duplicated.\n    \"\"\"\n\n    def __init__(self,\n                 ranges,\n                 sizes=[[1.6, 3.9, 1.56]],\n                 scales=[1],\n                 rotations=[0, 1.5707963],\n                 custom_values=(),\n                 reshape_out=True,\n                 size_per_range=True):\n        assert mmcv.is_list_of(ranges, list)\n        if size_per_range:\n            if len(sizes) != len(ranges):\n                assert len(ranges) == 1\n                ranges = ranges * len(sizes)\n            assert len(ranges) == len(sizes)\n        else:\n            assert len(ranges) == 1\n        assert mmcv.is_list_of(sizes, list)\n        assert isinstance(scales, list)\n\n        self.sizes = sizes\n        self.scales = scales\n        self.ranges = ranges\n        self.rotations = rotations\n        self.custom_values = custom_values\n        self.cached_anchors = None\n        self.reshape_out = reshape_out\n        self.size_per_range = size_per_range\n\n    def __repr__(self):\n        s = self.__class__.__name__ + '('\n        s += f'anchor_range={self.ranges},\\n'\n        s += f'scales={self.scales},\\n'\n        s += f'sizes={self.sizes},\\n'\n        s += f'rotations={self.rotations},\\n'\n        s += f'reshape_out={self.reshape_out},\\n'\n        s += f'size_per_range={self.size_per_range})'\n        return s\n\n    @property\n    def num_base_anchors(self):\n        \"\"\"list[int]: Total number of base anchors in a feature grid.\"\"\"\n        num_rot = len(self.rotations)\n        num_size = torch.tensor(self.sizes).reshape(-1, 3).size(0)\n        return num_rot * num_size\n\n    @property\n    def num_levels(self):\n        \"\"\"int: Number of feature levels that the generator is applied to.\"\"\"\n        return len(self.scales)\n\n    def grid_anchors(self, featmap_sizes, device='cuda'):\n        \"\"\"Generate grid anchors in multiple feature levels.\n\n        Args:\n            featmap_sizes (list[tuple]): List of feature map sizes in\n                multiple feature levels.\n            device (str): Device where the anchors will be put on.\n\n        Returns:\n            
list[torch.Tensor]: Anchors in multiple feature levels. \\\n                The sizes of each tensor should be [N, 4], where \\\n                N = width * height * num_base_anchors, width and height \\\n                are the sizes of the corresponding feature lavel, \\\n                num_base_anchors is the number of anchors for that level.\n        \"\"\"\n        assert self.num_levels == len(featmap_sizes)\n        multi_level_anchors = []\n        for i in range(self.num_levels):\n            anchors = self.single_level_grid_anchors(\n                featmap_sizes[i], self.scales[i], device=device)\n            if self.reshape_out:\n                anchors = anchors.reshape(-1, anchors.size(-1))\n            multi_level_anchors.append(anchors)\n        return multi_level_anchors\n\n    def single_level_grid_anchors(self, featmap_size, scale, device='cuda'):\n        \"\"\"Generate grid anchors of a single level feature map.\n\n        This function is usually called by method ``self.grid_anchors``.\n\n        Args:\n            featmap_size (tuple[int]): Size of the feature map.\n            scale (float): Scale factor of the anchors in the current level.\n            device (str, optional): Device the tensor will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: Anchors in the overall feature map.\n        \"\"\"\n        # We reimplement the anchor generator using torch in cuda\n        # torch: 0.6975 s for 1000 times\n        # numpy: 4.3345 s for 1000 times\n        # which is ~5 times faster than the numpy implementation\n        if not self.size_per_range:\n            return self.anchors_single_range(\n                featmap_size,\n                self.ranges[0],\n                scale,\n                self.sizes,\n                self.rotations,\n                device=device)\n\n        mr_anchors = []\n        for anchor_range, anchor_size in zip(self.ranges, self.sizes):\n            mr_anchors.append(\n                self.anchors_single_range(\n                    featmap_size,\n                    anchor_range,\n                    scale,\n                    anchor_size,\n                    self.rotations,\n                    device=device))\n        mr_anchors = torch.cat(mr_anchors, dim=-3)\n        return mr_anchors\n\n    def anchors_single_range(self,\n                             feature_size,\n                             anchor_range,\n                             scale=1,\n                             sizes=[[1.6, 3.9, 1.56]],\n                             rotations=[0, 1.5707963],\n                             device='cuda'):\n        \"\"\"Generate anchors in a single range.\n\n        Args:\n            feature_size (list[float] | tuple[float]): Feature map size. It is\n                either a list of a tuple of [D, H, W](in order of z, y, and x).\n            anchor_range (torch.Tensor | list[float]): Range of anchors with\n                shape [6]. 
The order is consistent with that of anchors, i.e.,\n                (x_min, y_min, z_min, x_max, y_max, z_max).\n            scale (float | int, optional): The scale factor of anchors.\n            sizes (list[list] | np.ndarray | torch.Tensor): Anchor size with\n                shape [N, 3], in order of x, y, z.\n            rotations (list[float] | np.ndarray | torch.Tensor): Rotations of\n                anchors in a single feature grid.\n            device (str): Devices that the anchors will be put on.\n\n        Returns:\n            torch.Tensor: Anchors with shape \\\n                [*feature_size, num_sizes, num_rots, 7].\n        \"\"\"\n        if len(feature_size) == 2:\n            feature_size = [1, feature_size[0], feature_size[1]]\n        anchor_range = torch.tensor(anchor_range, device=device)\n        z_centers = torch.linspace(\n            anchor_range[2], anchor_range[5], feature_size[0], device=device)\n        y_centers = torch.linspace(\n            anchor_range[1], anchor_range[4], feature_size[1], device=device)\n        x_centers = torch.linspace(\n            anchor_range[0], anchor_range[3], feature_size[2], device=device)\n        sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * scale\n        rotations = torch.tensor(rotations, device=device)\n\n        # torch.meshgrid default behavior is 'id', np's default is 'xy'\n        rets = torch.meshgrid(x_centers, y_centers, z_centers, rotations)\n        # torch.meshgrid returns a tuple rather than list\n        rets = list(rets)\n        tile_shape = [1] * 5\n        tile_shape[-2] = int(sizes.shape[0])\n        for i in range(len(rets)):\n            rets[i] = rets[i].unsqueeze(-2).repeat(tile_shape).unsqueeze(-1)\n\n        sizes = sizes.reshape([1, 1, 1, -1, 1, 3])\n        tile_size_shape = list(rets[0].shape)\n        tile_size_shape[3] = 1\n        sizes = sizes.repeat(tile_size_shape)\n        rets.insert(3, sizes)\n\n        ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5])\n        # [1, 200, 176, N, 2, 7] for kitti after permute\n\n        if len(self.custom_values) > 0:\n            custom_ndim = len(self.custom_values)\n            custom = ret.new_zeros([*ret.shape[:-1], custom_ndim])\n            # custom[:] = self.custom_values\n            ret = torch.cat([ret, custom], dim=-1)\n            # [1, 200, 176, N, 2, 9] for nus dataset after permute\n        return ret\n\n\n@ANCHOR_GENERATORS.register_module()\nclass AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator):\n    \"\"\"Aligned 3D Anchor Generator by range.\n\n    This anchor generator uses a different manner to generate the positions\n    of anchors' centers from :class:`Anchor3DRangeGenerator`.\n\n    Note:\n        The `align` means that the anchor's center is aligned with the voxel\n        grid, which is also the feature grid. The previous implementation of\n        :class:`Anchor3DRangeGenerator` does not generate the anchors' center\n        according to the voxel grid. Rather, it generates the center by\n        uniformly distributing the anchors inside the minimum and maximum\n        anchor ranges according to the feature map sizes.\n        However, this makes the anchors center does not match the feature grid.\n        The :class:`AlignedAnchor3DRangeGenerator` add + 1 when using the\n        feature map sizes to obtain the corners of the voxel grid. 
Then it\n        shifts the coordinates to the center of voxel grid and use the left\n        up corner to distribute anchors.\n\n    Args:\n        anchor_corner (bool): Whether to align with the corner of the voxel\n            grid. By default it is False and the anchor's center will be\n            the same as the corresponding voxel's center, which is also the\n            center of the corresponding greature grid.\n    \"\"\"\n\n    def __init__(self, align_corner=False, **kwargs):\n        super(AlignedAnchor3DRangeGenerator, self).__init__(**kwargs)\n        self.align_corner = align_corner\n\n    def anchors_single_range(self,\n                             feature_size,\n                             anchor_range,\n                             scale,\n                             sizes=[[1.6, 3.9, 1.56]],\n                             rotations=[0, 1.5707963],\n                             device='cuda'):\n        \"\"\"Generate anchors in a single range.\n\n        Args:\n            feature_size (list[float] | tuple[float]): Feature map size. It is\n                either a list of a tuple of [D, H, W](in order of z, y, and x).\n            anchor_range (torch.Tensor | list[float]): Range of anchors with\n                shape [6]. The order is consistent with that of anchors, i.e.,\n                (x_min, y_min, z_min, x_max, y_max, z_max).\n            scale (float | int, optional): The scale factor of anchors.\n            sizes (list[list] | np.ndarray | torch.Tensor): Anchor size with\n                shape [N, 3], in order of x, y, z.\n            rotations (list[float] | np.ndarray | torch.Tensor): Rotations of\n                anchors in a single feature grid.\n            device (str): Devices that the anchors will be put on.\n\n        Returns:\n            torch.Tensor: Anchors with shape \\\n                [*feature_size, num_sizes, num_rots, 7].\n        \"\"\"\n        if len(feature_size) == 2:\n            feature_size = [1, feature_size[0], feature_size[1]]\n        anchor_range = torch.tensor(anchor_range, device=device)\n        z_centers = torch.linspace(\n            anchor_range[2],\n            anchor_range[5],\n            feature_size[0] + 1,\n            device=device)\n        y_centers = torch.linspace(\n            anchor_range[1],\n            anchor_range[4],\n            feature_size[1] + 1,\n            device=device)\n        x_centers = torch.linspace(\n            anchor_range[0],\n            anchor_range[3],\n            feature_size[2] + 1,\n            device=device)\n        sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * scale\n        rotations = torch.tensor(rotations, device=device)\n\n        # shift the anchor center\n        if not self.align_corner:\n            z_shift = (z_centers[1] - z_centers[0]) / 2\n            y_shift = (y_centers[1] - y_centers[0]) / 2\n            x_shift = (x_centers[1] - x_centers[0]) / 2\n            z_centers += z_shift\n            y_centers += y_shift\n            x_centers += x_shift\n\n        # torch.meshgrid default behavior is 'id', np's default is 'xy'\n        rets = torch.meshgrid(x_centers[:feature_size[2]],\n                              y_centers[:feature_size[1]],\n                              z_centers[:feature_size[0]], rotations)\n\n        # torch.meshgrid returns a tuple rather than list\n        rets = list(rets)\n        tile_shape = [1] * 5\n        tile_shape[-2] = int(sizes.shape[0])\n        for i in range(len(rets)):\n            rets[i] = 
rets[i].unsqueeze(-2).repeat(tile_shape).unsqueeze(-1)\n\n        sizes = sizes.reshape([1, 1, 1, -1, 1, 3])\n        tile_size_shape = list(rets[0].shape)\n        tile_size_shape[3] = 1\n        sizes = sizes.repeat(tile_size_shape)\n        rets.insert(3, sizes)\n\n        ret = torch.cat(rets, dim=-1).permute([2, 1, 0, 3, 4, 5])\n\n        if len(self.custom_values) > 0:\n            custom_ndim = len(self.custom_values)\n            custom = ret.new_zeros([*ret.shape[:-1], custom_ndim])\n            # TODO: check the support of custom values\n            # custom[:] = self.custom_values\n            ret = torch.cat([ret, custom], dim=-1)\n        return ret\n\n\n@ANCHOR_GENERATORS.register_module()\nclass AlignedAnchor3DRangeGeneratorPerCls(AlignedAnchor3DRangeGenerator):\n    \"\"\"3D Anchor Generator by range for per class.\n\n    This anchor generator generates anchors by the given range for per class.\n    Note that feature maps of different classes may be different.\n\n    Args:\n        kwargs (dict): Arguments are the same as those in \\\n            :class:`AlignedAnchor3DRangeGenerator`.\n    \"\"\"\n\n    def __init__(self, **kwargs):\n        super(AlignedAnchor3DRangeGeneratorPerCls, self).__init__(**kwargs)\n        assert len(self.scales) == 1, 'Multi-scale feature map levels are' + \\\n            ' not supported currently in this kind of anchor generator.'\n\n    def grid_anchors(self, featmap_sizes, device='cuda'):\n        \"\"\"Generate grid anchors in multiple feature levels.\n\n        Args:\n            featmap_sizes (list[tuple]): List of feature map sizes for \\\n                different classes in a single feature level.\n            device (str): Device where the anchors will be put on.\n\n        Returns:\n            list[list[torch.Tensor]]: Anchors in multiple feature levels. \\\n                Note that in this anchor generator, we currently only \\\n                support single feature level. 
The sizes of each tensor \\\n                should be [num_sizes/ranges*num_rots*featmap_size, \\\n                box_code_size].\n        \"\"\"\n        multi_level_anchors = []\n        anchors = self.multi_cls_grid_anchors(\n            featmap_sizes, self.scales[0], device=device)\n        multi_level_anchors.append(anchors)\n        return multi_level_anchors\n\n    def multi_cls_grid_anchors(self, featmap_sizes, scale, device='cuda'):\n        \"\"\"Generate grid anchors of a single level feature map for multi-class\n        with different feature map sizes.\n\n        This function is usually called by method ``self.grid_anchors``.\n\n        Args:\n            featmap_sizes (list[tuple]): List of feature map sizes for \\\n                different classes in a single feature level.\n            scale (float): Scale factor of the anchors in the current level.\n            device (str, optional): Device the tensor will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: Anchors in the overall feature map.\n        \"\"\"\n        assert len(featmap_sizes) == len(self.sizes) == len(self.ranges), \\\n            'The number of different feature map sizes anchor sizes and ' + \\\n            'ranges should be the same.'\n\n        multi_cls_anchors = []\n        for i in range(len(featmap_sizes)):\n            anchors = self.anchors_single_range(\n                featmap_sizes[i],\n                self.ranges[i],\n                scale,\n                self.sizes[i],\n                self.rotations,\n                device=device)\n            # [*featmap_size, num_sizes/ranges, num_rots, box_code_size]\n            ndim = len(featmap_sizes[i])\n            anchors = anchors.view(*featmap_sizes[i], -1, anchors.size(-1))\n            # [*featmap_size, num_sizes/ranges*num_rots, box_code_size]\n            anchors = anchors.permute(ndim, *range(0, ndim), ndim + 1)\n            # [num_sizes/ranges*num_rots, *featmap_size, box_code_size]\n            multi_cls_anchors.append(anchors.reshape(-1, anchors.size(-1)))\n            # [num_sizes/ranges*num_rots*featmap_size, box_code_size]\n        return multi_cls_anchors\n"
  },
  {
    "path": "mmdet3d/core/bbox/__init__.py",
    "content": "from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner\nfrom .coders import DeltaXYZWLHRBBoxCoder\n# from .bbox_target import bbox_target\nfrom .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,\n                              BboxOverlapsNearest3D,\n                              axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d,\n                              bbox_overlaps_nearest_3d)\nfrom .samplers import (BaseSampler, CombinedSampler,\n                       InstanceBalancedPosSampler, IoUBalancedNegSampler,\n                       PseudoSampler, RandomSampler, SamplingResult)\nfrom .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,\n                         Coord3DMode, DepthInstance3DBoxes,\n                         LiDARInstance3DBoxes, get_box_type, limit_period,\n                         points_cam2img, xywhr2xyxyr)\nfrom .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back\n\n__all__ = [\n    'BaseSampler', 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner',\n    'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler',\n    'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult',\n    'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D',\n    'bbox_overlaps_nearest_3d', 'bbox_overlaps_3d',\n    'AxisAlignedBboxOverlaps3D', 'axis_aligned_bbox_overlaps_3d', 'Box3DMode',\n    'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',\n    'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',\n    'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img',\n    'get_box_type', 'Coord3DMode'\n]\n"
  },
  {
    "path": "mmdet3d/core/bbox/assigners/__init__.py",
    "content": "from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner\nfrom .hungarian_assigner import HungarianAssigner3D, HeuristicAssigner3D, HungarianAssignerView2D, HungarianAssignerViewProj2D, HungarianAssignerCameraBox\n\n__all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 'HungarianAssigner3D', 'HeuristicAssigner',\n           'HungarianAssignerView2D', 'HungarianAssignerViewProj2D', 'HungarianAssignerCameraBox']\n"
  },
  {
    "path": "mmdet3d/core/bbox/assigners/hungarian_assigner.py",
    "content": "from mmdet.core.bbox.builder import BBOX_ASSIGNERS\nfrom mmdet.core.bbox.assigners import AssignResult, BaseAssigner\nfrom mmdet.core.bbox.match_costs import build_match_cost\nfrom mmdet.core.bbox.match_costs.builder import MATCH_COST\nfrom mmdet.core.bbox.iou_calculators import build_iou_calculator\nfrom mmdet.core.bbox.assigners import HungarianAssigner\nfrom mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy\nimport torch\n\ntry:\n    from scipy.optimize import linear_sum_assignment\nexcept ImportError:\n    linear_sum_assignment = None\n\n\n@MATCH_COST.register_module()\nclass BBox3DL1Cost(object):\n    def __init__(self, weight):\n        self.weight = weight\n\n    def __call__(self, bboxes, gt_bboxes, train_cfg=None):\n        reg_cost = torch.cdist(bboxes, gt_bboxes, p=1)\n        return reg_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass BBoxBEVL1Cost(object):\n    def __init__(self, weight):\n        self.weight = weight\n\n    def __call__(self, bboxes, gt_bboxes, train_cfg):\n        pc_start = bboxes.new(train_cfg['point_cloud_range'][0:2])\n        pc_range = bboxes.new(train_cfg['point_cloud_range'][3:5]) - bboxes.new(train_cfg['point_cloud_range'][0:2])\n        # normalize the box center to [0, 1]\n        normalized_bboxes_xy = (bboxes[:, :2] - pc_start) / pc_range\n        normalized_gt_bboxes_xy = (gt_bboxes[:, :2] - pc_start) / pc_range\n        reg_cost = torch.cdist(normalized_bboxes_xy, normalized_gt_bboxes_xy, p=1)\n        return reg_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass IoU3DCost(object):\n    def __init__(self, weight):\n        self.weight = weight\n\n    def __call__(self, iou):\n        iou_cost = - iou\n        return iou_cost * self.weight\n\n\n@BBOX_ASSIGNERS.register_module()\nclass HeuristicAssigner3D(BaseAssigner):\n    def __init__(self,\n                 dist_thre=100,\n                 iou_calculator=dict(type='BboxOverlaps3D')\n                 ):\n        self.dist_thre = dist_thre  # distance in meter\n        self.iou_calculator = build_iou_calculator(iou_calculator)\n\n    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None, query_labels=None):\n        dist_thre = self.dist_thre\n        num_gts, num_bboxes = len(gt_bboxes), len(bboxes)\n\n        bev_dist = torch.norm(bboxes[:, 0:2][None, :, :] - gt_bboxes[:, 0:2][:, None, :], dim=-1)  # [num_gts, num_bboxes]\n        if query_labels is not None:\n            # only match the gt box and query with same category\n            not_same_class = (query_labels[None] != gt_labels[:, None])\n            bev_dist += not_same_class * dist_thre\n\n        # for each gt box, assign it to the nearest pred box\n        nearest_values, nearest_indices = bev_dist.min(1)  # [num_gts]\n        assigned_gt_inds = torch.ones([num_bboxes, ]).to(bboxes) * 0\n        assigned_gt_vals = torch.ones([num_bboxes, ]).to(bboxes) * 10000\n        assigned_gt_labels = torch.ones([num_bboxes, ]).to(bboxes) * -1\n        for idx_gts in range(num_gts):\n            # for idx_pred in torch.where(bev_dist[idx_gts] < dist_thre)[0]: # each gt match to all the pred box within some radius\n            idx_pred = nearest_indices[idx_gts]  # each gt only match to the nearest pred box\n            if bev_dist[idx_gts, idx_pred] <= dist_thre:\n                if bev_dist[idx_gts, idx_pred] < assigned_gt_vals[idx_pred]:  # if this pred box is assigned, then compare\n                    assigned_gt_vals[idx_pred] = bev_dist[idx_gts, idx_pred]\n                    
assigned_gt_inds[idx_pred] = idx_gts + 1  # for AssignResult, 0 is negative, -1 is ignore, 1-based indices are positive\n                    assigned_gt_labels[idx_pred] = gt_labels[idx_gts]\n\n        max_overlaps = torch.zeros([num_bboxes, ]).to(bboxes)\n        matched_indices = torch.where(assigned_gt_inds > 0)\n        matched_iou = self.iou_calculator(gt_bboxes[assigned_gt_inds[matched_indices].long() - 1], bboxes[matched_indices]).diag()\n        max_overlaps[matched_indices] = matched_iou\n\n        return AssignResult(\n            num_gts, assigned_gt_inds.long(), max_overlaps, labels=assigned_gt_labels\n        )\n\n\n@BBOX_ASSIGNERS.register_module()\nclass HungarianAssigner3D(BaseAssigner):\n    def __init__(self,\n                 cls_cost=dict(type='ClassificationCost', weight=1.),\n                 reg_cost=dict(type='BBoxBEVL1Cost', weight=1.0),\n                 iou_cost=dict(type='IoU3DCost', weight=1.0),\n                 iou_calculator=dict(type='BboxOverlaps3D')\n                 ):\n        self.cls_cost = build_match_cost(cls_cost)\n        self.reg_cost = build_match_cost(reg_cost)\n        self.iou_cost = build_match_cost(iou_cost)\n        self.iou_calculator = build_iou_calculator(iou_calculator)\n\n    def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg):\n        num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)\n\n        # 1. assign -1 by default\n        assigned_gt_inds = bboxes.new_full((num_bboxes,),\n                                           -1,\n                                           dtype=torch.long)\n        assigned_labels = bboxes.new_full((num_bboxes,),\n                                          -1,\n                                          dtype=torch.long)\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gts == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n            return AssignResult(\n                num_gts, assigned_gt_inds, torch.zeros(assigned_gt_inds.shape[0]).to(assigned_gt_inds.device), labels=assigned_labels)\n            # return AssignResult(\n            #     num_gts, assigned_gt_inds, None, labels=assigned_labels)\n \n        # 2. compute the weighted costs\n        # see mmdetection/mmdet/core/bbox/match_costs/match_cost.py\n        cls_cost = self.cls_cost(cls_pred[0].T, gt_labels)\n        reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg)\n        iou = self.iou_calculator(bboxes, gt_bboxes)\n        iou_cost = self.iou_cost(iou)\n\n        # weighted sum of above three costs\n        cost = cls_cost + reg_cost + iou_cost\n\n        # 3. do Hungarian matching on CPU using linear_sum_assignment\n        cost = cost.detach().cpu()\n        if linear_sum_assignment is None:\n            raise ImportError('Please run \"pip install scipy\" '\n                              'to install scipy first.')\n        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)\n        matched_row_inds = torch.from_numpy(matched_row_inds).to(bboxes.device)\n        matched_col_inds = torch.from_numpy(matched_col_inds).to(bboxes.device)\n\n        # 4. 
assign backgrounds and foregrounds\n        # assign all indices to backgrounds first\n        assigned_gt_inds[:] = 0\n        # assign foregrounds based on matching results\n        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1\n        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]\n\n        max_overlaps = torch.zeros_like(iou.max(1).values)\n        max_overlaps[matched_row_inds] = iou[matched_row_inds, matched_col_inds]\n        # max_overlaps = iou.max(1).values\n        return AssignResult(\n            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n\n\n@BBOX_ASSIGNERS.register_module()\nclass HungarianAssignerView2D(HungarianAssigner):\n    def __init__(self,\n                 cls_cost=dict(type='ClassificationCost', weight=1.),\n                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),\n                 iou_cost=dict(type='IoUCost', iou_mode='giou', weight=1.0)):\n        super(HungarianAssignerView2D, self).__init__(cls_cost, reg_cost, iou_cost)\n        self.view_cost = ViewCost()\n\n    def assign(self,\n               bbox_pred,\n               cls_pred,\n               view,\n               gt_bboxes,\n               gt_labels,\n               img_w,\n               img_h,\n               gt_bboxes_ignore=None,\n               eps=1e-7):\n        \"\"\"Computes one-to-one matching based on the weighted costs.\n\n        This method assign each query prediction to a ground truth or\n        background. The `assigned_gt_inds` with -1 means don't care,\n        0 means negative sample, and positive number is the index (1-based)\n        of assigned gt.\n        The assignment is done in the following steps, the order matters.\n\n        1. assign every prediction to -1\n        2. compute the weighted costs\n        3. do Hungarian matching on CPU based on the costs\n        4. assign all to 0 (background) first, then for each matched pair\n           between predictions and gts, treat this prediction as foreground\n           and assign the corresponding gt index (plus 1) to it.\n\n        Args:\n            bbox_pred (Tensor): Predicted boxes with normalized coordinates\n                (cx, cy, w, h), which are all in range [0, 1]. Shape\n                [num_query, 4].\n            cls_pred (Tensor): Predicted classification logits, shape\n                [num_query, num_class].\n            gt_bboxes (Tensor): Ground truth boxes with unnormalized\n                coordinates (cx, cy, w, h). Shape [num_gt, 4].\n            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).\n            img_meta (dict): Meta information for current image.\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`. Default None.\n            eps (int | float, optional): A value added to the denominator for\n                numerical stability. Default 1e-7.\n\n        Returns:\n            :obj:`AssignResult`: The assigned result.\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            'Only case when gt_bboxes_ignore is None is supported.'\n        num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0)\n        gt_bboxes = bbox_cxcywh_to_xyxy(gt_bboxes)\n        gt_views = gt_labels[..., 1]\n        gt_labels = gt_labels[..., 0]\n\n        # 1. 
assign -1 by default\n        assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),\n                                              -1,\n                                              dtype=torch.long)\n        assigned_labels = bbox_pred.new_full((num_bboxes, ),\n                                             -1,\n                                             dtype=torch.long)\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gts == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n            return AssignResult(\n                num_gts, assigned_gt_inds, None, labels=assigned_labels)\n        factor = gt_bboxes.new_tensor([img_w, img_h, img_w, img_h]).unsqueeze(0)\n\n        # 2. compute the weighted costs\n        # classification and bboxcost.\n        cls_cost = self.cls_cost(cls_pred, gt_labels)\n        # regression L1 cost\n        normalize_gt_bboxes = gt_bboxes / factor\n        reg_cost = self.reg_cost(bbox_pred, normalize_gt_bboxes)\n        # regression iou cost, defaultly giou is used in official DETR.\n        bboxes = bbox_cxcywh_to_xyxy(bbox_pred) * factor\n        iou_cost = self.iou_cost(bboxes, gt_bboxes)\n        iou = -iou_cost / self.iou_cost.weight\n\n        view_cost = self.view_cost(view, gt_views)\n        # weighted sum of above three costs\n        cost = cls_cost + reg_cost + iou_cost + view_cost\n\n        # 3. do Hungarian matching on CPU using linear_sum_assignment\n        cost = cost.detach().cpu()\n        if linear_sum_assignment is None:\n            raise ImportError('Please run \"pip install scipy\" '\n                              'to install scipy first.')\n        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)\n        matched_row_inds = torch.from_numpy(matched_row_inds).to(\n            bbox_pred.device)\n        matched_col_inds = torch.from_numpy(matched_col_inds).to(\n            bbox_pred.device)\n\n        # 4. 
assign backgrounds and foregrounds\n        # assign all indices to backgrounds first\n        assigned_gt_inds[:] = 0\n        # assign foregrounds based on matching results\n        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1\n        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]\n        max_overlaps = torch.zeros_like(iou.max(1).values)\n        max_overlaps[matched_row_inds] = iou[matched_row_inds, matched_col_inds]\n        return AssignResult(\n            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n\n@BBOX_ASSIGNERS.register_module()\nclass HungarianAssignerViewProj2D(HungarianAssigner):\n    def __init__(self,\n                 cls_cost=dict(type='ClassificationCost', weight=1.),\n                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),\n                 iou_cost=dict(type='IoUCost', iou_mode='giou', weight=1.0)):\n        super(HungarianAssignerViewProj2D, self).__init__(cls_cost, reg_cost, iou_cost)\n        self.view_cost = ViewCost()\n\n    def assign(self,\n               bbox_pred,\n               cls_pred,\n               center_pred,\n               offset_pred,\n               view,\n               gt_bboxes,\n               gt_labels,\n               gt_centers,\n               gt_offsets,\n               img_w,\n               img_h,\n               gt_bboxes_ignore=None,\n               eps=1e-7):\n        \"\"\"Computes one-to-one matching based on the weighted costs.\n\n        This method assign each query prediction to a ground truth or\n        background. The `assigned_gt_inds` with -1 means don't care,\n        0 means negative sample, and positive number is the index (1-based)\n        of assigned gt.\n        The assignment is done in the following steps, the order matters.\n\n        1. assign every prediction to -1\n        2. compute the weighted costs\n        3. do Hungarian matching on CPU based on the costs\n        4. assign all to 0 (background) first, then for each matched pair\n           between predictions and gts, treat this prediction as foreground\n           and assign the corresponding gt index (plus 1) to it.\n\n        Args:\n            bbox_pred (Tensor): Predicted boxes with normalized coordinates\n                (cx, cy, w, h), which are all in range [0, 1]. Shape\n                [num_query, 4].\n            cls_pred (Tensor): Predicted classification logits, shape\n                [num_query, num_class].\n            gt_bboxes (Tensor): Ground truth boxes with unnormalized\n                coordinates (cx, cy, w, h). Shape [num_gt, 4].\n            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).\n            img_meta (dict): Meta information for current image.\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`. Default None.\n            eps (int | float, optional): A value added to the denominator for\n                numerical stability. Default 1e-7.\n\n        Returns:\n            :obj:`AssignResult`: The assigned result.\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            'Only case when gt_bboxes_ignore is None is supported.'\n        num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0)\n        gt_bboxes = bbox_cxcywh_to_xyxy(gt_bboxes)\n        gt_views = gt_labels[..., 1]\n        gt_labels = gt_labels[..., 0]\n\n        # 1. 
assign -1 by default\n        assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),\n                                              -1,\n                                              dtype=torch.long)\n        assigned_labels = bbox_pred.new_full((num_bboxes, ),\n                                             -1,\n                                             dtype=torch.long)\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gts == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n            return AssignResult(\n                num_gts, assigned_gt_inds, torch.zeros(assigned_gt_inds.shape[0]).to(assigned_gt_inds.device), labels=assigned_labels)\n        factor = gt_bboxes.new_tensor([img_w, img_h, img_w, img_h]).unsqueeze(0)\n\n        # 2. compute the weighted costs\n        # classification and bboxcost.\n        cls_cost = self.cls_cost(cls_pred, gt_labels)\n        # regression L1 cost\n        # reg_cost = self.reg_cost(bbox_pred, normalize_gt_bboxes)\n\n        normalize_gt_centers = gt_centers / factor[:, :2]\n        reg_cost = self.reg_cost(center_pred, normalize_gt_centers)\n\n        normalize_gt_offsets = gt_offsets / factor\n        # reg_cost = reg_cost + self.reg_cost(offset_pred, normalize_gt_offsets) / 2\n        reg_cost = reg_cost + self.reg_cost(offset_pred, normalize_gt_offsets)\n\n        # regression iou cost, defaultly giou is used in official DETR.\n        bboxes = bbox_cxcywh_to_xyxy(bbox_pred) * factor\n        iou_cost = self.iou_cost(bboxes, gt_bboxes)\n        iou = -iou_cost / self.iou_cost.weight\n\n        view_cost = self.view_cost(view, gt_views)\n        # weighted sum of above three costs\n        cost = cls_cost + reg_cost + iou_cost + view_cost\n\n        # 3. do Hungarian matching on CPU using linear_sum_assignment\n        cost = cost.detach().cpu()\n        if linear_sum_assignment is None:\n            raise ImportError('Please run \"pip install scipy\" '\n                              'to install scipy first.')\n        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)\n        matched_row_inds = torch.from_numpy(matched_row_inds).to(\n            bbox_pred.device)\n        matched_col_inds = torch.from_numpy(matched_col_inds).to(\n            bbox_pred.device)\n\n        # 4. 
assign backgrounds and foregrounds\n        # assign all indices to backgrounds first\n        assigned_gt_inds[:] = 0\n        # assign foregrounds based on matching results\n        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1\n        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]\n\n        max_overlaps = torch.zeros_like(iou.max(1).values)\n        max_overlaps[matched_row_inds] = iou[matched_row_inds, matched_col_inds]\n\n        return AssignResult(\n            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n\n\nclass ViewCost:\n    def __init__(self, weight=1000):\n        self.weight = weight\n\n    def __call__(self, view_pred, gt_views):\n        view_cost = torch.cdist(view_pred.unsqueeze(-1).float(), gt_views.unsqueeze(-1).float(), p=1)\n        view_cost = torch.clamp(view_cost, max=1, min=0)\n        return view_cost * self.weight\n\n\n@BBOX_ASSIGNERS.register_module()\nclass HungarianAssignerCameraBox(BaseAssigner):\n    def __init__(self,\n                 cls_cost=dict(type='ClassificationCost', weight=1.),\n                 reg_cost=dict(type='BBoxBEVL1Cost', weight=1.0),\n                 iou_cost=dict(type='IoU3DCost', weight=1.0),\n                 iou_calculator=dict(type='BboxOverlaps3D')\n                 ):\n        self.cls_cost = build_match_cost(cls_cost)\n        self.reg_cost = build_match_cost(reg_cost)\n        self.iou_cost = build_match_cost(iou_cost)\n        self.iou_calculator = build_iou_calculator(iou_calculator)\n        self.view_cost = ViewCost()\n\n    def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, view, train_cfg):\n        num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)\n\n        # 1. assign -1 by default\n        assigned_gt_inds = bboxes.new_full((num_bboxes,),\n                                           -1,\n                                           dtype=torch.long)\n        assigned_labels = bboxes.new_full((num_bboxes,),\n                                          -1,\n                                          dtype=torch.long)\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gts == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n\n            return AssignResult(\n                num_gts, assigned_gt_inds, torch.zeros(assigned_gt_inds.shape[0]).to(assigned_gt_inds.device), labels=assigned_labels)\n\n        # 2. compute the weighted costs\n        # see mmdetection/mmdet/core/bbox/match_costs/match_cost.py\n\n        gt_views = gt_labels[..., 1]\n        gt_labels = gt_labels[..., 0]\n\n        cls_cost = self.cls_cost(cls_pred[0].T, gt_labels)\n\n        reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg)\n        iou = self.iou_calculator(bboxes, gt_bboxes)\n        iou_cost = self.iou_cost(iou)\n        view_cost = self.view_cost(view, gt_views)\n\n        # weighted sum of above three costs\n        cost = cls_cost + reg_cost + iou_cost + view_cost\n\n        # 3. 
do Hungarian matching on CPU using linear_sum_assignment\n        cost = cost.detach().cpu()\n        if linear_sum_assignment is None:\n            raise ImportError('Please run \"pip install scipy\" '\n                              'to install scipy first.')\n        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)\n        matched_row_inds = torch.from_numpy(matched_row_inds).to(bboxes.device)\n        matched_col_inds = torch.from_numpy(matched_col_inds).to(bboxes.device)\n\n        # 4. assign backgrounds and foregrounds\n        # assign all indices to backgrounds first\n        assigned_gt_inds[:] = 0\n        # assign foregrounds based on matching results\n        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1\n        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]\n\n        max_overlaps = torch.zeros_like(iou.max(1).values)\n        max_overlaps[matched_row_inds] = iou[matched_row_inds, matched_col_inds]\n        # max_overlaps = iou.max(1).values\n        return AssignResult(\n            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n"
  },
  {
    "path": "mmdet3d/core/bbox/box_np_ops.py",
    "content": "# TODO: clean the functions in this file and move the APIs into box structures\n# in the future\n\nimport numba\nimport numpy as np\n\n\ndef camera_to_lidar(points, r_rect, velo2cam):\n    \"\"\"Convert points in camera coordinate to lidar coordinate.\n\n    Args:\n        points (np.ndarray, shape=[N, 3]): Points in camera coordinate.\n        r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in\n            specific camera coordinate (e.g. CAM2) to CAM0.\n        velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in\n            camera coordinate to lidar coordinate.\n\n    Returns:\n        np.ndarray, shape=[N, 3]: Points in lidar coordinate.\n    \"\"\"\n    points_shape = list(points.shape[0:-1])\n    if points.shape[-1] == 3:\n        points = np.concatenate([points, np.ones(points_shape + [1])], axis=-1)\n    lidar_points = points @ np.linalg.inv((r_rect @ velo2cam).T)\n    return lidar_points[..., :3]\n\n\ndef box_camera_to_lidar(data, r_rect, velo2cam):\n    \"\"\"Covert boxes in camera coordinate to lidar coordinate.\n\n    Args:\n        data (np.ndarray, shape=[N, 7]): Boxes in camera coordinate.\n        r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in\n            specific camera coordinate (e.g. CAM2) to CAM0.\n        velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in\n            camera coordinate to lidar coordinate.\n\n    Returns:\n        np.ndarray, shape=[N, 3]: Boxes in lidar coordinate.\n    \"\"\"\n    xyz = data[:, 0:3]\n    l, h, w = data[:, 3:4], data[:, 4:5], data[:, 5:6]\n    r = data[:, 6:7]\n    xyz_lidar = camera_to_lidar(xyz, r_rect, velo2cam)\n    return np.concatenate([xyz_lidar, w, l, h, r], axis=1)\n\n\ndef corners_nd(dims, origin=0.5):\n    \"\"\"Generate relative box corners based on length per dim and origin point.\n\n    Args:\n        dims (np.ndarray, shape=[N, ndim]): Array of length per dim\n        origin (list or array or float): origin point relate to smallest point.\n\n    Returns:\n        np.ndarray, shape=[N, 2 ** ndim, ndim]: Returned corners.\n        point layout example: (2d) x0y0, x0y1, x1y0, x1y1;\n            (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1\n            where x0 < x1, y0 < y1, z0 < z1.\n    \"\"\"\n    ndim = int(dims.shape[1])\n    corners_norm = np.stack(\n        np.unravel_index(np.arange(2**ndim), [2] * ndim),\n        axis=1).astype(dims.dtype)\n    # now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1\n    # (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1\n    # so need to convert to a format which is convenient to do other computing.\n    # for 2d boxes, format is clockwise start with minimum point\n    # for 3d boxes, please draw lines by your hand.\n    if ndim == 2:\n        # generate clockwise box corners\n        corners_norm = corners_norm[[0, 1, 3, 2]]\n    elif ndim == 3:\n        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]\n    corners_norm = corners_norm - np.array(origin, dtype=dims.dtype)\n    corners = dims.reshape([-1, 1, ndim]) * corners_norm.reshape(\n        [1, 2**ndim, ndim])\n    return corners\n\n\ndef rotation_2d(points, angles):\n    \"\"\"Rotation 2d points based on origin point clockwise when angle positive.\n\n    Args:\n        points (np.ndarray): Points to be rotated with shape \\\n            (N, point_size, 2).\n        angles (np.ndarray): Rotation angle with shape (N).\n\n    Returns:\n        np.ndarray: Same shape as points.\n    \"\"\"\n    rot_sin = 
np.sin(angles)\n    rot_cos = np.cos(angles)\n    rot_mat_T = np.stack([[rot_cos, -rot_sin], [rot_sin, rot_cos]])\n    return np.einsum('aij,jka->aik', points, rot_mat_T)\n\n\ndef center_to_corner_box2d(centers, dims, angles=None, origin=0.5):\n    \"\"\"Convert kitti locations, dimensions and angles to corners.\n    format: center(xy), dims(xy), angles(clockwise when positive)\n\n    Args:\n        centers (np.ndarray): Locations in kitti label file with shape (N, 2).\n        dims (np.ndarray): Dimensions in kitti label file with shape (N, 2).\n        angles (np.ndarray): Rotation_y in kitti label file with shape (N).\n\n    Returns:\n        np.ndarray: Corners with the shape of (N, 4, 2).\n    \"\"\"\n    # 'length' in kitti format is in x axis.\n    # xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)\n    # center in kitti format is [0.5, 1.0, 0.5] in xyz.\n    corners = corners_nd(dims, origin=origin)\n    # corners: [N, 4, 2]\n    if angles is not None:\n        corners = rotation_2d(corners, angles)\n    corners += centers.reshape([-1, 1, 2])\n    return corners\n\n\n@numba.jit(nopython=True)\ndef depth_to_points(depth, trunc_pixel):\n    \"\"\"Convert depth map to points.\n\n    Args:\n        depth (np.ndarray, shape=[H, W]): Depth map in which the\n            rows [0~`trunc_pixel`] are truncated.\n        trunc_pixel (int): The number of truncated rows.\n\n    Returns:\n        np.ndarray: Points in camera coordinates.\n    \"\"\"\n    num_pts = np.sum(depth[trunc_pixel:, ] > 0.1)\n    points = np.zeros((num_pts, 3), dtype=depth.dtype)\n    x = np.array([0, 0, 1], dtype=depth.dtype)\n    k = 0\n    for i in range(trunc_pixel, depth.shape[0]):\n        for j in range(depth.shape[1]):\n            if depth[i, j] > 0.1:\n                x = np.array([j, i, 1], dtype=depth.dtype)\n                points[k] = x * depth[i, j]\n                k += 1\n    return points\n\n\ndef depth_to_lidar_points(depth, trunc_pixel, P2, r_rect, velo2cam):\n    \"\"\"Convert depth map to points in lidar coordinate.\n\n    Args:\n        depth (np.ndarray, shape=[H, W]): Depth map in which the\n            rows [0~`trunc_pixel`] are truncated.\n        trunc_pixel (int): The number of truncated rows.\n        P2 (np.ndarray, shape=[4, 4]): Intrinsics of Camera2.\n        r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in\n            specific camera coordinate (e.g. 
CAM2) to CAM0.\n        velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in\n            camera coordinate to lidar coordinate.\n\n    Returns:\n        np.ndarray: Points in lidar coordinates.\n    \"\"\"\n    pts = depth_to_points(depth, trunc_pixel)\n    points_shape = list(pts.shape[0:-1])\n    points = np.concatenate([pts, np.ones(points_shape + [1])], axis=-1)\n    points = points @ np.linalg.inv(P2.T)\n    lidar_points = camera_to_lidar(points, r_rect, velo2cam)\n    return lidar_points\n\n\ndef rotation_3d_in_axis(points, angles, axis=0):\n    \"\"\"Rotate points in specific axis.\n\n    Args:\n        points (np.ndarray, shape=[N, point_size, 3]]):\n        angles (np.ndarray, shape=[N]]):\n        axis (int): Axis to rotate at.\n\n    Returns:\n        np.ndarray: Rotated points.\n    \"\"\"\n    # points: [N, point_size, 3]\n    rot_sin = np.sin(angles)\n    rot_cos = np.cos(angles)\n    ones = np.ones_like(rot_cos)\n    zeros = np.zeros_like(rot_cos)\n    if axis == 1:\n        rot_mat_T = np.stack([[rot_cos, zeros, -rot_sin], [zeros, ones, zeros],\n                              [rot_sin, zeros, rot_cos]])\n    elif axis == 2 or axis == -1:\n        rot_mat_T = np.stack([[rot_cos, -rot_sin, zeros],\n                              [rot_sin, rot_cos, zeros], [zeros, zeros, ones]])\n    elif axis == 0:\n        rot_mat_T = np.stack([[zeros, rot_cos, -rot_sin],\n                              [zeros, rot_sin, rot_cos], [ones, zeros, zeros]])\n    else:\n        raise ValueError('axis should in range')\n\n    return np.einsum('aij,jka->aik', points, rot_mat_T)\n\n\ndef center_to_corner_box3d(centers,\n                           dims,\n                           angles=None,\n                           origin=(0.5, 1.0, 0.5),\n                           axis=1):\n    \"\"\"Convert kitti locations, dimensions and angles to corners.\n\n    Args:\n        centers (np.ndarray): Locations in kitti label file with shape (N, 3).\n        dims (np.ndarray): Dimensions in kitti label file with shape (N, 3).\n        angles (np.ndarray): Rotation_y in kitti label file with shape (N).\n        origin (list or array or float): Origin point relate to smallest point.\n            use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.\n        axis (int): Rotation axis. 
1 for camera and 2 for lidar.\n\n    Returns:\n        np.ndarray: Corners with the shape of (N, 8, 3).\n    \"\"\"\n    # 'length' in kitti format is in x axis.\n    # yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)\n    # center in kitti format is [0.5, 1.0, 0.5] in xyz.\n    corners = corners_nd(dims, origin=origin)\n    # corners: [N, 8, 3]\n    if angles is not None:\n        corners = rotation_3d_in_axis(corners, angles, axis=axis)\n    corners += centers.reshape([-1, 1, 3])\n    return corners\n\n\n@numba.jit(nopython=True)\ndef box2d_to_corner_jit(boxes):\n    \"\"\"Convert box2d to corner.\n\n    Args:\n        boxes (np.ndarray, shape=[N, 5]): Boxes2d with rotation.\n\n    Returns:\n        box_corners (np.ndarray, shape=[N, 4, 2]): Box corners.\n    \"\"\"\n    num_box = boxes.shape[0]\n    corners_norm = np.zeros((4, 2), dtype=boxes.dtype)\n    corners_norm[1, 1] = 1.0\n    corners_norm[2] = 1.0\n    corners_norm[3, 0] = 1.0\n    corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)\n    corners = boxes.reshape(num_box, 1, 5)[:, :, 2:4] * corners_norm.reshape(\n        1, 4, 2)\n    rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)\n    box_corners = np.zeros((num_box, 4, 2), dtype=boxes.dtype)\n    for i in range(num_box):\n        rot_sin = np.sin(boxes[i, -1])\n        rot_cos = np.cos(boxes[i, -1])\n        rot_mat_T[0, 0] = rot_cos\n        rot_mat_T[0, 1] = -rot_sin\n        rot_mat_T[1, 0] = rot_sin\n        rot_mat_T[1, 1] = rot_cos\n        box_corners[i] = corners[i] @ rot_mat_T + boxes[i, :2]\n    return box_corners\n\n\n@numba.njit\ndef corner_to_standup_nd_jit(boxes_corner):\n    \"\"\"Convert boxes_corner to aligned (min-max) boxes.\n\n    Args:\n        boxes_corner (np.ndarray, shape=[N, 2**dim, dim]): Boxes corners.\n\n    Returns:\n        np.ndarray, shape=[N, dim*2]: Aligned (min-max) boxes.\n    \"\"\"\n    num_boxes = boxes_corner.shape[0]\n    ndim = boxes_corner.shape[-1]\n    result = np.zeros((num_boxes, ndim * 2), dtype=boxes_corner.dtype)\n    for i in range(num_boxes):\n        for j in range(ndim):\n            result[i, j] = np.min(boxes_corner[i, :, j])\n        for j in range(ndim):\n            result[i, j + ndim] = np.max(boxes_corner[i, :, j])\n    return result\n\n\n@numba.jit(nopython=True)\ndef corner_to_surfaces_3d_jit(corners):\n    \"\"\"Convert 3d box corners from corner function above to surfaces that\n    normal vectors all direct to internal.\n\n    Args:\n        corners (np.ndarray): 3d box corners with the shape of (N, 8, 3).\n\n    Returns:\n        np.ndarray: Surfaces with the shape of (N, 6, 4, 3).\n    \"\"\"\n    # box_corners: [N, 8, 3], must from corner functions in this module\n    num_boxes = corners.shape[0]\n    surfaces = np.zeros((num_boxes, 6, 4, 3), dtype=corners.dtype)\n    corner_idxes = np.array([\n        0, 1, 2, 3, 7, 6, 5, 4, 0, 3, 7, 4, 1, 5, 6, 2, 0, 4, 5, 1, 3, 2, 6, 7\n    ]).reshape(6, 4)\n    for i in range(num_boxes):\n        for j in range(6):\n            for k in range(4):\n                surfaces[i, j, k] = corners[i, corner_idxes[j, k]]\n    return surfaces\n\n\ndef rotation_points_single_angle(points, angle, axis=0):\n    \"\"\"Rotate points with a single angle.\n\n    Args:\n        points (np.ndarray, shape=[N, 3]]):\n        angles (np.ndarray, shape=[1]]):\n        axis (int): Axis to rotate at.\n\n    Returns:\n        np.ndarray: Rotated points.\n    \"\"\"\n    # points: [N, 3]\n    rot_sin = np.sin(angle)\n    rot_cos = np.cos(angle)\n    if axis == 1:\n        
rot_mat_T = np.array(\n            [[rot_cos, 0, -rot_sin], [0, 1, 0], [rot_sin, 0, rot_cos]],\n            dtype=points.dtype)\n    elif axis == 2 or axis == -1:\n        rot_mat_T = np.array(\n            [[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]],\n            dtype=points.dtype)\n    elif axis == 0:\n        rot_mat_T = np.array(\n            [[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]],\n            dtype=points.dtype)\n    else:\n        raise ValueError('axis should in range')\n\n    return points @ rot_mat_T, rot_mat_T\n\n\ndef points_cam2img(points_3d, proj_mat,  with_depth=False):\n    \"\"\"Project points in camera coordinates to image coordinates.\n\n    Args:\n        points_3d (np.ndarray): Points in shape (N, 3)\n        proj_mat (np.ndarray): Transformation matrix between coordinates.\n\n    Returns:\n        np.ndarray: Points in image coordinates with shape [N, 2].\n    \"\"\"\n    points_shape = list(points_3d.shape)\n    points_shape[-1] = 1\n    points_4 = np.concatenate([points_3d, np.ones(points_shape)], axis=-1)\n\n    assert len(proj_mat.shape) == 2, 'The dimension of the projection'\\\n        f' matrix should be 2 instead of {len(proj_mat.shape)}.'\n    d1, d2 = proj_mat.shape[:2]\n    assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or (\n        d1 == 4 and d2 == 4), 'The shape of the projection matrix'\\\n        f' ({d1}*{d2}) is not supported.'\n    if d1 == 3:\n        proj_mat_expanded = np.eye(4, dtype=proj_mat.dtype)\n        proj_mat_expanded[:d1, :d2] = proj_mat\n        proj_mat = proj_mat_expanded\n\n    point_2d = points_4 @ proj_mat.T\n    point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]\n\n    if with_depth:\n        points_2d_depth = np.concatenate([point_2d_res, point_2d[..., 2:3]], axis=-1)\n        return points_2d_depth\n\n\n    return point_2d_res\n\n\ndef box3d_to_bbox(box3d, P2):\n    \"\"\"Convert box3d in camera coordinates to bbox in image coordinates.\n\n    Args:\n        box3d (np.ndarray, shape=[N, 7]): Boxes in camera coordinate.\n        P2 (np.array, shape=[4, 4]): Intrinsics of Camera2.\n\n    Returns:\n        np.ndarray, shape=[N, 4]: Boxes 2d in image coordinates.\n    \"\"\"\n    box_corners = center_to_corner_box3d(\n        box3d[:, :3], box3d[:, 3:6], box3d[:, 6], [0.5, 1.0, 0.5], axis=1)\n    box_corners_in_image = points_cam2img(box_corners, P2)\n    # box_corners_in_image: [N, 8, 2]\n    minxy = np.min(box_corners_in_image, axis=1)\n    maxxy = np.max(box_corners_in_image, axis=1)\n    bbox = np.concatenate([minxy, maxxy], axis=1)\n    return bbox\n\n\ndef corner_to_surfaces_3d(corners):\n    \"\"\"convert 3d box corners from corner function above to surfaces that\n    normal vectors all direct to internal.\n\n    Args:\n        corners (np.ndarray): 3D box corners with shape of (N, 8, 3).\n\n    Returns:\n        np.ndarray: Surfaces with the shape of (N, 6, 4, 3).\n    \"\"\"\n    # box_corners: [N, 8, 3], must from corner functions in this module\n    surfaces = np.array([\n        [corners[:, 0], corners[:, 1], corners[:, 2], corners[:, 3]],\n        [corners[:, 7], corners[:, 6], corners[:, 5], corners[:, 4]],\n        [corners[:, 0], corners[:, 3], corners[:, 7], corners[:, 4]],\n        [corners[:, 1], corners[:, 5], corners[:, 6], corners[:, 2]],\n        [corners[:, 0], corners[:, 4], corners[:, 5], corners[:, 1]],\n        [corners[:, 3], corners[:, 2], corners[:, 6], corners[:, 7]],\n    ]).transpose([2, 0, 1, 3])\n    return surfaces\n\n\ndef points_in_rbbox(points, 
rbbox, z_axis=2, origin=(0.5, 0.5, 0)):\n    \"\"\"Check points in rotated bbox and return indicces.\n\n    Args:\n        points (np.ndarray, shape=[N, 3+dim]): Points to query.\n        rbbox (np.ndarray, shape=[M, 7]): Boxes3d with rotation.\n        z_axis (int): Indicate which axis is height.\n        origin (tuple[int]): Indicate the position of box center.\n\n    Returns:\n        np.ndarray, shape=[N, M]: Indices of points in each box.\n    \"\"\"\n    # TODO: this function is different from PointCloud3D, be careful\n    # when start to use nuscene, check the input\n    rbbox_corners = center_to_corner_box3d(\n        rbbox[:, :3], rbbox[:, 3:6], rbbox[:, 6], origin=origin, axis=z_axis)\n    surfaces = corner_to_surfaces_3d(rbbox_corners)\n    indices = points_in_convex_polygon_3d_jit(points[:, :3], surfaces)\n    return indices\n\n\ndef minmax_to_corner_2d(minmax_box):\n    \"\"\"Convert minmax box to corners2d.\n\n    Args:\n        minmax_box (np.ndarray, shape=[N, dims]): minmax boxes.\n\n    Returns:\n        np.ndarray: 2d corners of boxes\n    \"\"\"\n    ndim = minmax_box.shape[-1] // 2\n    center = minmax_box[..., :ndim]\n    dims = minmax_box[..., ndim:] - center\n    return center_to_corner_box2d(center, dims, origin=0.0)\n\n\ndef limit_period(val, offset=0.5, period=np.pi):\n    \"\"\"Limit the value into a period for periodic function.\n\n    Args:\n        val (np.ndarray): The value to be converted.\n        offset (float, optional): Offset to set the value range. \\\n            Defaults to 0.5.\n        period (float, optional): Period of the value. Defaults to np.pi.\n\n    Returns:\n        torch.Tensor: Value in the range of \\\n            [-offset * period, (1-offset) * period]\n    \"\"\"\n    return val - np.floor(val / period + offset) * period\n\n\ndef create_anchors_3d_range(feature_size,\n                            anchor_range,\n                            sizes=((1.6, 3.9, 1.56), ),\n                            rotations=(0, np.pi / 2),\n                            dtype=np.float32):\n    \"\"\"Create anchors 3d by range.\n\n    Args:\n        feature_size (list[float] | tuple[float]): Feature map size. It is\n            either a list of a tuple of [D, H, W](in order of z, y, and x).\n        anchor_range (torch.Tensor | list[float]): Range of anchors with\n            shape [6]. The order is consistent with that of anchors, i.e.,\n            (x_min, y_min, z_min, x_max, y_max, z_max).\n        sizes (list[list] | np.ndarray | torch.Tensor): Anchor size with\n            shape [N, 3], in order of x, y, z.\n        rotations (list[float] | np.ndarray | torch.Tensor): Rotations of\n            anchors in a single feature grid.\n        dtype (type): Data type. 
Default to np.float32.\n\n    Returns:\n        np.ndarray: Range based anchors with shape of \\\n            (*feature_size, num_sizes, num_rots, 7).\n    \"\"\"\n    anchor_range = np.array(anchor_range, dtype)\n    z_centers = np.linspace(\n        anchor_range[2], anchor_range[5], feature_size[0], dtype=dtype)\n    y_centers = np.linspace(\n        anchor_range[1], anchor_range[4], feature_size[1], dtype=dtype)\n    x_centers = np.linspace(\n        anchor_range[0], anchor_range[3], feature_size[2], dtype=dtype)\n    sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])\n    rotations = np.array(rotations, dtype=dtype)\n    rets = np.meshgrid(\n        x_centers, y_centers, z_centers, rotations, indexing='ij')\n    tile_shape = [1] * 5\n    tile_shape[-2] = int(sizes.shape[0])\n    for i in range(len(rets)):\n        rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)\n        rets[i] = rets[i][..., np.newaxis]  # for concat\n    sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])\n    tile_size_shape = list(rets[0].shape)\n    tile_size_shape[3] = 1\n    sizes = np.tile(sizes, tile_size_shape)\n    rets.insert(3, sizes)\n    ret = np.concatenate(rets, axis=-1)\n    return np.transpose(ret, [2, 1, 0, 3, 4, 5])\n\n\ndef center_to_minmax_2d(centers, dims, origin=0.5):\n    \"\"\"Center to minmax.\n\n    Args:\n        centers (np.ndarray): Center points.\n        dims (np.ndarray): Dimensions.\n        origin (list or array or float): origin point relate to smallest point.\n\n    Returns:\n        np.ndarray: Minmax points.\n    \"\"\"\n    if origin == 0.5:\n        return np.concatenate([centers - dims / 2, centers + dims / 2],\n                              axis=-1)\n    corners = center_to_corner_box2d(centers, dims, origin=origin)\n    return corners[:, [0, 2]].reshape([-1, 4])\n\n\ndef rbbox2d_to_near_bbox(rbboxes):\n    \"\"\"convert rotated bbox to nearest 'standing' or 'lying' bbox.\n\n    Args:\n        rbboxes (np.ndarray): Rotated bboxes with shape of \\\n            (N, 5(x, y, xdim, ydim, rad)).\n\n    Returns:\n        np.ndarray: Bounding boxes with the shpae of\n            (N, 4(xmin, ymin, xmax, ymax)).\n    \"\"\"\n    rots = rbboxes[..., -1]\n    rots_0_pi_div_2 = np.abs(limit_period(rots, 0.5, np.pi))\n    cond = (rots_0_pi_div_2 > np.pi / 4)[..., np.newaxis]\n    bboxes_center = np.where(cond, rbboxes[:, [0, 1, 3, 2]], rbboxes[:, :4])\n    bboxes = center_to_minmax_2d(bboxes_center[:, :2], bboxes_center[:, 2:])\n    return bboxes\n\n\n@numba.jit(nopython=True)\ndef iou_jit(boxes, query_boxes, mode='iou', eps=0.0):\n    \"\"\"Calculate box iou. 
Note that jit version runs ~10x faster than the\n    box_overlaps function in mmdet3d.core.evaluation.\n\n    Args:\n        boxes (np.ndarray): Input bounding boxes with shape of (N, 4).\n        query_boxes (np.ndarray): Query boxes with shape of (K, 4).\n\n    Returns:\n        np.ndarray: Overlap between boxes and query_boxes\n            with the shape of [N, K].\n    \"\"\"\n    N = boxes.shape[0]\n    K = query_boxes.shape[0]\n    overlaps = np.zeros((N, K), dtype=boxes.dtype)\n    for k in range(K):\n        box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + eps) *\n                    (query_boxes[k, 3] - query_boxes[k, 1] + eps))\n        for n in range(N):\n            iw = (\n                min(boxes[n, 2], query_boxes[k, 2]) -\n                max(boxes[n, 0], query_boxes[k, 0]) + eps)\n            if iw > 0:\n                ih = (\n                    min(boxes[n, 3], query_boxes[k, 3]) -\n                    max(boxes[n, 1], query_boxes[k, 1]) + eps)\n                if ih > 0:\n                    if mode == 'iou':\n                        ua = ((boxes[n, 2] - boxes[n, 0] + eps) *\n                              (boxes[n, 3] - boxes[n, 1] + eps) + box_area -\n                              iw * ih)\n                    else:\n                        ua = ((boxes[n, 2] - boxes[n, 0] + eps) *\n                              (boxes[n, 3] - boxes[n, 1] + eps))\n                    overlaps[n, k] = iw * ih / ua\n    return overlaps\n\n\ndef projection_matrix_to_CRT_kitti(proj):\n    \"\"\"Split projection matrix of kitti.\n\n    P = C @ [R|T]\n    C is upper triangular matrix, so we need to inverse CR and use QR\n    stable for all kitti camera projection matrix.\n\n    Args:\n        proj (p.array, shape=[4, 4]): Intrinsics of camera.\n\n    Returns:\n        tuple[np.ndarray]: Splited matrix of C, R and T.\n    \"\"\"\n\n    CR = proj[0:3, 0:3]\n    CT = proj[0:3, 3]\n    RinvCinv = np.linalg.inv(CR)\n    Rinv, Cinv = np.linalg.qr(RinvCinv)\n    C = np.linalg.inv(Cinv)\n    R = np.linalg.inv(Rinv)\n    T = Cinv @ CT\n    return C, R, T\n\n\ndef remove_outside_points(points, rect, Trv2c, P2, image_shape):\n    \"\"\"Remove points which are outside of image.\n\n    Args:\n        points (np.ndarray, shape=[N, 3+dims]): Total points.\n        rect (np.ndarray, shape=[4, 4]): Matrix to project points in\n            specific camera coordinate (e.g. 
CAM2) to CAM0.\n        Trv2c (np.ndarray, shape=[4, 4]): Matrix to project points in\n            camera coordinate to lidar coordinate.\n        P2 (p.array, shape=[4, 4]): Intrinsics of Camera2.\n        image_shape (list[int]): Shape of image.\n\n    Returns:\n        np.ndarray, shape=[N, 3+dims]: Filtered points.\n    \"\"\"\n    # 5x faster than remove_outside_points_v1(2ms vs 10ms)\n    C, R, T = projection_matrix_to_CRT_kitti(P2)\n    image_bbox = [0, 0, image_shape[1], image_shape[0]]\n    frustum = get_frustum(image_bbox, C)\n    frustum -= T\n    frustum = np.linalg.inv(R) @ frustum.T\n    frustum = camera_to_lidar(frustum.T, rect, Trv2c)\n    frustum_surfaces = corner_to_surfaces_3d_jit(frustum[np.newaxis, ...])\n    indices = points_in_convex_polygon_3d_jit(points[:, :3], frustum_surfaces)\n    points = points[indices.reshape([-1])]\n    return points\n\n\ndef get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):\n    \"\"\"Get frustum corners in camera coordinates.\n\n    Args:\n        bbox_image (list[int]): box in image coordinates.\n        C (np.ndarray): Intrinsics.\n        near_clip (float): Nearest distance of frustum.\n        far_clip (float): Farthest distance of frustum.\n\n    Returns:\n        np.ndarray, shape=[8, 3]: coordinates of frustum corners.\n    \"\"\"\n    fku = C[0, 0]\n    fkv = -C[1, 1]\n    u0v0 = C[0:2, 2]\n    z_points = np.array(\n        [near_clip] * 4 + [far_clip] * 4, dtype=C.dtype)[:, np.newaxis]\n    b = bbox_image\n    box_corners = np.array(\n        [[b[0], b[1]], [b[0], b[3]], [b[2], b[3]], [b[2], b[1]]],\n        dtype=C.dtype)\n    near_box_corners = (box_corners - u0v0) / np.array(\n        [fku / near_clip, -fkv / near_clip], dtype=C.dtype)\n    far_box_corners = (box_corners - u0v0) / np.array(\n        [fku / far_clip, -fkv / far_clip], dtype=C.dtype)\n    ret_xy = np.concatenate([near_box_corners, far_box_corners],\n                            axis=0)  # [8, 2]\n    ret_xyz = np.concatenate([ret_xy, z_points], axis=1)\n    return ret_xyz\n\n\ndef surface_equ_3d(polygon_surfaces):\n    \"\"\"\n\n    Args:\n        polygon_surfaces (np.ndarray): Polygon surfaces with shape of\n            [num_polygon, max_num_surfaces, max_num_points_of_surface, 3].\n            All surfaces' normal vector must direct to internal.\n            Max_num_points_of_surface must at least 3.\n\n    Returns:\n        tuple: normal vector and its direction.\n    \"\"\"\n    # return [a, b, c], d in ax+by+cz+d=0\n    # polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]\n    surface_vec = polygon_surfaces[:, :, :2, :] - \\\n        polygon_surfaces[:, :, 1:3, :]\n    # normal_vec: [..., 3]\n    normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])\n    # print(normal_vec.shape, points[..., 0, :].shape)\n    # d = -np.inner(normal_vec, points[..., 0, :])\n    d = np.einsum('aij, aij->ai', normal_vec, polygon_surfaces[:, :, 0, :])\n    return normal_vec, -d\n\n\n@numba.njit\ndef _points_in_convex_polygon_3d_jit(points, polygon_surfaces, normal_vec, d,\n                                     num_surfaces):\n    \"\"\"\n    Args:\n        points (np.ndarray): Input points with shape of (num_points, 3).\n        polygon_surfaces (np.ndarray): Polygon surfaces with shape of\n            (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).\n            All surfaces' normal vector must direct to internal.\n            Max_num_points_of_surface must at least 3.\n        normal_vec (np.ndarray): Normal vector of 
polygon_surfaces.\n        d (int): Directions of normal vector.\n        num_surfaces (np.ndarray): Number of surfaces a polygon contains\n            shape of (num_polygon).\n\n    Returns:\n        np.ndarray: Result matrix with the shape of [num_points, num_polygon].\n    \"\"\"\n    max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]\n    num_points = points.shape[0]\n    num_polygons = polygon_surfaces.shape[0]\n    ret = np.ones((num_points, num_polygons), dtype=np.bool_)\n    sign = 0.0\n    for i in range(num_points):\n        for j in range(num_polygons):\n            for k in range(max_num_surfaces):\n                if k > num_surfaces[j]:\n                    break\n                sign = (\n                    points[i, 0] * normal_vec[j, k, 0] +\n                    points[i, 1] * normal_vec[j, k, 1] +\n                    points[i, 2] * normal_vec[j, k, 2] + d[j, k])\n                if sign >= 0:\n                    ret[i, j] = False\n                    break\n    return ret\n\n\ndef points_in_convex_polygon_3d_jit(points,\n                                    polygon_surfaces,\n                                    num_surfaces=None):\n    \"\"\"Check points is in 3d convex polygons.\n\n    Args:\n        points (np.ndarray): Input points with shape of (num_points, 3).\n        polygon_surfaces (np.ndarray): Polygon surfaces with shape of \\\n            (num_polygon, max_num_surfaces, max_num_points_of_surface, 3). \\\n            All surfaces' normal vector must direct to internal. \\\n            Max_num_points_of_surface must at least 3.\n        num_surfaces (np.ndarray): Number of surfaces a polygon contains \\\n            shape of (num_polygon).\n\n    Returns:\n        np.ndarray: Result matrix with the shape of [num_points, num_polygon].\n    \"\"\"\n    max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]\n    # num_points = points.shape[0]\n    num_polygons = polygon_surfaces.shape[0]\n    if num_surfaces is None:\n        num_surfaces = np.full((num_polygons, ), 9999999, dtype=np.int64)\n    normal_vec, d = surface_equ_3d(polygon_surfaces[:, :, :3, :])\n    # normal_vec: [num_polygon, max_num_surfaces, 3]\n    # d: [num_polygon, max_num_surfaces]\n    return _points_in_convex_polygon_3d_jit(points, polygon_surfaces,\n                                            normal_vec, d, num_surfaces)\n\n\n@numba.jit\ndef points_in_convex_polygon_jit(points, polygon, clockwise=True):\n    \"\"\"Check points is in 2d convex polygons. 
True when point in polygon.\n\n    Args:\n        points (np.ndarray): Input points with the shape of [num_points, 2].\n        polygon (np.ndarray): Input polygon with the shape of\n            [num_polygon, num_points_of_polygon, 2].\n        clockwise (bool): Indicate polygon is clockwise.\n\n    Returns:\n        np.ndarray: Result matrix with the shape of [num_points, num_polygon].\n    \"\"\"\n    # first convert polygon to directed lines\n    num_points_of_polygon = polygon.shape[1]\n    num_points = points.shape[0]\n    num_polygons = polygon.shape[0]\n    # if clockwise:\n    #     vec1 = polygon - polygon[:, [num_points_of_polygon - 1] +\n    #                              list(range(num_points_of_polygon - 1)), :]\n    # else:\n    #     vec1 = polygon[:, [num_points_of_polygon - 1] +\n    #                    list(range(num_points_of_polygon - 1)), :] - polygon\n    # vec1: [num_polygon, num_points_of_polygon, 2]\n    vec1 = np.zeros((2), dtype=polygon.dtype)\n    ret = np.zeros((num_points, num_polygons), dtype=np.bool_)\n    success = True\n    cross = 0.0\n    for i in range(num_points):\n        for j in range(num_polygons):\n            success = True\n            for k in range(num_points_of_polygon):\n                if clockwise:\n                    vec1 = polygon[j, k] - polygon[j, k - 1]\n                else:\n                    vec1 = polygon[j, k - 1] - polygon[j, k]\n                cross = vec1[1] * (polygon[j, k, 0] - points[i, 0])\n                cross -= vec1[0] * (polygon[j, k, 1] - points[i, 1])\n                if cross >= 0:\n                    success = False\n                    break\n            ret[i, j] = success\n    return ret\n\n\ndef boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True):\n    \"\"\"Convert kitti center boxes to corners.\n\n        7 -------- 4\n       /|         /|\n      6 -------- 5 .\n      | |        | |\n      . 
3 -------- 0\n      |/         |/\n      2 -------- 1\n\n    Args:\n        boxes3d (np.ndarray): Boxes with shape of (N, 7) \\\n            [x, y, z, w, l, h, ry] in LiDAR coords, see the definition of ry \\\n            in KITTI dataset.\n        bottom_center (bool): Whether z is on the bottom center of object.\n\n    Returns:\n        np.ndarray: Box corners with the shape of [N, 8, 3].\n    \"\"\"\n    boxes_num = boxes3d.shape[0]\n    w, l, h = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5]\n    x_corners = np.array(\n        [w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.],\n        dtype=np.float32).T\n    y_corners = np.array(\n        [-l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2.],\n        dtype=np.float32).T\n    if bottom_center:\n        z_corners = np.zeros((boxes_num, 8), dtype=np.float32)\n        z_corners[:, 4:8] = h.reshape(boxes_num, 1).repeat(4, axis=1)  # (N, 8)\n    else:\n        z_corners = np.array([\n            -h / 2., -h / 2., -h / 2., -h / 2., h / 2., h / 2., h / 2., h / 2.\n        ],\n                             dtype=np.float32).T\n\n    ry = boxes3d[:, 6]\n    zeros, ones = np.zeros(\n        ry.size, dtype=np.float32), np.ones(\n            ry.size, dtype=np.float32)\n    rot_list = np.array([[np.cos(ry), -np.sin(ry), zeros],\n                         [np.sin(ry), np.cos(ry), zeros], [zeros, zeros,\n                                                           ones]])  # (3, 3, N)\n    R_list = np.transpose(rot_list, (2, 0, 1))  # (N, 3, 3)\n\n    temp_corners = np.concatenate((x_corners.reshape(\n        -1, 8, 1), y_corners.reshape(-1, 8, 1), z_corners.reshape(-1, 8, 1)),\n                                  axis=2)  # (N, 8, 3)\n    rotated_corners = np.matmul(temp_corners, R_list)  # (N, 8, 3)\n    x_corners = rotated_corners[:, :, 0]\n    y_corners = rotated_corners[:, :, 1]\n    z_corners = rotated_corners[:, :, 2]\n\n    x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]\n\n    x = x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8)\n    y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8)\n    z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8)\n\n    corners = np.concatenate(\n        (x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)),\n        axis=2)\n\n    return corners.astype(np.float32)\n"
  },
  {
    "path": "mmdet3d/core/bbox/coders/__init__.py",
    "content": "from mmdet.core.bbox import build_bbox_coder\nfrom .anchor_free_bbox_coder import AnchorFreeBBoxCoder\nfrom .centerpoint_bbox_coders import CenterPointBBoxCoder\nfrom .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder\nfrom .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder\nfrom .transfusion_bbox_coder import TransFusionBBoxCoder\nfrom .camera_bbox_coder import CameraBBoxCoder\n\n__all__ = [\n    'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder',\n    'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder', 'TransFusionBBoxCoder',\n    'CameraBBoxCoder'\n]\n"
  },
  {
    "path": "mmdet3d/core/bbox/coders/anchor_free_bbox_coder.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet.core.bbox.builder import BBOX_CODERS\nfrom .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass AnchorFreeBBoxCoder(PartialBinBasedBBoxCoder):\n    \"\"\"Anchor free bbox coder for 3D boxes.\n\n    Args:\n        num_dir_bins (int): Number of bins to encode direction angle.\n        with_rot (bool): Whether the bbox is with rotation.\n    \"\"\"\n\n    def __init__(self, num_dir_bins, with_rot=True):\n        super(AnchorFreeBBoxCoder, self).__init__(\n            num_dir_bins, 0, [], with_rot=with_rot)\n        self.num_dir_bins = num_dir_bins\n        self.with_rot = with_rot\n\n    def encode(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Encode ground truth to prediction targets.\n\n        Args:\n            gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes \\\n                with shape (n, 7).\n            gt_labels_3d (torch.Tensor): Ground truth classes.\n\n        Returns:\n            tuple: Targets of center, size and direction.\n        \"\"\"\n        # generate center target\n        center_target = gt_bboxes_3d.gravity_center\n\n        # generate bbox size target\n        size_res_target = gt_bboxes_3d.dims / 2\n\n        # generate dir target\n        box_num = gt_labels_3d.shape[0]\n        if self.with_rot:\n            (dir_class_target,\n             dir_res_target) = self.angle2class(gt_bboxes_3d.yaw)\n            dir_res_target /= (2 * np.pi / self.num_dir_bins)\n        else:\n            dir_class_target = gt_labels_3d.new_zeros(box_num)\n            dir_res_target = gt_bboxes_3d.tensor.new_zeros(box_num)\n\n        return (center_target, size_res_target, dir_class_target,\n                dir_res_target)\n\n    def decode(self, bbox_out):\n        \"\"\"Decode predicted parts to bbox3d.\n\n        Args:\n            bbox_out (dict): Predictions from model, should contain keys below.\n\n                - center: predicted bottom center of bboxes.\n                - dir_class: predicted bbox direction class.\n                - dir_res: predicted bbox direction residual.\n                - size: predicted bbox size.\n\n        Returns:\n            torch.Tensor: Decoded bbox3d with shape (batch, n, 7).\n        \"\"\"\n        center = bbox_out['center']\n        batch_size, num_proposal = center.shape[:2]\n\n        # decode heading angle\n        if self.with_rot:\n            dir_class = torch.argmax(bbox_out['dir_class'], -1)\n            dir_res = torch.gather(bbox_out['dir_res'], 2,\n                                   dir_class.unsqueeze(-1))\n            dir_res.squeeze_(2)\n            dir_angle = self.class2angle(dir_class, dir_res).reshape(\n                batch_size, num_proposal, 1)\n        else:\n            dir_angle = center.new_zeros(batch_size, num_proposal, 1)\n\n        # decode bbox size\n        bbox_size = torch.clamp(bbox_out['size'] * 2, min=0.1)\n\n        bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)\n        return bbox3d\n\n    def split_pred(self, cls_preds, reg_preds, base_xyz):\n        \"\"\"Split predicted features to specific parts.\n\n        Args:\n            cls_preds (torch.Tensor): Class predicted features to split.\n            reg_preds (torch.Tensor): Regression predicted features to split.\n            base_xyz (torch.Tensor): Coordinates of points.\n\n        Returns:\n            dict[str, torch.Tensor]: Split results.\n        \"\"\"\n        results = {}\n        
results['obj_scores'] = cls_preds\n\n        start, end = 0, 0\n        reg_preds_trans = reg_preds.transpose(2, 1)\n\n        # decode center\n        end += 3\n        # (batch_size, num_proposal, 3)\n        results['center_offset'] = reg_preds_trans[..., start:end]\n        results['center'] = base_xyz.detach() + reg_preds_trans[..., start:end]\n        start = end\n\n        # decode size\n        end += 3\n        # (batch_size, num_proposal, 3)\n        results['size'] = reg_preds_trans[..., start:end]\n        start = end\n\n        # decode direction\n        end += self.num_dir_bins\n        results['dir_class'] = reg_preds_trans[..., start:end]\n        start = end\n\n        end += self.num_dir_bins\n        dir_res_norm = reg_preds_trans[..., start:end]\n        start = end\n\n        results['dir_res_norm'] = dir_res_norm\n        results['dir_res'] = dir_res_norm * (2 * np.pi / self.num_dir_bins)\n\n        return results\n"
  },
  {
    "path": "mmdet3d/core/bbox/coders/camera_bbox_coder.py",
    "content": "import torch\r\n\r\nfrom mmdet.core.bbox import BaseBBoxCoder\r\nfrom mmdet.core.bbox.builder import BBOX_CODERS\r\n\r\n\r\n@BBOX_CODERS.register_module()\r\nclass CameraBBoxCoder(BaseBBoxCoder):\r\n    def __init__(self, code_size=8):\r\n        self.code_size = code_size\r\n\r\n    def encode(self, dst_boxes):\r\n        targets = torch.zeros([dst_boxes.shape[0], self.code_size]).to(dst_boxes.device)\r\n        targets[:, 3] = dst_boxes[:, 3].log()\r\n        targets[:, 4] = dst_boxes[:, 4].log()\r\n        targets[:, 5] = dst_boxes[:, 5].log()\r\n        targets[:, 6] = torch.sin(dst_boxes[:, 6])\r\n        targets[:, 7] = torch.cos(dst_boxes[:, 6])\r\n\r\n        targets[:, 0] = dst_boxes[:, 0]\r\n        targets[:, 1] = dst_boxes[:, 1] - 0.5 * dst_boxes[:, 4]\r\n        targets[:, 2] = dst_boxes[:, 2]\r\n\r\n        if self.code_size == 10:\r\n            targets[:, 8:10] = dst_boxes[:, 7:]\r\n        return targets\r\n\r\n    def decode(self, cls, rot, dim, center, vel):\r\n        \"\"\"Decode bboxes.\r\n\r\n        Args:\r\n            cls (torch.Tensor): Heatmap with the shape of [B, num_cls, num_proposals].\r\n            rot (torch.Tensor): Rotation with the shape of\r\n                [B, 2, num_proposals].\r\n            dim (torch.Tensor): Dim of the boxes with the shape of\r\n                [B, 3, num_proposals].\r\n            center (torch.Tensor): bev center of the boxes with the shape of\r\n                [B, 3, num_proposals]. (in feature map metric)\r\n            vel (torch.Tensor): Velocity with the shape of [B, 2, num_proposals].\r\n\r\n        Returns:\r\n            list[dict]: Decoded boxes.\r\n        \"\"\"\r\n        # class label\r\n        final_preds = cls.max(1, keepdims=False).indices\r\n        final_scores = cls.max(1, keepdims=False).values\r\n\r\n        dim[:, 0, :] = dim[:, 0, :].exp()\r\n        dim[:, 1, :] = dim[:, 1, :].exp()\r\n        dim[:, 2, :] = dim[:, 2, :].exp()\r\n        # dim = torch.exp(dim)\r\n        rots, rotc = rot[:, 0:1, :], rot[:, 1:2, :]\r\n        rot = torch.atan2(rots, rotc)\r\n\r\n        center = center.clone()\r\n        center[:, 1, :] = center[:, 1, :] + 0.5 * dim[:, 1, :]\r\n\r\n        if vel is None:\r\n            final_box_preds = torch.cat([center, dim, rot], dim=1).permute(0, 2, 1)\r\n        else:\r\n            final_box_preds = torch.cat([center, dim, rot, vel], dim=1).permute(0, 2, 1)\r\n\r\n        predictions_dicts = []\r\n        for i in range(cls.shape[0]):\r\n            boxes3d = final_box_preds[i]\r\n            scores = final_scores[i]\r\n            labels = final_preds[i]\r\n            predictions_dict = {\r\n                'bboxes': boxes3d,\r\n                'scores': scores,\r\n                'labels': labels\r\n            }\r\n            predictions_dicts.append(predictions_dict)\r\n\r\n        return predictions_dicts\r\n\r\n    @staticmethod\r\n    def decode_yaw(bbox, centers2d, cam2img):\r\n        bbox[:, 6] = torch.atan2(centers2d[:, 0] - cam2img[0, 2], cam2img[0, 0]) + bbox[:, 6]\r\n\r\n        return bbox\r\n"
  },
  {
    "path": "mmdet3d/core/bbox/coders/centerpoint_bbox_coders.py",
    "content": "import torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\n\n\n@BBOX_CODERS.register_module()\nclass CenterPointBBoxCoder(BaseBBoxCoder):\n    \"\"\"Bbox coder for CenterPoint.\n\n    Args:\n        pc_range (list[float]): Range of point cloud.\n        out_size_factor (int): Downsample factor of the model.\n        voxel_size (list[float]): Size of voxel.\n        post_center_range (list[float]): Limit of the center.\n            Default: None.\n        max_num (int): Max number to be kept. Default: 100.\n        score_threshold (float): Threshold to filter boxes based on score.\n            Default: None.\n        code_size (int): Code size of bboxes. Default: 9\n    \"\"\"\n\n    def __init__(self,\n                 pc_range,\n                 out_size_factor,\n                 voxel_size,\n                 post_center_range=None,\n                 max_num=100,\n                 score_threshold=None,\n                 code_size=9):\n\n        self.pc_range = pc_range\n        self.out_size_factor = out_size_factor\n        self.voxel_size = voxel_size\n        self.post_center_range = post_center_range\n        self.max_num = max_num\n        self.score_threshold = score_threshold\n        self.code_size = code_size\n\n    def _gather_feat(self, feats, inds, feat_masks=None):\n        \"\"\"Given feats and indexes, returns the gathered feats.\n\n        Args:\n            feats (torch.Tensor): Features to be transposed and gathered\n                with the shape of [B, 2, W, H].\n            inds (torch.Tensor): Indexes with the shape of [B, N].\n            feat_masks (torch.Tensor): Mask of the feats. Default: None.\n\n        Returns:\n            torch.Tensor: Gathered feats.\n        \"\"\"\n        dim = feats.size(2)\n        inds = inds.unsqueeze(2).expand(inds.size(0), inds.size(1), dim)\n        feats = feats.gather(1, inds)\n        if feat_masks is not None:\n            feat_masks = feat_masks.unsqueeze(2).expand_as(feats)\n            feats = feats[feat_masks]\n            feats = feats.view(-1, dim)\n        return feats\n\n    def _topk(self, scores, K=80):\n        \"\"\"Get indexes based on scores.\n\n        Args:\n            scores (torch.Tensor): scores with the shape of [B, N, W, H].\n            K (int): Number to be kept. 
Defaults to 80.\n\n        Returns:\n            tuple[torch.Tensor]\n                torch.Tensor: Selected scores with the shape of [B, K].\n                torch.Tensor: Selected indexes with the shape of [B, K].\n                torch.Tensor: Selected classes with the shape of [B, K].\n                torch.Tensor: Selected y coord with the shape of [B, K].\n                torch.Tensor: Selected x coord with the shape of [B, K].\n        \"\"\"\n        batch, cat, height, width = scores.size()\n\n        topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)\n\n        topk_inds = topk_inds % (height * width)\n        topk_ys = (topk_inds.float() /\n                   torch.tensor(width, dtype=torch.float)).int().float()\n        topk_xs = (topk_inds % width).int().float()\n\n        topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)\n        topk_clses = (topk_ind / torch.tensor(K, dtype=torch.float)).int()\n        topk_inds = self._gather_feat(topk_inds.view(batch, -1, 1),\n                                      topk_ind).view(batch, K)\n        topk_ys = self._gather_feat(topk_ys.view(batch, -1, 1),\n                                    topk_ind).view(batch, K)\n        topk_xs = self._gather_feat(topk_xs.view(batch, -1, 1),\n                                    topk_ind).view(batch, K)\n\n        return topk_score, topk_inds, topk_clses, topk_ys, topk_xs\n\n    def _transpose_and_gather_feat(self, feat, ind):\n        \"\"\"Given feats and indexes, returns the transposed and gathered feats.\n\n        Args:\n            feat (torch.Tensor): Features to be transposed and gathered\n                with the shape of [B, 2, W, H].\n            ind (torch.Tensor): Indexes with the shape of [B, N].\n\n        Returns:\n            torch.Tensor: Transposed and gathered feats.\n        \"\"\"\n        feat = feat.permute(0, 2, 3, 1).contiguous()\n        feat = feat.view(feat.size(0), -1, feat.size(3))\n        feat = self._gather_feat(feat, ind)\n        return feat\n\n    def encode(self):\n        pass\n\n    def decode(self,\n               heat,\n               rot_sine,\n               rot_cosine,\n               hei,\n               dim,\n               vel,\n               reg=None,\n               task_id=-1):\n        \"\"\"Decode bboxes.\n\n        Args:\n            heat (torch.Tensor): Heatmap with the shape of [B, N, W, H].\n            rot_sine (torch.Tensor): Sine of rotation with the shape of\n                [B, 1, W, H].\n            rot_cosine (torch.Tensor): Cosine of rotation with the shape of\n                [B, 1, W, H].\n            hei (torch.Tensor): Height of the boxes with the shape\n                of [B, 1, W, H].\n            dim (torch.Tensor): Dim of the boxes with the shape of\n                [B, 1, W, H].\n            vel (torch.Tensor): Velocity with the shape of [B, 1, W, H].\n            reg (torch.Tensor): Regression value of the boxes in 2D with\n                the shape of [B, 2, W, H]. Default: None.\n            task_id (int): Index of task. 
Default: -1.\n\n        Returns:\n            list[dict]: Decoded boxes.\n        \"\"\"\n        batch, cat, _, _ = heat.size()\n\n        scores, inds, clses, ys, xs = self._topk(heat, K=self.max_num)\n\n        if reg is not None:\n            reg = self._transpose_and_gather_feat(reg, inds)\n            reg = reg.view(batch, self.max_num, 2)\n            xs = xs.view(batch, self.max_num, 1) + reg[:, :, 0:1]\n            ys = ys.view(batch, self.max_num, 1) + reg[:, :, 1:2]\n        else:\n            xs = xs.view(batch, self.max_num, 1) + 0.5\n            ys = ys.view(batch, self.max_num, 1) + 0.5\n\n        # rotation value and direction label\n        rot_sine = self._transpose_and_gather_feat(rot_sine, inds)\n        rot_sine = rot_sine.view(batch, self.max_num, 1)\n\n        rot_cosine = self._transpose_and_gather_feat(rot_cosine, inds)\n        rot_cosine = rot_cosine.view(batch, self.max_num, 1)\n        rot = torch.atan2(rot_sine, rot_cosine)\n\n        # height in the bev\n        hei = self._transpose_and_gather_feat(hei, inds)\n        hei = hei.view(batch, self.max_num, 1)\n\n        # dim of the box\n        dim = self._transpose_and_gather_feat(dim, inds)\n        dim = dim.view(batch, self.max_num, 3)\n\n        # class label\n        clses = clses.view(batch, self.max_num).float()\n        scores = scores.view(batch, self.max_num)\n\n        xs = xs.view(\n            batch, self.max_num,\n            1) * self.out_size_factor * self.voxel_size[0] + self.pc_range[0]\n        ys = ys.view(\n            batch, self.max_num,\n            1) * self.out_size_factor * self.voxel_size[1] + self.pc_range[1]\n\n        if vel is None:  # KITTI FORMAT\n            final_box_preds = torch.cat([xs, ys, hei, dim, rot], dim=2)\n        else:  # exist velocity, nuscene format\n            vel = self._transpose_and_gather_feat(vel, inds)\n            vel = vel.view(batch, self.max_num, 2)\n            final_box_preds = torch.cat([xs, ys, hei, dim, rot, vel], dim=2)\n\n        final_scores = scores\n        final_preds = clses\n\n        # use score threshold\n        if self.score_threshold is not None:\n            thresh_mask = final_scores > self.score_threshold\n\n        if self.post_center_range is not None:\n            self.post_center_range = torch.tensor(\n                self.post_center_range, device=heat.device)\n            mask = (final_box_preds[..., :3] >=\n                    self.post_center_range[:3]).all(2)\n            mask &= (final_box_preds[..., :3] <=\n                     self.post_center_range[3:]).all(2)\n\n            predictions_dicts = []\n            for i in range(batch):\n                cmask = mask[i, :]\n                if self.score_threshold:\n                    cmask &= thresh_mask[i]\n\n                boxes3d = final_box_preds[i, cmask]\n                scores = final_scores[i, cmask]\n                labels = final_preds[i, cmask]\n                predictions_dict = {\n                    'bboxes': boxes3d,\n                    'scores': scores,\n                    'labels': labels\n                }\n\n                predictions_dicts.append(predictions_dict)\n        else:\n            raise NotImplementedError(\n                'Need to reorganize output as a batch, only '\n                'support post_center_range is not None for now!')\n\n        return predictions_dicts\n"
  },
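  {
    "path": "examples/bbox_coder_sketches/centerpoint_decode_sketch.py",
    "content": "# Illustrative usage sketch, not part of the original codebase: the file path,\n# tensor shapes and values below are made up for demonstration only. It assumes\n# a working mmdet/mmdet3d installation and that the CenterPoint coder above is\n# importable as mmdet3d.core.bbox.coders.centerpoint_bbox_coders; only the\n# pure-torch decode() path is exercised.\nimport torch\n\nfrom mmdet3d.core.bbox.coders.centerpoint_bbox_coders import CenterPointBBoxCoder\n\n\ndef main():\n    coder = CenterPointBBoxCoder(\n        pc_range=[-51.2, -51.2],\n        out_size_factor=8,\n        voxel_size=[0.2, 0.2],\n        post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n        max_num=50,\n        score_threshold=None,\n        code_size=9)\n\n    # fake head outputs on a 16 x 16 BEV feature map with 3 classes\n    batch, num_cls, h, w = 1, 3, 16, 16\n    heat = torch.rand(batch, num_cls, h, w).sigmoid()\n    rot_sine = torch.rand(batch, 1, h, w)\n    rot_cosine = torch.rand(batch, 1, h, w)\n    hei = torch.rand(batch, 1, h, w)\n    dim = torch.rand(batch, 3, h, w)\n    vel = torch.rand(batch, 2, h, w)\n    reg = torch.rand(batch, 2, h, w)\n\n    preds = coder.decode(heat, rot_sine, rot_cosine, hei, dim, vel, reg=reg)\n    # one dict per sample: 'bboxes' has shape (<=max_num, 9), plus 'scores'\n    # and 'labels' of matching length\n    print(preds[0]['bboxes'].shape, preds[0]['scores'].shape)\n\n\nif __name__ == '__main__':\n    main()\n"
  },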
  {
    "path": "mmdet3d/core/bbox/coders/delta_xyzwhlr_bbox_coder.py",
    "content": "import torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\n\n\n@BBOX_CODERS.register_module()\nclass DeltaXYZWLHRBBoxCoder(BaseBBoxCoder):\n    \"\"\"Bbox Coder for 3D boxes.\n\n    Args:\n        code_size (int): The dimension of boxes to be encoded.\n    \"\"\"\n\n    def __init__(self, code_size=7):\n        super(DeltaXYZWLHRBBoxCoder, self).__init__()\n        self.code_size = code_size\n\n    @staticmethod\n    def encode(src_boxes, dst_boxes):\n        \"\"\"Get box regression transformation deltas (dx, dy, dz, dw, dh, dl,\n        dr, dv*) that can be used to transform the `src_boxes` into the\n        `target_boxes`.\n\n        Args:\n            src_boxes (torch.Tensor): source boxes, e.g., object proposals.\n            dst_boxes (torch.Tensor): target of the transformation, e.g.,\n                ground-truth boxes.\n\n        Returns:\n            torch.Tensor: Box transformation deltas.\n        \"\"\"\n        box_ndim = src_boxes.shape[-1]\n        cas, cgs, cts = [], [], []\n        if box_ndim > 7:\n            xa, ya, za, wa, la, ha, ra, *cas = torch.split(\n                src_boxes, 1, dim=-1)\n            xg, yg, zg, wg, lg, hg, rg, *cgs = torch.split(\n                dst_boxes, 1, dim=-1)\n            cts = [g - a for g, a in zip(cgs, cas)]\n        else:\n            xa, ya, za, wa, la, ha, ra = torch.split(src_boxes, 1, dim=-1)\n            xg, yg, zg, wg, lg, hg, rg = torch.split(dst_boxes, 1, dim=-1)\n        za = za + ha / 2\n        zg = zg + hg / 2\n        diagonal = torch.sqrt(la**2 + wa**2)\n        xt = (xg - xa) / diagonal\n        yt = (yg - ya) / diagonal\n        zt = (zg - za) / ha\n        lt = torch.log(lg / la)\n        wt = torch.log(wg / wa)\n        ht = torch.log(hg / ha)\n        rt = rg - ra\n        return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1)\n\n    @staticmethod\n    def decode(anchors, deltas):\n        \"\"\"Apply transformation `deltas` (dx, dy, dz, dw, dh, dl, dr, dv*) to\n        `boxes`.\n\n        Args:\n            anchors (torch.Tensor): Parameters of anchors with shape (N, 7).\n            deltas (torch.Tensor): Encoded boxes with shape\n                (N, 7+n) [x, y, z, w, l, h, r, velo*].\n\n        Returns:\n            torch.Tensor: Decoded boxes.\n        \"\"\"\n        cas, cts = [], []\n        box_ndim = anchors.shape[-1]\n        if box_ndim > 7:\n            xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1)\n            xt, yt, zt, wt, lt, ht, rt, *cts = torch.split(deltas, 1, dim=-1)\n        else:\n            xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1)\n            xt, yt, zt, wt, lt, ht, rt = torch.split(deltas, 1, dim=-1)\n\n        za = za + ha / 2\n        diagonal = torch.sqrt(la**2 + wa**2)\n        xg = xt * diagonal + xa\n        yg = yt * diagonal + ya\n        zg = zt * ha + za\n\n        lg = torch.exp(lt) * la\n        wg = torch.exp(wt) * wa\n        hg = torch.exp(ht) * ha\n        rg = rt + ra\n        zg = zg - hg / 2\n        cgs = [t + a for t, a in zip(cts, cas)]\n        return torch.cat([xg, yg, zg, wg, lg, hg, rg, *cgs], dim=-1)\n"
  },
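  {
    "path": "examples/bbox_coder_sketches/delta_xyzwhlr_roundtrip_sketch.py",
    "content": "# Illustrative round-trip sketch, not part of the original codebase; the file\n# path and the box values are made up. It assumes mmdet/mmdet3d are importable\n# and only exercises the pure-torch encode()/decode() staticmethods of\n# DeltaXYZWLHRBBoxCoder: decoding the deltas produced from (anchors, gt)\n# should reproduce gt.\nimport torch\n\nfrom mmdet3d.core.bbox.coders.delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder\n\n\ndef main():\n    # anchors and ground truth in the (x, y, z, w, l, h, r) layout used above\n    anchors = torch.tensor([[0.0, 0.0, -1.0, 1.6, 3.9, 1.56, 0.0]])\n    gt = torch.tensor([[0.5, -0.3, -0.9, 1.7, 4.1, 1.50, 0.3]])\n\n    deltas = DeltaXYZWLHRBBoxCoder.encode(anchors, gt)\n    decoded = DeltaXYZWLHRBBoxCoder.decode(anchors, deltas)\n\n    # decode(anchors, encode(anchors, gt)) recovers the ground-truth boxes\n    assert torch.allclose(decoded, gt, atol=1e-5)\n    print(deltas)\n\n\nif __name__ == '__main__':\n    main()\n"
  },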
  {
    "path": "mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\n\n\n@BBOX_CODERS.register_module()\nclass PartialBinBasedBBoxCoder(BaseBBoxCoder):\n    \"\"\"Partial bin based bbox coder.\n\n    Args:\n        num_dir_bins (int): Number of bins to encode direction angle.\n        num_sizes (int): Number of size clusters.\n        mean_sizes (list[list[int]]): Mean size of bboxes in each class.\n        with_rot (bool): Whether the bbox is with rotation.\n    \"\"\"\n\n    def __init__(self, num_dir_bins, num_sizes, mean_sizes, with_rot=True):\n        super(PartialBinBasedBBoxCoder, self).__init__()\n        assert len(mean_sizes) == num_sizes\n        self.num_dir_bins = num_dir_bins\n        self.num_sizes = num_sizes\n        self.mean_sizes = mean_sizes\n        self.with_rot = with_rot\n\n    def encode(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Encode ground truth to prediction targets.\n\n        Args:\n            gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes \\\n                with shape (n, 7).\n            gt_labels_3d (torch.Tensor): Ground truth classes.\n\n        Returns:\n            tuple: Targets of center, size and direction.\n        \"\"\"\n        # generate center target\n        center_target = gt_bboxes_3d.gravity_center\n\n        # generate bbox size target\n        size_class_target = gt_labels_3d\n        size_res_target = gt_bboxes_3d.dims - gt_bboxes_3d.tensor.new_tensor(\n            self.mean_sizes)[size_class_target]\n\n        # generate dir target\n        box_num = gt_labels_3d.shape[0]\n        if self.with_rot:\n            (dir_class_target,\n             dir_res_target) = self.angle2class(gt_bboxes_3d.yaw)\n        else:\n            dir_class_target = gt_labels_3d.new_zeros(box_num)\n            dir_res_target = gt_bboxes_3d.tensor.new_zeros(box_num)\n\n        return (center_target, size_class_target, size_res_target,\n                dir_class_target, dir_res_target)\n\n    def decode(self, bbox_out, suffix=''):\n        \"\"\"Decode predicted parts to bbox3d.\n\n        Args:\n            bbox_out (dict): Predictions from model, should contain keys below.\n\n                - center: predicted bottom center of bboxes.\n                - dir_class: predicted bbox direction class.\n                - dir_res: predicted bbox direction residual.\n                - size_class: predicted bbox size class.\n                - size_res: predicted bbox size residual.\n            suffix (str): Decode predictions with specific suffix.\n\n        Returns:\n            torch.Tensor: Decoded bbox3d with shape (batch, n, 7).\n        \"\"\"\n        center = bbox_out['center' + suffix]\n        batch_size, num_proposal = center.shape[:2]\n\n        # decode heading angle\n        if self.with_rot:\n            dir_class = torch.argmax(bbox_out['dir_class' + suffix], -1)\n            dir_res = torch.gather(bbox_out['dir_res' + suffix], 2,\n                                   dir_class.unsqueeze(-1))\n            dir_res.squeeze_(2)\n            dir_angle = self.class2angle(dir_class, dir_res).reshape(\n                batch_size, num_proposal, 1)\n        else:\n            dir_angle = center.new_zeros(batch_size, num_proposal, 1)\n\n        # decode bbox size\n        size_class = torch.argmax(\n            bbox_out['size_class' + suffix], -1, keepdim=True)\n        size_res = torch.gather(bbox_out['size_res' + suffix], 2,\n                                
size_class.unsqueeze(-1).repeat(1, 1, 1, 3))\n        mean_sizes = center.new_tensor(self.mean_sizes)\n        size_base = torch.index_select(mean_sizes, 0, size_class.reshape(-1))\n        bbox_size = size_base.reshape(batch_size, num_proposal,\n                                      -1) + size_res.squeeze(2)\n\n        bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)\n        return bbox3d\n\n    def decode_corners(self, center, size_res, size_class):\n        \"\"\"Decode center, size residuals and class to corners. Only useful for\n        axis-aligned bounding boxes, so angle isn't considered.\n\n        Args:\n            center (torch.Tensor): Shape [B, N, 3]\n            size_res (torch.Tensor): Shape [B, N, 3] or [B, N, C, 3]\n            size_class (torch.Tensor): Shape: [B, N] or [B, N, 1]\n            or [B, N, C, 3]\n\n        Returns:\n            torch.Tensor: Corners with shape [B, N, 6]\n        \"\"\"\n        if len(size_class.shape) == 2 or size_class.shape[-1] == 1:\n            batch_size, proposal_num = size_class.shape[:2]\n            one_hot_size_class = size_res.new_zeros(\n                (batch_size, proposal_num, self.num_sizes))\n            if len(size_class.shape) == 2:\n                size_class = size_class.unsqueeze(-1)\n            one_hot_size_class.scatter_(2, size_class, 1)\n            one_hot_size_class_expand = one_hot_size_class.unsqueeze(\n                -1).repeat(1, 1, 1, 3).contiguous()\n        else:\n            one_hot_size_class_expand = size_class\n\n        if len(size_res.shape) == 4:\n            size_res = torch.sum(size_res * one_hot_size_class_expand, 2)\n\n        mean_sizes = size_res.new_tensor(self.mean_sizes)\n        mean_sizes = torch.sum(mean_sizes * one_hot_size_class_expand, 2)\n        size_full = (size_res + 1) * mean_sizes\n        size_full = torch.clamp(size_full, 0)\n        half_size_full = size_full / 2\n        corner1 = center - half_size_full\n        corner2 = center + half_size_full\n        corners = torch.cat([corner1, corner2], dim=-1)\n        return corners\n\n    def split_pred(self, cls_preds, reg_preds, base_xyz):\n        \"\"\"Split predicted features to specific parts.\n\n        Args:\n            cls_preds (torch.Tensor): Class predicted features to split.\n            reg_preds (torch.Tensor): Regression predicted features to split.\n            base_xyz (torch.Tensor): Coordinates of points.\n\n        Returns:\n            dict[str, torch.Tensor]: Split results.\n        \"\"\"\n        results = {}\n        start, end = 0, 0\n\n        cls_preds_trans = cls_preds.transpose(2, 1)\n        reg_preds_trans = reg_preds.transpose(2, 1)\n\n        # decode center\n        end += 3\n        # (batch_size, num_proposal, 3)\n        results['center'] = base_xyz + \\\n            reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        # decode direction\n        end += self.num_dir_bins\n        results['dir_class'] = reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        end += self.num_dir_bins\n        dir_res_norm = reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        results['dir_res_norm'] = dir_res_norm\n        results['dir_res'] = dir_res_norm * (np.pi / self.num_dir_bins)\n\n        # decode size\n        end += self.num_sizes\n        results['size_class'] = reg_preds_trans[..., start:end].contiguous()\n        start = end\n\n        end += self.num_sizes * 3\n        size_res_norm = reg_preds_trans[..., 
start:end]\n        batch_size, num_proposal = reg_preds_trans.shape[:2]\n        size_res_norm = size_res_norm.view(\n            [batch_size, num_proposal, self.num_sizes, 3])\n        start = end\n\n        results['size_res_norm'] = size_res_norm.contiguous()\n        mean_sizes = reg_preds.new_tensor(self.mean_sizes)\n        results['size_res'] = (\n            size_res_norm * mean_sizes.unsqueeze(0).unsqueeze(0))\n\n        # decode objectness score\n        start = 0\n        end = 2\n        results['obj_scores'] = cls_preds_trans[..., start:end].contiguous()\n        start = end\n\n        # decode semantic score\n        results['sem_scores'] = cls_preds_trans[..., start:].contiguous()\n\n        return results\n\n    def angle2class(self, angle):\n        \"\"\"Convert continuous angle to a discrete class and a residual.\n\n        Convert continuous angle to a discrete class and a small\n        regression number from class center angle to current angle.\n\n        Args:\n            angle (torch.Tensor): Angle is from 0-2pi (or -pi~pi),\n                class center at 0, 1*(2pi/N), 2*(2pi/N) ...  (N-1)*(2pi/N).\n\n        Returns:\n            tuple: Encoded discrete class and residual.\n        \"\"\"\n        angle = angle % (2 * np.pi)\n        angle_per_class = 2 * np.pi / float(self.num_dir_bins)\n        shifted_angle = (angle + angle_per_class / 2) % (2 * np.pi)\n        angle_cls = shifted_angle // angle_per_class\n        angle_res = shifted_angle - (\n            angle_cls * angle_per_class + angle_per_class / 2)\n        return angle_cls.long(), angle_res\n\n    def class2angle(self, angle_cls, angle_res, limit_period=True):\n        \"\"\"Inverse function to angle2class.\n\n        Args:\n            angle_cls (torch.Tensor): Angle class to decode.\n            angle_res (torch.Tensor): Angle residual to decode.\n            limit_period (bool): Whether to limit angle to [-pi, pi].\n\n        Returns:\n            torch.Tensor: Angle decoded from angle_cls and angle_res.\n        \"\"\"\n        angle_per_class = 2 * np.pi / float(self.num_dir_bins)\n        angle_center = angle_cls.float() * angle_per_class\n        angle = angle_center + angle_res\n        if limit_period:\n            angle[angle > np.pi] -= 2 * np.pi\n        return angle\n"
  },
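  {
    "path": "examples/bbox_coder_sketches/partial_bin_angle_roundtrip_sketch.py",
    "content": "# Illustrative sketch, not part of the original codebase; the path and the\n# coder hyper-parameters (12 direction bins, dummy mean sizes) are made up.\n# It assumes mmdet/mmdet3d are importable and shows that class2angle() is the\n# inverse of angle2class() up to the [-pi, pi] wrap-around.\nimport numpy as np\nimport torch\n\nfrom mmdet3d.core.bbox.coders.partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder\n\n\ndef main():\n    coder = PartialBinBasedBBoxCoder(\n        num_dir_bins=12,\n        num_sizes=10,\n        mean_sizes=[[2.0, 2.0, 2.0]] * 10,\n        with_rot=True)\n\n    angles = torch.tensor([0.1, 1.5, 3.0, 5.9])\n    dir_cls, dir_res = coder.angle2class(angles)\n    recovered = coder.class2angle(dir_cls, dir_res)\n\n    # class2angle limits its output to (-pi, pi], so compare modulo 2*pi\n    assert torch.allclose(recovered % (2 * np.pi), angles % (2 * np.pi),\n                          atol=1e-5)\n    print(dir_cls, dir_res)\n\n\nif __name__ == '__main__':\n    main()\n"
  },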
  {
    "path": "mmdet3d/core/bbox/coders/transfusion_bbox_coder.py",
    "content": "import torch\n\nfrom mmdet.core.bbox import BaseBBoxCoder\nfrom mmdet.core.bbox.builder import BBOX_CODERS\n\n\n@BBOX_CODERS.register_module()\nclass TransFusionBBoxCoder(BaseBBoxCoder):\n    def __init__(self,\n                 pc_range,\n                 out_size_factor,\n                 voxel_size,\n                 post_center_range=None,\n                 score_threshold=None,\n                 code_size=8,\n                 ):\n        self.pc_range = pc_range\n        self.out_size_factor = out_size_factor\n        self.voxel_size = voxel_size\n        self.post_center_range = post_center_range\n        self.score_threshold = score_threshold\n        self.code_size = code_size\n\n    def encode(self, dst_boxes):\n        targets = torch.zeros([dst_boxes.shape[0], self.code_size]).to(dst_boxes.device)\n        targets[:, 0] = (dst_boxes[:, 0] - self.pc_range[0]) / (self.out_size_factor * self.voxel_size[0])\n        targets[:, 1] = (dst_boxes[:, 1] - self.pc_range[1]) / (self.out_size_factor * self.voxel_size[1])\n        # targets[:, 2] = (dst_boxes[:, 2] - self.post_center_range[2]) / (self.post_center_range[5] - self.post_center_range[2])\n        targets[:, 3] = dst_boxes[:, 3].log()\n        targets[:, 4] = dst_boxes[:, 4].log()\n        targets[:, 5] = dst_boxes[:, 5].log()\n        targets[:, 2] = dst_boxes[:, 2] + dst_boxes[:, 5] * 0.5  # bottom center to gravity center\n        targets[:, 6] = torch.sin(dst_boxes[:, 6])\n        targets[:, 7] = torch.cos(dst_boxes[:, 6])\n        if self.code_size == 10:\n            targets[:, 8:10] = dst_boxes[:, 7:]\n        return targets\n\n    def decode(self, heatmap, rot, dim, center, height, vel, filter=False):\n        \"\"\"Decode bboxes.\n\n        Args:\n            heat (torch.Tensor): Heatmap with the shape of [B, num_cls, num_proposals].\n            rot (torch.Tensor): Rotation with the shape of\n                [B, 1, num_proposals].\n            dim (torch.Tensor): Dim of the boxes with the shape of\n                [B, 3, num_proposals].\n            center (torch.Tensor): bev center of the boxes with the shape of\n                [B, 2, num_proposals]. (in feature map metric)\n            hieght (torch.Tensor): height of the boxes with the shape of\n                [B, 2, num_proposals]. 
(in real world metric)\n            vel (torch.Tensor): Velocity with the shape of [B, 2, num_proposals].\n            filter: if False, return all box without checking score and center_range\n\n        Returns:\n            list[dict]: Decoded boxes.\n        \"\"\"\n        # class label\n        final_preds = heatmap.max(1, keepdims=False).indices\n        final_scores = heatmap.max(1, keepdims=False).values\n\n        # change size to real world metric\n        center[:, 0, :] = center[:, 0, :] * self.out_size_factor * self.voxel_size[0] + self.pc_range[0]\n        center[:, 1, :] = center[:, 1, :] * self.out_size_factor * self.voxel_size[1] + self.pc_range[1]\n        # center[:, 2, :] = center[:, 2, :] * (self.post_center_range[5] - self.post_center_range[2]) + self.post_center_range[2]\n        dim[:, 0, :] = dim[:, 0, :].exp()\n        dim[:, 1, :] = dim[:, 1, :].exp()\n        dim[:, 2, :] = dim[:, 2, :].exp()\n        height = height - dim[:, 2:3, :] * 0.5  # gravity center to bottom center\n        rots, rotc = rot[:, 0:1, :], rot[:, 1:2, :]\n        rot = torch.atan2(rots, rotc)\n\n        if vel is None:\n            final_box_preds = torch.cat([center, height, dim, rot], dim=1).permute(0, 2, 1)\n        else:\n            final_box_preds = torch.cat([center, height, dim, rot, vel], dim=1).permute(0, 2, 1)\n\n        predictions_dicts = []\n        for i in range(heatmap.shape[0]):\n            boxes3d = final_box_preds[i]\n            scores = final_scores[i]\n            labels = final_preds[i]\n            predictions_dict = {\n                'bboxes': boxes3d,\n                'scores': scores,\n                'labels': labels\n            }\n            predictions_dicts.append(predictions_dict)\n\n        if filter is False:\n            return predictions_dicts\n\n        # use score threshold\n        if self.score_threshold is not None:\n            thresh_mask = final_scores > self.score_threshold\n\n        if self.post_center_range is not None:\n            self.post_center_range = torch.tensor(\n                self.post_center_range, device=heatmap.device)\n            mask = (final_box_preds[..., :3] >=\n                    self.post_center_range[:3]).all(2)\n            mask &= (final_box_preds[..., :3] <=\n                     self.post_center_range[3:]).all(2)\n\n            predictions_dicts = []\n            for i in range(heatmap.shape[0]):\n                cmask = mask[i, :]\n                if self.score_threshold:\n                    cmask &= thresh_mask[i]\n\n                boxes3d = final_box_preds[i, cmask]\n                scores = final_scores[i, cmask]\n                labels = final_preds[i, cmask]\n                predictions_dict = {\n                    'bboxes': boxes3d,\n                    'scores': scores,\n                    'labels': labels,\n                    'cmask': cmask\n                }\n\n                predictions_dicts.append(predictions_dict)\n        else:\n            raise NotImplementedError(\n                'Need to reorganize output as a batch, only '\n                'support post_center_range is not None for now!')\n\n        return predictions_dicts\n"
  },
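  {
    "path": "examples/bbox_coder_sketches/transfusion_encode_sketch.py",
    "content": "# Illustrative sketch, not part of the original codebase; the path and the\n# nuScenes-style settings (voxel_size=0.075, out_size_factor=8, pc_range) are\n# made-up example values. It assumes mmdet/mmdet3d are importable and shows\n# what encode() produces for a single ground-truth box: BEV center in feature\n# map units, gravity-center z, log sizes and sin/cos of yaw.\nimport torch\n\nfrom mmdet3d.core.bbox.coders.transfusion_bbox_coder import TransFusionBBoxCoder\n\n\ndef main():\n    coder = TransFusionBBoxCoder(\n        pc_range=[-54.0, -54.0],\n        out_size_factor=8,\n        voxel_size=[0.075, 0.075],\n        post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n        score_threshold=0.0,\n        code_size=8)\n\n    # one box: (x, y, z_bottom, w, l, h, yaw) in LiDAR coordinates\n    gt = torch.tensor([[10.0, -5.0, -1.8, 1.9, 4.6, 1.7, 0.3]])\n    targets = coder.encode(gt)\n    print(targets)  # shape (1, 8)\n\n\nif __name__ == '__main__':\n    main()\n"
  },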
  {
    "path": "mmdet3d/core/bbox/iou_calculators/__init__.py",
    "content": "from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,\n                               BboxOverlapsNearest3D,\n                               axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d,\n                               bbox_overlaps_nearest_3d)\n\n__all__ = [\n    'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',\n    'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D',\n    'axis_aligned_bbox_overlaps_3d'\n]\n"
  },
  {
    "path": "mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py",
    "content": "import torch\n\nfrom mmdet.core.bbox import bbox_overlaps\nfrom mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS\nfrom ..structures import get_box_type\n\n\n@IOU_CALCULATORS.register_module()\nclass BboxOverlapsNearest3D(object):\n    \"\"\"Nearest 3D IoU Calculator.\n\n    Note:\n        This IoU calculator first finds the nearest 2D boxes in bird eye view\n        (BEV), and then calculates the 2D IoU using :meth:`bbox_overlaps`.\n\n    Args:\n        coordinate (str): 'camera', 'lidar', or 'depth' coordinate system.\n    \"\"\"\n\n    def __init__(self, coordinate='lidar'):\n        assert coordinate in ['camera', 'lidar', 'depth']\n        self.coordinate = coordinate\n\n    def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):\n        \"\"\"Calculate nearest 3D IoU.\n\n        Note:\n            If ``is_aligned`` is ``False``, then it calculates the ious between\n            each bbox of bboxes1 and bboxes2, otherwise it calculates the ious\n            between each aligned pair of bboxes1 and bboxes2.\n\n        Args:\n            bboxes1 (torch.Tensor): shape (N, 7+N) [x, y, z, h, w, l, ry, v].\n            bboxes2 (torch.Tensor): shape (M, 7+N) [x, y, z, h, w, l, ry, v].\n            mode (str): \"iou\" (intersection over union) or iof\n                (intersection over foreground).\n            is_aligned (bool): Whether the calculation is aligned.\n\n        Return:\n            torch.Tensor: If ``is_aligned`` is ``True``, return ious between \\\n                bboxes1 and bboxes2 with shape (M, N). If ``is_aligned`` is \\\n                ``False``, return shape is M.\n        \"\"\"\n        return bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode, is_aligned,\n                                        self.coordinate)\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(coordinate={self.coordinate}'\n        return repr_str\n\n\n@IOU_CALCULATORS.register_module()\nclass BboxOverlaps3D(object):\n    \"\"\"3D IoU Calculator.\n\n    Args:\n        coordinate (str): The coordinate system, valid options are\n            'camera', 'lidar', and 'depth'.\n    \"\"\"\n\n    def __init__(self, coordinate):\n        assert coordinate in ['camera', 'lidar', 'depth']\n        self.coordinate = coordinate\n\n    def __call__(self, bboxes1, bboxes2, mode='iou'):\n        \"\"\"Calculate 3D IoU using cuda implementation.\n\n        Note:\n            This function calculate the IoU of 3D boxes based on their volumes.\n            IoU calculator ``:class:BboxOverlaps3D`` uses this function to\n            calculate the actual 3D IoUs of boxes.\n\n        Args:\n            bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, h, w, l, ry].\n            bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, h, w, l, ry].\n            mode (str): \"iou\" (intersection over union) or\n                iof (intersection over foreground).\n\n        Return:\n            torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2 \\\n                with shape (M, N) (aligned mode is not supported currently).\n        \"\"\"\n        return bbox_overlaps_3d(bboxes1, bboxes2, mode, self.coordinate)\n\n    def __repr__(self):\n        \"\"\"str: return a string that describes the module\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(coordinate={self.coordinate}'\n        return repr_str\n\n\ndef bbox_overlaps_nearest_3d(bboxes1,\n       
                      bboxes2,\n                             mode='iou',\n                             is_aligned=False,\n                             coordinate='lidar'):\n    \"\"\"Calculate nearest 3D IoU.\n\n    Note:\n        This function first finds the nearest 2D boxes in bird eye view\n        (BEV), and then calculates the 2D IoU using :meth:`bbox_overlaps`.\n        Ths IoU calculator :class:`BboxOverlapsNearest3D` uses this\n        function to calculate IoUs of boxes.\n\n        If ``is_aligned`` is ``False``, then it calculates the ious between\n        each bbox of bboxes1 and bboxes2, otherwise the ious between each\n        aligned pair of bboxes1 and bboxes2.\n\n    Args:\n        bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, h, w, l, ry, v].\n        bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, h, w, l, ry, v].\n        mode (str): \"iou\" (intersection over union) or iof\n            (intersection over foreground).\n        is_aligned (bool): Whether the calculation is aligned\n\n    Return:\n        torch.Tensor: If ``is_aligned`` is ``True``, return ious between \\\n            bboxes1 and bboxes2 with shape (M, N). If ``is_aligned`` is \\\n            ``False``, return shape is M.\n    \"\"\"\n    assert bboxes1.size(-1) == bboxes2.size(-1) >= 7\n\n    box_type, _ = get_box_type(coordinate)\n\n    bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])\n    bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])\n\n    # Change the bboxes to bev\n    # box conversion and iou calculation in torch version on CUDA\n    # is 10x faster than that in numpy version\n    bboxes1_bev = bboxes1.nearest_bev\n    bboxes2_bev = bboxes2.nearest_bev\n\n    ret = bbox_overlaps(\n        bboxes1_bev, bboxes2_bev, mode=mode, is_aligned=is_aligned)\n    return ret\n\n\ndef bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):\n    \"\"\"Calculate 3D IoU using cuda implementation.\n\n    Note:\n        This function calculates the IoU of 3D boxes based on their volumes.\n        IoU calculator :class:`BboxOverlaps3D` uses this function to\n        calculate the actual IoUs of boxes.\n\n    Args:\n        bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, h, w, l, ry].\n        bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, h, w, l, ry].\n        mode (str): \"iou\" (intersection over union) or\n            iof (intersection over foreground).\n        coordinate (str): 'camera' or 'lidar' coordinate system.\n\n    Return:\n        torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2 \\\n            with shape (M, N) (aligned mode is not supported currently).\n    \"\"\"\n    assert bboxes1.size(-1) == bboxes2.size(-1) >= 7\n\n    box_type, _ = get_box_type(coordinate)\n\n    bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])\n    bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])\n\n    return bboxes1.overlaps(bboxes1, bboxes2, mode=mode)\n\n\n@IOU_CALCULATORS.register_module()\nclass AxisAlignedBboxOverlaps3D(object):\n    \"\"\"Axis-aligned 3D Overlaps (IoU) Calculator.\"\"\"\n\n    def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):\n        \"\"\"Calculate IoU between 2D bboxes.\n\n        Args:\n            bboxes1 (Tensor): shape (B, m, 6) in <x1, y1, z1, x2, y2, z2>\n            format or empty.\n            bboxes2 (Tensor): shape (B, n, 6) in <x1, y1, z1, x2, y2, z2>\n            format or empty.\n                B indicates the batch dim, in shape (B1, B2, ..., Bn).\n                If ``is_aligned `` is ``True``, then m and n 
must be equal.\n            mode (str): \"iou\" (intersection over union) or \"giou\" (generalized\n                intersection over union).\n            is_aligned (bool, optional): If True, then m and n must be equal.\n                Default False.\n        Returns:\n            Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)\n        \"\"\"\n        assert bboxes1.size(-1) == bboxes2.size(-1) == 6\n        return axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2, mode,\n                                             is_aligned)\n\n    def __repr__(self):\n        \"\"\"str: a string describing the module\"\"\"\n        repr_str = self.__class__.__name__ + '()'\n        return repr_str\n\n\ndef axis_aligned_bbox_overlaps_3d(bboxes1,\n                                  bboxes2,\n                                  mode='iou',\n                                  is_aligned=False,\n                                  eps=1e-6):\n    \"\"\"Calculate overlap between two set of axis aligned 3D bboxes. If\n    ``is_aligned `` is ``False``, then calculate the overlaps between each bbox\n    of bboxes1 and bboxes2, otherwise the overlaps between each aligned pair of\n    bboxes1 and bboxes2.\n\n    Args:\n        bboxes1 (Tensor): shape (B, m, 6) in <x1, y1, z1, x2, y2, z2>\n        format or empty.\n        bboxes2 (Tensor): shape (B, n, 6) in <x1, y1, z1, x2, y2, z2>\n        format or empty.\n            B indicates the batch dim, in shape (B1, B2, ..., Bn).\n            If ``is_aligned `` is ``True``, then m and n must be equal.\n        mode (str): \"iou\" (intersection over union) or \"giou\" (generalized\n            intersection over union).\n        is_aligned (bool, optional): If True, then m and n must be equal.\n            Default False.\n        eps (float, optional): A value added to the denominator for numerical\n            stability. Default 1e-6.\n\n    Returns:\n        Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)\n\n    Example:\n        >>> bboxes1 = torch.FloatTensor([\n        >>>     [0, 0, 0, 10, 10, 10],\n        >>>     [10, 10, 10, 20, 20, 20],\n        >>>     [32, 32, 32, 38, 40, 42],\n        >>> ])\n        >>> bboxes2 = torch.FloatTensor([\n        >>>     [0, 0, 0, 10, 20, 20],\n        >>>     [0, 10, 10, 10, 19, 20],\n        >>>     [10, 10, 10, 20, 20, 20],\n        >>> ])\n        >>> overlaps = axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2)\n        >>> assert overlaps.shape == (3, 3)\n        >>> overlaps = bbox_overlaps(bboxes1, bboxes2, is_aligned=True)\n        >>> assert overlaps.shape == (3, )\n    Example:\n        >>> empty = torch.empty(0, 6)\n        >>> nonempty = torch.FloatTensor([[0, 0, 0, 10, 9, 10]])\n        >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)\n        >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)\n        >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)\n    \"\"\"\n\n    assert mode in ['iou', 'giou'], f'Unsupported mode {mode}'\n    # Either the boxes are empty or the length of boxes's last dimenstion is 6\n    assert (bboxes1.size(-1) == 6 or bboxes1.size(0) == 0)\n    assert (bboxes2.size(-1) == 6 or bboxes2.size(0) == 0)\n\n    # Batch dim must be the same\n    # Batch dim: (B1, B2, ... 
Bn)\n    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]\n    batch_shape = bboxes1.shape[:-2]\n\n    rows = bboxes1.size(-2)\n    cols = bboxes2.size(-2)\n    if is_aligned:\n        assert rows == cols\n\n    if rows * cols == 0:\n        if is_aligned:\n            return bboxes1.new(batch_shape + (rows, ))\n        else:\n            return bboxes1.new(batch_shape + (rows, cols))\n\n    area1 = (bboxes1[..., 3] -\n             bboxes1[..., 0]) * (bboxes1[..., 4] - bboxes1[..., 1]) * (\n                 bboxes1[..., 5] - bboxes1[..., 2])\n    area2 = (bboxes2[..., 3] -\n             bboxes2[..., 0]) * (bboxes2[..., 4] - bboxes2[..., 1]) * (\n                 bboxes2[..., 5] - bboxes2[..., 2])\n\n    if is_aligned:\n        lt = torch.max(bboxes1[..., :3], bboxes2[..., :3])  # [B, rows, 3]\n        rb = torch.min(bboxes1[..., 3:], bboxes2[..., 3:])  # [B, rows, 3]\n\n        wh = (rb - lt).clamp(min=0)  # [B, rows, 2]\n        overlap = wh[..., 0] * wh[..., 1] * wh[..., 2]\n\n        if mode in ['iou', 'giou']:\n            union = area1 + area2 - overlap\n        else:\n            union = area1\n        if mode == 'giou':\n            enclosed_lt = torch.min(bboxes1[..., :3], bboxes2[..., :3])\n            enclosed_rb = torch.max(bboxes1[..., 3:], bboxes2[..., 3:])\n    else:\n        lt = torch.max(bboxes1[..., :, None, :3],\n                       bboxes2[..., None, :, :3])  # [B, rows, cols, 3]\n        rb = torch.min(bboxes1[..., :, None, 3:],\n                       bboxes2[..., None, :, 3:])  # [B, rows, cols, 3]\n\n        wh = (rb - lt).clamp(min=0)  # [B, rows, cols, 3]\n        overlap = wh[..., 0] * wh[..., 1] * wh[..., 2]\n\n        if mode in ['iou', 'giou']:\n            union = area1[..., None] + area2[..., None, :] - overlap\n        if mode == 'giou':\n            enclosed_lt = torch.min(bboxes1[..., :, None, :3],\n                                    bboxes2[..., None, :, :3])\n            enclosed_rb = torch.max(bboxes1[..., :, None, 3:],\n                                    bboxes2[..., None, :, 3:])\n\n    eps = union.new_tensor([eps])\n    union = torch.max(union, eps)\n    ious = overlap / union\n    if mode in ['iou']:\n        return ious\n    # calculate gious\n    enclose_wh = (enclosed_rb - enclosed_lt).clamp(min=0)\n    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1] * enclose_wh[..., 2]\n    enclose_area = torch.max(enclose_area, eps)\n    gious = ious - (enclose_area - union) / enclose_area\n    return gious\n"
  },
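  {
    "path": "examples/bbox_coder_sketches/axis_aligned_iou3d_sketch.py",
    "content": "# Illustrative sketch, not part of the original codebase; the path is made up\n# and the box values are modeled on the docstring example above. It assumes a\n# full mmdet3d build is importable and exercises the pure-torch\n# axis_aligned_bbox_overlaps_3d() helper.\nimport torch\n\nfrom mmdet3d.core.bbox.iou_calculators import axis_aligned_bbox_overlaps_3d\n\n\ndef main():\n    bboxes1 = torch.tensor([[0., 0., 0., 10., 10., 10.],\n                            [10., 10., 10., 20., 20., 20.],\n                            [32., 32., 32., 38., 40., 42.]])\n    bboxes2 = torch.tensor([[0., 0., 0., 10., 20., 20.],\n                            [0., 10., 10., 10., 19., 20.],\n                            [10., 10., 10., 20., 20., 20.]])\n\n    ious = axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2)\n    assert ious.shape == (3, 3)\n    # bboxes1[0] and bboxes2[0] share a 10 x 10 x 10 cube:\n    # IoU = 1000 / (1000 + 4000 - 1000) = 0.25\n    assert abs(ious[0, 0].item() - 0.25) < 1e-6\n    aligned = axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2, is_aligned=True)\n    assert aligned.shape == (3,)\n    print(ious)\n\n\nif __name__ == '__main__':\n    main()\n"
  },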
  {
    "path": "mmdet3d/core/bbox/samplers/__init__.py",
    "content": "from mmdet.core.bbox.samplers import (BaseSampler, CombinedSampler,\n                                      InstanceBalancedPosSampler,\n                                      IoUBalancedNegSampler, OHEMSampler,\n                                      PseudoSampler, RandomSampler,\n                                      SamplingResult)\nfrom .iou_neg_piecewise_sampler import IoUNegPiecewiseSampler\n\n__all__ = [\n    'BaseSampler', 'PseudoSampler', 'RandomSampler',\n    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',\n    'OHEMSampler', 'SamplingResult', 'IoUNegPiecewiseSampler'\n]\n"
  },
  {
    "path": "mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py",
    "content": "import torch\n\nfrom mmdet.core.bbox.builder import BBOX_SAMPLERS\nfrom . import RandomSampler, SamplingResult\n\n\n@BBOX_SAMPLERS.register_module()\nclass IoUNegPiecewiseSampler(RandomSampler):\n    \"\"\"IoU Piece-wise Sampling.\n\n    Sampling negtive proposals according to a list of IoU thresholds.\n    The negtive proposals are divided into several pieces according\n    to `neg_iou_piece_thrs`. And the ratio of each piece is indicated\n    by `neg_piece_fractions`.\n\n    Args:\n        num (int): Number of proposals.\n        pos_fraction (float): The fraction of positive proposals.\n        neg_piece_fractions (list): A list contains fractions that indicates\n            the ratio of each piece of total negtive samplers.\n        neg_iou_piece_thrs (list): A list contains IoU thresholds that\n            indicate the upper bound of this piece.\n        neg_pos_ub (float): The total ratio to limit the upper bound\n            number of negtive samples.\n        add_gt_as_proposals (bool): Whether to add gt as proposals.\n    \"\"\"\n\n    def __init__(self,\n                 num,\n                 pos_fraction=None,\n                 neg_piece_fractions=None,\n                 neg_iou_piece_thrs=None,\n                 neg_pos_ub=-1,\n                 add_gt_as_proposals=False,\n                 return_iou=False):\n        super(IoUNegPiecewiseSampler,\n              self).__init__(num, pos_fraction, neg_pos_ub,\n                             add_gt_as_proposals)\n        assert isinstance(neg_piece_fractions, list)\n        assert len(neg_piece_fractions) == len(neg_iou_piece_thrs)\n        self.neg_piece_fractions = neg_piece_fractions\n        self.neg_iou_thr = neg_iou_piece_thrs\n        self.return_iou = return_iou\n        self.neg_piece_num = len(self.neg_piece_fractions)\n\n    def _sample_pos(self, assign_result, num_expected, **kwargs):\n        \"\"\"Randomly sample some positive samples.\"\"\"\n        pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)\n        if pos_inds.numel() != 0:\n            pos_inds = pos_inds.squeeze(1)\n        if pos_inds.numel() <= num_expected:\n            return pos_inds\n        else:\n            return self.random_choice(pos_inds, num_expected)\n\n    def _sample_neg(self, assign_result, num_expected, **kwargs):\n        \"\"\"Randomly sample some negative samples.\"\"\"\n        neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False)\n        if neg_inds.numel() != 0:\n            neg_inds = neg_inds.squeeze(1)\n        if len(neg_inds) <= num_expected:\n            return neg_inds\n        else:\n            neg_inds_choice = neg_inds.new_zeros([0])\n            extend_num = 0\n            max_overlaps = assign_result.max_overlaps[neg_inds]\n\n            for piece_inds in range(self.neg_piece_num):\n                if piece_inds == self.neg_piece_num - 1:  # for the last piece\n                    piece_expected_num = num_expected - len(neg_inds_choice)\n                    min_iou_thr = 0\n                else:\n                    # if the numbers of negative samplers in previous\n                    # pieces are less than the expected number, extend\n                    # the same number in the current piece.\n                    piece_expected_num = int(\n                        num_expected *\n                        self.neg_piece_fractions[piece_inds]) + extend_num\n                    min_iou_thr = self.neg_iou_thr[piece_inds + 1]\n                max_iou_thr = 
self.neg_iou_thr[piece_inds]\n                piece_neg_inds = torch.nonzero(\n                    (max_overlaps >= min_iou_thr)\n                    & (max_overlaps < max_iou_thr),\n                    as_tuple=False).view(-1)\n\n                if len(piece_neg_inds) < piece_expected_num:\n                    neg_inds_choice = torch.cat(\n                        [neg_inds_choice, neg_inds[piece_neg_inds]], dim=0)\n                    extend_num += piece_expected_num - len(piece_neg_inds)\n                else:\n                    piece_choice = self.random_choice(piece_neg_inds,\n                                                      piece_expected_num)\n                    neg_inds_choice = torch.cat(\n                        [neg_inds_choice, neg_inds[piece_choice]], dim=0)\n                    extend_num = 0\n            return neg_inds_choice\n\n    def sample(self,\n               assign_result,\n               bboxes,\n               gt_bboxes,\n               gt_labels=None,\n               **kwargs):\n        \"\"\"Sample positive and negative bboxes.\n\n        This is a simple implementation of bbox sampling given candidates,\n        assigning results and ground truth bboxes.\n\n        Args:\n            assign_result (:obj:`AssignResult`): Bbox assigning results.\n            bboxes (torch.Tensor): Boxes to be sampled from.\n            gt_bboxes (torch.Tensor): Ground truth bboxes.\n            gt_labels (torch.Tensor, optional): Class labels of ground truth \\\n                bboxes.\n\n        Returns:\n            :obj:`SamplingResult`: Sampling result.\n        \"\"\"\n        if len(bboxes.shape) < 2:\n            bboxes = bboxes[None, :]\n\n        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.bool)\n        if self.add_gt_as_proposals and len(gt_bboxes) > 0:\n            if gt_labels is None:\n                raise ValueError(\n                    'gt_labels must be given when add_gt_as_proposals is True')\n            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)\n            assign_result.add_gt_(gt_labels)\n            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.bool)\n            gt_flags = torch.cat([gt_ones, gt_flags])\n\n        num_expected_pos = int(self.num * self.pos_fraction)\n        pos_inds = self.pos_sampler._sample_pos(\n            assign_result, num_expected_pos, bboxes=bboxes, **kwargs)\n        # We found that sampled indices have duplicated items occasionally.\n        # (may be a bug of PyTorch)\n        pos_inds = pos_inds.unique()\n        num_sampled_pos = pos_inds.numel()\n        num_expected_neg = self.num - num_sampled_pos\n        if self.neg_pos_ub >= 0:\n            _pos = max(1, num_sampled_pos)\n            neg_upper_bound = int(self.neg_pos_ub * _pos)\n            if num_expected_neg > neg_upper_bound:\n                num_expected_neg = neg_upper_bound\n        neg_inds = self.neg_sampler._sample_neg(\n            assign_result, num_expected_neg, bboxes=bboxes, **kwargs)\n        neg_inds = neg_inds.unique()\n\n        sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,\n                                         assign_result, gt_flags)\n        if self.return_iou:\n            # PartA2 needs iou score to regression.\n            sampling_result.iou = assign_result.max_overlaps[torch.cat(\n                [pos_inds, neg_inds])]\n            sampling_result.iou.detach_()\n\n        return sampling_result\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/__init__.py",
    "content": "from .base_box3d import BaseInstance3DBoxes\nfrom .box_3d_mode import Box3DMode\nfrom .cam_box3d import CameraInstance3DBoxes\nfrom .coord_3d_mode import Coord3DMode\nfrom .depth_box3d import DepthInstance3DBoxes\nfrom .lidar_box3d import LiDARInstance3DBoxes\nfrom .utils import (get_box_type, limit_period, points_cam2img,\n                    rotation_3d_in_axis, xywhr2xyxyr)\n\n__all__ = [\n    'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',\n    'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',\n    'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',\n    'Coord3DMode'\n]\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/base_box3d.py",
    "content": "import numpy as np\nimport torch\nfrom abc import abstractmethod\n\nfrom mmdet3d.ops.iou3d import iou3d_cuda\nfrom .utils import limit_period, xywhr2xyxyr\n\n\nclass BaseInstance3DBoxes(object):\n    \"\"\"Base class for 3D Boxes.\n\n    Note:\n        The box is bottom centered, i.e. the relative position of origin in\n        the box is (0.5, 0.5, 0).\n\n    Args:\n        tensor (torch.Tensor | np.ndarray | list): a N x box_dim matrix.\n        box_dim (int): Number of the dimension of a box.\n            Each row is (x, y, z, x_size, y_size, z_size, yaw).\n            Default to 7.\n        with_yaw (bool): Whether the box is with yaw rotation.\n            If False, the value of yaw will be set to 0 as minmax boxes.\n            Default to True.\n        origin (tuple[float]): The relative position of origin in the box.\n            Default to (0.5, 0.5, 0). This will guide the box be converted to\n            (0.5, 0.5, 0) mode.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x box_dim.\n        box_dim (int): Integer indicating the dimension of a box.\n            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).\n        with_yaw (bool): If True, the value of yaw will be set to 0 as minmax\n            boxes.\n    \"\"\"\n\n    def __init__(self, tensor, box_dim=7, with_yaw=True, origin=(0.5, 0.5, 0)):\n        if isinstance(tensor, torch.Tensor):\n            device = tensor.device\n        else:\n            device = torch.device('cpu')\n        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)\n        if tensor.numel() == 0:\n            # Use reshape, so we don't end up creating a new tensor that\n            # does not depend on the inputs (and consequently confuses jit)\n            tensor = tensor.reshape((0, box_dim)).to(\n                dtype=torch.float32, device=device)\n        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()\n\n        if tensor.shape[-1] == 6:\n            # If the dimension of boxes is 6, we expand box_dim by padding\n            # 0 as a fake yaw and set with_yaw to False.\n            assert box_dim == 6\n            fake_rot = tensor.new_zeros(tensor.shape[0], 1)\n            tensor = torch.cat((tensor, fake_rot), dim=-1)\n            self.box_dim = box_dim + 1\n            self.with_yaw = False\n        else:\n            self.box_dim = box_dim\n            self.with_yaw = with_yaw\n        self.tensor = tensor.clone()\n\n        if origin != (0.5, 0.5, 0):\n            dst = self.tensor.new_tensor((0.5, 0.5, 0))\n            src = self.tensor.new_tensor(origin)\n            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)\n\n    @property\n    def volume(self):\n        \"\"\"torch.Tensor: A vector with volume of each box.\"\"\"\n        return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]\n\n    @property\n    def dims(self):\n        \"\"\"torch.Tensor: Corners of each box with size (N, 8, 3).\"\"\"\n        return self.tensor[:, 3:6]\n\n    @property\n    def yaw(self):\n        \"\"\"torch.Tensor: A vector with yaw of each box.\"\"\"\n        return self.tensor[:, 6]\n\n    @property\n    def height(self):\n        \"\"\"torch.Tensor: A vector with height of each box.\"\"\"\n        return self.tensor[:, 5]\n\n    @property\n    def top_height(self):\n        \"\"\"torch.Tensor: A vector with the top height of each box.\"\"\"\n        return self.bottom_height + self.height\n\n    @property\n    def bottom_height(self):\n        
\"\"\"torch.Tensor: A vector with bottom's height of each box.\"\"\"\n        return self.tensor[:, 2]\n\n    @property\n    def center(self):\n        \"\"\"Calculate the center of all the boxes.\n\n        Note:\n            In the MMDetection3D's convention, the bottom center is\n            usually taken as the default center.\n\n            The relative position of the centers in different kinds of\n            boxes are different, e.g., the relative center of a boxes is\n            (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.\n            It is recommended to use ``bottom_center`` or ``gravity_center``\n            for more clear usage.\n\n        Returns:\n            torch.Tensor: A tensor with center of each box.\n        \"\"\"\n        return self.bottom_center\n\n    @property\n    def bottom_center(self):\n        \"\"\"torch.Tensor: A tensor with center of each box.\"\"\"\n        return self.tensor[:, :3]\n\n    @property\n    def gravity_center(self):\n        \"\"\"torch.Tensor: A tensor with center of each box.\"\"\"\n        pass\n\n    @property\n    def corners(self):\n        \"\"\"torch.Tensor: a tensor with 8 corners of each box.\"\"\"\n        pass\n\n    @abstractmethod\n    def rotate(self, angles, axis=0):\n        \"\"\"Calculate whether the points are in any of the boxes.\n\n        Args:\n            angles (float): Rotation angles.\n            axis (int): The axis to rotate the boxes.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def flip(self, bev_direction='horizontal'):\n        \"\"\"Flip the boxes in BEV along given BEV direction.\"\"\"\n        pass\n\n    def translate(self, trans_vector):\n        \"\"\"Calculate whether the points are in any of the boxes.\n\n        Args:\n            trans_vector (torch.Tensor): Translation vector of size 1x3.\n        \"\"\"\n        if not isinstance(trans_vector, torch.Tensor):\n            trans_vector = self.tensor.new_tensor(trans_vector)\n        self.tensor[:, :3] += trans_vector\n\n    def in_range_3d(self, box_range):\n        \"\"\"Check whether the boxes are in the given range.\n\n        Args:\n            box_range (list | torch.Tensor): The range of box\n                (x_min, y_min, z_min, x_max, y_max, z_max)\n\n        Note:\n            In the original implementation of SECOND, checking whether\n            a box in the range checks whether the points are in a convex\n            polygon, we try to reduce the burden for simpler cases.\n\n        Returns:\n            torch.Tensor: A binary vector indicating whether each box is \\\n                inside the reference range.\n        \"\"\"\n        in_range_flags = ((self.tensor[:, 0] > box_range[0])\n                          & (self.tensor[:, 1] > box_range[1])\n                          & (self.tensor[:, 2] > box_range[2])\n                          & (self.tensor[:, 0] < box_range[3])\n                          & (self.tensor[:, 1] < box_range[4])\n                          & (self.tensor[:, 2] < box_range[5]))\n        return in_range_flags\n\n    @abstractmethod\n    def in_range_bev(self, box_range):\n        \"\"\"Check whether the boxes are in the given range.\n\n        Args:\n            box_range (list | torch.Tensor): The range of box\n                in order of (x_min, y_min, x_max, y_max).\n\n        Returns:\n            torch.Tensor: Indicating whether each box is inside \\\n                the reference range.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def convert_to(self, dst, 
rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`BoxMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: The converted box of the same type \\\n                in the `dst` mode.\n        \"\"\"\n        pass\n\n    def scale(self, scale_factor):\n        \"\"\"Scale the box with horizontal and vertical scaling factors.\n\n        Args:\n            scale_factors (float): Scale factors to scale the boxes.\n        \"\"\"\n        self.tensor[:, :6] *= scale_factor\n        self.tensor[:, 7:] *= scale_factor\n\n    def limit_yaw(self, offset=0.5, period=np.pi):\n        \"\"\"Limit the yaw to a given period and offset.\n\n        Args:\n            offset (float): The offset of the yaw.\n            period (float): The expected period.\n        \"\"\"\n        self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period)\n\n    def nonempty(self, threshold: float = 0.0):\n        \"\"\"Find boxes that are non-empty.\n\n        A box is considered empty,\n        if either of its side is no larger than threshold.\n\n        Args:\n            threshold (float): The threshold of minimal sizes.\n\n        Returns:\n            torch.Tensor: A binary vector which represents whether each \\\n                box is empty (False) or non-empty (True).\n        \"\"\"\n        box = self.tensor\n        size_x = box[..., 3]\n        size_y = box[..., 4]\n        size_z = box[..., 5]\n        keep = ((size_x > threshold)\n                & (size_y > threshold) & (size_z > threshold))\n        return keep\n\n    def __getitem__(self, item):\n        \"\"\"\n        Note:\n            The following usage are allowed:\n            1. `new_boxes = boxes[3]`:\n                return a `Boxes` that contains only one box.\n            2. `new_boxes = boxes[2:10]`:\n                return a slice of boxes.\n            3. 
`new_boxes = boxes[vector]`:\n                where vector is a torch.BoolTensor with `length = len(boxes)`.\n                Nonzero elements in the vector will be selected.\n            Note that the returned Boxes might share storage with this Boxes,\n            subject to Pytorch's indexing semantics.\n\n        Returns:\n            :obj:`BaseInstances3DBoxes`: A new object of  \\\n                :class:`BaseInstances3DBoxes` after indexing.\n        \"\"\"\n        original_type = type(self)\n        if isinstance(item, int):\n            return original_type(\n                self.tensor[item].view(1, -1),\n                box_dim=self.box_dim,\n                with_yaw=self.with_yaw)\n        b = self.tensor[item]\n        assert b.dim() == 2, \\\n            f'Indexing on Boxes with {item} failed to return a matrix!'\n        return original_type(b, box_dim=self.box_dim, with_yaw=self.with_yaw)\n\n    def __len__(self):\n        \"\"\"int: Number of boxes in the current object.\"\"\"\n        return self.tensor.shape[0]\n\n    def __repr__(self):\n        \"\"\"str: Return a strings that describes the object.\"\"\"\n        return self.__class__.__name__ + '(\\n    ' + str(self.tensor) + ')'\n\n    @classmethod\n    def cat(cls, boxes_list):\n        \"\"\"Concatenate a list of Boxes into a single Boxes.\n\n        Args:\n            boxes_list (list[:obj:`BaseInstances3DBoxes`]): List of boxes.\n\n        Returns:\n            :obj:`BaseInstances3DBoxes`: The concatenated Boxes.\n        \"\"\"\n        assert isinstance(boxes_list, (list, tuple))\n        if len(boxes_list) == 0:\n            return cls(torch.empty(0))\n        assert all(isinstance(box, cls) for box in boxes_list)\n\n        # use torch.cat (v.s. layers.cat)\n        # so the returned boxes never share storage with input\n        cat_boxes = cls(\n            torch.cat([b.tensor for b in boxes_list], dim=0),\n            box_dim=boxes_list[0].tensor.shape[1],\n            with_yaw=boxes_list[0].with_yaw)\n        return cat_boxes\n\n    def to(self, device):\n        \"\"\"Convert current boxes to a specific device.\n\n        Args:\n            device (str | :obj:`torch.device`): The name of the device.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: A new boxes object on the \\\n                specific device.\n        \"\"\"\n        original_type = type(self)\n        return original_type(\n            self.tensor.to(device),\n            box_dim=self.box_dim,\n            with_yaw=self.with_yaw)\n\n    def clone(self):\n        \"\"\"Clone the Boxes.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: Box object with the same properties \\\n                as self.\n        \"\"\"\n        original_type = type(self)\n        return original_type(\n            self.tensor.clone(), box_dim=self.box_dim, with_yaw=self.with_yaw)\n\n    @property\n    def device(self):\n        \"\"\"str: The device of the boxes are on.\"\"\"\n        return self.tensor.device\n\n    def __iter__(self):\n        \"\"\"Yield a box as a Tensor of shape (4,) at a time.\n\n        Returns:\n            torch.Tensor: A box of shape (4,).\n        \"\"\"\n        yield from self.tensor\n\n    @classmethod\n    def height_overlaps(cls, boxes1, boxes2, mode='iou'):\n        \"\"\"Calculate height overlaps of two boxes.\n\n        Note:\n            This function calculates the height overlaps between boxes1 and\n            boxes2,  boxes1 and boxes2 should be in the same type.\n\n        Args:\n            
boxes1 (:obj:`BaseInstanceBoxes`): Boxes 1 contain N boxes.\n            boxes2 (:obj:`BaseInstanceBoxes`): Boxes 2 contain M boxes.\n            mode (str, optional): Mode of iou calculation. Defaults to 'iou'.\n\n        Returns:\n            torch.Tensor: Calculated iou of boxes.\n        \"\"\"\n        assert isinstance(boxes1, BaseInstance3DBoxes)\n        assert isinstance(boxes2, BaseInstance3DBoxes)\n        assert type(boxes1) == type(boxes2), '\"boxes1\" and \"boxes2\" should' \\\n            f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'\n\n        boxes1_top_height = boxes1.top_height.view(-1, 1)\n        boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)\n        boxes2_top_height = boxes2.top_height.view(1, -1)\n        boxes2_bottom_height = boxes2.bottom_height.view(1, -1)\n\n        heighest_of_bottom = torch.max(boxes1_bottom_height,\n                                       boxes2_bottom_height)\n        lowest_of_top = torch.min(boxes1_top_height, boxes2_top_height)\n        overlaps_h = torch.clamp(lowest_of_top - heighest_of_bottom, min=0)\n        return overlaps_h\n\n    @classmethod\n    def overlaps(cls, boxes1, boxes2, mode='iou'):\n        \"\"\"Calculate 3D overlaps of two boxes.\n\n        Note:\n            This function calculates the overlaps between ``boxes1`` and\n            ``boxes2``, ``boxes1`` and ``boxes2`` should be in the same type.\n\n        Args:\n            boxes1 (:obj:`BaseInstanceBoxes`): Boxes 1 contain N boxes.\n            boxes2 (:obj:`BaseInstanceBoxes`): Boxes 2 contain M boxes.\n            mode (str, optional): Mode of iou calculation. Defaults to 'iou'.\n\n        Returns:\n            torch.Tensor: Calculated iou of boxes' heights.\n        \"\"\"\n        assert isinstance(boxes1, BaseInstance3DBoxes)\n        assert isinstance(boxes2, BaseInstance3DBoxes)\n        assert type(boxes1) == type(boxes2), '\"boxes1\" and \"boxes2\" should' \\\n            f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'\n\n        assert mode in ['iou', 'iof']\n\n        rows = len(boxes1)\n        cols = len(boxes2)\n        if rows * cols == 0:\n            return boxes1.tensor.new(rows, cols)\n\n        # height overlap\n        overlaps_h = cls.height_overlaps(boxes1, boxes2)\n\n        # obtain BEV boxes in XYXYR format\n        boxes1_bev = xywhr2xyxyr(boxes1.bev)\n        boxes2_bev = xywhr2xyxyr(boxes2.bev)\n\n        # bev overlap\n        overlaps_bev = boxes1_bev.new_zeros(\n            (boxes1_bev.shape[0], boxes2_bev.shape[0])).cuda()  # (N, M)\n        iou3d_cuda.boxes_overlap_bev_gpu(boxes1_bev.contiguous().cuda(),\n                                         boxes2_bev.contiguous().cuda(),\n                                         overlaps_bev)\n\n        # 3d overlaps\n        overlaps_3d = overlaps_bev.to(boxes1.device) * overlaps_h\n\n        volume1 = boxes1.volume.view(-1, 1)\n        volume2 = boxes2.volume.view(1, -1)\n\n        if mode == 'iou':\n            # the clamp func is used to avoid division of 0\n            iou3d = overlaps_3d / torch.clamp(\n                volume1 + volume2 - overlaps_3d, min=1e-8)\n        else:\n            iou3d = overlaps_3d / torch.clamp(volume1, min=1e-8)\n\n        return iou3d\n\n    def new_box(self, data):\n        \"\"\"Create a new box object with data.\n\n        The new box and its tensor has the similar properties \\\n            as self and self.tensor, respectively.\n\n        Args:\n            data (torch.Tensor | numpy.array | list): Data to be 
copied.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: A new bbox object with ``data``, \\\n                whose other properties are similar to those of ``self``.\n        \"\"\"\n        new_tensor = self.tensor.new_tensor(data) \\\n            if not isinstance(data, torch.Tensor) else data.to(self.device)\n        original_type = type(self)\n        return original_type(\n            new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw)\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/box_3d_mode.py",
    "content": "import numpy as np\nimport torch\nfrom enum import IntEnum, unique\n\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .cam_box3d import CameraInstance3DBoxes\nfrom .depth_box3d import DepthInstance3DBoxes\nfrom .lidar_box3d import LiDARInstance3DBoxes\n\n\n@unique\nclass Box3DMode(IntEnum):\n    r\"\"\"Enum of different ways to represent a box.\n\n    Coordinates in LiDAR:\n\n    .. code-block:: none\n\n                    up z\n                       ^   x front\n                       |  /\n                       | /\n        left y <------ 0\n\n    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n\n    Coordinates in camera:\n\n    .. code-block:: none\n\n                z front\n               /\n              /\n             0 ------> x right\n             |\n             |\n             v\n        down y\n\n    The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],\n    and the yaw is around the y axis, thus the rotation axis=1.\n\n    Coordinates in Depth mode:\n\n    .. code-block:: none\n\n        up z\n           ^   y front\n           |  /\n           | /\n           0 ------> x right\n\n    The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n    \"\"\"\n\n    LIDAR = 0\n    CAM = 1\n    DEPTH = 2\n\n    @staticmethod\n    def convert(box, src, dst, rt_mat=None):\n        \"\"\"Convert boxes from `src` mode to `dst` mode.\n\n        Args:\n            box (tuple | list | np.dnarray |\n                torch.Tensor | BaseInstance3DBoxes):\n                Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.\n            src (:obj:`BoxMode`): The src Box mode.\n            dst (:obj:`BoxMode`): The target Box mode.\n            rt_mat (np.dnarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. 
This requires a transformation matrix.\n\n        Returns:\n            (tuple | list | np.dnarray | torch.Tensor | BaseInstance3DBoxes): \\\n                The converted box of the same type.\n        \"\"\"\n        if src == dst:\n            return box\n\n        is_numpy = isinstance(box, np.ndarray)\n        is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)\n        single_box = isinstance(box, (list, tuple))\n        if single_box:\n            assert len(box) >= 7, (\n                'BoxMode.convert takes either a k-tuple/list or '\n                'an Nxk array/tensor, where k >= 7')\n            arr = torch.tensor(box)[None, :]\n        else:\n            # avoid modifying the input box\n            if is_numpy:\n                arr = torch.from_numpy(np.asarray(box)).clone()\n            elif is_Instance3DBoxes:\n                arr = box.tensor.clone()\n            else:\n                arr = box.clone()\n\n        # convert box from `src` mode to `dst` mode.\n        x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]\n        if src == Box3DMode.LIDAR and dst == Box3DMode.CAM:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])\n            xyz_size = torch.cat([y_size, z_size, x_size], dim=-1)\n        elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])\n            xyz_size = torch.cat([z_size, x_size, y_size], dim=-1)\n        elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])\n            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)\n        elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])\n            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)\n        elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])\n            xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)\n        elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])\n            xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)\n        else:\n            raise NotImplementedError(\n                f'Conversion from Box3DMode {src} to {dst} '\n                'is not supported yet')\n\n        if not isinstance(rt_mat, torch.Tensor):\n            rt_mat = arr.new_tensor(rt_mat)\n        if rt_mat.size(1) == 4:\n            extended_xyz = torch.cat(\n                [arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)\n            xyz = extended_xyz @ rt_mat.t()\n        else:\n            xyz = arr[:, :3] @ rt_mat.t()\n\n        remains = arr[..., 6:]\n        arr = torch.cat([xyz[:, :3], xyz_size, remains], dim=-1)\n\n        # convert arr to the original type\n        original_type = type(box)\n        if single_box:\n            return original_type(arr.flatten().tolist())\n        if is_numpy:\n            return arr.numpy()\n        elif is_Instance3DBoxes:\n            if dst == Box3DMode.CAM:\n                target_type = CameraInstance3DBoxes\n            elif dst == Box3DMode.LIDAR:\n                target_type = 
LiDARInstance3DBoxes\n            elif dst == Box3DMode.DEPTH:\n                target_type = DepthInstance3DBoxes\n            else:\n                raise NotImplementedError(\n                    f'Conversion to {dst} through {original_type}'\n                    ' is not supported yet')\n            return target_type(\n                arr, box_dim=arr.size(-1), with_yaw=box.with_yaw)\n        else:\n            return arr\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/cam_box3d.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet3d.core.points import BasePoints\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .utils import limit_period, rotation_3d_in_axis\n\n\nclass CameraInstance3DBoxes(BaseInstance3DBoxes):\n    \"\"\"3D boxes of instances in CAM coordinates.\n\n    Coordinates in camera:\n\n    .. code-block:: none\n\n                z front (yaw=0.5*pi)\n               /\n              /\n             0 ------> x right (yaw=0)\n             |\n             |\n             v\n        down y\n\n    The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),\n    and the yaw is around the y axis, thus the rotation axis=1.\n    The yaw is 0 at the positive direction of x axis, and increases from\n    the positive direction of x to the positive direction of z.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x box_dim.\n        box_dim (int): Integer indicates the dimension of a box\n            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).\n        with_yaw (bool): If True, the value of yaw will be set to 0 as minmax\n            boxes.\n    \"\"\"\n\n    def __init__(self,\n                 tensor,\n                 box_dim=7,\n                 with_yaw=True,\n                 origin=(0.5, 1.0, 0.5)):\n        if isinstance(tensor, torch.Tensor):\n            device = tensor.device\n        else:\n            device = torch.device('cpu')\n        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)\n        if tensor.numel() == 0:\n            # Use reshape, so we don't end up creating a new tensor that\n            # does not depend on the inputs (and consequently confuses jit)\n            tensor = tensor.reshape((0, box_dim)).to(\n                dtype=torch.float32, device=device)\n        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()\n\n        if tensor.shape[-1] == 6:\n            # If the dimension of boxes is 6, we expand box_dim by padding\n            # 0 as a fake yaw and set with_yaw to False.\n            assert box_dim == 6\n            fake_rot = tensor.new_zeros(tensor.shape[0], 1)\n            tensor = torch.cat((tensor, fake_rot), dim=-1)\n            self.box_dim = box_dim + 1\n            self.with_yaw = False\n        else:\n            self.box_dim = box_dim\n            self.with_yaw = with_yaw\n        self.tensor = tensor.clone()\n\n        if origin != (0.5, 1.0, 0.5):\n            dst = self.tensor.new_tensor((0.5, 1.0, 0.5))\n            src = self.tensor.new_tensor(origin)\n            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)\n\n    @property\n    def height(self):\n        \"\"\"torch.Tensor: A vector with height of each box.\"\"\"\n        return self.tensor[:, 4]\n\n    @property\n    def top_height(self):\n        \"\"\"torch.Tensor: A vector with the top height of each box.\"\"\"\n        # the positive direction is down rather than up\n        return self.bottom_height - self.height\n\n    @property\n    def bottom_height(self):\n        \"\"\"torch.Tensor: A vector with bottom's height of each box.\"\"\"\n        return self.tensor[:, 1]\n\n    @property\n    def gravity_center(self):\n        \"\"\"torch.Tensor: A tensor with center of each box.\"\"\"\n        bottom_center = self.bottom_center\n        gravity_center = torch.zeros_like(bottom_center)\n        gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]\n        gravity_center[:, 1] = bottom_center[:, 1] - self.tensor[:, 4] * 0.5\n        return 
gravity_center\n\n    @property\n    def corners(self):\n        \"\"\"torch.Tensor: Coordinates of corners of all the boxes in\n                         shape (N, 8, 3).\n\n        Convert the boxes to  in clockwise order, in the form of\n        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)\n\n        .. code-block:: none\n\n                         front z\n                              /\n                             /\n               (x0, y0, z1) + -----------  + (x1, y0, z1)\n                           /|            / |\n                          / |           /  |\n            (x0, y0, z0) + ----------- +   + (x1, y1, z1)\n                         |  /      .   |  /\n                         | / oriign    | /\n            (x0, y1, z0) + ----------- + -------> x right\n                         |             (x1, y1, z0)\n                         |\n                         v\n                    down y\n        \"\"\"\n        # TODO: rotation_3d_in_axis function do not support\n        #  empty tensor currently.\n        assert len(self.tensor) != 0\n        dims = self.dims\n        corners_norm = torch.from_numpy(\n            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(\n                device=dims.device, dtype=dims.dtype)\n\n        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]\n        # use relative origin [0.5, 1, 0.5]\n        corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5])\n        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])\n\n        # rotate around y axis\n        corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=1)\n        corners += self.tensor[:, :3].view(-1, 1, 3)\n        return corners\n\n    @property\n    def bev(self):\n        \"\"\"torch.Tensor: A n x 5 tensor of 2D BEV box of each box\n        with rotation in XYWHR format.\"\"\"\n        return self.tensor[:, [0, 2, 3, 5, 6]]\n\n    @property\n    def nearest_bev(self):\n        \"\"\"torch.Tensor: A tensor of 2D BEV box of each box\n        without rotation.\"\"\"\n        # Obtain BEV boxes with rotation in XZWHR format\n        bev_rotated_boxes = self.bev\n        # convert the rotation to a valid range\n        rotations = bev_rotated_boxes[:, -1]\n        normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))\n\n        # find the center of boxes\n        conditions = (normed_rotations > np.pi / 4)[..., None]\n        bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,\n                                                                [0, 1, 3, 2]],\n                                  bev_rotated_boxes[:, :4])\n\n        centers = bboxes_xywh[:, :2]\n        dims = bboxes_xywh[:, 2:]\n        bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)\n        return bev_boxes\n\n    def rotate(self, angle, points=None):\n        \"\"\"Rotate boxes with points (optional) with the given angle.\n\n        Args:\n            angle (float, torch.Tensor): Rotation angle.\n            points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional):\n                Points to rotate. 
Defaults to None.\n\n        Returns:\n            tuple or None: When ``points`` is None, the function returns \\\n                None, otherwise it returns the rotated points and the \\\n                rotation matrix ``rot_mat_T``.\n        \"\"\"\n        if not isinstance(angle, torch.Tensor):\n            angle = self.tensor.new_tensor(angle)\n        rot_sin = torch.sin(angle)\n        rot_cos = torch.cos(angle)\n        rot_mat_T = self.tensor.new_tensor([[rot_cos, 0, -rot_sin], [0, 1, 0],\n                                            [rot_sin, 0, rot_cos]])\n\n        self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T\n        self.tensor[:, 6] += angle\n\n        if points is not None:\n            if isinstance(points, torch.Tensor):\n                points[:, :3] = points[:, :3] @ rot_mat_T\n            elif isinstance(points, np.ndarray):\n                rot_mat_T = rot_mat_T.numpy()\n                points[:, :3] = np.dot(points[:, :3], rot_mat_T)\n            elif isinstance(points, BasePoints):\n                # clockwise\n                points.rotate(-angle)\n            else:\n                raise ValueError\n            return points, rot_mat_T\n\n    def flip(self, bev_direction='horizontal', points=None):\n        \"\"\"Flip the boxes in BEV along given BEV direction.\n\n        In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.\n\n        Args:\n            bev_direction (str): Flip direction (horizontal or vertical).\n            points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, None):\n                Points to flip. Defaults to None.\n\n        Returns:\n            torch.Tensor, numpy.ndarray or None: Flipped points.\n        \"\"\"\n        assert bev_direction in ('horizontal', 'vertical')\n        if bev_direction == 'horizontal':\n            self.tensor[:, 0::7] = -self.tensor[:, 0::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi\n        elif bev_direction == 'vertical':\n            self.tensor[:, 2::7] = -self.tensor[:, 2::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6]\n\n        if points is not None:\n            assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))\n            if isinstance(points, (torch.Tensor, np.ndarray)):\n                if bev_direction == 'horizontal':\n                    points[:, 0] = -points[:, 0]\n                elif bev_direction == 'vertical':\n                    points[:, 2] = -points[:, 2]\n            elif isinstance(points, BasePoints):\n                points.flip(bev_direction)\n            return points\n\n    def in_range_bev(self, box_range):\n        \"\"\"Check whether the boxes are in the given range.\n\n        Args:\n            box_range (list | torch.Tensor): The range of box\n                (x_min, z_min, x_max, z_max).\n\n        Note:\n            The original implementation of SECOND checks whether boxes in\n            a range by checking whether the points are in a convex\n            polygon, we reduce the burden for simpler cases.\n\n        Returns:\n            torch.Tensor: Indicating whether each box is inside \\\n                the reference range.\n        \"\"\"\n        in_range_flags = ((self.tensor[:, 0] > box_range[0])\n                          & (self.tensor[:, 2] > box_range[1])\n                          & (self.tensor[:, 0] < box_range[2])\n                          & (self.tensor[:, 2] < box_range[3]))\n        return in_range_flags\n\n    
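# Usage sketch for the classmethod defined below (hypothetical tensors):\n    #   boxes_a = CameraInstance3DBoxes(torch.rand(4, 7))\n    #   boxes_b = CameraInstance3DBoxes(torch.rand(2, 7))\n    #   overlaps = CameraInstance3DBoxes.height_overlaps(boxes_a, boxes_b)\n    # ``overlaps`` is a (4, 2) tensor of pairwise overlaps along the vertical\n    # (y) axis of the camera frame.\n\n    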
@classmethod\n    def height_overlaps(cls, boxes1, boxes2, mode='iou'):\n        \"\"\"Calculate height overlaps of two boxes.\n\n        This function calculates the height overlaps between ``boxes1`` and\n        ``boxes2``, which should be of the same type.\n\n        Args:\n            boxes1 (:obj:`CameraInstance3DBoxes`): Boxes 1 containing N boxes.\n            boxes2 (:obj:`CameraInstance3DBoxes`): Boxes 2 containing M boxes.\n            mode (str, optional): Mode of iou calculation. Defaults to 'iou'.\n\n        Returns:\n            torch.Tensor: Calculated height overlap of boxes.\n        \"\"\"\n        assert isinstance(boxes1, CameraInstance3DBoxes)\n        assert isinstance(boxes2, CameraInstance3DBoxes)\n\n        boxes1_top_height = boxes1.top_height.view(-1, 1)\n        boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)\n        boxes2_top_height = boxes2.top_height.view(1, -1)\n        boxes2_bottom_height = boxes2.bottom_height.view(1, -1)\n\n        # In camera coordinate system\n        # from up to down is the positive direction\n        highest_of_bottom = torch.min(boxes1_bottom_height,\n                                      boxes2_bottom_height)\n        lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height)\n        overlaps_h = torch.clamp(highest_of_bottom - lowest_of_top, min=0)\n        return overlaps_h\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`Box3DMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from ``src`` coordinates to ``dst`` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: \\\n                The converted box of the same type in the ``dst`` mode.\n        \"\"\"\n        from .box_3d_mode import Box3DMode\n        return Box3DMode.convert(\n            box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/coord_3d_mode.py",
    "content": "import numpy as np\nimport torch\nfrom enum import IntEnum, unique\n\nfrom mmdet3d.core.points import (BasePoints, CameraPoints, DepthPoints,\n                                 LiDARPoints)\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .cam_box3d import CameraInstance3DBoxes\nfrom .depth_box3d import DepthInstance3DBoxes\nfrom .lidar_box3d import LiDARInstance3DBoxes\n\n\n@unique\nclass Coord3DMode(IntEnum):\n    r\"\"\"Enum of different ways to represent a box\n        and point cloud.\n\n    Coordinates in LiDAR:\n\n    .. code-block:: none\n\n                    up z\n                       ^   x front\n                       |  /\n                       | /\n        left y <------ 0\n\n    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n\n    Coordinates in camera:\n\n    .. code-block:: none\n\n                z front\n               /\n              /\n             0 ------> x right\n             |\n             |\n             v\n        down y\n\n    The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],\n    and the yaw is around the y axis, thus the rotation axis=1.\n\n    Coordinates in Depth mode:\n\n    .. code-block:: none\n\n        up z\n           ^   y front\n           |  /\n           | /\n           0 ------> x right\n\n    The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n    \"\"\"\n\n    LIDAR = 0\n    CAM = 1\n    DEPTH = 2\n\n    @staticmethod\n    def convert(input, src, dst, rt_mat=None):\n        \"\"\"Convert boxes or points from `src` mode to `dst` mode.\"\"\"\n        if isinstance(input, BaseInstance3DBoxes):\n            return Coord3DMode.convert_box(input, src, dst, rt_mat=rt_mat)\n        elif isinstance(input, BasePoints):\n            return Coord3DMode.convert_point(input, src, dst, rt_mat=rt_mat)\n        else:\n            raise NotImplementedError\n\n    @staticmethod\n    def convert_box(box, src, dst, rt_mat=None):\n        \"\"\"Convert boxes from `src` mode to `dst` mode.\n\n        Args:\n            box (tuple | list | np.dnarray |\n                torch.Tensor | BaseInstance3DBoxes):\n                Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.\n            src (:obj:`CoordMode`): The src Box mode.\n            dst (:obj:`CoordMode`): The target Box mode.\n            rt_mat (np.dnarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. 
This requires a transformation matrix.\n\n        Returns:\n            (tuple | list | np.dnarray | torch.Tensor | BaseInstance3DBoxes): \\\n                The converted box of the same type.\n        \"\"\"\n        if src == dst:\n            return box\n\n        is_numpy = isinstance(box, np.ndarray)\n        is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)\n        single_box = isinstance(box, (list, tuple))\n        if single_box:\n            assert len(box) >= 7, (\n                'CoordMode.convert takes either a k-tuple/list or '\n                'an Nxk array/tensor, where k >= 7')\n            arr = torch.tensor(box)[None, :]\n        else:\n            # avoid modifying the input box\n            if is_numpy:\n                arr = torch.from_numpy(np.asarray(box)).clone()\n            elif is_Instance3DBoxes:\n                arr = box.tensor.clone()\n            else:\n                arr = box.clone()\n\n        # convert box from `src` mode to `dst` mode.\n        x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]\n        if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])\n            xyz_size = torch.cat([y_size, z_size, x_size], dim=-1)\n        elif src == Coord3DMode.CAM and dst == Coord3DMode.LIDAR:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])\n            xyz_size = torch.cat([z_size, x_size, y_size], dim=-1)\n        elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])\n            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)\n        elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])\n            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)\n        elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])\n            xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)\n        elif src == Coord3DMode.DEPTH and dst == Coord3DMode.LIDAR:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])\n            xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)\n        else:\n            raise NotImplementedError(\n                f'Conversion from Coord3DMode {src} to {dst} '\n                'is not supported yet')\n\n        if not isinstance(rt_mat, torch.Tensor):\n            rt_mat = arr.new_tensor(rt_mat)\n        if rt_mat.size(1) == 4:\n            extended_xyz = torch.cat(\n                [arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)\n            xyz = extended_xyz @ rt_mat.t()\n        else:\n            xyz = arr[:, :3] @ rt_mat.t()\n\n        remains = arr[..., 6:]\n        arr = torch.cat([xyz[:, :3], xyz_size, remains], dim=-1)\n\n        # convert arr to the original type\n        original_type = type(box)\n        if single_box:\n            return original_type(arr.flatten().tolist())\n        if is_numpy:\n            return arr.numpy()\n        elif is_Instance3DBoxes:\n            if dst == Coord3DMode.CAM:\n                target_type = CameraInstance3DBoxes\n            elif dst == Coord3DMode.LIDAR:\n                
target_type = LiDARInstance3DBoxes\n            elif dst == Coord3DMode.DEPTH:\n                target_type = DepthInstance3DBoxes\n            else:\n                raise NotImplementedError(\n                    f'Conversion to {dst} through {original_type}'\n                    ' is not supported yet')\n            return target_type(\n                arr, box_dim=arr.size(-1), with_yaw=box.with_yaw)\n        else:\n            return arr\n\n    @staticmethod\n    def convert_point(point, src, dst, rt_mat=None):\n        \"\"\"Convert points from `src` mode to `dst` mode.\n\n        Args:\n            point (tuple | list | np.dnarray |\n                torch.Tensor | BasePoints):\n                Can be a k-tuple, k-list or an Nxk array/tensor.\n            src (:obj:`CoordMode`): The src Point mode.\n            dst (:obj:`CoordMode`): The target Point mode.\n            rt_mat (np.dnarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            (tuple | list | np.dnarray | torch.Tensor | BasePoints): \\\n                The converted point of the same type.\n        \"\"\"\n        if src == dst:\n            return point\n\n        is_numpy = isinstance(point, np.ndarray)\n        is_InstancePoints = isinstance(point, BasePoints)\n        single_point = isinstance(point, (list, tuple))\n        if single_point:\n            assert len(point) >= 3, (\n                'CoordMode.convert takes either a k-tuple/list or '\n                'an Nxk array/tensor, where k >= 3')\n            arr = torch.tensor(point)[None, :]\n        else:\n            # avoid modifying the input point\n            if is_numpy:\n                arr = torch.from_numpy(np.asarray(point)).clone()\n            elif is_InstancePoints:\n                arr = point.tensor.clone()\n            else:\n                arr = point.clone()\n\n        # convert point from `src` mode to `dst` mode.\n        # TODO: LIDAR\n        # only implemented provided Rt matrix in cam-depth conversion\n        if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM:\n            rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])\n        elif src == Coord3DMode.CAM and dst == Coord3DMode.LIDAR:\n            rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])\n        elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])\n            else:\n                rt_mat = rt_mat.new_tensor(\n                    [[1, 0, 0], [0, 0, -1], [0, 1, 0]]) @ \\\n                    rt_mat.transpose(1, 0)\n        elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH:\n            if rt_mat is None:\n                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])\n            else:\n                rt_mat = rt_mat @ rt_mat.new_tensor([[1, 0, 0], [0, 0, 1],\n                                                     [0, -1, 0]])\n        elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH:\n            rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])\n        elif src == Coord3DMode.DEPTH and dst == Coord3DMode.LIDAR:\n            rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 
1]])\n        else:\n            raise NotImplementedError(\n                f'Conversion from Coord3DMode {src} to {dst} '\n                'is not supported yet')\n\n        if rt_mat.size(1) == 4:\n            extended_xyz = torch.cat(\n                [arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)\n            xyz = extended_xyz @ rt_mat.t()\n        else:\n            xyz = arr[:, :3] @ rt_mat.t()\n\n        remains = arr[:, 3:]\n        arr = torch.cat([xyz[:, :3], remains], dim=-1)\n\n        # convert arr to the original type\n        original_type = type(point)\n        if single_point:\n            return original_type(arr.flatten().tolist())\n        if is_numpy:\n            return arr.numpy()\n        elif is_InstancePoints:\n            if dst == Coord3DMode.CAM:\n                target_type = CameraPoints\n            elif dst == Coord3DMode.LIDAR:\n                target_type = LiDARPoints\n            elif dst == Coord3DMode.DEPTH:\n                target_type = DepthPoints\n            else:\n                raise NotImplementedError(\n                    f'Conversion to {dst} through {original_type}'\n                    ' is not supported yet')\n            return target_type(\n                arr,\n                points_dim=arr.size(-1),\n                attribute_dims=point.attribute_dims)\n        else:\n            return arr\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/depth_box3d.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet3d.core.points import BasePoints\nfrom mmdet3d.ops import points_in_boxes_batch\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .utils import limit_period, rotation_3d_in_axis\n\n\nclass DepthInstance3DBoxes(BaseInstance3DBoxes):\n    \"\"\"3D boxes of instances in Depth coordinates.\n\n    Coordinates in Depth:\n\n    .. code-block:: none\n\n                    up z    y front (yaw=0.5*pi)\n                       ^   ^\n                       |  /\n                       | /\n                       0 ------> x right (yaw=0)\n\n    The relative coordinate of bottom center in a Depth box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n    The yaw is 0 at the positive direction of x axis, and increases from\n    the positive direction of x to the positive direction of y.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x box_dim.\n        box_dim (int): Integer indicates the dimension of a box\n            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).\n        with_yaw (bool): If True, the value of yaw will be set to 0 as minmax\n            boxes.\n    \"\"\"\n\n    @property\n    def gravity_center(self):\n        \"\"\"torch.Tensor: A tensor with center of each box.\"\"\"\n        bottom_center = self.bottom_center\n        gravity_center = torch.zeros_like(bottom_center)\n        gravity_center[:, :2] = bottom_center[:, :2]\n        gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5\n        return gravity_center\n\n    @property\n    def corners(self):\n        \"\"\"torch.Tensor: Coordinates of corners of all the boxes\n        in shape (N, 8, 3).\n\n        Convert the boxes to corners in clockwise order, in form of\n        ``(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)``\n\n        .. code-block:: none\n\n                                           up z\n                            front y           ^\n                                 /            |\n                                /             |\n                  (x0, y1, z1) + -----------  + (x1, y1, z1)\n                              /|            / |\n                             / |           /  |\n               (x0, y0, z1) + ----------- +   + (x1, y1, z0)\n                            |  /      .   
|  /\n                            | / oriign    | /\n               (x0, y0, z0) + ----------- + --------> right x\n                                          (x1, y0, z0)\n        \"\"\"\n        # TODO: rotation_3d_in_axis function do not support\n        #  empty tensor currently.\n        assert len(self.tensor) != 0\n        dims = self.dims\n        corners_norm = torch.from_numpy(\n            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(\n                device=dims.device, dtype=dims.dtype)\n\n        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]\n        # use relative origin (0.5, 0.5, 0)\n        corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])\n        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])\n\n        # rotate around z axis\n        corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2)\n        corners += self.tensor[:, :3].view(-1, 1, 3)\n        return corners\n\n    @property\n    def bev(self):\n        \"\"\"torch.Tensor: A n x 5 tensor of 2D BEV box of each box\n        in XYWHR format.\"\"\"\n        return self.tensor[:, [0, 1, 3, 4, 6]]\n\n    @property\n    def nearest_bev(self):\n        \"\"\"torch.Tensor: A tensor of 2D BEV box of each box\n        without rotation.\"\"\"\n        # Obtain BEV boxes with rotation in XYWHR format\n        bev_rotated_boxes = self.bev\n        # convert the rotation to a valid range\n        rotations = bev_rotated_boxes[:, -1]\n        normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))\n\n        # find the center of boxes\n        conditions = (normed_rotations > np.pi / 4)[..., None]\n        bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,\n                                                                [0, 1, 3, 2]],\n                                  bev_rotated_boxes[:, :4])\n\n        centers = bboxes_xywh[:, :2]\n        dims = bboxes_xywh[:, 2:]\n        bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)\n        return bev_boxes\n\n    def rotate(self, angle, points=None):\n        \"\"\"Rotate boxes with points (optional) with the given angle.\n\n        Args:\n            angle (float, torch.Tensor): Rotation angle.\n            points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional):\n                Points to rotate. 
Defaults to None.\n\n        Returns:\n            tuple or None: When ``points`` is None, the function returns \\\n                None, otherwise it returns the rotated points and the \\\n                rotation matrix ``rot_mat_T``.\n        \"\"\"\n        if not isinstance(angle, torch.Tensor):\n            angle = self.tensor.new_tensor(angle)\n        rot_sin = torch.sin(angle)\n        rot_cos = torch.cos(angle)\n        rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],\n                                            [rot_sin, rot_cos, 0], [0, 0,\n                                                                    1]]).T\n        self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T\n        if self.with_yaw:\n            self.tensor[:, 6] -= angle\n        else:\n            corners_rot = self.corners @ rot_mat_T\n            new_x_size = corners_rot[..., 0].max(\n                dim=1, keepdim=True)[0] - corners_rot[..., 0].min(\n                    dim=1, keepdim=True)[0]\n            new_y_size = corners_rot[..., 1].max(\n                dim=1, keepdim=True)[0] - corners_rot[..., 1].min(\n                    dim=1, keepdim=True)[0]\n            self.tensor[:, 3:5] = torch.cat((new_x_size, new_y_size), dim=-1)\n\n        if points is not None:\n            if isinstance(points, torch.Tensor):\n                points[:, :3] = points[:, :3] @ rot_mat_T\n            elif isinstance(points, np.ndarray):\n                rot_mat_T = rot_mat_T.numpy()\n                points[:, :3] = np.dot(points[:, :3], rot_mat_T)\n            elif isinstance(points, BasePoints):\n                # anti-clockwise\n                points.rotate(angle)\n            else:\n                raise ValueError\n            return points, rot_mat_T\n\n    def flip(self, bev_direction='horizontal', points=None):\n        \"\"\"Flip the boxes in BEV along given BEV direction.\n\n        In Depth coordinates, it flips x (horizontal) or y (vertical) axis.\n\n        Args:\n            bev_direction (str): Flip direction (horizontal or vertical).\n            points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, None):\n                Points to flip. 
Defaults to None.\n\n        Returns:\n            torch.Tensor, numpy.ndarray or None: Flipped points.\n        \"\"\"\n        assert bev_direction in ('horizontal', 'vertical')\n        if bev_direction == 'horizontal':\n            self.tensor[:, 0::7] = -self.tensor[:, 0::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi\n        elif bev_direction == 'vertical':\n            self.tensor[:, 1::7] = -self.tensor[:, 1::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6]\n\n        if points is not None:\n            assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))\n            if isinstance(points, (torch.Tensor, np.ndarray)):\n                if bev_direction == 'horizontal':\n                    points[:, 0] = -points[:, 0]\n                elif bev_direction == 'vertical':\n                    points[:, 1] = -points[:, 1]\n            elif isinstance(points, BasePoints):\n                points.flip(bev_direction)\n            return points\n\n    def in_range_bev(self, box_range):\n        \"\"\"Check whether the boxes are in the given range.\n\n        Args:\n            box_range (list | torch.Tensor): The range of box\n                (x_min, y_min, x_max, y_max).\n\n        Note:\n            In the original implementation of SECOND, checking whether\n            a box in the range checks whether the points are in a convex\n            polygon, we try to reduce the burdun for simpler cases.\n\n        Returns:\n            torch.Tensor: Indicating whether each box is inside \\\n                the reference range.\n        \"\"\"\n        in_range_flags = ((self.tensor[:, 0] > box_range[0])\n                          & (self.tensor[:, 1] > box_range[1])\n                          & (self.tensor[:, 0] < box_range[2])\n                          & (self.tensor[:, 1] < box_range[3]))\n        return in_range_flags\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`BoxMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from ``src`` coordinates to ``dst`` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. 
This requires a transformation matrix.\n\n        Returns:\n            :obj:`DepthInstance3DBoxes`: \\\n                The converted box of the same type in the ``dst`` mode.\n        \"\"\"\n        from .box_3d_mode import Box3DMode\n        return Box3DMode.convert(\n            box=self, src=Box3DMode.DEPTH, dst=dst, rt_mat=rt_mat)\n\n    def points_in_boxes(self, points):\n        \"\"\"Find points that are in boxes (CUDA).\n\n        Args:\n            points (torch.Tensor): Points in shape [1, M, 3] or [M, 3], \\\n                3 dimensions are [x, y, z] in LiDAR coordinate.\n\n        Returns:\n            torch.Tensor: The index of boxes each point lies in with shape \\\n                of (B, M, T).\n        \"\"\"\n        from .box_3d_mode import Box3DMode\n\n        # to lidar\n        points_lidar = points.clone()\n        points_lidar = points_lidar[..., [1, 0, 2]]\n        points_lidar[..., 1] *= -1\n        if points.dim() == 2:\n            points_lidar = points_lidar.unsqueeze(0)\n        else:\n            assert points.dim() == 3 and points_lidar.shape[0] == 1\n\n        boxes_lidar = self.convert_to(Box3DMode.LIDAR).tensor\n        boxes_lidar = boxes_lidar.to(points.device).unsqueeze(0)\n        box_idxs_of_pts = points_in_boxes_batch(points_lidar, boxes_lidar)\n\n        return box_idxs_of_pts.squeeze(0)\n\n    def enlarged_box(self, extra_width):\n        \"\"\"Enlarge the length, width and height of boxes.\n\n        Args:\n            extra_width (float | torch.Tensor): Extra width to enlarge the box.\n\n        Returns:\n            :obj:`DepthInstance3DBoxes`: Enlarged boxes.\n        \"\"\"\n        enlarged_boxes = self.tensor.clone()\n        enlarged_boxes[:, 3:6] += extra_width * 2\n        # bottom center z minus extra_width\n        enlarged_boxes[:, 2] -= extra_width\n        return self.new_box(enlarged_boxes)\n\n    def get_surface_line_center(self):\n        \"\"\"Compute surface and line center of bounding boxes.\n\n        Returns:\n            tuple(torch.Tensor): Surface and line center of bounding boxes.\n        \"\"\"\n        obj_size = self.dims\n        center = self.gravity_center.view(-1, 1, 3)\n        batch_size = center.shape[0]\n\n        rot_sin = torch.sin(-self.yaw)\n        rot_cos = torch.cos(-self.yaw)\n        rot_mat_T = self.yaw.new_zeros(tuple(list(self.yaw.shape) + [3, 3]))\n        rot_mat_T[..., 0, 0] = rot_cos\n        rot_mat_T[..., 0, 1] = -rot_sin\n        rot_mat_T[..., 1, 0] = rot_sin\n        rot_mat_T[..., 1, 1] = rot_cos\n        rot_mat_T[..., 2, 2] = 1\n\n        # Get the object surface center\n        offset = obj_size.new_tensor([[0, 0, 1], [0, 0, -1], [0, 1, 0],\n                                      [0, -1, 0], [1, 0, 0], [-1, 0, 0]])\n        offset = offset.view(1, 6, 3) / 2\n        surface_3d = (offset *\n                      obj_size.view(batch_size, 1, 3).repeat(1, 6, 1)).reshape(\n                          -1, 3)\n\n        # Get the object line center\n        offset = obj_size.new_tensor([[1, 0, 1], [-1, 0, 1], [0, 1, 1],\n                                      [0, -1, 1], [1, 0, -1], [-1, 0, -1],\n                                      [0, 1, -1], [0, -1, -1], [1, 1, 0],\n                                      [1, -1, 0], [-1, 1, 0], [-1, -1, 0]])\n        offset = offset.view(1, 12, 3) / 2\n\n        line_3d = (offset *\n                   obj_size.view(batch_size, 1, 3).repeat(1, 12, 1)).reshape(\n                       -1, 3)\n\n        surface_rot = rot_mat_T.repeat(6, 1, 1)\n        surface_3d = 
torch.matmul(\n            surface_3d.unsqueeze(-2), surface_rot.transpose(2, 1)).squeeze(-2)\n        surface_center = center.repeat(1, 6, 1).reshape(-1, 3) + surface_3d\n\n        line_rot = rot_mat_T.repeat(12, 1, 1)\n        line_3d = torch.matmul(\n            line_3d.unsqueeze(-2), line_rot.transpose(2, 1)).squeeze(-2)\n        line_center = center.repeat(1, 12, 1).reshape(-1, 3) + line_3d\n\n        return surface_center, line_center\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/lidar_box3d.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet3d.core.points import BasePoints\nfrom mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu\nfrom .base_box3d import BaseInstance3DBoxes\nfrom .utils import limit_period, rotation_3d_in_axis\n\n\nclass LiDARInstance3DBoxes(BaseInstance3DBoxes):\n    \"\"\"3D boxes of instances in LIDAR coordinates.\n\n    Coordinates in LiDAR:\n\n    .. code-block:: none\n\n                            up z    x front (yaw=0.5*pi)\n                               ^   ^\n                               |  /\n                               | /\n       (yaw=pi) left y <------ 0\n\n    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),\n    and the yaw is around the z axis, thus the rotation axis=2.\n    The yaw is 0 at the negative direction of y axis, and increases from\n    the negative direction of y to the positive direction of x.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x box_dim.\n        box_dim (int): Integer indicating the dimension of a box.\n            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).\n        with_yaw (bool): If True, the value of yaw will be set to 0 as minmax\n            boxes.\n    \"\"\"\n\n    @property\n    def gravity_center(self):\n        \"\"\"torch.Tensor: A tensor with center of each box.\"\"\"\n        bottom_center = self.bottom_center\n        gravity_center = torch.zeros_like(bottom_center)\n        gravity_center[:, :2] = bottom_center[:, :2]\n        gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5\n        return gravity_center\n\n    @property\n    def corners(self):\n        \"\"\"torch.Tensor: Coordinates of corners of all the boxes\n        in shape (N, 8, 3).\n\n        Convert the boxes to corners in clockwise order, in form of\n        ``(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)``\n\n        .. code-block:: none\n\n                                           up z\n                            front x           ^\n                                 /            |\n                                /             |\n                  (x1, y0, z1) + -----------  + (x1, y1, z1)\n                              /|            / |\n                             / |           /  |\n               (x0, y0, z1) + ----------- +   + (x1, y1, z0)\n                            |  /      .   
|  /\n                            | / oriign    | /\n            left y<-------- + ----------- + (x0, y1, z0)\n                (x0, y0, z0)\n        \"\"\"\n        # TODO: rotation_3d_in_axis function do not support\n        #  empty tensor currently.\n        assert len(self.tensor) != 0\n        dims = self.dims\n        corners_norm = torch.from_numpy(\n            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(\n                device=dims.device, dtype=dims.dtype)\n\n        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]\n        # use relative origin [0.5, 0.5, 0]\n        corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])\n        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])\n\n        # rotate around z axis\n        corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2)\n        corners += self.tensor[:, :3].view(-1, 1, 3)\n        return corners\n\n    @property\n    def bev(self):\n        \"\"\"torch.Tensor: 2D BEV box of each box with rotation\n        in XYWHR format.\"\"\"\n        return self.tensor[:, [0, 1, 3, 4, 6]]\n\n    @property\n    def nearest_bev(self):\n        \"\"\"torch.Tensor: A tensor of 2D BEV box of each box\n        without rotation.\"\"\"\n        # Obtain BEV boxes with rotation in XYWHR format\n        bev_rotated_boxes = self.bev\n        # convert the rotation to a valid range\n        rotations = bev_rotated_boxes[:, -1]\n        normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))\n\n        # find the center of boxes\n        conditions = (normed_rotations > np.pi / 4)[..., None]\n        bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,\n                                                                [0, 1, 3, 2]],\n                                  bev_rotated_boxes[:, :4])\n\n        centers = bboxes_xywh[:, :2]\n        dims = bboxes_xywh[:, 2:]\n        bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)\n        return bev_boxes\n\n    def rotate(self, angle, points=None):\n        \"\"\"Rotate boxes with points (optional) with the given angle.\n\n        Args:\n            angle (float | torch.Tensor): Rotation angle.\n            points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional):\n                Points to rotate. 
Defaults to None.\n\n        Returns:\n            tuple or None: When ``points`` is None, the function returns \\\n                None, otherwise it returns the rotated points and the \\\n                rotation matrix ``rot_mat_T``.\n        \"\"\"\n        if not isinstance(angle, torch.Tensor):\n            angle = self.tensor.new_tensor(angle)\n        rot_sin = torch.sin(angle)\n        rot_cos = torch.cos(angle)\n        rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],\n                                            [rot_sin, rot_cos, 0], [0, 0, 1]])\n\n        self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T\n        self.tensor[:, 6] += angle\n\n        if self.tensor.shape[1] == 9:\n            # rotate velo vector\n            self.tensor[:, 7:9] = self.tensor[:, 7:9] @ rot_mat_T[:2, :2]\n\n        if points is not None:\n            if isinstance(points, torch.Tensor):\n                points[:, :3] = points[:, :3] @ rot_mat_T\n            elif isinstance(points, np.ndarray):\n                rot_mat_T = rot_mat_T.numpy()\n                points[:, :3] = np.dot(points[:, :3], rot_mat_T)\n            elif isinstance(points, BasePoints):\n                # clockwise\n                points.rotate(-angle)\n            else:\n                raise ValueError\n            return points, rot_mat_T\n\n    def flip(self, bev_direction='horizontal', points=None):\n        \"\"\"Flip the boxes in BEV along given BEV direction.\n\n        In LIDAR coordinates, it flips the y (horizontal) or x (vertical) axis.\n\n        Args:\n            bev_direction (str): Flip direction (horizontal or vertical).\n            points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, None):\n                Points to flip. Defaults to None.\n\n        Returns:\n            torch.Tensor, numpy.ndarray or None: Flipped points.\n        \"\"\"\n        assert bev_direction in ('horizontal', 'vertical')\n        if bev_direction == 'horizontal':\n            self.tensor[:, 1::7] = -self.tensor[:, 1::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi\n        elif bev_direction == 'vertical':\n            self.tensor[:, 0::7] = -self.tensor[:, 0::7]\n            if self.with_yaw:\n                self.tensor[:, 6] = -self.tensor[:, 6]\n\n        if points is not None:\n            assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))\n            if isinstance(points, (torch.Tensor, np.ndarray)):\n                if bev_direction == 'horizontal':\n                    points[:, 1] = -points[:, 1]\n                elif bev_direction == 'vertical':\n                    points[:, 0] = -points[:, 0]\n            elif isinstance(points, BasePoints):\n                points.flip(bev_direction)\n            return points\n\n    def in_range_bev(self, box_range):\n        \"\"\"Check whether the boxes are in the given range.\n\n        Args:\n            box_range (list | torch.Tensor): the range of box\n                (x_min, y_min, x_max, y_max)\n\n        Note:\n            The original implementation of SECOND checks whether boxes in\n            a range by checking whether the points are in a convex\n            polygon, we reduce the burden for simpler cases.\n\n        Returns:\n            torch.Tensor: Whether each box is inside the reference range.\n        \"\"\"\n        in_range_flags = ((self.tensor[:, 0] > box_range[0])\n                          & (self.tensor[:, 1] > box_range[1])\n                          & (self.tensor[:, 0] < 
box_range[2])\n                          & (self.tensor[:, 1] < box_range[3]))\n        return in_range_flags\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`Box3DMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from ``src`` coordinates to ``dst`` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BaseInstance3DBoxes`: \\\n                The converted box of the same type in the ``dst`` mode.\n        \"\"\"\n        from .box_3d_mode import Box3DMode\n        return Box3DMode.convert(\n            box=self, src=Box3DMode.LIDAR, dst=dst, rt_mat=rt_mat)\n\n    def enlarged_box(self, extra_width):\n        \"\"\"Enlarge the length, width and height of boxes.\n\n        Args:\n            extra_width (float | torch.Tensor): Extra width to enlarge the box.\n\n        Returns:\n            :obj:`LiDARInstance3DBoxes`: Enlarged boxes.\n        \"\"\"\n        enlarged_boxes = self.tensor.clone()\n        enlarged_boxes[:, 3:6] += extra_width * 2\n        # bottom center z minus extra_width\n        enlarged_boxes[:, 2] -= extra_width\n        return self.new_box(enlarged_boxes)\n\n    def points_in_boxes(self, points):\n        \"\"\"Find the box in which each point lies.\n\n        Args:\n            points (torch.Tensor): Points in shape (N, 3).\n\n        Returns:\n            torch.Tensor: The index of the box each point is in.\n        \"\"\"\n        box_idx = points_in_boxes_gpu(\n            points.unsqueeze(0),\n            self.tensor.unsqueeze(0).to(points.device)).squeeze(0)\n        return box_idx\n"
  },
  {
    "path": "mmdet3d/core/bbox/structures/utils.py",
    "content": "import numpy as np\nimport torch\n\n\ndef limit_period(val, offset=0.5, period=np.pi):\n    \"\"\"Limit the value into a period for periodic function.\n\n    Args:\n        val (torch.Tensor): The value to be converted.\n        offset (float, optional): Offset to set the value range. \\\n            Defaults to 0.5.\n        period ([type], optional): Period of the value. Defaults to np.pi.\n\n    Returns:\n        torch.Tensor: Value in the range of \\\n            [-offset * period, (1-offset) * period]\n    \"\"\"\n    return val - torch.floor(val / period + offset) * period\n\n\ndef rotation_3d_in_axis(points, angles, axis=0):\n    \"\"\"Rotate points by angles according to axis.\n\n    Args:\n        points (torch.Tensor): Points of shape (N, M, 3).\n        angles (torch.Tensor): Vector of angles in shape (N,)\n        axis (int, optional): The axis to be rotated. Defaults to 0.\n\n    Raises:\n        ValueError: when the axis is not in range [0, 1, 2], it will \\\n            raise value error.\n\n    Returns:\n        torch.Tensor: Rotated points in shape (N, M, 3)\n    \"\"\"\n    rot_sin = torch.sin(angles)\n    rot_cos = torch.cos(angles)\n    ones = torch.ones_like(rot_cos)\n    zeros = torch.zeros_like(rot_cos)\n    if axis == 1:\n        rot_mat_T = torch.stack([\n            torch.stack([rot_cos, zeros, -rot_sin]),\n            torch.stack([zeros, ones, zeros]),\n            torch.stack([rot_sin, zeros, rot_cos])\n        ])\n    elif axis == 2 or axis == -1:\n        rot_mat_T = torch.stack([\n            torch.stack([rot_cos, -rot_sin, zeros]),\n            torch.stack([rot_sin, rot_cos, zeros]),\n            torch.stack([zeros, zeros, ones])\n        ])\n    elif axis == 0:\n        rot_mat_T = torch.stack([\n            torch.stack([zeros, rot_cos, -rot_sin]),\n            torch.stack([zeros, rot_sin, rot_cos]),\n            torch.stack([ones, zeros, zeros])\n        ])\n    else:\n        raise ValueError(f'axis should in range [0, 1, 2], got {axis}')\n\n    return torch.einsum('aij,jka->aik', (points, rot_mat_T))\n\n\ndef xywhr2xyxyr(boxes_xywhr):\n    \"\"\"Convert a rotated boxes in XYWHR format to XYXYR format.\n\n    Args:\n        boxes_xywhr (torch.Tensor): Rotated boxes in XYWHR format.\n\n    Returns:\n        torch.Tensor: Converted boxes in XYXYR format.\n    \"\"\"\n    boxes = torch.zeros_like(boxes_xywhr)\n    half_w = boxes_xywhr[:, 2] / 2\n    half_h = boxes_xywhr[:, 3] / 2\n\n    boxes[:, 0] = boxes_xywhr[:, 0] - half_w\n    boxes[:, 1] = boxes_xywhr[:, 1] - half_h\n    boxes[:, 2] = boxes_xywhr[:, 0] + half_w\n    boxes[:, 3] = boxes_xywhr[:, 1] + half_h\n    boxes[:, 4] = boxes_xywhr[:, 4]\n    return boxes\n\n\ndef get_box_type(box_type):\n    \"\"\"Get the type and mode of box structure.\n\n    Args:\n        box_type (str): The type of box structure.\n            The valid value are \"LiDAR\", \"Camera\", or \"Depth\".\n\n    Returns:\n        tuple: Box type and box mode.\n    \"\"\"\n    from .box_3d_mode import (Box3DMode, CameraInstance3DBoxes,\n                              DepthInstance3DBoxes, LiDARInstance3DBoxes)\n    box_type_lower = box_type.lower()\n    if box_type_lower == 'lidar':\n        box_type_3d = LiDARInstance3DBoxes\n        box_mode_3d = Box3DMode.LIDAR\n    elif box_type_lower == 'camera':\n        box_type_3d = CameraInstance3DBoxes\n        box_mode_3d = Box3DMode.CAM\n    elif box_type_lower == 'depth':\n        box_type_3d = DepthInstance3DBoxes\n        box_mode_3d = Box3DMode.DEPTH\n    else:\n        
raise ValueError('Only \"box_type\" of \"camera\", \"lidar\", \"depth\"'\n                         f' are supported, got {box_type}')\n\n    return box_type_3d, box_mode_3d\n\n\ndef points_cam2img(points_3d, proj_mat):\n    \"\"\"Project points from camera coordicates to image coordinates.\n\n    Args:\n        points_3d (torch.Tensor): Points in shape (N, 3)\n        proj_mat (torch.Tensor): Transformation matrix between coordinates.\n\n    Returns:\n        torch.Tensor: Points in image coordinates with shape [N, 2].\n    \"\"\"\n    points_num = list(points_3d.shape)[:-1]\n\n    points_shape = np.concatenate([points_num, [1]], axis=0).tolist()\n    assert len(proj_mat.shape) == 2, f'The dimension of the projection'\\\n        f'matrix should be 2 instead of {len(proj_mat.shape)}.'\n    d1, d2 = proj_mat.shape[:2]\n    assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or (\n        d1 == 4 and d2 == 4), f'The shape of the projection matrix'\\\n        f' ({d1}*{d2}) is not supported.'\n    if d1 == 3:\n        proj_mat_expanded = torch.eye(\n            4, device=proj_mat.device, dtype=proj_mat.dtype)\n        proj_mat_expanded[:d1, :d2] = proj_mat\n        proj_mat = proj_mat_expanded\n\n    # previous implementation use new_zeros, new_one yeilds better results\n    points_4 = torch.cat(\n        [points_3d, points_3d.new_ones(*points_shape)], dim=-1)\n    point_2d = torch.matmul(points_4, proj_mat.t())\n    point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]\n    return point_2d_res\n"
  },
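  {
    "path": "examples/hypothetical_box_utils_demo.py",
    "content": "# Hypothetical usage sketch; this example file is not part of the original\n# tree. It exercises the box-structure helpers defined in the module above.\n# The import path below is an assumption about where those helpers live --\n# adjust it to the actual module if it differs.\nimport numpy as np\nimport torch\n\nfrom mmdet3d.core.bbox.structures.utils import (get_box_type, limit_period,\n                                                points_cam2img,\n                                                rotation_3d_in_axis,\n                                                xywhr2xyxyr)\n\n# Wrap heading angles into [-0.5 * pi, 0.5 * pi) with the default settings.\nangles = torch.tensor([3.5, -4.0, 0.3])\nwrapped = limit_period(angles)\n\n# Rotate one group of three points around the z axis (axis=2) by 90 degrees.\npoints = torch.tensor([[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]])\nrotated = rotation_3d_in_axis(points, torch.tensor([np.pi / 2]), axis=2)\n\n# Convert rotated BEV boxes from (x, y, w, h, r) to (x1, y1, x2, y2, r).\nbev_boxes = xywhr2xyxyr(torch.tensor([[0.0, 0.0, 2.0, 4.0, 0.3]]))\n\n# Resolve the box class and box mode for a coordinate system by name.\nbox_type_3d, box_mode_3d = get_box_type('LiDAR')\n\n# Project camera-frame points to the image plane with a 4x4 matrix.\nuv = points_cam2img(torch.tensor([[1.0, 2.0, 5.0]]), torch.eye(4))\nprint(wrapped, rotated.shape, bev_boxes, box_mode_3d, uv)\n"
  },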
  {
    "path": "mmdet3d/core/bbox/transforms.py",
    "content": "import torch\n\n\ndef bbox3d_mapping_back(bboxes, scale_factor, flip_horizontal, flip_vertical):\n    \"\"\"Map bboxes from testing scale to original image scale.\n\n    Args:\n        bboxes (:obj:`BaseInstance3DBoxes`): Boxes to be mapped back.\n        scale_factor (float): Scale factor.\n        flip_horizontal (bool): Whether to flip horizontally.\n        flip_vertical (bool): Whether to flip vertically.\n\n    Returns:\n        :obj:`BaseInstance3DBoxes`: Boxes mapped back.\n    \"\"\"\n    new_bboxes = bboxes.clone()\n    if flip_horizontal:\n        new_bboxes.flip('horizontal')\n    if flip_vertical:\n        new_bboxes.flip('vertical')\n    new_bboxes.scale(1 / scale_factor)\n\n    return new_bboxes\n\n\ndef bbox3d2roi(bbox_list):\n    \"\"\"Convert a list of bounding boxes to roi format.\n\n    Args:\n        bbox_list (list[torch.Tensor]): A list of bounding boxes\n            corresponding to a batch of images.\n\n    Returns:\n        torch.Tensor: Region of interests in shape (n, c), where \\\n            the channels are in order of [batch_ind, x, y ...].\n    \"\"\"\n    rois_list = []\n    for img_id, bboxes in enumerate(bbox_list):\n        if bboxes.size(0) > 0:\n            img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)\n            rois = torch.cat([img_inds, bboxes], dim=-1)\n        else:\n            rois = torch.zeros_like(bboxes)\n        rois_list.append(rois)\n    rois = torch.cat(rois_list, 0)\n    return rois\n\n\ndef bbox3d2result(bboxes, scores, labels):\n    \"\"\"Convert detection results to a list of numpy arrays.\n\n    Args:\n        bboxes (torch.Tensor): Bounding boxes with shape of (n, 5).\n        labels (torch.Tensor): Labels with shape of (n, ).\n        scores (torch.Tensor): Scores with shape of (n, ).\n\n    Returns:\n        dict[str, torch.Tensor]: Bounding box results in cpu mode.\n\n            - boxes_3d (torch.Tensor): 3D boxes.\n            - scores (torch.Tensor): Prediction scores.\n            - labels_3d (torch.Tensor): Box labels.\n    \"\"\"\n    return dict(\n        boxes_3d=bboxes.to('cpu'),\n        scores_3d=scores.cpu(),\n        labels_3d=labels.cpu())\n"
  },
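  {
    "path": "examples/hypothetical_bbox_transforms_demo.py",
    "content": "# Hypothetical usage sketch; this example file is not part of the original\n# tree. It shows how the helpers in mmdet3d/core/bbox/transforms.py are\n# called on plain tensors.\nimport torch\n\nfrom mmdet3d.core.bbox.transforms import bbox3d2result, bbox3d2roi\n\n# A batch of two samples with 2 and 1 predicted boxes of 7 parameters each.\nboxes_per_sample = [torch.rand(2, 7), torch.rand(1, 7)]\n\n# Prepend the sample index to every box; the result has shape (3, 8).\nrois = bbox3d2roi(boxes_per_sample)\n\n# Pack one sample's detections into the dict format used downstream.\nresult = bbox3d2result(boxes_per_sample[0],\n                       torch.tensor([0.9, 0.4]),\n                       torch.tensor([0, 2]))\nprint(rois.shape, sorted(result.keys()))\n"
  },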
  {
    "path": "mmdet3d/core/evaluation/__init__.py",
    "content": "from .indoor_eval import indoor_eval\nfrom .kitti_utils import kitti_eval, kitti_eval_coco_style\nfrom .lyft_eval import lyft_eval\nfrom .seg_eval import seg_eval\n\n__all__ = [\n    'kitti_eval_coco_style', 'kitti_eval', 'indoor_eval', 'lyft_eval',\n    'seg_eval'\n]\n"
  },
  {
    "path": "mmdet3d/core/evaluation/indoor_eval.py",
    "content": "import numpy as np\nimport torch\nfrom mmcv.utils import print_log\nfrom terminaltables import AsciiTable\n\n\ndef average_precision(recalls, precisions, mode='area'):\n    \"\"\"Calculate average precision (for single or multiple scales).\n\n    Args:\n        recalls (np.ndarray): Recalls with shape of (num_scales, num_dets) \\\n            or (num_dets, ).\n        precisions (np.ndarray): Precisions with shape of \\\n            (num_scales, num_dets) or (num_dets, ).\n        mode (str): 'area' or '11points', 'area' means calculating the area\n            under precision-recall curve, '11points' means calculating\n            the average precision of recalls at [0, 0.1, ..., 1]\n\n    Returns:\n        float or np.ndarray: Calculated average precision.\n    \"\"\"\n    if recalls.ndim == 1:\n        recalls = recalls[np.newaxis, :]\n        precisions = precisions[np.newaxis, :]\n\n    assert recalls.shape == precisions.shape\n    assert recalls.ndim == 2\n\n    num_scales = recalls.shape[0]\n    ap = np.zeros(num_scales, dtype=np.float32)\n    if mode == 'area':\n        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)\n        ones = np.ones((num_scales, 1), dtype=recalls.dtype)\n        mrec = np.hstack((zeros, recalls, ones))\n        mpre = np.hstack((zeros, precisions, zeros))\n        for i in range(mpre.shape[1] - 1, 0, -1):\n            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])\n        for i in range(num_scales):\n            ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]\n            ap[i] = np.sum(\n                (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])\n    elif mode == '11points':\n        for i in range(num_scales):\n            for thr in np.arange(0, 1 + 1e-3, 0.1):\n                precs = precisions[i, recalls[i, :] >= thr]\n                prec = precs.max() if precs.size > 0 else 0\n                ap[i] += prec\n            ap /= 11\n    else:\n        raise ValueError(\n            'Unrecognized mode, only \"area\" and \"11points\" are supported')\n    return ap\n\n\ndef eval_det_cls(pred, gt, iou_thr=None):\n    \"\"\"Generic functions to compute precision/recall for object detection for a\n    single class.\n\n    Args:\n        pred (dict): Predictions mapping from image id to bounding boxes \\\n            and scores.\n        gt (dict): Ground truths mapping from image id to bounding boxes.\n        iou_thr (list[float]): A list of iou thresholds.\n\n    Return:\n        tuple (np.ndarray, np.ndarray, float): Recalls, precisions and \\\n            average precision.\n    \"\"\"\n\n    # {img_id: {'bbox': box structure, 'det': matched list}}\n    class_recs = {}\n    npos = 0\n    for img_id in gt.keys():\n        cur_gt_num = len(gt[img_id])\n        if cur_gt_num != 0:\n            gt_cur = torch.zeros([cur_gt_num, 7], dtype=torch.float32)\n            for i in range(cur_gt_num):\n                gt_cur[i] = gt[img_id][i].tensor\n            bbox = gt[img_id][0].new_box(gt_cur)\n        else:\n            bbox = gt[img_id]\n        det = [[False] * len(bbox) for i in iou_thr]\n        npos += len(bbox)\n        class_recs[img_id] = {'bbox': bbox, 'det': det}\n\n    # construct dets\n    image_ids = []\n    confidence = []\n    ious = []\n    for img_id in pred.keys():\n        cur_num = len(pred[img_id])\n        if cur_num == 0:\n            continue\n        pred_cur = torch.zeros((cur_num, 7), dtype=torch.float32)\n        box_idx = 0\n        for box, score in pred[img_id]:\n            
image_ids.append(img_id)\n            confidence.append(score)\n            pred_cur[box_idx] = box.tensor\n            box_idx += 1\n        pred_cur = box.new_box(pred_cur)\n        gt_cur = class_recs[img_id]['bbox']\n        if len(gt_cur) > 0:\n            # calculate iou in each image\n            iou_cur = pred_cur.overlaps(pred_cur, gt_cur)\n            for i in range(cur_num):\n                ious.append(iou_cur[i])\n        else:\n            for i in range(cur_num):\n                ious.append(np.zeros(1))\n\n    confidence = np.array(confidence)\n\n    # sort by confidence\n    sorted_ind = np.argsort(-confidence)\n    image_ids = [image_ids[x] for x in sorted_ind]\n    ious = [ious[x] for x in sorted_ind]\n\n    # go down dets and mark TPs and FPs\n    nd = len(image_ids)\n    tp_thr = [np.zeros(nd) for i in iou_thr]\n    fp_thr = [np.zeros(nd) for i in iou_thr]\n    for d in range(nd):\n        R = class_recs[image_ids[d]]\n        iou_max = -np.inf\n        BBGT = R['bbox']\n        cur_iou = ious[d]\n\n        if len(BBGT) > 0:\n            # compute overlaps\n            for j in range(len(BBGT)):\n                # iou = get_iou_main(get_iou_func, (bb, BBGT[j,...]))\n                iou = cur_iou[j]\n                if iou > iou_max:\n                    iou_max = iou\n                    jmax = j\n\n        for iou_idx, thresh in enumerate(iou_thr):\n            if iou_max > thresh:\n                if not R['det'][iou_idx][jmax]:\n                    tp_thr[iou_idx][d] = 1.\n                    R['det'][iou_idx][jmax] = 1\n                else:\n                    fp_thr[iou_idx][d] = 1.\n            else:\n                fp_thr[iou_idx][d] = 1.\n\n    ret = []\n    for iou_idx, thresh in enumerate(iou_thr):\n        # compute precision recall\n        fp = np.cumsum(fp_thr[iou_idx])\n        tp = np.cumsum(tp_thr[iou_idx])\n        recall = tp / float(npos)\n        # avoid divide by zero in case the first detection matches a difficult\n        # ground truth\n        precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)\n        ap = average_precision(recall, precision)\n        ret.append((recall, precision, ap))\n\n    return ret\n\n\ndef eval_map_recall(pred, gt, ovthresh=None):\n    \"\"\"Evaluate mAP and recall.\n\n    Generic functions to compute precision/recall for object detection\n        for multiple classes.\n\n    Args:\n        pred (dict): Information of detection results,\n            which maps class_id and predictions.\n        gt (dict): Information of ground truths, which maps class_id and \\\n            ground truths.\n        ovthresh (list[float]): iou threshold.\n            Default: None.\n\n    Return:\n        tuple[dict]: dict results of recall, AP, and precision for all classes.\n    \"\"\"\n\n    ret_values = {}\n    for classname in gt.keys():\n        if classname in pred:\n            ret_values[classname] = eval_det_cls(pred[classname],\n                                                 gt[classname], ovthresh)\n    recall = [{} for i in ovthresh]\n    precision = [{} for i in ovthresh]\n    ap = [{} for i in ovthresh]\n\n    for label in gt.keys():\n        for iou_idx, thresh in enumerate(ovthresh):\n            if label in pred:\n                recall[iou_idx][label], precision[iou_idx][label], ap[iou_idx][\n                    label] = ret_values[label][iou_idx]\n            else:\n                recall[iou_idx][label] = np.zeros(1)\n                precision[iou_idx][label] = np.zeros(1)\n                
ap[iou_idx][label] = np.zeros(1)\n\n    return recall, precision, ap\n\n\ndef indoor_eval(gt_annos,\n                dt_annos,\n                metric,\n                label2cat,\n                logger=None,\n                box_type_3d=None,\n                box_mode_3d=None):\n    \"\"\"Indoor Evaluation.\n\n    Evaluate the result of the detection.\n\n    Args:\n        gt_annos (list[dict]): Ground truth annotations.\n        dt_annos (list[dict]): Detection annotations. the dict\n            includes the following keys\n\n            - labels_3d (torch.Tensor): Labels of boxes.\n            - boxes_3d (:obj:`BaseInstance3DBoxes`): \\\n                3D bounding boxes in Depth coordinate.\n            - scores_3d (torch.Tensor): Scores of boxes.\n        metric (list[float]): IoU thresholds for computing average precisions.\n        label2cat (dict): Map from label to category.\n        logger (logging.Logger | str | None): The way to print the mAP\n            summary. See `mmdet.utils.print_log()` for details. Default: None.\n\n    Return:\n        dict[str, float]: Dict of results.\n    \"\"\"\n    assert len(dt_annos) == len(gt_annos)\n    pred = {}  # map {class_id: pred}\n    gt = {}  # map {class_id: gt}\n    for img_id in range(len(dt_annos)):\n        # parse detected annotations\n        det_anno = dt_annos[img_id]\n        for i in range(len(det_anno['labels_3d'])):\n            label = det_anno['labels_3d'].numpy()[i]\n            bbox = det_anno['boxes_3d'].convert_to(box_mode_3d)[i]\n            score = det_anno['scores_3d'].numpy()[i]\n            if label not in pred:\n                pred[int(label)] = {}\n            if img_id not in pred[label]:\n                pred[int(label)][img_id] = []\n            if label not in gt:\n                gt[int(label)] = {}\n            if img_id not in gt[label]:\n                gt[int(label)][img_id] = []\n            pred[int(label)][img_id].append((bbox, score))\n\n        # parse gt annotations\n        gt_anno = gt_annos[img_id]\n        if gt_anno['gt_num'] != 0:\n            gt_boxes = box_type_3d(\n                gt_anno['gt_boxes_upright_depth'],\n                box_dim=gt_anno['gt_boxes_upright_depth'].shape[-1],\n                origin=(0.5, 0.5, 0.5)).convert_to(box_mode_3d)\n            labels_3d = gt_anno['class']\n        else:\n            gt_boxes = box_type_3d(np.array([], dtype=np.float32))\n            labels_3d = np.array([], dtype=np.int64)\n\n        for i in range(len(labels_3d)):\n            label = labels_3d[i]\n            bbox = gt_boxes[i]\n            if label not in gt:\n                gt[label] = {}\n            if img_id not in gt[label]:\n                gt[label][img_id] = []\n            gt[label][img_id].append(bbox)\n\n    rec, prec, ap = eval_map_recall(pred, gt, metric)\n    ret_dict = dict()\n    header = ['classes']\n    table_columns = [[label2cat[label]\n                      for label in ap[0].keys()] + ['Overall']]\n\n    for i, iou_thresh in enumerate(metric):\n        header.append(f'AP_{iou_thresh:.2f}')\n        header.append(f'AR_{iou_thresh:.2f}')\n        rec_list = []\n        for label in ap[i].keys():\n            ret_dict[f'{label2cat[label]}_AP_{iou_thresh:.2f}'] = float(\n                ap[i][label][0])\n        ret_dict[f'mAP_{iou_thresh:.2f}'] = float(\n            np.mean(list(ap[i].values())))\n\n        table_columns.append(list(map(float, list(ap[i].values()))))\n        table_columns[-1] += [ret_dict[f'mAP_{iou_thresh:.2f}']]\n        table_columns[-1] = 
[f'{x:.4f}' for x in table_columns[-1]]\n\n        for label in rec[i].keys():\n            ret_dict[f'{label2cat[label]}_rec_{iou_thresh:.2f}'] = float(\n                rec[i][label][-1])\n            rec_list.append(rec[i][label][-1])\n        ret_dict[f'mAR_{iou_thresh:.2f}'] = float(np.mean(rec_list))\n\n        table_columns.append(list(map(float, rec_list)))\n        table_columns[-1] += [ret_dict[f'mAR_{iou_thresh:.2f}']]\n        table_columns[-1] = [f'{x:.4f}' for x in table_columns[-1]]\n\n    table_data = [header]\n    table_rows = list(zip(*table_columns))\n    table_data += table_rows\n    table = AsciiTable(table_data)\n    table.inner_footing_row_border = True\n    print_log('\\n' + table.table, logger=logger)\n\n    return ret_dict\n"
  },
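  {
    "path": "examples/hypothetical_average_precision_demo.py",
    "content": "# Hypothetical worked example; this file is not part of the original tree.\n# It shows how the two modes of average_precision in\n# mmdet3d/core/evaluation/indoor_eval.py behave on a tiny recall/precision\n# curve.\nimport numpy as np\n\nfrom mmdet3d.core.evaluation.indoor_eval import average_precision\n\nrecalls = np.array([0.1, 0.4, 0.7, 1.0], dtype=np.float32)\nprecisions = np.array([1.0, 0.8, 0.6, 0.5], dtype=np.float32)\n\n# 'area' integrates the precision envelope over the recall axis.\nap_area = average_precision(recalls, precisions, mode='area')\n\n# '11points' averages precision sampled at recalls 0.0, 0.1, ..., 1.0.\nap_11pt = average_precision(recalls, precisions, mode='11points')\nprint(ap_area, ap_11pt)\n"
  },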
  {
    "path": "mmdet3d/core/evaluation/kitti_utils/__init__.py",
    "content": "from .eval import kitti_eval, kitti_eval_coco_style\n\n__all__ = ['kitti_eval', 'kitti_eval_coco_style']\n"
  },
  {
    "path": "mmdet3d/core/evaluation/kitti_utils/eval.py",
    "content": "import gc\nimport io as sysio\nimport numba\nimport numpy as np\n\n\n@numba.jit\ndef get_thresholds(scores: np.ndarray, num_gt, num_sample_pts=41):\n    scores.sort()\n    scores = scores[::-1]\n    current_recall = 0\n    thresholds = []\n    for i, score in enumerate(scores):\n        l_recall = (i + 1) / num_gt\n        if i < (len(scores) - 1):\n            r_recall = (i + 2) / num_gt\n        else:\n            r_recall = l_recall\n        if (((r_recall - current_recall) < (current_recall - l_recall))\n                and (i < (len(scores) - 1))):\n            continue\n        # recall = l_recall\n        thresholds.append(score)\n        current_recall += 1 / (num_sample_pts - 1.0)\n    return thresholds\n\n\ndef clean_data(gt_anno, dt_anno, current_class, difficulty):\n    CLASS_NAMES = ['car', 'pedestrian', 'cyclist']\n    MIN_HEIGHT = [40, 25, 25]\n    MAX_OCCLUSION = [0, 1, 2]\n    MAX_TRUNCATION = [0.15, 0.3, 0.5]\n    dc_bboxes, ignored_gt, ignored_dt = [], [], []\n    current_cls_name = CLASS_NAMES[current_class].lower()\n    num_gt = len(gt_anno['name'])\n    num_dt = len(dt_anno['name'])\n    num_valid_gt = 0\n    for i in range(num_gt):\n        bbox = gt_anno['bbox'][i]\n        gt_name = gt_anno['name'][i].lower()\n        height = bbox[3] - bbox[1]\n        valid_class = -1\n        if (gt_name == current_cls_name):\n            valid_class = 1\n        elif (current_cls_name == 'Pedestrian'.lower()\n              and 'Person_sitting'.lower() == gt_name):\n            valid_class = 0\n        elif (current_cls_name == 'Car'.lower() and 'Van'.lower() == gt_name):\n            valid_class = 0\n        else:\n            valid_class = -1\n        ignore = False\n        if ((gt_anno['occluded'][i] > MAX_OCCLUSION[difficulty])\n                or (gt_anno['truncated'][i] > MAX_TRUNCATION[difficulty])\n                or (height <= MIN_HEIGHT[difficulty])):\n            ignore = True\n        if valid_class == 1 and not ignore:\n            ignored_gt.append(0)\n            num_valid_gt += 1\n        elif (valid_class == 0 or (ignore and (valid_class == 1))):\n            ignored_gt.append(1)\n        else:\n            ignored_gt.append(-1)\n    # for i in range(num_gt):\n        if gt_anno['name'][i] == 'DontCare':\n            dc_bboxes.append(gt_anno['bbox'][i])\n    for i in range(num_dt):\n        if (dt_anno['name'][i].lower() == current_cls_name):\n            valid_class = 1\n        else:\n            valid_class = -1\n        height = abs(dt_anno['bbox'][i, 3] - dt_anno['bbox'][i, 1])\n        if height < MIN_HEIGHT[difficulty]:\n            ignored_dt.append(1)\n        elif valid_class == 1:\n            ignored_dt.append(0)\n        else:\n            ignored_dt.append(-1)\n\n    return num_valid_gt, ignored_gt, ignored_dt, dc_bboxes\n\n\n@numba.jit(nopython=True)\ndef image_box_overlap(boxes, query_boxes, criterion=-1):\n    N = boxes.shape[0]\n    K = query_boxes.shape[0]\n    overlaps = np.zeros((N, K), dtype=boxes.dtype)\n    for k in range(K):\n        qbox_area = ((query_boxes[k, 2] - query_boxes[k, 0]) *\n                     (query_boxes[k, 3] - query_boxes[k, 1]))\n        for n in range(N):\n            iw = (\n                min(boxes[n, 2], query_boxes[k, 2]) -\n                max(boxes[n, 0], query_boxes[k, 0]))\n            if iw > 0:\n                ih = (\n                    min(boxes[n, 3], query_boxes[k, 3]) -\n                    max(boxes[n, 1], query_boxes[k, 1]))\n                if ih > 0:\n                    if 
criterion == -1:\n                        ua = ((boxes[n, 2] - boxes[n, 0]) *\n                              (boxes[n, 3] - boxes[n, 1]) + qbox_area -\n                              iw * ih)\n                    elif criterion == 0:\n                        ua = ((boxes[n, 2] - boxes[n, 0]) *\n                              (boxes[n, 3] - boxes[n, 1]))\n                    elif criterion == 1:\n                        ua = qbox_area\n                    else:\n                        ua = 1.0\n                    overlaps[n, k] = iw * ih / ua\n    return overlaps\n\n\ndef bev_box_overlap(boxes, qboxes, criterion=-1):\n    from .rotate_iou import rotate_iou_gpu_eval\n    riou = rotate_iou_gpu_eval(boxes, qboxes, criterion)\n    return riou\n\n\n@numba.jit(nopython=True, parallel=True)\ndef d3_box_overlap_kernel(boxes, qboxes, rinc, criterion=-1):\n    # ONLY support overlap in CAMERA, not lidar.\n    # TODO: change to use prange for parallel mode, should check the difference\n    N, K = boxes.shape[0], qboxes.shape[0]\n    for i in numba.prange(N):\n        for j in numba.prange(K):\n            if rinc[i, j] > 0:\n                # iw = (min(boxes[i, 1] + boxes[i, 4], qboxes[j, 1] +\n                #         qboxes[j, 4]) - max(boxes[i, 1], qboxes[j, 1]))\n                iw = (\n                    min(boxes[i, 1], qboxes[j, 1]) -\n                    max(boxes[i, 1] - boxes[i, 4],\n                        qboxes[j, 1] - qboxes[j, 4]))\n\n                if iw > 0:\n                    area1 = boxes[i, 3] * boxes[i, 4] * boxes[i, 5]\n                    area2 = qboxes[j, 3] * qboxes[j, 4] * qboxes[j, 5]\n                    inc = iw * rinc[i, j]\n                    if criterion == -1:\n                        ua = (area1 + area2 - inc)\n                    elif criterion == 0:\n                        ua = area1\n                    elif criterion == 1:\n                        ua = area2\n                    else:\n                        ua = inc\n                    rinc[i, j] = inc / ua\n                else:\n                    rinc[i, j] = 0.0\n\n\ndef d3_box_overlap(boxes, qboxes, criterion=-1):\n    from .rotate_iou import rotate_iou_gpu_eval\n    rinc = rotate_iou_gpu_eval(boxes[:, [0, 2, 3, 5, 6]],\n                               qboxes[:, [0, 2, 3, 5, 6]], 2)\n    d3_box_overlap_kernel(boxes, qboxes, rinc, criterion)\n    return rinc\n\n\n@numba.jit(nopython=True)\ndef compute_statistics_jit(overlaps,\n                           gt_datas,\n                           dt_datas,\n                           ignored_gt,\n                           ignored_det,\n                           dc_bboxes,\n                           metric,\n                           min_overlap,\n                           thresh=0,\n                           compute_fp=False,\n                           compute_aos=False):\n\n    det_size = dt_datas.shape[0]\n    gt_size = gt_datas.shape[0]\n    dt_scores = dt_datas[:, -1]\n    dt_alphas = dt_datas[:, 4]\n    gt_alphas = gt_datas[:, 4]\n    dt_bboxes = dt_datas[:, :4]\n    # gt_bboxes = gt_datas[:, :4]\n\n    assigned_detection = [False] * det_size\n    ignored_threshold = [False] * det_size\n    if compute_fp:\n        for i in range(det_size):\n            if (dt_scores[i] < thresh):\n                ignored_threshold[i] = True\n    NO_DETECTION = -10000000\n    tp, fp, fn, similarity = 0, 0, 0, 0\n    # thresholds = [0.0]\n    # delta = [0.0]\n    thresholds = np.zeros((gt_size, ))\n    thresh_idx = 0\n    delta = np.zeros((gt_size, ))\n    
delta_idx = 0\n    for i in range(gt_size):\n        if ignored_gt[i] == -1:\n            continue\n        det_idx = -1\n        valid_detection = NO_DETECTION\n        max_overlap = 0\n        assigned_ignored_det = False\n\n        for j in range(det_size):\n            if (ignored_det[j] == -1):\n                continue\n            if (assigned_detection[j]):\n                continue\n            if (ignored_threshold[j]):\n                continue\n            overlap = overlaps[j, i]\n            dt_score = dt_scores[j]\n            if (not compute_fp and (overlap > min_overlap)\n                    and dt_score > valid_detection):\n                det_idx = j\n                valid_detection = dt_score\n            elif (compute_fp and (overlap > min_overlap)\n                  and (overlap > max_overlap or assigned_ignored_det)\n                  and ignored_det[j] == 0):\n                max_overlap = overlap\n                det_idx = j\n                valid_detection = 1\n                assigned_ignored_det = False\n            elif (compute_fp and (overlap > min_overlap)\n                  and (valid_detection == NO_DETECTION)\n                  and ignored_det[j] == 1):\n                det_idx = j\n                valid_detection = 1\n                assigned_ignored_det = True\n\n        if (valid_detection == NO_DETECTION) and ignored_gt[i] == 0:\n            fn += 1\n        elif ((valid_detection != NO_DETECTION)\n              and (ignored_gt[i] == 1 or ignored_det[det_idx] == 1)):\n            assigned_detection[det_idx] = True\n        elif valid_detection != NO_DETECTION:\n            tp += 1\n            # thresholds.append(dt_scores[det_idx])\n            thresholds[thresh_idx] = dt_scores[det_idx]\n            thresh_idx += 1\n            if compute_aos:\n                # delta.append(gt_alphas[i] - dt_alphas[det_idx])\n                delta[delta_idx] = gt_alphas[i] - dt_alphas[det_idx]\n                delta_idx += 1\n\n            assigned_detection[det_idx] = True\n    if compute_fp:\n        for i in range(det_size):\n            if (not (assigned_detection[i] or ignored_det[i] == -1\n                     or ignored_det[i] == 1 or ignored_threshold[i])):\n                fp += 1\n        nstuff = 0\n        if metric == 0:\n            overlaps_dt_dc = image_box_overlap(dt_bboxes, dc_bboxes, 0)\n            for i in range(dc_bboxes.shape[0]):\n                for j in range(det_size):\n                    if (assigned_detection[j]):\n                        continue\n                    if (ignored_det[j] == -1 or ignored_det[j] == 1):\n                        continue\n                    if (ignored_threshold[j]):\n                        continue\n                    if overlaps_dt_dc[j, i] > min_overlap:\n                        assigned_detection[j] = True\n                        nstuff += 1\n        fp -= nstuff\n        if compute_aos:\n            tmp = np.zeros((fp + delta_idx, ))\n            # tmp = [0] * fp\n            for i in range(delta_idx):\n                tmp[i + fp] = (1.0 + np.cos(delta[i])) / 2.0\n                # tmp.append((1.0 + np.cos(delta[i])) / 2.0)\n            # assert len(tmp) == fp + tp\n            # assert len(delta) == tp\n            if tp > 0 or fp > 0:\n                similarity = np.sum(tmp)\n            else:\n                similarity = -1\n    return tp, fp, fn, similarity, thresholds[:thresh_idx]\n\n\ndef get_split_parts(num, num_part):\n    same_part = num // num_part\n    remain_num = num % num_part\n    
if remain_num == 0:\n        return [same_part] * num_part\n    else:\n        return [same_part] * num_part + [remain_num]\n\n\n@numba.jit(nopython=True)\ndef fused_compute_statistics(overlaps,\n                             pr,\n                             gt_nums,\n                             dt_nums,\n                             dc_nums,\n                             gt_datas,\n                             dt_datas,\n                             dontcares,\n                             ignored_gts,\n                             ignored_dets,\n                             metric,\n                             min_overlap,\n                             thresholds,\n                             compute_aos=False):\n    gt_num = 0\n    dt_num = 0\n    dc_num = 0\n    for i in range(gt_nums.shape[0]):\n        for t, thresh in enumerate(thresholds):\n            overlap = overlaps[dt_num:dt_num + dt_nums[i],\n                               gt_num:gt_num + gt_nums[i]]\n\n            gt_data = gt_datas[gt_num:gt_num + gt_nums[i]]\n            dt_data = dt_datas[dt_num:dt_num + dt_nums[i]]\n            ignored_gt = ignored_gts[gt_num:gt_num + gt_nums[i]]\n            ignored_det = ignored_dets[dt_num:dt_num + dt_nums[i]]\n            dontcare = dontcares[dc_num:dc_num + dc_nums[i]]\n            tp, fp, fn, similarity, _ = compute_statistics_jit(\n                overlap,\n                gt_data,\n                dt_data,\n                ignored_gt,\n                ignored_det,\n                dontcare,\n                metric,\n                min_overlap=min_overlap,\n                thresh=thresh,\n                compute_fp=True,\n                compute_aos=compute_aos)\n            pr[t, 0] += tp\n            pr[t, 1] += fp\n            pr[t, 2] += fn\n            if similarity != -1:\n                pr[t, 3] += similarity\n        gt_num += gt_nums[i]\n        dt_num += dt_nums[i]\n        dc_num += dc_nums[i]\n\n\ndef calculate_iou_partly(gt_annos, dt_annos, metric, num_parts=50):\n    \"\"\"Fast iou algorithm. this function can be used independently to do result\n    analysis. Must be used in CAMERA coordinate system.\n\n    Args:\n        gt_annos (dict): Must from get_label_annos() in kitti_common.py.\n        dt_annos (dict): Must from get_label_annos() in kitti_common.py.\n        metric (int): Eval type. 
0: bbox, 1: bev, 2: 3d.\n        num_parts (int): A parameter for fast calculate algorithm.\n    \"\"\"\n    assert len(gt_annos) == len(dt_annos)\n    total_dt_num = np.stack([len(a['name']) for a in dt_annos], 0)\n    total_gt_num = np.stack([len(a['name']) for a in gt_annos], 0)\n    num_examples = len(gt_annos)\n    split_parts = get_split_parts(num_examples, num_parts)\n    parted_overlaps = []\n    example_idx = 0\n\n    for num_part in split_parts:\n        gt_annos_part = gt_annos[example_idx:example_idx + num_part]\n        dt_annos_part = dt_annos[example_idx:example_idx + num_part]\n        if metric == 0:\n            gt_boxes = np.concatenate([a['bbox'] for a in gt_annos_part], 0)\n            dt_boxes = np.concatenate([a['bbox'] for a in dt_annos_part], 0)\n            overlap_part = image_box_overlap(gt_boxes, dt_boxes)\n        elif metric == 1:\n            loc = np.concatenate(\n                [a['location'][:, [0, 2]] for a in gt_annos_part], 0)\n            dims = np.concatenate(\n                [a['dimensions'][:, [0, 2]] for a in gt_annos_part], 0)\n            rots = np.concatenate([a['rotation_y'] for a in gt_annos_part], 0)\n            gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                      axis=1)\n            loc = np.concatenate(\n                [a['location'][:, [0, 2]] for a in dt_annos_part], 0)\n            dims = np.concatenate(\n                [a['dimensions'][:, [0, 2]] for a in dt_annos_part], 0)\n            rots = np.concatenate([a['rotation_y'] for a in dt_annos_part], 0)\n            dt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                      axis=1)\n            overlap_part = bev_box_overlap(gt_boxes,\n                                           dt_boxes).astype(np.float64)\n        elif metric == 2:\n            loc = np.concatenate([a['location'] for a in gt_annos_part], 0)\n            dims = np.concatenate([a['dimensions'] for a in gt_annos_part], 0)\n            rots = np.concatenate([a['rotation_y'] for a in gt_annos_part], 0)\n            gt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                      axis=1)\n            loc = np.concatenate([a['location'] for a in dt_annos_part], 0)\n            dims = np.concatenate([a['dimensions'] for a in dt_annos_part], 0)\n            rots = np.concatenate([a['rotation_y'] for a in dt_annos_part], 0)\n            dt_boxes = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                      axis=1)\n            overlap_part = d3_box_overlap(gt_boxes,\n                                          dt_boxes).astype(np.float64)\n        else:\n            raise ValueError('unknown metric')\n        parted_overlaps.append(overlap_part)\n        example_idx += num_part\n    overlaps = []\n    example_idx = 0\n    for j, num_part in enumerate(split_parts):\n        gt_annos_part = gt_annos[example_idx:example_idx + num_part]\n        dt_annos_part = dt_annos[example_idx:example_idx + num_part]\n        gt_num_idx, dt_num_idx = 0, 0\n        for i in range(num_part):\n            gt_box_num = total_gt_num[example_idx + i]\n            dt_box_num = total_dt_num[example_idx + i]\n            overlaps.append(\n                parted_overlaps[j][gt_num_idx:gt_num_idx + gt_box_num,\n                                   dt_num_idx:dt_num_idx + dt_box_num])\n            gt_num_idx += gt_box_num\n            dt_num_idx += dt_box_num\n        example_idx += num_part\n\n    
return overlaps, parted_overlaps, total_gt_num, total_dt_num\n\n\ndef _prepare_data(gt_annos, dt_annos, current_class, difficulty):\n    gt_datas_list = []\n    dt_datas_list = []\n    total_dc_num = []\n    ignored_gts, ignored_dets, dontcares = [], [], []\n    total_num_valid_gt = 0\n    for i in range(len(gt_annos)):\n        rets = clean_data(gt_annos[i], dt_annos[i], current_class, difficulty)\n        num_valid_gt, ignored_gt, ignored_det, dc_bboxes = rets\n        ignored_gts.append(np.array(ignored_gt, dtype=np.int64))\n        ignored_dets.append(np.array(ignored_det, dtype=np.int64))\n        if len(dc_bboxes) == 0:\n            dc_bboxes = np.zeros((0, 4)).astype(np.float64)\n        else:\n            dc_bboxes = np.stack(dc_bboxes, 0).astype(np.float64)\n        total_dc_num.append(dc_bboxes.shape[0])\n        dontcares.append(dc_bboxes)\n        total_num_valid_gt += num_valid_gt\n        gt_datas = np.concatenate(\n            [gt_annos[i]['bbox'], gt_annos[i]['alpha'][..., np.newaxis]], 1)\n        dt_datas = np.concatenate([\n            dt_annos[i]['bbox'], dt_annos[i]['alpha'][..., np.newaxis],\n            dt_annos[i]['score'][..., np.newaxis]\n        ], 1)\n        gt_datas_list.append(gt_datas)\n        dt_datas_list.append(dt_datas)\n    total_dc_num = np.stack(total_dc_num, axis=0)\n    return (gt_datas_list, dt_datas_list, ignored_gts, ignored_dets, dontcares,\n            total_dc_num, total_num_valid_gt)\n\n\ndef eval_class(gt_annos,\n               dt_annos,\n               current_classes,\n               difficultys,\n               metric,\n               min_overlaps,\n               compute_aos=False,\n               num_parts=200):\n    \"\"\"Kitti eval. support 2d/bev/3d/aos eval. support 0.5:0.05:0.95 coco AP.\n\n    Args:\n        gt_annos (dict): Must from get_label_annos() in kitti_common.py.\n        dt_annos (dict): Must from get_label_annos() in kitti_common.py.\n        current_classes (list[int]): 0: car, 1: pedestrian, 2: cyclist.\n        difficultys (list[int]): Eval difficulty, 0: easy, 1: normal, 2: hard\n        metric (int): Eval type. 0: bbox, 1: bev, 2: 3d\n        min_overlaps (float): Min overlap. 
format:\n            [num_overlap, metric, class].\n        num_parts (int): A parameter for fast calculate algorithm\n\n    Returns:\n        dict[str, np.ndarray]: recall, precision and aos\n    \"\"\"\n    assert len(gt_annos) == len(dt_annos)\n    num_examples = len(gt_annos)\n    if num_examples < num_parts:\n        num_parts = num_examples\n    split_parts = get_split_parts(num_examples, num_parts)\n\n    rets = calculate_iou_partly(dt_annos, gt_annos, metric, num_parts)\n    overlaps, parted_overlaps, total_dt_num, total_gt_num = rets\n    N_SAMPLE_PTS = 41\n    num_minoverlap = len(min_overlaps)\n    num_class = len(current_classes)\n    num_difficulty = len(difficultys)\n    precision = np.zeros(\n        [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS])\n    recall = np.zeros(\n        [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS])\n    aos = np.zeros([num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS])\n    for m, current_class in enumerate(current_classes):\n        for idx_l, difficulty in enumerate(difficultys):\n            rets = _prepare_data(gt_annos, dt_annos, current_class, difficulty)\n            (gt_datas_list, dt_datas_list, ignored_gts, ignored_dets,\n             dontcares, total_dc_num, total_num_valid_gt) = rets\n            for k, min_overlap in enumerate(min_overlaps[:, metric, m]):\n                thresholdss = []\n                for i in range(len(gt_annos)):\n                    rets = compute_statistics_jit(\n                        overlaps[i],\n                        gt_datas_list[i],\n                        dt_datas_list[i],\n                        ignored_gts[i],\n                        ignored_dets[i],\n                        dontcares[i],\n                        metric,\n                        min_overlap=min_overlap,\n                        thresh=0.0,\n                        compute_fp=False)\n                    tp, fp, fn, similarity, thresholds = rets\n                    thresholdss += thresholds.tolist()\n                thresholdss = np.array(thresholdss)\n                thresholds = get_thresholds(thresholdss, total_num_valid_gt)\n                thresholds = np.array(thresholds)\n                pr = np.zeros([len(thresholds), 4])\n                idx = 0\n                for j, num_part in enumerate(split_parts):\n                    gt_datas_part = np.concatenate(\n                        gt_datas_list[idx:idx + num_part], 0)\n                    dt_datas_part = np.concatenate(\n                        dt_datas_list[idx:idx + num_part], 0)\n                    dc_datas_part = np.concatenate(\n                        dontcares[idx:idx + num_part], 0)\n                    ignored_dets_part = np.concatenate(\n                        ignored_dets[idx:idx + num_part], 0)\n                    ignored_gts_part = np.concatenate(\n                        ignored_gts[idx:idx + num_part], 0)\n                    fused_compute_statistics(\n                        parted_overlaps[j],\n                        pr,\n                        total_gt_num[idx:idx + num_part],\n                        total_dt_num[idx:idx + num_part],\n                        total_dc_num[idx:idx + num_part],\n                        gt_datas_part,\n                        dt_datas_part,\n                        dc_datas_part,\n                        ignored_gts_part,\n                        ignored_dets_part,\n                        metric,\n                        min_overlap=min_overlap,\n                        
thresholds=thresholds,\n                        compute_aos=compute_aos)\n                    idx += num_part\n                for i in range(len(thresholds)):\n                    recall[m, idx_l, k, i] = pr[i, 0] / (pr[i, 0] + pr[i, 2])\n                    precision[m, idx_l, k, i] = pr[i, 0] / (\n                        pr[i, 0] + pr[i, 1])\n                    if compute_aos:\n                        aos[m, idx_l, k, i] = pr[i, 3] / (pr[i, 0] + pr[i, 1])\n                for i in range(len(thresholds)):\n                    precision[m, idx_l, k, i] = np.max(\n                        precision[m, idx_l, k, i:], axis=-1)\n                    recall[m, idx_l, k, i] = np.max(\n                        recall[m, idx_l, k, i:], axis=-1)\n                    if compute_aos:\n                        aos[m, idx_l, k, i] = np.max(\n                            aos[m, idx_l, k, i:], axis=-1)\n    ret_dict = {\n        'recall': recall,\n        'precision': precision,\n        'orientation': aos,\n    }\n\n    # clean temp variables\n    del overlaps\n    del parted_overlaps\n\n    gc.collect()\n    return ret_dict\n\n\ndef get_mAP(prec):\n    sums = 0\n    for i in range(0, prec.shape[-1], 4):\n        sums = sums + prec[..., i]\n    return sums / 11 * 100\n\n\ndef print_str(value, *arg, sstream=None):\n    if sstream is None:\n        sstream = sysio.StringIO()\n    sstream.truncate(0)\n    sstream.seek(0)\n    print(value, *arg, file=sstream)\n    return sstream.getvalue()\n\n\ndef do_eval(gt_annos,\n            dt_annos,\n            current_classes,\n            min_overlaps,\n            eval_types=['bbox', 'bev', '3d']):\n    # min_overlaps: [num_minoverlap, metric, num_class]\n    difficultys = [0, 1, 2]\n    mAP_bbox = None\n    mAP_aos = None\n    if 'bbox' in eval_types:\n        ret = eval_class(\n            gt_annos,\n            dt_annos,\n            current_classes,\n            difficultys,\n            0,\n            min_overlaps,\n            compute_aos=('aos' in eval_types))\n        # ret: [num_class, num_diff, num_minoverlap, num_sample_points]\n        mAP_bbox = get_mAP(ret['precision'])\n        if 'aos' in eval_types:\n            mAP_aos = get_mAP(ret['orientation'])\n\n    mAP_bev = None\n    if 'bev' in eval_types:\n        ret = eval_class(gt_annos, dt_annos, current_classes, difficultys, 1,\n                         min_overlaps)\n        mAP_bev = get_mAP(ret['precision'])\n\n    mAP_3d = None\n    if '3d' in eval_types:\n        ret = eval_class(gt_annos, dt_annos, current_classes, difficultys, 2,\n                         min_overlaps)\n        mAP_3d = get_mAP(ret['precision'])\n    return mAP_bbox, mAP_bev, mAP_3d, mAP_aos\n\n\ndef do_coco_style_eval(gt_annos, dt_annos, current_classes, overlap_ranges,\n                       compute_aos):\n    # overlap_ranges: [range, metric, num_class]\n    min_overlaps = np.zeros([10, *overlap_ranges.shape[1:]])\n    for i in range(overlap_ranges.shape[1]):\n        for j in range(overlap_ranges.shape[2]):\n            min_overlaps[:, i, j] = np.linspace(*overlap_ranges[:, i, j])\n    mAP_bbox, mAP_bev, mAP_3d, mAP_aos = do_eval(gt_annos, dt_annos,\n                                                 current_classes, min_overlaps,\n                                                 compute_aos)\n    # ret: [num_class, num_diff, num_minoverlap]\n    mAP_bbox = mAP_bbox.mean(-1)\n    mAP_bev = mAP_bev.mean(-1)\n    mAP_3d = mAP_3d.mean(-1)\n    if mAP_aos is not None:\n        mAP_aos = mAP_aos.mean(-1)\n    return mAP_bbox, 
mAP_bev, mAP_3d, mAP_aos\n\n\ndef kitti_eval(gt_annos,\n               dt_annos,\n               current_classes,\n               eval_types=['bbox', 'bev', '3d']):\n    \"\"\"KITTI evaluation.\n\n    Args:\n        gt_annos (list[dict]): Contain gt information of each sample.\n        dt_annos (list[dict]): Contain detected information of each sample.\n        current_classes (list[str]): Classes to evaluation.\n        eval_types (list[str], optional): Types to eval.\n            Defaults to ['bbox', 'bev', '3d'].\n\n    Returns:\n        tuple: String and dict of evaluation results.\n    \"\"\"\n    assert len(eval_types) > 0, 'must contain at least one evaluation type'\n    if 'aos' in eval_types:\n        assert 'bbox' in eval_types, 'must evaluate bbox when evaluating aos'\n    overlap_0_7 = np.array([[0.7, 0.5, 0.5, 0.7,\n                             0.5], [0.7, 0.5, 0.5, 0.7, 0.5],\n                            [0.7, 0.5, 0.5, 0.7, 0.5]])\n    overlap_0_5 = np.array([[0.7, 0.5, 0.5, 0.7, 0.5],\n                            [0.5, 0.25, 0.25, 0.5, 0.25],\n                            [0.5, 0.25, 0.25, 0.5, 0.25]])\n    min_overlaps = np.stack([overlap_0_7, overlap_0_5], axis=0)  # [2, 3, 5]\n    class_to_name = {\n        0: 'Car',\n        1: 'Pedestrian',\n        2: 'Cyclist',\n        3: 'Van',\n        4: 'Person_sitting',\n    }\n    name_to_class = {v: n for n, v in class_to_name.items()}\n    if not isinstance(current_classes, (list, tuple)):\n        current_classes = [current_classes]\n    current_classes_int = []\n    for curcls in current_classes:\n        if isinstance(curcls, str):\n            current_classes_int.append(name_to_class[curcls])\n        else:\n            current_classes_int.append(curcls)\n    current_classes = current_classes_int\n    min_overlaps = min_overlaps[:, :, current_classes]\n    result = ''\n    # check whether alpha is valid\n    compute_aos = False\n    pred_alpha = False\n    valid_alpha_gt = False\n    for anno in dt_annos:\n        if anno['alpha'].shape[0] != 0:\n            pred_alpha = True\n            break\n    for anno in gt_annos:\n        if anno['alpha'][0] != -10:\n            valid_alpha_gt = True\n            break\n    compute_aos = (pred_alpha and valid_alpha_gt)\n    if compute_aos:\n        eval_types.append('aos')\n\n    mAPbbox, mAPbev, mAP3d, mAPaos = do_eval(gt_annos, dt_annos,\n                                             current_classes, min_overlaps,\n                                             eval_types)\n\n    ret_dict = {}\n    difficulty = ['easy', 'moderate', 'hard']\n    for j, curcls in enumerate(current_classes):\n        # mAP threshold array: [num_minoverlap, metric, class]\n        # mAP result: [num_class, num_diff, num_minoverlap]\n        curcls_name = class_to_name[curcls]\n        for i in range(min_overlaps.shape[0]):\n            # prepare results for print\n            result += ('{} AP@{:.2f}, {:.2f}, {:.2f}:\\n'.format(\n                curcls_name, *min_overlaps[i, :, j]))\n            if mAPbbox is not None:\n                result += 'bbox AP:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                    *mAPbbox[j, :, i])\n            if mAPbev is not None:\n                result += 'bev  AP:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                    *mAPbev[j, :, i])\n            if mAP3d is not None:\n                result += '3d   AP:{:.4f}, {:.4f}, {:.4f}\\n'.format(\n                    *mAP3d[j, :, i])\n\n            if compute_aos:\n                result += 'aos  AP:{:.2f}, {:.2f}, 
{:.2f}\\n'.format(\n                    *mAPaos[j, :, i])\n\n            # prepare results for logger\n            for idx in range(3):\n                if i == 0:\n                    postfix = f'{difficulty[idx]}_strict'\n                else:\n                    postfix = f'{difficulty[idx]}_loose'\n                prefix = f'KITTI/{curcls_name}'\n                if mAP3d is not None:\n                    ret_dict[f'{prefix}_3D_{postfix}'] = mAP3d[j, idx, i]\n                if mAPbev is not None:\n                    ret_dict[f'{prefix}_BEV_{postfix}'] = mAPbev[j, idx, i]\n                if mAPbbox is not None:\n                    ret_dict[f'{prefix}_2D_{postfix}'] = mAPbbox[j, idx, i]\n\n    # calculate mAP over all classes if there are multiple classes\n    if len(current_classes) > 1:\n        # prepare results for print\n        result += ('\\nOverall AP@{}, {}, {}:\\n'.format(*difficulty))\n        if mAPbbox is not None:\n            mAPbbox = mAPbbox.mean(axis=0)\n            result += 'bbox AP:{:.4f}, {:.4f}, {:.4f}\\n'.format(*mAPbbox[:, 0])\n        if mAPbev is not None:\n            mAPbev = mAPbev.mean(axis=0)\n            result += 'bev  AP:{:.4f}, {:.4f}, {:.4f}\\n'.format(*mAPbev[:, 0])\n        if mAP3d is not None:\n            mAP3d = mAP3d.mean(axis=0)\n            result += '3d   AP:{:.4f}, {:.4f}, {:.4f}\\n'.format(*mAP3d[:, 0])\n        if compute_aos:\n            mAPaos = mAPaos.mean(axis=0)\n            result += 'aos  AP:{:.2f}, {:.2f}, {:.2f}\\n'.format(*mAPaos[:, 0])\n\n        # prepare results for logger\n        for idx in range(3):\n            postfix = f'{difficulty[idx]}'\n            if mAP3d is not None:\n                ret_dict[f'KITTI/Overall_3D_{postfix}'] = mAP3d[idx, 0]\n            if mAPbev is not None:\n                ret_dict[f'KITTI/Overall_BEV_{postfix}'] = mAPbev[idx, 0]\n            if mAPbbox is not None:\n                ret_dict[f'KITTI/Overall_2D_{postfix}'] = mAPbbox[idx, 0]\n\n    return result, ret_dict\n\n\ndef kitti_eval_coco_style(gt_annos, dt_annos, current_classes):\n    \"\"\"coco style evaluation of kitti.\n\n    Args:\n        gt_annos (list[dict]): Contain gt information of each sample.\n        dt_annos (list[dict]): Contain detected information of each sample.\n        current_classes (list[str]): Classes to evaluation.\n\n    Returns:\n        string: Evaluation results.\n    \"\"\"\n    class_to_name = {\n        0: 'Car',\n        1: 'Pedestrian',\n        2: 'Cyclist',\n        3: 'Van',\n        4: 'Person_sitting',\n    }\n    class_to_range = {\n        0: [0.5, 0.95, 10],\n        1: [0.25, 0.7, 10],\n        2: [0.25, 0.7, 10],\n        3: [0.5, 0.95, 10],\n        4: [0.25, 0.7, 10],\n    }\n    name_to_class = {v: n for n, v in class_to_name.items()}\n    if not isinstance(current_classes, (list, tuple)):\n        current_classes = [current_classes]\n    current_classes_int = []\n    for curcls in current_classes:\n        if isinstance(curcls, str):\n            current_classes_int.append(name_to_class[curcls])\n        else:\n            current_classes_int.append(curcls)\n    current_classes = current_classes_int\n    overlap_ranges = np.zeros([3, 3, len(current_classes)])\n    for i, curcls in enumerate(current_classes):\n        overlap_ranges[:, :, i] = np.array(class_to_range[curcls])[:,\n                                                                   np.newaxis]\n    result = ''\n    # check whether alpha is valid\n    compute_aos = False\n    for anno in dt_annos:\n        if 
anno['alpha'].shape[0] != 0:\n            if anno['alpha'][0] != -10:\n                compute_aos = True\n            break\n    mAPbbox, mAPbev, mAP3d, mAPaos = do_coco_style_eval(\n        gt_annos, dt_annos, current_classes, overlap_ranges, compute_aos)\n    for j, curcls in enumerate(current_classes):\n        # mAP threshold array: [num_minoverlap, metric, class]\n        # mAP result: [num_class, num_diff, num_minoverlap]\n        o_range = np.array(class_to_range[curcls])[[0, 2, 1]]\n        o_range[1] = (o_range[2] - o_range[0]) / (o_range[1] - 1)\n        result += print_str((f'{class_to_name[curcls]} '\n                             'coco AP@{:.2f}:{:.2f}:{:.2f}:'.format(*o_range)))\n        result += print_str((f'bbox AP:{mAPbbox[j, 0]:.2f}, '\n                             f'{mAPbbox[j, 1]:.2f}, '\n                             f'{mAPbbox[j, 2]:.2f}'))\n        result += print_str((f'bev  AP:{mAPbev[j, 0]:.2f}, '\n                             f'{mAPbev[j, 1]:.2f}, '\n                             f'{mAPbev[j, 2]:.2f}'))\n        result += print_str((f'3d   AP:{mAP3d[j, 0]:.2f}, '\n                             f'{mAP3d[j, 1]:.2f}, '\n                             f'{mAP3d[j, 2]:.2f}'))\n        if compute_aos:\n            result += print_str((f'aos  AP:{mAPaos[j, 0]:.2f}, '\n                                 f'{mAPaos[j, 1]:.2f}, '\n                                 f'{mAPaos[j, 2]:.2f}'))\n    return result\n"
  },
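  {
    "path": "examples/hypothetical_kitti_eval_helpers_demo.py",
    "content": "# Hypothetical sketch; this file is not part of the original tree. It calls\n# two self-contained helpers from the KITTI eval module above:\n# image_box_overlap (2D overlap with a selectable criterion) and\n# get_split_parts (the chunking used by calculate_iou_partly).\nimport numpy as np\n\nfrom mmdet3d.core.evaluation.kitti_utils.eval import (get_split_parts,\n                                                       image_box_overlap)\n\nboxes = np.array([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 15.0, 15.0]])\nquery_boxes = np.array([[0.0, 0.0, 10.0, 10.0]])\n\n# criterion=-1 is plain IoU; 0 normalizes by the box area, 1 by the query\n# box area.\noverlaps = image_box_overlap(boxes, query_boxes, -1)\n\n# 103 examples split into 50 parts of size 2 plus one remainder part of 3.\nparts = get_split_parts(103, 50)\nprint(overlaps, parts)\n"
  },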
  {
    "path": "mmdet3d/core/evaluation/kitti_utils/rotate_iou.py",
    "content": "#####################\n# Based on https://github.com/hongzhenwang/RRPN-revise\n# Licensed under The MIT License\n# Author: yanyan, scrin@foxmail.com\n#####################\nimport math\nimport numba\nimport numpy as np\nfrom numba import cuda\n\n\n@numba.jit(nopython=True)\ndef div_up(m, n):\n    return m // n + (m % n > 0)\n\n\n@cuda.jit('(float32[:], float32[:], float32[:])', device=True, inline=True)\ndef trangle_area(a, b, c):\n    return ((a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) *\n            (b[0] - c[0])) / 2.0\n\n\n@cuda.jit('(float32[:], int32)', device=True, inline=True)\ndef area(int_pts, num_of_inter):\n    area_val = 0.0\n    for i in range(num_of_inter - 2):\n        area_val += abs(\n            trangle_area(int_pts[:2], int_pts[2 * i + 2:2 * i + 4],\n                         int_pts[2 * i + 4:2 * i + 6]))\n    return area_val\n\n\n@cuda.jit('(float32[:], int32)', device=True, inline=True)\ndef sort_vertex_in_convex_polygon(int_pts, num_of_inter):\n    if num_of_inter > 0:\n        center = cuda.local.array((2, ), dtype=numba.float32)\n        center[:] = 0.0\n        for i in range(num_of_inter):\n            center[0] += int_pts[2 * i]\n            center[1] += int_pts[2 * i + 1]\n        center[0] /= num_of_inter\n        center[1] /= num_of_inter\n        v = cuda.local.array((2, ), dtype=numba.float32)\n        vs = cuda.local.array((16, ), dtype=numba.float32)\n        for i in range(num_of_inter):\n            v[0] = int_pts[2 * i] - center[0]\n            v[1] = int_pts[2 * i + 1] - center[1]\n            d = math.sqrt(v[0] * v[0] + v[1] * v[1])\n            v[0] = v[0] / d\n            v[1] = v[1] / d\n            if v[1] < 0:\n                v[0] = -2 - v[0]\n            vs[i] = v[0]\n        j = 0\n        temp = 0\n        for i in range(1, num_of_inter):\n            if vs[i - 1] > vs[i]:\n                temp = vs[i]\n                tx = int_pts[2 * i]\n                ty = int_pts[2 * i + 1]\n                j = i\n                while j > 0 and vs[j - 1] > temp:\n                    vs[j] = vs[j - 1]\n                    int_pts[j * 2] = int_pts[j * 2 - 2]\n                    int_pts[j * 2 + 1] = int_pts[j * 2 - 1]\n                    j -= 1\n\n                vs[j] = temp\n                int_pts[j * 2] = tx\n                int_pts[j * 2 + 1] = ty\n\n\n@cuda.jit(\n    '(float32[:], float32[:], int32, int32, float32[:])',\n    device=True,\n    inline=True)\ndef line_segment_intersection(pts1, pts2, i, j, temp_pts):\n    A = cuda.local.array((2, ), dtype=numba.float32)\n    B = cuda.local.array((2, ), dtype=numba.float32)\n    C = cuda.local.array((2, ), dtype=numba.float32)\n    D = cuda.local.array((2, ), dtype=numba.float32)\n\n    A[0] = pts1[2 * i]\n    A[1] = pts1[2 * i + 1]\n\n    B[0] = pts1[2 * ((i + 1) % 4)]\n    B[1] = pts1[2 * ((i + 1) % 4) + 1]\n\n    C[0] = pts2[2 * j]\n    C[1] = pts2[2 * j + 1]\n\n    D[0] = pts2[2 * ((j + 1) % 4)]\n    D[1] = pts2[2 * ((j + 1) % 4) + 1]\n    BA0 = B[0] - A[0]\n    BA1 = B[1] - A[1]\n    DA0 = D[0] - A[0]\n    CA0 = C[0] - A[0]\n    DA1 = D[1] - A[1]\n    CA1 = C[1] - A[1]\n    acd = DA1 * CA0 > CA1 * DA0\n    bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0])\n    if acd != bcd:\n        abc = CA1 * BA0 > BA1 * CA0\n        abd = DA1 * BA0 > BA1 * DA0\n        if abc != abd:\n            DC0 = D[0] - C[0]\n            DC1 = D[1] - C[1]\n            ABBA = A[0] * B[1] - B[0] * A[1]\n            CDDC = C[0] * D[1] - D[0] * C[1]\n            DH = BA1 * DC0 - BA0 * 
DC1\n            Dx = ABBA * DC0 - BA0 * CDDC\n            Dy = ABBA * DC1 - BA1 * CDDC\n            temp_pts[0] = Dx / DH\n            temp_pts[1] = Dy / DH\n            return True\n    return False\n\n\n@cuda.jit(\n    '(float32[:], float32[:], int32, int32, float32[:])',\n    device=True,\n    inline=True)\ndef line_segment_intersection_v1(pts1, pts2, i, j, temp_pts):\n    a = cuda.local.array((2, ), dtype=numba.float32)\n    b = cuda.local.array((2, ), dtype=numba.float32)\n    c = cuda.local.array((2, ), dtype=numba.float32)\n    d = cuda.local.array((2, ), dtype=numba.float32)\n\n    a[0] = pts1[2 * i]\n    a[1] = pts1[2 * i + 1]\n\n    b[0] = pts1[2 * ((i + 1) % 4)]\n    b[1] = pts1[2 * ((i + 1) % 4) + 1]\n\n    c[0] = pts2[2 * j]\n    c[1] = pts2[2 * j + 1]\n\n    d[0] = pts2[2 * ((j + 1) % 4)]\n    d[1] = pts2[2 * ((j + 1) % 4) + 1]\n\n    area_abc = trangle_area(a, b, c)\n    area_abd = trangle_area(a, b, d)\n\n    if area_abc * area_abd >= 0:\n        return False\n\n    area_cda = trangle_area(c, d, a)\n    area_cdb = area_cda + area_abc - area_abd\n\n    if area_cda * area_cdb >= 0:\n        return False\n    t = area_cda / (area_abd - area_abc)\n\n    dx = t * (b[0] - a[0])\n    dy = t * (b[1] - a[1])\n    temp_pts[0] = a[0] + dx\n    temp_pts[1] = a[1] + dy\n    return True\n\n\n@cuda.jit('(float32, float32, float32[:])', device=True, inline=True)\ndef point_in_quadrilateral(pt_x, pt_y, corners):\n    ab0 = corners[2] - corners[0]\n    ab1 = corners[3] - corners[1]\n\n    ad0 = corners[6] - corners[0]\n    ad1 = corners[7] - corners[1]\n\n    ap0 = pt_x - corners[0]\n    ap1 = pt_y - corners[1]\n\n    abab = ab0 * ab0 + ab1 * ab1\n    abap = ab0 * ap0 + ab1 * ap1\n    adad = ad0 * ad0 + ad1 * ad1\n    adap = ad0 * ap0 + ad1 * ap1\n\n    return abab >= abap and abap >= 0 and adad >= adap and adap >= 0\n\n\n@cuda.jit('(float32[:], float32[:], float32[:])', device=True, inline=True)\ndef quadrilateral_intersection(pts1, pts2, int_pts):\n    num_of_inter = 0\n    for i in range(4):\n        if point_in_quadrilateral(pts1[2 * i], pts1[2 * i + 1], pts2):\n            int_pts[num_of_inter * 2] = pts1[2 * i]\n            int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1]\n            num_of_inter += 1\n        if point_in_quadrilateral(pts2[2 * i], pts2[2 * i + 1], pts1):\n            int_pts[num_of_inter * 2] = pts2[2 * i]\n            int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1]\n            num_of_inter += 1\n    temp_pts = cuda.local.array((2, ), dtype=numba.float32)\n    for i in range(4):\n        for j in range(4):\n            has_pts = line_segment_intersection(pts1, pts2, i, j, temp_pts)\n            if has_pts:\n                int_pts[num_of_inter * 2] = temp_pts[0]\n                int_pts[num_of_inter * 2 + 1] = temp_pts[1]\n                num_of_inter += 1\n\n    return num_of_inter\n\n\n@cuda.jit('(float32[:], float32[:])', device=True, inline=True)\ndef rbbox_to_corners(corners, rbbox):\n    # generate clockwise corners and rotate it clockwise\n    angle = rbbox[4]\n    a_cos = math.cos(angle)\n    a_sin = math.sin(angle)\n    center_x = rbbox[0]\n    center_y = rbbox[1]\n    x_d = rbbox[2]\n    y_d = rbbox[3]\n    corners_x = cuda.local.array((4, ), dtype=numba.float32)\n    corners_y = cuda.local.array((4, ), dtype=numba.float32)\n    corners_x[0] = -x_d / 2\n    corners_x[1] = -x_d / 2\n    corners_x[2] = x_d / 2\n    corners_x[3] = x_d / 2\n    corners_y[0] = -y_d / 2\n    corners_y[1] = y_d / 2\n    corners_y[2] = y_d / 2\n    corners_y[3] = -y_d / 2\n    for 
i in range(4):\n        corners[2 * i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x\n        corners[2 * i +\n                1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y\n\n\n@cuda.jit('(float32[:], float32[:])', device=True, inline=True)\ndef inter(rbbox1, rbbox2):\n    \"\"\"Compute intersection of two rotated boxes.\n\n    Args:\n        rbox1 (np.ndarray, shape=[5]): Rotated 2d box.\n        rbox2 (np.ndarray, shape=[5]): Rotated 2d box.\n\n    Returns:\n        float: Intersection of two rotated boxes.\n    \"\"\"\n    corners1 = cuda.local.array((8, ), dtype=numba.float32)\n    corners2 = cuda.local.array((8, ), dtype=numba.float32)\n    intersection_corners = cuda.local.array((16, ), dtype=numba.float32)\n\n    rbbox_to_corners(corners1, rbbox1)\n    rbbox_to_corners(corners2, rbbox2)\n\n    num_intersection = quadrilateral_intersection(corners1, corners2,\n                                                  intersection_corners)\n    sort_vertex_in_convex_polygon(intersection_corners, num_intersection)\n    # print(intersection_corners.reshape([-1, 2])[:num_intersection])\n\n    return area(intersection_corners, num_intersection)\n\n\n@cuda.jit('(float32[:], float32[:], int32)', device=True, inline=True)\ndef devRotateIoUEval(rbox1, rbox2, criterion=-1):\n    \"\"\"Compute rotated iou on device.\n\n    Args:\n        rbox1 (np.ndarray, shape=[5]): Rotated 2d box.\n        rbox2 (np.ndarray, shape=[5]): Rotated 2d box.\n        criterion (int, optional): Indicate different type of iou.\n            -1 indicate `area_inter / (area1 + area2 - area_inter)`,\n            0 indicate `area_inter / area1`,\n            1 indicate `area_inter / area2`.\n\n    Returns:\n        float: iou between two input boxes.\n    \"\"\"\n    area1 = rbox1[2] * rbox1[3]\n    area2 = rbox2[2] * rbox2[3]\n    area_inter = inter(rbox1, rbox2)\n    if criterion == -1:\n        return area_inter / (area1 + area2 - area_inter)\n    elif criterion == 0:\n        return area_inter / area1\n    elif criterion == 1:\n        return area_inter / area2\n    else:\n        return area_inter\n\n\n@cuda.jit(\n    '(int64, int64, float32[:], float32[:], float32[:], int32)',\n    fastmath=False)\ndef rotate_iou_kernel_eval(N,\n                           K,\n                           dev_boxes,\n                           dev_query_boxes,\n                           dev_iou,\n                           criterion=-1):\n    \"\"\"Kernel of computing rotated iou.\n\n    Args:\n        N (int): The number of boxes.\n        K (int): The number of query boxes.\n        dev_boxes (np.ndarray): Boxes on device.\n        dev_query_boxes (np.ndarray): Query boxes on device.\n        dev_iou (np.ndarray): Computed iou to return.\n        criterion (int, optional): Indicate different type of iou.\n            -1 indicate `area_inter / (area1 + area2 - area_inter)`,\n            0 indicate `area_inter / area1`,\n            1 indicate `area_inter / area2`.\n    \"\"\"\n    threadsPerBlock = 8 * 8\n    row_start = cuda.blockIdx.x\n    col_start = cuda.blockIdx.y\n    tx = cuda.threadIdx.x\n    row_size = min(N - row_start * threadsPerBlock, threadsPerBlock)\n    col_size = min(K - col_start * threadsPerBlock, threadsPerBlock)\n    block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)\n    block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)\n\n    dev_query_box_idx = threadsPerBlock * col_start + tx\n    dev_box_idx = threadsPerBlock * row_start + tx\n    if (tx < 
col_size):\n        block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0]\n        block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1]\n        block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2]\n        block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3]\n        block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4]\n    if (tx < row_size):\n        block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0]\n        block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1]\n        block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2]\n        block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3]\n        block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4]\n    cuda.syncthreads()\n    if tx < row_size:\n        for i in range(col_size):\n            offset = (\n                row_start * threadsPerBlock * K + col_start * threadsPerBlock +\n                tx * K + i)\n            dev_iou[offset] = devRotateIoUEval(block_qboxes[i * 5:i * 5 + 5],\n                                               block_boxes[tx * 5:tx * 5 + 5],\n                                               criterion)\n\n\ndef rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):\n    \"\"\"Rotated box iou running in gpu. 500x faster than cpu version (take 5ms\n    in one example with numba.cuda code). convert from [this project](\n    https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).\n\n    Args:\n        boxes (torch.Tensor): rbboxes. format: centers, dims,\n            angles(clockwise when positive) with the shape of [N, 5].\n        query_boxes (float tensor: [K, 5]): rbboxes to compute iou with boxes.\n        device_id (int, optional): Defaults to 0. Device to use.\n        criterion (int, optional): Indicate different type of iou.\n            -1 indicate `area_inter / (area1 + area2 - area_inter)`,\n            0 indicate `area_inter / area1`,\n            1 indicate `area_inter / area2`.\n\n    Returns:\n        np.ndarray: IoU results.\n    \"\"\"\n    boxes = boxes.astype(np.float32)\n    query_boxes = query_boxes.astype(np.float32)\n    N = boxes.shape[0]\n    K = query_boxes.shape[0]\n    iou = np.zeros((N, K), dtype=np.float32)\n    if N == 0 or K == 0:\n        return iou\n    threadsPerBlock = 8 * 8\n    cuda.select_device(device_id)\n    blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock))\n\n    stream = cuda.stream()\n    with stream.auto_synchronize():\n        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)\n        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)\n        iou_dev = cuda.to_device(iou.reshape([-1]), stream)\n        rotate_iou_kernel_eval[blockspergrid, threadsPerBlock,\n                               stream](N, K, boxes_dev, query_boxes_dev,\n                                       iou_dev, criterion)\n        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)\n    return iou.astype(boxes.dtype)\n"
  },
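The kernel above tiles boxes into shared memory in 64-box blocks and evaluates `devRotateIoUEval` for each pair. A minimal usage sketch of `rotate_iou_gpu_eval`, assuming the usual mmdet3d module path and a CUDA-capable GPU visible to numba:

```python
import numpy as np

# Assumed import path; adjust to wherever this file lives in your checkout.
from mmdet3d.core.evaluation.kitti_utils.rotate_iou import rotate_iou_gpu_eval

# Boxes are (center_x, center_y, x_size, y_size, yaw) rows, float32.
boxes = np.array([[0.0, 0.0, 4.0, 2.0, 0.0]], dtype=np.float32)
query_boxes = np.array([[0.0, 0.0, 4.0, 2.0, 0.0],
                        [10.0, 10.0, 4.0, 2.0, 0.3]], dtype=np.float32)

# Returns an (N, K) IoU matrix; criterion=-1 selects the standard IoU.
iou = rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0)
print(iou)  # [[~1.0, ~0.0]]: identical boxes vs. disjoint boxes
```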
  {
    "path": "mmdet3d/core/evaluation/lyft_eval.py",
    "content": "import mmcv\nimport numpy as np\nfrom lyft_dataset_sdk.eval.detection.mAP_evaluation import (Box3D, get_ap,\n                                                            get_class_names,\n                                                            get_ious,\n                                                            group_by_key,\n                                                            wrap_in_box)\nfrom mmcv.utils import print_log\nfrom os import path as osp\nfrom terminaltables import AsciiTable\n\n\ndef load_lyft_gts(lyft, data_root, eval_split, logger=None):\n    \"\"\"Loads ground truth boxes from database.\n\n    Args:\n        lyft (:obj:`LyftDataset`): Lyft class in the sdk.\n        data_root (str): Root of data for reading splits.\n        eval_split (str): Name of the split for evaluation.\n        logger (logging.Logger | str | None): Logger used for printing\n        related information during evaluation. Default: None.\n\n    Returns:\n        list[dict]: List of annotation dictionaries.\n    \"\"\"\n    split_scenes = mmcv.list_from_file(\n        osp.join(data_root, f'{eval_split}.txt'))\n\n    # Read out all sample_tokens in DB.\n    sample_tokens_all = [s['token'] for s in lyft.sample]\n    assert len(sample_tokens_all) > 0, 'Error: Database has no samples!'\n\n    if eval_split == 'test':\n        # Check that you aren't trying to cheat :)\n        assert len(lyft.sample_annotation) > 0, \\\n            'Error: You are trying to evaluate on the test set \\\n             but you do not have the annotations!'\n\n    sample_tokens = []\n    for sample_token in sample_tokens_all:\n        scene_token = lyft.get('sample', sample_token)['scene_token']\n        scene_record = lyft.get('scene', scene_token)\n        if scene_record['name'] in split_scenes:\n            sample_tokens.append(sample_token)\n\n    all_annotations = []\n\n    print_log('Loading ground truth annotations...', logger=logger)\n    # Load annotations and filter predictions and annotations.\n    for sample_token in mmcv.track_iter_progress(sample_tokens):\n        sample = lyft.get('sample', sample_token)\n        sample_annotation_tokens = sample['anns']\n        for sample_annotation_token in sample_annotation_tokens:\n            # Get label name in detection task and filter unused labels.\n            sample_annotation = \\\n                lyft.get('sample_annotation', sample_annotation_token)\n            detection_name = sample_annotation['category_name']\n            if detection_name is None:\n                continue\n            annotation = {\n                'sample_token': sample_token,\n                'translation': sample_annotation['translation'],\n                'size': sample_annotation['size'],\n                'rotation': sample_annotation['rotation'],\n                'name': detection_name,\n            }\n            all_annotations.append(annotation)\n\n    return all_annotations\n\n\ndef load_lyft_predictions(res_path):\n    \"\"\"Load Lyft predictions from json file.\n\n    Args:\n        res_path (str): Path of result json file recording detections.\n\n    Returns:\n        list[dict]: List of prediction dictionaries.\n    \"\"\"\n    predictions = mmcv.load(res_path)\n    predictions = predictions['results']\n    all_preds = []\n    for sample_token in predictions.keys():\n        all_preds.extend(predictions[sample_token])\n    return all_preds\n\n\ndef lyft_eval(lyft, data_root, res_path, eval_set, output_dir, logger=None):\n    \"\"\"Evaluation API for 
Lyft dataset.\n\n    Args:\n        lyft (:obj:`LyftDataset`): Lyft class in the sdk.\n        data_root (str): Root of data for reading splits.\n        res_path (str): Path of result json file recording detections.\n        eval_set (str): Name of the split for evaluation.\n        output_dir (str): Output directory for output json files.\n        logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n\n    Returns:\n        dict[str, float]: The evaluation results.\n    \"\"\"\n    # evaluate by lyft metrics\n    gts = load_lyft_gts(lyft, data_root, eval_set, logger)\n    predictions = load_lyft_predictions(res_path)\n\n    class_names = get_class_names(gts)\n    print('Calculating mAP@0.5:0.95...')\n\n    iou_thresholds = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]\n    metrics = {}\n    average_precisions = \\\n        get_classwise_aps(gts, predictions, class_names, iou_thresholds)\n    APs_data = [['IOU', 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]]\n\n    mAPs = np.mean(average_precisions, axis=0)\n    mAPs_cate = np.mean(average_precisions, axis=1)\n    final_mAP = np.mean(mAPs)\n\n    metrics['average_precisions'] = average_precisions.tolist()\n    metrics['mAPs'] = mAPs.tolist()\n    metrics['Final mAP'] = float(final_mAP)\n    metrics['class_names'] = class_names\n    metrics['mAPs_cate'] = mAPs_cate.tolist()\n\n    APs_data = [['class', 'mAP@0.5:0.95']]\n    for i in range(len(class_names)):\n        row = [class_names[i], round(mAPs_cate[i], 3)]\n        APs_data.append(row)\n    APs_data.append(['Overall', round(final_mAP, 3)])\n    APs_table = AsciiTable(APs_data, title='mAPs@0.5:0.95')\n    APs_table.inner_footing_row_border = True\n    print_log(APs_table.table, logger=logger)\n\n    res_path = osp.join(output_dir, 'lyft_metrics.json')\n    mmcv.dump(metrics, res_path)\n    return metrics\n\n\ndef get_classwise_aps(gt, predictions, class_names, iou_thresholds):\n    \"\"\"Returns an array with an average precision per class.\n\n    Note: Ground truth and predictions should have the following format.\n\n    .. 
code-block::\n\n    gt = [{\n        'sample_token': '0f0e3ce89d2324d8b45aa55a7b4f8207\n                         fbb039a550991a5149214f98cec136ac',\n        'translation': [974.2811881299899, 1714.6815014457964,\n                        -23.689857123368846],\n        'size': [1.796, 4.488, 1.664],\n        'rotation': [0.14882026466054782, 0, 0, 0.9888642620837121],\n        'name': 'car'\n    }]\n\n    predictions = [{\n        'sample_token': '0f0e3ce89d2324d8b45aa55a7b4f8207\n                         fbb039a550991a5149214f98cec136ac',\n        'translation': [971.8343488872263, 1713.6816097857359,\n                        -25.82534357061308],\n        'size': [2.519726579986132, 7.810161372666739, 3.483438286096803],\n        'rotation': [0.10913582721095375, 0.04099572636992043,\n                     0.01927712319721745, 1.029328402625659],\n        'name': 'car',\n        'score': 0.3077029437237213\n    }]\n\n    Args:\n        gt (list[dict]): list of dictionaries in the format described below.\n        predictions (list[dict]): list of dictionaries in the format\n            described below.\n        class_names (list[str]): list of the class names.\n        iou_thresholds (list[float]): IOU thresholds used to calculate\n            TP / FN\n\n    Returns:\n        np.ndarray: an array with an average precision per class.\n    \"\"\"\n    assert all([0 <= iou_th <= 1 for iou_th in iou_thresholds])\n\n    gt_by_class_name = group_by_key(gt, 'name')\n    pred_by_class_name = group_by_key(predictions, 'name')\n\n    average_precisions = np.zeros((len(class_names), len(iou_thresholds)))\n\n    for class_id, class_name in enumerate(class_names):\n        if class_name in pred_by_class_name:\n            recalls, precisions, average_precision = get_single_class_aps(\n                gt_by_class_name[class_name], pred_by_class_name[class_name],\n                iou_thresholds)\n            average_precisions[class_id, :] = average_precision\n\n    return average_precisions\n\n\ndef get_single_class_aps(gt, predictions, iou_thresholds):\n    \"\"\"Compute recall and precision for all iou thresholds. 
Adapted from\n    LyftDatasetDevkit.\n\n    Args:\n        gt (list[dict]): list of dictionaries in the format described above.\n        predictions (list[dict]): list of dictionaries in the format \\\n            described below.\n        iou_thresholds (list[float]): IOU thresholds used to calculate \\\n            TP / FN\n\n    Returns:\n        tuple[np.ndarray]: Returns (recalls, precisions, average precisions)\n            for each class.\n    \"\"\"\n    num_gts = len(gt)\n    image_gts = group_by_key(gt, 'sample_token')\n    image_gts = wrap_in_box(image_gts)\n\n    sample_gt_checked = {\n        sample_token: np.zeros((len(boxes), len(iou_thresholds)))\n        for sample_token, boxes in image_gts.items()\n    }\n\n    predictions = sorted(predictions, key=lambda x: x['score'], reverse=True)\n\n    # go down dets and mark TPs and FPs\n    num_predictions = len(predictions)\n    tps = np.zeros((num_predictions, len(iou_thresholds)))\n    fps = np.zeros((num_predictions, len(iou_thresholds)))\n\n    for prediction_index, prediction in enumerate(predictions):\n        predicted_box = Box3D(**prediction)\n\n        sample_token = prediction['sample_token']\n\n        max_overlap = -np.inf\n        jmax = -1\n\n        if sample_token in image_gts:\n            gt_boxes = image_gts[sample_token]\n            # gt_boxes per sample\n            gt_checked = sample_gt_checked[sample_token]\n            # gt flags per sample\n        else:\n            gt_boxes = []\n            gt_checked = None\n\n        if len(gt_boxes) > 0:\n            overlaps = get_ious(gt_boxes, predicted_box)\n\n            max_overlap = np.max(overlaps)\n\n            jmax = np.argmax(overlaps)\n\n        for i, iou_threshold in enumerate(iou_thresholds):\n            if max_overlap > iou_threshold:\n                if gt_checked[jmax, i] == 0:\n                    tps[prediction_index, i] = 1.0\n                    gt_checked[jmax, i] = 1\n                else:\n                    fps[prediction_index, i] = 1.0\n            else:\n                fps[prediction_index, i] = 1.0\n\n    # compute precision recall\n    fps = np.cumsum(fps, axis=0)\n    tps = np.cumsum(tps, axis=0)\n\n    recalls = tps / float(num_gts)\n    # avoid divide by zero in case the first detection\n    # matches a difficult ground truth\n    precisions = tps / np.maximum(tps + fps, np.finfo(np.float64).eps)\n\n    aps = []\n    for i in range(len(iou_thresholds)):\n        recall = recalls[:, i]\n        precision = precisions[:, i]\n        assert np.all(0 <= recall) & np.all(recall <= 1)\n        assert np.all(0 <= precision) & np.all(precision <= 1)\n        ap = get_ap(recall, precision)\n        aps.append(ap)\n\n    aps = np.array(aps)\n\n    return recalls, precisions, aps\n"
  },
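The per-class AP computation in `get_single_class_aps` marks each score-sorted prediction as TP or FP per IoU threshold and turns cumulative counts into precision/recall curves. A standalone numpy sketch of that accumulation step (hypothetical TP flags for illustration, not the Lyft SDK matching itself):

```python
import numpy as np

# Predictions are assumed already sorted by descending score; cumulative sums
# of TP/FP flags give the precision/recall curves, as in get_single_class_aps.
num_gts = 3
tp_flags = np.array([1.0, 0.0, 1.0, 1.0, 0.0])  # 1 = matched an unused GT box
fp_flags = 1.0 - tp_flags

tps = np.cumsum(tp_flags)
fps = np.cumsum(fp_flags)

recalls = tps / float(num_gts)
precisions = tps / np.maximum(tps + fps, np.finfo(np.float64).eps)
print(recalls)     # [0.333 0.333 0.667 1.    1.   ] (approximately)
print(precisions)  # [1.    0.5   0.667 0.75  0.6  ] (approximately)
```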
  {
    "path": "mmdet3d/core/evaluation/seg_eval.py",
    "content": "import numpy as np\nfrom mmcv.utils import print_log\nfrom terminaltables import AsciiTable\n\n\ndef fast_hist(preds, labels, num_classes):\n    \"\"\"Compute the confusion matrix for every batch.\n\n    Args:\n        preds (np.ndarray):  Prediction labels of points with shape of\n        (num_points, ).\n        labels (np.ndarray): Ground truth labels of points with shape of\n        (num_points, ).\n        num_classes (int): number of classes\n\n    Returns:\n        np.ndarray: Calculated confusion matrix.\n    \"\"\"\n\n    k = (labels >= 0) & (labels < num_classes)\n    bin_count = np.bincount(\n        num_classes * labels[k].astype(int) + preds[k],\n        minlength=num_classes**2)\n    return bin_count[:num_classes**2].reshape(num_classes, num_classes)\n\n\ndef per_class_iou(hist):\n    \"\"\"Compute the per class iou.\n\n    Args:\n        hist(np.ndarray):  Overall confusion martix\n        (num_classes, num_classes ).\n\n    Returns:\n        np.ndarray: Calculated per class iou\n    \"\"\"\n\n    return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))\n\n\ndef get_acc(hist):\n    \"\"\"Compute the overall accuracy.\n\n    Args:\n        hist(np.ndarray):  Overall confusion martix\n        (num_classes, num_classes ).\n\n    Returns:\n        float: Calculated overall acc\n    \"\"\"\n\n    return np.diag(hist).sum() / hist.sum()\n\n\ndef get_acc_cls(hist):\n    \"\"\"Compute the class average accuracy.\n\n    Args:\n        hist(np.ndarray):  Overall confusion martix\n        (num_classes, num_classes ).\n\n    Returns:\n        float: Calculated class average acc\n    \"\"\"\n\n    return np.nanmean(np.diag(hist) / hist.sum(axis=1))\n\n\ndef seg_eval(gt_labels, seg_preds, label2cat, logger=None):\n    \"\"\"Semantic Segmentation Evaluation.\n\n     Evaluate the result of the Semantic Segmentation.\n\n     Args:\n         gt_labels (list[torch.Tensor]): Ground truth labels.\n         seg_preds  (list[torch.Tensor]): Predtictions\n         label2cat (dict): Map from label to category.\n         logger (logging.Logger | str | None): The way to print the mAP\n            summary. See `mmdet.utils.print_log()` for details. Default: None.\n\n    Return:\n        dict[str, float]: Dict of results.\n    \"\"\"\n    assert len(seg_preds) == len(gt_labels)\n\n    hist_list = []\n    for i in range(len(seg_preds)):\n        hist_list.append(\n            fast_hist(seg_preds[i].numpy().astype(int),\n                      gt_labels[i].numpy().astype(int), len(label2cat)))\n    iou = per_class_iou(sum(hist_list))\n    miou = np.nanmean(iou)\n    acc = get_acc(sum(hist_list))\n    acc_cls = get_acc_cls(sum(hist_list))\n\n    header = ['classes']\n    for i in range(len(label2cat)):\n        header.append(label2cat[i])\n    header.extend(['miou', 'acc', 'acc_cls'])\n\n    ret_dict = dict()\n    table_columns = [['results']]\n    for i in range(len(label2cat)):\n        ret_dict[label2cat[i]] = float(iou[i])\n        table_columns.append([f'{iou[i]:.4f}'])\n    ret_dict['miou'] = float(miou)\n    ret_dict['acc'] = float(acc)\n    ret_dict['acc_cls'] = float(acc_cls)\n\n    table_columns.append([f'{miou:.4f}'])\n    table_columns.append([f'{acc:.4f}'])\n    table_columns.append([f'{acc_cls:.4f}'])\n\n    table_data = [header]\n    table_rows = list(zip(*table_columns))\n    table_data += table_rows\n    table = AsciiTable(table_data)\n    table.inner_footing_row_border = True\n    print_log('\\n' + table.table, logger=logger)\n\n    return ret_dict\n"
  },
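`fast_hist` builds the confusion matrix with a single `np.bincount` by encoding each (label, prediction) pair as `num_classes * label + pred`. A small standalone check of that trick and of `per_class_iou`:

```python
import numpy as np

# Encode each (label, pred) pair as num_classes * label + pred; a single
# bincount then yields the flattened confusion matrix.
num_classes = 3
labels = np.array([0, 0, 1, 2, 2])
preds = np.array([0, 1, 1, 2, 0])

hist = np.bincount(num_classes * labels + preds,
                   minlength=num_classes**2).reshape(num_classes, num_classes)
print(hist)
# [[1 1 0]
#  [0 1 0]
#  [1 0 1]]

iou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
print(iou)  # [0.3333 0.5 0.5] (approximately)
```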
  {
    "path": "mmdet3d/core/evaluation/waymo_utils/prediction_kitti_to_waymo.py",
    "content": "r\"\"\"Adapted from `Waymo to KITTI converter\n    <https://github.com/caizhongang/waymo_kitti_converter>`_.\n\"\"\"\n\ntry:\n    from waymo_open_dataset import dataset_pb2 as open_dataset\nexcept ImportError:\n    raise ImportError(\n        'Please run \"pip install waymo-open-dataset-tf-2-1-0==1.2.0\" '\n        'to install the official devkit first.')\n\nimport mmcv\nimport numpy as np\nimport tensorflow as tf\nfrom glob import glob\nfrom os.path import join\nfrom waymo_open_dataset import label_pb2\nfrom waymo_open_dataset.protos import metrics_pb2\n\n\nclass KITTI2Waymo(object):\n    \"\"\"KITTI predictions to Waymo converter.\n\n    This class serves as the converter to change predictions from KITTI to\n    Waymo format.\n\n    Args:\n        kitti_result_files (list[dict]): Predictions in KITTI format.\n        waymo_tfrecords_dir (str): Directory to load waymo raw data.\n        waymo_results_save_dir (str): Directory to save converted predictions\n            in waymo format (.bin files).\n        waymo_results_final_path (str): Path to save combined\n            predictions in waymo format (.bin file), like 'a/b/c.bin'.\n        prefix (str): Prefix of filename. In general, 0 for training, 1 for\n            validation and 2 for testing.\n        workers (str): Number of parallel processes.\n    \"\"\"\n\n    def __init__(self,\n                 kitti_result_files,\n                 waymo_tfrecords_dir,\n                 waymo_results_save_dir,\n                 waymo_results_final_path,\n                 prefix,\n                 workers=64):\n\n        self.kitti_result_files = kitti_result_files\n        self.waymo_tfrecords_dir = waymo_tfrecords_dir\n        self.waymo_results_save_dir = waymo_results_save_dir\n        self.waymo_results_final_path = waymo_results_final_path\n        self.prefix = prefix\n        self.workers = int(workers)\n        self.name2idx = {}\n        for idx, result in enumerate(kitti_result_files):\n            if len(result['sample_idx']) > 0:\n                self.name2idx[str(result['sample_idx'][0])] = idx\n\n        # turn on eager execution for older tensorflow versions\n        if int(tf.__version__.split('.')[0]) < 2:\n            tf.enable_eager_execution()\n\n        self.k2w_cls_map = {\n            'Car': label_pb2.Label.TYPE_VEHICLE,\n            'Pedestrian': label_pb2.Label.TYPE_PEDESTRIAN,\n            'Sign': label_pb2.Label.TYPE_SIGN,\n            'Cyclist': label_pb2.Label.TYPE_CYCLIST,\n        }\n\n        self.T_ref_to_front_cam = np.array([[0.0, 0.0, 1.0, 0.0],\n                                            [-1.0, 0.0, 0.0, 0.0],\n                                            [0.0, -1.0, 0.0, 0.0],\n                                            [0.0, 0.0, 0.0, 1.0]])\n\n        self.get_file_names()\n        self.create_folder()\n\n    def get_file_names(self):\n        \"\"\"Get file names of waymo raw data.\"\"\"\n        self.waymo_tfrecord_pathnames = sorted(\n            glob(join(self.waymo_tfrecords_dir, '*.tfrecord')))\n        print(len(self.waymo_tfrecord_pathnames), 'tfrecords found.')\n\n    def create_folder(self):\n        \"\"\"Create folder for data conversion.\"\"\"\n        mmcv.mkdir_or_exist(self.waymo_results_save_dir)\n\n    def parse_objects(self, kitti_result, T_k2w, context_name,\n                      frame_timestamp_micros):\n        \"\"\"Parse one prediction with several instances in kitti format and\n        convert them to `Object` proto.\n\n        Args:\n            kitti_result 
(dict): Predictions in kitti format.\n\n                - name (np.ndarray): Class labels of predictions.\n                - dimensions (np.ndarray): Height, width, length of boxes.\n                - location (np.ndarray): Bottom center of boxes (x, y, z).\n                - rotation_y (np.ndarray): Orientation of boxes.\n                - score (np.ndarray): Scores of predictions.\n            T_k2w (np.ndarray): Transformation matrix from kitti to waymo.\n            context_name (str): Context name of the frame.\n            frame_timestamp_micros (int): Frame timestamp.\n\n        Returns:\n            :obj:`Object`: Predictions in waymo dataset Object proto.\n        \"\"\"\n\n        def parse_one_object(instance_idx):\n            \"\"\"Parse one instance in kitti format and convert them to `Object`\n            proto.\n\n            Args:\n                instance_idx (int): Index of the instance to be converted.\n\n            Returns:\n                :obj:`Object`: Predicted instance in waymo dataset \\\n                    Object proto.\n            \"\"\"\n            cls = kitti_result['name'][instance_idx]\n            length = round(kitti_result['dimensions'][instance_idx, 0], 4)\n            height = round(kitti_result['dimensions'][instance_idx, 1], 4)\n            width = round(kitti_result['dimensions'][instance_idx, 2], 4)\n            x = round(kitti_result['location'][instance_idx, 0], 4)\n            y = round(kitti_result['location'][instance_idx, 1], 4)\n            z = round(kitti_result['location'][instance_idx, 2], 4)\n            rotation_y = round(kitti_result['rotation_y'][instance_idx], 4)\n            score = round(kitti_result['score'][instance_idx], 4)\n\n            # y: downwards; move box origin from bottom center (kitti) to\n            # true center (waymo)\n            y -= height / 2\n            # frame transformation: kitti -> waymo\n            x, y, z = self.transform(T_k2w, x, y, z)\n\n            # different conventions\n            heading = -(rotation_y + np.pi / 2)\n            while heading < -np.pi:\n                heading += 2 * np.pi\n            while heading > np.pi:\n                heading -= 2 * np.pi\n\n            box = label_pb2.Label.Box()\n            box.center_x = x\n            box.center_y = y\n            box.center_z = z\n            box.length = length\n            box.width = width\n            box.height = height\n            box.heading = heading\n\n            o = metrics_pb2.Object()\n            o.object.box.CopyFrom(box)\n            o.object.type = self.k2w_cls_map[cls]\n            o.score = score\n\n            o.context_name = context_name\n            o.frame_timestamp_micros = frame_timestamp_micros\n\n            return o\n\n        objects = metrics_pb2.Objects()\n\n        for instance_idx in range(len(kitti_result['name'])):\n            o = parse_one_object(instance_idx)\n            objects.objects.append(o)\n\n        return objects\n\n    def convert_one(self, file_idx):\n        \"\"\"Convert action for single file.\n\n        Args:\n            file_idx (int): Index of the file to be converted.\n        \"\"\"\n        file_pathname = self.waymo_tfrecord_pathnames[file_idx]\n        file_data = tf.data.TFRecordDataset(file_pathname, compression_type='')\n\n        for frame_num, frame_data in enumerate(file_data):\n            frame = open_dataset.Frame()\n            frame.ParseFromString(bytearray(frame_data.numpy()))\n\n            filename = 
f'{self.prefix}{file_idx:03d}{frame_num:03d}'\n\n            for camera in frame.context.camera_calibrations:\n                # FRONT = 1, see dataset.proto for details\n                if camera.name == 1:\n                    T_front_cam_to_vehicle = np.array(\n                        camera.extrinsic.transform).reshape(4, 4)\n\n            T_k2w = T_front_cam_to_vehicle @ self.T_ref_to_front_cam\n\n            context_name = frame.context.name\n            frame_timestamp_micros = frame.timestamp_micros\n\n            if filename in self.name2idx:\n                kitti_result = \\\n                    self.kitti_result_files[self.name2idx[filename]]\n                objects = self.parse_objects(kitti_result, T_k2w, context_name,\n                                             frame_timestamp_micros)\n            else:\n                print(filename, 'not found.')\n                objects = metrics_pb2.Objects()\n\n            with open(\n                    join(self.waymo_results_save_dir, f'{filename}.bin'),\n                    'wb') as f:\n                f.write(objects.SerializeToString())\n\n    def convert(self):\n        \"\"\"Convert action.\"\"\"\n        print('Start converting ...')\n        mmcv.track_parallel_progress(self.convert_one, range(len(self)),\n                                     self.workers)\n        print('\\nFinished ...')\n\n        # combine all files into one .bin\n        pathnames = sorted(glob(join(self.waymo_results_save_dir, '*.bin')))\n        combined = self.combine(pathnames)\n\n        with open(self.waymo_results_final_path, 'wb') as f:\n            f.write(combined.SerializeToString())\n\n    def __len__(self):\n        \"\"\"Length of the filename list.\"\"\"\n        return len(self.waymo_tfrecord_pathnames)\n\n    def transform(self, T, x, y, z):\n        \"\"\"Transform the coordinates with matrix T.\n\n        Args:\n            T (np.ndarray): Transformation matrix.\n            x(float): Coordinate in x axis.\n            y(float): Coordinate in y axis.\n            z(float): Coordinate in z axis.\n\n        Returns:\n            list: Coordinates after transformation.\n        \"\"\"\n        pt_bef = np.array([x, y, z, 1.0]).reshape(4, 1)\n        pt_aft = np.matmul(T, pt_bef)\n        return pt_aft[:3].flatten().tolist()\n\n    def combine(self, pathnames):\n        \"\"\"Combine predictions in waymo format for each sample together.\n\n        Args:\n            pathnames (str): Paths to save predictions.\n\n        Returns:\n            :obj:`Objects`: Combined predictions in Objects proto.\n        \"\"\"\n        combined = metrics_pb2.Objects()\n\n        for pathname in pathnames:\n            objects = metrics_pb2.Objects()\n            with open(pathname, 'rb') as f:\n                objects.ParseFromString(f.read())\n            for o in objects.objects:\n                combined.objects.append(o)\n\n        return combined\n"
  },
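The box conversion in `parse_one_object` moves the KITTI bottom-center origin to Waymo's box center, applies `T_k2w`, and then flips the heading convention. A standalone sketch of just the heading step, as written above:

```python
import numpy as np

# Waymo heading = -(KITTI rotation_y + pi / 2), wrapped into [-pi, pi].
def kitti_ry_to_waymo_heading(rotation_y):
    heading = -(rotation_y + np.pi / 2)
    while heading < -np.pi:
        heading += 2 * np.pi
    while heading > np.pi:
        heading -= 2 * np.pi
    return heading

print(kitti_ry_to_waymo_heading(0.0))    # -pi/2 (~ -1.5708)
print(kitti_ry_to_waymo_heading(np.pi))  # wraps to pi/2 (~ 1.5708)
```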
  {
    "path": "mmdet3d/core/points/__init__.py",
    "content": "from .base_points import BasePoints\nfrom .cam_points import CameraPoints\nfrom .depth_points import DepthPoints\nfrom .lidar_points import LiDARPoints\n\n__all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints']\n\n\ndef get_points_type(points_type):\n    \"\"\"Get the class of points according to coordinate type.\n\n    Args:\n        points_type (str): The type of points coordinate.\n            The valid value are \"CAMERA\", \"LIDAR\", or \"DEPTH\".\n\n    Returns:\n        class: Points type.\n    \"\"\"\n    if points_type == 'CAMERA':\n        points_cls = CameraPoints\n    elif points_type == 'LIDAR':\n        points_cls = LiDARPoints\n    elif points_type == 'DEPTH':\n        points_cls = DepthPoints\n    else:\n        raise ValueError('Only \"points_type\" of \"CAMERA\", \"LIDAR\", or \"DEPTH\"'\n                         f' are supported, got {points_type}')\n\n    return points_cls\n"
  },
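A short usage sketch of `get_points_type`, assuming mmdet3d is installed; the extra intensity column is a hypothetical attribute used only to show `attribute_dims`:

```python
import torch

from mmdet3d.core.points import LiDARPoints, get_points_type

# get_points_type maps a coordinate-system name to the matching Points class.
points_cls = get_points_type('LIDAR')
assert points_cls is LiDARPoints

# Hypothetical points with an extra intensity channel at column index 3.
xyz_i = torch.tensor([[1.0, 2.0, 0.5, 0.9],
                      [3.0, 1.0, 0.2, 0.4]])
points = points_cls(xyz_i, points_dim=4, attribute_dims=dict(intensity=3))
print(points.coord.shape)  # torch.Size([2, 3])
```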
  {
    "path": "mmdet3d/core/points/base_points.py",
    "content": "import numpy as np\nimport torch\nfrom abc import abstractmethod\n\n\nclass BasePoints(object):\n    \"\"\"Base class for Points.\n\n    Args:\n        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.\n        points_dim (int): Number of the dimension of a point.\n            Each row is (x, y, z). Default to 3.\n        attribute_dims (dict): Dictionary to indicate the meaning of extra\n            dimension. Default to None.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x points_dim.\n        points_dim (int): Integer indicating the dimension of a point.\n            Each row is (x, y, z, ...).\n        attribute_dims (bool): Dictionary to indicate the meaning of extra\n            dimension. Default to None.\n        rotation_axis (int): Default rotation axis for points rotation.\n    \"\"\"\n\n    def __init__(self, tensor, points_dim=3, attribute_dims=None):\n        if isinstance(tensor, torch.Tensor):\n            device = tensor.device\n        else:\n            device = torch.device('cpu')\n        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)\n        if tensor.numel() == 0:\n            # Use reshape, so we don't end up creating a new tensor that\n            # does not depend on the inputs (and consequently confuses jit)\n            tensor = tensor.reshape((0, points_dim)).to(\n                dtype=torch.float32, device=device)\n        assert tensor.dim() == 2 and tensor.size(-1) == \\\n            points_dim, tensor.size()\n\n        self.tensor = tensor\n        self.points_dim = points_dim\n        self.attribute_dims = attribute_dims\n        self.rotation_axis = 0\n\n    @property\n    def coord(self):\n        \"\"\"torch.Tensor: Coordinates of each point with size (N, 3).\"\"\"\n        return self.tensor[:, :3]\n\n    @property\n    def height(self):\n        \"\"\"torch.Tensor: A vector with height of each point.\"\"\"\n        if self.attribute_dims is not None and \\\n                'height' in self.attribute_dims.keys():\n            return self.tensor[:, self.attribute_dims['height']]\n        else:\n            return None\n\n    @property\n    def color(self):\n        \"\"\"torch.Tensor: A vector with color of each point.\"\"\"\n        if self.attribute_dims is not None and \\\n                'color' in self.attribute_dims.keys():\n            return self.tensor[:, self.attribute_dims['color']]\n        else:\n            return None\n\n    @property\n    def shape(self):\n        \"\"\"torch.Shape: Shape of points.\"\"\"\n        return self.tensor.shape\n\n    def shuffle(self):\n        \"\"\"Shuffle the points.\"\"\"\n        self.tensor = self.tensor[torch.randperm(\n            self.__len__(), device=self.tensor.device)]\n\n    def rotate(self, rotation, axis=None):\n        \"\"\"Rotate points with the given rotation matrix or angle.\n\n        Args:\n            rotation (float, np.ndarray, torch.Tensor): Rotation matrix\n                or angle.\n            axis (int): Axis to rotate at. 
Defaults to None.\n        \"\"\"\n        if not isinstance(rotation, torch.Tensor):\n            rotation = self.tensor.new_tensor(rotation)\n        assert rotation.shape == torch.Size([3, 3]) or \\\n            rotation.numel() == 1\n\n        if axis is None:\n            axis = self.rotation_axis\n\n        if rotation.numel() == 1:\n            rot_sin = torch.sin(rotation)\n            rot_cos = torch.cos(rotation)\n            if axis == 1:\n                rot_mat_T = rotation.new_tensor([[rot_cos, 0, -rot_sin],\n                                                 [0, 1, 0],\n                                                 [rot_sin, 0, rot_cos]])\n            elif axis == 2 or axis == -1:\n                rot_mat_T = rotation.new_tensor([[rot_cos, -rot_sin, 0],\n                                                 [rot_sin, rot_cos, 0],\n                                                 [0, 0, 1]])\n            elif axis == 0:\n                rot_mat_T = rotation.new_tensor([[0, rot_cos, -rot_sin],\n                                                 [0, rot_sin, rot_cos],\n                                                 [1, 0, 0]])\n            else:\n                raise ValueError('axis should in range')\n            rot_mat_T = rot_mat_T.T\n        elif rotation.numel() == 9:\n            rot_mat_T = rotation\n        else:\n            raise NotImplementedError\n        self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T\n\n    @abstractmethod\n    def flip(self, bev_direction='horizontal'):\n        \"\"\"Flip the points in BEV along given BEV direction.\"\"\"\n        pass\n\n    def translate(self, trans_vector):\n        \"\"\"Translate points with the given translation vector.\n\n        Args:\n            trans_vector (np.ndarray, torch.Tensor): Translation\n                vector of size 3 or nx3.\n        \"\"\"\n        if not isinstance(trans_vector, torch.Tensor):\n            trans_vector = self.tensor.new_tensor(trans_vector)\n        trans_vector = trans_vector.squeeze(0)\n        if trans_vector.dim() == 1:\n            assert trans_vector.shape[0] == 3\n        elif trans_vector.dim() == 2:\n            assert trans_vector.shape[0] == self.tensor.shape[0] and \\\n                trans_vector.shape[1] == 3\n        else:\n            raise NotImplementedError(\n                'Unsupported translation vector of shape {}'.format(\n                    trans_vector.shape))\n        self.tensor[:, :3] += trans_vector\n\n    def in_range_3d(self, point_range):\n        \"\"\"Check whether the points are in the given range.\n\n        Args:\n            point_range (list | torch.Tensor): The range of point\n                (x_min, y_min, z_min, x_max, y_max, z_max)\n\n        Note:\n            In the original implementation of SECOND, checking whether\n            a box in the range checks whether the points are in a convex\n            polygon, we try to reduce the burden for simpler cases.\n\n        Returns:\n            torch.Tensor: A binary vector indicating whether each point is \\\n                inside the reference range.\n        \"\"\"\n        in_range_flags = ((self.tensor[:, 0] > point_range[0])\n                          & (self.tensor[:, 1] > point_range[1])\n                          & (self.tensor[:, 2] > point_range[2])\n                          & (self.tensor[:, 0] < point_range[3])\n                          & (self.tensor[:, 1] < point_range[4])\n                          & (self.tensor[:, 2] < point_range[5]))\n        return in_range_flags\n\n   
 @abstractmethod\n    def in_range_bev(self, point_range):\n        \"\"\"Check whether the points are in the given range.\n\n        Args:\n            point_range (list | torch.Tensor): The range of point\n                in order of (x_min, y_min, x_max, y_max).\n\n        Returns:\n            torch.Tensor: Indicating whether each point is inside \\\n                the reference range.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`CoordMode`): The target Box mode.\n            rt_mat (np.ndarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BasePoints`: The converted box of the same type \\\n                in the `dst` mode.\n        \"\"\"\n        pass\n\n    def scale(self, scale_factor):\n        \"\"\"Scale the points with horizontal and vertical scaling factors.\n\n        Args:\n            scale_factors (float): Scale factors to scale the points.\n        \"\"\"\n        self.tensor[:, :3] *= scale_factor\n\n    def __getitem__(self, item):\n        \"\"\"\n        Note:\n            The following usage are allowed:\n            1. `new_points = points[3]`:\n                return a `Points` that contains only one point.\n            2. `new_points = points[2:10]`:\n                return a slice of points.\n            3. `new_points = points[vector]`:\n                where vector is a torch.BoolTensor with `length = len(points)`.\n                Nonzero elements in the vector will be selected.\n            4. 
`new_points = points[3:11, vector]`:\n                return a slice of points and attribute dims.\n            Note that the returned Points might share storage with this Points,\n            subject to Pytorch's indexing semantics.\n\n        Returns:\n            :obj:`BasePoints`: A new object of  \\\n                :class:`BasePoints` after indexing.\n        \"\"\"\n        original_type = type(self)\n        if isinstance(item, int):\n            return original_type(\n                self.tensor[item].view(1, -1),\n                points_dim=self.points_dim,\n                attribute_dims=self.attribute_dims)\n        elif isinstance(item, tuple) and len(item) == 2:\n            if isinstance(item[1], slice):\n                start = 0 if item[1].start is None else item[1].start\n                stop = self.tensor.shape[1] + \\\n                    1 if item[1].stop is None else item[1].stop\n                step = 1 if item[1].step is None else item[1].step\n                item = list(item)\n                item[1] = list(range(start, stop, step))\n                item = tuple(item)\n            p = self.tensor[item[0], item[1]]\n\n            keep_dims = list(\n                set(item[1]).intersection(set(range(3, self.tensor.shape[1]))))\n            if self.attribute_dims is not None:\n                attribute_dims = self.attribute_dims.copy()\n                for key in self.attribute_dims.keys():\n                    cur_attribute_dim = attribute_dims[key]\n                    if isinstance(cur_attribute_dim, int):\n                        cur_attribute_dims = [cur_attribute_dim]\n                    intersect_attr = list(\n                        set(cur_attribute_dims).intersection(set(keep_dims)))\n                    if len(intersect_attr) == 1:\n                        attribute_dims[key] = intersect_attr[0]\n                    elif len(intersect_attr) > 1:\n                        attribute_dims[key] = intersect_attr\n                    else:\n                        attribute_dims.pop(key)\n            else:\n                attribute_dims = None\n        elif isinstance(item, (slice, np.ndarray, torch.Tensor)):\n            p = self.tensor[item]\n            attribute_dims = self.attribute_dims\n        else:\n            raise NotImplementedError(f'Invalid slice {item}!')\n\n        assert p.dim() == 2, \\\n            f'Indexing on Points with {item} failed to return a matrix!'\n        return original_type(\n            p, points_dim=p.shape[1], attribute_dims=attribute_dims)\n\n    def __len__(self):\n        \"\"\"int: Number of points in the current object.\"\"\"\n        return self.tensor.shape[0]\n\n    def __repr__(self):\n        \"\"\"str: Return a strings that describes the object.\"\"\"\n        return self.__class__.__name__ + '(\\n    ' + str(self.tensor) + ')'\n\n    @classmethod\n    def cat(cls, points_list):\n        \"\"\"Concatenate a list of Points into a single Points.\n\n        Args:\n            points_list (list[:obj:`BasePoints`]): List of points.\n\n        Returns:\n            :obj:`BasePoints`: The concatenated Points.\n        \"\"\"\n        assert isinstance(points_list, (list, tuple))\n        if len(points_list) == 0:\n            return cls(torch.empty(0))\n        assert all(isinstance(points, cls) for points in points_list)\n\n        # use torch.cat (v.s. 
layers.cat)\n        # so the returned points never share storage with input\n        cat_points = cls(\n            torch.cat([p.tensor for p in points_list], dim=0),\n            points_dim=points_list[0].tensor.shape[1],\n            attribute_dims=points_list[0].attribute_dims)\n        return cat_points\n\n    def to(self, device):\n        \"\"\"Convert current points to a specific device.\n\n        Args:\n            device (str | :obj:`torch.device`): The name of the device.\n\n        Returns:\n            :obj:`BasePoints`: A new boxes object on the \\\n                specific device.\n        \"\"\"\n        original_type = type(self)\n        return original_type(\n            self.tensor.to(device),\n            points_dim=self.points_dim,\n            attribute_dims=self.attribute_dims)\n\n    def clone(self):\n        \"\"\"Clone the Points.\n\n        Returns:\n            :obj:`BasePoints`: Box object with the same properties \\\n                as self.\n        \"\"\"\n        original_type = type(self)\n        return original_type(\n            self.tensor.clone(),\n            points_dim=self.points_dim,\n            attribute_dims=self.attribute_dims)\n\n    @property\n    def device(self):\n        \"\"\"str: The device of the points are on.\"\"\"\n        return self.tensor.device\n\n    def __iter__(self):\n        \"\"\"Yield a point as a Tensor of shape (4,) at a time.\n\n        Returns:\n            torch.Tensor: A point of shape (4,).\n        \"\"\"\n        yield from self.tensor\n\n    def new_point(self, data):\n        \"\"\"Create a new point object with data.\n\n        The new point and its tensor has the similar properties \\\n            as self and self.tensor, respectively.\n\n        Args:\n            data (torch.Tensor | numpy.array | list): Data to be copied.\n\n        Returns:\n            :obj:`BasePoints`: A new point object with ``data``, \\\n                the object's other properties are similar to ``self``.\n        \"\"\"\n        new_tensor = self.tensor.new_tensor(data) \\\n            if not isinstance(data, torch.Tensor) else data.to(self.device)\n        original_type = type(self)\n        return original_type(\n            new_tensor,\n            points_dim=self.points_dim,\n            attribute_dims=self.attribute_dims)\n"
  },
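In `BasePoints.rotate`, points are row vectors, so the rotation matrix is built, transposed, and applied on the right. A standalone check of the `axis=2` (yaw) branch with a 90-degree rotation:

```python
import math

import torch

# Mirror the axis=2 branch of BasePoints.rotate: build the matrix, transpose
# it, and right-multiply the (N, 3) coordinates.
angle = math.pi / 2
rot_cos, rot_sin = math.cos(angle), math.sin(angle)
rot_mat_T = torch.tensor([[rot_cos, -rot_sin, 0.0],
                          [rot_sin, rot_cos, 0.0],
                          [0.0, 0.0, 1.0]]).T

points = torch.tensor([[1.0, 0.0, 0.0]])
print(points @ rot_mat_T)  # ~[[0., 1., 0.]]: the x axis rotates onto y
```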
  {
    "path": "mmdet3d/core/points/cam_points.py",
    "content": "from .base_points import BasePoints\n\n\nclass CameraPoints(BasePoints):\n    \"\"\"Points of instances in CAM coordinates.\n\n    Args:\n        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.\n        points_dim (int): Number of the dimension of a point.\n            Each row is (x, y, z). Default to 3.\n        attribute_dims (dict): Dictionary to indicate the meaning of extra\n            dimension. Default to None.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x points_dim.\n        points_dim (int): Integer indicating the dimension of a point.\n            Each row is (x, y, z, ...).\n        attribute_dims (bool): Dictionary to indicate the meaning of extra\n            dimension. Default to None.\n        rotation_axis (int): Default rotation axis for points rotation.\n    \"\"\"\n\n    def __init__(self, tensor, points_dim=3, attribute_dims=None):\n        super(CameraPoints, self).__init__(\n            tensor, points_dim=points_dim, attribute_dims=attribute_dims)\n        self.rotation_axis = 1\n\n    def flip(self, bev_direction='horizontal'):\n        \"\"\"Flip the boxes in BEV along given BEV direction.\"\"\"\n        if bev_direction == 'horizontal':\n            self.tensor[:, 0] = -self.tensor[:, 0]\n        elif bev_direction == 'vertical':\n            self.tensor[:, 2] = -self.tensor[:, 2]\n\n    def in_range_bev(self, point_range):\n        \"\"\"Check whether the points are in the given range.\n\n        Args:\n            point_range (list | torch.Tensor): The range of point\n                in order of (x_min, y_min, x_max, y_max).\n\n        Returns:\n            torch.Tensor: Indicating whether each point is inside \\\n                the reference range.\n        \"\"\"\n        in_range_flags = ((self.tensor[:, 0] > point_range[0])\n                          & (self.tensor[:, 2] > point_range[1])\n                          & (self.tensor[:, 0] < point_range[2])\n                          & (self.tensor[:, 2] < point_range[3]))\n        return in_range_flags\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`CoordMode`): The target Point mode.\n            rt_mat (np.ndarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BasePoints`: The converted point of the same type \\\n                in the `dst` mode.\n        \"\"\"\n        from mmdet3d.core.bbox import Coord3DMode\n        return Coord3DMode.convert_point(\n            point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat)\n"
  },
  {
    "path": "mmdet3d/core/points/depth_points.py",
    "content": "from .base_points import BasePoints\n\n\nclass DepthPoints(BasePoints):\n    \"\"\"Points of instances in DEPTH coordinates.\n\n    Args:\n        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.\n        points_dim (int): Number of the dimension of a point.\n            Each row is (x, y, z). Default to 3.\n        attribute_dims (dict): Dictionary to indicate the meaning of extra\n            dimension. Default to None.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x points_dim.\n        points_dim (int): Integer indicating the dimension of a point.\n            Each row is (x, y, z, ...).\n        attribute_dims (bool): Dictionary to indicate the meaning of extra\n            dimension. Default to None.\n        rotation_axis (int): Default rotation axis for points rotation.\n    \"\"\"\n\n    def __init__(self, tensor, points_dim=3, attribute_dims=None):\n        super(DepthPoints, self).__init__(\n            tensor, points_dim=points_dim, attribute_dims=attribute_dims)\n        self.rotation_axis = 2\n\n    def flip(self, bev_direction='horizontal'):\n        \"\"\"Flip the boxes in BEV along given BEV direction.\"\"\"\n        if bev_direction == 'horizontal':\n            self.tensor[:, 0] = -self.tensor[:, 0]\n        elif bev_direction == 'vertical':\n            self.tensor[:, 1] = -self.tensor[:, 1]\n\n    def in_range_bev(self, point_range):\n        \"\"\"Check whether the points are in the given range.\n\n        Args:\n            point_range (list | torch.Tensor): The range of point\n                in order of (x_min, y_min, x_max, y_max).\n\n        Returns:\n            torch.Tensor: Indicating whether each point is inside \\\n                the reference range.\n        \"\"\"\n        in_range_flags = ((self.tensor[:, 0] > point_range[0])\n                          & (self.tensor[:, 1] > point_range[1])\n                          & (self.tensor[:, 0] < point_range[2])\n                          & (self.tensor[:, 1] < point_range[3]))\n        return in_range_flags\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`CoordMode`): The target Point mode.\n            rt_mat (np.ndarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BasePoints`: The converted point of the same type \\\n                in the `dst` mode.\n        \"\"\"\n        from mmdet3d.core.bbox import Coord3DMode\n        return Coord3DMode.convert_point(\n            point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat)\n"
  },
  {
    "path": "mmdet3d/core/points/lidar_points.py",
    "content": "from .base_points import BasePoints\n\n\nclass LiDARPoints(BasePoints):\n    \"\"\"Points of instances in LIDAR coordinates.\n\n    Args:\n        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.\n        points_dim (int): Number of the dimension of a point.\n            Each row is (x, y, z). Default to 3.\n        attribute_dims (dict): Dictionary to indicate the meaning of extra\n            dimension. Default to None.\n\n    Attributes:\n        tensor (torch.Tensor): Float matrix of N x points_dim.\n        points_dim (int): Integer indicating the dimension of a point.\n            Each row is (x, y, z, ...).\n        attribute_dims (bool): Dictionary to indicate the meaning of extra\n            dimension. Default to None.\n        rotation_axis (int): Default rotation axis for points rotation.\n    \"\"\"\n\n    def __init__(self, tensor, points_dim=3, attribute_dims=None):\n        super(LiDARPoints, self).__init__(\n            tensor, points_dim=points_dim, attribute_dims=attribute_dims)\n        self.rotation_axis = 2\n\n    def flip(self, bev_direction='horizontal'):\n        \"\"\"Flip the boxes in BEV along given BEV direction.\"\"\"\n        if bev_direction == 'horizontal':\n            self.tensor[:, 1] = -self.tensor[:, 1]\n        elif bev_direction == 'vertical':\n            self.tensor[:, 0] = -self.tensor[:, 0]\n\n    def in_range_bev(self, point_range):\n        \"\"\"Check whether the points are in the given range.\n\n        Args:\n            point_range (list | torch.Tensor): The range of point\n                in order of (x_min, y_min, x_max, y_max).\n\n        Returns:\n            torch.Tensor: Indicating whether each point is inside \\\n                the reference range.\n        \"\"\"\n        in_range_flags = ((self.tensor[:, 0] > point_range[0])\n                          & (self.tensor[:, 1] > point_range[1])\n                          & (self.tensor[:, 0] < point_range[2])\n                          & (self.tensor[:, 1] < point_range[3]))\n        return in_range_flags\n\n    def convert_to(self, dst, rt_mat=None):\n        \"\"\"Convert self to ``dst`` mode.\n\n        Args:\n            dst (:obj:`CoordMode`): The target Point mode.\n            rt_mat (np.ndarray | torch.Tensor): The rotation and translation\n                matrix between different coordinates. Defaults to None.\n                The conversion from `src` coordinates to `dst` coordinates\n                usually comes along the change of sensors, e.g., from camera\n                to LiDAR. This requires a transformation matrix.\n\n        Returns:\n            :obj:`BasePoints`: The converted point of the same type \\\n                in the `dst` mode.\n        \"\"\"\n        from mmdet3d.core.bbox import Coord3DMode\n        return Coord3DMode.convert_point(\n            point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat)\n"
  },
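A brief usage sketch of `LiDARPoints`, assuming mmdet3d is installed, showing the BEV range check and the horizontal flip convention (y is negated in LiDAR coordinates):

```python
import torch

from mmdet3d.core.points import LiDARPoints

points = LiDARPoints(torch.tensor([[5.0, 2.0, -1.0],
                                   [80.0, 3.0, -1.0]]))

# BEV range is (x_min, y_min, x_max, y_max); only the first point fits.
print(points.in_range_bev([0.0, -40.0, 70.4, 40.0]))  # tensor([ True, False])

# Horizontal flip negates y in LiDAR coordinates.
points.flip('horizontal')
print(points.tensor[:, 1])  # tensor([-2., -3.])
```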
  {
    "path": "mmdet3d/core/post_processing/__init__.py",
    "content": "from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,\n                                        merge_aug_proposals, merge_aug_scores,\n                                        multiclass_nms)\nfrom .box3d_nms import aligned_3d_nms, box3d_multiclass_nms, circle_nms\nfrom .merge_augs import merge_aug_bboxes_3d\n\n__all__ = [\n    'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',\n    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',\n    'aligned_3d_nms', 'merge_aug_bboxes_3d', 'circle_nms'\n]\n"
  },
  {
    "path": "mmdet3d/core/post_processing/box3d_nms.py",
    "content": "import numba\nimport numpy as np\nimport torch\n\nfrom mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu\n\n\ndef box3d_multiclass_nms(mlvl_bboxes,\n                         mlvl_bboxes_for_nms,\n                         mlvl_scores,\n                         score_thr,\n                         max_num,\n                         cfg,\n                         mlvl_dir_scores=None):\n    \"\"\"Multi-class nms for 3D boxes.\n\n    Args:\n        mlvl_bboxes (torch.Tensor): Multi-level boxes with shape (N, M).\n            M is the dimensions of boxes.\n        mlvl_bboxes_for_nms (torch.Tensor): Multi-level boxes with shape\n            (N, 4). N is the number of boxes.\n        mlvl_scores (torch.Tensor): Multi-level boxes with shape\n            (N, ). N is the number of boxes.\n        score_thr (float): Score thredhold to filter boxes with low\n            confidence.\n        max_num (int): Maximum number of boxes will be kept.\n        cfg (dict): Configuration dict of NMS.\n        mlvl_dir_scores (torch.Tensor, optional): Multi-level scores\n            of direction classifier. Defaults to None.\n\n    Returns:\n        tuple[torch.Tensor]: Return results after nms, including 3D \\\n            bounding boxes, scores, labels and direction scores.\n    \"\"\"\n    # do multi class nms\n    # the fg class id range: [0, num_classes-1]\n    num_classes = mlvl_scores.shape[1] - 1\n    bboxes = []\n    scores = []\n    labels = []\n    dir_scores = []\n    for i in range(0, num_classes):\n        # get bboxes and scores of this class\n        cls_inds = mlvl_scores[:, i] > score_thr\n        if not cls_inds.any():\n            continue\n\n        _scores = mlvl_scores[cls_inds, i]\n        _bboxes_for_nms = mlvl_bboxes_for_nms[cls_inds, :]\n\n        if cfg.use_rotate_nms:\n            nms_func = nms_gpu\n        else:\n            nms_func = nms_normal_gpu\n\n        selected = nms_func(_bboxes_for_nms, _scores, cfg.nms_thr)\n        _mlvl_bboxes = mlvl_bboxes[cls_inds, :]\n        bboxes.append(_mlvl_bboxes[selected])\n        scores.append(_scores[selected])\n        cls_label = mlvl_bboxes.new_full((len(selected), ),\n                                         i,\n                                         dtype=torch.long)\n        labels.append(cls_label)\n\n        if mlvl_dir_scores is not None:\n            _mlvl_dir_scores = mlvl_dir_scores[cls_inds]\n            dir_scores.append(_mlvl_dir_scores[selected])\n\n    if bboxes:\n        bboxes = torch.cat(bboxes, dim=0)\n        scores = torch.cat(scores, dim=0)\n        labels = torch.cat(labels, dim=0)\n        if mlvl_dir_scores is not None:\n            dir_scores = torch.cat(dir_scores, dim=0)\n        if bboxes.shape[0] > max_num:\n            _, inds = scores.sort(descending=True)\n            inds = inds[:max_num]\n            bboxes = bboxes[inds, :]\n            labels = labels[inds]\n            scores = scores[inds]\n            if mlvl_dir_scores is not None:\n                dir_scores = dir_scores[inds]\n    else:\n        bboxes = mlvl_scores.new_zeros((0, mlvl_bboxes.size(-1)))\n        scores = mlvl_scores.new_zeros((0, ))\n        labels = mlvl_scores.new_zeros((0, ), dtype=torch.long)\n        dir_scores = mlvl_scores.new_zeros((0, ))\n    return bboxes, scores, labels, dir_scores\n\n\ndef aligned_3d_nms(boxes, scores, classes, thresh):\n    \"\"\"3d nms for aligned boxes.\n\n    Args:\n        boxes (torch.Tensor): Aligned box with shape [n, 6].\n        scores (torch.Tensor): Scores of 
each box.\n        classes (torch.Tensor): Class of each box.\n        thresh (float): IoU threshold for NMS.\n\n    Returns:\n        torch.Tensor: Indices of selected boxes.\n    \"\"\"\n    x1 = boxes[:, 0]\n    y1 = boxes[:, 1]\n    z1 = boxes[:, 2]\n    x2 = boxes[:, 3]\n    y2 = boxes[:, 4]\n    z2 = boxes[:, 5]\n    area = (x2 - x1) * (y2 - y1) * (z2 - z1)\n    zero = boxes.new_zeros(1, )\n\n    score_sorted = torch.argsort(scores)\n    pick = []\n    while (score_sorted.shape[0] != 0):\n        last = score_sorted.shape[0]\n        i = score_sorted[-1]\n        pick.append(i)\n\n        xx1 = torch.max(x1[i], x1[score_sorted[:last - 1]])\n        yy1 = torch.max(y1[i], y1[score_sorted[:last - 1]])\n        zz1 = torch.max(z1[i], z1[score_sorted[:last - 1]])\n        xx2 = torch.min(x2[i], x2[score_sorted[:last - 1]])\n        yy2 = torch.min(y2[i], y2[score_sorted[:last - 1]])\n        zz2 = torch.min(z2[i], z2[score_sorted[:last - 1]])\n        classes1 = classes[i]\n        classes2 = classes[score_sorted[:last - 1]]\n        inter_l = torch.max(zero, xx2 - xx1)\n        inter_w = torch.max(zero, yy2 - yy1)\n        inter_h = torch.max(zero, zz2 - zz1)\n\n        inter = inter_l * inter_w * inter_h\n        iou = inter / (area[i] + area[score_sorted[:last - 1]] - inter)\n        iou = iou * (classes1 == classes2).float()\n        score_sorted = score_sorted[torch.nonzero(\n            iou <= thresh, as_tuple=False).flatten()]\n\n    indices = boxes.new_tensor(pick, dtype=torch.long)\n    return indices\n\n\n@numba.jit(nopython=True)\ndef circle_nms(dets, thresh, socre_thre=0, post_max_size=83):\n    \"\"\"Circular NMS.\n\n    An object is only counted as positive if no other center\n    with a higher confidence exists within a radius r using a\n    bird's-eye view distance metric.\n\n    Args:\n        dets (np.ndarray): Detection results with the shape of [N, 3]\n            (x, y, score).\n        thresh (float): Distance threshold, compared against the squared\n            BEV center distance.\n        socre_thre (float): Minimum score gap between two boxes for the\n            lower-scoring one to be suppressed. Defaults to 0.\n        post_max_size (int): Max number of predictions to be kept. Defaults\n            to 83.\n\n    Returns:\n        list[int]: Indexes of the detections to be kept.\n    \"\"\"\n    x1 = dets[:, 0]\n    y1 = dets[:, 1]\n    scores = dets[:, 2]\n    order = scores.argsort()[::-1].astype(np.int32)  # highest->lowest\n    ndets = dets.shape[0]\n    suppressed = np.zeros((ndets), dtype=np.int32)\n    keep = []\n    for _i in range(ndets):\n        i = order[_i]  # start with highest score box\n        if suppressed[\n                i] == 1:  # skip boxes that are already suppressed\n            continue\n        keep.append(i)\n        for _j in range(_i + 1, ndets):\n            j = order[_j]\n            if suppressed[j] == 1:\n                continue\n            # squared BEV center distance between box i and box j\n            dist = (x1[i] - x1[j])**2 + (y1[i] - y1[j])**2\n\n            # ovr = inter / areas[j]\n            if dist <= thresh and scores[i] - scores[j] > socre_thre:\n                suppressed[j] = 1\n    return keep[:post_max_size]\n"
  },
  {
    "path": "mmdet3d/core/post_processing/merge_augs.py",
    "content": "import torch\n\nfrom mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu\nfrom ..bbox import bbox3d2result, bbox3d_mapping_back, xywhr2xyxyr\n\n\ndef merge_aug_bboxes_3d(aug_results, img_metas, test_cfg):\n    \"\"\"Merge augmented detection 3D bboxes and scores.\n\n    Args:\n        aug_results (list[dict]): The dict of detection results.\n            The dict contains the following keys\n\n            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.\n            - scores_3d (torch.Tensor): Detection scores.\n            - labels_3d (torch.Tensor): Predicted box labels.\n        img_metas (list[dict]): Meta information of each sample.\n        test_cfg (dict): Test config.\n\n    Returns:\n        dict: Bounding boxes results in cpu mode, containing merged results.\n\n            - boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox.\n            - scores_3d (torch.Tensor): Merged detection scores.\n            - labels_3d (torch.Tensor): Merged predicted box labels.\n    \"\"\"\n\n    assert len(aug_results) == len(img_metas), \\\n        '\"aug_results\" should have the same length as \"img_metas\", got len(' \\\n        f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}'\n\n    recovered_bboxes = []\n    recovered_scores = []\n    recovered_labels = []\n\n    for bboxes, img_info in zip(aug_results, img_metas):\n        scale_factor = img_info[0]['pcd_scale_factor']\n        pcd_horizontal_flip = img_info[0]['pcd_horizontal_flip']\n        pcd_vertical_flip = img_info[0]['pcd_vertical_flip']\n        recovered_scores.append(bboxes['scores_3d'])\n        recovered_labels.append(bboxes['labels_3d'])\n        bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], scale_factor,\n                                     pcd_horizontal_flip, pcd_vertical_flip)\n        recovered_bboxes.append(bboxes)\n\n    aug_bboxes = recovered_bboxes[0].cat(recovered_bboxes)\n    aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev)\n    aug_scores = torch.cat(recovered_scores, dim=0)\n    aug_labels = torch.cat(recovered_labels, dim=0)\n\n    # TODO: use a more elegent way to deal with nms\n    if test_cfg.use_rotate_nms:\n        nms_func = nms_gpu\n    else:\n        nms_func = nms_normal_gpu\n\n    merged_bboxes = []\n    merged_scores = []\n    merged_labels = []\n\n    # Apply multi-class nms when merge bboxes\n    if len(aug_labels) == 0:\n        return bbox3d2result(aug_bboxes, aug_scores, aug_labels)\n\n    for class_id in range(torch.max(aug_labels).item() + 1):\n        class_inds = (aug_labels == class_id)\n        bboxes_i = aug_bboxes[class_inds]\n        bboxes_nms_i = aug_bboxes_for_nms[class_inds, :]\n        scores_i = aug_scores[class_inds]\n        labels_i = aug_labels[class_inds]\n        if len(bboxes_nms_i) == 0:\n            continue\n        selected = nms_func(bboxes_nms_i, scores_i, test_cfg.nms_thr)\n\n        merged_bboxes.append(bboxes_i[selected, :])\n        merged_scores.append(scores_i[selected])\n        merged_labels.append(labels_i[selected])\n\n    merged_bboxes = merged_bboxes[0].cat(merged_bboxes)\n    merged_scores = torch.cat(merged_scores, dim=0)\n    merged_labels = torch.cat(merged_labels, dim=0)\n\n    _, order = merged_scores.sort(0, descending=True)\n    num = min(test_cfg.max_num, len(aug_bboxes))\n    order = order[:num]\n\n    merged_bboxes = merged_bboxes[order]\n    merged_scores = merged_scores[order]\n    merged_labels = merged_labels[order]\n\n    return bbox3d2result(merged_bboxes, merged_scores, 
merged_labels)\n"
  },
  {
    "path": "mmdet3d/core/utils/__init__.py",
    "content": "from .gaussian import draw_heatmap_gaussian, gaussian_2d, gaussian_radius\n\n__all__ = ['gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian']\n"
  },
  {
    "path": "mmdet3d/core/utils/gaussian.py",
    "content": "import numpy as np\nimport torch\n\n\ndef gaussian_2d(shape, sigma=1):\n    \"\"\"Generate gaussian map.\n\n    Args:\n        shape (list[int]): Shape of the map.\n        sigma (float): Sigma to generate gaussian map.\n            Defaults to 1.\n\n    Returns:\n        np.ndarray: Generated gaussian map.\n    \"\"\"\n\n    m, n = [(ss - 1.) / 2. for ss in shape]\n    y, x = np.ogrid[-m:m + 1, -n:n + 1]\n\n    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))\n    h[h < np.finfo(h.dtype).eps * h.max()] = 0\n    return h\n\n\ndef draw_heatmap_gaussian(heatmap, center, radius, k=1):\n    \"\"\"Get gaussian masked heatmap.\n\n    Args:\n        heatmap (torch.Tensor): Heatmap to be masked.\n        center (torch.Tensor): Center coord of the heatmap.\n        radius (int): Radius of gausian.\n        K (int): Multiple of masked_gaussian. Defaults to 1.\n\n    Returns:\n        torch.Tensor: Masked heatmap.\n    \"\"\"\n    diameter = 2 * radius + 1\n    gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6)\n\n    x, y = int(center[0]), int(center[1])\n\n    height, width = heatmap.shape[0:2]\n\n    left, right = min(x, radius), min(width - x, radius + 1)\n    top, bottom = min(y, radius), min(height - y, radius + 1)\n\n    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]\n    masked_gaussian = torch.from_numpy(\n        gaussian[radius - top:radius + bottom,\n                 radius - left:radius + right]).to(heatmap.device,\n                                                   torch.float32)\n    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:\n        torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap)\n    return heatmap\n\n\ndef gaussian_radius(det_size, min_overlap=0.5):\n    \"\"\"Get radius of gaussian.\n\n    Args:\n        det_size (tuple[torch.Tensor]): Size of the detection result.\n        min_overlap (float): Gaussian_overlap. Defaults to 0.5.\n\n    Returns:\n        torch.Tensor: Computed radius.\n    \"\"\"\n    height, width = det_size\n\n    a1 = 1\n    b1 = (height + width)\n    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)\n    sq1 = torch.sqrt(b1**2 - 4 * a1 * c1)\n    r1 = (b1 + sq1) / 2\n\n    a2 = 4\n    b2 = 2 * (height + width)\n    c2 = (1 - min_overlap) * width * height\n    sq2 = torch.sqrt(b2**2 - 4 * a2 * c2)\n    r2 = (b2 + sq2) / 2\n\n    a3 = 4 * min_overlap\n    b3 = -2 * min_overlap * (height + width)\n    c3 = (min_overlap - 1) * width * height\n    sq3 = torch.sqrt(b3**2 - 4 * a3 * c3)\n    r3 = (b3 + sq3) / 2\n    return min(r1, r2, r3)\n"
  },
  {
    "path": "mmdet3d/core/visualizer/__init__.py",
    "content": "from .show_result import show_result\n\n__all__ = ['show_result']\n"
  },
  {
    "path": "mmdet3d/core/visualizer/open3d_vis.py",
    "content": "import cv2\nimport numpy as np\nimport torch\nfrom matplotlib import pyplot as plt\n\ntry:\n    import open3d as o3d\n    from open3d import geometry\nexcept ImportError:\n    raise ImportError(\n        'Please run \"pip install open3d\" to install open3d first.')\n\n\ndef _draw_points(points,\n                 vis,\n                 points_size=2,\n                 point_color=(0.5, 0.5, 0.5),\n                 mode='xyz'):\n    \"\"\"Draw points on visualizer.\n\n    Args:\n        points (numpy.array | torch.tensor, shape=[N, 3+C]):\n            points to visualize.\n        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.\n        points_size (int): the size of points to show on visualizer.\n            Default: 2.\n        point_color (tuple[float]): the color of points.\n            Default: (0.5, 0.5, 0.5).\n        mode (str):  indicate type of the input points, avaliable mode\n            ['xyz', 'xyzrgb']. Default: 'xyz'.\n\n    Returns:\n        tuple: points, color of each point.\n    \"\"\"\n    vis.get_render_option().point_size = points_size  # set points size\n    if isinstance(points, torch.Tensor):\n        points = points.cpu().numpy()\n\n    points = points.copy()\n    pcd = geometry.PointCloud()\n    if mode == 'xyz':\n        pcd.points = o3d.utility.Vector3dVector(points[:, :3])\n        points_colors = np.tile(np.array(point_color), (points.shape[0], 1))\n    elif mode == 'xyzrgb':\n        pcd.points = o3d.utility.Vector3dVector(points[:, :3])\n        points_colors = points[:, 3:6]\n    else:\n        raise NotImplementedError\n\n    pcd.colors = o3d.utility.Vector3dVector(points_colors)\n    vis.add_geometry(pcd)\n\n    return pcd, points_colors\n\n\ndef _draw_bboxes(bbox3d,\n                 vis,\n                 points_colors,\n                 pcd=None,\n                 bbox_color=(0, 1, 0),\n                 points_in_box_color=(1, 0, 0),\n                 rot_axis=2,\n                 center_mode='lidar_bottom',\n                 mode='xyz'):\n    \"\"\"Draw bbox on visualizer and change the color of points inside bbox3d.\n\n    Args:\n        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):\n            3d bbox (x, y, z, dx, dy, dz, yaw) to visualize.\n        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.\n        points_colors (numpy.array): color of each points.\n        pcd (:obj:`open3d.geometry.PointCloud`): point cloud. Default: None.\n        bbox_color (tuple[float]): the color of bbox. Default: (0, 1, 0).\n        points_in_box_color (tuple[float]):\n            the color of points inside bbox3d. Default: (1, 0, 0).\n        rot_axis (int): rotation axis of bbox. Default: 2.\n        center_mode (bool): indicate the center of bbox is bottom center\n            or gravity center. avaliable mode\n            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.\n        mode (str):  indicate type of the input points, avaliable mode\n            ['xyz', 'xyzrgb']. 
Default: 'xyz'.\n    \"\"\"\n    if isinstance(bbox3d, torch.Tensor):\n        bbox3d = bbox3d.cpu().numpy()\n    bbox3d = bbox3d.copy()\n\n    in_box_color = np.array(points_in_box_color)\n    for i in range(len(bbox3d)):\n        center = bbox3d[i, 0:3]\n        dim = bbox3d[i, 3:6]\n        yaw = np.zeros(3)\n        yaw[rot_axis] = -bbox3d[i, 6]\n        rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)\n\n        if center_mode == 'lidar_bottom':\n            center[rot_axis] += dim[\n                rot_axis] / 2  # bottom center to gravity center\n        elif center_mode == 'camera_bottom':\n            center[rot_axis] -= dim[\n                rot_axis] / 2  # bottom center to gravity center\n        box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)\n\n        line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)\n        line_set.paint_uniform_color(bbox_color)\n        # draw bboxes on visualizer\n        vis.add_geometry(line_set)\n\n        # change the color of points which are in box\n        if pcd is not None and mode == 'xyz':\n            indices = box3d.get_point_indices_within_bounding_box(pcd.points)\n            points_colors[indices] = in_box_color\n\n    # update points colors\n    if pcd is not None:\n        pcd.colors = o3d.utility.Vector3dVector(points_colors)\n        vis.update_geometry(pcd)\n\n\ndef show_pts_boxes(points,\n                   bbox3d=None,\n                   show=True,\n                   save_path=None,\n                   points_size=2,\n                   point_color=(0.5, 0.5, 0.5),\n                   bbox_color=(0, 1, 0),\n                   points_in_box_color=(1, 0, 0),\n                   rot_axis=2,\n                   center_mode='lidar_bottom',\n                   mode='xyz'):\n    \"\"\"Draw bbox and points on visualizer.\n\n    Args:\n        points (numpy.array | torch.tensor, shape=[N, 3+C]):\n            points to visualize.\n        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):\n            3d bbox (x, y, z, dx, dy, dz, yaw) to visualize. Default: None.\n        show (bool): whether to show the visualization results. Default: True.\n        save_path (str): path to save visualized results. Default: None.\n        points_size (int): the size of points to show on visualizer.\n            Default: 2.\n        point_color (tuple[float]): the color of points.\n            Default: (0.5, 0.5, 0.5).\n        bbox_color (tuple[float]): the color of bbox. Default: (0, 1, 0).\n        points_in_box_color (tuple[float]):\n            the color of points which are in bbox3d. Default: (1, 0, 0).\n        rot_axis (int): rotation axis of bbox. Default: 2.\n        center_mode (bool): indicate the center of bbox is bottom center\n            or gravity center. avaliable mode\n            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.\n        mode (str):  indicate type of the input points, avaliable mode\n            ['xyz', 'xyzrgb']. 
Default: 'xyz'.\n    \"\"\"\n    # TODO: support score and class info\n    assert 0 <= rot_axis <= 2\n\n    # init visualizer\n    vis = o3d.visualization.Visualizer()\n    vis.create_window()\n    mesh_frame = geometry.TriangleMesh.create_coordinate_frame(\n        size=1, origin=[0, 0, 0])  # create coordinate frame\n    vis.add_geometry(mesh_frame)\n\n    # draw points\n    pcd, points_colors = _draw_points(points, vis, points_size, point_color,\n                                      mode)\n\n    # draw boxes\n    if bbox3d is not None:\n        _draw_bboxes(bbox3d, vis, points_colors, pcd, bbox_color,\n                     points_in_box_color, rot_axis, center_mode, mode)\n\n    if show:\n        vis.run()\n\n    if save_path is not None:\n        vis.capture_screen_image(save_path)\n\n    vis.destroy_window()\n\n\ndef _draw_bboxes_ind(bbox3d,\n                     vis,\n                     indices,\n                     points_colors,\n                     pcd=None,\n                     bbox_color=(0, 1, 0),\n                     points_in_box_color=(1, 0, 0),\n                     rot_axis=2,\n                     center_mode='lidar_bottom',\n                     mode='xyz'):\n    \"\"\"Draw bbox on visualizer and change the color or points inside bbox3d\n    with indices.\n\n    Args:\n        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):\n            3d bbox (x, y, z, dx, dy, dz, yaw) to visualize.\n        vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.\n        indices (numpy.array | torch.tensor, shape=[N, M]):\n            indicate which bbox3d that each point lies in.\n        points_colors (numpy.array): color of each points.\n        pcd (:obj:`open3d.geometry.PointCloud`): point cloud. Default: None.\n        bbox_color (tuple[float]): the color of bbox. Default: (0, 1, 0).\n        points_in_box_color (tuple[float]):\n            the color of points which are in bbox3d. Default: (1, 0, 0).\n        rot_axis (int): rotation axis of bbox. Default: 2.\n        center_mode (bool): indicate the center of bbox is bottom center\n            or gravity center. avaliable mode\n            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.\n        mode (str):  indicate type of the input points, avaliable mode\n            ['xyz', 'xyzrgb']. 
Default: 'xyz'.\n    \"\"\"\n    if isinstance(bbox3d, torch.Tensor):\n        bbox3d = bbox3d.cpu().numpy()\n    if isinstance(indices, torch.Tensor):\n        indices = indices.cpu().numpy()\n    bbox3d = bbox3d.copy()\n\n    in_box_color = np.array(points_in_box_color)\n    for i in range(len(bbox3d)):\n        center = bbox3d[i, 0:3]\n        dim = bbox3d[i, 3:6]\n        yaw = np.zeros(3)\n        # TODO: fix problem of current coordinate system\n        # dim[0], dim[1] = dim[1], dim[0]  # for current coordinate\n        # yaw[rot_axis] = -(bbox3d[i, 6] - 0.5 * np.pi)\n        yaw[rot_axis] = -bbox3d[i, 6]\n        rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)\n        if center_mode == 'lidar_bottom':\n            center[rot_axis] += dim[\n                rot_axis] / 2  # bottom center to gravity center\n        elif center_mode == 'camera_bottom':\n            center[rot_axis] -= dim[\n                rot_axis] / 2  # bottom center to gravity center\n        box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)\n\n        line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)\n        line_set.paint_uniform_color(bbox_color)\n        # draw bboxes on visualizer\n        vis.add_geometry(line_set)\n\n        # change the color of points which are in box\n        if pcd is not None and mode == 'xyz':\n            points_colors[indices[:, i].astype(np.bool)] = in_box_color\n\n    # update points colors\n    if pcd is not None:\n        pcd.colors = o3d.utility.Vector3dVector(points_colors)\n        vis.update_geometry(pcd)\n\n\ndef show_pts_index_boxes(points,\n                         bbox3d=None,\n                         show=True,\n                         indices=None,\n                         save_path=None,\n                         points_size=2,\n                         point_color=(0.5, 0.5, 0.5),\n                         bbox_color=(0, 1, 0),\n                         points_in_box_color=(1, 0, 0),\n                         rot_axis=2,\n                         center_mode='lidar_bottom',\n                         mode='xyz'):\n    \"\"\"Draw bbox and points on visualizer with indices that indicate which\n    bbox3d that each point lies in.\n\n    Args:\n        points (numpy.array | torch.tensor, shape=[N, 3+C]):\n            points to visualize.\n        bbox3d (numpy.array | torch.tensor, shape=[M, 7]):\n            3d bbox (x, y, z, dx, dy, dz, yaw) to visualize. Default: None.\n        show (bool): whether to show the visualization results. Default: True.\n        indices (numpy.array | torch.tensor, shape=[N, M]):\n            indicate which bbox3d that each point lies in. Default: None.\n        save_path (str): path to save visualized results. Default: None.\n        points_size (int): the size of points to show on visualizer.\n            Default: 2.\n        point_color (tuple[float]): the color of points.\n            Default: (0.5, 0.5, 0.5).\n        bbox_color (tuple[float]): the color of bbox. Default: (0, 1, 0).\n        points_in_box_color (tuple[float]):\n            the color of points which are in bbox3d. Default: (1, 0, 0).\n        rot_axis (int): rotation axis of bbox. Default: 2.\n        center_mode (bool): indicate the center of bbox is bottom center\n            or gravity center. avaliable mode\n            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.\n        mode (str):  indicate type of the input points, avaliable mode\n            ['xyz', 'xyzrgb']. 
Default: 'xyz'.\n    \"\"\"\n    # TODO: support score and class info\n    assert 0 <= rot_axis <= 2\n\n    # init visualizer\n    vis = o3d.visualization.Visualizer()\n    vis.create_window()\n    mesh_frame = geometry.TriangleMesh.create_coordinate_frame(\n        size=1, origin=[0, 0, 0])  # create coordinate frame\n    vis.add_geometry(mesh_frame)\n\n    # draw points\n    pcd, points_colors = _draw_points(points, vis, points_size, point_color,\n                                      mode)\n\n    # draw boxes\n    if bbox3d is not None:\n        _draw_bboxes_ind(bbox3d, vis, indices, points_colors, pcd, bbox_color,\n                         points_in_box_color, rot_axis, center_mode, mode)\n\n    if show:\n        vis.run()\n\n    if save_path is not None:\n        vis.capture_screen_image(save_path)\n\n    vis.destroy_window()\n\n\ndef project_pts_on_img(points,\n                       raw_img,\n                       lidar2img_rt,\n                       max_distance=70,\n                       thickness=-1):\n    \"\"\"Project the 3D points cloud on 2D image.\n\n    Args:\n        points (numpy.array): 3D points cloud (x, y, z) to visualize.\n        raw_img (numpy.array): The numpy array of image.\n        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix\n            according to the camera intrinsic parameters.\n        max_distance (float): the max distance of the points cloud.\n            Default: 70.\n        thickness (int, optional): The thickness of 2D points. Default: -1.\n    \"\"\"\n    img = raw_img.copy()\n    num_points = points.shape[0]\n    pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)\n    pts_2d = pts_4d @ lidar2img_rt.T\n\n    # cam_points is Tensor of Nx4 whose last column is 1\n    # transform camera coordinate to image coordinate\n    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)\n    pts_2d[:, 0] /= pts_2d[:, 2]\n    pts_2d[:, 1] /= pts_2d[:, 2]\n\n    fov_inds = ((pts_2d[:, 0] < img.shape[1])\n                & (pts_2d[:, 0] >= 0)\n                & (pts_2d[:, 1] < img.shape[0])\n                & (pts_2d[:, 1] >= 0))\n\n    imgfov_pts_2d = pts_2d[fov_inds, :3]  # u, v, d\n\n    cmap = plt.cm.get_cmap('hsv', 256)\n    cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255\n    for i in range(imgfov_pts_2d.shape[0]):\n        depth = imgfov_pts_2d[i, 2]\n        color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :]\n        cv2.circle(\n            img,\n            center=(int(np.round(imgfov_pts_2d[i, 0])),\n                    int(np.round(imgfov_pts_2d[i, 1]))),\n            radius=1,\n            color=tuple(color),\n            thickness=thickness,\n        )\n    cv2.imshow('project_pts_img', img)\n    cv2.waitKey(100)\n\n\ndef project_bbox3d_on_img(bboxes3d,\n                          raw_img,\n                          lidar2img_rt,\n                          color=(0, 255, 0),\n                          thickness=1):\n    \"\"\"Project the 3D bbox on 2D image.\n\n    Args:\n        bboxes3d (numpy.array, shape=[M, 7]):\n            3d bbox (x, y, z, dx, dy, dz, yaw) to visualize.\n        raw_img (numpy.array): The numpy array of image.\n        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix\n            according to the camera intrinsic parameters.\n        color (tuple[int]): the color to draw bboxes. Default: (0, 255, 0).\n        thickness (int, optional): The thickness of bboxes. 
Default: 1.\n    \"\"\"\n    img = raw_img.copy()\n    corners_3d = bboxes3d.corners\n    num_bbox = corners_3d.shape[0]\n    pts_4d = np.concatenate(\n        [corners_3d.reshape(-1, 3),\n         np.ones((num_bbox * 8, 1))], axis=-1)\n    pts_2d = pts_4d @ lidar2img_rt.T\n\n    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)\n    pts_2d[:, 0] /= pts_2d[:, 2]\n    pts_2d[:, 1] /= pts_2d[:, 2]\n    imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)\n\n    line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),\n                    (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))\n    for i in range(num_bbox):\n        corners = imgfov_pts_2d[i].astype(np.int)\n        for start, end in line_indices:\n            cv2.line(img, (corners[start, 0], corners[start, 1]),\n                     (corners[end, 0], corners[end, 1]), color, thickness,\n                     cv2.LINE_AA)\n\n    cv2.imshow('project_bbox3d_img', img)\n    cv2.waitKey(0)\n\n\nclass Visualizer(object):\n    r\"\"\"Online visualizer implemented with Open3d.\n\n    Args:\n        points (numpy.array, shape=[N, 3+C]): Points to visualize. The Points\n            cloud is in mode of Coord3DMode.DEPTH (please refer to\n            core.structures.coord_3d_mode).\n        bbox3d (numpy.array, shape=[M, 7]): 3d bbox (x, y, z, dx, dy, dz, yaw)\n            to visualize. The 3d bbox is in mode of Box3DMode.DEPTH with\n            gravity_center (please refer to core.structures.box_3d_mode).\n            Default: None.\n        save_path (str): path to save visualized results. Default: None.\n        points_size (int): the size of points to show on visualizer.\n            Default: 2.\n        point_color (tuple[float]): the color of points.\n            Default: (0.5, 0.5, 0.5).\n        bbox_color (tuple[float]): the color of bbox. Default: (0, 1, 0).\n        points_in_box_color (tuple[float]):\n            the color of points which are in bbox3d. Default: (1, 0, 0).\n        rot_axis (int): rotation axis of bbox. Default: 2.\n        center_mode (bool): indicate the center of bbox is bottom center\n            or gravity center. avaliable mode\n            ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.\n        mode (str):  indicate type of the input points, avaliable mode\n            ['xyz', 'xyzrgb']. 
Default: 'xyz'.\n    \"\"\"\n\n    def __init__(self,\n                 points,\n                 bbox3d=None,\n                 save_path=None,\n                 points_size=2,\n                 point_color=(0.5, 0.5, 0.5),\n                 bbox_color=(0, 1, 0),\n                 points_in_box_color=(1, 0, 0),\n                 rot_axis=2,\n                 center_mode='lidar_bottom',\n                 mode='xyz'):\n        super(Visualizer, self).__init__()\n        assert 0 <= rot_axis <= 2\n\n        # init visualizer\n        self.o3d_visualizer = o3d.visualization.Visualizer()\n        self.o3d_visualizer.create_window()\n        mesh_frame = geometry.TriangleMesh.create_coordinate_frame(\n            size=1, origin=[0, 0, 0])  # create coordinate frame\n        self.o3d_visualizer.add_geometry(mesh_frame)\n\n        self.points_size = points_size\n        self.point_color = point_color\n        self.bbox_color = bbox_color\n        self.points_in_box_color = points_in_box_color\n        self.rot_axis = rot_axis\n        self.center_mode = center_mode\n        self.mode = mode\n\n        # draw points\n        if points is not None:\n            self.pcd, self.points_colors = _draw_points(\n                points, self.o3d_visualizer, points_size, point_color, mode)\n\n        # draw boxes\n        if bbox3d is not None:\n            _draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors,\n                         self.pcd, bbox_color, points_in_box_color, rot_axis,\n                         center_mode, mode)\n\n    def add_bboxes(self, bbox3d, bbox_color=None, points_in_box_color=None):\n        \"\"\"Add bounding box to visualizer.\n\n        Args:\n            bbox3d (numpy.array, shape=[M, 7]):\n                3D bbox (x, y, z, dx, dy, dz, yaw) to be visualized.\n                The 3d bbox is in mode of Box3DMode.DEPTH with\n                gravity_center (please refer to core.structures.box_3d_mode).\n            bbox_color (tuple[float]): the color of bbox. Defaule: None.\n            points_in_box_color (tuple[float]): the color of points which\n                are in bbox3d. Defaule: None.\n        \"\"\"\n        if bbox_color is None:\n            bbox_color = self.bbox_color\n        if points_in_box_color is None:\n            points_in_box_color = self.points_in_box_color\n        _draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors, self.pcd,\n                     bbox_color, points_in_box_color, self.rot_axis,\n                     self.center_mode, self.mode)\n\n    def show(self, save_path=None):\n        \"\"\"Visualize the points cloud.\n\n        Args:\n            save_path (str): path to save image. Default: None.\n        \"\"\"\n\n        self.o3d_visualizer.run()\n\n        if save_path is not None:\n            self.o3d_visualizer.capture_screen_image(save_path)\n\n        self.o3d_visualizer.destroy_window()\n        return\n"
  },
  {
    "path": "mmdet3d/core/visualizer/show_result.py",
    "content": "import mmcv\nimport numpy as np\nimport trimesh\nfrom os import path as osp\n\n\ndef _write_ply(points, out_filename):\n    \"\"\"Write points into ``ply`` format for meshlab visualization.\n\n    Args:\n        points (np.ndarray): Points in shape (N, dim).\n        out_filename (str): Filename to be saved.\n    \"\"\"\n    N = points.shape[0]\n    fout = open(out_filename, 'w')\n    for i in range(N):\n        if points.shape[1] == 6:\n            c = points[i, 3:].astype(int)\n            fout.write(\n                'v %f %f %f %d %d %d\\n' %\n                (points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2]))\n\n        else:\n            fout.write('v %f %f %f\\n' %\n                       (points[i, 0], points[i, 1], points[i, 2]))\n    fout.close()\n\n\ndef _write_oriented_bbox(scene_bbox, out_filename):\n    \"\"\"Export oriented (around Z axis) scene bbox to meshes.\n\n    Args:\n        scene_bbox(list[ndarray] or ndarray): xyz pos of center and\n            3 lengths (dx,dy,dz) and heading angle around Z axis.\n            Y forward, X right, Z upward. heading angle of positive X is 0,\n            heading angle of positive Y is 90 degrees.\n        out_filename(str): Filename.\n    \"\"\"\n\n    def heading2rotmat(heading_angle):\n        rotmat = np.zeros((3, 3))\n        rotmat[2, 2] = 1\n        cosval = np.cos(heading_angle)\n        sinval = np.sin(heading_angle)\n        rotmat[0:2, 0:2] = np.array([[cosval, -sinval], [sinval, cosval]])\n        return rotmat\n\n    def convert_oriented_box_to_trimesh_fmt(box):\n        ctr = box[:3]\n        lengths = box[3:6]\n        trns = np.eye(4)\n        trns[0:3, 3] = ctr\n        trns[3, 3] = 1.0\n        trns[0:3, 0:3] = heading2rotmat(box[6])\n        box_trimesh_fmt = trimesh.creation.box(lengths, trns)\n        return box_trimesh_fmt\n\n    if len(scene_bbox) == 0:\n        scene_bbox = np.zeros((1, 7))\n    scene = trimesh.scene.Scene()\n    for box in scene_bbox:\n        scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))\n\n    mesh_list = trimesh.util.concatenate(scene.dump())\n    # save to ply file\n    trimesh.io.export.export_mesh(mesh_list, out_filename, file_type='ply')\n\n    return\n\n\ndef show_result(points, gt_bboxes, pred_bboxes, out_dir, filename, show=True):\n    \"\"\"Convert results into format that is directly readable for meshlab.\n\n    Args:\n        points (np.ndarray): Points.\n        gt_bboxes (np.ndarray): Ground truth boxes.\n        pred_bboxes (np.ndarray): Predicted boxes.\n        out_dir (str): Path of output directory\n        filename (str): Filename of the current frame.\n        show (bool): Visualize the results online.\n    \"\"\"\n    if show:\n        from .open3d_vis import Visualizer\n\n        vis = Visualizer(points)\n        if pred_bboxes is not None:\n            vis.add_bboxes(bbox3d=pred_bboxes)\n        if gt_bboxes is not None:\n            vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))\n        vis.show()\n\n    result_path = osp.join(out_dir, filename)\n    mmcv.mkdir_or_exist(result_path)\n\n    if points is not None:\n        _write_ply(points, osp.join(result_path, f'{filename}_points.obj'))\n\n    if gt_bboxes is not None:\n        # bottom center to gravity center\n        gt_bboxes[..., 2] += gt_bboxes[..., 5] / 2\n        # the positive direction for yaw in meshlab is clockwise\n        gt_bboxes[:, 6] *= -1\n        _write_oriented_bbox(gt_bboxes,\n                             osp.join(result_path, 
f'{filename}_gt.ply'))\n\n    if pred_bboxes is not None:\n        # bottom center to gravity center\n        pred_bboxes[..., 2] += pred_bboxes[..., 5] / 2\n        # the positive direction for yaw in meshlab is clockwise\n        pred_bboxes[:, 6] *= -1\n        _write_oriented_bbox(pred_bboxes,\n                             osp.join(result_path, f'{filename}_pred.ply'))\n"
  },
  {
    "path": "mmdet3d/core/voxel/__init__.py",
    "content": "from .builder import build_voxel_generator\nfrom .voxel_generator import VoxelGenerator\n\n__all__ = ['build_voxel_generator', 'VoxelGenerator']\n"
  },
  {
    "path": "mmdet3d/core/voxel/builder.py",
    "content": "import mmcv\n\nfrom . import voxel_generator\n\n\ndef build_voxel_generator(cfg, **kwargs):\n    \"\"\"Builder of voxel generator.\"\"\"\n    if isinstance(cfg, voxel_generator.VoxelGenerator):\n        return cfg\n    elif isinstance(cfg, dict):\n        return mmcv.runner.obj_from_dict(\n            cfg, voxel_generator, default_args=kwargs)\n    else:\n        raise TypeError('Invalid type {} for building a sampler'.format(\n            type(cfg)))\n"
  },
  {
    "path": "mmdet3d/core/voxel/voxel_generator.py",
    "content": "import numba\nimport numpy as np\n\n\nclass VoxelGenerator(object):\n    \"\"\"Voxel generator in numpy implementation.\n\n    Args:\n        voxel_size (list[float]): Size of a single voxel\n        point_cloud_range (list[float]): Range of points\n        max_num_points (int): Maximum number of points in a single voxel\n        max_voxels (int, optional): Maximum number of voxels.\n            Defaults to 20000.\n    \"\"\"\n\n    def __init__(self,\n                 voxel_size,\n                 point_cloud_range,\n                 max_num_points,\n                 max_voxels=20000):\n\n        point_cloud_range = np.array(point_cloud_range, dtype=np.float32)\n        # [0, -40, -3, 70.4, 40, 1]\n        voxel_size = np.array(voxel_size, dtype=np.float32)\n        grid_size = (point_cloud_range[3:] -\n                     point_cloud_range[:3]) / voxel_size\n        grid_size = np.round(grid_size).astype(np.int64)\n\n        self._voxel_size = voxel_size\n        self._point_cloud_range = point_cloud_range\n        self._max_num_points = max_num_points\n        self._max_voxels = max_voxels\n        self._grid_size = grid_size\n\n    def generate(self, points):\n        \"\"\"Generate voxels given points.\"\"\"\n        return points_to_voxel(points, self._voxel_size,\n                               self._point_cloud_range, self._max_num_points,\n                               True, self._max_voxels)\n\n    @property\n    def voxel_size(self):\n        \"\"\"list[float]: Size of a single voxel.\"\"\"\n        return self._voxel_size\n\n    @property\n    def max_num_points_per_voxel(self):\n        \"\"\"int: Maximum number of points per voxel.\"\"\"\n        return self._max_num_points\n\n    @property\n    def point_cloud_range(self):\n        \"\"\"list[float]: Range of point cloud.\"\"\"\n        return self._point_cloud_range\n\n    @property\n    def grid_size(self):\n        \"\"\"np.ndarray: The size of grids.\"\"\"\n        return self._grid_size\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        indent = ' ' * (len(repr_str) + 1)\n        repr_str += f'(voxel_size={self._voxel_size},\\n'\n        repr_str += indent + 'point_cloud_range='\n        repr_str += f'{self._point_cloud_range.tolist()},\\n'\n        repr_str += indent + f'max_num_points={self._max_num_points},\\n'\n        repr_str += indent + f'max_voxels={self._max_voxels},\\n'\n        repr_str += indent + f'grid_size={self._grid_size.tolist()}'\n        repr_str += ')'\n        return repr_str\n\n\ndef points_to_voxel(points,\n                    voxel_size,\n                    coors_range,\n                    max_points=35,\n                    reverse_index=True,\n                    max_voxels=20000):\n    \"\"\"convert kitti points(N, >=3) to voxels.\n\n    Args:\n        points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and \\\n            points[:, 3:] contain other information such as reflectivity.\n        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size\n        coors_range (list[float | tuple[float] | ndarray]): Voxel range. \\\n            format: xyzxyz, minmax\n        max_points (int): Indicate maximum points contained in a voxel.\n        reverse_index (bool): Whether return reversed coordinates. 
\\\n            if points has xyz format and reverse_index is True, output \\\n            coordinates will be zyx format, but points in features always \\\n            xyz format.\n        max_voxels (int): Maximum number of voxels this function creates. \\\n            For second, 20000 is a good choice. Points should be shuffled for \\\n            randomness before this function because max_voxels drops points.\n\n    Returns:\n        tuple[np.ndarray]:\n            voxels: [M, max_points, ndim] float tensor. only contain points.\n            coordinates: [M, 3] int32 tensor.\n            num_points_per_voxel: [M] int32 tensor.\n    \"\"\"\n    if not isinstance(voxel_size, np.ndarray):\n        voxel_size = np.array(voxel_size, dtype=points.dtype)\n    if not isinstance(coors_range, np.ndarray):\n        coors_range = np.array(coors_range, dtype=points.dtype)\n    voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size\n    voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())\n    if reverse_index:\n        voxelmap_shape = voxelmap_shape[::-1]\n    # don't create large array in jit(nopython=True) code.\n    num_points_per_voxel = np.zeros(shape=(max_voxels, ), dtype=np.int32)\n    coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)\n    voxels = np.zeros(\n        shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype)\n    coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)\n    if reverse_index:\n        voxel_num = _points_to_voxel_reverse_kernel(\n            points, voxel_size, coors_range, num_points_per_voxel,\n            coor_to_voxelidx, voxels, coors, max_points, max_voxels)\n\n    else:\n        voxel_num = _points_to_voxel_kernel(points, voxel_size, coors_range,\n                                            num_points_per_voxel,\n                                            coor_to_voxelidx, voxels, coors,\n                                            max_points, max_voxels)\n\n    coors = coors[:voxel_num]\n    voxels = voxels[:voxel_num]\n    num_points_per_voxel = num_points_per_voxel[:voxel_num]\n\n    return voxels, coors, num_points_per_voxel\n\n\n@numba.jit(nopython=True)\ndef _points_to_voxel_reverse_kernel(points,\n                                    voxel_size,\n                                    coors_range,\n                                    num_points_per_voxel,\n                                    coor_to_voxelidx,\n                                    voxels,\n                                    coors,\n                                    max_points=35,\n                                    max_voxels=20000):\n    \"\"\"convert kitti points(N, >=3) to voxels.\n\n    Args:\n        points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and \\\n            points[:, 3:] contain other information such as reflectivity.\n        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size \\\n        coors_range (list[float | tuple[float] | ndarray]): Range of voxels. \\\n            format: xyzxyz, minmax\n        num_points_per_voxel (int): Number of points per voxel.\n        coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W), \\\n            which has the same shape as the complete voxel map. 
It indicates \\\n            the index of each corresponding voxel.\n        voxels (np.ndarray): Created empty voxels.\n        coors (np.ndarray): Created coordinates of each voxel.\n        max_points (int): Indicate maximum points contained in a voxel.\n        max_voxels (int): Maximum number of voxels this function create. \\\n            for second, 20000 is a good choice. Points should be shuffled for \\\n            randomness before this function because max_voxels drops points.\n\n    Returns:\n        tuple[np.ndarray]:\n            voxels: Shape [M, max_points, ndim], only contain points.\n            coordinates: Shape [M, 3].\n            num_points_per_voxel: Shape [M].\n    \"\"\"\n    # put all computations to one loop.\n    # we shouldn't create large array in main jit code, otherwise\n    # reduce performance\n    N = points.shape[0]\n    # ndim = points.shape[1] - 1\n    ndim = 3\n    ndim_minus_1 = ndim - 1\n    grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size\n    # np.round(grid_size)\n    # grid_size = np.round(grid_size).astype(np.int64)(np.int32)\n    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)\n    coor = np.zeros(shape=(3, ), dtype=np.int32)\n    voxel_num = 0\n    failed = False\n    for i in range(N):\n        failed = False\n        for j in range(ndim):\n            c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])\n            if c < 0 or c >= grid_size[j]:\n                failed = True\n                break\n            coor[ndim_minus_1 - j] = c\n        if failed:\n            continue\n        voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]\n        if voxelidx == -1:\n            voxelidx = voxel_num\n            if voxel_num >= max_voxels:\n                break\n            voxel_num += 1\n            coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx\n            coors[voxelidx] = coor\n        num = num_points_per_voxel[voxelidx]\n        if num < max_points:\n            voxels[voxelidx, num] = points[i]\n            num_points_per_voxel[voxelidx] += 1\n    return voxel_num\n\n\n@numba.jit(nopython=True)\ndef _points_to_voxel_kernel(points,\n                            voxel_size,\n                            coors_range,\n                            num_points_per_voxel,\n                            coor_to_voxelidx,\n                            voxels,\n                            coors,\n                            max_points=35,\n                            max_voxels=20000):\n    \"\"\"convert kitti points(N, >=3) to voxels.\n\n    Args:\n        points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and \\\n            points[:, 3:] contain other information such as reflectivity.\n        voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size.\n        coors_range (list[float | tuple[float] | ndarray]): Range of voxels. \\\n            format: xyzxyz, minmax\n        num_points_per_voxel (int): Number of points per voxel.\n        coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W), \\\n            which has the same shape as the complete voxel map. It indicates \\\n            the index of each corresponding voxel.\n        voxels (np.ndarray): Created empty voxels.\n        coors (np.ndarray): Created coordinates of each voxel.\n        max_points (int): Indicate maximum points contained in a voxel.\n        max_voxels (int): Maximum number of voxels this function create. \\\n            for second, 20000 is a good choice. 
Points should be shuffled for \\\n            randomness before this function because max_voxels drops points.\n\n    Returns:\n        tuple[np.ndarray]:\n            voxels: Shape [M, max_points, ndim], only contain points.\n            coordinates: Shape [M, 3].\n            num_points_per_voxel: Shape [M].\n    \"\"\"\n    N = points.shape[0]\n    # ndim = points.shape[1] - 1\n    ndim = 3\n    grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size\n    # grid_size = np.round(grid_size).astype(np.int64)(np.int32)\n    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)\n\n    # lower_bound = coors_range[:3]\n    # upper_bound = coors_range[3:]\n    coor = np.zeros(shape=(3, ), dtype=np.int32)\n    voxel_num = 0\n    failed = False\n    for i in range(N):\n        failed = False\n        for j in range(ndim):\n            c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])\n            if c < 0 or c >= grid_size[j]:\n                failed = True\n                break\n            coor[j] = c\n        if failed:\n            continue\n        voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]\n        if voxelidx == -1:\n            voxelidx = voxel_num\n            if voxel_num >= max_voxels:\n                break\n            voxel_num += 1\n            coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx\n            coors[voxelidx] = coor\n        num = num_points_per_voxel[voxelidx]\n        if num < max_points:\n            voxels[voxelidx, num] = points[i]\n            num_points_per_voxel[voxelidx] += 1\n    return voxel_num\n"
  },
  {
    "path": "mmdet3d/datasets/__init__.py",
    "content": "from mmdet.datasets.builder import build_dataloader\nfrom .builder import DATASETS, build_dataset\nfrom .custom_3d import Custom3DDataset\nfrom .kitti_dataset import KittiDataset\nfrom .lyft_dataset import LyftDataset\nfrom .nuscenes_dataset import NuScenesDataset\nfrom .pipelines import (BackgroundPointsFilter, GlobalRotScaleTrans,\n                        IndoorPointSample, LoadAnnotations3D,\n                        LoadPointsFromFile, LoadPointsFromMultiSweeps,\n                        NormalizePointsColor, ObjectNoise, ObjectRangeFilter,\n                        ObjectSample, PointShuffle, PointsRangeFilter,\n                        RandomFlip3D, VoxelBasedPointSampler)\nfrom .scannet_dataset import ScanNetDataset\nfrom .semantickitti_dataset import SemanticKITTIDataset\nfrom .sunrgbd_dataset import SUNRGBDDataset\nfrom .waymo_dataset import WaymoDataset\nfrom .nuscenes_dataset_viewInfo import NuScenesDataset_ViewInfo\n\n__all__ = [\n    'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',\n    'build_dataloader', 'RepeatFactorDataset', 'DATASETS', 'build_dataset',\n    'CocoDataset', 'NuScenesDataset', 'LyftDataset', 'ObjectSample',\n    'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', 'PointShuffle',\n    'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',\n    'LoadPointsFromFile', 'NormalizePointsColor', 'IndoorPointSample',\n    'LoadAnnotations3D', 'SUNRGBDDataset', 'ScanNetDataset',\n    'SemanticKITTIDataset', 'Custom3DDataset', 'LoadPointsFromMultiSweeps',\n    'WaymoDataset', 'BackgroundPointsFilter', 'VoxelBasedPointSampler',\n    'NuScenesDataset_ViewInfo'\n]\n"
  },
  {
    "path": "mmdet3d/datasets/builder.py",
    "content": "import platform\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet.datasets import DATASETS\nfrom mmdet.datasets.builder import _concat_dataset\n\nif platform.system() != 'Windows':\n    # https://github.com/pytorch/pytorch/issues/973\n    import resource\n    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)\n    hard_limit = rlimit[1]\n    soft_limit = min(4096, hard_limit)\n    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))\n\n\ndef build_dataset(cfg, default_args=None):\n    from mmdet3d.datasets.dataset_wrappers import CBGSDataset\n    from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,\n                                                 ConcatDataset, RepeatDataset)\n\n    if isinstance(cfg, (list, tuple)):\n        dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])\n    elif cfg['type'] == 'ConcatDataset':\n        dataset = ConcatDataset(\n            [build_dataset(c, default_args) for c in cfg['datasets']],\n            cfg.get('separate_eval', True))\n    elif cfg['type'] == 'RepeatDataset':\n        dataset = RepeatDataset(\n            build_dataset(cfg['dataset'], default_args), cfg['times'])\n    elif cfg['type'] == 'ClassBalancedDataset':\n        dataset = ClassBalancedDataset(\n            build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])\n    elif cfg['type'] == 'CBGSDataset':\n        dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args))\n    elif isinstance(cfg.get('ann_file'), (list, tuple)):\n        dataset = _concat_dataset(cfg, default_args)\n    else:\n        dataset = build_from_cfg(cfg, DATASETS, default_args)\n\n    return dataset\n"
  },
  {
    "path": "mmdet3d/datasets/custom_3d.py",
    "content": "import mmcv\nimport numpy as np\nimport tempfile\nfrom os import path as osp\nfrom torch.utils.data import Dataset\n\nfrom mmdet.datasets import DATASETS\nfrom ..core.bbox import get_box_type\nfrom .pipelines import Compose\n\n\n@DATASETS.register_module()\nclass Custom3DDataset(Dataset):\n    \"\"\"Customized 3D dataset.\n\n    This is the base dataset of SUNRGB-D, ScanNet, nuScenes, and KITTI\n    dataset.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR'. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 modality=None,\n                 box_type_3d='LiDAR',\n                 filter_empty_gt=True,\n                 test_mode=False):\n        super().__init__()\n        self.data_root = data_root\n        self.ann_file = ann_file\n        self.test_mode = test_mode\n        self.modality = modality\n        self.filter_empty_gt = filter_empty_gt\n        self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)\n\n        self.CLASSES = self.get_classes(classes)\n        self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}\n        self.data_infos = self.load_annotations(self.ann_file)\n\n        if pipeline is not None:\n            self.pipeline = Compose(pipeline)\n\n        # set group flag for the sampler\n        if not self.test_mode:\n            self._set_group_flag()\n\n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations.\n        \"\"\"\n        return mmcv.load(ann_file)\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data \\\n                preprocessing pipelines. 
It includes the following keys:\n\n                - sample_idx (str): Sample index.\n                - pts_filename (str): Filename of point clouds.\n                - file_name (str): Filename of point clouds.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['point_cloud']['lidar_idx']\n        pts_filename = osp.join(self.data_root, info['pts_path'])\n\n        input_dict = dict(\n            pts_filename=pts_filename,\n            sample_idx=sample_idx,\n            file_name=pts_filename)\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n            if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any():\n                return None\n        return input_dict\n\n    def pre_pipeline(self, results):\n        \"\"\"Initialization before data preparation.\n\n        Args:\n            results (dict): Dict before data preprocessing.\n\n                - img_fields (list): Image fields.\n                - bbox3d_fields (list): 3D bounding boxes fields.\n                - pts_mask_fields (list): Mask fields of points.\n                - pts_seg_fields (list): Mask fields of point segments.\n                - bbox_fields (list): Fields of bounding boxes.\n                - mask_fields (list): Fields of masks.\n                - seg_fields (list): Segment fields.\n                - box_type_3d (str): 3D box type.\n                - box_mode_3d (str): 3D box mode.\n        \"\"\"\n        results['img_fields'] = []\n        results['bbox3d_fields'] = []\n        results['pts_mask_fields'] = []\n        results['pts_seg_fields'] = []\n        results['bbox_fields'] = []\n        results['mask_fields'] = []\n        results['seg_fields'] = []\n        results['box_type_3d'] = self.box_type_3d\n        results['box_mode_3d'] = self.box_mode_3d\n\n    def prepare_train_data(self, index):\n        \"\"\"Training data preparation.\n\n        Args:\n            index (int): Index for accessing the target data.\n\n        Returns:\n            dict: Training data dict of the corresponding index.\n        \"\"\"\n        input_dict = self.get_data_info(index)\n        if input_dict is None:\n            return None\n        self.pre_pipeline(input_dict)\n        example = self.pipeline(input_dict)\n        if self.filter_empty_gt and \\\n                (example is None or\n                    ~(example['gt_labels_3d']._data != -1).any()):\n            return None\n        return example\n\n    def prepare_test_data(self, index):\n        \"\"\"Prepare data for testing.\n\n        Args:\n            index (int): Index for accessing the target data.\n\n        Returns:\n            dict: Testing data dict of the corresponding index.\n        \"\"\"\n        input_dict = self.get_data_info(index)\n        self.pre_pipeline(input_dict)\n        example = self.pipeline(input_dict)\n        return example\n\n    @classmethod\n    def get_classes(cls, classes=None):\n        \"\"\"Get class names of current dataset.\n\n        Args:\n            classes (Sequence[str] | str | None): If classes is None, use\n                default CLASSES defined by builtin dataset. If classes is a\n                string, take it as a file name. The file contains the name of\n                classes where each line contains one class name. 
If classes is\n                a tuple or list, override the CLASSES defined by the dataset.\n\n        Returns:\n            list[str]: A list of class names.\n        \"\"\"\n        if classes is None:\n            return cls.CLASSES\n\n        if isinstance(classes, str):\n            # take it as a file path\n            class_names = mmcv.list_from_file(classes)\n        elif isinstance(classes, (tuple, list)):\n            class_names = classes\n        else:\n            raise ValueError(f'Unsupported type {type(classes)} of classes.')\n\n        return class_names\n\n    def format_results(self,\n                       outputs,\n                       pklfile_prefix=None,\n                       submission_prefix=None):\n        \"\"\"Format the results to pkl file.\n\n        Args:\n            outputs (list[dict]): Testing results of the dataset.\n            pklfile_prefix (str | None): The prefix of pkl files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            submission_prefix (str | None): The prefix of submitted files.\n                Not used in the base dataset. Default: None.\n\n        Returns:\n            tuple: (outputs, tmp_dir), outputs is the detection results, \\\n                tmp_dir is the temporary directory created for saving the \\\n                pkl file when ``pklfile_prefix`` is not specified.\n        \"\"\"\n        if pklfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            pklfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n        out = f'{pklfile_prefix}.pkl'\n        mmcv.dump(outputs, out)\n        return outputs, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric=None,\n                 iou_thr=(0.25, 0.5),\n                 logger=None,\n                 show=False,\n                 out_dir=None):\n        \"\"\"Evaluate.\n\n        Evaluation in indoor protocol.\n\n        Args:\n            results (list[dict]): List of results.\n            metric (str | list[str]): Metrics to be evaluated.\n            iou_thr (list[float]): AP IoU thresholds.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            show (bool): Whether to visualize.\n                Default: False.\n            out_dir (str): Path to save the visualization results.\n                Default: None.\n\n        Returns:\n            dict: Evaluation results.\n        \"\"\"\n        from mmdet3d.core.evaluation import indoor_eval\n        assert isinstance(\n            results, list), f'Expect results to be list, got {type(results)}.'\n        assert len(results) > 0, 'Expect length of results > 0.'\n        assert len(results) == len(self.data_infos)\n        assert isinstance(\n            results[0], dict\n        ), f'Expect elements in results to be dict, got {type(results[0])}.'\n        gt_annos = [info['annos'] for info in self.data_infos]\n        label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}\n        ret_dict = indoor_eval(\n            gt_annos,\n            results,\n            iou_thr,\n            label2cat,\n            logger=logger,\n            box_type_3d=self.box_type_3d,\n            box_mode_3d=self.box_mode_3d)\n        if show:\n            self.show(results, out_dir)\n\n        return ret_dict\n\n    def __len__(self):\n        \"\"\"Return the length of data infos.\n\n        Returns:\n            int: Length of data infos.\n        \"\"\"\n        return len(self.data_infos)\n\n    def _rand_another(self, idx):\n        \"\"\"Randomly get another item with the same flag.\n\n        Returns:\n            
int: Another index of item with the same flag.\n        \"\"\"\n        pool = np.where(self.flag == self.flag[idx])[0]\n        return np.random.choice(pool)\n\n    def __getitem__(self, idx):\n        \"\"\"Get item from infos according to the given index.\n\n        Returns:\n            dict: Data dictionary of the corresponding index.\n        \"\"\"\n        if self.test_mode:\n            return self.prepare_test_data(idx)\n        while True:\n            data = self.prepare_train_data(idx)\n            if data is None:\n                idx = self._rand_another(idx)\n                continue\n            return data\n\n    def _set_group_flag(self):\n        \"\"\"Set flag according to image aspect ratio.\n\n        Images with aspect ratio greater than 1 will be set as group 1,\n        otherwise group 0. In 3D datasets, they are all the same, thus are all\n        zeros.\n        \"\"\"\n        self.flag = np.zeros(len(self), dtype=np.uint8)\n"
  },
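  {
    "path": "examples/custom_3d_dataset_sketch.py",
    "content": "\"\"\"Illustrative sketch only, not part of the library.\n\nThis hypothetical file shows how a new dataset could subclass\n``Custom3DDataset`` from ``mmdet3d/datasets/custom_3d.py``. The class name,\nthe CLASSES tuple, the annotation keys ('gt_boxes', 'gt_labels') and the\npaths in the config comment below are assumptions for demonstration.\n\"\"\"\nimport numpy as np\n\nfrom mmdet.datasets import DATASETS\nfrom mmdet3d.datasets.custom_3d import Custom3DDataset\n\n\n@DATASETS.register_module()\nclass ToyIndoor3DDataset(Custom3DDataset):\n    # Hypothetical class names; real datasets define their own CLASSES.\n    CLASSES = ('chair', 'table')\n\n    def get_ann_info(self, index):\n        # The inherited ``load_annotations`` reads ``ann_file`` with\n        # ``mmcv.load``, so ``self.data_infos`` holds whatever was dumped\n        # into that pkl file; 'gt_boxes' and 'gt_labels' are assumed keys.\n        info = self.data_infos[index]\n        return dict(\n            gt_bboxes_3d=info['gt_boxes'],\n            gt_labels_3d=np.array(info['gt_labels'], dtype=np.int64))\n\n\n# A config could then reference the class in the usual mmdetection style\n# (all paths below are hypothetical):\n# train=dict(\n#     type='ToyIndoor3DDataset',\n#     data_root='data/toy_indoor/',\n#     ann_file='data/toy_indoor/toy_infos_train.pkl',\n#     pipeline=[...],\n#     box_type_3d='Depth')\n"
  },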
  {
    "path": "mmdet3d/datasets/dataset_wrappers.py",
    "content": "import numpy as np\n\nfrom .builder import DATASETS\n\n\n@DATASETS.register_module()\nclass CBGSDataset(object):\n    \"\"\"A wrapper of class sampled dataset with ann_file path. Implementation of\n    paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object\n    Detection <https://arxiv.org/abs/1908.09492.>`_.\n\n    Balance the number of scenes under different classes.\n\n    Args:\n        dataset (:obj:`CustomDataset`): The dataset to be class sampled.\n    \"\"\"\n\n    def __init__(self, dataset):\n        self.dataset = dataset\n        self.CLASSES = dataset.CLASSES\n        self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}\n        self.sample_indices = self._get_sample_indices()\n        # self.dataset.data_infos = self.data_infos\n        if hasattr(self.dataset, 'flag'):\n            self.flag = np.array(\n                [self.dataset.flag[ind] for ind in self.sample_indices],\n                dtype=np.uint8)\n\n    def _get_sample_indices(self):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations after class sampling.\n        \"\"\"\n        class_sample_idxs = {cat_id: [] for cat_id in self.cat2id.values()}\n        for idx in range(len(self.dataset)):\n            sample_cat_ids = self.dataset.get_cat_ids(idx)\n            for cat_id in sample_cat_ids:\n                class_sample_idxs[cat_id].append(idx)\n        duplicated_samples = sum(\n            [len(v) for _, v in class_sample_idxs.items()])\n        class_distribution = {\n            k: len(v) / duplicated_samples\n            for k, v in class_sample_idxs.items()\n        }\n\n        sample_indices = []\n\n        frac = 1.0 / len(self.CLASSES)\n        ratios = [frac / v for v in class_distribution.values()]\n        for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios):\n            sample_indices += np.random.choice(cls_inds,\n                                               int(len(cls_inds) *\n                                                   ratio)).tolist()\n        return sample_indices\n\n    def __getitem__(self, idx):\n        \"\"\"Get item from infos according to the given index.\n\n        Returns:\n            dict: Data dictionary of the corresponding index.\n        \"\"\"\n        ori_idx = self.sample_indices[idx]\n        return self.dataset[ori_idx]\n\n    def __len__(self):\n        \"\"\"Return the length of data infos.\n\n        Returns:\n            int: Length of data infos.\n        \"\"\"\n        return len(self.sample_indices)\n"
  },
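  {
    "path": "examples/cbgs_resampling_sketch.py",
    "content": "\"\"\"Illustrative sketch only, not part of the library.\n\nA standalone numpy walk-through of the class-balanced resampling ratio used\nby ``CBGSDataset._get_sample_indices`` in ``dataset_wrappers.py``. The toy\nclass-to-sample mapping below is hypothetical; only the formula\n``ratio = (1 / num_classes) / class_fraction`` mirrors the wrapper.\n\"\"\"\nimport numpy as np\n\n# Hypothetical mapping from category id to the sample indices containing it.\nclass_sample_idxs = {\n    0: [0, 1, 2, 3, 4, 5, 6, 7],  # frequent class\n    1: [8, 9, 10],                # rarer class\n    2: [11],                      # very rare class\n}\n\nnum_classes = len(class_sample_idxs)\nduplicated_samples = sum(len(v) for v in class_sample_idxs.values())\n\nsample_indices = []\nfor cat_id, idxs in class_sample_idxs.items():\n    class_fraction = len(idxs) / duplicated_samples\n    # Rare classes get a ratio > 1, so their scenes are drawn more often.\n    ratio = (1.0 / num_classes) / class_fraction\n    sample_indices += np.random.choice(idxs, int(len(idxs) * ratio)).tolist()\n\n# Each class now contributes roughly the same number of resampled indices.\nprint(f'resampled {len(sample_indices)} indices from {duplicated_samples}')\n"
  },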
  {
    "path": "mmdet3d/datasets/kitti2d_dataset.py",
    "content": "import mmcv\nimport numpy as np\n\nfrom mmdet.datasets import DATASETS, CustomDataset\n\n\n@DATASETS.register_module()\nclass Kitti2DDataset(CustomDataset):\n    r\"\"\"KITTI 2D Dataset.\n\n    This class serves as the API for experiments on the `KITTI Dataset\n    <http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d>`_.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR'. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n\n    CLASSES = ('car', 'pedestrian', 'cyclist')\n    \"\"\"\n    Annotation format:\n    [\n        {\n            'image': {\n                'image_idx': 0,\n                'image_path': 'training/image_2/000000.png',\n                'image_shape': array([ 370, 1224], dtype=int32)\n            },\n            'point_cloud': {\n                 'num_features': 4,\n                 'velodyne_path': 'training/velodyne/000000.bin'\n             },\n             'calib': {\n                 'P0': <np.ndarray> (4, 4),\n                 'P1': <np.ndarray> (4, 4),\n                 'P2': <np.ndarray> (4, 4),\n                 'P3': <np.ndarray> (4, 4),\n                 'R0_rect':4x4 np.array,\n                 'Tr_velo_to_cam': 4x4 np.array,\n                 'Tr_imu_to_velo': 4x4 np.array\n             },\n             'annos': {\n                 'name': <np.ndarray> (n),\n                 'truncated': <np.ndarray> (n),\n                 'occluded': <np.ndarray> (n),\n                 'alpha': <np.ndarray> (n),\n                 'bbox': <np.ndarray> (n, 4),\n                 'dimensions': <np.ndarray> (n, 3),\n                 'location': <np.ndarray> (n, 3),\n                 'rotation_y': <np.ndarray> (n),\n                 'score': <np.ndarray> (n),\n                 'index': array([0], dtype=int32),\n                 'group_ids': array([0], dtype=int32),\n                 'difficulty': array([0], dtype=int32),\n                 'num_points_in_gt': <np.ndarray> (n),\n             }\n        }\n    ]\n    \"\"\"\n\n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations.\n        \"\"\"\n        self.data_infos = mmcv.load(ann_file)\n        self.cat2label = {\n            cat_name: i\n            for i, cat_name in enumerate(self.CLASSES)\n        }\n        return self.data_infos\n\n    def _filter_imgs(self, min_size=32):\n        \"\"\"Filter images without 
ground truths.\"\"\"\n        valid_inds = []\n        for i, img_info in enumerate(self.data_infos):\n            if len(img_info['annos']['name']) > 0:\n                valid_inds.append(i)\n        return valid_inds\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: Annotation information consists of the following keys:\n\n                - bboxes (np.ndarray): Ground truth bboxes.\n                - labels (np.ndarray): Labels of ground truths.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n        annos = info['annos']\n        gt_names = annos['name']\n        gt_bboxes = annos['bbox']\n        difficulty = annos['difficulty']\n\n        # remove classes that is not needed\n        selected = self.keep_arrays_by_name(gt_names, self.CLASSES)\n        gt_bboxes = gt_bboxes[selected]\n        gt_names = gt_names[selected]\n        difficulty = difficulty[selected]\n        gt_labels = np.array([self.cat2label[n] for n in gt_names])\n\n        anns_results = dict(\n            bboxes=gt_bboxes.astype(np.float32),\n            labels=gt_labels,\n        )\n        return anns_results\n\n    def prepare_train_img(self, idx):\n        \"\"\"Training image preparation.\n\n        Args:\n            index (int): Index for accessing the target image data.\n\n        Returns:\n            dict: Training image data dict after preprocessing\n                corresponding to the index.\n        \"\"\"\n        img_raw_info = self.data_infos[idx]['image']\n        img_info = dict(filename=img_raw_info['image_path'])\n        ann_info = self.get_ann_info(idx)\n        if len(ann_info['bboxes']) == 0:\n            return None\n        results = dict(img_info=img_info, ann_info=ann_info)\n        if self.proposals is not None:\n            results['proposals'] = self.proposals[idx]\n        self.pre_pipeline(results)\n        return self.pipeline(results)\n\n    def prepare_test_img(self, idx):\n        \"\"\"Prepare data for testing.\n\n        Args:\n            index (int): Index for accessing the target image data.\n\n        Returns:\n            dict: Testing image data dict after preprocessing\n                corresponding to the index.\n        \"\"\"\n        img_raw_info = self.data_infos[idx]['image']\n        img_info = dict(filename=img_raw_info['image_path'])\n        results = dict(img_info=img_info)\n        if self.proposals is not None:\n            results['proposals'] = self.proposals[idx]\n        self.pre_pipeline(results)\n        return self.pipeline(results)\n\n    def drop_arrays_by_name(self, gt_names, used_classes):\n        \"\"\"Drop irrelevant ground truths by name.\n\n        Args:\n            gt_names (list[str]): Names of ground truths.\n            used_classes (list[str]): Classes of interest.\n\n        Returns:\n            np.ndarray: Indices of ground truths that will be dropped.\n        \"\"\"\n        inds = [i for i, x in enumerate(gt_names) if x not in used_classes]\n        inds = np.array(inds, dtype=np.int64)\n        return inds\n\n    def keep_arrays_by_name(self, gt_names, used_classes):\n        \"\"\"Keep useful ground truths by name.\n\n        Args:\n            gt_names (list[str]): Names of ground truths.\n            used_classes (list[str]): Classes of interest.\n\n        Returns:\n   
         np.ndarray: Indices of ground truths that will be kept.\n        \"\"\"\n        inds = [i for i, x in enumerate(gt_names) if x in used_classes]\n        inds = np.array(inds, dtype=np.int64)\n        return inds\n\n    def reformat_bbox(self, outputs, out=None):\n        \"\"\"Reformat bounding boxes to KITTI 2D styles.\n\n        Args:\n            outputs (list[np.ndarray]): List of arrays storing the inferenced\n                bounding boxes and scores.\n            out (str | None): The prefix of output file. Default: None.\n\n        Returns:\n            list[dict]: A list of dictionaries with the kitti 2D format.\n        \"\"\"\n        from mmdet3d.core.bbox.transforms import bbox2result_kitti2d\n        sample_idx = [info['image']['image_idx'] for info in self.data_infos]\n        result_files = bbox2result_kitti2d(outputs, self.CLASSES, sample_idx,\n                                           out)\n        return result_files\n\n    def evaluate(self, result_files, eval_types=None):\n        \"\"\"Evaluation in KITTI protocol.\n\n        Args:\n            result_files (str): Path of result files.\n            eval_types (str): Types of evaluation. Default: None.\n                The KITTI dataset only supports the 'bbox' evaluation type.\n\n        Returns:\n            tuple (str, dict): Average precision results in str format\n                and average precision results in dict format.\n        \"\"\"\n        from mmdet3d.core.evaluation import kitti_eval\n        eval_types = ['bbox'] if not eval_types else eval_types\n        assert eval_types in ('bbox', ['bbox']), \\\n            'KITTI dataset only evaluates bbox'\n        gt_annos = [info['annos'] for info in self.data_infos]\n        ap_result_str, ap_dict = kitti_eval(\n            gt_annos, result_files, self.CLASSES, eval_types=['bbox'])\n        return ap_result_str, ap_dict\n"
  },
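  {
    "path": "examples/kitti2d_result_format_sketch.py",
    "content": "\"\"\"Illustrative sketch only, not part of the library.\n\nShows the per-class box layout that ``Kitti2DDataset.reformat_bbox`` and\n``bbox2result_kitti2d`` iterate over: for every sample, a list with one\n(N, 5) array of [x1, y1, x2, y2, score] per class. The numbers below are\nhypothetical.\n\"\"\"\nimport numpy as np\n\nCLASSES = ('car', 'pedestrian', 'cyclist')\n\n# Detection result for a single sample.\nsample_result = [\n    np.array([[10., 20., 110., 90., 0.91]], dtype=np.float32),   # car\n    np.zeros((0, 5), dtype=np.float32),                          # pedestrian\n    np.array([[50., 30., 80., 120., 0.42]], dtype=np.float32),   # cyclist\n]\n\nfor label, bboxes in enumerate(sample_result):\n    for box in bboxes:\n        print(CLASSES[label], 'bbox:', box[:4], 'score:', box[4])\n"
  },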
  {
    "path": "mmdet3d/datasets/kitti_dataset.py",
    "content": "import copy\nimport mmcv\nimport numpy as np\nimport os\nimport tempfile\nimport torch\nfrom mmcv.utils import print_log\nfrom os import path as osp\n\nfrom mmdet.datasets import DATASETS\nfrom ..core import show_result\nfrom ..core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,\n                         points_cam2img)\nfrom .custom_3d import Custom3DDataset\n\n\n@DATASETS.register_module()\nclass KittiDataset(Custom3DDataset):\n    r\"\"\"KITTI Dataset.\n\n    This class serves as the API for experiments on the `KITTI Dataset\n    <http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d>`_.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        split (str): Split of input data.\n        pts_prefix (str, optional): Prefix of points files.\n            Defaults to 'velodyne'.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n        pcd_limit_range (list): The range of point cloud used to filter\n            invalid predicted boxes. 
Default: [0, -40, -3, 70.4, 40, 0.0].\n    \"\"\"\n    CLASSES = ('car', 'pedestrian', 'cyclist')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 split,\n                 pts_prefix='velodyne',\n                 pipeline=None,\n                 classes=None,\n                 modality=None,\n                 box_type_3d='LiDAR',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 pcd_limit_range=[0, -40, -3, 70.4, 40, 0.0]):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode)\n\n        self.split = split\n        self.root_split = os.path.join(self.data_root, split)\n        assert self.modality is not None\n        self.pcd_limit_range = pcd_limit_range\n        self.pts_prefix = pts_prefix\n\n    def _get_pts_filename(self, idx):\n        \"\"\"Get point cloud filename according to the given index.\n\n        Args:\n            index (int): Index of the point cloud file to get.\n\n        Returns:\n            str: Name of the point cloud file.\n        \"\"\"\n        pts_filename = osp.join(self.root_split, self.pts_prefix,\n                                f'{idx:06d}.bin')\n        return pts_filename\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data \\\n                preprocessing pipelines. 
It includes the following keys:\n\n                - sample_idx (str): Sample index.\n                - pts_filename (str): Filename of point clouds.\n                - img_prefix (str | None): Prefix of image files.\n                - img_info (dict): Image info.\n                - lidar2img (list[np.ndarray], optional): Transformations \\\n                    from lidar to different cameras.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['image']['image_idx']\n        img_filename = os.path.join(self.data_root,\n                                    info['image']['image_path'])\n\n        # TODO: consider use torch.Tensor only\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n        P2 = info['calib']['P2'].astype(np.float32)\n        lidar2img = P2 @ rect @ Trv2c\n\n        pts_filename = self._get_pts_filename(sample_idx)\n        input_dict = dict(\n            sample_idx=sample_idx,\n            pts_filename=pts_filename,\n            img_prefix=None,\n            img_info=dict(filename=img_filename),\n            lidar2img=lidar2img)\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n\n        return input_dict\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): \\\n                    3D ground truth bboxes.\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - gt_bboxes (np.ndarray): 2D ground truth bboxes.\n                - gt_labels (np.ndarray): Labels of ground truths.\n                - gt_names (list[str]): Class names of ground truths.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n\n        annos = info['annos']\n        # we need other objects to avoid collision when sample\n        annos = self.remove_dontcare(annos)\n        loc = annos['location']\n        dims = annos['dimensions']\n        rots = annos['rotation_y']\n        gt_names = annos['name']\n        gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                      axis=1).astype(np.float32)\n\n        # convert gt_bboxes_3d to velodyne coordinates\n        gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(\n            self.box_mode_3d, np.linalg.inv(rect @ Trv2c))\n        gt_bboxes = annos['bbox']\n\n        selected = self.drop_arrays_by_name(gt_names, ['DontCare'])\n        gt_bboxes = gt_bboxes[selected].astype('float32')\n        gt_names = gt_names[selected]\n\n        gt_labels = []\n        for cat in gt_names:\n            if cat in self.CLASSES:\n                gt_labels.append(self.CLASSES.index(cat))\n            else:\n                gt_labels.append(-1)\n        gt_labels = np.array(gt_labels).astype(np.int64)\n        gt_labels_3d = copy.deepcopy(gt_labels)\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n   
         bboxes=gt_bboxes,\n            labels=gt_labels,\n            gt_names=gt_names)\n        return anns_results\n\n    def drop_arrays_by_name(self, gt_names, used_classes):\n        \"\"\"Drop irrelevant ground truths by name.\n\n        Args:\n            gt_names (list[str]): Names of ground truths.\n            used_classes (list[str]): Classes of interest.\n\n        Returns:\n            np.ndarray: Indices of ground truths that will be dropped.\n        \"\"\"\n        inds = [i for i, x in enumerate(gt_names) if x not in used_classes]\n        inds = np.array(inds, dtype=np.int64)\n        return inds\n\n    def keep_arrays_by_name(self, gt_names, used_classes):\n        \"\"\"Keep useful ground truths by name.\n\n        Args:\n            gt_names (list[str]): Names of ground truths.\n            used_classes (list[str]): Classes of interest.\n\n        Returns:\n            np.ndarray: Indices of ground truths that will be keeped.\n        \"\"\"\n        inds = [i for i, x in enumerate(gt_names) if x in used_classes]\n        inds = np.array(inds, dtype=np.int64)\n        return inds\n\n    def remove_dontcare(self, ann_info):\n        \"\"\"Remove annotations that do not need to be cared.\n\n        Args:\n            ann_info (dict): Dict of annotation infos. The ``'DontCare'``\n                annotations will be removed according to ann_file['name'].\n\n        Returns:\n            dict: Annotations after filtering.\n        \"\"\"\n        img_filtered_annotations = {}\n        relevant_annotation_indices = [\n            i for i, x in enumerate(ann_info['name']) if x != 'DontCare'\n        ]\n        for key in ann_info.keys():\n            img_filtered_annotations[key] = (\n                ann_info[key][relevant_annotation_indices])\n        return img_filtered_annotations\n\n    def format_results(self,\n                       outputs,\n                       pklfile_prefix=None,\n                       submission_prefix=None):\n        \"\"\"Format the results to pkl file.\n\n        Args:\n            outputs (list[dict]): Testing results of the dataset.\n            pklfile_prefix (str | None): The prefix of pkl files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            submission_prefix (str | None): The prefix of submitted files. It\n                includes the file path and the prefix of filename, e.g.,\n                \"a/b/prefix\". 
If not specified, a temp file will be created.\n                Default: None.\n\n        Returns:\n            tuple: (result_files, tmp_dir), result_files is a dict containing \\\n                the json filepaths, tmp_dir is the temporary directory created \\\n                for saving json files when jsonfile_prefix is not specified.\n        \"\"\"\n        if pklfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            pklfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        if not isinstance(outputs[0], dict):\n            result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,\n                                                    pklfile_prefix,\n                                                    submission_prefix)\n        elif 'pts_bbox' in outputs[0] or 'img_bbox' in outputs[0]:\n            result_files = dict()\n            for name in outputs[0]:\n                results_ = [out[name] for out in outputs]\n                pklfile_prefix_ = pklfile_prefix + name\n                if submission_prefix is not None:\n                    submission_prefix_ = submission_prefix + name\n                else:\n                    submission_prefix_ = None\n                if 'img' in name:\n                    result_files_ = self.bbox2result_kitti2d(\n                        results_, self.CLASSES, pklfile_prefix_,\n                        submission_prefix_)\n                else:\n                    result_files_ = self.bbox2result_kitti(\n                        results_, self.CLASSES, pklfile_prefix_,\n                        submission_prefix_)\n                result_files[name] = result_files_\n        else:\n            result_files = self.bbox2result_kitti(outputs, self.CLASSES,\n                                                  pklfile_prefix,\n                                                  submission_prefix)\n        return result_files, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric=None,\n                 logger=None,\n                 pklfile_prefix=None,\n                 submission_prefix=None,\n                 show=False,\n                 out_dir=None):\n        \"\"\"Evaluation in KITTI protocol.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            metric (str | list[str]): Metrics to be evaluated.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            pklfile_prefix (str | None): The prefix of pkl files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. 
Default: None.\n            submission_prefix (str | None): The prefix of submission datas.\n                If not specified, the submission data will not be generated.\n            show (bool): Whether to visualize.\n                Default: False.\n            out_dir (str): Path to save the visualization results.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Results of each evaluation metric.\n        \"\"\"\n        result_files, tmp_dir = self.format_results(results, pklfile_prefix)\n        from mmdet3d.core.evaluation import kitti_eval\n        gt_annos = [info['annos'] for info in self.data_infos]\n\n        if isinstance(result_files, dict):\n            ap_dict = dict()\n            for name, result_files_ in result_files.items():\n                eval_types = ['bbox', 'bev', '3d']\n                if 'img' in name:\n                    eval_types = ['bbox']\n                ap_result_str, ap_dict_ = kitti_eval(\n                    gt_annos,\n                    result_files_,\n                    self.CLASSES,\n                    eval_types=eval_types)\n                for ap_type, ap in ap_dict_.items():\n                    ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))\n\n                print_log(\n                    f'Results of {name}:\\n' + ap_result_str, logger=logger)\n\n        else:\n            if metric == 'img_bbox':\n                ap_result_str, ap_dict = kitti_eval(\n                    gt_annos, result_files, self.CLASSES, eval_types=['bbox'])\n            else:\n                ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,\n                                                    self.CLASSES)\n            print_log('\\n' + ap_result_str, logger=logger)\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n        if show:\n            self.show(results, out_dir)\n        return ap_dict\n\n    def bbox2result_kitti(self,\n                          net_outputs,\n                          class_names,\n                          pklfile_prefix=None,\n                          submission_prefix=None):\n        \"\"\"Convert 3D detection results to kitti format for evaluation and test\n        submission.\n\n        Args:\n            net_outputs (list[np.ndarray]): List of array storing the \\\n                inferenced bounding boxes and scores.\n            class_names (list[String]): A list of class names.\n            pklfile_prefix (str | None): The prefix of pkl file.\n            submission_prefix (str | None): The prefix of submission file.\n\n        Returns:\n            list[dict]: A list of dictionaries with the kitti format.\n        \"\"\"\n        assert len(net_outputs) == len(self.data_infos), \\\n            'invalid list length of network outputs'\n        if submission_prefix is not None:\n            mmcv.mkdir_or_exist(submission_prefix)\n\n        det_annos = []\n        print('\\nConverting prediction to KITTI format')\n        for idx, pred_dicts in enumerate(\n                mmcv.track_iter_progress(net_outputs)):\n            annos = []\n            info = self.data_infos[idx]\n            sample_idx = info['image']['image_idx']\n            image_shape = info['image']['image_shape'][:2]\n            box_dict = self.convert_valid_bboxes(pred_dicts, info)\n            anno = {\n                'name': [],\n                'truncated': [],\n                'occluded': [],\n                'alpha': [],\n                'bbox': [],\n                'dimensions': 
[],\n                'location': [],\n                'rotation_y': [],\n                'score': []\n            }\n            if len(box_dict['bbox']) > 0:\n                box_2d_preds = box_dict['bbox']\n                box_preds = box_dict['box3d_camera']\n                scores = box_dict['scores']\n                box_preds_lidar = box_dict['box3d_lidar']\n                label_preds = box_dict['label_preds']\n\n                for box, box_lidar, bbox, score, label in zip(\n                        box_preds, box_preds_lidar, box_2d_preds, scores,\n                        label_preds):\n                    bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])\n                    bbox[:2] = np.maximum(bbox[:2], [0, 0])\n                    anno['name'].append(class_names[int(label)])\n                    anno['truncated'].append(0.0)\n                    anno['occluded'].append(0)\n                    anno['alpha'].append(\n                        -np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])\n                    anno['bbox'].append(bbox)\n                    anno['dimensions'].append(box[3:6])\n                    anno['location'].append(box[:3])\n                    anno['rotation_y'].append(box[6])\n                    anno['score'].append(score)\n\n                anno = {k: np.stack(v) for k, v in anno.items()}\n                annos.append(anno)\n            else:\n                anno = {\n                    'name': np.array([]),\n                    'truncated': np.array([]),\n                    'occluded': np.array([]),\n                    'alpha': np.array([]),\n                    'bbox': np.zeros([0, 4]),\n                    'dimensions': np.zeros([0, 3]),\n                    'location': np.zeros([0, 3]),\n                    'rotation_y': np.array([]),\n                    'score': np.array([]),\n                }\n                annos.append(anno)\n\n            if submission_prefix is not None:\n                curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'\n                with open(curr_file, 'w') as f:\n                    bbox = anno['bbox']\n                    loc = anno['location']\n                    dims = anno['dimensions']  # lhw; reordered to hwl below\n\n                    for idx in range(len(bbox)):\n                        print(\n                            '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '\n                            '{:.4f} {:.4f} {:.4f} '\n                            '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(\n                                anno['name'][idx], anno['alpha'][idx],\n                                bbox[idx][0], bbox[idx][1], bbox[idx][2],\n                                bbox[idx][3], dims[idx][1], dims[idx][2],\n                                dims[idx][0], loc[idx][0], loc[idx][1],\n                                loc[idx][2], anno['rotation_y'][idx],\n                                anno['score'][idx]),\n                            file=f)\n\n            annos[-1]['sample_idx'] = np.array(\n                [sample_idx] * len(annos[-1]['score']), dtype=np.int64)\n\n            det_annos += annos\n\n        if pklfile_prefix is not None:\n            out = pklfile_prefix\n            if not out.endswith(('.pkl', '.pickle')):\n                out = f'{pklfile_prefix}.pkl'\n            mmcv.dump(det_annos, out)\n            print(f'Result is saved to {out}.')\n\n        return det_annos\n\n    def bbox2result_kitti2d(self,\n                            net_outputs,\n                            class_names,\n                            
pklfile_prefix=None,\n                            submission_prefix=None):\n        \"\"\"Convert 2D detection results to kitti format for evaluation and test\n        submission.\n\n        Args:\n            net_outputs (list[np.ndarray]): List of array storing the \\\n                inferenced bounding boxes and scores.\n            class_names (list[String]): A list of class names.\n            pklfile_prefix (str | None): The prefix of pkl file.\n            submission_prefix (str | None): The prefix of submission file.\n\n        Returns:\n            list[dict]: A list of dictionaries have the kitti format\n        \"\"\"\n        assert len(net_outputs) == len(self.data_infos), \\\n            'invalid list length of network outputs'\n        det_annos = []\n        print('\\nConverting prediction to KITTI format')\n        for i, bboxes_per_sample in enumerate(\n                mmcv.track_iter_progress(net_outputs)):\n            annos = []\n            anno = dict(\n                name=[],\n                truncated=[],\n                occluded=[],\n                alpha=[],\n                bbox=[],\n                dimensions=[],\n                location=[],\n                rotation_y=[],\n                score=[])\n            sample_idx = self.data_infos[i]['image']['image_idx']\n\n            num_example = 0\n            for label in range(len(bboxes_per_sample)):\n                bbox = bboxes_per_sample[label]\n                for i in range(bbox.shape[0]):\n                    anno['name'].append(class_names[int(label)])\n                    anno['truncated'].append(0.0)\n                    anno['occluded'].append(0)\n                    anno['alpha'].append(0.0)\n                    anno['bbox'].append(bbox[i, :4])\n                    # set dimensions (height, width, length) to zero\n                    anno['dimensions'].append(\n                        np.zeros(shape=[3], dtype=np.float32))\n                    # set the 3D translation to (-1000, -1000, -1000)\n                    anno['location'].append(\n                        np.ones(shape=[3], dtype=np.float32) * (-1000.0))\n                    anno['rotation_y'].append(0.0)\n                    anno['score'].append(bbox[i, 4])\n                    num_example += 1\n\n            if num_example == 0:\n                annos.append(\n                    dict(\n                        name=np.array([]),\n                        truncated=np.array([]),\n                        occluded=np.array([]),\n                        alpha=np.array([]),\n                        bbox=np.zeros([0, 4]),\n                        dimensions=np.zeros([0, 3]),\n                        location=np.zeros([0, 3]),\n                        rotation_y=np.array([]),\n                        score=np.array([]),\n                    ))\n            else:\n                anno = {k: np.stack(v) for k, v in anno.items()}\n                annos.append(anno)\n\n            annos[-1]['sample_idx'] = np.array(\n                [sample_idx] * num_example, dtype=np.int64)\n            det_annos += annos\n\n        if pklfile_prefix is not None:\n            # save file in pkl format\n            pklfile_path = (\n                pklfile_prefix[:-4] if pklfile_prefix.endswith(\n                    ('.pkl', '.pickle')) else pklfile_prefix)\n            mmcv.dump(det_annos, pklfile_path)\n\n        if submission_prefix is not None:\n            # save file in submission format\n            mmcv.mkdir_or_exist(submission_prefix)\n            
print(f'Saving KITTI submission to {submission_prefix}')\n            for i, anno in enumerate(det_annos):\n                sample_idx = self.data_infos[i]['image']['image_idx']\n                cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'\n                with open(cur_det_file, 'w') as f:\n                    bbox = anno['bbox']\n                    loc = anno['location']\n                    dims = anno['dimensions'][::-1]  # lhw -> hwl\n                    for idx in range(len(bbox)):\n                        print(\n                            '{} -1 -1 {:4f} {:4f} {:4f} {:4f} {:4f} {:4f} '\n                            '{:4f} {:4f} {:4f} {:4f} {:4f} {:4f} {:4f}'.format(\n                                anno['name'][idx],\n                                anno['alpha'][idx],\n                                *bbox[idx],  # 4 float\n                                *dims[idx],  # 3 float\n                                *loc[idx],  # 3 float\n                                anno['rotation_y'][idx],\n                                anno['score'][idx]),\n                            file=f,\n                        )\n            print('Result is saved to {}'.format(submission_prefix))\n\n        return det_annos\n\n    def convert_valid_bboxes(self, box_dict, info):\n        \"\"\"Convert the predicted boxes into valid ones.\n\n        Args:\n            box_dict (dict): Box dictionaries to be converted.\n\n                - boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.\n                - scores_3d (torch.Tensor): Scores of boxes.\n                - labels_3d (torch.Tensor): Class labels of boxes.\n            info (dict): Data info.\n\n        Returns:\n            dict: Valid predicted boxes.\n\n                - bbox (np.ndarray): 2D bounding boxes.\n                - box3d_camera (np.ndarray): 3D bounding boxes in \\\n                    camera coordinate.\n                - box3d_lidar (np.ndarray): 3D bounding boxes in \\\n                    LiDAR coordinate.\n                - scores (np.ndarray): Scores of boxes.\n                - label_preds (np.ndarray): Class label predictions.\n                - sample_idx (int): Sample index.\n        \"\"\"\n        # TODO: refactor this function\n        box_preds = box_dict['boxes_3d']\n        scores = box_dict['scores_3d']\n        labels = box_dict['labels_3d']\n        sample_idx = info['image']['image_idx']\n        # TODO: remove the hack of yaw\n        box_preds.tensor[:, -1] = box_preds.tensor[:, -1] - np.pi\n        box_preds.limit_yaw(offset=0.5, period=np.pi * 2)\n\n        if len(box_preds) == 0:\n            return dict(\n                bbox=np.zeros([0, 4]),\n                box3d_camera=np.zeros([0, 7]),\n                box3d_lidar=np.zeros([0, 7]),\n                scores=np.zeros([0]),\n                label_preds=np.zeros([0, 4]),\n                sample_idx=sample_idx)\n\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n        P2 = info['calib']['P2'].astype(np.float32)\n        img_shape = info['image']['image_shape']\n        P2 = box_preds.tensor.new_tensor(P2)\n\n        box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)\n\n        box_corners = box_preds_camera.corners\n        box_corners_in_image = points_cam2img(box_corners, P2)\n        # box_corners_in_image: [N, 8, 2]\n        minxy = torch.min(box_corners_in_image, dim=1)[0]\n        maxxy = torch.max(box_corners_in_image, dim=1)[0]\n 
       box_2d_preds = torch.cat([minxy, maxxy], dim=1)\n        # Post-processing\n        # check box_preds_camera\n        image_shape = box_preds.tensor.new_tensor(img_shape)\n        valid_cam_inds = ((box_2d_preds[:, 0] < image_shape[1]) &\n                          (box_2d_preds[:, 1] < image_shape[0]) &\n                          (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))\n        # check box_preds\n        limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)\n        valid_pcd_inds = ((box_preds.center > limit_range[:3]) &\n                          (box_preds.center < limit_range[3:]))\n        valid_inds = valid_cam_inds & valid_pcd_inds.all(-1)\n\n        if valid_inds.sum() > 0:\n            return dict(\n                bbox=box_2d_preds[valid_inds, :].numpy(),\n                box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),\n                box3d_lidar=box_preds[valid_inds].tensor.numpy(),\n                scores=scores[valid_inds].numpy(),\n                label_preds=labels[valid_inds].numpy(),\n                sample_idx=sample_idx,\n            )\n        else:\n            return dict(\n                bbox=np.zeros([0, 4]),\n                box3d_camera=np.zeros([0, 7]),\n                box3d_lidar=np.zeros([0, 7]),\n                scores=np.zeros([0]),\n                label_preds=np.zeros([0, 4]),\n                sample_idx=sample_idx,\n            )\n\n    def show(self, results, out_dir, show=True):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n            show (bool): Visualize the results online.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        for i, result in enumerate(results):\n            example = self.prepare_test_data(i)\n            data_info = self.data_infos[i]\n            pts_path = data_info['point_cloud']['velodyne_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            # for now we convert points into depth mode\n            points = example['points'][0]._data.numpy()\n            points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,\n                                               Coord3DMode.DEPTH)\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor\n            gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,\n                                          Box3DMode.DEPTH)\n            pred_bboxes = result['boxes_3d'].tensor.numpy()\n            pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,\n                                            Box3DMode.DEPTH)\n            show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,\n                        show)\n"
  },
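  {
    "path": "examples/kitti_lidar2img_sketch.py",
    "content": "\"\"\"Illustrative sketch only, not part of the library.\n\nA standalone numpy version of the calibration chain\n``lidar2img = P2 @ R0_rect @ Tr_velo_to_cam`` that ``KittiDataset`` builds in\n``get_data_info``. The matrices and the point below are hypothetical\nstand-ins for the values a real KITTI info file stores under\n``info['calib']``; only the composition and the perspective divide follow\nthe dataset code.\n\"\"\"\nimport numpy as np\n\nP2 = np.array([[700., 0., 600., 45.],\n               [0., 700., 180., 0.],\n               [0., 0., 1., 0.],\n               [0., 0., 0., 1.]], dtype=np.float32)\nrect = np.eye(4, dtype=np.float32)\nTrv2c = np.array([[0., -1., 0., 0.],\n                  [0., 0., -1., -0.08],\n                  [1., 0., 0., -0.27],\n                  [0., 0., 0., 1.]], dtype=np.float32)\n\nlidar2img = P2 @ rect @ Trv2c\n\n# Project one homogeneous LiDAR point (x forward, y left, z up) to pixels.\npt_lidar = np.array([10., 2., -1., 1.], dtype=np.float32)\npt_img = lidar2img @ pt_lidar\nu, v = pt_img[0] / pt_img[2], pt_img[1] / pt_img[2]\nprint(f'pixel coordinates: ({u:.1f}, {v:.1f})')\n"
  },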
  {
    "path": "mmdet3d/datasets/lyft_dataset.py",
    "content": "import mmcv\nimport numpy as np\nimport pandas as pd\nimport tempfile\nfrom lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft\nfrom lyft_dataset_sdk.utils.data_classes import Box as LyftBox\nfrom os import path as osp\nfrom pyquaternion import Quaternion\n\nfrom mmdet3d.core.evaluation.lyft_eval import lyft_eval\nfrom mmdet.datasets import DATASETS\nfrom ..core import show_result\nfrom ..core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes\nfrom .custom_3d import Custom3DDataset\n\n\n@DATASETS.register_module()\nclass LyftDataset(Custom3DDataset):\n    r\"\"\"Lyft Dataset.\n\n    This class serves as the API for experiments on the Lyft Dataset.\n\n    Please refer to\n    `<https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/data>`_  # noqa\n    for data downloading.\n\n    Args:\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        data_root (str): Path of dataset root.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        load_interval (int, optional): Interval of loading the dataset. It is\n            used to uniformly sample the dataset. Defaults to 1.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n    NameMapping = {\n        'bicycle': 'bicycle',\n        'bus': 'bus',\n        'car': 'car',\n        'emergency_vehicle': 'emergency_vehicle',\n        'motorcycle': 'motorcycle',\n        'other_vehicle': 'other_vehicle',\n        'pedestrian': 'pedestrian',\n        'truck': 'truck',\n        'animal': 'animal'\n    }\n    DefaultAttribute = {\n        'car': 'is_stationary',\n        'truck': 'is_stationary',\n        'bus': 'is_stationary',\n        'emergency_vehicle': 'is_stationary',\n        'other_vehicle': 'is_stationary',\n        'motorcycle': 'is_stationary',\n        'bicycle': 'is_stationary',\n        'pedestrian': 'is_stationary',\n        'animal': 'is_stationary'\n    }\n    CLASSES = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',\n               'motorcycle', 'bicycle', 'pedestrian', 'animal')\n\n    def __init__(self,\n                 ann_file,\n                 pipeline=None,\n                 data_root=None,\n                 classes=None,\n                 load_interval=1,\n                 modality=None,\n                 box_type_3d='LiDAR',\n                 filter_empty_gt=True,\n                 test_mode=False):\n        self.load_interval = load_interval\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            
box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode)\n\n        if self.modality is None:\n            self.modality = dict(\n                use_camera=False,\n                use_lidar=True,\n                use_radar=False,\n                use_map=False,\n                use_external=False,\n            )\n\n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations sorted by timestamps.\n        \"\"\"\n        data = mmcv.load(ann_file)\n        data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))\n        data_infos = data_infos[::self.load_interval]\n        self.metadata = data['metadata']\n        self.version = self.metadata['version']\n        return data_infos\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data \\\n                preprocessing pipelines. It includes the following keys:\n\n                - sample_idx (str): sample index\n                - pts_filename (str): filename of point clouds\n                - sweeps (list[dict]): infos of sweeps\n                - timestamp (float): sample timestamp\n                - img_filename (str, optional): image filename\n                - lidar2img (list[np.ndarray], optional): transformations \\\n                    from lidar to different cameras\n                - ann_info (dict): annotation info\n        \"\"\"\n        info = self.data_infos[index]\n\n        # standard protocal modified from SECOND.Pytorch\n        input_dict = dict(\n            sample_idx=info['token'],\n            pts_filename=info['lidar_path'],\n            sweeps=info['sweeps'],\n            timestamp=info['timestamp'] / 1e6,\n        )\n\n        if self.modality['use_camera']:\n            image_paths = []\n            lidar2img_rts = []\n            for cam_type, cam_info in info['cams'].items():\n                image_paths.append(cam_info['data_path'])\n                # obtain lidar to image transformation matrix\n                lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])\n                lidar2cam_t = cam_info[\n                    'sensor2lidar_translation'] @ lidar2cam_r.T\n                lidar2cam_rt = np.eye(4)\n                lidar2cam_rt[:3, :3] = lidar2cam_r.T\n                lidar2cam_rt[3, :3] = -lidar2cam_t\n                intrinsic = cam_info['cam_intrinsic']\n                viewpad = np.eye(4)\n                viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic\n                lidar2img_rt = (viewpad @ lidar2cam_rt.T)\n                lidar2img_rts.append(lidar2img_rt)\n\n            input_dict.update(\n                dict(\n                    img_filename=image_paths,\n                    lidar2img=lidar2img_rts,\n                ))\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n\n        return input_dict\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: Annotation information consists of the 
following keys:\n\n                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): \\\n                    3D ground truth bboxes.\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - gt_names (list[str]): Class names of ground truths.\n        \"\"\"\n        info = self.data_infos[index]\n        gt_bboxes_3d = info['gt_boxes']\n        gt_names_3d = info['gt_names']\n        gt_labels_3d = []\n        for cat in gt_names_3d:\n            if cat in self.CLASSES:\n                gt_labels_3d.append(self.CLASSES.index(cat))\n            else:\n                gt_labels_3d.append(-1)\n        gt_labels_3d = np.array(gt_labels_3d)\n\n        if 'gt_shape' in info:\n            gt_shape = info['gt_shape']\n            gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_shape], axis=-1)\n\n        # the lyft box center is [0.5, 0.5, 0.5], we change it to be\n        # the same as KITTI (0.5, 0.5, 0)\n        gt_bboxes_3d = LiDARInstance3DBoxes(\n            gt_bboxes_3d,\n            box_dim=gt_bboxes_3d.shape[-1],\n            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n        )\n        return anns_results\n\n    def _format_bbox(self, results, jsonfile_prefix=None):\n        \"\"\"Convert the results to the standard format.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str): The prefix of the output jsonfile.\n                You can specify the output directory/filename by\n                modifying the jsonfile_prefix. Default: None.\n\n        Returns:\n            str: Path of the output json file.\n        \"\"\"\n        lyft_annos = {}\n        mapped_class_names = self.CLASSES\n\n        print('Start to convert detection format...')\n        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):\n            annos = []\n            boxes = output_to_lyft_box(det)\n            sample_token = self.data_infos[sample_id]['token']\n            boxes = lidar_lyft_box_to_global(self.data_infos[sample_id], boxes)\n            for i, box in enumerate(boxes):\n                name = mapped_class_names[box.label]\n                lyft_anno = dict(\n                    sample_token=sample_token,\n                    translation=box.center.tolist(),\n                    size=box.wlh.tolist(),\n                    rotation=box.orientation.elements.tolist(),\n                    name=name,\n                    score=box.score)\n                annos.append(lyft_anno)\n            lyft_annos[sample_token] = annos\n        lyft_submissions = {\n            'meta': self.modality,\n            'results': lyft_annos,\n        }\n\n        mmcv.mkdir_or_exist(jsonfile_prefix)\n        res_path = osp.join(jsonfile_prefix, 'results_lyft.json')\n        print('Results writes to', res_path)\n        mmcv.dump(lyft_submissions, res_path)\n        return res_path\n\n    def _evaluate_single(self,\n                         result_path,\n                         logger=None,\n                         metric='bbox',\n                         result_name='pts_bbox'):\n        \"\"\"Evaluation for a single model in Lyft protocol.\n\n        Args:\n            result_path (str): Path of the result file.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. 
Default: None.\n            metric (str): Metric name used for evaluation. Default: 'bbox'.\n            result_name (str): Result name in the metric prefix.\n                Default: 'pts_bbox'.\n\n        Returns:\n            dict: Dictionary of evaluation details.\n        \"\"\"\n\n        output_dir = osp.join(*osp.split(result_path)[:-1])\n        lyft = Lyft(\n            data_path=osp.join(self.data_root, self.version),\n            json_path=osp.join(self.data_root, self.version, self.version),\n            verbose=True)\n        eval_set_map = {\n            'v1.01-train': 'val',\n        }\n        metrics = lyft_eval(lyft, self.data_root, result_path,\n                            eval_set_map[self.version], output_dir, logger)\n\n        # record metrics\n        detail = dict()\n        metric_prefix = f'{result_name}_Lyft'\n\n        for i, name in enumerate(metrics['class_names']):\n            AP = float(metrics['mAPs_cate'][i])\n            detail[f'{metric_prefix}/{name}_AP'] = AP\n\n        detail[f'{metric_prefix}/mAP'] = metrics['Final mAP']\n        return detail\n\n    def format_results(self, results, jsonfile_prefix=None, csv_savepath=None):\n        \"\"\"Format the results to json (standard format for COCO evaluation).\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str | None): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            csv_savepath (str | None): The path for saving csv files.\n                It includes the file path and the csv filename,\n                e.g., \"a/b/filename.csv\". If not specified,\n                the result will not be converted to csv file.\n\n        Returns:\n            tuple: Returns (result_files, tmp_dir), where `result_files` is a \\\n                dict containing the json filepaths, `tmp_dir` is the temporal \\\n                directory created for saving json files when \\\n                `jsonfile_prefix` is not specified.\n        \"\"\"\n        assert isinstance(results, list), 'results must be a list'\n        assert len(results) == len(self), (\n            'The length of results is not equal to the dataset len: {} != {}'.\n            format(len(results), len(self)))\n\n        if jsonfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            jsonfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        if not isinstance(results[0], dict):\n            result_files = self._format_bbox(results, jsonfile_prefix)\n        else:\n            result_files = dict()\n            for name in results[0]:\n                print(f'\\nFormating bboxes of {name}')\n                results_ = [out[name] for out in results]\n                tmp_file_ = osp.join(jsonfile_prefix, name)\n                result_files.update(\n                    {name: self._format_bbox(results_, tmp_file_)})\n        if csv_savepath is not None:\n            self.json2csv(result_files['pts_bbox'], csv_savepath)\n        return result_files, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric='bbox',\n                 logger=None,\n                 jsonfile_prefix=None,\n                 csv_savepath=None,\n                 result_names=['pts_bbox'],\n                 show=False,\n                 out_dir=None):\n       
 \"\"\"Evaluation in Lyft protocol.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            metric (str | list[str]): Metrics to be evaluated.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            jsonfile_prefix (str | None): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            csv_savepath (str | None): The path for saving csv files.\n                It includes the file path and the csv filename,\n                e.g., \"a/b/filename.csv\". If not specified,\n                the result will not be converted to csv file.\n            show (bool): Whether to visualize.\n                Default: False.\n            out_dir (str): Path to save the visualization results.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Evaluation results.\n        \"\"\"\n        result_files, tmp_dir = self.format_results(results, jsonfile_prefix,\n                                                    csv_savepath)\n\n        if isinstance(result_files, dict):\n            results_dict = dict()\n            for name in result_names:\n                print(f'Evaluating bboxes of {name}')\n                ret_dict = self._evaluate_single(result_files[name])\n            results_dict.update(ret_dict)\n        elif isinstance(result_files, str):\n            results_dict = self._evaluate_single(result_files)\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n\n        if show:\n            self.show(results, out_dir)\n        return results_dict\n\n    def show(self, results, out_dir):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n        \"\"\"\n        for i, result in enumerate(results):\n            example = self.prepare_test_data(i)\n            points = example['points'][0]._data.numpy()\n            data_info = self.data_infos[i]\n            pts_path = data_info['lidar_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            # for now we convert points into depth mode\n            points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,\n                                               Coord3DMode.DEPTH)\n            inds = result['pts_bbox']['scores_3d'] > 0.1\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor\n            gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,\n                                          Box3DMode.DEPTH)\n            pred_bboxes = result['pts_bbox']['boxes_3d'][inds].tensor.numpy()\n            pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,\n                                            Box3DMode.DEPTH)\n            show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name)\n\n    def json2csv(self, json_path, csv_savepath):\n        \"\"\"Convert the json file to csv format for submission.\n\n        Args:\n            json_path (str): Path of the result json file.\n            csv_savepath (str): Path to save the csv file.\n        \"\"\"\n        results = mmcv.load(json_path)['results']\n        sample_list_path = osp.join(self.data_root, 'sample_submission.csv')\n        data = 
pd.read_csv(sample_list_path)\n        Id_list = list(data['Id'])\n        pred_list = list(data['PredictionString'])\n        cnt = 0\n        print('Converting the json to csv...')\n        for token in results.keys():\n            cnt += 1\n            predictions = results[token]\n            prediction_str = ''\n            for i in range(len(predictions)):\n                prediction_str += \\\n                    str(predictions[i]['score']) + ' ' + \\\n                    str(predictions[i]['translation'][0]) + ' ' + \\\n                    str(predictions[i]['translation'][1]) + ' ' + \\\n                    str(predictions[i]['translation'][2]) + ' ' + \\\n                    str(predictions[i]['size'][0]) + ' ' + \\\n                    str(predictions[i]['size'][1]) + ' ' + \\\n                    str(predictions[i]['size'][2]) + ' ' + \\\n                    str(Quaternion(list(predictions[i]['rotation']))\n                        .yaw_pitch_roll[0]) + ' ' + \\\n                    predictions[i]['name'] + ' '\n            prediction_str = prediction_str[:-1]\n            idx = Id_list.index(token)\n            pred_list[idx] = prediction_str\n        df = pd.DataFrame({'Id': Id_list, 'PredictionString': pred_list})\n        df.to_csv(csv_savepath, index=False)\n\n\ndef output_to_lyft_box(detection):\n    \"\"\"Convert the output to the box class in the Lyft.\n\n    Args:\n        detection (dict): Detection results.\n\n    Returns:\n        list[:obj:`LyftBox`]: List of standard LyftBoxes.\n    \"\"\"\n    box3d = detection['boxes_3d']\n    scores = detection['scores_3d'].numpy()\n    labels = detection['labels_3d'].numpy()\n\n    box_gravity_center = box3d.gravity_center.numpy()\n    box_dims = box3d.dims.numpy()\n    box_yaw = box3d.yaw.numpy()\n    # TODO: check whether this is necessary\n    # with dir_offset & dir_limit in the head\n    box_yaw = -box_yaw - np.pi / 2\n\n    box_list = []\n    for i in range(len(box3d)):\n        quat = Quaternion(axis=[0, 0, 1], radians=box_yaw[i])\n        box = LyftBox(\n            box_gravity_center[i],\n            box_dims[i],\n            quat,\n            label=labels[i],\n            score=scores[i])\n        box_list.append(box)\n    return box_list\n\n\ndef lidar_lyft_box_to_global(info, boxes):\n    \"\"\"Convert the box from ego to global coordinate.\n\n    Args:\n        info (dict): Info for a specific sample data, including the\n            calibration information.\n        boxes (list[:obj:`LyftBox`]): List of predicted LyftBoxes.\n\n    Returns:\n        list: List of standard LyftBoxes in the global\n            coordinate.\n    \"\"\"\n    box_list = []\n    for box in boxes:\n        # Move box to ego vehicle coord system\n        box.rotate(Quaternion(info['lidar2ego_rotation']))\n        box.translate(np.array(info['lidar2ego_translation']))\n        # Move box to global coord system\n        box.rotate(Quaternion(info['ego2global_rotation']))\n        box.translate(np.array(info['ego2global_translation']))\n        box_list.append(box)\n    return box_list\n"
  },
  {
    "path": "mmdet3d/datasets/nuscenes_dataset.py",
    "content": "import mmcv\nimport numpy as np\nimport pyquaternion\nimport tempfile\nfrom nuscenes.utils.data_classes import Box as NuScenesBox\nfrom os import path as osp\n\nfrom mmdet.datasets import DATASETS\nfrom ..core import show_result\nfrom ..core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes\nfrom .custom_3d import Custom3DDataset\n\n\n@DATASETS.register_module()\nclass NuScenesDataset(Custom3DDataset):\n    r\"\"\"NuScenes Dataset.\n\n    This class serves as the API for experiments on the NuScenes Dataset.\n\n    Please refer to `NuScenes Dataset <https://www.nuscenes.org/download>`_\n    for data downloading.\n\n    Args:\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        data_root (str): Path of dataset root.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        load_interval (int, optional): Interval of loading the dataset. It is\n            used to uniformly sample the dataset. Defaults to 1.\n        with_velocity (bool, optional): Whether include velocity prediction\n            into the experiments. Defaults to True.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR' in this dataset. Available options includes.\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n        eval_version (bool, optional): Configuration version of evaluation.\n            Defaults to  'detection_cvpr_2019'.\n        use_valid_flag (bool): Whether to use `use_valid_flag` key in the info\n            file as mask to filter gt_boxes and gt_names. 
Defaults to False.\n    \"\"\"\n    NameMapping = {\n        'movable_object.barrier': 'barrier',\n        'vehicle.bicycle': 'bicycle',\n        'vehicle.bus.bendy': 'bus',\n        'vehicle.bus.rigid': 'bus',\n        'vehicle.car': 'car',\n        'vehicle.construction': 'construction_vehicle',\n        'vehicle.motorcycle': 'motorcycle',\n        'human.pedestrian.adult': 'pedestrian',\n        'human.pedestrian.child': 'pedestrian',\n        'human.pedestrian.construction_worker': 'pedestrian',\n        'human.pedestrian.police_officer': 'pedestrian',\n        'movable_object.trafficcone': 'traffic_cone',\n        'vehicle.trailer': 'trailer',\n        'vehicle.truck': 'truck'\n    }\n    DefaultAttribute = {\n        'car': 'vehicle.parked',\n        'pedestrian': 'pedestrian.moving',\n        'trailer': 'vehicle.parked',\n        'truck': 'vehicle.parked',\n        'bus': 'vehicle.moving',\n        'motorcycle': 'cycle.without_rider',\n        'construction_vehicle': 'vehicle.parked',\n        'bicycle': 'cycle.without_rider',\n        'barrier': '',\n        'traffic_cone': '',\n    }\n    AttrMapping = {\n        'cycle.with_rider': 0,\n        'cycle.without_rider': 1,\n        'pedestrian.moving': 2,\n        'pedestrian.standing': 3,\n        'pedestrian.sitting_lying_down': 4,\n        'vehicle.moving': 5,\n        'vehicle.parked': 6,\n        'vehicle.stopped': 7,\n    }\n    AttrMapping_rev = [\n        'cycle.with_rider',\n        'cycle.without_rider',\n        'pedestrian.moving',\n        'pedestrian.standing',\n        'pedestrian.sitting_lying_down',\n        'vehicle.moving',\n        'vehicle.parked',\n        'vehicle.stopped',\n    ]\n    CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n               'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',\n               'barrier')\n\n    def __init__(self,\n                 ann_file,\n                 num_views=6,\n                 pipeline=None,\n                 data_root=None,\n                 classes=None,\n                 load_interval=1,\n                 with_velocity=True,\n                 modality=None,\n                 box_type_3d='LiDAR',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 eval_version='detection_cvpr_2019',\n                 use_valid_flag=False):\n        self.load_interval = load_interval\n        self.use_valid_flag = use_valid_flag\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode)\n\n        self.num_views = num_views\n        assert self.num_views <= 6\n        self.with_velocity = with_velocity\n        self.eval_version = eval_version\n        from nuscenes.eval.detection.config import config_factory\n        self.eval_detection_configs = config_factory(self.eval_version)\n        if self.modality is None:\n            self.modality = dict(\n                use_camera=False,\n                use_lidar=True,\n                use_radar=False,\n                use_map=False,\n                use_external=False,\n            )\n\n    def get_cat_ids(self, idx):\n        \"\"\"Get category distribution of single scene.\n\n        Args:\n            idx (int): Index of the data_info.\n\n        Returns:\n            dict[list]: for each category, if the current scene\n 
               contains such boxes, store a list containing idx,\n                otherwise, store empty list.\n        \"\"\"\n        info = self.data_infos[idx]\n        if self.use_valid_flag:\n            mask = info['valid_flag']\n            gt_names = set(info['gt_names'][mask])\n        else:\n            gt_names = set(info['gt_names'])\n\n        cat_ids = []\n        for name in gt_names:\n            if name in self.CLASSES:\n                cat_ids.append(self.cat2id[name])\n        return cat_ids\n\n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations sorted by timestamps.\n        \"\"\"\n        data = mmcv.load(ann_file)\n        data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))\n        data_infos = data_infos[::self.load_interval]\n        self.metadata = data['metadata']\n        self.version = self.metadata['version']\n        return data_infos\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data \\\n                preprocessing pipelines. It includes the following keys:\n\n                - sample_idx (str): Sample index.\n                - pts_filename (str): Filename of point clouds.\n                - sweeps (list[dict]): Infos of sweeps.\n                - timestamp (float): Sample timestamp.\n                - img_filename (str, optional): Image filename.\n                - lidar2img (list[np.ndarray], optional): Transformations \\\n                    from lidar to different cameras.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        # standard protocal modified from SECOND.Pytorch\n        input_dict = dict(\n            sample_idx=info['token'],\n            pts_filename=info['lidar_path'],\n            sweeps=info['sweeps'],\n            timestamp=info['timestamp'] / 1e6,\n        )\n\n        cam_orders = ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT']\n        if self.modality['use_camera']:\n            image_paths = []\n            lidar2img_rts = []\n            # for cam_type, cam_info in info['cams'].items():\n            intrinsics = []\n            lidar2cam_rs = []\n            lidar2cam_ts = []\n\n            for cam_type in cam_orders:\n                cam_info = info['cams'][cam_type]\n                image_paths.append(cam_info['data_path'])\n                # obtain lidar to image transformation matrix\n                lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])\n                lidar2cam_t = cam_info[\n                    'sensor2lidar_translation'] @ lidar2cam_r.T\n                lidar2cam_rt = np.eye(4)\n                lidar2cam_rt[:3, :3] = lidar2cam_r.T\n                lidar2cam_rt[3, :3] = -lidar2cam_t\n                intrinsic = cam_info['cam_intrinsic']\n                viewpad = np.eye(4)\n                viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic\n                lidar2img_rt = (viewpad @ lidar2cam_rt.T)\n                lidar2img_rts.append(lidar2img_rt)\n                intrinsics.append(intrinsic)\n                lidar2cam_rs.append(lidar2cam_r)\n                
lidar2cam_ts.append(lidar2cam_t)\n\n            input_dict.update(\n                dict(\n                    img_filename=image_paths,\n                    lidar2img=lidar2img_rts,\n                    cam_intrinsic=intrinsics,\n                    lidar2cam_r=lidar2cam_rs,\n                    lidar2cam_t=lidar2cam_ts,\n                ))\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n\n        return input_dict\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: Annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): \\\n                    3D ground truth bboxes\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - gt_names (list[str]): Class names of ground truths.\n        \"\"\"\n        info = self.data_infos[index]\n        # filter out bbox containing no points\n        if self.use_valid_flag:\n            mask = info['valid_flag']\n        else:\n            mask = info['num_lidar_pts'] > 0\n        gt_bboxes_3d = info['gt_boxes'][mask]\n        gt_names_3d = info['gt_names'][mask]\n        gt_labels_3d = []\n        for cat in gt_names_3d:\n            if cat in self.CLASSES:\n                gt_labels_3d.append(self.CLASSES.index(cat))\n            else:\n                gt_labels_3d.append(-1)\n        gt_labels_3d = np.array(gt_labels_3d)\n\n        if self.with_velocity:\n            gt_velocity = info['gt_velocity'][mask]\n            nan_mask = np.isnan(gt_velocity[:, 0])\n            gt_velocity[nan_mask] = [0.0, 0.0]\n            gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)\n\n        # the nuscenes box center is [0.5, 0.5, 0.5], we change it to be\n        # the same as KITTI (0.5, 0.5, 0)\n        gt_bboxes_3d = LiDARInstance3DBoxes(\n            gt_bboxes_3d,\n            box_dim=gt_bboxes_3d.shape[-1],\n            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n            gt_names=gt_names_3d)\n        return anns_results\n\n    def _format_bbox(self, results, jsonfile_prefix=None):\n        \"\"\"Convert the results to the standard format.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str): The prefix of the output jsonfile.\n                You can specify the output directory/filename by\n                modifying the jsonfile_prefix. 
Default: None.\n\n        Returns:\n            str: Path of the output json file.\n        \"\"\"\n        nusc_annos = {}\n        mapped_class_names = self.CLASSES\n\n        print('Start to convert detection format...')\n        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):\n            annos = []\n            boxes = output_to_nusc_box(det)\n            sample_token = self.data_infos[sample_id]['token']\n            boxes = lidar_nusc_box_to_global(self.data_infos[sample_id], boxes,\n                                             mapped_class_names,\n                                             self.eval_detection_configs,\n                                             self.eval_version)\n            for i, box in enumerate(boxes):\n                name = mapped_class_names[box.label]\n                if np.sqrt(box.velocity[0]**2 + box.velocity[1]**2) > 0.2:\n                    if name in [\n                            'car',\n                            'construction_vehicle',\n                            'bus',\n                            'truck',\n                            'trailer',\n                    ]:\n                        attr = 'vehicle.moving'\n                    elif name in ['bicycle', 'motorcycle']:\n                        attr = 'cycle.with_rider'\n                    else:\n                        attr = NuScenesDataset.DefaultAttribute[name]\n                else:\n                    if name in ['pedestrian']:\n                        attr = 'pedestrian.standing'\n                    elif name in ['bus']:\n                        attr = 'vehicle.stopped'\n                    else:\n                        attr = NuScenesDataset.DefaultAttribute[name]\n\n                nusc_anno = dict(\n                    sample_token=sample_token,\n                    translation=box.center.tolist(),\n                    size=box.wlh.tolist(),\n                    rotation=box.orientation.elements.tolist(),\n                    velocity=box.velocity[:2].tolist(),\n                    detection_name=name,\n                    detection_score=box.score,\n                    attribute_name=attr)\n                annos.append(nusc_anno)\n            nusc_annos[sample_token] = annos\n        nusc_submissions = {\n            'meta': self.modality,\n            'results': nusc_annos,\n        }\n\n        mmcv.mkdir_or_exist(jsonfile_prefix)\n        res_path = osp.join(jsonfile_prefix, 'results_nusc.json')\n        print('Results writes to', res_path)\n        mmcv.dump(nusc_submissions, res_path)\n        return res_path\n\n    def _evaluate_single(self,\n                         result_path,\n                         logger=None,\n                         metric='bbox',\n                         result_name='pts_bbox'):\n        \"\"\"Evaluation for a single model in nuScenes protocol.\n\n        Args:\n            result_path (str): Path of the result file.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            metric (str): Metric name used for evaluation. 
Default: 'bbox'.\n            result_name (str): Result name in the metric prefix.\n                Default: 'pts_bbox'.\n\n        Returns:\n            dict: Dictionary of evaluation details.\n        \"\"\"\n        from nuscenes import NuScenes\n        from nuscenes.eval.detection.evaluate import NuScenesEval\n\n        output_dir = osp.join(*osp.split(result_path)[:-1])\n        nusc = NuScenes(\n            version=self.version, dataroot=self.data_root, verbose=False)\n        eval_set_map = {\n            'v1.0-mini': 'mini_val',\n            'v1.0-trainval': 'val',\n        }\n        nusc_eval = NuScenesEval(\n            nusc,\n            config=self.eval_detection_configs,\n            result_path=result_path,\n            eval_set=eval_set_map[self.version],\n            output_dir=output_dir,\n            verbose=False)\n        nusc_eval.main(render_curves=False)\n\n        # record metrics\n        metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))\n        detail = dict()\n        metric_prefix = f'{result_name}_NuScenes'\n        for name in self.CLASSES:\n            for k, v in metrics['label_aps'][name].items():\n                val = float('{:.4f}'.format(v))\n                detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val\n            for k, v in metrics['label_tp_errors'][name].items():\n                val = float('{:.4f}'.format(v))\n                detail['{}/{}_{}'.format(metric_prefix, name, k)] = val\n\n        detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']\n        detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']\n        return detail\n\n    def format_results(self, results, jsonfile_prefix=None):\n        \"\"\"Format the results to json (standard format for COCO evaluation).\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            jsonfile_prefix (str | None): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. 
Default: None.\n\n        Returns:\n            tuple: Returns (result_files, tmp_dir), where `result_files` is a \\\n                dict containing the json filepaths, `tmp_dir` is the temporal \\\n                directory created for saving json files when \\\n                `jsonfile_prefix` is not specified.\n        \"\"\"\n        assert isinstance(results, list), 'results must be a list'\n        assert len(results) == len(self), (\n            'The length of results is not equal to the dataset len: {} != {}'.\n            format(len(results), len(self)))\n\n        if jsonfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            jsonfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        if not isinstance(results[0], dict):\n            result_files = self._format_bbox(results, jsonfile_prefix)\n        else:\n            result_files = dict()\n            for name in results[0]:\n                print(f'\\nFormating bboxes of {name}')\n                results_ = [out[name] for out in results]\n                tmp_file_ = osp.join(jsonfile_prefix, name)\n                result_files.update(\n                    {name: self._format_bbox(results_, tmp_file_)})\n        return result_files, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric='bbox',\n                 logger=None,\n                 jsonfile_prefix=None,\n                 result_names=['pts_bbox'],\n                 show=False,\n                 out_dir=None):\n        \"\"\"Evaluation in nuScenes protocol.\n\n        Args:\n            results (list[dict]): Testing results of the dataset.\n            metric (str | list[str]): Metrics to be evaluated.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            jsonfile_prefix (str | None): The prefix of json files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. 
Default: None.\n            show (bool): Whether to visualize.\n                Default: False.\n            out_dir (str): Path to save the visualization results.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Results of each evaluation metric.\n        \"\"\"\n        result_files, tmp_dir = self.format_results(results, jsonfile_prefix)\n\n        if isinstance(result_files, dict):\n            results_dict = dict()\n            for name in result_names:\n                print('Evaluating bboxes of {}'.format(name))\n                ret_dict = self._evaluate_single(result_files[name])\n            results_dict.update(ret_dict)\n        elif isinstance(result_files, str):\n            results_dict = self._evaluate_single(result_files)\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n\n        if show:\n            self.show(results, out_dir)\n        return results_dict\n\n    def show(self, results, out_dir):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n        \"\"\"\n        for i, result in enumerate(results):\n            example = self.prepare_test_data(i)\n            points = example['points'][0]._data.numpy()\n            data_info = self.data_infos[i]\n            pts_path = data_info['lidar_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            # for now we convert points into depth mode\n            points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,\n                                               Coord3DMode.DEPTH)\n            inds = result['pts_bbox']['scores_3d'] > 0.1\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor\n            gt_bboxes = Box3DMode.convert(gt_bboxes, Box3DMode.LIDAR,\n                                          Box3DMode.DEPTH)\n            pred_bboxes = result['pts_bbox']['boxes_3d'][inds].tensor.numpy()\n            pred_bboxes = Box3DMode.convert(pred_bboxes, Box3DMode.LIDAR,\n                                            Box3DMode.DEPTH)\n            show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name)\n\n\ndef output_to_nusc_box(detection):\n    \"\"\"Convert the output to the box class in the nuScenes.\n\n    Args:\n        detection (dict): Detection results.\n\n            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.\n            - scores_3d (torch.Tensor): Detection scores.\n            - labels_3d (torch.Tensor): Predicted box labels.\n\n    Returns:\n        list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.\n    \"\"\"\n    box3d = detection['boxes_3d']\n    scores = detection['scores_3d'].numpy()\n    labels = detection['labels_3d'].numpy()\n\n    box_gravity_center = box3d.gravity_center.numpy()\n    box_dims = box3d.dims.numpy()\n    box_yaw = box3d.yaw.numpy()\n    # TODO: check whether this is necessary\n    # with dir_offset & dir_limit in the head\n    box_yaw = -box_yaw - np.pi / 2\n\n    box_list = []\n    for i in range(len(box3d)):\n        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])\n        velocity = (*box3d.tensor[i, 7:9], 0.0)\n        # velo_val = np.linalg.norm(box3d[i, 7:9])\n        # velo_ori = box3d[i, 6]\n        # velocity = (\n        # velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)\n        box = NuScenesBox(\n            box_gravity_center[i],\n            box_dims[i],\n            quat,\n        
    label=labels[i],\n            score=scores[i],\n            velocity=velocity)\n        box_list.append(box)\n    return box_list\n\n\ndef lidar_nusc_box_to_global(info,\n                             boxes,\n                             classes,\n                             eval_configs,\n                             eval_version='detection_cvpr_2019'):\n    \"\"\"Convert the box from ego to global coordinate.\n\n    Args:\n        info (dict): Info for a specific sample data, including the\n            calibration information.\n        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.\n        classes (list[str]): Mapped classes in the evaluation.\n        eval_configs (object): Evaluation configuration object.\n        eval_version (str): Evaluation version.\n            Default: 'detection_cvpr_2019'\n\n    Returns:\n        list: List of standard NuScenesBoxes in the global\n            coordinate.\n    \"\"\"\n    box_list = []\n    for box in boxes:\n        # Move box to ego vehicle coord system\n        box.rotate(pyquaternion.Quaternion(info['lidar2ego_rotation']))\n        box.translate(np.array(info['lidar2ego_translation']))\n        # filter det in ego.\n        cls_range_map = eval_configs.class_range\n        radius = np.linalg.norm(box.center[:2], 2)\n        det_range = cls_range_map[classes[box.label]]\n        if radius > det_range:\n            continue\n        # Move box to global coord system\n        box.rotate(pyquaternion.Quaternion(info['ego2global_rotation']))\n        box.translate(np.array(info['ego2global_translation']))\n        box_list.append(box)\n    return box_list\n"
  },
  {
    "path": "mmdet3d/datasets/nuscenes_dataset_viewInfo.py",
    "content": "import mmcv\r\nimport numpy as np\r\nimport pyquaternion\r\nimport tempfile\r\nfrom nuscenes.utils.data_classes import Box as NuScenesBox\r\nfrom os import path as osp\r\n\r\nfrom mmdet.datasets import DATASETS\r\nfrom ..core import show_result\r\nfrom ..core.bbox import Box3DMode, Coord3DMode, LiDARInstance3DBoxes, CameraInstance3DBoxes\r\nfrom .nuscenes_dataset import NuScenesDataset\r\n\r\n\r\n@DATASETS.register_module()\r\nclass NuScenesDataset_ViewInfo(NuScenesDataset):\r\n    \"\"\"\r\n        Compared with NuScenesDataset, we also load 2d annotations\r\n    \"\"\"\r\n\r\n    def get_ann_info(self, index):\r\n        \"\"\"Get annotation info according to the given index.\r\n\r\n        Args:\r\n            index (int): Index of the annotation data to get.\r\n\r\n        Returns:\r\n            dict: Annotation information consists of the following keys:\r\n        \"\"\"\r\n        info = self.data_infos[index]\r\n        # filter out bbox containing no points\r\n        if self.use_valid_flag:\r\n            mask = info['valid_flag']\r\n        else:\r\n            mask = info['num_lidar_pts'] > 0\r\n\r\n        gt_bboxes_3d = info['gt_boxes'][mask]\r\n        gt_names_3d = info['gt_names'][mask]\r\n        gt_visible_3d = info['gt_visible'][mask]\r\n\r\n        # .copy() cannot be missed!\r\n        gt_bboxes2d_view = info['gt_bboxes2d_view'].copy()\r\n        gt_bboxes2d_view[..., :2] = gt_bboxes2d_view[..., :2] + gt_bboxes2d_view[..., 2:4] / 2\r\n\r\n        gt_bboxes_lidar_view = info['gt_bboxes_lidar_view'].copy()\r\n\r\n        gt_names2d_view = info['gt_names2d_view']\r\n        gt_viewsIDs = info['gt_viewsIDs']\r\n        gt_labels_3d = []\r\n        for cat in gt_names_3d:\r\n            if cat in self.CLASSES:\r\n                gt_labels_3d.append(self.CLASSES.index(cat))\r\n            else:\r\n                gt_labels_3d.append(-1)\r\n        gt_labels_3d = np.array(gt_labels_3d)\r\n\r\n        gt_labels2d_view = []\r\n        for cat in gt_names2d_view:\r\n            if cat in self.CLASSES:\r\n                gt_labels2d_view.append(self.CLASSES.index(cat))\r\n            else:\r\n                gt_labels2d_view.append(-1)\r\n        gt_labels2d_view = np.array(gt_labels2d_view)\r\n        gt_labels2d_view = np.stack([gt_labels2d_view, gt_viewsIDs], axis=-1)\r\n\r\n        gt_bboxes_cam_view = info['gt_bboxes_cam_view'].copy()\r\n\r\n        if self.with_velocity:\r\n            gt_velocity = info['gt_velocity'][mask].copy()\r\n            nan_mask = np.isnan(gt_velocity[:, 0])\r\n            gt_velocity[nan_mask] = [0.0, 0.0]\r\n            gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)\r\n\r\n            gt_cam_vel = info['gt_velocity_cam_view'].copy()\r\n            nan_mask_cam = np.isnan(gt_cam_vel[:, 0])\r\n            gt_cam_vel[nan_mask_cam] = [0.0, 0.0]\r\n            gt_bboxes_cam_view = np.concatenate([gt_bboxes_cam_view, gt_cam_vel], axis=-1)\r\n\r\n            gt_lidar_vel =info['gt_velocity_lidar_view'].copy()\r\n            nan_mask_lidar = np.isnan(gt_lidar_vel[:, 0])\r\n            gt_lidar_vel[nan_mask_lidar] = [0.0, 0.0]\r\n\r\n            gt_bboxes_lidar_view = np.concatenate([gt_bboxes_lidar_view, gt_lidar_vel], axis=-1)\r\n\r\n        gt_bboxes_cam_view = CameraInstance3DBoxes(\r\n            gt_bboxes_cam_view,\r\n            box_dim=gt_bboxes_cam_view.shape[-1],\r\n            origin=(0.5, 0.5, 0.5)\r\n        )\r\n\r\n        gt_bboxes_lidar_view = LiDARInstance3DBoxes(\r\n            
gt_bboxes_lidar_view,\r\n            box_dim=gt_bboxes_lidar_view.shape[-1],\r\n            origin=(0.5, 0.5, 0.5)\r\n        ).convert_to(self.box_mode_3d)\r\n\r\n        # the nuscenes box center is [0.5, 0.5, 0.5], we change it to be\r\n        # the same as KITTI (0.5, 0.5, 0)\r\n        gt_bboxes_3d = LiDARInstance3DBoxes(\r\n            gt_bboxes_3d,\r\n            box_dim=gt_bboxes_3d.shape[-1],\r\n            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\r\n\r\n        if \"gt_pts_centers_view\" in info:\r\n            gt_pts_centers_view = info['gt_pts_centers_view'].copy()\r\n            gt_img_centers_view = info['gt_img_centers_view'].copy()\r\n\r\n            anns_results = dict(\r\n                gt_bboxes_3d=gt_bboxes_3d,\r\n                gt_labels_3d=gt_labels_3d,\r\n                gt_visible_3d=gt_visible_3d,\r\n                gt_names=gt_names_3d,\r\n                bboxes=gt_bboxes2d_view,\r\n                labels=gt_labels2d_view,\r\n                pts_centers_view=gt_pts_centers_view,\r\n                img_centers_view=gt_img_centers_view,\r\n                bboxes_cam_view=gt_bboxes_cam_view,\r\n                bboxes_lidar_view=gt_bboxes_lidar_view,\r\n            )\r\n\r\n        else:\r\n            anns_results = dict(\r\n                gt_bboxes_3d=gt_bboxes_3d,\r\n                gt_labels_3d=gt_labels_3d,\r\n                gt_visible_3d=gt_visible_3d,\r\n                gt_names=gt_names_3d,\r\n                bboxes=gt_bboxes2d_view,\r\n                labels=gt_labels2d_view,\r\n            )\r\n\r\n        return anns_results\r\n\r\n    def get_data_info(self, index):\r\n        \"\"\"Get data info according to the given index.\r\n\r\n        Args:\r\n            index (int): Index of the sample data to get.\r\n\r\n        Returns:\r\n            dict: Data information that will be passed to the data \\\r\n                preprocessing pipelines. 
It includes the following keys:\r\n\r\n                - sample_idx (str): Sample index.\r\n                - pts_filename (str): Filename of point clouds.\r\n                - sweeps (list[dict]): Infos of sweeps.\r\n                - timestamp (float): Sample timestamp.\r\n                - img_filename (str, optional): Image filename.\r\n                - lidar2img (list[np.ndarray], optional): Transformations \\\r\n                    from lidar to different cameras.\r\n                - ann_info (dict): Annotation info.\r\n        \"\"\"\r\n        info = self.data_infos[index]\r\n        # standard protocal modified from SECOND.Pytorch\r\n\r\n        input_dict = dict(\r\n            sample_idx=info['token'],\r\n            pts_filename=info['lidar_path'],\r\n            sweeps=info['sweeps'],\r\n            timestamp=info['timestamp'] / 1e6,\r\n        )\r\n\r\n        cam_orders = ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT']\r\n        if self.modality['use_camera']:\r\n            image_paths = []\r\n            lidar2img_rts = []\r\n            # for cam_type, cam_info in info['cams'].items():\r\n            intrinsics = []\r\n            lidar2cam_rs = []\r\n            lidar2cam_ts = []\r\n\r\n            for cam_type in cam_orders:\r\n                cam_info = info['cams'][cam_type]\r\n                image_paths.append(cam_info['data_path'])\r\n                # obtain lidar to image transformation matrix\r\n                lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])\r\n                lidar2cam_t = cam_info[\r\n                    'sensor2lidar_translation'] @ lidar2cam_r.T\r\n                lidar2cam_rt = np.eye(4)\r\n                lidar2cam_rt[:3, :3] = lidar2cam_r.T\r\n                lidar2cam_rt[3, :3] = -lidar2cam_t\r\n                intrinsic = cam_info['cam_intrinsic']\r\n                viewpad = np.eye(4)\r\n                viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic\r\n                lidar2img_rt = (viewpad @ lidar2cam_rt.T)\r\n                lidar2img_rts.append(lidar2img_rt.copy())\r\n                intrinsics.append(intrinsic.copy())\r\n                lidar2cam_rs.append(lidar2cam_r.copy())\r\n                lidar2cam_ts.append(-lidar2cam_t.copy())\r\n\r\n            input_dict.update(\r\n                dict(\r\n                    img_filename=image_paths,\r\n                    lidar2img=lidar2img_rts,\r\n                    cam_intrinsic=intrinsics,\r\n                    lidar2cam_r=lidar2cam_rs,\r\n                    lidar2cam_t=lidar2cam_ts,\r\n                ))\r\n\r\n        if not self.test_mode:\r\n            annos = self.get_ann_info(index)\r\n            input_dict['ann_info'] = annos\r\n\r\n        return input_dict\r\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/__init__.py",
    "content": "from mmdet.datasets.pipelines import Compose\nfrom .dbsampler import DataBaseSampler\nfrom .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D\nfrom .loading import (LoadAnnotations3D, LoadMultiViewImageFromFiles,\n                      LoadPointsFromFile, LoadPointsFromMultiSweeps,\n                      NormalizePointsColor, PointSegClassMapping,\n                      MyLoadAnnotations3D)\nfrom .test_time_aug import MultiScaleFlipAug3D\nfrom .transforms_3d import (BackgroundPointsFilter, GlobalRotScaleTrans,\n                            IndoorPointSample, ObjectNoise, ObjectRangeFilter,\n                            ObjectSample, PointShuffle, PointsRangeFilter,\n                            RandomFlip3D, VoxelBasedPointSampler, OurRandomFlip3D,\n                            OurGlobalRotScaleTrans, OurObjectRangeFilter)\nfrom .transforms_2d import OurRandomAffine, PhotoMetricDistortionMultiViewImage\n\n__all__ = [\n    'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',\n    'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',\n    'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',\n    'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',\n    'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',\n    'PointSegClassMapping', 'MultiScaleFlipAug3D', 'LoadPointsFromMultiSweeps',\n    'BackgroundPointsFilter', 'VoxelBasedPointSampler', 'MyLoadAnnotations3D',\n    'OurRandomFlip3D', 'OurGlobalRotScaleTrans', 'OurRandomAffine',\n    'PhotoMetricDistortionMultiViewImage', 'OurObjectRangeFilter'\n]\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/data_augment_utils.py",
    "content": "import numba\nimport numpy as np\nimport warnings\nfrom numba.errors import NumbaPerformanceWarning\n\nfrom mmdet3d.core.bbox import box_np_ops\n\nwarnings.filterwarnings('ignore', category=NumbaPerformanceWarning)\n\n\n@numba.njit\ndef _rotation_box2d_jit_(corners, angle, rot_mat_T):\n    \"\"\"Rotate 2D boxes.\n\n    Args:\n        corners (np.ndarray): Corners of boxes.\n        angle (float): Rotation angle.\n        rot_mat_T (np.ndarray): Transposed rotation matrix.\n    \"\"\"\n    rot_sin = np.sin(angle)\n    rot_cos = np.cos(angle)\n    rot_mat_T[0, 0] = rot_cos\n    rot_mat_T[0, 1] = -rot_sin\n    rot_mat_T[1, 0] = rot_sin\n    rot_mat_T[1, 1] = rot_cos\n    corners[:] = corners @ rot_mat_T\n\n\n@numba.jit(nopython=True)\ndef box_collision_test(boxes, qboxes, clockwise=True):\n    \"\"\"Box collision test.\n\n    Args:\n        boxes (np.ndarray): Corners of current boxes.\n        qboxes (np.ndarray): Boxes to be avoid colliding.\n        clockwise (bool): Whether the corners are in clockwise order.\n            Default: True.\n    \"\"\"\n    N = boxes.shape[0]\n    K = qboxes.shape[0]\n    ret = np.zeros((N, K), dtype=np.bool_)\n    slices = np.array([1, 2, 3, 0])\n    lines_boxes = np.stack((boxes, boxes[:, slices, :]),\n                           axis=2)  # [N, 4, 2(line), 2(xy)]\n    lines_qboxes = np.stack((qboxes, qboxes[:, slices, :]), axis=2)\n    # vec = np.zeros((2,), dtype=boxes.dtype)\n    boxes_standup = box_np_ops.corner_to_standup_nd_jit(boxes)\n    qboxes_standup = box_np_ops.corner_to_standup_nd_jit(qboxes)\n    for i in range(N):\n        for j in range(K):\n            # calculate standup first\n            iw = (\n                min(boxes_standup[i, 2], qboxes_standup[j, 2]) -\n                max(boxes_standup[i, 0], qboxes_standup[j, 0]))\n            if iw > 0:\n                ih = (\n                    min(boxes_standup[i, 3], qboxes_standup[j, 3]) -\n                    max(boxes_standup[i, 1], qboxes_standup[j, 1]))\n                if ih > 0:\n                    for k in range(4):\n                        for box_l in range(4):\n                            A = lines_boxes[i, k, 0]\n                            B = lines_boxes[i, k, 1]\n                            C = lines_qboxes[j, box_l, 0]\n                            D = lines_qboxes[j, box_l, 1]\n                            acd = (D[1] - A[1]) * (C[0] -\n                                                   A[0]) > (C[1] - A[1]) * (\n                                                       D[0] - A[0])\n                            bcd = (D[1] - B[1]) * (C[0] -\n                                                   B[0]) > (C[1] - B[1]) * (\n                                                       D[0] - B[0])\n                            if acd != bcd:\n                                abc = (C[1] - A[1]) * (B[0] - A[0]) > (\n                                    B[1] - A[1]) * (\n                                        C[0] - A[0])\n                                abd = (D[1] - A[1]) * (B[0] - A[0]) > (\n                                    B[1] - A[1]) * (\n                                        D[0] - A[0])\n                                if abc != abd:\n                                    ret[i, j] = True  # collision.\n                                    break\n                        if ret[i, j] is True:\n                            break\n                    if ret[i, j] is False:\n                        # now check complete overlap.\n                        # box overlap qbox:\n  
                      box_overlap_qbox = True\n                        for box_l in range(4):  # point l in qboxes\n                            for k in range(4):  # corner k in boxes\n                                vec = boxes[i, k] - boxes[i, (k + 1) % 4]\n                                if clockwise:\n                                    vec = -vec\n                                cross = vec[1] * (\n                                    boxes[i, k, 0] - qboxes[j, box_l, 0])\n                                cross -= vec[0] * (\n                                    boxes[i, k, 1] - qboxes[j, box_l, 1])\n                                if cross >= 0:\n                                    box_overlap_qbox = False\n                                    break\n                            if box_overlap_qbox is False:\n                                break\n\n                        if box_overlap_qbox is False:\n                            qbox_overlap_box = True\n                            for box_l in range(4):  # point box_l in boxes\n                                for k in range(4):  # corner k in qboxes\n                                    vec = qboxes[j, k] - qboxes[j, (k + 1) % 4]\n                                    if clockwise:\n                                        vec = -vec\n                                    cross = vec[1] * (\n                                        qboxes[j, k, 0] - boxes[i, box_l, 0])\n                                    cross -= vec[0] * (\n                                        qboxes[j, k, 1] - boxes[i, box_l, 1])\n                                    if cross >= 0:  #\n                                        qbox_overlap_box = False\n                                        break\n                                if qbox_overlap_box is False:\n                                    break\n                            if qbox_overlap_box:\n                                ret[i, j] = True  # collision.\n                        else:\n                            ret[i, j] = True  # collision.\n    return ret\n\n\n@numba.njit\ndef noise_per_box(boxes, valid_mask, loc_noises, rot_noises):\n    \"\"\"Add noise to every box (only on the horizontal plane).\n\n    Args:\n        boxes (np.ndarray): Input boxes with shape (N, 5).\n        valid_mask (np.ndarray): Mask to indicate which boxes are valid\n            with shape (N).\n        loc_noises (np.ndarray): Location noises with shape (N, M, 3).\n        rot_noises (np.ndarray): Rotation noises with shape (N, M).\n\n    Returns:\n        np.ndarray: Mask to indicate whether the noise is\n            added successfully (pass the collision test).\n    \"\"\"\n    num_boxes = boxes.shape[0]\n    num_tests = loc_noises.shape[1]\n    box_corners = box_np_ops.box2d_to_corner_jit(boxes)\n    current_corners = np.zeros((4, 2), dtype=boxes.dtype)\n    rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)\n    success_mask = -np.ones((num_boxes, ), dtype=np.int64)\n    # print(valid_mask)\n    for i in range(num_boxes):\n        if valid_mask[i]:\n            for j in range(num_tests):\n                current_corners[:] = box_corners[i]\n                current_corners -= boxes[i, :2]\n                _rotation_box2d_jit_(current_corners, rot_noises[i, j],\n                                     rot_mat_T)\n                current_corners += boxes[i, :2] + loc_noises[i, j, :2]\n                coll_mat = box_collision_test(\n                    current_corners.reshape(1, 4, 2), box_corners)\n                coll_mat[0, i] = False\n  
              # print(coll_mat)\n                if not coll_mat.any():\n                    success_mask[i] = j\n                    box_corners[i] = current_corners\n                    break\n    return success_mask\n\n\n@numba.njit\ndef noise_per_box_v2_(boxes, valid_mask, loc_noises, rot_noises,\n                      global_rot_noises):\n    \"\"\"Add noise to every box (only on the horizontal plane). Version 2 used\n    when enable global rotations.\n\n    Args:\n        boxes (np.ndarray): Input boxes with shape (N, 5).\n        valid_mask (np.ndarray): Mask to indicate which boxes are valid\n            with shape (N).\n        loc_noises (np.ndarray): Location noises with shape (N, M, 3).\n        rot_noises (np.ndarray): Rotation noises with shape (N, M).\n\n    Returns:\n        np.ndarray: Mask to indicate whether the noise is\n            added successfully (pass the collision test).\n    \"\"\"\n    num_boxes = boxes.shape[0]\n    num_tests = loc_noises.shape[1]\n    box_corners = box_np_ops.box2d_to_corner_jit(boxes)\n    current_corners = np.zeros((4, 2), dtype=boxes.dtype)\n    current_box = np.zeros((1, 5), dtype=boxes.dtype)\n    rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)\n    dst_pos = np.zeros((2, ), dtype=boxes.dtype)\n    success_mask = -np.ones((num_boxes, ), dtype=np.int64)\n    corners_norm = np.zeros((4, 2), dtype=boxes.dtype)\n    corners_norm[1, 1] = 1.0\n    corners_norm[2] = 1.0\n    corners_norm[3, 0] = 1.0\n    corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)\n    corners_norm = corners_norm.reshape(4, 2)\n    for i in range(num_boxes):\n        if valid_mask[i]:\n            for j in range(num_tests):\n                current_box[0, :] = boxes[i]\n                current_radius = np.sqrt(boxes[i, 0]**2 + boxes[i, 1]**2)\n                current_grot = np.arctan2(boxes[i, 0], boxes[i, 1])\n                dst_grot = current_grot + global_rot_noises[i, j]\n                dst_pos[0] = current_radius * np.sin(dst_grot)\n                dst_pos[1] = current_radius * np.cos(dst_grot)\n                current_box[0, :2] = dst_pos\n                current_box[0, -1] += (dst_grot - current_grot)\n\n                rot_sin = np.sin(current_box[0, -1])\n                rot_cos = np.cos(current_box[0, -1])\n                rot_mat_T[0, 0] = rot_cos\n                rot_mat_T[0, 1] = -rot_sin\n                rot_mat_T[1, 0] = rot_sin\n                rot_mat_T[1, 1] = rot_cos\n                current_corners[:] = current_box[\n                    0, 2:4] * corners_norm @ rot_mat_T + current_box[0, :2]\n                current_corners -= current_box[0, :2]\n                _rotation_box2d_jit_(current_corners, rot_noises[i, j],\n                                     rot_mat_T)\n                current_corners += current_box[0, :2] + loc_noises[i, j, :2]\n                coll_mat = box_collision_test(\n                    current_corners.reshape(1, 4, 2), box_corners)\n                coll_mat[0, i] = False\n                if not coll_mat.any():\n                    success_mask[i] = j\n                    box_corners[i] = current_corners\n                    loc_noises[i, j, :2] += (dst_pos - boxes[i, :2])\n                    rot_noises[i, j] += (dst_grot - current_grot)\n                    break\n    return success_mask\n\n\ndef _select_transform(transform, indices):\n    \"\"\"Select transform.\n\n    Args:\n        transform (np.ndarray): Transforms to select from.\n        indices (np.ndarray): Mask to indicate which transform to select.\n\n    
Returns:\n        np.ndarray: Selected transforms.\n    \"\"\"\n    result = np.zeros((transform.shape[0], *transform.shape[2:]),\n                      dtype=transform.dtype)\n    for i in range(transform.shape[0]):\n        if indices[i] != -1:\n            result[i] = transform[i, indices[i]]\n    return result\n\n\n@numba.njit\ndef _rotation_matrix_3d_(rot_mat_T, angle, axis):\n    \"\"\"Get the 3D rotation matrix.\n\n    Args:\n        rot_mat_T (np.ndarray): Transposed rotation matrix.\n        angle (float): Rotation angle.\n        axis (int): Rotation axis.\n    \"\"\"\n    rot_sin = np.sin(angle)\n    rot_cos = np.cos(angle)\n    rot_mat_T[:] = np.eye(3)\n    if axis == 1:\n        rot_mat_T[0, 0] = rot_cos\n        rot_mat_T[0, 2] = -rot_sin\n        rot_mat_T[2, 0] = rot_sin\n        rot_mat_T[2, 2] = rot_cos\n    elif axis == 2 or axis == -1:\n        rot_mat_T[0, 0] = rot_cos\n        rot_mat_T[0, 1] = -rot_sin\n        rot_mat_T[1, 0] = rot_sin\n        rot_mat_T[1, 1] = rot_cos\n    elif axis == 0:\n        rot_mat_T[1, 1] = rot_cos\n        rot_mat_T[1, 2] = -rot_sin\n        rot_mat_T[2, 1] = rot_sin\n        rot_mat_T[2, 2] = rot_cos\n\n\n@numba.njit\ndef points_transform_(points, centers, point_masks, loc_transform,\n                      rot_transform, valid_mask):\n    \"\"\"Apply transforms to points and box centers.\n\n    Args:\n        points (np.ndarray): Input points.\n        centers (np.ndarray): Input box centers.\n        point_masks (np.ndarray): Mask to indicate which points need\n            to be transformed.\n        loc_transform (np.ndarray): Location transform to be applied.\n        rot_transform (np.ndarray): Rotation transform to be applied.\n        valid_mask (np.ndarray): Mask to indicate which boxes are valid.\n    \"\"\"\n    num_box = centers.shape[0]\n    num_points = points.shape[0]\n    rot_mat_T = np.zeros((num_box, 3, 3), dtype=points.dtype)\n    for i in range(num_box):\n        _rotation_matrix_3d_(rot_mat_T[i], rot_transform[i], 2)\n    for i in range(num_points):\n        for j in range(num_box):\n            if valid_mask[j]:\n                if point_masks[i, j] == 1:\n                    points[i, :3] -= centers[j, :3]\n                    points[i:i + 1, :3] = points[i:i + 1, :3] @ rot_mat_T[j]\n                    points[i, :3] += centers[j, :3]\n                    points[i, :3] += loc_transform[j]\n                    break  # only apply first box's transform\n\n\n@numba.njit\ndef box3d_transform_(boxes, loc_transform, rot_transform, valid_mask):\n    \"\"\"Transform 3D boxes.\n\n    Args:\n        boxes (np.ndarray): 3D boxes to be transformed.\n        loc_transform (np.ndarray): Location transform to be applied.\n        rot_transform (np.ndarray): Rotation transform to be applied.\n        valid_mask (np.ndarray | None): Mask to indicate which boxes are valid.\n    \"\"\"\n    num_box = boxes.shape[0]\n    for i in range(num_box):\n        if valid_mask[i]:\n            boxes[i, :3] += loc_transform[i]\n            boxes[i, 6] += rot_transform[i]\n\n\ndef noise_per_object_v3_(gt_boxes,\n                         points=None,\n                         valid_mask=None,\n                         rotation_perturb=np.pi / 4,\n                         center_noise_std=1.0,\n                         global_random_rot_range=np.pi / 4,\n                         num_try=100):\n    \"\"\"Random rotate or remove each groundtruth independently. 
use kitti viewer\n    to test this function points_transform_\n\n    Args:\n        gt_boxes (np.ndarray): Ground truth boxes with shape (N, 7).\n        points (np.ndarray | None): Input point cloud with shape (M, 4).\n            Default: None.\n        valid_mask (np.ndarray | None): Mask to indicate which boxes are valid.\n            Default: None.\n        rotation_perturb (float): Rotation perturbation. Default: pi / 4.\n        center_noise_std (float): Center noise standard deviation.\n            Default: 1.0.\n        global_random_rot_range (float): Global random rotation range.\n            Default: pi/4.\n        num_try (int): Number of try. Default: 100.\n    \"\"\"\n    num_boxes = gt_boxes.shape[0]\n    if not isinstance(rotation_perturb, (list, tuple, np.ndarray)):\n        rotation_perturb = [-rotation_perturb, rotation_perturb]\n    if not isinstance(global_random_rot_range, (list, tuple, np.ndarray)):\n        global_random_rot_range = [\n            -global_random_rot_range, global_random_rot_range\n        ]\n    enable_grot = np.abs(global_random_rot_range[0] -\n                         global_random_rot_range[1]) >= 1e-3\n\n    if not isinstance(center_noise_std, (list, tuple, np.ndarray)):\n        center_noise_std = [\n            center_noise_std, center_noise_std, center_noise_std\n        ]\n    if valid_mask is None:\n        valid_mask = np.ones((num_boxes, ), dtype=np.bool_)\n    center_noise_std = np.array(center_noise_std, dtype=gt_boxes.dtype)\n\n    loc_noises = np.random.normal(\n        scale=center_noise_std, size=[num_boxes, num_try, 3])\n    rot_noises = np.random.uniform(\n        rotation_perturb[0], rotation_perturb[1], size=[num_boxes, num_try])\n    gt_grots = np.arctan2(gt_boxes[:, 0], gt_boxes[:, 1])\n    grot_lowers = global_random_rot_range[0] - gt_grots\n    grot_uppers = global_random_rot_range[1] - gt_grots\n    global_rot_noises = np.random.uniform(\n        grot_lowers[..., np.newaxis],\n        grot_uppers[..., np.newaxis],\n        size=[num_boxes, num_try])\n\n    origin = (0.5, 0.5, 0)\n    gt_box_corners = box_np_ops.center_to_corner_box3d(\n        gt_boxes[:, :3],\n        gt_boxes[:, 3:6],\n        gt_boxes[:, 6],\n        origin=origin,\n        axis=2)\n\n    # TODO: rewrite this noise box function?\n    if not enable_grot:\n        selected_noise = noise_per_box(gt_boxes[:, [0, 1, 3, 4, 6]],\n                                       valid_mask, loc_noises, rot_noises)\n    else:\n        selected_noise = noise_per_box_v2_(gt_boxes[:, [0, 1, 3, 4, 6]],\n                                           valid_mask, loc_noises, rot_noises,\n                                           global_rot_noises)\n\n    loc_transforms = _select_transform(loc_noises, selected_noise)\n    rot_transforms = _select_transform(rot_noises, selected_noise)\n    surfaces = box_np_ops.corner_to_surfaces_3d_jit(gt_box_corners)\n    if points is not None:\n        # TODO: replace this points_in_convex function by my tools?\n        point_masks = box_np_ops.points_in_convex_polygon_3d_jit(\n            points[:, :3], surfaces)\n        points_transform_(points, gt_boxes[:, :3], point_masks, loc_transforms,\n                          rot_transforms, valid_mask)\n\n    box3d_transform_(gt_boxes, loc_transforms, rot_transforms, valid_mask)\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/dbsampler.py",
    "content": "import copy\nimport mmcv\nimport numpy as np\nimport os\n\nfrom mmdet3d.core.bbox import box_np_ops\nfrom mmdet3d.datasets.pipelines import data_augment_utils\nfrom mmdet.datasets import PIPELINES\nfrom ..registry import OBJECTSAMPLERS\n\n\nclass BatchSampler:\n    \"\"\"Class for sampling specific category of ground truths.\n\n    Args:\n        sample_list (list[dict]): List of samples.\n        name (str | None): The category of samples. Default: None.\n        epoch (int | None): Sampling epoch. Default: None.\n        shuffle (bool): Whether to shuffle indices. Default: False.\n        drop_reminder (bool): Drop reminder. Default: False.\n    \"\"\"\n\n    def __init__(self,\n                 sampled_list,\n                 name=None,\n                 epoch=None,\n                 shuffle=True,\n                 drop_reminder=False):\n        self._sampled_list = sampled_list\n        self._indices = np.arange(len(sampled_list))\n        if shuffle:\n            np.random.shuffle(self._indices)\n        self._idx = 0\n        self._example_num = len(sampled_list)\n        self._name = name\n        self._shuffle = shuffle\n        self._epoch = epoch\n        self._epoch_counter = 0\n        self._drop_reminder = drop_reminder\n\n    def _sample(self, num):\n        \"\"\"Sample specific number of ground truths and return indices.\n\n        Args:\n            num (int): Sampled number.\n\n        Returns:\n            list[int]: Indices of sampled ground truths.\n        \"\"\"\n        if self._idx + num >= self._example_num:\n            ret = self._indices[self._idx:].copy()\n            self._reset()\n        else:\n            ret = self._indices[self._idx:self._idx + num]\n            self._idx += num\n        return ret\n\n    def _reset(self):\n        \"\"\"Reset the index of batchsampler to zero.\"\"\"\n        assert self._name is not None\n        # print(\"reset\", self._name)\n        if self._shuffle:\n            np.random.shuffle(self._indices)\n        self._idx = 0\n\n    def sample(self, num):\n        \"\"\"Sample specific number of ground truths.\n\n        Args:\n            num (int): Sampled number.\n\n        Returns:\n            list[dict]: Sampled ground truths.\n        \"\"\"\n        indices = self._sample(num)\n        return [self._sampled_list[i] for i in indices]\n\n\n@OBJECTSAMPLERS.register_module()\nclass DataBaseSampler(object):\n    \"\"\"Class for sampling data from the ground truth database.\n\n    Args:\n        info_path (str): Path of groundtruth database info.\n        data_root (str): Path of groundtruth database.\n        rate (float): Rate of actual sampled over maximum sampled number.\n        prepare (dict): Name of preparation functions and the input value.\n        sample_groups (dict): Sampled classes and numbers.\n        classes (list[str]): List of classes. Default: None.\n        points_loader(dict): Config of points loader. 
Default: dict(\n            type='LoadPointsFromFile', load_dim=4, use_dim=[0,1,2,3])\n    \"\"\"\n\n    def __init__(self,\n                 info_path,\n                 data_root,\n                 rate,\n                 prepare,\n                 sample_groups,\n                 classes=None,\n                 points_loader=dict(\n                     type='LoadPointsFromFile',\n                     coord_type='LIDAR',\n                     load_dim=4,\n                     use_dim=[0, 1, 2, 3])):\n        super().__init__()\n        self.data_root = data_root\n        self.info_path = info_path\n        self.rate = rate\n        self.prepare = prepare\n        self.classes = classes\n        self.cat2label = {name: i for i, name in enumerate(classes)}\n        self.label2cat = {i: name for i, name in enumerate(classes)}\n        self.points_loader = mmcv.build_from_cfg(points_loader, PIPELINES)\n\n        db_infos = mmcv.load(info_path)\n\n        # filter database infos\n        from mmdet3d.utils import get_root_logger\n        logger = get_root_logger()\n        for k, v in db_infos.items():\n            logger.info(f'load {len(v)} {k} database infos')\n        for prep_func, val in prepare.items():\n            db_infos = getattr(self, prep_func)(db_infos, val)\n        logger.info('After filter database:')\n        for k, v in db_infos.items():\n            logger.info(f'load {len(v)} {k} database infos')\n\n        self.db_infos = db_infos\n\n        # load sample groups\n        # TODO: more elegant way to load sample groups\n        self.sample_groups = []\n        for name, num in sample_groups.items():\n            self.sample_groups.append({name: int(num)})\n\n        self.group_db_infos = self.db_infos  # just use db_infos\n        self.sample_classes = []\n        self.sample_max_nums = []\n        for group_info in self.sample_groups:\n            self.sample_classes += list(group_info.keys())\n            self.sample_max_nums += list(group_info.values())\n\n        self.sampler_dict = {}\n        for k, v in self.group_db_infos.items():\n            self.sampler_dict[k] = BatchSampler(v, k, shuffle=True)\n        # TODO: No group_sampling currently\n\n    @staticmethod\n    def filter_by_difficulty(db_infos, removed_difficulty):\n        \"\"\"Filter ground truths by difficulties.\n\n        Args:\n            db_infos (dict): Info of groundtruth database.\n            removed_difficulty (list): Difficulties that are not qualified.\n\n        Returns:\n            dict: Info of database after filtering.\n        \"\"\"\n        new_db_infos = {}\n        for key, dinfos in db_infos.items():\n            new_db_infos[key] = [\n                info for info in dinfos\n                if info['difficulty'] not in removed_difficulty\n            ]\n        return new_db_infos\n\n    @staticmethod\n    def filter_by_min_points(db_infos, min_gt_points_dict):\n        \"\"\"Filter ground truths by number of points in the bbox.\n\n        Args:\n            db_infos (dict): Info of groundtruth database.\n            min_gt_points_dict (dict): Different number of minimum points\n                needed for different categories of ground truths.\n\n        Returns:\n            dict: Info of database after filtering.\n        \"\"\"\n        for name, min_num in min_gt_points_dict.items():\n            min_num = int(min_num)\n            if min_num > 0:\n                filtered_infos = []\n                for info in db_infos[name]:\n                    if 
info['num_points_in_gt'] >= min_num:\n                        filtered_infos.append(info)\n                db_infos[name] = filtered_infos\n        return db_infos\n\n    def sample_all(self, gt_bboxes, gt_labels, img=None):\n        \"\"\"Sampling all categories of bboxes.\n\n        Args:\n            gt_bboxes (np.ndarray): Ground truth bounding boxes.\n            gt_labels (np.ndarray): Ground truth labels of boxes.\n\n        Returns:\n            dict: Dict of sampled 'pseudo ground truths'.\n\n                - gt_labels_3d (np.ndarray): ground truths labels \\\n                    of sampled objects.\n                - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): \\\n                    sampled ground truth 3D bounding boxes\n                - points (np.ndarray): sampled points\n                - group_ids (np.ndarray): ids of sampled ground truths\n        \"\"\"\n        sampled_num_dict = {}\n        sample_num_per_class = []\n        for class_name, max_sample_num in zip(self.sample_classes,\n                                              self.sample_max_nums):\n            class_label = self.cat2label[class_name]\n            # sampled_num = int(max_sample_num -\n            #                   np.sum([n == class_name for n in gt_names]))\n            sampled_num = int(max_sample_num -\n                              np.sum([n == class_label for n in gt_labels]))\n            sampled_num = np.round(self.rate * sampled_num).astype(np.int64)\n            sampled_num_dict[class_name] = sampled_num\n            sample_num_per_class.append(sampled_num)\n\n        sampled = []\n        sampled_gt_bboxes = []\n        avoid_coll_boxes = gt_bboxes\n\n        for class_name, sampled_num in zip(self.sample_classes,\n                                           sample_num_per_class):\n            if sampled_num > 0:\n                sampled_cls = self.sample_class_v2(class_name, sampled_num,\n                                                   avoid_coll_boxes)\n\n                sampled += sampled_cls\n                if len(sampled_cls) > 0:\n                    if len(sampled_cls) == 1:\n                        sampled_gt_box = sampled_cls[0]['box3d_lidar'][\n                            np.newaxis, ...]\n                    else:\n                        sampled_gt_box = np.stack(\n                            [s['box3d_lidar'] for s in sampled_cls], axis=0)\n\n                    sampled_gt_bboxes += [sampled_gt_box]\n                    avoid_coll_boxes = np.concatenate(\n                        [avoid_coll_boxes, sampled_gt_box], axis=0)\n\n        ret = None\n        if len(sampled) > 0:\n            sampled_gt_bboxes = np.concatenate(sampled_gt_bboxes, axis=0)\n            # center = sampled_gt_bboxes[:, 0:3]\n\n            # num_sampled = len(sampled)\n            s_points_list = []\n            count = 0\n            for info in sampled:\n                file_path = os.path.join(\n                    self.data_root,\n                    info['path']) if self.data_root else info['path']\n                results = dict(pts_filename=file_path)\n                s_points = self.points_loader(results)['points']\n                s_points.translate(info['box3d_lidar'][:3])\n\n                count += 1\n\n                s_points_list.append(s_points)\n\n            gt_labels = np.array([self.cat2label[s['name']] for s in sampled],\n                                 dtype=np.long)\n            ret = {\n                'gt_labels_3d':\n                gt_labels,\n                
'gt_bboxes_3d':\n                sampled_gt_bboxes,\n                'points':\n                s_points_list[0].cat(s_points_list),\n                'group_ids':\n                np.arange(gt_bboxes.shape[0],\n                          gt_bboxes.shape[0] + len(sampled))\n            }\n\n        return ret\n\n    def sample_class_v2(self, name, num, gt_bboxes):\n        \"\"\"Sampling specific categories of bounding boxes.\n\n        Args:\n            name (str): Class of objects to be sampled.\n            num (int): Number of sampled bboxes.\n            gt_bboxes (np.ndarray): Ground truth boxes.\n\n        Returns:\n            list[dict]: Valid samples after collision test.\n        \"\"\"\n        sampled = self.sampler_dict[name].sample(num)\n        sampled = copy.deepcopy(sampled)\n        num_gt = gt_bboxes.shape[0]\n        num_sampled = len(sampled)\n        gt_bboxes_bv = box_np_ops.center_to_corner_box2d(\n            gt_bboxes[:, 0:2], gt_bboxes[:, 3:5], gt_bboxes[:, 6])\n\n        sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)\n        boxes = np.concatenate([gt_bboxes, sp_boxes], axis=0).copy()\n\n        sp_boxes_new = boxes[gt_bboxes.shape[0]:]\n        sp_boxes_bv = box_np_ops.center_to_corner_box2d(\n            sp_boxes_new[:, 0:2], sp_boxes_new[:, 3:5], sp_boxes_new[:, 6])\n\n        total_bv = np.concatenate([gt_bboxes_bv, sp_boxes_bv], axis=0)\n        coll_mat = data_augment_utils.box_collision_test(total_bv, total_bv)\n        diag = np.arange(total_bv.shape[0])\n        coll_mat[diag, diag] = False\n\n        valid_samples = []\n        for i in range(num_gt, num_gt + num_sampled):\n            if coll_mat[i].any():\n                coll_mat[i] = False\n                coll_mat[:, i] = False\n            else:\n                valid_samples.append(sampled[i - num_gt])\n        return valid_samples\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/formating.py",
    "content": "import numpy as np\nfrom mmcv.parallel import DataContainer as DC\n\nfrom mmdet3d.core.bbox import BaseInstance3DBoxes\nfrom mmdet3d.core.points import BasePoints\nfrom mmdet.datasets.builder import PIPELINES\nfrom mmdet.datasets.pipelines import to_tensor\n\nPIPELINES._module_dict.pop('DefaultFormatBundle')\n\n\n@PIPELINES.register_module()\nclass DefaultFormatBundle(object):\n    \"\"\"Default formatting bundle.\n\n    It simplifies the pipeline of formatting common fields, including \"img\",\n    \"proposals\", \"gt_bboxes\", \"gt_labels\", \"gt_masks\" and \"gt_semantic_seg\".\n    These fields are formatted as follows.\n\n    - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)\n    - proposals: (1)to tensor, (2)to DataContainer\n    - gt_bboxes: (1)to tensor, (2)to DataContainer\n    - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer\n    - gt_labels: (1)to tensor, (2)to DataContainer\n    - gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True)\n    - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, \\\n                       (3)to DataContainer (stack=True)\n    \"\"\"\n\n    def __init__(self, ):\n        return\n\n    def __call__(self, results):\n        \"\"\"Call function to transform and format common fields in results.\n\n        Args:\n            results (dict): Result dict contains the data to convert.\n\n        Returns:\n            dict: The result dict contains the data that is formatted with\n                default bundle.\n        \"\"\"\n        if 'img' in results:\n            if isinstance(results['img'], list):\n                # process multiple imgs in single frame\n                imgs = [img.transpose(2, 0, 1) for img in results['img']]\n                imgs = np.ascontiguousarray(np.stack(imgs, axis=0))\n                results['img'] = DC(to_tensor(imgs), stack=True)\n            else:\n                img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))\n                results['img'] = DC(to_tensor(img), stack=True)\n        for key in [\n                'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',\n                'gt_labels_3d', 'pts_instance_mask', 'pts_semantic_mask',\n                'gt_pts_centers_view', 'gt_img_centers_view', 'gt_visible_3d'\n        ]:\n            if key not in results:\n                continue\n            if isinstance(results[key], list):\n                results[key] = DC([to_tensor(res) for res in results[key]])\n            else:\n                results[key] = DC(to_tensor(results[key]))\n        if 'gt_bboxes_3d' in results:\n            if isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes):\n                results['gt_bboxes_3d'] = DC(\n                    results['gt_bboxes_3d'], cpu_only=True)\n            else:\n                results['gt_bboxes_3d'] = DC(\n                    to_tensor(results['gt_bboxes_3d']))\n\n        if 'gt_bboxes_cam_view' in results:\n            if isinstance(results['gt_bboxes_cam_view'], BaseInstance3DBoxes):\n                results['gt_bboxes_cam_view'] = DC(\n                    results['gt_bboxes_cam_view'], cpu_only=True)\n            else:\n                results['gt_bboxes_cam_view'] = DC(\n                    to_tensor(results['gt_bboxes_cam_view']))\n\n        if 'gt_bboxes_lidar_view' in results:\n            if isinstance(results['gt_bboxes_lidar_view'], BaseInstance3DBoxes):\n                results['gt_bboxes_lidar_view'] = DC(\n                    results['gt_bboxes_lidar_view'], cpu_only=True)\n      
      else:\n                results['gt_bboxes_lidar_view'] = DC(\n                    to_tensor(results['gt_bboxes_lidar_view']))\n\n        if 'gt_masks' in results:\n            results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)\n        if 'gt_semantic_seg' in results:\n            results['gt_semantic_seg'] = DC(\n                to_tensor(results['gt_semantic_seg'][None, ...]), stack=True)\n\n        return results\n\n    def __repr__(self):\n        return self.__class__.__name__\n\n\n@PIPELINES.register_module()\nclass Collect3D(object):\n    \"\"\"Collect data from the loader relevant to the specific task.\n\n    This is usually the last stage of the data loader pipeline. Typically keys\n    is set to some subset of \"img\", \"proposals\", \"gt_bboxes\",\n    \"gt_bboxes_ignore\", \"gt_labels\", and/or \"gt_masks\".\n\n    The \"img_meta\" item is always populated.  The contents of the \"img_meta\"\n    dictionary depends on \"meta_keys\". By default this includes:\n\n        - 'img_shape': shape of the image input to the network as a tuple \\\n            (h, w, c).  Note that images may be zero padded on the \\\n            bottom/right if the batch tensor is larger than this shape.\n        - 'scale_factor': a float indicating the preprocessing scale\n        - 'flip': a boolean indicating if image flip transform was used\n        - 'filename': path to the image file\n        - 'ori_shape': original shape of the image as a tuple (h, w, c)\n        - 'pad_shape': image shape after padding\n        - 'lidar2img': transform from lidar to image\n        - 'pcd_horizontal_flip': a boolean indicating if point cloud is \\\n            flipped horizontally\n        - 'pcd_vertical_flip': a boolean indicating if point cloud is \\\n            flipped vertically\n        - 'box_mode_3d': 3D box mode\n        - 'box_type_3d': 3D box type\n        - 'img_norm_cfg': a dict of normalization information:\n\n            - mean: per channel mean subtraction\n            - std: per channel std divisor\n            - to_rgb: bool indicating if bgr was converted to rgb\n        - 'rect': rectification matrix\n        - 'Trv2c': transformation from velodyne to camera coordinate\n        - 'P2': transformation betweeen cameras\n        - 'pcd_trans': point cloud transformations\n        - 'sample_idx': sample index\n        - 'pcd_scale_factor': point cloud scale factor\n        - 'pcd_rotation': rotation applied to point cloud\n        - 'pts_filename': path to point cloud file.\n\n    Args:\n        keys (Sequence[str]): Keys of results to be collected in ``data``.\n        meta_keys (Sequence[str], optional): Meta keys to be converted to\n            ``mmcv.DataContainer`` and collected in ``data[img_metas]``.\n            Default: ('filename', 'ori_shape', 'img_shape', 'lidar2img', \\\n            'pad_shape', 'scale_factor', 'flip', 'pcd_horizontal_flip', \\\n            'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d', \\\n            'img_norm_cfg', 'rect', 'Trv2c', 'P2', 'pcd_trans', \\\n            'sample_idx', 'pcd_scale_factor', 'pcd_rotation', 'pts_filename')\n    \"\"\"\n\n    def __init__(self,\n                 keys,\n                 meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',\n                            'pad_shape', 'scale_factor', 'flip', 'image_flip',\n                            'pcd_horizontal_flip', 'pcd_vertical_flip',\n                            'box_mode_3d', 'box_type_3d', 'img_norm_cfg',\n                            'rect', 'Trv2c', 'P2', 
'pcd_trans', 'sample_idx',\n                            'pcd_scale_factor', 'pcd_rotation', 'pts_filename',\n                            'transformation_3d_flow', 'cam_intrinsic', 'lidar2cam_r',\n                            'lidar2cam_t', 'valid_shape', 'img_scale_ratios', 'pcd_rotation_angle')):\n        self.keys = keys\n        self.meta_keys = meta_keys\n\n    def __call__(self, results):\n        \"\"\"Call function to collect keys in results. The keys in ``meta_keys``\n        will be converted to :obj:`mmcv.DataContainer`.\n\n        Args:\n            results (dict): Result dict contains the data to collect.\n\n        Returns:\n            dict: The result dict contains the following keys\n                - keys in ``self.keys``\n                - ``img_metas``\n        \"\"\"\n        data = {}\n        img_metas = {}\n        for key in self.meta_keys:\n            if key in results:\n                img_metas[key] = results[key]\n\n        data['img_metas'] = DC(img_metas, cpu_only=True)\n        for key in self.keys:\n            data[key] = results[key]\n        return data\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        return self.__class__.__name__ + '(keys={}, meta_keys={})'.format(\n            self.keys, self.meta_keys)\n\n\n@PIPELINES.register_module()\nclass DefaultFormatBundle3D(DefaultFormatBundle):\n    \"\"\"Default formatting bundle.\n\n    It simplifies the pipeline of formatting common fields for voxels,\n    including \"proposals\", \"gt_bboxes\", \"gt_labels\", \"gt_masks\" and\n    \"gt_semantic_seg\".\n    These fields are formatted as follows.\n\n    - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)\n    - proposals: (1)to tensor, (2)to DataContainer\n    - gt_bboxes: (1)to tensor, (2)to DataContainer\n    - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer\n    - gt_labels: (1)to tensor, (2)to DataContainer\n    \"\"\"\n\n    def __init__(self, class_names, with_gt=True, with_label=True):\n        super(DefaultFormatBundle3D, self).__init__()\n        self.class_names = class_names\n        self.with_gt = with_gt\n        self.with_label = with_label\n\n    def __call__(self, results):\n        \"\"\"Call function to transform and format common fields in results.\n\n        Args:\n            results (dict): Result dict contains the data to convert.\n\n        Returns:\n            dict: The result dict contains the data that is formatted with\n                default bundle.\n        \"\"\"\n        # Format 3D data\n        if 'points' in results:\n            assert isinstance(results['points'], BasePoints)\n            results['points'] = DC(results['points'].tensor)\n\n        for key in ['voxels', 'coors', 'voxel_centers', 'num_points']:\n            if key not in results:\n                continue\n            results[key] = DC(to_tensor(results[key]), stack=False)\n\n        if self.with_gt:\n            # Clean GT bboxes in the final\n            if 'gt_bboxes_3d_mask' in results:\n                gt_bboxes_3d_mask = results['gt_bboxes_3d_mask']\n                results['gt_bboxes_3d'] = results['gt_bboxes_3d'][\n                    gt_bboxes_3d_mask]\n                if 'gt_names_3d' in results:\n                    results['gt_names_3d'] = results['gt_names_3d'][\n                        gt_bboxes_3d_mask]\n            if 'gt_bboxes_mask' in results:\n                gt_bboxes_mask = results['gt_bboxes_mask']\n                if 'gt_bboxes' in results:\n                
    results['gt_bboxes'] = results['gt_bboxes'][gt_bboxes_mask]\n                results['gt_names'] = results['gt_names'][gt_bboxes_mask]\n            if self.with_label:\n                if 'gt_names' in results and len(results['gt_names']) == 0:\n                    results['gt_labels'] = np.array([], dtype=np.int64)\n                elif 'gt_names' in results and isinstance(\n                        results['gt_names'][0], list):\n                    # gt_labels might be a list of list in multi-view setting\n                    results['gt_labels'] = [\n                        np.array([self.class_names.index(n) for n in res],\n                                 dtype=np.int64) for res in results['gt_names']\n                    ]\n                elif 'gt_names' in results:\n                    results['gt_labels'] = np.array([\n                        self.class_names.index(n) for n in results['gt_names']\n                    ],\n                                                    dtype=np.int64)\n                # we still assume one pipeline for one frame LiDAR\n                # thus, the 3D name is list[string]\n                if 'gt_names_3d' in results:\n                    results['gt_labels_3d'] = np.array([\n                        self.class_names.index(n)\n                        for n in results['gt_names_3d']\n                    ],\n                                                       dtype=np.int64)\n        results = super(DefaultFormatBundle3D, self).__call__(results)\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(class_names={}, '.format(self.class_names)\n        repr_str += 'with_gt={}, with_label={})'.format(\n            self.with_gt, self.with_label)\n        return repr_str\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/loading.py",
    "content": "import mmcv\nimport numpy as np\nimport torch\nimport cv2\nimport copy\n\nfrom mmdet3d.core.points import BasePoints, get_points_type\nfrom mmdet.datasets.builder import PIPELINES\nfrom mmdet.datasets.pipelines import LoadAnnotations\n\n\n@PIPELINES.register_module()\nclass MyResize(object):\n    \"\"\"Resize images & bbox & mask.\n\n    This transform resizes the input image to some scale. Bboxes and masks are\n    then resized with the same scale factor. If the input dict contains the key\n    \"scale\", then the scale in the input dict is used, otherwise the specified\n    scale in the init method is used. If the input dict contains the key\n    \"scale_factor\" (if MultiScaleFlipAug does not give img_scale but\n    scale_factor), the actual scale will be computed by image shape and\n    scale_factor.\n\n    `img_scale` can either be a tuple (single-scale) or a list of tuple\n    (multi-scale). There are 3 multiscale modes:\n\n    - ``ratio_range is not None``: randomly sample a ratio from the ratio \\\n      range and multiply it with the image scale.\n    - ``ratio_range is None`` and ``multiscale_mode == \"range\"``: randomly \\\n      sample a scale from the multiscale range.\n    - ``ratio_range is None`` and ``multiscale_mode == \"value\"``: randomly \\\n      sample a scale from multiple scales.\n\n    Args:\n        img_scale (tuple or list[tuple]): Images scales for resizing.\n        multiscale_mode (str): Either \"range\" or \"value\".\n        ratio_range (tuple[float]): (min_ratio, max_ratio)\n        keep_ratio (bool): Whether to keep the aspect ratio when resizing the\n            image.\n        bbox_clip_border (bool, optional): Whether clip the objects outside\n            the border of the image. Defaults to True.\n        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.\n            These two backends generates slightly different results. Defaults\n            to 'cv2'.\n        override (bool, optional): Whether to override `scale` and\n            `scale_factor` so as to call resize twice. Default False. 
If True,\n            after the first resizing, the existed `scale` and `scale_factor`\n            will be ignored so the second resizing can be allowed.\n            This option is a work-around for multiple times of resize in DETR.\n            Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 img_scale=None,\n                 multiscale_mode='range',\n                 ratio_range=None,\n                 keep_ratio=True,\n                 bbox_clip_border=True,\n                 backend='cv2',\n                 override=False):\n        if img_scale is None:\n            self.img_scale = None\n        else:\n            if isinstance(img_scale, list):\n                self.img_scale = img_scale\n            else:\n                self.img_scale = [img_scale]\n            assert mmcv.is_list_of(self.img_scale, tuple)\n\n        if ratio_range is not None:\n            # mode 1: given a scale and a range of image ratio\n            assert len(self.img_scale) == 1\n        else:\n            # mode 2: given multiple scales or a range of scales\n            assert multiscale_mode in ['value', 'range']\n\n        self.backend = backend\n        self.multiscale_mode = multiscale_mode\n        self.ratio_range = ratio_range\n        self.keep_ratio = keep_ratio\n        # TODO: refactor the override option in Resize\n        self.override = override\n        self.bbox_clip_border = bbox_clip_border\n\n    @staticmethod\n    def random_select(img_scales):\n        \"\"\"Randomly select an img_scale from given candidates.\n\n        Args:\n            img_scales (list[tuple]): Images scales for selection.\n\n        Returns:\n            (tuple, int): Returns a tuple ``(img_scale, scale_dix)``, \\\n                where ``img_scale`` is the selected image scale and \\\n                ``scale_idx`` is the selected index in the given candidates.\n        \"\"\"\n\n        assert mmcv.is_list_of(img_scales, tuple)\n        scale_idx = np.random.randint(len(img_scales))\n        img_scale = img_scales[scale_idx]\n        return img_scale, scale_idx\n\n    @staticmethod\n    def random_sample(img_scales):\n        \"\"\"Randomly sample an img_scale when ``multiscale_mode=='range'``.\n\n        Args:\n            img_scales (list[tuple]): Images scale range for sampling.\n                There must be two tuples in img_scales, which specify the lower\n                and uper bound of image scales.\n\n        Returns:\n            (tuple, None): Returns a tuple ``(img_scale, None)``, where \\\n                ``img_scale`` is sampled scale and None is just a placeholder \\\n                to be consistent with :func:`random_select`.\n        \"\"\"\n\n        assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2\n        img_scale_long = [max(s) for s in img_scales]\n        img_scale_short = [min(s) for s in img_scales]\n        long_edge = np.random.randint(\n            min(img_scale_long),\n            max(img_scale_long) + 1)\n        short_edge = np.random.randint(\n            min(img_scale_short),\n            max(img_scale_short) + 1)\n        img_scale = (long_edge, short_edge)\n        return img_scale, None\n\n    @staticmethod\n    def random_sample_ratio(img_scale, ratio_range):\n        \"\"\"Randomly sample an img_scale when ``ratio_range`` is specified.\n\n        A ratio will be randomly sampled from the range specified by\n        ``ratio_range``. 
Then it would be multiplied with ``img_scale`` to\n        generate sampled scale.\n\n        Args:\n            img_scale (tuple): Images scale base to multiply with ratio.\n            ratio_range (tuple[float]): The minimum and maximum ratio to scale\n                the ``img_scale``.\n\n        Returns:\n            (tuple, None): Returns a tuple ``(scale, None)``, where \\\n                ``scale`` is sampled ratio multiplied with ``img_scale`` and \\\n                None is just a placeholder to be consistent with \\\n                :func:`random_select`.\n        \"\"\"\n\n        assert isinstance(img_scale, tuple) and len(img_scale) == 2\n        min_ratio, max_ratio = ratio_range\n        assert min_ratio <= max_ratio\n        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio\n        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)\n        return scale, None\n\n    def _random_scale(self, results):\n        \"\"\"Randomly sample an img_scale according to ``ratio_range`` and\n        ``multiscale_mode``.\n\n        If ``ratio_range`` is specified, a ratio will be sampled and be\n        multiplied with ``img_scale``.\n        If multiple scales are specified by ``img_scale``, a scale will be\n        sampled according to ``multiscale_mode``.\n        Otherwise, single scale will be used.\n\n        Args:\n            results (dict): Result dict from :obj:`dataset`.\n\n        Returns:\n            dict: Two new keys 'scale` and 'scale_idx` are added into \\\n                ``results``, which would be used by subsequent pipelines.\n        \"\"\"\n\n        if self.ratio_range is not None:\n            scale, scale_idx = self.random_sample_ratio(\n                self.img_scale[0], self.ratio_range)\n        elif len(self.img_scale) == 1:\n            scale, scale_idx = self.img_scale[0], 0\n        elif self.multiscale_mode == 'range':\n            scale, scale_idx = self.random_sample(self.img_scale)\n        elif self.multiscale_mode == 'value':\n            scale, scale_idx = self.random_select(self.img_scale)\n        else:\n            raise NotImplementedError\n\n        results['scale'] = scale\n        results['scale_idx'] = scale_idx\n\n    def _resize_img(self, results):\n        \"\"\"Resize images with ``results['scale']``.\"\"\"\n        imgs = results['img']\n        results['img'] = [imgs[i] for i in range(len(imgs))]\n        for key in results.get('img_fields', ['img']):\n            for idx in range(len(results['img'])):\n                if self.keep_ratio:\n                    img, scale_factor = mmcv.imrescale(\n                        results[key][idx],\n                        results['scale'],\n                        return_scale=True,\n                        backend=self.backend)\n                    # the w_scale and h_scale has minor difference\n                    # a real fix should be done in the mmcv.imrescale in the future\n                    new_h, new_w = img.shape[:2]\n                    h, w = results[key][idx].shape[:2]\n                    w_scale = new_w / w\n                    h_scale = new_h / h\n                else:\n                    img, w_scale, h_scale = mmcv.imresize(\n                        results[key][idx],\n                        results['scale'],\n                        return_scale=True,\n                        backend=self.backend)\n                results[key][idx] = img\n\n            scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],\n                             
       dtype=np.float32)\n            results['img_shape'] = img.shape\n            # in case that there is no padding\n            results['pad_shape'] = img.shape\n            results['scale_factor'] = scale_factor\n            results['keep_ratio'] = self.keep_ratio\n            if 'valid_shape' in results:\n                scaling = np.array([[w_scale, h_scale]])\n                results['valid_shape'] = results['valid_shape'] * scaling\n\n    def _resize_bboxes(self, results):\n        \"\"\"Resize bounding boxes with ``results['scale_factor']``.\"\"\"\n        for key in results.get('bbox_fields', []):\n            bboxes = results[key] * results['scale_factor']\n            if self.bbox_clip_border:\n                img_shape = results['img_shape']\n                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])\n                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])\n            results[key] = bboxes\n\n    def _resize_centers(self, results):\n        centers = results['gt_img_centers_view']\n        centers[:, :2] = centers[:, :2] * results['scale_factor'][:2]\n        img_shape = results['img_shape']\n        centers[:, 0] = np.clip(centers[:, 0], 0, img_shape[1])\n        centers[:, 1] = np.clip(centers[:, 1], 0, img_shape[0])\n        results['gt_img_centers_view'] = centers\n\n    def _resize_masks(self, results):\n        \"\"\"Resize masks with ``results['scale']``\"\"\"\n        for key in results.get('mask_fields', []):\n            if results[key] is None:\n                continue\n            if self.keep_ratio:\n                results[key] = results[key].rescale(results['scale'])\n            else:\n                results[key] = results[key].resize(results['img_shape'][:2])\n\n    def _resize_seg(self, results):\n        \"\"\"Resize semantic segmentation map with ``results['scale']``.\"\"\"\n        for key in results.get('seg_fields', []):\n            if self.keep_ratio:\n                gt_seg = mmcv.imrescale(\n                    results[key],\n                    results['scale'],\n                    interpolation='nearest',\n                    backend=self.backend)\n            else:\n                gt_seg = mmcv.imresize(\n                    results[key],\n                    results['scale'],\n                    interpolation='nearest',\n                    backend=self.backend)\n            results['gt_semantic_seg'] = gt_seg\n\n    def _resize_camera(self, results):\n        scale_factor = results['scale_factor']\n        w_scale = scale_factor[0]\n        h_scale = scale_factor[1]\n        scaling_matrix = np.array([\n            [w_scale, 0, 0],\n            [0, h_scale, 0],\n            [0, 0, 1]\n        ])\n        for i in range(len(results['cam_intrinsic'])):\n            results['cam_intrinsic'][i] = scaling_matrix @ results['cam_intrinsic'][i]\n\n    def __call__(self, results):\n        \"\"\"Call function to resize images, bounding boxes, masks, semantic\n        segmentation map.\n\n        Args:\n            results (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', \\\n                'keep_ratio' keys are added into result dict.\n        \"\"\"\n\n        if 'scale' not in results:\n            if 'scale_factor' in results:\n                img_shape = results['img'][0].shape[:2]\n                scale_factor = results['scale_factor']\n                assert isinstance(scale_factor, float)\n                
results['scale'] = tuple(\n                    [int(x * scale_factor) for x in img_shape][::-1])\n            else:\n                self._random_scale(results)\n        else:\n            if not self.override:\n                assert 'scale_factor' not in results, (\n                    'scale and scale_factor cannot be both set.')\n            else:\n                results.pop('scale')\n                if 'scale_factor' in results:\n                    results.pop('scale_factor')\n                self._random_scale(results)\n\n        self._resize_img(results)\n        self._resize_bboxes(results)\n        self._resize_masks(results)\n        self._resize_seg(results)\n        if 'gt_img_centers_view' in results:\n            self._resize_centers(results)\n\n        if 'cam_intrinsic' in results:\n            self._resize_camera(results)\n\n        return results\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(img_scale={self.img_scale}, '\n        repr_str += f'multiscale_mode={self.multiscale_mode}, '\n        repr_str += f'ratio_range={self.ratio_range}, '\n        repr_str += f'keep_ratio={self.keep_ratio}, '\n        repr_str += f'bbox_clip_border={self.bbox_clip_border})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass MyNormalize(object):\n    \"\"\"Normalize the image.\n\n    Added key is \"img_norm_cfg\".\n\n    Args:\n        mean (sequence): Mean values of 3 channels.\n        std (sequence): Std values of 3 channels.\n        to_rgb (bool): Whether to convert the image from BGR to RGB,\n            default is true.\n    \"\"\"\n\n    def __init__(self, mean, std, to_rgb=True):\n        self.mean = np.array(mean, dtype=np.float32)\n        self.std = np.array(std, dtype=np.float32)\n        self.to_rgb = to_rgb\n\n    def __call__(self, results):\n        \"\"\"Call function to normalize images.\n\n        Args:\n            results (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Normalized results, 'img_norm_cfg' key is added into\n                result dict.\n        \"\"\"\n        for key in results.get('img_fields', ['img']):\n            for idx in range(len(results['img'])):\n                results[key][idx] = mmcv.imnormalize(results[key][idx], self.mean, self.std,\n                                                     self.to_rgb)\n        results['img_norm_cfg'] = dict(\n            mean=self.mean, std=self.std, to_rgb=self.to_rgb)\n        return results\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(mean={self.mean}, std={self.std}, to_rgb={self.to_rgb})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass MyPad(object):\n    \"\"\"Pad the image & mask.\n\n    There are two padding modes: (1) pad to a fixed size and (2) pad to the\n    minimum size that is divisible by some number.\n    Added keys are \"pad_shape\", \"pad_fixed_size\", \"pad_size_divisor\",\n\n    Args:\n        size (tuple, optional): Fixed padding size.\n        size_divisor (int, optional): The divisor of padded size.\n        pad_val (float, optional): Padding value, 0 by default.\n    \"\"\"\n\n    def __init__(self, size=None, size_divisor=None, pad_val=0):\n        self.size = size\n        self.size_divisor = size_divisor\n        self.pad_val = pad_val\n        # only one of size and size_divisor should be valid\n        assert size is not None or size_divisor is not None\n        assert size is None or size_divisor is None\n\n   
 def _pad_img(self, results):\n        \"\"\"Pad images according to ``self.size``.\"\"\"\n        for key in results.get('img_fields', ['img']):\n            if self.size is not None:\n                padded_img = mmcv.impad(\n                    results[key], shape=self.size, pad_val=self.pad_val)\n            elif self.size_divisor is not None:\n                for idx in range(len(results[key])):\n                    padded_img = mmcv.impad_to_multiple(\n                        results[key][idx], self.size_divisor, pad_val=self.pad_val)\n                    results[key][idx] = padded_img\n        results['pad_shape'] = padded_img.shape\n        results['pad_fixed_size'] = self.size\n        results['pad_size_divisor'] = self.size_divisor\n\n    def _pad_masks(self, results):\n        \"\"\"Pad masks according to ``results['pad_shape']``.\"\"\"\n        pad_shape = results['pad_shape'][:2]\n        for key in results.get('mask_fields', []):\n            results[key] = results[key].pad(pad_shape, pad_val=self.pad_val)\n\n    def _pad_seg(self, results):\n        \"\"\"Pad semantic segmentation map according to\n        ``results['pad_shape']``.\"\"\"\n        for key in results.get('seg_fields', []):\n            results[key] = mmcv.impad(\n                results[key], shape=results['pad_shape'][:2])\n\n    def __call__(self, results):\n        \"\"\"Call function to pad images, masks, semantic segmentation maps.\n\n        Args:\n            results (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Updated result dict.\n        \"\"\"\n        self._pad_img(results)\n        self._pad_masks(results)\n        self._pad_seg(results)\n        return results\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(size={self.size}, '\n        repr_str += f'size_divisor={self.size_divisor}, '\n        repr_str += f'pad_val={self.pad_val})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass LoadMultiViewImageFromFiles(object):\n    \"\"\"Load multi channel images from a list of separate channel files.\n\n    Expects results['img_filename'] to be a list of filenames.\n\n    Args:\n        to_float32 (bool): Whether to convert the img to float32.\n            Defaults to False.\n        color_type (str): Color type of the file. Defaults to 'unchanged'.\n    \"\"\"\n\n    def __init__(self, to_float32=False, img_scale=None, color_type='unchanged'):\n        self.to_float32 = to_float32\n        self.img_scale = img_scale\n        self.color_type = color_type\n\n    def pad(self, img):\n        # to pad the 5 input images into a same size (for Waymo)\n        if img.shape[0] != self.img_scale[0]:\n            img = np.concatenate([img, np.zeros_like(img[0:1280-886,:])], axis=0)\n        return img\n\n    def __call__(self, results):\n        \"\"\"Call function to load multi-view image from files.\n\n        Args:\n            results (dict): Result dict containing multi-view image filenames.\n\n        Returns:\n            dict: The result dict containing the multi-view image data. 
\\\n                Added keys and values are described below.\n\n                - filename (str): Multi-view image filenames.\n                - img (np.ndarray): Multi-view image arrays.\n                - img_shape (tuple[int]): Shape of multi-view image arrays.\n                - ori_shape (tuple[int]): Shape of original image arrays.\n                - pad_shape (tuple[int]): Shape of padded image arrays.\n                - scale_factor (float): Scale factor.\n                - img_norm_cfg (dict): Normalization configuration of images.\n        \"\"\"\n        filename = results['img_filename']\n        if self.img_scale is None:\n            img = np.stack(\n                [mmcv.imread(name, self.color_type) for name in filename], axis=-1)\n        else:\n            img = np.stack(\n                [self.pad(mmcv.imread(name, self.color_type)) for name in filename], axis=-1)\n        if self.to_float32:\n            img = img.astype(np.float32)\n        results['filename'] = filename\n        # unravel to list, see `DefaultFormatBundle` in formating.py\n        # which will transpose each image separately and then stack into array\n        results['img'] = [img[..., i] for i in range(img.shape[-1])]\n        results['img_shape'] = img.shape\n        results['ori_shape'] = img.shape\n        # Set initial values for default meta_keys\n        results['pad_shape'] = img.shape\n        # results['scale_factor'] = [1.0, 1.0]\n        num_channels = 1 if len(img.shape) < 3 else img.shape[2]\n        results['img_norm_cfg'] = dict(\n            mean=np.zeros(num_channels, dtype=np.float32),\n            std=np.ones(num_channels, dtype=np.float32),\n            to_rgb=False)\n        results['img_fields'] = ['img']\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        return \"{} (to_float32={}, color_type='{}')\".format(\n            self.__class__.__name__, self.to_float32, self.color_type)\n\n\n@PIPELINES.register_module()\nclass LoadPointsFromMultiSweeps(object):\n    \"\"\"Load points from multiple sweeps.\n\n    This is usually used for nuScenes dataset to utilize previous sweeps.\n\n    Args:\n        sweeps_num (int): Number of sweeps. Defaults to 10.\n        load_dim (int): Dimension number of the loaded points. Defaults to 5.\n        use_dim (list[int]): Which dimension to use. Defaults to [0, 1, 2, 4].\n        file_client_args (dict): Config dict of file clients, refer to\n            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py\n            for more details. Defaults to dict(backend='disk').\n        pad_empty_sweeps (bool): Whether to repeat keyframe when\n            sweeps is empty. 
Defaults to False.\n        remove_close (bool): Whether to remove close points.\n            Defaults to False.\n        test_mode (bool): If test_model=True used for testing, it will not\n            randomly sample sweeps but select the nearest N frames.\n            Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 sweeps_num=10,\n                 load_dim=5,\n                 use_dim=[0, 1, 2, 4],\n                 file_client_args=dict(backend='disk'),\n                 pad_empty_sweeps=False,\n                 remove_close=False,\n                 test_mode=False):\n        self.load_dim = load_dim\n        self.sweeps_num = sweeps_num\n        self.use_dim = use_dim\n        self.file_client_args = file_client_args.copy()\n        self.file_client = None\n        self.pad_empty_sweeps = pad_empty_sweeps\n        self.remove_close = remove_close\n        self.test_mode = test_mode\n\n    def _load_points(self, pts_filename):\n        \"\"\"Private function to load point clouds data.\n\n        Args:\n            pts_filename (str): Filename of point clouds data.\n\n        Returns:\n            np.ndarray: An array containing point clouds data.\n        \"\"\"\n        if self.file_client is None:\n            self.file_client = mmcv.FileClient(**self.file_client_args)\n        try:\n            pts_bytes = self.file_client.get(pts_filename)\n            points = np.frombuffer(pts_bytes, dtype=np.float32)\n        except ConnectionError:\n            mmcv.check_file_exist(pts_filename)\n            if pts_filename.endswith('.npy'):\n                points = np.load(pts_filename)\n            else:\n                points = np.fromfile(pts_filename, dtype=np.float32)\n        return points\n\n    def _remove_close(self, points, radius=1.0):\n        \"\"\"Removes point too close within a certain radius from origin.\n\n        Args:\n            points (np.ndarray): Sweep points.\n            radius (float): Radius below which points are removed.\n                Defaults to 1.0.\n\n        Returns:\n            np.ndarray: Points after removing.\n        \"\"\"\n        if isinstance(points, np.ndarray):\n            points_numpy = points\n        elif isinstance(points, BasePoints):\n            points_numpy = points.tensor.numpy()\n        else:\n            raise NotImplementedError\n        x_filt = np.abs(points_numpy[:, 0]) < radius\n        y_filt = np.abs(points_numpy[:, 1]) < radius\n        not_close = np.logical_not(np.logical_and(x_filt, y_filt))\n        return points[not_close]\n\n    def __call__(self, results):\n        \"\"\"Call function to load multi-sweep point clouds from files.\n\n        Args:\n            results (dict): Result dict containing multi-sweep point cloud \\\n                filenames.\n\n        Returns:\n            dict: The result dict containing the multi-sweep points data. 
\\\n                Added key and value are described below.\n\n                - points (np.ndarray): Multi-sweep point cloud arrays.\n        \"\"\"\n        points = results['points']\n        points.tensor[:, 4] = 0\n        sweep_points_list = [points]\n        ts = results['timestamp']\n        if self.pad_empty_sweeps and len(results['sweeps']) == 0:\n            for i in range(self.sweeps_num):\n                if self.remove_close:\n                    sweep_points_list.append(self._remove_close(points))\n                else:\n                    sweep_points_list.append(points)\n        else:\n            if len(results['sweeps']) <= self.sweeps_num:\n                choices = np.arange(len(results['sweeps']))\n            elif self.test_mode:\n                choices = np.arange(self.sweeps_num)\n            else:\n                choices = np.random.choice(\n                    len(results['sweeps']), self.sweeps_num, replace=False)\n            for idx in choices:\n                sweep = results['sweeps'][idx]\n                points_sweep = self._load_points(sweep['data_path'])\n                points_sweep = np.copy(points_sweep).reshape(-1, self.load_dim)\n                if self.remove_close:\n                    points_sweep = self._remove_close(points_sweep)\n                sweep_ts = sweep['timestamp'] / 1e6\n                points_sweep[:, :3] = points_sweep[:, :3] @ sweep[\n                    'sensor2lidar_rotation'].T\n                points_sweep[:, :3] += sweep['sensor2lidar_translation']\n                points_sweep[:, 4] = ts - sweep_ts\n                points_sweep = points.new_point(points_sweep)\n                sweep_points_list.append(points_sweep)\n\n        points = points.cat(sweep_points_list)\n        points = points[:, self.use_dim]\n        results['points'] = points\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})'\n\n\n@PIPELINES.register_module()\nclass PointSegClassMapping(object):\n    \"\"\"Map original semantic class to valid category ids.\n\n    Map valid classes as 0~len(valid_cat_ids)-1 and\n    others as len(valid_cat_ids).\n\n    Args:\n        valid_cat_ids (tuple[int]): A tuple of valid category.\n    \"\"\"\n\n    def __init__(self, valid_cat_ids):\n        self.valid_cat_ids = valid_cat_ids\n\n    def __call__(self, results):\n        \"\"\"Call function to map original semantic class to valid category ids.\n\n        Args:\n            results (dict): Result dict containing point semantic masks.\n\n        Returns:\n            dict: The result dict containing the mapped category ids. 
\\\n                Updated key and value are described below.\n\n                - pts_semantic_mask (np.ndarray): Mapped semantic masks.\n        \"\"\"\n        assert 'pts_semantic_mask' in results\n        pts_semantic_mask = results['pts_semantic_mask']\n        neg_cls = len(self.valid_cat_ids)\n\n        for i in range(pts_semantic_mask.shape[0]):\n            if pts_semantic_mask[i] in self.valid_cat_ids:\n                converted_id = self.valid_cat_ids.index(pts_semantic_mask[i])\n                pts_semantic_mask[i] = converted_id\n            else:\n                pts_semantic_mask[i] = neg_cls\n\n        results['pts_semantic_mask'] = pts_semantic_mask\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass NormalizePointsColor(object):\n    \"\"\"Normalize color of points.\n\n    Args:\n        color_mean (list[float]): Mean color of the point cloud.\n    \"\"\"\n\n    def __init__(self, color_mean):\n        self.color_mean = color_mean\n\n    def __call__(self, results):\n        \"\"\"Call function to normalize color of points.\n\n        Args:\n            results (dict): Result dict containing point clouds data.\n\n        Returns:\n            dict: The result dict containing the normalized points. \\\n                Updated key and value are described below.\n\n                - points (np.ndarray): Points after color normalization.\n        \"\"\"\n        points = results['points']\n        assert points.shape[1] >= 6, \\\n            f'Expect points have channel >=6, got {points.shape[1]}'\n        points[:, 3:6] = points[:, 3:6] - np.array(self.color_mean) / 256.0\n        results['points'] = points\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(color_mean={})'.format(self.color_mean)\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass LoadPointsFromFile(object):\n    \"\"\"Load Points From File.\n\n    Load sunrgbd and scannet points from file.\n\n    Args:\n        load_dim (int): The dimension of the loaded points.\n            Defaults to 6.\n        coord_type (str): The type of coordinates of points cloud.\n            Available options includes:\n            - 'LIDAR': Points in LiDAR coordinates.\n            - 'DEPTH': Points in depth coordinates, usually for indoor dataset.\n            - 'CAMERA': Points in camera coordinates.\n        use_dim (list[int]): Which dimensions of the points to be used.\n            Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4\n            or use_dim=[0, 1, 2, 3] to use the intensity dimension.\n        shift_height (bool): Whether to use shifted height. Defaults to False.\n        file_client_args (dict): Config dict of file clients, refer to\n            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py\n            for more details. 
Defaults to dict(backend='disk').\n    \"\"\"\n\n    def __init__(self,\n                 coord_type,\n                 load_dim=6,\n                 use_dim=[0, 1, 2],\n                 shift_height=False,\n                 file_client_args=dict(backend='disk')):\n        self.shift_height = shift_height\n        if isinstance(use_dim, int):\n            use_dim = list(range(use_dim))\n        assert max(use_dim) < load_dim, \\\n            f'Expect all used dimensions < {load_dim}, got {use_dim}'\n        assert coord_type in ['CAMERA', 'LIDAR', 'DEPTH']\n\n        self.coord_type = coord_type\n        self.load_dim = load_dim\n        self.use_dim = use_dim\n        self.file_client_args = file_client_args.copy()\n        self.file_client = None\n\n    def _load_points(self, pts_filename):\n        \"\"\"Private function to load point clouds data.\n\n        Args:\n            pts_filename (str): Filename of point clouds data.\n\n        Returns:\n            np.ndarray: An array containing point clouds data.\n        \"\"\"\n        if self.file_client is None:\n            self.file_client = mmcv.FileClient(**self.file_client_args)\n        try:\n            pts_bytes = self.file_client.get(pts_filename)\n            points = np.frombuffer(pts_bytes, dtype=np.float32)\n        except ConnectionError:\n            mmcv.check_file_exist(pts_filename)\n            if pts_filename.endswith('.npy'):\n                points = np.load(pts_filename)\n            else:\n                points = np.fromfile(pts_filename, dtype=np.float32)\n\n        return points\n\n    def __call__(self, results):\n        \"\"\"Call function to load points data from file.\n\n        Args:\n            results (dict): Result dict containing point clouds data.\n\n        Returns:\n            dict: The result dict containing the point clouds data. 
\\\n                Added key and value are described below.\n\n                - points (np.ndarray): Point clouds data.\n        \"\"\"\n        pts_filename = results['pts_filename']\n        points = self._load_points(pts_filename)\n        points = points.reshape(-1, self.load_dim)\n        points = points[:, self.use_dim]\n        attribute_dims = None\n\n        if self.shift_height:\n            floor_height = np.percentile(points[:, 2], 0.99)\n            height = points[:, 2] - floor_height\n            points = np.concatenate([points, np.expand_dims(height, 1)], 1)\n            attribute_dims = dict(height=3)\n\n        points_class = get_points_type(self.coord_type)\n        points = points_class(\n            points, points_dim=points.shape[-1], attribute_dims=attribute_dims)\n        results['points'] = points\n\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__ + '('\n        repr_str += 'shift_height={}, '.format(self.shift_height)\n        repr_str += 'file_client_args={}), '.format(self.file_client_args)\n        repr_str += 'load_dim={}, '.format(self.load_dim)\n        repr_str += 'use_dim={})'.format(self.use_dim)\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass LoadAnnotations3D(LoadAnnotations):\n    \"\"\"Load Annotations3D.\n\n    Load instance mask and semantic mask of points and\n    encapsulate the items into related fields.\n\n    Args:\n        with_bbox_3d (bool, optional): Whether to load 3D boxes.\n            Defaults to True.\n        with_label_3d (bool, optional): Whether to load 3D labels.\n            Defaults to True.\n        with_mask_3d (bool, optional): Whether to load 3D instance masks.\n            for points. Defaults to False.\n        with_seg_3d (bool, optional): Whether to load 3D semantic masks.\n            for points. Defaults to False.\n        with_bbox (bool, optional): Whether to load 2D boxes.\n            Defaults to False.\n        with_label (bool, optional): Whether to load 2D labels.\n            Defaults to False.\n        with_mask (bool, optional): Whether to load 2D instance masks.\n            Defaults to False.\n        with_seg (bool, optional): Whether to load 2D semantic masks.\n            Defaults to False.\n        poly2mask (bool, optional): Whether to convert polygon annotations\n            to bitmasks. 
Defaults to True.\n        seg_3d_dtype (dtype, optional): Dtype of 3D semantic masks.\n            Defaults to 'int'.\n        file_client_args (dict): Config dict of file clients, refer to\n            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py\n            for more details.\n    \"\"\"\n\n    def __init__(self,\n                 with_bbox_3d=True,\n                 with_label_3d=True,\n                 with_mask_3d=False,\n                 with_seg_3d=False,\n                 with_bbox=False,\n                 with_label=False,\n                 with_mask=False,\n                 with_seg=False,\n                 poly2mask=True,\n                 seg_3d_dtype='int',\n                 file_client_args=dict(backend='disk')):\n        super().__init__(\n            with_bbox,\n            with_label,\n            with_mask,\n            with_seg,\n            poly2mask,\n            file_client_args=file_client_args)\n        self.with_bbox_3d = with_bbox_3d\n        self.with_label_3d = with_label_3d\n        self.with_mask_3d = with_mask_3d\n        self.with_seg_3d = with_seg_3d\n        self.seg_3d_dtype = seg_3d_dtype\n\n    def _load_bboxes_3d(self, results):\n        \"\"\"Private function to load 3D bounding box annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded 3D bounding box annotations.\n        \"\"\"\n        results['gt_bboxes_3d'] = results['ann_info']['gt_bboxes_3d']\n        results['bbox3d_fields'].append('gt_bboxes_3d')\n        return results\n\n    def _load_labels_3d(self, results):\n        \"\"\"Private function to load label annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded label annotations.\n        \"\"\"\n        results['gt_labels_3d'] = results['ann_info']['gt_labels_3d']\n        return results\n\n    def _load_masks_3d(self, results):\n        \"\"\"Private function to load 3D mask annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded 3D mask annotations.\n        \"\"\"\n        pts_instance_mask_path = results['ann_info']['pts_instance_mask_path']\n\n        if self.file_client is None:\n            self.file_client = mmcv.FileClient(**self.file_client_args)\n        try:\n            mask_bytes = self.file_client.get(pts_instance_mask_path)\n            pts_instance_mask = np.frombuffer(mask_bytes, dtype=np.int64)\n        except ConnectionError:\n            mmcv.check_file_exist(pts_instance_mask_path)\n            pts_instance_mask = np.fromfile(\n                pts_instance_mask_path, dtype=np.int64)\n\n        results['pts_instance_mask'] = pts_instance_mask\n        results['pts_mask_fields'].append('pts_instance_mask')\n        return results\n\n    def _load_semantic_seg_3d(self, results):\n        \"\"\"Private function to load 3D semantic segmentation annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing the semantic segmentation annotations.\n        \"\"\"\n        pts_semantic_mask_path = results['ann_info']['pts_semantic_mask_path']\n\n        if self.file_client is None:\n            self.file_client = mmcv.FileClient(**self.file_client_args)\n        try:\n            mask_bytes = self.file_client.get(pts_semantic_mask_path)\n            # add .copy() to fix read-only bug\n            pts_semantic_mask = np.frombuffer(\n                mask_bytes, dtype=self.seg_3d_dtype).copy()\n        except ConnectionError:\n            mmcv.check_file_exist(pts_semantic_mask_path)\n            pts_semantic_mask = np.fromfile(\n                pts_semantic_mask_path, dtype=np.int64)\n\n        results['pts_semantic_mask'] = pts_semantic_mask\n        results['pts_seg_fields'].append('pts_semantic_mask')\n        return results\n\n    def __call__(self, results):\n        \"\"\"Call function to load multiple types of annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded 3D bounding box, label, mask and\n                semantic segmentation annotations.\n        \"\"\"\n        results = super().__call__(results)\n        if self.with_bbox_3d:\n            results = self._load_bboxes_3d(results)\n            if results is None:\n                return None\n        if self.with_label_3d:\n            results = self._load_labels_3d(results)\n        if self.with_mask_3d:\n            results = self._load_masks_3d(results)\n        if self.with_seg_3d:\n            results = self._load_semantic_seg_3d(results)\n\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        indent_str = '    '\n        repr_str = self.__class__.__name__ + '(\\n'\n        repr_str += f'{indent_str}with_bbox_3d={self.with_bbox_3d}, '\n        repr_str += f'{indent_str}with_label_3d={self.with_label_3d}, '\n        repr_str += f'{indent_str}with_mask_3d={self.with_mask_3d}, '\n        repr_str += f'{indent_str}with_seg_3d={self.with_seg_3d}, '\n        repr_str += f'{indent_str}with_bbox={self.with_bbox}, '\n        repr_str += f'{indent_str}with_label={self.with_label}, '\n        repr_str += f'{indent_str}with_mask={self.with_mask}, '\n        repr_str += f'{indent_str}with_seg={self.with_seg}, '\n        repr_str += f'{indent_str}poly2mask={self.poly2mask})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass MyLoadAnnotations3D(LoadAnnotations3D):\n    \"\"\"Load annotations with additional fields.\n\n    Besides the annotations handled by :obj:`LoadAnnotations3D`, this loader\n    optionally loads object centers in the point-cloud and image views\n    (``with_centers``), boxes in the camera and LiDAR views\n    (``with_cam_bbox``) and visibility flags (``with_visible``).\n    \"\"\"\n\n    def __init__(self, with_bbox_3d=True, with_label_3d=True, with_mask_3d=False, with_seg_3d=False, with_bbox=False,\n            with_label=False, with_mask=False, with_seg=False, poly2mask=True, with_centers=False, with_cam_bbox=False,\n            with_visible=False, seg_3d_dtype='int', file_client_args=dict(backend='disk')):\n        super().__init__(\n            with_bbox_3d=with_bbox_3d,\n            with_label_3d=with_label_3d,\n            with_mask_3d=with_mask_3d,\n            with_seg_3d=with_seg_3d,\n            with_bbox=with_bbox,\n            with_label=with_label,\n            with_mask=with_mask,\n            with_seg=with_seg,\n            poly2mask=poly2mask,\n            seg_3d_dtype=seg_3d_dtype,\n            file_client_args=file_client_args)\n        self.with_centers = with_centers\n        self.with_cam_bbox = with_cam_bbox\n        self.with_visible = with_visible\n\n    def __call__(self, results):\n        \"\"\"Call function to load multiple types of annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded 3D bounding box, label, mask and\n                semantic segmentation 
annotations.\n        \"\"\"\n        results = super().__call__(results)\n        if self.with_centers:\n            results = self._load_centers_2d(results)\n\n        if self.with_cam_bbox:\n            results = self._load_cam_box(results)\n\n        if self.with_visible:\n            results = self._load_visible(results)\n\n        return results\n\n    def _load_centers_2d(self, results):\n        \"\"\"Private function to load label annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded label annotations.\n        \"\"\"\n        results['gt_pts_centers_view'] = results['ann_info']['pts_centers_view']\n        results['gt_img_centers_view'] = results['ann_info']['img_centers_view']\n\n        return results\n\n    def _load_cam_box(self, results):\n        \"\"\"Private function to load label annotations.\n\n        Args:\n            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.\n\n        Returns:\n            dict: The dict containing loaded label annotations.\n        \"\"\"\n        results['gt_bboxes_cam_view'] = results['ann_info']['bboxes_cam_view']\n        results['gt_bboxes_lidar_view'] = results['ann_info']['bboxes_lidar_view']\n\n        return results\n\n    def _load_visible(self, results):\n        results['gt_visible_3d'] = results['ann_info']['gt_visible_3d']\n\n        return results\n\n\n@PIPELINES.register_module()\nclass SparseDepth(object):\n    \"\"\"\n    Generate a sparse depth map from the point clouds, the depth map should have the same size with image features\n    \"\"\"\n    def __init__(self, scale_factors, depth_mean=14.41, depth_var=156.89, exp_time=0):\n        self.scale_factors = scale_factors\n        self.depth_mean = depth_mean\n        self.depth_var = depth_var\n        self.exp_time = exp_time\n\n    def __call__(self, results):\n        all_points = results['points'].tensor\n        curr_mask = all_points[:, 4] == 0\n        points = all_points[curr_mask]\n        points = points[:, :3]\n        points_4d = torch.cat([points, torch.ones_like(points[:, :1])], dim=1)\n        lidar2cam_rs = results['lidar2cam_r']\n        lidar2cam_ts = results['lidar2cam_t']\n        cam_intrinsic = results['cam_intrinsic']\n\n        depth_features = []\n        for view_id in range(len(lidar2cam_rs)):\n            if 'valid_shape' in results:\n                h_shape = int(results['valid_shape'][view_id, 1])\n                w_shape = int(results['valid_shape'][view_id, 0])\n            else:\n                h_shape = results['pad_shape'][0]\n                w_shape = results['pad_shape'][1]\n\n            cam_ext = np.eye(4)\n            cam_int = np.eye(4)\n\n            cam_ext[:3, :3] = lidar2cam_rs[view_id]\n            cam_ext[:3, 3] = lidar2cam_ts[view_id]\n            cam_int[:3, :3] = cam_intrinsic[view_id]\n\n            cam_ext = torch.from_numpy(cam_ext).type_as(points_4d)\n            cam_int = torch.from_numpy(cam_int).type_as(points_4d)\n\n            points_4d_view = points_4d @ cam_ext.T\n            points_4d_view = points_4d_view @ cam_int.T\n\n            points_2d_view = points_4d_view[:, :2]\n            depth = points_4d_view[:, 2]\n            depth = torch.clamp(depth, min=1e-4)\n\n            points_2d_view[:, 0] = points_2d_view[:, 0] / depth\n            points_2d_view[:, 1] = points_2d_view[:, 1] / depth\n\n            valid_mask = (points_2d_view[:, 0] > 0) & (points_2d_view[:, 0] < w_shape-1) 
& \\\n                         (points_2d_view[:, 1] > 0) & (points_2d_view[:, 1] < h_shape-1)\n\n            points_2d_view = points_2d_view[valid_mask]\n            depth = depth[valid_mask]\n\n            sort_id = np.argsort(-depth)\n            points_2d_view = points_2d_view[sort_id]\n            depth = depth[sort_id]\n\n            depth_features_view = []\n            w_scale_shape = results['pad_shape'][1] // self.scale_factors[0]\n            h_scale_shape = results['pad_shape'][0] // self.scale_factors[0]\n\n            for scale in self.scale_factors:\n                w_scale_factor = 1.0 / scale\n                h_scale_factor = 1.0 / scale\n\n                scale_factor = torch.Tensor([[w_scale_factor, h_scale_factor]])\n\n                depth_feature = torch.zeros((2, h_scale_shape, w_scale_shape))\n\n                points_2d_view_scale = points_2d_view * scale_factor\n\n                cx = points_2d_view_scale[:, 0].long()\n                cy = points_2d_view_scale[:, 1].long()\n\n                depth_feature[0, cy, cx] = depth\n                depth_feature[1, cy, cx] = 1\n\n                if self.exp_time > 0:\n                    zero_inds = depth_feature[1] == 0\n                    depth_map = depth_feature[0]\n                    depth_map[zero_inds] = 9999\n                    for i in range(self.exp_time):\n                        depth_feature_new = torch.zeros_like(depth_map) + 9999\n                        depth_feature_new[1:] = torch.minimum(depth_feature_new[1:], depth_map[:-1])\n                        depth_feature_new[:-1] = torch.minimum(depth_feature_new[:-1], depth_map[1:])\n                        depth_feature_new[:, 1:] = torch.minimum(depth_feature_new[:, 1:], depth_map[:, :-1])\n                        depth_feature_new[:, :-1] = torch.minimum(depth_feature_new[:, :-1], depth_map[:, 1:])\n\n                        depth_map = torch.where(zero_inds, depth_feature_new, depth_map)\n                        zero_inds = depth_map == 9999\n                    depth_map[zero_inds] = 0\n                    depth_feature[0] = depth_map\n                    depth_feature[1, torch.logical_not(zero_inds)] = 1\n\n                depth_features_view.append(depth_feature)\n            depth_features_view = torch.stack(depth_features_view, dim=0)  # [num_scale, 2, h_scale_shape, w_scale_shape)\n            depth_features.append(depth_features_view)\n        depth_features = torch.stack(depth_features, dim=0)  # [num_view, num_scale, 2, h_scale_shape, w_scale_shape)\n\n        depth_features[:, :, 0] = (depth_features[:, :, 0] - self.depth_mean) / np.sqrt(self.depth_var)\n        depth_features[:, :, 0] = depth_features[:, :, 0] * depth_features[:, :, 1]\n        results['sparse_depth'] = depth_features\n\n        return results\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/test_time_aug.py",
    "content": "import mmcv\nimport warnings\nfrom copy import deepcopy\n\nfrom mmdet.datasets.builder import PIPELINES\nfrom mmdet.datasets.pipelines import Compose\n\n\n@PIPELINES.register_module()\nclass MultiScaleFlipAug3D(object):\n    \"\"\"Test-time augmentation with multiple scales and flipping.\n\n    Args:\n        transforms (list[dict]): Transforms to apply in each augmentation.\n        img_scale (tuple | list[tuple]: Images scales for resizing.\n        pts_scale_ratio (float | list[float]): Points scale ratios for\n            resizing.\n        flip (bool): Whether apply flip augmentation. Defaults to False.\n        flip_direction (str | list[str]): Flip augmentation directions\n            for images, options are \"horizontal\" and \"vertical\".\n            If flip_direction is list, multiple flip augmentations will\n            be applied. It has no effect when ``flip == False``.\n            Defaults to \"horizontal\".\n        pcd_horizontal_flip (bool): Whether apply horizontal flip augmentation\n            to point cloud. Defaults to True. Note that it works only when\n            'flip' is turned on.\n        pcd_vertical_flip (bool): Whether apply vertical flip augmentation\n            to point cloud. Defaults to True. Note that it works only when\n            'flip' is turned on.\n    \"\"\"\n\n    def __init__(self,\n                 transforms,\n                 img_scale,\n                 pts_scale_ratio,\n                 pts_rotation=0,\n                 flip=False,\n                 flip_direction='horizontal',\n                 pcd_horizontal_flip=False,\n                 pcd_vertical_flip=False):\n        self.transforms = Compose(transforms)\n        self.img_scale = img_scale if isinstance(img_scale,\n                                                 list) else [img_scale]\n        self.pts_scale_ratio = pts_scale_ratio \\\n            if isinstance(pts_scale_ratio, list) else[float(pts_scale_ratio)]\n\n        self.pts_rotation = pts_rotation if isinstance(pts_rotation, list) else[float(pts_rotation)]\n\n        assert mmcv.is_list_of(self.img_scale, tuple)\n        assert mmcv.is_list_of(self.pts_scale_ratio, float)\n        assert mmcv.is_list_of(self.pts_rotation, float)\n\n        self.flip = flip\n        self.pcd_horizontal_flip = pcd_horizontal_flip\n        self.pcd_vertical_flip = pcd_vertical_flip\n\n        self.flip_direction = flip_direction if isinstance(\n            flip_direction, list) else [flip_direction]\n        assert mmcv.is_list_of(self.flip_direction, str)\n        if not self.flip and self.flip_direction != ['horizontal']:\n            warnings.warn(\n                'flip_direction has no effect when flip is set to False')\n        if (self.flip and not any([(t['type'] == 'RandomFlip3D'\n                                    or t['type'] == 'RandomFlip')\n                                   for t in transforms])):\n            warnings.warn(\n                'flip has no effect when RandomFlip is not in transforms')\n\n    def __call__(self, results):\n        \"\"\"Call function to augment common fields in results.\n\n        Args:\n            results (dict): Result dict contains the data to augment.\n\n        Returns:\n            dict: The result dict contains the data that is augmented with \\\n                different scales and flips.\n        \"\"\"\n        aug_data = []\n\n        # modified from `flip_aug = [False, True] if self.flip else [False]`\n        # to reduce unnecessary scenes when using double flip 
augmentation\n        # during test time\n        flip_aug = [True] if self.flip else [False]\n        pcd_horizontal_flip_aug = [False, True] \\\n            if self.flip and self.pcd_horizontal_flip else [False]\n        pcd_vertical_flip_aug = [False, True] \\\n            if self.flip and self.pcd_vertical_flip else [False]\n        for scale in self.img_scale:\n            for pts_scale_ratio in self.pts_scale_ratio:\n                for pts_rotation in self.pts_rotation:\n                    for flip in flip_aug:\n                        for pcd_horizontal_flip in pcd_horizontal_flip_aug:\n                            for pcd_vertical_flip in pcd_vertical_flip_aug:\n                                for direction in self.flip_direction:\n                                    # results.copy will cause bug\n                                    # since it is shallow copy\n                                    _results = deepcopy(results)\n                                    _results['scale'] = scale\n                                    _results['flip'] = flip\n                                    _results['pcd_scale_factor'] = \\\n                                        pts_scale_ratio\n                                    _results['flip_direction'] = direction\n                                    _results['pcd_horizontal_flip'] = \\\n                                        pcd_horizontal_flip\n                                    _results['pcd_vertical_flip'] = \\\n                                        pcd_vertical_flip\n                                    _results['pcd_rotation_angle'] = pts_rotation\n                                    data = self.transforms(_results)\n                                    aug_data.append(data)\n        # list of dict to dict of list\n        aug_data_dict = {key: [] for key in aug_data[0]}\n        for data in aug_data:\n            for key, val in data.items():\n                aug_data_dict[key].append(val)\n        return aug_data_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(transforms={self.transforms}, '\n        repr_str += f'img_scale={self.img_scale}, flip={self.flip}, '\n        repr_str += f'pts_scale_ratio={self.pts_scale_ratio}, '\n        repr_str += f'flip_direction={self.flip_direction})'\n        return repr_str\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/transforms_2d.py",
    "content": "import copy\r\nimport inspect\r\nimport math\r\nimport warnings\r\n\r\nimport cv2\r\nimport mmcv\r\nimport numpy as np\r\nfrom numpy import random\r\n\r\nfrom mmdet.datasets.builder import PIPELINES\r\n\r\n\r\n@PIPELINES.register_module()\r\nclass OurRandomAffine:\r\n    \"\"\"Random affine transform data augmentation.\r\n    This operation randomly generates affine transform matrix which including\r\n    rotation, translation, shear and scaling transforms.\r\n    Args:\r\n        max_rotate_degree (float): Maximum degrees of rotation transform.\r\n            Default: 10.\r\n        max_translate_ratio (float): Maximum ratio of translation.\r\n            Default: 0.1.\r\n        scaling_ratio_range (tuple[float]): Min and max ratio of\r\n            scaling transform. Default: (0.5, 1.5).\r\n        max_shear_degree (float): Maximum degrees of shear\r\n            transform. Default: 2.\r\n        border (tuple[int]): Distance from height and width sides of input\r\n            image to adjust output shape. Only used in mosaic dataset.\r\n            Default: (0, 0).\r\n        border_val (tuple[int]): Border padding values of 3 channels.\r\n            Default: (114, 114, 114).\r\n        min_bbox_size (float): Width and height threshold to filter bboxes.\r\n            If the height or width of a box is smaller than this value, it\r\n            will be removed. Default: 2.\r\n        min_area_ratio (float): Threshold of area ratio between\r\n            original bboxes and wrapped bboxes. If smaller than this value,\r\n            the box will be removed. Default: 0.2.\r\n        max_aspect_ratio (float): Aspect ratio of width and height\r\n            threshold to filter bboxes. If max(h/w, w/h) larger than this\r\n            value, the box will be removed.\r\n        bbox_clip_border (bool, optional): Whether to clip the objects outside\r\n            the border of the image. In some dataset like MOT17, the gt bboxes\r\n            are allowed to cross the border of images. Therefore, we don't\r\n            need to clip the gt bboxes in these cases. Defaults to True.\r\n        skip_filter (bool): Whether to skip filtering rules. If it\r\n            is True, the filter rule will not be applied, and the\r\n            `min_bbox_size` and `min_area_ratio` and `max_aspect_ratio`\r\n            is invalid. 
Default to True.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 # max_translate_ratio=0.1,\r\n                 scaling_ratio_range=(0.5, 1.5),\r\n                 flip_ratio=0.5,\r\n                 border=(0, 0),\r\n                 border_val=(103.53, 116.28, 123.675),\r\n                 bbox_clip_border=True,\r\n                 flip_sync_3d=False,\r\n                 scaling_sync_view=False,\r\n                 trans_when_scaling=True,\r\n    ):\r\n        # assert 0 <= max_translate_ratio <= 1\r\n        assert scaling_ratio_range[0] <= scaling_ratio_range[1]\r\n        assert scaling_ratio_range[0] > 0\r\n        # self.max_translate_ratio = max_translate_ratio\r\n        self.scaling_ratio_range = scaling_ratio_range\r\n        self.flip_ratio = flip_ratio\r\n        self.border = border\r\n        self.border_val = border_val\r\n        self.bbox_clip_border = bbox_clip_border\r\n        self.flip_sync = flip_sync_3d\r\n        self.scaling_sync_view = scaling_sync_view\r\n        self.trans_when_scaling = trans_when_scaling\r\n\r\n    def _transform_bbox(self, results, warp_mats, flips, width, height):\r\n        valid_mask = np.ones(results['gt_labels'].shape[0]) > 0\r\n\r\n        if 'gt_bboxes_cam_view' in results:\r\n            bboxes_cam = results['gt_bboxes_cam_view']\r\n        else:\r\n            bboxes_cam = None\r\n\r\n        for view_id in range(len(warp_mats)):\r\n            warp_matrix = warp_mats[view_id]\r\n            bbox_mask = results['gt_labels'][:, 1] == view_id\r\n\r\n            if np.sum(bbox_mask) == 0:\r\n                continue\r\n\r\n            flip = flips[view_id]\r\n            flip_matrix = self._get_flip_matrix(flip, width)\r\n\r\n            if bboxes_cam is not None:\r\n                if flip:\r\n                    bboxes_cam.tensor[bbox_mask, 0::7] = -bboxes_cam.tensor[bbox_mask, 0::7]\r\n                    bboxes_cam.tensor[bbox_mask, 6] = -bboxes_cam.tensor[bbox_mask, 6] + np.pi\r\n\r\n            bbox_view = results['gt_bboxes'][bbox_mask]\r\n            centers_view = results['gt_img_centers_view'][bbox_mask, :2]\r\n            num_bboxes = bbox_view.shape[0]\r\n\r\n            xtl = bbox_view[:, 0] - bbox_view[:, 2] / 2\r\n            ytl = bbox_view[:, 1] - bbox_view[:, 3] / 2\r\n            xtr = bbox_view[:, 0] + bbox_view[:, 2] / 2\r\n            ytr = bbox_view[:, 1] - bbox_view[:, 3] / 2\r\n\r\n            xbl = bbox_view[:, 0] - bbox_view[:, 2] / 2\r\n            ybl = bbox_view[:, 1] + bbox_view[:, 3] / 2\r\n            xbr = bbox_view[:, 0] + bbox_view[:, 2] / 2\r\n            ybr = bbox_view[:, 1] + bbox_view[:, 3] / 2\r\n\r\n            xs = np.vstack([xtl, xtr, xbl, xbr]).T  # [N, 4]\r\n            ys = np.vstack([ytl, ytr, ybl, ybr]).T  # [N, 4]\r\n\r\n            xs = xs.reshape(-1)  # [N*4,]\r\n            ys = ys.reshape(-1)  # [N*4,]\r\n            ones = np.ones_like(ys)\r\n\r\n            points = np.vstack([xs, ys, ones])  # [3, N*4]\r\n\r\n            warp_points = warp_matrix @ flip_matrix @ points  # [3, N*4]\r\n            warp_points = warp_points[:2] / warp_points[2]\r\n            xs = warp_points[0].reshape(num_bboxes, 4)  # [N, 4]\r\n            ys = warp_points[1].reshape(num_bboxes, 4)  # [N, 4]\r\n\r\n            xs_min = xs.min(1)  # [N, ]\r\n            ys_min = ys.min(1)  # [N, ]\r\n            xs_max = xs.max(1)  # [N, ]\r\n            ys_max = ys.max(1)  # [N, ]\r\n\r\n            if self.bbox_clip_border:\r\n                xs_min = xs_min.clip(0, width)\r\n                
xs_max = xs_max.clip(0, width)\r\n                ys_min = ys_min.clip(0, height)\r\n                ys_max = ys_max.clip(0, height)\r\n\r\n            cxs = (xs_min + xs_max) / 2\r\n            cys = (ys_min + ys_max) / 2\r\n            ws = xs_max - xs_min\r\n            hs = ys_max - ys_min\r\n\r\n            warp_bboxes = np.vstack((cxs, cys, ws, hs)).T  # [N, 4]\r\n\r\n            ones = np.ones_like(centers_view[:, :1])  # [N, 1]\r\n            center_points = np.concatenate([centers_view, ones], axis=1).T  # [3, N]\r\n\r\n            warp_points = warp_matrix @ flip_matrix @ center_points  # [3, N]\r\n            warp_points = warp_points[:2] / warp_points[2]\r\n            new_center_points = warp_points.T  # [N, 2]\r\n\r\n            valid_mask_view = (new_center_points[:, 0] > 0) & (new_center_points[:, 0] < width-1) & (new_center_points[:, 1] > 0) & (new_center_points[:, 1] < height-1)\r\n\r\n            valid_mask[bbox_mask] = valid_mask_view\r\n\r\n            results['gt_bboxes'][bbox_mask] = warp_bboxes\r\n            results['gt_img_centers_view'][bbox_mask, :2] = new_center_points\r\n\r\n        if 'gt_bboxes_cam_view' in results:\r\n            results['gt_bboxes_cam_view'] = bboxes_cam[valid_mask]\r\n\r\n        results['gt_bboxes_lidar_view'] = results['gt_bboxes_lidar_view'][valid_mask]\r\n\r\n        results['gt_bboxes'] = results['gt_bboxes'][valid_mask]\r\n        results['gt_img_centers_view'] = results['gt_img_centers_view'][valid_mask]\r\n        results['gt_pts_centers_view'] = results['gt_pts_centers_view'][valid_mask]\r\n        results['gt_labels'] = results['gt_labels'][valid_mask]\r\n        return results\r\n\r\n    def _transform_camera(self, results, warp_mats, flips, width):\r\n        for id in range(len(warp_mats)):\r\n            flip = flips[id]\r\n            flip_matrix = self._get_flip_matrix(flip, width)\r\n\r\n            intrinsic = results['cam_intrinsic'][id]\r\n            warp_matrix = warp_mats[id] @ flip_matrix\r\n\r\n            # intrinsic = warp_matrix @ intrinsic\r\n            # results['cam_intrinsic'][id] = intrinsic\r\n\r\n            viewpad = np.eye(4)\r\n            viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = warp_matrix\r\n            results['lidar2img'][id] = viewpad @ results['lidar2img'][id]\r\n\r\n            if flip:\r\n                flip_matrix = np.eye(3)\r\n                flip_matrix[0, 0] = -1\r\n                results['lidar2cam_r'][id] = flip_matrix @ results['lidar2cam_r'][id]\r\n                results['lidar2cam_t'][id] = flip_matrix @ results['lidar2cam_t'][id]\r\n                results['cam_intrinsic'][id][0, 2] = width - results['cam_intrinsic'][id][0, 2]\r\n\r\n            intrinsic = warp_mats[id] @ intrinsic\r\n            results['cam_intrinsic'][id] = intrinsic\r\n        return results\r\n\r\n    def __call__(self, results):\r\n        translate_mats = []\r\n        scale_mats = []\r\n        warp_mats = []\r\n        flips = []\r\n        scaling_ratios = []\r\n        valid_shapes = []\r\n        results['image_flip'] = []\r\n\r\n        flip_3d = False\r\n        if 'pcd_horizontal_flip' in results and results['pcd_horizontal_flip'] == True:\r\n            flip_3d = not flip_3d\r\n        if 'pcd_vertical_flip' in results and results['pcd_vertical_flip'] == True:\r\n            flip_3d = not flip_3d\r\n\r\n        if self.scaling_sync_view:\r\n            scaling_ratio = random.uniform(self.scaling_ratio_range[0], self.scaling_ratio_range[1])\r\n\r\n        for view_id in 
range(len(results['img'])):\r\n            img = results['img'][view_id]\r\n            height = img.shape[0] + self.border[0] * 2\r\n            width = img.shape[1] + self.border[1] * 2\r\n\r\n            if self.flip_sync:\r\n                flip = flip_3d\r\n            else:\r\n                flip = True if np.random.random() < self.flip_ratio else False\r\n\r\n            flips.append(flip)\r\n            if flip:\r\n                results['image_flip'].append(True)\r\n                img = cv2.flip(img, 1)\r\n            else:\r\n                results['image_flip'].append(False)\r\n\r\n            # Scaling\r\n            if not self.scaling_sync_view:\r\n                scaling_ratio = random.uniform(self.scaling_ratio_range[0], self.scaling_ratio_range[1])\r\n            scaling_matrix = self._get_scaling_matrix(scaling_ratio)\r\n            scaling_ratios.append(scaling_ratio)\r\n            reduction_ratio = min(1.0, scaling_ratio)\r\n            valid_shapes.append([reduction_ratio*width, reduction_ratio*height])\r\n\r\n            # Translation\r\n            if self.trans_when_scaling:\r\n                if scaling_ratio <= 1:\r\n                    trans_x = 0\r\n                    trans_y = 0\r\n                else:\r\n                    trans_x = random.uniform((1 - scaling_ratio) * width, 0)\r\n                    trans_y = random.uniform((1 - scaling_ratio) * height, 0)\r\n            else:\r\n                trans_x = 0\r\n                trans_y = 0\r\n\r\n            # trans_x = random.uniform(-self.max_translate_ratio, self.max_translate_ratio) * width\r\n            # trans_y = random.uniform(-self.max_translate_ratio, self.max_translate_ratio) * height\r\n            translate_matrix = self._get_translation_matrix(trans_x, trans_y)\r\n\r\n            warp_matrix = translate_matrix  @ scaling_matrix\r\n\r\n            img = cv2.warpPerspective(\r\n                img,\r\n                warp_matrix,\r\n                dsize=(width, height),\r\n                borderValue=self.border_val\r\n            )\r\n\r\n            results['img'][view_id] = img\r\n            translate_mats.append(translate_matrix)\r\n            scale_mats.append(scaling_matrix)\r\n            warp_mats.append(warp_matrix)\r\n            # results['img_shape'] = img.shape\r\n\r\n        results['valid_shape'] = np.array(valid_shapes)\r\n        results['img_scale_ratios'] = np.array(scaling_ratios)\r\n        if 'gt_bboxes' in results:\r\n            results = self._transform_bbox(results, warp_mats, flips, width, height)\r\n        results = self._transform_camera(results, warp_mats, flips, width)\r\n\r\n        return results\r\n\r\n    def __repr__(self):\r\n        repr_str = self.__class__.__name__\r\n        # repr_str += f'max_translate_ratio={self.max_translate_ratio}, '\r\n        repr_str += f'scaling_ratio={self.scaling_ratio_range}, '\r\n        repr_str += f'flip_ratio={self.flip_ratio}, '\r\n        repr_str += f'border={self.border}, '\r\n        repr_str += f'border_val={self.border_val}, '\r\n        return repr_str\r\n\r\n    @staticmethod\r\n    def _get_scaling_matrix(scale_ratio):\r\n        scaling_matrix = np.array(\r\n            [[scale_ratio, 0., 0.], [0., scale_ratio, 0.], [0., 0., 1.]],\r\n            dtype=np.float32)\r\n        return scaling_matrix\r\n\r\n    @staticmethod\r\n    def _get_translation_matrix(x, y):\r\n        translation_matrix = np.array([[1, 0., x], [0., 1, y], [0., 0., 1.]],\r\n                                      
dtype=np.float32)\r\n        return translation_matrix\r\n\r\n    @staticmethod\r\n    def _get_flip_matrix(flip, width):\r\n        if flip:\r\n            flip_matrix = np.array([\r\n                [-1, 0, width],\r\n                [0, 1, 0],\r\n                [0, 0, 1]\r\n            ])\r\n        else:\r\n            flip_matrix = np.eye(3)\r\n        return flip_matrix\r\n\r\n\r\n@PIPELINES.register_module()\r\nclass PhotoMetricDistortionMultiViewImage:\r\n    \"\"\"Apply photometric distortion to image sequentially, every transformation\r\n    is applied with a probability of 0.5. The position of random contrast is in\r\n    second or second to last.\r\n    1. random brightness\r\n    2. random contrast (mode 0)\r\n    3. convert color from BGR to HSV\r\n    4. random saturation\r\n    5. random hue\r\n    6. convert color from HSV to BGR\r\n    7. random contrast (mode 1)\r\n    8. randomly swap channels\r\n    Args:\r\n        brightness_delta (int): delta of brightness.\r\n        contrast_range (tuple): range of contrast.\r\n        saturation_range (tuple): range of saturation.\r\n        hue_delta (int): delta of hue.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 brightness_delta=32,\r\n                 contrast_range=(0.5, 1.5),\r\n                 saturation_range=(0.5, 1.5),\r\n                 hue_delta=18,\r\n                 swap_channel=True):\r\n        self.brightness_delta = brightness_delta\r\n        self.contrast_lower, self.contrast_upper = contrast_range\r\n        self.saturation_lower, self.saturation_upper = saturation_range\r\n        self.hue_delta = hue_delta\r\n        self.swap_channel = swap_channel\r\n\r\n    def __call__(self, results):\r\n        \"\"\"Call function to perform photometric distortion on images.\r\n        Args:\r\n            results (dict): Result dict from loading pipeline.\r\n        Returns:\r\n            dict: Result dict with images distorted.\r\n        \"\"\"\r\n        imgs = results['img']\r\n        new_imgs = []\r\n        for img_ in imgs:\r\n            img = img_.astype(np.float32)\r\n            assert img.dtype == np.float32, \\\r\n                'PhotoMetricDistortion needs the input image of dtype np.float32,'\\\r\n                ' please set \"to_float32=True\" in \"LoadImageFromFile\" pipeline'\r\n            # random brightness\r\n            if random.randint(2):\r\n                delta = random.uniform(-self.brightness_delta,\r\n                                    self.brightness_delta)\r\n                img += delta\r\n                img = np.clip(img, a_max=255, a_min=0)\r\n\r\n            # mode == 0 --> do random contrast first\r\n            # mode == 1 --> do random contrast last\r\n            mode = random.randint(2)\r\n            if mode == 1:\r\n                if random.randint(2):\r\n                    alpha = random.uniform(self.contrast_lower,\r\n                                        self.contrast_upper)\r\n                    img *= alpha\r\n                    img = np.clip(img, a_max=255, a_min=0)\r\n\r\n            # convert color from BGR to HSV\r\n            img = mmcv.bgr2hsv(img)\r\n\r\n            # random saturation\r\n            if random.randint(2):\r\n                img[..., 1] *= random.uniform(self.saturation_lower,\r\n                                            self.saturation_upper)\r\n                img[..., 1] = np.clip(img[..., 1], a_max=1, a_min=0)\r\n\r\n            # random hue\r\n            if random.randint(2):\r\n                img[..., 
0] += random.uniform(-self.hue_delta, self.hue_delta)\r\n                img[..., 0][img[..., 0] > 360] -= 360\r\n                img[..., 0][img[..., 0] < 0] += 360\r\n\r\n            # convert color from HSV to BGR\r\n            img = mmcv.hsv2bgr(img)\r\n\r\n            # random contrast\r\n            if mode == 0:\r\n                if random.randint(2):\r\n                    # import pdb\r\n                    # pdb.set_trace()\r\n                    alpha = random.uniform(self.contrast_lower, self.contrast_upper)\r\n                    img *= alpha\r\n                    # import pdb\r\n                    # pdb.set_trace()\r\n                    img = np.clip(img, a_max=255, a_min=0)\r\n\r\n            # randomly swap channels\r\n            if self.swap_channel:\r\n                if random.randint(2):\r\n                    img = img[..., random.permutation(3)]\r\n            new_imgs.append(img.astype(np.uint8))\r\n        results['img'] = new_imgs\r\n        return results\r\n\r\n    def __repr__(self):\r\n        repr_str = self.__class__.__name__\r\n        repr_str += f'(\\nbrightness_delta={self.brightness_delta},\\n'\r\n        repr_str += 'contrast_range='\r\n        repr_str += f'{(self.contrast_lower, self.contrast_upper)},\\n'\r\n        repr_str += 'saturation_range='\r\n        repr_str += f'{(self.saturation_lower, self.saturation_upper)},\\n'\r\n        repr_str += f'hue_delta={self.hue_delta})'\r\n        return repr_str\r\n"
  },
  {
    "path": "mmdet3d/datasets/pipelines/transforms_3d.py",
    "content": "import numpy as np\nfrom mmcv import is_tuple_of\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet3d.core import VoxelGenerator\nfrom mmdet3d.core.bbox import box_np_ops\nfrom mmdet.datasets.builder import PIPELINES\nfrom mmdet.datasets.pipelines import RandomFlip\nfrom ..registry import OBJECTSAMPLERS\nfrom .data_augment_utils import noise_per_object_v3_\n\n\n@PIPELINES.register_module()\nclass RandomFlip3D(RandomFlip):\n    \"\"\"Flip the points & bbox.\n\n    If the input dict contains the key \"flip\", then the flag will be used,\n    otherwise it will be randomly decided by a ratio specified in the init\n    method.\n\n    Args:\n        sync_2d (bool, optional): Whether to apply flip according to the 2D\n            images. If True, it will apply the same flip as that to 2D images.\n            If False, it will decide whether to flip randomly and independently\n            to that of 2D images. Defaults to True.\n        flip_ratio_bev_horizontal (float, optional): The flipping probability\n            in horizontal direction. Defaults to 0.0.\n        flip_ratio_bev_vertical (float, optional): The flipping probability\n            in vertical direction. Defaults to 0.0.\n    \"\"\"\n\n    def __init__(self,\n                 sync_2d=True,\n                 flip_ratio_bev_horizontal=0.0,\n                 flip_ratio_bev_vertical=0.0,\n                 **kwargs):\n        super(RandomFlip3D, self).__init__(\n            flip_ratio=flip_ratio_bev_horizontal, **kwargs)\n        self.sync_2d = sync_2d\n        self.flip_ratio_bev_vertical = flip_ratio_bev_vertical\n        if flip_ratio_bev_horizontal is not None:\n            assert isinstance(\n                flip_ratio_bev_horizontal,\n                (int, float)) and 0 <= flip_ratio_bev_horizontal <= 1\n        if flip_ratio_bev_vertical is not None:\n            assert isinstance(\n                flip_ratio_bev_vertical,\n                (int, float)) and 0 <= flip_ratio_bev_vertical <= 1\n\n    def random_flip_data_3d(self, input_dict, direction='horizontal'):\n        \"\"\"Flip 3D data randomly.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n            direction (str): Flip direction. 
Default: horizontal.\n\n        Returns:\n            dict: Flipped results, 'points', 'bbox3d_fields' keys are \\\n                updated in the result dict.\n        \"\"\"\n        assert direction in ['horizontal', 'vertical']\n        if len(input_dict['bbox3d_fields']) == 0:  # test mode\n            input_dict['bbox3d_fields'].append('empty_box3d')\n            input_dict['empty_box3d'] = input_dict['box_type_3d'](\n                np.array([], dtype=np.float32))\n        assert len(input_dict['bbox3d_fields']) == 1\n        for key in input_dict['bbox3d_fields']:\n            input_dict['points'] = input_dict[key].flip(\n                direction, points=input_dict['points'])\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to flip points, values in the ``bbox3d_fields`` and \\\n        also flip 2D image and its annotations.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Flipped results, 'flip', 'flip_direction', \\\n                'pcd_horizontal_flip' and 'pcd_vertical_flip' keys are added \\\n                into result dict.\n        \"\"\"\n        # filp 2D image and its annotations\n        super(RandomFlip3D, self).__call__(input_dict)\n\n        if self.sync_2d:\n            input_dict['pcd_horizontal_flip'] = input_dict['flip']\n            input_dict['pcd_vertical_flip'] = False\n        else:\n            if 'pcd_horizontal_flip' not in input_dict:\n                flip_horizontal = True if np.random.rand(\n                ) < self.flip_ratio else False\n                input_dict['pcd_horizontal_flip'] = flip_horizontal\n            if 'pcd_vertical_flip' not in input_dict:\n                flip_vertical = True if np.random.rand(\n                ) < self.flip_ratio_bev_vertical else False\n                input_dict['pcd_vertical_flip'] = flip_vertical\n\n        if 'transformation_3d_flow' not in input_dict:\n            input_dict['transformation_3d_flow'] = []\n\n        if input_dict['pcd_horizontal_flip']:\n            self.random_flip_data_3d(input_dict, 'horizontal')\n            input_dict['transformation_3d_flow'].extend(['HF'])\n        if input_dict['pcd_vertical_flip']:\n            self.random_flip_data_3d(input_dict, 'vertical')\n            input_dict['transformation_3d_flow'].extend(['VF'])\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(sync_2d={},'.format(self.sync_2d)\n        repr_str += 'flip_ratio_bev_vertical={})'.format(\n            self.flip_ratio_bev_vertical)\n        return repr_str\n\n@PIPELINES.register_module()\nclass OurRandomFlip3D(object):\n    \"\"\"Flip the points & bbox.\n\n    If the input dict contains the key \"flip\", then the flag will be used,\n    otherwise it will be randomly decided by a ratio specified in the init\n    method.\n\n    Args:\n        sync_2d (bool, optional): Whether to apply flip according to the 2D\n            images. If True, it will apply the same flip as that to 2D images.\n            If False, it will decide whether to flip randomly and independently\n            to that of 2D images. Defaults to True.\n        flip_ratio_bev_horizontal (float, optional): The flipping probability\n            in horizontal direction. Defaults to 0.0.\n        flip_ratio_bev_vertical (float, optional): The flipping probability\n            in vertical direction. 
Defaults to 0.0.\n    \"\"\"\n\n    def __init__(self,\n                 sync_2d=True,\n                 flip_ratio_bev_horizontal=0.0,\n                 flip_ratio_bev_vertical=0.0,\n                 **kwargs):\n        # super(OurRandomFlip3D, self).__init__(\n        #     flip_ratio=flip_ratio_bev_horizontal, **kwargs)\n        self.sync_2d = sync_2d\n        self.flip_ratio = flip_ratio_bev_horizontal\n        self.flip_ratio_bev_vertical = flip_ratio_bev_vertical\n        if flip_ratio_bev_horizontal is not None:\n            assert isinstance(\n                flip_ratio_bev_horizontal,\n                (int, float)) and 0 <= flip_ratio_bev_horizontal <= 1\n        if flip_ratio_bev_vertical is not None:\n            assert isinstance(\n                flip_ratio_bev_vertical,\n                (int, float)) and 0 <= flip_ratio_bev_vertical <= 1\n\n    def random_flip_data_3d(self, input_dict, direction='horizontal'):\n        \"\"\"Flip 3D data randomly.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n            direction (str): Flip direction. Default: horizontal.\n\n        Returns:\n            dict: Flipped results, 'points', 'bbox3d_fields' keys are \\\n                updated in the result dict.\n        \"\"\"\n        assert direction in ['horizontal', 'vertical']\n        if len(input_dict['bbox3d_fields']) == 0:  # test mode\n            input_dict['bbox3d_fields'].append('empty_box3d')\n            input_dict['empty_box3d'] = input_dict['box_type_3d'](\n                np.array([], dtype=np.float32))\n        assert len(input_dict['bbox3d_fields']) == 1\n        for key in input_dict['bbox3d_fields']:\n            input_dict['points'] = input_dict[key].flip(\n                direction, points=input_dict['points'])\n\n        if direction == 'horizontal':\n            diag = np.ones(3)\n            diag[1] = -1\n        elif direction == 'vertical':\n            diag = np.ones(3)\n            diag[0] = -1\n\n        matrix = np.diag(diag)\n        for id in range(len(input_dict['lidar2cam_r'])):\n            input_dict['lidar2cam_r'][id] = input_dict['lidar2cam_r'][id] @ matrix\n\n        if 'gt_pts_centers_view' in input_dict and input_dict['gt_pts_centers_view'].shape[0] > 0:\n            input_dict['gt_pts_centers_view'] = input_dict['gt_pts_centers_view'] @ matrix\n\n        if 'gt_bboxes_lidar_view' in input_dict:\n            input_dict['gt_bboxes_lidar_view'].flip(direction)\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to flip points, values in the ``bbox3d_fields`` and \\\n        also flip 2D image and its annotations.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Flipped results, 'flip', 'flip_direction', \\\n                'pcd_horizontal_flip' and 'pcd_vertical_flip' keys are added \\\n                into result dict.\n        \"\"\"\n        # filp 2D image and its annotations\n        # super(OurRandomFlip3D, self).__call__(input_dict)\n\n        if self.sync_2d:\n            input_dict['pcd_horizontal_flip'] = input_dict['flip']\n            input_dict['pcd_vertical_flip'] = False\n        else:\n            if 'pcd_horizontal_flip' not in input_dict:\n                flip_horizontal = True if np.random.rand(\n                ) < self.flip_ratio else False\n                input_dict['pcd_horizontal_flip'] = flip_horizontal\n            if 'pcd_vertical_flip' not in input_dict:\n                flip_vertical = True if 
np.random.rand(\n                ) < self.flip_ratio_bev_vertical else False\n                input_dict['pcd_vertical_flip'] = flip_vertical\n\n        if 'transformation_3d_flow' not in input_dict:\n            input_dict['transformation_3d_flow'] = []\n\n        if input_dict['pcd_horizontal_flip']:\n            self.random_flip_data_3d(input_dict, 'horizontal')\n            input_dict['transformation_3d_flow'].extend(['HF'])\n        if input_dict['pcd_vertical_flip']:\n            self.random_flip_data_3d(input_dict, 'vertical')\n            input_dict['transformation_3d_flow'].extend(['VF'])\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(sync_2d={},'.format(self.sync_2d)\n        repr_str += 'flip_ratio_bev_vertical={})'.format(\n            self.flip_ratio_bev_vertical)\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass ObjectSample(object):\n    \"\"\"Sample GT objects to the data.\n\n    Args:\n        db_sampler (dict): Config dict of the database sampler.\n        sample_2d (bool): Whether to also paste 2D image patch to the images\n            This should be true when applying multi-modality cut-and-paste.\n            Defaults to False.\n    \"\"\"\n\n    def __init__(self, db_sampler, sample_2d=False):\n        self.sampler_cfg = db_sampler\n        self.sample_2d = sample_2d\n        if 'type' not in db_sampler.keys():\n            db_sampler['type'] = 'DataBaseSampler'\n        self.db_sampler = build_from_cfg(db_sampler, OBJECTSAMPLERS)\n\n    @staticmethod\n    def remove_points_in_boxes(points, boxes):\n        \"\"\"Remove the points in the sampled bounding boxes.\n\n        Args:\n            points (np.ndarray): Input point cloud array.\n            boxes (np.ndarray): Sampled ground truth boxes.\n\n        Returns:\n            np.ndarray: Points with those in the boxes removed.\n        \"\"\"\n        masks = box_np_ops.points_in_rbbox(points.coord.numpy(), boxes)\n        points = points[np.logical_not(masks.any(-1))]\n        return points\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to sample ground truth objects to the data.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after object sampling augmentation, \\\n                'points', 'gt_bboxes_3d', 'gt_labels_3d' keys are updated \\\n                in the result dict.\n        \"\"\"\n        gt_bboxes_3d = input_dict['gt_bboxes_3d']\n        gt_labels_3d = input_dict['gt_labels_3d']\n\n        # change to float for blending operation\n        points = input_dict['points']\n        if self.sample_2d:\n            img = input_dict['img']\n            gt_bboxes_2d = input_dict['gt_bboxes']\n            # Assume for now 3D & 2D bboxes are the same\n            sampled_dict = self.db_sampler.sample_all(\n                gt_bboxes_3d.tensor.numpy(),\n                gt_labels_3d,\n                gt_bboxes_2d=gt_bboxes_2d,\n                img=img)\n        else:\n            sampled_dict = self.db_sampler.sample_all(\n                gt_bboxes_3d.tensor.numpy(), gt_labels_3d, img=None)\n\n        if sampled_dict is not None:\n            sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']\n            sampled_points = sampled_dict['points']\n            sampled_gt_labels = sampled_dict['gt_labels_3d']\n\n            gt_labels_3d = 
np.concatenate([gt_labels_3d, sampled_gt_labels],\n                                          axis=0)\n            gt_bboxes_3d = gt_bboxes_3d.new_box(\n                np.concatenate(\n                    [gt_bboxes_3d.tensor.numpy(), sampled_gt_bboxes_3d]))\n\n            points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)\n            # check the points dimension\n            points = points.cat([sampled_points, points])\n\n            if self.sample_2d:\n                sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d']\n                gt_bboxes_2d = np.concatenate(\n                    [gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)\n\n                input_dict['gt_bboxes'] = gt_bboxes_2d\n                input_dict['img'] = sampled_dict['img']\n\n        input_dict['gt_bboxes_3d'] = gt_bboxes_3d\n        input_dict['gt_labels_3d'] = gt_labels_3d.astype(np.int64)\n        input_dict['points'] = points\n\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f' sample_2d={self.sample_2d},'\n        repr_str += f' data_root={self.sampler_cfg.data_root},'\n        repr_str += f' info_path={self.sampler_cfg.info_path},'\n        repr_str += f' rate={self.sampler_cfg.rate},'\n        repr_str += f' prepare={self.sampler_cfg.prepare},'\n        repr_str += f' classes={self.sampler_cfg.classes},'\n        repr_str += f' sample_groups={self.sampler_cfg.sample_groups}'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass ObjectNoise(object):\n    \"\"\"Apply noise to each GT object in the scene.\n\n    Args:\n        translation_std (list[float], optional): Standard deviation of the\n            distribution where translation noise is sampled from.\n            Defaults to [0.25, 0.25, 0.25].\n        global_rot_range (list[float], optional): Global rotation to the scene.\n            Defaults to [0.0, 0.0].\n        rot_range (list[float], optional): Object rotation range.\n            Defaults to [-0.15707963267, 0.15707963267].\n        num_try (int, optional): Number of times to try if the noise applied is\n            invalid. Defaults to 100.\n    \"\"\"\n\n    def __init__(self,\n                 translation_std=[0.25, 0.25, 0.25],\n                 global_rot_range=[0.0, 0.0],\n                 rot_range=[-0.15707963267, 0.15707963267],\n                 num_try=100):\n        self.translation_std = translation_std\n        self.global_rot_range = global_rot_range\n        self.rot_range = rot_range\n        self.num_try = num_try\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to apply noise to each ground truth in the scene.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after adding noise to each object, \\\n                'points', 'gt_bboxes_3d' keys are updated in the result dict.\n        \"\"\"\n        gt_bboxes_3d = input_dict['gt_bboxes_3d']\n        points = input_dict['points']\n\n        # TODO: check this inplace function\n        numpy_box = gt_bboxes_3d.tensor.numpy()\n        numpy_points = points.tensor.numpy()\n\n        noise_per_object_v3_(\n            numpy_box,\n            numpy_points,\n            rotation_perturb=self.rot_range,\n            center_noise_std=self.translation_std,\n            global_random_rot_range=self.global_rot_range,\n            num_try=self.num_try)\n\n        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box)\n        input_dict['points'] = points.new_point(numpy_points)\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(num_try={},'.format(self.num_try)\n        repr_str += ' translation_std={},'.format(self.translation_std)\n        repr_str += ' global_rot_range={},'.format(self.global_rot_range)\n        repr_str += ' rot_range={})'.format(self.rot_range)\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass GlobalRotScaleTrans(object):\n    \"\"\"Apply global rotation, scaling and translation to a 3D scene.\n\n    Args:\n        rot_range (list[float]): Range of rotation angle.\n            Defaults to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]).\n        scale_ratio_range (list[float]): Range of scale ratio.\n            Defaults to [0.95, 1.05].\n        translation_std (list[float]): The standard deviation of translation\n            noise. This applies random translation to a scene by a noise, which\n            is sampled from a Gaussian distribution whose standard deviation\n            is set by ``translation_std``. 
Defaults to [0, 0, 0]\n        shift_height (bool): Whether to shift height.\n            (the fourth dimension of indoor points) when scaling.\n            Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 rot_range=[-0.78539816, 0.78539816],\n                 scale_ratio_range=[0.95, 1.05],\n                 translation_std=[0, 0, 0],\n                 shift_height=False):\n        self.rot_range = rot_range\n        self.scale_ratio_range = scale_ratio_range\n        self.translation_std = translation_std\n        self.shift_height = shift_height\n\n    def _trans_bbox_points(self, input_dict):\n        \"\"\"Private function to translate bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after translation, 'points', 'pcd_trans' \\\n                and keys in input_dict['bbox3d_fields'] are updated \\\n                in the result dict.\n        \"\"\"\n        if not isinstance(self.translation_std, (list, tuple, np.ndarray)):\n            translation_std = [\n                self.translation_std, self.translation_std,\n                self.translation_std\n            ]\n        else:\n            translation_std = self.translation_std\n        translation_std = np.array(translation_std, dtype=np.float32)\n        trans_factor = np.random.normal(scale=translation_std, size=3).T\n\n        input_dict['points'].translate(trans_factor)\n        input_dict['pcd_trans'] = trans_factor\n        for key in input_dict['bbox3d_fields']:\n            input_dict[key].translate(trans_factor)\n\n    def _rot_bbox_points(self, input_dict):\n        \"\"\"Private function to rotate bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after rotation, 'points', 'pcd_rotation' \\\n                and keys in input_dict['bbox3d_fields'] are updated \\\n                in the result dict.\n        \"\"\"\n        rotation = self.rot_range\n        if not isinstance(rotation, list):\n            rotation = [-rotation, rotation]\n        noise_rotation = np.random.uniform(rotation[0], rotation[1])\n\n        for key in input_dict['bbox3d_fields']:\n            if len(input_dict[key].tensor) != 0:\n                points, rot_mat_T = input_dict[key].rotate(\n                    noise_rotation, input_dict['points'])\n                input_dict['points'] = points\n                input_dict['pcd_rotation'] = rot_mat_T\n        # input_dict['points_instance'].rotate(noise_rotation)\n\n    def _scale_bbox_points(self, input_dict):\n        \"\"\"Private function to scale bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'points'and keys in \\\n                input_dict['bbox3d_fields'] are updated in the result dict.\n        \"\"\"\n        scale = input_dict['pcd_scale_factor']\n        points = input_dict['points']\n        points.scale(scale)\n        if self.shift_height:\n            assert 'height' in points.attribute_dims.keys()\n            points.tensor[:, points.attribute_dims['height']] *= scale\n        input_dict['points'] = points\n\n        for key in input_dict['bbox3d_fields']:\n            input_dict[key].scale(scale)\n\n    def _random_scale(self, input_dict):\n        \"\"\"Private function to randomly set the scale factor.\n\n        
Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'pcd_scale_factor' are updated \\\n                in the result dict.\n        \"\"\"\n        scale_factor = np.random.uniform(self.scale_ratio_range[0],\n                                         self.scale_ratio_range[1])\n        input_dict['pcd_scale_factor'] = scale_factor\n\n    def __call__(self, input_dict):\n        \"\"\"Private function to rotate, scale and translate bounding boxes and \\\n        points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'points', 'pcd_rotation',\n                'pcd_scale_factor', 'pcd_trans' and keys in \\\n                input_dict['bbox3d_fields'] are updated in the result dict.\n        \"\"\"\n        if 'transformation_3d_flow' not in input_dict:\n            input_dict['transformation_3d_flow'] = []\n\n        self._rot_bbox_points(input_dict)\n\n        if 'pcd_scale_factor' not in input_dict:\n            self._random_scale(input_dict)\n        self._scale_bbox_points(input_dict)\n\n        self._trans_bbox_points(input_dict)\n\n        input_dict['transformation_3d_flow'].extend(['R', 'S', 'T'])\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(rot_range={},'.format(self.rot_range)\n        repr_str += ' scale_ratio_range={},'.format(self.scale_ratio_range)\n        repr_str += ' translation_std={},'.format(self.translation_std)\n        repr_str += ' shift_height={})'.format(self.shift_height)\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass OurGlobalRotScaleTrans(object):\n    \"\"\"Apply global rotation, scaling and translation to a 3D scene.\n\n    Args:\n        rot_range (list[float]): Range of rotation angle.\n            Defaults to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]).\n        scale_ratio_range (list[float]): Range of scale ratio.\n            Defaults to [0.95, 1.05].\n        translation_std (list[float]): The standard deviation of translation\n            noise. This applies random translation to the scene, with the\n            noise sampled from a Gaussian distribution whose standard\n            deviation is set by ``translation_std``. 
Defaults to [0, 0, 0]\n        shift_height (bool): Whether to shift height.\n            (the fourth dimension of indoor points) when scaling.\n            Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 rot_range=[-0.78539816, 0.78539816],\n                 scale_ratio_range=[0.95, 1.05],\n                 translation_std=[0, 0, 0],\n                 shift_height=False):\n        self.rot_range = rot_range\n        self.scale_ratio_range = scale_ratio_range\n        self.translation_std = translation_std\n        self.shift_height = shift_height\n\n    def _trans_bbox_points(self, input_dict):\n        \"\"\"Private function to translate bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after translation, 'points', 'pcd_trans' \\\n                and keys in input_dict['bbox3d_fields'] are updated \\\n                in the result dict.\n        \"\"\"\n        if not isinstance(self.translation_std, (list, tuple, np.ndarray)):\n            translation_std = [\n                self.translation_std, self.translation_std,\n                self.translation_std\n            ]\n        else:\n            translation_std = self.translation_std\n        translation_std = np.array(translation_std, dtype=np.float32)\n        trans_factor = np.random.normal(scale=translation_std, size=3).T\n\n        input_dict['points'].translate(trans_factor)\n        input_dict['pcd_trans'] = trans_factor\n        for key in input_dict['bbox3d_fields']:\n            input_dict[key].translate(trans_factor)\n\n        for id in range(len(input_dict['lidar2cam_t'])):\n            input_dict['lidar2cam_t'][id] = input_dict['lidar2cam_t'][id] - input_dict['lidar2cam_r'][id] @ trans_factor\n\n        if 'gt_pts_centers_view' in input_dict:\n            input_dict['gt_pts_centers_view'] = input_dict['gt_pts_centers_view'] + trans_factor\n\n        if 'gt_bboxes_lidar_view' in input_dict:\n            input_dict['gt_bboxes_lidar_view'].translate(trans_factor)\n\n    def _rot_bbox_points(self, input_dict):\n        \"\"\"Private function to rotate bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after rotation, 'points', 'pcd_rotation' \\\n                and keys in input_dict['bbox3d_fields'] are updated \\\n                in the result dict.\n        \"\"\"\n        noise_rotation = input_dict['pcd_rotation_angle']\n        rot_mat_T = None\n        for key in input_dict['bbox3d_fields']:\n            if len(input_dict[key].tensor) != 0:\n                points, rot_mat_T = input_dict[key].rotate(\n                    noise_rotation, input_dict['points'])\n                input_dict['points'] = points\n                input_dict['pcd_rotation'] = rot_mat_T\n\n        if rot_mat_T is not None:\n            rot_mat_T_np = rot_mat_T.numpy()\n            for id in range(len(input_dict['lidar2cam_r'])):\n                input_dict['lidar2cam_r'][id] = input_dict['lidar2cam_r'][id] @ rot_mat_T_np\n\n            if input_dict['gt_pts_centers_view'].shape[0] > 0:\n                input_dict['gt_pts_centers_view'] = input_dict['gt_pts_centers_view'] @ rot_mat_T_np\n\n            if 'gt_bboxes_lidar_view' in input_dict:\n                input_dict['gt_bboxes_lidar_view'].rotate(noise_rotation)\n\n    def _scale_bbox_points(self, input_dict):\n        \"\"\"Private function to scale 
bounding boxes and points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'points'and keys in \\\n                input_dict['bbox3d_fields'] are updated in the result dict.\n        \"\"\"\n        scale = input_dict['pcd_scale_factor']\n        points = input_dict['points']\n        points.scale(scale)\n        if self.shift_height:\n            assert 'height' in points.attribute_dims.keys()\n            points.tensor[:, points.attribute_dims['height']] *= scale\n        input_dict['points'] = points\n\n        for key in input_dict['bbox3d_fields']:\n            input_dict[key].scale(scale)\n\n        if 'gt_img_centers_view' in input_dict and input_dict['gt_img_centers_view'].shape[0] > 0:\n            input_dict['gt_img_centers_view'][:, 2] *= scale\n\n        for id in range(len(input_dict['lidar2cam_t'])):\n            input_dict['lidar2cam_t'][id] = input_dict['lidar2cam_t'][id] * scale\n\n        if 'gt_pts_centers_view' in input_dict and input_dict['gt_pts_centers_view'].shape[0] > 0:\n            input_dict['gt_pts_centers_view'] = input_dict['gt_pts_centers_view'] * scale\n\n            if 'gt_bboxes_cam_view' in input_dict:\n                input_dict['gt_bboxes_cam_view'].scale(scale)\n            if 'gt_bboxes_lidar_view' in input_dict:\n                input_dict['gt_bboxes_lidar_view'].scale(scale)\n\n    def _random_scale(self, input_dict):\n        \"\"\"Private function to randomly set the scale factor.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'pcd_scale_factor' are updated \\\n                in the result dict.\n        \"\"\"\n        scale_factor = np.random.uniform(self.scale_ratio_range[0],\n                                         self.scale_ratio_range[1])\n        input_dict['pcd_scale_factor'] = scale_factor\n\n    def _random_rotation(self, input_dict):\n        rotation = self.rot_range\n        if not isinstance(rotation, list):\n            rotation = [-rotation, rotation]\n        noise_rotation = np.random.uniform(rotation[0], rotation[1])\n        input_dict['pcd_rotation_angle'] = noise_rotation\n\n    def __call__(self, input_dict):\n        \"\"\"Private function to rotate, scale and translate bounding boxes and \\\n        points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after scaling, 'points', 'pcd_rotation',\n                'pcd_scale_factor', 'pcd_trans' and keys in \\\n                input_dict['bbox3d_fields'] are updated in the result dict.\n        \"\"\"\n        if 'transformation_3d_flow' not in input_dict:\n            input_dict['transformation_3d_flow'] = []\n\n        if 'pcd_rotation_angle' not in input_dict:\n            self._random_rotation(input_dict)\n        self._rot_bbox_points(input_dict)\n\n        if 'pcd_scale_factor' not in input_dict:\n            self._random_scale(input_dict)\n        self._scale_bbox_points(input_dict)\n\n        self._trans_bbox_points(input_dict)\n\n        input_dict['transformation_3d_flow'].extend(['R', 'S', 'T'])\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(rot_range={},'.format(self.rot_range)\n        repr_str += ' 
scale_ratio_range={},'.format(self.scale_ratio_range)\n        repr_str += ' translation_std={},'.format(self.translation_std)\n        repr_str += ' shift_height={})'.format(self.shift_height)\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass PointShuffle(object):\n    \"\"\"Shuffle input points.\"\"\"\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to shuffle points.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after shuffling, 'points' keys are updated \\\n                in the result dict.\n        \"\"\"\n        input_dict['points'].shuffle()\n        return input_dict\n\n    def __repr__(self):\n        return self.__class__.__name__\n\n\n@PIPELINES.register_module()\nclass ObjectRangeFilter(object):\n    \"\"\"Filter objects by the range.\n\n    Args:\n        point_cloud_range (list[float]): Point cloud range.\n    \"\"\"\n\n    def __init__(self, point_cloud_range):\n        self.pcd_range = np.array(point_cloud_range, dtype=np.float32)\n        self.bev_range = self.pcd_range[[0, 1, 3, 4]]\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to filter objects by the range.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d' \\\n                keys are updated in the result dict.\n        \"\"\"\n        gt_bboxes_3d = input_dict['gt_bboxes_3d']\n        gt_labels_3d = input_dict['gt_labels_3d']\n        mask = gt_bboxes_3d.in_range_bev(self.bev_range)\n        gt_bboxes_3d = gt_bboxes_3d[mask]\n        # mask is a torch tensor but gt_labels_3d is still numpy array\n        # using mask to index gt_labels_3d will cause bug when\n        # len(gt_labels_3d) == 1, where mask=1 will be interpreted\n        # as gt_labels_3d[1] and cause out of index error\n        gt_labels_3d = gt_labels_3d[mask.numpy().astype(np.bool)]\n\n        # limit rad to [-pi, pi]\n        gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)\n        input_dict['gt_bboxes_3d'] = gt_bboxes_3d\n        input_dict['gt_labels_3d'] = gt_labels_3d\n\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass OurObjectRangeFilter(object):\n    \"\"\"Filter objects by the range.\n\n    Args:\n        point_cloud_range (list[float]): Point cloud range.\n    \"\"\"\n\n    def __init__(self, point_cloud_range):\n        self.pcd_range = np.array(point_cloud_range, dtype=np.float32)\n        self.bev_range = self.pcd_range[[0, 1, 3, 4]]\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to filter objects by the range.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d' \\\n                keys are updated in the result dict.\n        \"\"\"\n        gt_bboxes_3d = input_dict['gt_bboxes_3d']\n        gt_labels_3d = input_dict['gt_labels_3d']\n        mask = gt_bboxes_3d.in_range_bev(self.bev_range)\n        gt_bboxes_3d = gt_bboxes_3d[mask]\n        # mask is a torch tensor but gt_labels_3d is still numpy array\n        # using mask to index gt_labels_3d will cause bug 
when\n        # len(gt_labels_3d) == 1, where mask=1 will be interpreted\n        # as gt_labels_3d[1] and cause out of index error\n        gt_labels_3d = gt_labels_3d[mask.numpy().astype(np.bool)]\n\n        # limit rad to [-pi, pi]\n        gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)\n        input_dict['gt_bboxes_3d'] = gt_bboxes_3d\n        input_dict['gt_labels_3d'] = gt_labels_3d\n\n        if 'gt_visible_3d' in input_dict:\n            gt_visible_3d = input_dict['gt_visible_3d']\n            gt_visible_3d = gt_visible_3d[mask.numpy().astype(np.bool)]\n            input_dict['gt_visible_3d'] = gt_visible_3d\n\n        pts_2d = input_dict['gt_pts_centers_view']\n        mask_2d = (pts_2d[:, 0] > self.bev_range[0]) & (pts_2d[:, 0] < self.bev_range[2]) & (pts_2d[:, 1] > self.bev_range[1]) & (pts_2d[:, 1] < self.bev_range[3])\n\n        input_dict['gt_bboxes'] = input_dict['gt_bboxes'][mask_2d]\n        input_dict['gt_labels'] = input_dict['gt_labels'][mask_2d]\n\n        input_dict['gt_img_centers_view'] = input_dict['gt_img_centers_view'][mask_2d]\n        input_dict['gt_pts_centers_view'] = input_dict['gt_pts_centers_view'][mask_2d]\n        if 'gt_bboxes_cam_view' in input_dict:\n            input_dict['gt_bboxes_cam_view'] = input_dict['gt_bboxes_cam_view'][mask_2d]\n\n        input_dict['gt_bboxes_lidar_view'] = input_dict['gt_bboxes_lidar_view'][mask_2d]\n\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass PointsRangeFilter(object):\n    \"\"\"Filter points by the range.\n\n    Args:\n        point_cloud_range (list[float]): Point cloud range.\n    \"\"\"\n\n    def __init__(self, point_cloud_range):\n        self.pcd_range = np.array(point_cloud_range, dtype=np.float32)\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to filter points by the range.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after filtering, 'points' keys are updated \\\n                in the result dict.\n        \"\"\"\n        points = input_dict['points']\n        points_mask = points.in_range_3d(self.pcd_range)\n        clean_points = points[points_mask]\n        input_dict['points'] = clean_points\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass ObjectNameFilter(object):\n    \"\"\"Filter GT objects by their names.\n\n    Args:\n        classes (list[str]): List of class names to be kept for training.\n    \"\"\"\n\n    def __init__(self, classes):\n        self.classes = classes\n        self.labels = list(range(len(self.classes)))\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to filter objects by their names.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d' \\\n                keys are updated in the result dict.\n        \"\"\"\n        gt_labels_3d = input_dict['gt_labels_3d']\n        gt_bboxes_mask = np.array([n in 
self.labels for n in gt_labels_3d],\n                                  dtype=np.bool_)\n        input_dict['gt_bboxes_3d'] = input_dict['gt_bboxes_3d'][gt_bboxes_mask]\n        input_dict['gt_labels_3d'] = input_dict['gt_labels_3d'][gt_bboxes_mask]\n\n        if 'gt_visible_3d' in input_dict:\n            input_dict['gt_visible_3d'] = input_dict['gt_visible_3d'][gt_bboxes_mask]\n\n        if 'gt_labels' in input_dict:\n            gt_labels = input_dict['gt_labels']\n            if gt_labels.shape[0] > 0:\n                gt_bboxes_mask = np.array([n[0] in self.labels for n in gt_labels],\n                                  dtype=np.bool_)\n                input_dict['gt_bboxes'] = input_dict['gt_bboxes'][gt_bboxes_mask]\n                input_dict['gt_labels'] = input_dict['gt_labels'][gt_bboxes_mask]\n\n                if 'gt_img_centers_view' in input_dict:\n                    input_dict['gt_img_centers_view'] = input_dict['gt_img_centers_view'][gt_bboxes_mask]\n                    input_dict['gt_pts_centers_view'] = input_dict['gt_pts_centers_view'][gt_bboxes_mask]\n\n                if 'gt_bboxes_cam_view' in input_dict:\n                    input_dict['gt_bboxes_cam_view'] = input_dict['gt_bboxes_cam_view'][gt_bboxes_mask]\n\n                if 'gt_bboxes_lidar_view' in input_dict:\n                    input_dict['gt_bboxes_lidar_view'] = input_dict['gt_bboxes_lidar_view'][gt_bboxes_mask]\n\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(classes={self.classes})'\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass IndoorPointSample(object):\n    \"\"\"Indoor point sample.\n\n    Sampling data to a certain number.\n\n    Args:\n        name (str): Name of the dataset.\n        num_points (int): Number of points to be sampled.\n    \"\"\"\n\n    def __init__(self, num_points):\n        self.num_points = num_points\n\n    def points_random_sampling(self,\n                               points,\n                               num_samples,\n                               replace=None,\n                               return_choices=False):\n        \"\"\"Points random sampling.\n\n        Sample points to a certain number.\n\n        Args:\n            points (np.ndarray): 3D Points.\n            num_samples (int): Number of samples to be sampled.\n            replace (bool): Whether the sample is with or without replacement.\n            Defaults to None.\n            return_choices (bool): Whether return choice. 
Defaults to False.\n\n        Returns:\n            tuple[np.ndarray] | np.ndarray:\n\n                - points (np.ndarray): 3D Points.\n                - choices (np.ndarray, optional): The generated random samples.\n        \"\"\"\n        if replace is None:\n            replace = (points.shape[0] < num_samples)\n        choices = np.random.choice(\n            points.shape[0], num_samples, replace=replace)\n        if return_choices:\n            return points[choices], choices\n        else:\n            return points[choices]\n\n    def __call__(self, results):\n        \"\"\"Call function to sample points in indoor scenes.\n\n        Args:\n            results (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after sampling, 'points', 'pts_instance_mask' \\\n                and 'pts_semantic_mask' keys are updated in the result dict.\n        \"\"\"\n        points = results['points']\n        points, choices = self.points_random_sampling(\n            points, self.num_points, return_choices=True)\n\n        pts_instance_mask = results.get('pts_instance_mask', None)\n        pts_semantic_mask = results.get('pts_semantic_mask', None)\n        results['points'] = points\n\n        if pts_instance_mask is not None and pts_semantic_mask is not None:\n            pts_instance_mask = pts_instance_mask[choices]\n            pts_semantic_mask = pts_semantic_mask[choices]\n            results['pts_instance_mask'] = pts_instance_mask\n            results['pts_semantic_mask'] = pts_semantic_mask\n\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(num_points={})'.format(self.num_points)\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass BackgroundPointsFilter(object):\n    \"\"\"Filter background points near the bounding box.\n\n    Args:\n        bbox_enlarge_range (tuple[float], float): Bbox enlarge range.\n    \"\"\"\n\n    def __init__(self, bbox_enlarge_range):\n        assert (is_tuple_of(bbox_enlarge_range, float)\n                and len(bbox_enlarge_range) == 3) \\\n            or isinstance(bbox_enlarge_range, float), \\\n            f'Invalid arguments bbox_enlarge_range {bbox_enlarge_range}'\n\n        if isinstance(bbox_enlarge_range, float):\n            bbox_enlarge_range = [bbox_enlarge_range] * 3\n        self.bbox_enlarge_range = np.array(\n            bbox_enlarge_range, dtype=np.float32)[np.newaxis, :]\n\n    def __call__(self, input_dict):\n        \"\"\"Call function to filter background points near the bounding boxes.\n\n        Args:\n            input_dict (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after filtering, 'points' keys are updated \\\n                in the result dict.\n        \"\"\"\n        points = input_dict['points']\n        gt_bboxes_3d = input_dict['gt_bboxes_3d']\n\n        gt_bboxes_3d_np = gt_bboxes_3d.tensor.numpy()\n        gt_bboxes_3d_np[:, :3] = gt_bboxes_3d.gravity_center.numpy()\n        enlarged_gt_bboxes_3d = gt_bboxes_3d_np.copy()\n        enlarged_gt_bboxes_3d[:, 3:6] += self.bbox_enlarge_range\n        points_numpy = points.tensor.numpy()\n        foreground_masks = box_np_ops.points_in_rbbox(points_numpy,\n                                                      gt_bboxes_3d_np)\n        enlarge_foreground_masks = box_np_ops.points_in_rbbox(\n            points_numpy, enlarged_gt_bboxes_3d)\n        foreground_masks = 
foreground_masks.max(1)\n        enlarge_foreground_masks = enlarge_foreground_masks.max(1)\n        valid_masks = ~np.logical_and(~foreground_masks,\n                                      enlarge_foreground_masks)\n\n        input_dict['points'] = points[valid_masks]\n        pts_instance_mask = input_dict.get('pts_instance_mask', None)\n        if pts_instance_mask is not None:\n            input_dict['pts_instance_mask'] = pts_instance_mask[valid_masks]\n\n        pts_semantic_mask = input_dict.get('pts_semantic_mask', None)\n        if pts_semantic_mask is not None:\n            input_dict['pts_semantic_mask'] = pts_semantic_mask[valid_masks]\n        return input_dict\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += '(bbox_enlarge_range={})'.format(\n            self.bbox_enlarge_range.tolist())\n        return repr_str\n\n\n@PIPELINES.register_module()\nclass VoxelBasedPointSampler(object):\n    \"\"\"Voxel based point sampler.\n\n    Apply voxel sampling to multiple sweep points.\n\n    Args:\n        cur_sweep_cfg (dict): Config for sampling current points.\n        prev_sweep_cfg (dict): Config for sampling previous points.\n        time_dim (int): Index that indicates the time dimension\n            for input points.\n    \"\"\"\n\n    def __init__(self, cur_sweep_cfg, prev_sweep_cfg=None, time_dim=3):\n        self.cur_voxel_generator = VoxelGenerator(**cur_sweep_cfg)\n        self.cur_voxel_num = self.cur_voxel_generator._max_voxels\n        self.time_dim = time_dim\n        if prev_sweep_cfg is not None:\n            assert prev_sweep_cfg['max_num_points'] == \\\n                cur_sweep_cfg['max_num_points']\n            self.prev_voxel_generator = VoxelGenerator(**prev_sweep_cfg)\n            self.prev_voxel_num = self.prev_voxel_generator._max_voxels\n        else:\n            self.prev_voxel_generator = None\n            self.prev_voxel_num = 0\n\n    def _sample_points(self, points, sampler, point_dim):\n        \"\"\"Sample points for each points subset.\n\n        Args:\n            points (np.ndarray): Points subset to be sampled.\n            sampler (VoxelGenerator): Voxel based sampler for\n                each points subset.\n            point_dim (int): The dimension of each point.\n\n        Returns:\n            np.ndarray: Sampled points.\n        \"\"\"\n        voxels, coors, num_points_per_voxel = sampler.generate(points)\n        if voxels.shape[0] < sampler._max_voxels:\n            padding_points = np.zeros([\n                sampler._max_voxels - voxels.shape[0], sampler._max_num_points,\n                point_dim\n            ],\n                                      dtype=points.dtype)\n            padding_points[:] = voxels[0]\n            sample_points = np.concatenate([voxels, padding_points], axis=0)\n        else:\n            sample_points = voxels\n\n        return sample_points\n\n    def __call__(self, results):\n        \"\"\"Call function to sample points from multiple sweeps.\n\n        Args:\n            results (dict): Result dict from loading pipeline.\n\n        Returns:\n            dict: Results after sampling, 'points', 'pts_instance_mask' \\\n                and 'pts_semantic_mask' keys are updated in the result dict.\n        \"\"\"\n        points = results['points']\n        original_dim = points.shape[1]\n\n        # TODO: process instance and semantic mask while _max_num_points\n        # is larger than 1\n        # Extend points with seg and mask fields\n        map_fields2dim = []\n        start_dim = original_dim\n        points_numpy = points.tensor.numpy()\n        extra_channel = [points_numpy]\n        for idx, key in enumerate(results['pts_mask_fields']):\n            map_fields2dim.append((key, idx + start_dim))\n            extra_channel.append(results[key][..., None])\n\n        start_dim += len(results['pts_mask_fields'])\n        for idx, key in enumerate(results['pts_seg_fields']):\n            map_fields2dim.append((key, idx + start_dim))\n            extra_channel.append(results[key][..., None])\n\n        points_numpy = np.concatenate(extra_channel, axis=-1)\n\n        # Split points into two parts: current sweep points and\n        # previous sweeps points.\n        # TODO: support different sampling methods for next sweeps points\n        # and previous sweeps points.\n        cur_points_flag = (points_numpy[:, self.time_dim] == 0)\n        cur_sweep_points = points_numpy[cur_points_flag]\n        prev_sweeps_points = points_numpy[~cur_points_flag]\n        if prev_sweeps_points.shape[0] == 0:\n            prev_sweeps_points = cur_sweep_points\n\n        # Shuffle points before sampling\n        np.random.shuffle(cur_sweep_points)\n        np.random.shuffle(prev_sweeps_points)\n\n        cur_sweep_points = self._sample_points(cur_sweep_points,\n                                               self.cur_voxel_generator,\n                                               points_numpy.shape[1])\n        if self.prev_voxel_generator is not None:\n            prev_sweeps_points = self._sample_points(prev_sweeps_points,\n                                                     self.prev_voxel_generator,\n                                                     points_numpy.shape[1])\n\n            points_numpy = np.concatenate(\n                [cur_sweep_points, prev_sweeps_points], 0)\n        else:\n            points_numpy = cur_sweep_points\n\n        if self.cur_voxel_generator._max_num_points == 1:\n            points_numpy = points_numpy.squeeze(1)\n        results['points'] = points.new_point(points_numpy[..., :original_dim])\n\n        # Restore the corresponding seg and mask fields\n        for key, dim_index in map_fields2dim:\n            results[key] = points_numpy[..., dim_index]\n\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n\n        def _auto_indent(repr_str, indent):\n            repr_str = repr_str.split('\\n')\n            repr_str = [' ' * indent + t + '\\n' for t in repr_str]\n            repr_str = ''.join(repr_str)[:-1]\n            return repr_str\n\n        repr_str = self.__class__.__name__\n        indent = 4\n        repr_str += '(\\n'\n        repr_str += ' ' * indent + f'num_cur_sweep={self.cur_voxel_num},\\n'\n        repr_str += ' ' * indent + f'num_prev_sweep={self.prev_voxel_num},\\n'\n        repr_str += ' ' * indent + f'time_dim={self.time_dim},\\n'\n        repr_str += ' ' * indent + 'cur_voxel_generator=\\n'\n        repr_str += f'{_auto_indent(repr(self.cur_voxel_generator), 8)},\\n'\n        repr_str += ' ' * indent + 'prev_voxel_generator=\\n'\n        repr_str += f'{_auto_indent(repr(self.prev_voxel_generator), 8)})'\n        return repr_str\n"
  },
  {
    "path": "mmdet3d/datasets/registry.py",
    "content": "from mmcv.utils import Registry\n\nOBJECTSAMPLERS = Registry('Object sampler')\n"
  },
  {
    "path": "mmdet3d/datasets/scannet_dataset.py",
    "content": "import numpy as np\nfrom os import path as osp\n\nfrom mmdet3d.core import show_result\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes\nfrom mmdet.datasets import DATASETS\nfrom .custom_3d import Custom3DDataset\n\n\n@DATASETS.register_module()\nclass ScanNetDataset(Custom3DDataset):\n    r\"\"\"ScanNet Dataset.\n\n    This class serves as the API for experiments on the ScanNet Dataset.\n\n    Please refer to the `github repo <https://github.com/ScanNet/ScanNet>`_\n    for data downloading.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'Depth' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n    CLASSES = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',\n               'bookshelf', 'picture', 'counter', 'desk', 'curtain',\n               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',\n               'garbagebin')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 modality=None,\n                 box_type_3d='Depth',\n                 filter_empty_gt=True,\n                 test_mode=False):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode)\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \\\n                    3D ground truth bboxes\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - pts_instance_mask_path (str): Path of instance masks.\n                - pts_semantic_mask_path (str): Path of semantic masks.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n        if info['annos']['gt_num'] != 0:\n            gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(\n                np.float32)  # k, 6\n            gt_labels_3d = info['annos']['class'].astype(np.long)\n        else:\n            gt_bboxes_3d = np.zeros((0, 6), 
dtype=np.float32)\n            gt_labels_3d = np.zeros((0, ), dtype=np.long)\n\n        # to target box structure\n        gt_bboxes_3d = DepthInstance3DBoxes(\n            gt_bboxes_3d,\n            box_dim=gt_bboxes_3d.shape[-1],\n            with_yaw=False,\n            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\n\n        pts_instance_mask_path = osp.join(self.data_root,\n                                          info['pts_instance_mask_path'])\n        pts_semantic_mask_path = osp.join(self.data_root,\n                                          info['pts_semantic_mask_path'])\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d,\n            gt_labels_3d=gt_labels_3d,\n            pts_instance_mask_path=pts_instance_mask_path,\n            pts_semantic_mask_path=pts_semantic_mask_path)\n        return anns_results\n\n    def show(self, results, out_dir, show=True):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of visualization result.\n            show (bool): Visualize the results online.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        for i, result in enumerate(results):\n            data_info = self.data_infos[i]\n            pts_path = data_info['pts_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            points = np.fromfile(\n                osp.join(self.data_root, pts_path),\n                dtype=np.float32).reshape(-1, 6)\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor\n            pred_bboxes = result['boxes_3d'].tensor.numpy()\n            show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,\n                        show)\n"
  },
  {
    "path": "mmdet3d/datasets/semantickitti_dataset.py",
    "content": "from os import path as osp\n\nfrom mmdet.datasets import DATASETS\nfrom .custom_3d import Custom3DDataset\n\n\n@DATASETS.register_module()\nclass SemanticKITTIDataset(Custom3DDataset):\n    r\"\"\"SemanticKITTI Dataset.\n\n    This class serves as the API for experiments on the SemanticKITTI Dataset\n    Please refer to <http://www.semantic-kitti.org/dataset.html>`_\n    for data downloading\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): NO 3D box for this dataset.\n            You can choose any type\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n    CLASSES = ('unlabeled', 'car', 'bicycle', 'motorcycle', 'truck', 'bus',\n               'person', 'bicyclist', 'motorcyclist', 'road', 'parking',\n               'sidewalk', 'other-ground', 'building', 'fence', 'vegetation',\n               'trunck', 'terrian', 'pole', 'traffic-sign')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 modality=None,\n                 box_type_3d='Lidar',\n                 filter_empty_gt=False,\n                 test_mode=False):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode)\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - pts_semantic_mask_path (str): Path of semantic masks.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n\n        pts_semantic_mask_path = osp.join(self.data_root,\n                                          info['pts_semantic_mask_path'])\n\n        anns_results = dict(pts_semantic_mask_path=pts_semantic_mask_path)\n        return anns_results\n"
  },
  {
    "path": "mmdet3d/datasets/sunrgbd_dataset.py",
    "content": "import numpy as np\nfrom collections import OrderedDict\nfrom os import path as osp\n\nfrom mmdet3d.core import show_result\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes\nfrom mmdet.core import eval_map\nfrom mmdet.datasets import DATASETS\nfrom .custom_3d import Custom3DDataset\n\n\n@DATASETS.register_module()\nclass SUNRGBDDataset(Custom3DDataset):\n    r\"\"\"SUNRGBD Dataset.\n\n    This class serves as the API for experiments on the SUNRGBD Dataset.\n\n    See the `download page <http://rgbd.cs.princeton.edu/challenge.html>`_\n    for data downloading.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'Depth' in this dataset. Available options includes\n\n            - 'LiDAR': Box in LiDAR coordinates.\n            - 'Depth': Box in depth coordinates, usually for indoor dataset.\n            - 'Camera': Box in camera coordinates.\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n    \"\"\"\n    CLASSES = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',\n               'night_stand', 'bookshelf', 'bathtub')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 pipeline=None,\n                 classes=None,\n                 modality=dict(use_camera=True, use_lidar=True),\n                 box_type_3d='Depth',\n                 filter_empty_gt=True,\n                 test_mode=False):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode)\n        assert 'use_camera' in self.modality and \\\n            'use_lidar' in self.modality\n        assert self.modality['use_camera'] or self.modality['use_lidar']\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Data information that will be passed to the data \\\n                preprocessing pipelines. 
It includes the following keys:\n\n                - sample_idx (str): Sample index.\n                - pts_filename (str, optional): Filename of point clouds.\n                - file_name (str, optional): Filename of point clouds.\n                - img_prefix (str | None, optional): Prefix of image files.\n                - img_info (dict, optional): Image info.\n                - calib (dict, optional): Camera calibration info.\n                - ann_info (dict): Annotation info.\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['point_cloud']['lidar_idx']\n        assert info['point_cloud']['lidar_idx'] == info['image']['image_idx']\n        input_dict = dict(sample_idx=sample_idx)\n\n        if self.modality['use_lidar']:\n            pts_filename = osp.join(self.data_root, info['pts_path'])\n            input_dict['pts_filename'] = pts_filename\n            input_dict['file_name'] = pts_filename\n\n        if self.modality['use_camera']:\n            img_filename = osp.join(\n                osp.join(self.data_root, 'sunrgbd_trainval'),\n                info['image']['image_path'])\n            input_dict['img_prefix'] = None\n            input_dict['img_info'] = dict(filename=img_filename)\n            calib = info['calib']\n            input_dict['calib'] = calib\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n            if self.filter_empty_gt and len(annos['gt_bboxes_3d']) == 0:\n                return None\n        return input_dict\n\n    def get_ann_info(self, index):\n        \"\"\"Get annotation info according to the given index.\n\n        Args:\n            index (int): Index of the annotation data to get.\n\n        Returns:\n            dict: annotation information consists of the following keys:\n\n                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \\\n                    3D ground truth bboxes\n                - gt_labels_3d (np.ndarray): Labels of ground truths.\n                - pts_instance_mask_path (str): Path of instance masks.\n                - pts_semantic_mask_path (str): Path of semantic masks.\n        \"\"\"\n        # Use index to get the annos, thus the evalhook could also use this api\n        info = self.data_infos[index]\n        if info['annos']['gt_num'] != 0:\n            gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(\n                np.float32)  # k, 6\n            gt_labels_3d = info['annos']['class'].astype(np.long)\n        else:\n            gt_bboxes_3d = np.zeros((0, 7), dtype=np.float32)\n            gt_labels_3d = np.zeros((0, ), dtype=np.long)\n\n        # to target box structure\n        gt_bboxes_3d = DepthInstance3DBoxes(\n            gt_bboxes_3d, origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)\n\n        anns_results = dict(\n            gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d)\n\n        if self.modality['use_camera']:\n            if info['annos']['gt_num'] != 0:\n                gt_bboxes_2d = info['annos']['bbox'].astype(np.float32)\n            else:\n                gt_bboxes_2d = np.zeros((0, 4), dtype=np.float32)\n            anns_results['bboxes'] = gt_bboxes_2d\n            anns_results['labels'] = gt_labels_3d\n\n        return anns_results\n\n    def show(self, results, out_dir, show=True):\n        \"\"\"Results visualization.\n\n        Args:\n            results (list[dict]): List of bounding boxes results.\n            out_dir (str): Output directory of 
visualization result.\n            show (bool): Visualize the results online.\n        \"\"\"\n        assert out_dir is not None, 'Expect out_dir, got none.'\n        for i, result in enumerate(results):\n            data_info = self.data_infos[i]\n            pts_path = data_info['pts_path']\n            file_name = osp.split(pts_path)[-1].split('.')[0]\n            points = np.fromfile(\n                osp.join(self.data_root, pts_path),\n                dtype=np.float32).reshape(-1, 6)\n            points[:, 3:] *= 255\n            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor\n            pred_bboxes = result['boxes_3d'].tensor.numpy()\n            show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,\n                        show)\n\n    def evaluate(self,\n                 results,\n                 metric=None,\n                 iou_thr=(0.25, 0.5),\n                 iou_thr_2d=(0.5, ),\n                 logger=None,\n                 show=False,\n                 out_dir=None):\n\n        # evaluate 3D detection performance\n        if isinstance(results[0], dict):\n            return super().evaluate(results, metric, iou_thr, logger, show,\n                                    out_dir)\n        # evaluate 2D detection performance\n        else:\n            eval_results = OrderedDict()\n            annotations = [self.get_ann_info(i) for i in range(len(self))]\n            iou_thr_2d = (iou_thr_2d) if isinstance(iou_thr_2d,\n                                                    float) else iou_thr_2d\n            for iou_thr_2d_single in iou_thr_2d:\n                mean_ap, _ = eval_map(\n                    results,\n                    annotations,\n                    scale_ranges=None,\n                    iou_thr=iou_thr_2d_single,\n                    dataset=self.CLASSES,\n                    logger=logger)\n                eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap\n            return eval_results\n"
  },
  {
    "path": "mmdet3d/datasets/waymo_dataset.py",
    "content": "import mmcv\nimport numpy as np\nimport os\nimport tempfile\nimport torch\nfrom mmcv.utils import print_log\nfrom os import path as osp\n\nfrom mmdet.datasets import DATASETS\nfrom ..core.bbox import Box3DMode, points_cam2img\nfrom .kitti_dataset import KittiDataset\n\n\n@DATASETS.register_module()\nclass WaymoDataset(KittiDataset):\n    \"\"\"Waymo Dataset.\n\n    This class serves as the API for experiments on the Waymo Dataset.\n\n    Please refer to `<https://waymo.com/open/download/>`_for data downloading.\n    It is recommended to symlink the dataset root to $MMDETECTION3D/data and\n    organize them as the doc shows.\n\n    Args:\n        data_root (str): Path of dataset root.\n        ann_file (str): Path of annotation file.\n        split (str): Split of input data.\n        pts_prefix (str, optional): Prefix of points files.\n            Defaults to 'velodyne'.\n        pipeline (list[dict], optional): Pipeline used for data processing.\n            Defaults to None.\n        classes (tuple[str], optional): Classes used in the dataset.\n            Defaults to None.\n        modality (dict, optional): Modality to specify the sensor data used\n            as input. Defaults to None.\n        box_type_3d (str, optional): Type of 3D box of this dataset.\n            Based on the `box_type_3d`, the dataset will encapsulate the box\n            to its original format then converted them to `box_type_3d`.\n            Defaults to 'LiDAR' in this dataset. Available options includes\n\n            - 'LiDAR': box in LiDAR coordinates\n            - 'Depth': box in depth coordinates, usually for indoor dataset\n            - 'Camera': box in camera coordinates\n        filter_empty_gt (bool, optional): Whether to filter empty GT.\n            Defaults to True.\n        test_mode (bool, optional): Whether the dataset is in test mode.\n            Defaults to False.\n        pcd_limit_range (list): The range of point cloud used to filter\n            invalid predicted boxes. 
Default: [-85, -85, -5, 85, 85, 5].\n    \"\"\"\n\n    CLASSES = ('Car', 'Cyclist', 'Pedestrian')\n\n    def __init__(self,\n                 data_root,\n                 ann_file,\n                 split,\n                 num_views=5,\n                 pts_prefix='velodyne',\n                 pipeline=None,\n                 classes=None,\n                 modality=None,\n                 box_type_3d='LiDAR',\n                 filter_empty_gt=True,\n                 test_mode=False,\n                 load_interval=1,\n                 pcd_limit_range=[-85, -85, -5, 85, 85, 5]):\n        super().__init__(\n            data_root=data_root,\n            ann_file=ann_file,\n            split=split,\n            pts_prefix=pts_prefix,\n            pipeline=pipeline,\n            classes=classes,\n            modality=modality,\n            box_type_3d=box_type_3d,\n            filter_empty_gt=filter_empty_gt,\n            test_mode=test_mode,\n            pcd_limit_range=pcd_limit_range)\n\n        self.num_views = num_views\n        assert self.num_views <= 5\n        # to load a subset, just set the load_interval in the dataset config\n        self.data_infos = self.data_infos[::load_interval]\n        if hasattr(self, 'flag'):\n            self.flag = self.flag[::load_interval]\n\n    def _get_pts_filename(self, idx):\n        pts_filename = osp.join(self.root_split, self.pts_prefix,\n                                f'{idx:07d}.bin')\n        return pts_filename\n\n    def get_data_info(self, index):\n        \"\"\"Get data info according to the given index.\n\n        Args:\n            index (int): Index of the sample data to get.\n\n        Returns:\n            dict: Standard input_dict consists of the\n                data information.\n\n                - sample_idx (str): sample index\n                - pts_filename (str): filename of point clouds\n                - img_prefix (str | None): prefix of image files\n                - img_info (dict): image info\n                - lidar2img (list[np.ndarray], optional): transformations from\n                    lidar to different cameras\n                - ann_info (dict): annotation info\n        \"\"\"\n        info = self.data_infos[index]\n        sample_idx = info['image']['image_idx']\n        img_filename = os.path.join(self.data_root,\n                                    info['image']['image_path'])\n\n        # TODO: consider use torch.Tensor only\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n        P0 = info['calib']['P0'].astype(np.float32)\n        lidar2img = P0 @ rect @ Trv2c\n\n        # the Tr_velo_to_cam is computed for all images but not saved in .info for img1-4\n        # the size of img0-2: 1280x1920; img3-4: 886x1920\n        if self.modality['use_camera']:\n            image_paths = []\n            lidar2img_rts = []\n\n            # load calibration for all 5 images.\n            calib_path = img_filename.replace('image_0', 'calib').replace('.png', '.txt')\n            Tr_velo_to_cam_list = []\n            with open(calib_path, 'r') as f:\n                lines = f.readlines()\n            for line_num in range(6, 6 + self.num_views):\n                trans = np.array([float(info) for info in lines[line_num].split(' ')[1:13]]).reshape(3, 4)\n                trans = np.concatenate([trans, np.array([[0., 0., 0., 1.]])], axis=0).astype(np.float32)\n                Tr_velo_to_cam_list.append(trans)\n            assert 
np.allclose(Tr_velo_to_cam_list[0], info['calib']['Tr_velo_to_cam'].astype(np.float32))\n\n            for idx_img in range(self.num_views):\n                rect = info['calib']['R0_rect'].astype(np.float32)\n                # Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n                Trv2c = Tr_velo_to_cam_list[idx_img]\n                P0 = info['calib'][f'P{idx_img}'].astype(np.float32)\n                lidar2img = P0 @ rect @ Trv2c\n\n                image_paths.append(img_filename.replace('image_0', f'image_{idx_img}'))\n                lidar2img_rts.append(lidar2img)\n\n        pts_filename = self._get_pts_filename(sample_idx)\n        input_dict = dict(\n            sample_idx=sample_idx,\n            pts_filename=pts_filename,\n            img_prefix=None,\n        )\n        if self.modality['use_camera']:\n            input_dict['img_filename'] = image_paths\n            input_dict['lidar2img'] = lidar2img_rts\n\n        if not self.test_mode:\n            annos = self.get_ann_info(index)\n            input_dict['ann_info'] = annos\n\n        return input_dict\n\n    def format_results(self,\n                       outputs,\n                       pklfile_prefix=None,\n                       submission_prefix=None,\n                       data_format='waymo'):\n        \"\"\"Format the results to pkl file.\n\n        Args:\n            outputs (list[dict]): Testing results of the dataset.\n            pklfile_prefix (str | None): The prefix of pkl files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            submission_prefix (str | None): The prefix of submitted files. It\n                includes the file path and the prefix of filename, e.g.,\n                \"a/b/prefix\". If not specified, a temp file will be created.\n                Default: None.\n            data_format (str | None): Output data format. 
Default: 'waymo'.\n                Another supported choice is 'kitti'.\n\n        Returns:\n            tuple: (result_files, tmp_dir), where result_files is a dict\n                containing the formatted results, tmp_dir is the temporary\n                directory created when pklfile_prefix is not specified.\n        \"\"\"\n        if pklfile_prefix is None:\n            tmp_dir = tempfile.TemporaryDirectory()\n            pklfile_prefix = osp.join(tmp_dir.name, 'results')\n        else:\n            tmp_dir = None\n\n        assert ('waymo' in data_format or 'kitti' in data_format), \\\n            f'invalid data_format {data_format}'\n\n        if (not isinstance(outputs[0], dict)) or 'img_bbox' in outputs[0]:\n            raise TypeError('Not supported type for reformat results.')\n        elif 'pts_bbox' in outputs[0]:\n            result_files = dict()\n            for name in outputs[0]:\n                results_ = [out[name] for out in outputs]\n                pklfile_prefix_ = pklfile_prefix + name\n                if submission_prefix is not None:\n                    submission_prefix_ = f'{submission_prefix}_{name}'\n                else:\n                    submission_prefix_ = None\n                result_files_ = self.bbox2result_kitti(results_, self.CLASSES,\n                                                       pklfile_prefix_,\n                                                       submission_prefix_)\n                result_files[name] = result_files_\n        else:\n            result_files = self.bbox2result_kitti(outputs, self.CLASSES,\n                                                  pklfile_prefix,\n                                                  submission_prefix)\n        if 'waymo' in data_format:\n            from ..core.evaluation.waymo_utils.prediction_kitti_to_waymo import \\\n                KITTI2Waymo  # noqa\n            waymo_root = osp.join(\n                self.data_root.split('kitti_format')[0], 'waymo_format')\n            if self.split == 'training':\n                waymo_tfrecords_dir = osp.join(waymo_root, 'validation')\n                prefix = '1'\n            elif self.split == 'testing':\n                waymo_tfrecords_dir = osp.join(waymo_root, 'testing')\n                prefix = '2'\n            else:\n                raise ValueError('Not supported split value.')\n            save_tmp_dir = tempfile.TemporaryDirectory()\n            waymo_results_save_dir = save_tmp_dir.name\n            waymo_results_final_path = f'{pklfile_prefix}.bin'\n            if 'pts_bbox' in result_files:\n                converter = KITTI2Waymo(result_files['pts_bbox'],\n                                        waymo_tfrecords_dir,\n                                        waymo_results_save_dir,\n                                        waymo_results_final_path, prefix)\n            else:\n                converter = KITTI2Waymo(result_files, waymo_tfrecords_dir,\n                                        waymo_results_save_dir,\n                                        waymo_results_final_path, prefix)\n            converter.convert()\n            save_tmp_dir.cleanup()\n\n        return result_files, tmp_dir\n\n    def evaluate(self,\n                 results,\n                 metric='waymo',\n                 logger=None,\n                 pklfile_prefix=None,\n                 submission_prefix=None,\n                 show=False,\n                 out_dir=None):\n        \"\"\"Evaluation in KITTI or Waymo protocol.\n\n        Args:\n            
results (list[dict]): Testing results of the dataset.\n            metric (str | list[str]): Metrics to be evaluated.\n                Default: 'waymo'. Another supported metric is 'kitti'.\n            logger (logging.Logger | str | None): Logger used for printing\n                related information during evaluation. Default: None.\n            pklfile_prefix (str | None): The prefix of pkl files. It includes\n                the file path and the prefix of filename, e.g., \"a/b/prefix\".\n                If not specified, a temp file will be created. Default: None.\n            submission_prefix (str | None): The prefix of submission data.\n                If not specified, the submission data will not be generated.\n            show (bool): Whether to visualize.\n                Default: False.\n            out_dir (str): Path to save the visualization results.\n                Default: None.\n\n        Returns:\n            dict[str, float]: Results of each evaluation metric.\n        \"\"\"\n        assert ('waymo' in metric or 'kitti' in metric), \\\n            f'invalid metric {metric}'\n        if 'kitti' in metric:\n            result_files, tmp_dir = self.format_results(\n                results,\n                pklfile_prefix,\n                submission_prefix,\n                data_format='kitti')\n            from mmdet3d.core.evaluation import kitti_eval\n            gt_annos = [info['annos'] for info in self.data_infos]\n\n            if isinstance(result_files, dict):\n                ap_dict = dict()\n                for name, result_files_ in result_files.items():\n                    eval_types = ['bev', '3d']\n                    ap_result_str, ap_dict_ = kitti_eval(\n                        gt_annos,\n                        result_files_,\n                        self.CLASSES,\n                        eval_types=eval_types)\n                    for ap_type, ap in ap_dict_.items():\n                        ap_dict[f'{name}/{ap_type}'] = float(\n                            '{:.4f}'.format(ap))\n\n                    print_log(\n                        f'Results of {name}:\\n' + ap_result_str, logger=logger)\n\n            else:\n                ap_result_str, ap_dict = kitti_eval(\n                    gt_annos,\n                    result_files,\n                    self.CLASSES,\n                    eval_types=['bev', '3d'])\n                print_log('\\n' + ap_result_str, logger=logger)\n        if 'waymo' in metric:\n            waymo_root = osp.join(\n                self.data_root.split('kitti_format')[0], 'waymo_format')\n            if pklfile_prefix is None:\n                eval_tmp_dir = tempfile.TemporaryDirectory()\n                pklfile_prefix = osp.join(eval_tmp_dir.name, 'results')\n            else:\n                eval_tmp_dir = None\n            result_files, tmp_dir = self.format_results(\n                results,\n                pklfile_prefix,\n                submission_prefix,\n                data_format='waymo')\n            import subprocess\n            ret_bytes = subprocess.check_output(\n                'mmdet3d/core/evaluation/waymo_utils/' +\n                f'compute_detection_metrics_main {pklfile_prefix}.bin ' +\n                f'{waymo_root}/gt.bin',\n                shell=True)\n            ret_texts = ret_bytes.decode('utf-8')\n            print_log(ret_texts)\n            # parse the text to get ap_dict\n            ap_dict = {\n                'Vehicle/L1 mAP': 0,\n                'Vehicle/L1 mAPH': 0,\n                
'Vehicle/L2 mAP': 0,\n                'Vehicle/L2 mAPH': 0,\n                'Pedestrian/L1 mAP': 0,\n                'Pedestrian/L1 mAPH': 0,\n                'Pedestrian/L2 mAP': 0,\n                'Pedestrian/L2 mAPH': 0,\n                'Sign/L1 mAP': 0,\n                'Sign/L1 mAPH': 0,\n                'Sign/L2 mAP': 0,\n                'Sign/L2 mAPH': 0,\n                'Cyclist/L1 mAP': 0,\n                'Cyclist/L1 mAPH': 0,\n                'Cyclist/L2 mAP': 0,\n                'Cyclist/L2 mAPH': 0,\n                'Overall/L1 mAP': 0,\n                'Overall/L1 mAPH': 0,\n                'Overall/L2 mAP': 0,\n                'Overall/L2 mAPH': 0\n            }\n            mAP_splits = ret_texts.split('mAP ')\n            mAPH_splits = ret_texts.split('mAPH ')\n            for idx, key in enumerate(ap_dict.keys()):\n                split_idx = int(idx / 2) + 1\n                if idx % 2 == 0:  # mAP\n                    ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])\n                else:  # mAPH\n                    ap_dict[key] = float(mAPH_splits[split_idx].split(']')[0])\n            ap_dict['Overall/L1 mAP'] = \\\n                (ap_dict['Vehicle/L1 mAP'] + ap_dict['Pedestrian/L1 mAP'] +\n                 ap_dict['Cyclist/L1 mAP']) / 3\n            ap_dict['Overall/L1 mAPH'] = \\\n                (ap_dict['Vehicle/L1 mAPH'] + ap_dict['Pedestrian/L1 mAPH'] +\n                 ap_dict['Cyclist/L1 mAPH']) / 3\n            ap_dict['Overall/L2 mAP'] = \\\n                (ap_dict['Vehicle/L2 mAP'] + ap_dict['Pedestrian/L2 mAP'] +\n                 ap_dict['Cyclist/L2 mAP']) / 3\n            ap_dict['Overall/L2 mAPH'] = \\\n                (ap_dict['Vehicle/L2 mAPH'] + ap_dict['Pedestrian/L2 mAPH'] +\n                 ap_dict['Cyclist/L2 mAPH']) / 3\n            if eval_tmp_dir is not None:\n                eval_tmp_dir.cleanup()\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n\n        if show:\n            self.show(results, out_dir)\n        return ap_dict\n\n    def bbox2result_kitti(self,\n                          net_outputs,\n                          class_names,\n                          pklfile_prefix=None,\n                          submission_prefix=None):\n        \"\"\"Convert results to KITTI format for evaluation and test submission.\n\n        Args:\n            net_outputs (list[np.ndarray]): List of arrays storing the\n                predicted bounding boxes and scores.\n            class_names (list[str]): A list of class names.\n            pklfile_prefix (str | None): The prefix of pkl file.\n            submission_prefix (str | None): The prefix of submission file.\n\n        Returns:\n            list[dict]: A list of dictionaries in KITTI 3D format.\n        \"\"\"\n        assert len(net_outputs) == len(self.data_infos), \\\n            'invalid list length of network outputs'\n        if submission_prefix is not None:\n            mmcv.mkdir_or_exist(submission_prefix)\n\n        det_annos = []\n        print('\\nConverting prediction to KITTI format')\n        for idx, pred_dicts in enumerate(\n                mmcv.track_iter_progress(net_outputs)):\n            annos = []\n            info = self.data_infos[idx]\n            sample_idx = info['image']['image_idx']\n            image_shape = info['image']['image_shape'][:2]\n\n            box_dict = self.convert_valid_bboxes(pred_dicts, info)\n            if len(box_dict['bbox']) > 0:\n                box_2d_preds = box_dict['bbox']\n                box_preds = 
box_dict['box3d_camera']\n                scores = box_dict['scores']\n                box_preds_lidar = box_dict['box3d_lidar']\n                label_preds = box_dict['label_preds']\n\n                anno = {\n                    'name': [],\n                    'truncated': [],\n                    'occluded': [],\n                    'alpha': [],\n                    'bbox': [],\n                    'dimensions': [],\n                    'location': [],\n                    'rotation_y': [],\n                    'score': []\n                }\n\n                for box, box_lidar, bbox, score, label in zip(\n                        box_preds, box_preds_lidar, box_2d_preds, scores,\n                        label_preds):\n                    bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])\n                    bbox[:2] = np.maximum(bbox[:2], [0, 0])\n                    anno['name'].append(class_names[int(label)])\n                    anno['truncated'].append(0.0)\n                    anno['occluded'].append(0)\n                    anno['alpha'].append(\n                        -np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])\n                    anno['bbox'].append(bbox)\n                    anno['dimensions'].append(box[3:6])\n                    anno['location'].append(box[:3])\n                    anno['rotation_y'].append(box[6])\n                    anno['score'].append(score)\n\n                anno = {k: np.stack(v) for k, v in anno.items()}\n                annos.append(anno)\n\n                if submission_prefix is not None:\n                    curr_file = f'{submission_prefix}/{sample_idx:07d}.txt'\n                    with open(curr_file, 'w') as f:\n                        bbox = anno['bbox']\n                        loc = anno['location']\n                        dims = anno['dimensions']  # lhw -> hwl\n\n                        for idx in range(len(bbox)):\n                            print(\n                                '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '\n                                '{:.4f} {:.4f} {:.4f} '\n                                '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.\n                                format(anno['name'][idx], anno['alpha'][idx],\n                                       bbox[idx][0], bbox[idx][1],\n                                       bbox[idx][2], bbox[idx][3],\n                                       dims[idx][1], dims[idx][2],\n                                       dims[idx][0], loc[idx][0], loc[idx][1],\n                                       loc[idx][2], anno['rotation_y'][idx],\n                                       anno['score'][idx]),\n                                file=f)\n            else:\n                annos.append({\n                    'name': np.array([]),\n                    'truncated': np.array([]),\n                    'occluded': np.array([]),\n                    'alpha': np.array([]),\n                    'bbox': np.zeros([0, 4]),\n                    'dimensions': np.zeros([0, 3]),\n                    'location': np.zeros([0, 3]),\n                    'rotation_y': np.array([]),\n                    'score': np.array([]),\n                })\n            annos[-1]['sample_idx'] = np.array(\n                [sample_idx] * len(annos[-1]['score']), dtype=np.int64)\n\n            det_annos += annos\n\n        if pklfile_prefix is not None:\n            if not pklfile_prefix.endswith(('.pkl', '.pickle')):\n                out = f'{pklfile_prefix}.pkl'\n            else:\n                out = pklfile_prefix\n            mmcv.dump(det_annos, out)\n      
      print(f'Result is saved to {out}.')\n\n        return det_annos\n\n    def convert_valid_bboxes(self, box_dict, info):\n        \"\"\"Convert the boxes into valid format.\n\n        Args:\n            box_dict (dict): Bounding boxes to be converted.\n\n                - boxes_3d (:obj:``LiDARInstance3DBoxes``): 3D bounding boxes.\n                - scores_3d (np.ndarray): Scores of predicted boxes.\n                - labels_3d (np.ndarray): Class labels of predicted boxes.\n            info (dict): Dataset information dictionary.\n\n        Returns:\n            dict: Valid boxes after conversion.\n\n                - bbox (np.ndarray): 2D bounding boxes (in camera 0).\n                - box3d_camera (np.ndarray): 3D boxes in camera coordinates.\n                - box3d_lidar (np.ndarray): 3D boxes in lidar coordinates.\n                - scores (np.ndarray): Scores of predicted boxes.\n                - label_preds (np.ndarray): Class labels of predicted boxes.\n                - sample_idx (np.ndarray): Sample index.\n        \"\"\"\n        # TODO: refactor this function\n        box_preds = box_dict['boxes_3d']\n        scores = box_dict['scores_3d']\n        labels = box_dict['labels_3d']\n        sample_idx = info['image']['image_idx']\n        # TODO: remove the hack of yaw\n        box_preds.limit_yaw(offset=0.5, period=np.pi * 2)\n\n        if len(box_preds) == 0:\n            return dict(\n                bbox=np.zeros([0, 4]),\n                box3d_camera=np.zeros([0, 7]),\n                box3d_lidar=np.zeros([0, 7]),\n                scores=np.zeros([0]),\n                label_preds=np.zeros([0, 4]),\n                sample_idx=sample_idx)\n\n        rect = info['calib']['R0_rect'].astype(np.float32)\n        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n        P0 = info['calib']['P0'].astype(np.float32)\n        P0 = box_preds.tensor.new_tensor(P0)\n\n        box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)\n\n        box_corners = box_preds_camera.corners\n        box_corners_in_image = points_cam2img(box_corners, P0)\n        # box_corners_in_image: [N, 8, 2]\n        minxy = torch.min(box_corners_in_image, dim=1)[0]\n        maxxy = torch.max(box_corners_in_image, dim=1)[0]\n        box_2d_preds = torch.cat([minxy, maxxy], dim=1)\n        # Post-processing\n        # check box_preds\n        limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)\n        valid_pcd_inds = ((box_preds.center > limit_range[:3]) &\n                          (box_preds.center < limit_range[3:]))\n        valid_inds = valid_pcd_inds.all(-1)\n\n        if valid_inds.sum() > 0:\n            return dict(\n                bbox=box_2d_preds[valid_inds, :].numpy(),\n                box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),\n                box3d_lidar=box_preds[valid_inds].tensor.numpy(),\n                scores=scores[valid_inds].numpy(),\n                label_preds=labels[valid_inds].numpy(),\n                sample_idx=sample_idx,\n            )\n        else:\n            return dict(\n                bbox=np.zeros([0, 4]),\n                box3d_camera=np.zeros([0, 7]),\n                box3d_lidar=np.zeros([0, 7]),\n                scores=np.zeros([0]),\n                label_preds=np.zeros([0, 4]),\n                sample_idx=sample_idx,\n            )\n"
  },
  {
    "path": "mmdet3d/models/__init__.py",
    "content": "from .backbones import *  # noqa: F401,F403\nfrom .builder import (build_backbone, build_detector, build_fusion_layer,\n                      build_head, build_loss, build_middle_encoder, build_neck,\n                      build_roi_extractor, build_shared_head,\n                      build_voxel_encoder)\nfrom .dense_heads import *  # noqa: F401,F403\nfrom .detectors import *  # noqa: F401,F403\nfrom .fusion_layers import *  # noqa: F401,F403\nfrom .losses import *  # noqa: F401,F403\nfrom .middle_encoders import *  # noqa: F401,F403\nfrom .model_utils import *  # noqa: F401,F403\nfrom .necks import *  # noqa: F401,F403\nfrom .registry import FUSION_LAYERS, MIDDLE_ENCODERS, VOXEL_ENCODERS\nfrom .roi_heads import *  # noqa: F401,F403\nfrom .voxel_encoders import *  # noqa: F401,F403\n\n__all__ = [\n    'VOXEL_ENCODERS', 'MIDDLE_ENCODERS', 'FUSION_LAYERS', 'build_backbone',\n    'build_neck', 'build_roi_extractor', 'build_shared_head', 'build_head',\n    'build_loss', 'build_detector', 'build_fusion_layer',\n    'build_middle_encoder', 'build_voxel_encoder'\n]\n"
  },
  {
    "path": "mmdet3d/models/backbones/DLA.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.utils.checkpoint as cp\nfrom mmcv.cnn import (build_conv_layer, build_norm_layer, build_plugin_layer,\n                      constant_init, kaiming_init)\nfrom mmcv.runner import load_checkpoint\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom mmdet.utils import get_root_logger\nfrom ..builder import BACKBONES\n\ntry:\n    from dcn_v2 import DCN\n    # from .DCNv2.dcn_v2 import DCN\nexcept:\n    print('import DCN failed')\n    DCN = None\nimport numpy as np\nimport math\n\nBN_MOMENTUM = 0.1\n\n\ndef get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):\n    return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))\n\n\ndef conv3x3(in_planes, out_planes, stride=1):\n    \"3x3 convolution with padding\"\n    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n                     padding=1, bias=False)\n\n\nclass BasicBlock(nn.Module):\n    def __init__(self, inplanes, planes, stride=1, dilation=1):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,\n                               stride=stride, padding=dilation,\n                               bias=False, dilation=dilation)\n        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,\n                               stride=1, padding=dilation,\n                               bias=False, dilation=dilation)\n        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.stride = stride\n\n    def forward(self, x, residual=None):\n        if residual is None:\n            residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 2\n\n    def __init__(self, inplanes, planes, stride=1, dilation=1):\n        super(Bottleneck, self).__init__()\n        expansion = Bottleneck.expansion\n        bottle_planes = planes // expansion\n        self.conv1 = nn.Conv2d(inplanes, bottle_planes,\n                               kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)\n        self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,\n                               stride=stride, padding=dilation,\n                               bias=False, dilation=dilation)\n        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)\n        self.conv3 = nn.Conv2d(bottle_planes, planes,\n                               kernel_size=1, bias=False)\n        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.stride = stride\n\n    def forward(self, x, residual=None):\n        if residual is None:\n            residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass BottleneckX(nn.Module):\n    expansion = 2\n    cardinality = 32\n\n    def __init__(self, inplanes, planes, stride=1, dilation=1):\n        
super(BottleneckX, self).__init__()\n        cardinality = BottleneckX.cardinality\n        # dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))\n        # bottle_planes = dim * cardinality\n        bottle_planes = planes * cardinality // 32\n        self.conv1 = nn.Conv2d(inplanes, bottle_planes,\n                               kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)\n        self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,\n                               stride=stride, padding=dilation, bias=False,\n                               dilation=dilation, groups=cardinality)\n        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)\n        self.conv3 = nn.Conv2d(bottle_planes, planes,\n                               kernel_size=1, bias=False)\n        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.stride = stride\n\n    def forward(self, x, residual=None):\n        if residual is None:\n            residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Root(nn.Module):\n    def __init__(self, in_channels, out_channels, kernel_size, residual):\n        super(Root, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels, out_channels, 1,\n            stride=1, bias=False, padding=(kernel_size - 1) // 2)\n        self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.residual = residual\n\n    def forward(self, *x):\n        children = x\n        x = self.conv(torch.cat(x, 1))\n        x = self.bn(x)\n        if self.residual:\n            x += children[0]\n        x = self.relu(x)\n\n        return x\n\n\nclass Tree(nn.Module):\n    def __init__(self, levels, block, in_channels, out_channels, stride=1,\n                 level_root=False, root_dim=0, root_kernel_size=1,\n                 dilation=1, root_residual=False):\n        super(Tree, self).__init__()\n        if root_dim == 0:\n            root_dim = 2 * out_channels\n        if level_root:\n            root_dim += in_channels\n        if levels == 1:\n            self.tree1 = block(in_channels, out_channels, stride,\n                               dilation=dilation)\n            self.tree2 = block(out_channels, out_channels, 1,\n                               dilation=dilation)\n        else:\n            self.tree1 = Tree(levels - 1, block, in_channels, out_channels,\n                              stride, root_dim=0,\n                              root_kernel_size=root_kernel_size,\n                              dilation=dilation, root_residual=root_residual)\n            self.tree2 = Tree(levels - 1, block, out_channels, out_channels,\n                              root_dim=root_dim + out_channels,\n                              root_kernel_size=root_kernel_size,\n                              dilation=dilation, root_residual=root_residual)\n        if levels == 1:\n            self.root = Root(root_dim, out_channels, root_kernel_size,\n                             root_residual)\n        self.level_root = level_root\n        self.root_dim = root_dim\n        self.downsample = None\n        
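# ``downsample`` (max pool) and ``project`` (1x1 conv + BN) adapt the\n        # residual branch when stride > 1 or channel counts differ (set below).\n        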
self.project = None\n        self.levels = levels\n        if stride > 1:\n            self.downsample = nn.MaxPool2d(stride, stride=stride)\n        if in_channels != out_channels:\n            self.project = nn.Sequential(\n                nn.Conv2d(in_channels, out_channels,\n                          kernel_size=1, stride=1, bias=False),\n                nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)\n            )\n\n    def forward(self, x, residual=None, children=None):\n        children = [] if children is None else children\n        bottom = self.downsample(x) if self.downsample else x\n        residual = self.project(bottom) if self.project else bottom\n        if self.level_root:\n            children.append(bottom)\n        x1 = self.tree1(x, residual)\n        if self.levels == 1:\n            x2 = self.tree2(x1)\n            x = self.root(x2, x1, *children)\n        else:\n            children.append(x1)\n            x = self.tree2(x1, children=children)\n        return x\n\n\nclass DLA(nn.Module):\n    def __init__(self, levels, channels, num_classes=1000,\n                 block=BasicBlock, residual_root=False, linear_root=False,\n                 opt=None):\n        super(DLA, self).__init__()\n        self.channels = channels\n        self.num_classes = num_classes\n        self.base_layer = nn.Sequential(\n            nn.Conv2d(3, channels[0], kernel_size=7, stride=1,\n                      padding=3, bias=False),\n            nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),\n            nn.ReLU(inplace=True))\n        self.level0 = self._make_conv_level(\n            channels[0], channels[0], levels[0])\n        self.level1 = self._make_conv_level(\n            channels[0], channels[1], levels[1], stride=2)\n        self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,\n                           level_root=False,\n                           root_residual=residual_root)\n        self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,\n                           level_root=True, root_residual=residual_root)\n        self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,\n                           level_root=True, root_residual=residual_root)\n        self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,\n                           level_root=True, root_residual=residual_root)\n        if opt.pre_img:\n            self.pre_img_layer = nn.Sequential(\n                nn.Conv2d(3, channels[0], kernel_size=7, stride=1,\n                          padding=3, bias=False),\n                nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),\n                nn.ReLU(inplace=True))\n        if opt.pre_hm:\n            self.pre_hm_layer = nn.Sequential(\n                nn.Conv2d(1, channels[0], kernel_size=7, stride=1,\n                          padding=3, bias=False),\n                nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),\n                nn.ReLU(inplace=True))\n        # for m in self.modules():\n        #     if isinstance(m, nn.Conv2d):\n        #         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n        #         m.weight.data.normal_(0, math.sqrt(2. 
/ n))\n        #     elif isinstance(m, nn.BatchNorm2d):\n        #         m.weight.data.fill_(1)\n        #         m.bias.data.zero_()\n\n    def _make_level(self, block, inplanes, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or inplanes != planes:\n            downsample = nn.Sequential(\n                nn.MaxPool2d(stride, stride=stride),\n                nn.Conv2d(inplanes, planes,\n                          kernel_size=1, stride=1, bias=False),\n                nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),\n            )\n\n        layers = []\n        layers.append(block(inplanes, planes, stride, downsample=downsample))\n        for i in range(1, blocks):\n            layers.append(block(inplanes, planes))\n\n        return nn.Sequential(*layers)\n\n    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):\n        modules = []\n        for i in range(convs):\n            modules.extend([\n                nn.Conv2d(inplanes, planes, kernel_size=3,\n                          stride=stride if i == 0 else 1,\n                          padding=dilation, bias=False, dilation=dilation),\n                nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),\n                nn.ReLU(inplace=True)])\n            inplanes = planes\n        return nn.Sequential(*modules)\n\n    def forward(self, x, pre_img=None, pre_hm=None):\n        y = []\n        x = self.base_layer(x)\n        if pre_img is not None:\n            x = x + self.pre_img_layer(pre_img)\n        if pre_hm is not None:\n            x = x + self.pre_hm_layer(pre_hm)\n        for i in range(6):\n            x = getattr(self, 'level{}'.format(i))(x)\n            y.append(x)\n\n        return y\n\n    def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):\n        # fc = self.fc\n        if name.endswith('.pth'):\n            model_weights = torch.load(data + name)\n        else:\n            model_url = get_model_url(data, name, hash)\n            model_weights = model_zoo.load_url(model_url)\n        num_classes = len(model_weights[list(model_weights.keys())[-1]])\n        self.fc = nn.Conv2d(\n            self.channels[-1], num_classes,\n            kernel_size=1, stride=1, padding=0, bias=True)\n        self.load_state_dict(model_weights, strict=False)\n        # self.fc = fc\n\n\ndef dla34(pretrained=True, **kwargs):  # DLA-34\n    model = DLA([1, 1, 1, 2, 2, 1],\n                [16, 32, 64, 128, 256, 512],\n                block=BasicBlock, **kwargs)\n    if pretrained:\n        model.load_pretrained_model(\n            data='imagenet', name='dla34', hash='ba72cf86')\n    else:\n        print('Warning: No ImageNet pretrain!!')\n    return model\n\n\ndef dla102(pretrained=None, **kwargs):  # DLA-102\n    Bottleneck.expansion = 2\n    model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],\n                block=Bottleneck, residual_root=True, **kwargs)\n    if pretrained:\n        model.load_pretrained_model(\n            data='imagenet', name='dla102', hash='d94d9790')\n    return model\n\n\ndef dla46_c(pretrained=None, **kwargs):  # DLA-46-C\n    Bottleneck.expansion = 2\n    model = DLA([1, 1, 1, 2, 2, 1],\n                [16, 32, 64, 64, 128, 256],\n                block=Bottleneck, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(\n            data='imagenet', name='dla46_c', hash='2bfd52c3')\n    return model\n\n\ndef dla46x_c(pretrained=None, **kwargs):  # DLA-X-46-C\n    BottleneckX.expansion = 2\n    model 
= DLA([1, 1, 1, 2, 2, 1],\n                [16, 32, 64, 64, 128, 256],\n                block=BottleneckX, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(\n            data='imagenet', name='dla46x_c', hash='d761bae7')\n    return model\n\n\ndef dla60x_c(pretrained=None, **kwargs):  # DLA-X-60-C\n    BottleneckX.expansion = 2\n    model = DLA([1, 1, 1, 2, 3, 1],\n                [16, 32, 64, 64, 128, 256],\n                block=BottleneckX, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(\n            data='imagenet', name='dla60x_c', hash='b870c45c')\n    return model\n\n\ndef dla60(pretrained=None, **kwargs):  # DLA-60\n    Bottleneck.expansion = 2\n    model = DLA([1, 1, 1, 2, 3, 1],\n                [16, 32, 128, 256, 512, 1024],\n                block=Bottleneck, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(\n            data='imagenet', name='dla60', hash='24839fc4')\n    return model\n\n\ndef dla60x(pretrained=None, **kwargs):  # DLA-X-60\n    BottleneckX.expansion = 2\n    model = DLA([1, 1, 1, 2, 3, 1],\n                [16, 32, 128, 256, 512, 1024],\n                block=BottleneckX, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(\n            data='imagenet', name='dla60x', hash='d15cacda')\n    return model\n\n\ndef dla102x(pretrained=None, **kwargs):  # DLA-X-102\n    BottleneckX.expansion = 2\n    model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],\n                block=BottleneckX, residual_root=True, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(\n            data='imagenet', name='dla102x', hash='ad62be81')\n    return model\n\n\ndef dla102x2(pretrained=None, **kwargs):  # DLA-X-102 64\n    BottleneckX.cardinality = 64\n    model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],\n                block=BottleneckX, residual_root=True, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(\n            data='imagenet', name='dla102x2', hash='262837b6')\n    return model\n\n\ndef dla169(pretrained=None, **kwargs):  # DLA-169\n    Bottleneck.expansion = 2\n    model = DLA([1, 1, 2, 3, 5, 1], [16, 32, 128, 256, 512, 1024],\n                block=Bottleneck, residual_root=True, **kwargs)\n    if pretrained is not None:\n        model.load_pretrained_model(\n            data='imagenet', name='dla169', hash='0914e092')\n    return model\n\n\nclass Identity(nn.Module):\n\n    def __init__(self):\n        super(Identity, self).__init__()\n\n    def forward(self, x):\n        return x\n\n\ndef fill_fc_weights(layers):\n    for m in layers.modules():\n        if isinstance(m, nn.Conv2d):\n            if m.bias is not None:\n                nn.init.constant_(m.bias, 0)\n\n\ndef fill_up_weights(up):\n    w = up.weight.data\n    f = math.ceil(w.size(2) / 2)\n    c = (2 * f - 1 - f % 2) / (2. 
* f)\n    for i in range(w.size(2)):\n        for j in range(w.size(3)):\n            w[0, 0, i, j] = \\\n                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))\n    for c in range(1, w.size(0)):\n        w[c, 0, :, :] = w[0, 0, :, :]\n\n\nclass Conv(nn.Module):\n    def __init__(self, chi, cho):\n        super(Conv, self).__init__()\n        self.conv = nn.Sequential(\n            nn.Conv2d(chi, cho, kernel_size=1, stride=1, bias=False),\n            nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),\n            nn.ReLU(inplace=True))\n\n    def forward(self, x):\n        return self.conv(x)\n\n\nclass GlobalConv(nn.Module):\n    def __init__(self, chi, cho, k=7, d=1):\n        super(GlobalConv, self).__init__()\n        gcl = nn.Sequential(\n            nn.Conv2d(chi, cho, kernel_size=(k, 1), stride=1, bias=False,\n                      dilation=d, padding=(d * (k // 2), 0)),\n            nn.Conv2d(cho, cho, kernel_size=(1, k), stride=1, bias=False,\n                      dilation=d, padding=(0, d * (k // 2))))\n        gcr = nn.Sequential(\n            nn.Conv2d(chi, cho, kernel_size=(1, k), stride=1, bias=False,\n                      dilation=d, padding=(0, d * (k // 2))),\n            nn.Conv2d(cho, cho, kernel_size=(k, 1), stride=1, bias=False,\n                      dilation=d, padding=(d * (k // 2), 0)))\n        fill_fc_weights(gcl)\n        fill_fc_weights(gcr)\n        self.gcl = gcl\n        self.gcr = gcr\n        self.act = nn.Sequential(\n            nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),\n            nn.ReLU(inplace=True)\n        )\n\n    def forward(self, x):\n        x = self.gcl(x) + self.gcr(x)\n        x = self.act(x)\n        return x\n\n\nclass DeformConv(nn.Module):\n    def __init__(self, chi, cho):\n        super(DeformConv, self).__init__()\n        self.actf = nn.Sequential(\n            nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),\n            nn.ReLU(inplace=True)\n        )\n        self.conv = DCN(chi, cho, kernel_size=(3, 3), stride=1, padding=1, dilation=1, deformable_groups=1)\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.actf(x)\n        return x\n\n\nclass IDAUp(nn.Module):\n    def __init__(self, o, channels, up_f, node_type=(DeformConv, DeformConv)):\n        super(IDAUp, self).__init__()\n        for i in range(1, len(channels)):\n            c = channels[i]\n            f = int(up_f[i])\n            proj = node_type[0](c, o)\n            node = node_type[1](o, o)\n\n            up = nn.ConvTranspose2d(o, o, f * 2, stride=f,\n                                    padding=f // 2, output_padding=0,\n                                    groups=o, bias=False)\n            fill_up_weights(up)\n\n            setattr(self, 'proj_' + str(i), proj)\n            setattr(self, 'up_' + str(i), up)\n            setattr(self, 'node_' + str(i), node)\n\n    def forward(self, layers, startp, endp):\n        for i in range(startp + 1, endp):\n            upsample = getattr(self, 'up_' + str(i - startp))\n            project = getattr(self, 'proj_' + str(i - startp))\n            layers[i] = upsample(project(layers[i]))\n            node = getattr(self, 'node_' + str(i - startp))\n            layers[i] = node(layers[i] + layers[i - 1])\n\n\nclass DLAUp(nn.Module):\n    def __init__(self, startp, channels, scales, in_channels=None,\n                 node_type=DeformConv):\n        super(DLAUp, self).__init__()\n        self.startp = startp\n        if in_channels is None:\n            in_channels = channels\n        self.channels = 
channels\n        channels = list(channels)\n        scales = np.array(scales, dtype=int)\n        for i in range(len(channels) - 1):\n            j = -i - 2\n            setattr(self, 'ida_{}'.format(i),\n                    IDAUp(channels[j], in_channels[j:],\n                          scales[j:] // scales[j],\n                          node_type=node_type))\n            scales[j + 1:] = scales[j]\n            in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]\n\n    def forward(self, layers):\n        out = [layers[-1]]  # start with 32\n        for i in range(len(layers) - self.startp - 1):\n            ida = getattr(self, 'ida_{}'.format(i))\n            ida(layers, len(layers) - i - 2, len(layers))\n            out.insert(0, layers[-1])\n        return out\n\n\nclass Interpolate(nn.Module):\n    def __init__(self, scale, mode):\n        super(Interpolate, self).__init__()\n        self.scale = scale\n        self.mode = mode\n\n    def forward(self, x):\n        x = F.interpolate(x, scale_factor=self.scale, mode=self.mode, align_corners=False)\n        return x\n\n\nDLA_NODE = {\n    'dcn': (DeformConv, DeformConv),\n    'gcn': (Conv, GlobalConv),\n    'conv': (Conv, Conv),\n}\n\n\nclass BaseModel(nn.Module):\n    def __init__(self, heads, head_convs, num_stacks, last_channel, opt=None):\n        super(BaseModel, self).__init__()\n        if opt is not None and opt.head_kernel != 3:\n            print('Using head kernel:', opt.head_kernel)\n            head_kernel = opt.head_kernel\n        else:\n            head_kernel = 3\n        self.num_stacks = num_stacks\n        self.heads = heads\n        for head in self.heads:\n            classes = self.heads[head]\n            head_conv = head_convs[head]\n            if len(head_conv) > 0:\n                out = nn.Conv2d(head_conv[-1], classes,\n                                kernel_size=1, stride=1, padding=0, bias=True)\n                conv = nn.Conv2d(last_channel, head_conv[0],\n                                 kernel_size=head_kernel,\n                                 padding=head_kernel // 2, bias=True)\n                convs = [conv]\n                for k in range(1, len(head_conv)):\n                    convs.append(nn.Conv2d(head_conv[k - 1], head_conv[k],\n                                           kernel_size=1, bias=True))\n                if len(convs) == 1:\n                    fc = nn.Sequential(conv, nn.ReLU(inplace=True), out)\n                elif len(convs) == 2:\n                    fc = nn.Sequential(\n                        convs[0], nn.ReLU(inplace=True),\n                        convs[1], nn.ReLU(inplace=True), out)\n                elif len(convs) == 3:\n                    fc = nn.Sequential(\n                        convs[0], nn.ReLU(inplace=True),\n                        convs[1], nn.ReLU(inplace=True),\n                        convs[2], nn.ReLU(inplace=True), out)\n                elif len(convs) == 4:\n                    fc = nn.Sequential(\n                        convs[0], nn.ReLU(inplace=True),\n                        convs[1], nn.ReLU(inplace=True),\n                        convs[2], nn.ReLU(inplace=True),\n                        convs[3], nn.ReLU(inplace=True), out)\n                if 'hm' in head:\n                    fc[-1].bias.data.fill_(opt.prior_bias)\n                else:\n                    fill_fc_weights(fc)\n            else:\n                fc = nn.Conv2d(last_channel, classes,\n                               kernel_size=1, stride=1, padding=0, bias=True)\n          
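      # 'hm' (heatmap) heads: bias filled with ``opt.prior_bias``;\n                # other heads go through ``fill_fc_weights``.\n          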
      if 'hm' in head:\n                    fc.bias.data.fill_(opt.prior_bias)\n                else:\n                    fill_fc_weights(fc)\n            self.__setattr__(head, fc)\n\n    def img2feats(self, x):\n        raise NotImplementedError\n\n    def imgpre2feats(self, x, pre_img=None, pre_hm=None):\n        raise NotImplementedError\n\n    def forward(self, x, pre_img=None, pre_hm=None):\n        if (pre_hm is not None) or (pre_img is not None):\n            feats = self.imgpre2feats(x, pre_img, pre_hm)\n        else:\n            feats = self.img2feats(x)\n        return feats\n    #   out = []\n    #   if self.opt.model_output_list:\n    #     for s in range(self.num_stacks):\n    #       z = []\n    #       for head in sorted(self.heads):\n    #           z.append(self.__getattr__(head)(feats[s]))\n    #       out.append(z)\n    #   else:\n    #     for s in range(self.num_stacks):\n    #       z = {}\n    #       for head in self.heads:\n    #           z[head] = self.__getattr__(head)(feats[s])\n    #       out.append(z)\n    #   return out\n\n\n@BACKBONES.register_module()\nclass DLASeg(BaseModel):\n    def __init__(self, num_layers, heads, head_convs):\n        opt = Opt()\n        super(DLASeg, self).__init__(\n            heads, head_convs, 1, 64 if num_layers == 34 else 128, opt=opt)\n        down_ratio = 4\n        self.opt = opt\n        self.node_type = DLA_NODE[opt.dla_node]\n        print('Using node type:', self.node_type)\n        self.first_level = int(np.log2(down_ratio))\n        self.last_level = 5\n        self.base = globals()['dla{}'.format(num_layers)](pretrained=False, opt=opt)\n\n        channels = self.base.channels\n        scales = [2 ** i for i in range(len(channels[self.first_level:]))]\n        self.dla_up = DLAUp(\n            self.first_level, channels[self.first_level:], scales,\n            node_type=self.node_type)\n        out_channel = channels[self.first_level]\n\n        self.ida_up = IDAUp(\n            out_channel, channels[self.first_level:self.last_level],\n            [2 ** i for i in range(self.last_level - self.first_level)],\n            node_type=self.node_type)\n\n    def init_weights(self, pretrained=None):\n        if isinstance(pretrained, str):\n            logger = get_root_logger()\n            load_checkpoint(self, pretrained, strict=False, logger=logger)\n        else:\n            pass\n\n    def img2feats(self, x):\n        x = self.base(x)\n        x = self.dla_up(x)\n\n        y = []\n        for i in range(self.last_level - self.first_level):\n            y.append(x[i].clone())\n        self.ida_up(y, 0, len(y))\n\n        return [y[-1]]\n\n    def imgpre2feats(self, x, pre_img=None, pre_hm=None):\n        x = self.base(x, pre_img, pre_hm)\n        x = self.dla_up(x)\n\n        y = []\n        for i in range(self.last_level - self.first_level):\n            y.append(x[i].clone())\n        self.ida_up(y, 0, len(y))\n\n        return [y[-1]]\n\n\nclass Opt:\n    head_kernel = 3\n    levels = [1, 1, 1, 2, 2, 1]\n    channels = [16, 32, 64, 128, 256, 512]\n    pre_img = False\n    pre_hm = False\n    dla_node = 'dcn'\n    model_output_list = False\n\n\n# if __name__ == '__main__':\n# from mmdet.models import DLASeg\n# opt = Opt()\n# model = DLASeg(34, {}, -1, Opt)\n# checkpoints = torch.load('checkpoints/nuScenes_3Ddetection_e140.pth')\n# model.load_state_dict(checkpoints['state_dict'], strict=False)\n\n\n"
  },
  {
    "path": "mmdet3d/models/backbones/__init__.py",
    "content": "from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt\nfrom .multi_backbone import MultiBackbone\nfrom .nostem_regnet import NoStemRegNet\nfrom .pointnet2_sa_msg import PointNet2SAMSG\nfrom .pointnet2_sa_ssg import PointNet2SASSG\nfrom .second import SECOND\nfrom .DLA import DLASeg\nfrom .swin import SwinTransformer\n\n__all__ = [\n    'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'NoStemRegNet',\n    'SECOND', 'PointNet2SASSG', 'PointNet2SAMSG', 'MultiBackbone', 'DLASeg',\n    'SwinTransformer'\n]\n"
  },
  {
    "path": "mmdet3d/models/backbones/base_pointnet.py",
    "content": "from abc import ABCMeta\nfrom mmcv.runner import load_checkpoint\nfrom torch import nn as nn\n\n\nclass BasePointNet(nn.Module, metaclass=ABCMeta):\n    \"\"\"Base class for PointNet.\"\"\"\n\n    def __init__(self):\n        super(BasePointNet, self).__init__()\n        self.fp16_enabled = False\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize the weights of PointNet backbone.\"\"\"\n        # Do not initialize the conv layers\n        # to follow the original implementation\n        if isinstance(pretrained, str):\n            from mmdet3d.utils import get_root_logger\n            logger = get_root_logger()\n            load_checkpoint(self, pretrained, strict=False, logger=logger)\n\n    @staticmethod\n    def _split_point_feats(points):\n        \"\"\"Split coordinates and features of input points.\n\n        Args:\n            points (torch.Tensor): Point coordinates with features,\n                with shape (B, N, 3 + input_feature_dim).\n\n        Returns:\n            torch.Tensor: Coordinates of input points.\n            torch.Tensor: Features of input points.\n        \"\"\"\n        xyz = points[..., 0:3].contiguous()\n        if points.size(-1) > 3:\n            features = points[..., 3:].transpose(1, 2).contiguous()\n        else:\n            features = None\n\n        return xyz, features\n"
  },
  {
    "path": "mmdet3d/models/backbones/multi_backbone.py",
    "content": "import copy\nimport torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import auto_fp16, load_checkpoint\nfrom torch import nn as nn\n\nfrom mmdet.models import BACKBONES, build_backbone\n\n\n@BACKBONES.register_module()\nclass MultiBackbone(nn.Module):\n    \"\"\"MultiBackbone with different configs.\n\n    Args:\n        num_streams (int): The number of backbones.\n        backbones (list or dict): A list of backbone configs.\n        aggregation_mlp_channels (list[int]): Specify the mlp layers\n            for feature aggregation.\n        conv_cfg (dict): Config dict of convolutional layers.\n        norm_cfg (dict): Config dict of normalization layers.\n        act_cfg (dict): Config dict of activation layers.\n        suffixes (list): A list of suffixes to rename the return dict\n            for each backbone.\n    \"\"\"\n\n    def __init__(self,\n                 num_streams,\n                 backbones,\n                 aggregation_mlp_channels=None,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),\n                 act_cfg=dict(type='ReLU'),\n                 suffixes=('net0', 'net1'),\n                 **kwargs):\n        super().__init__()\n        assert isinstance(backbones, dict) or isinstance(backbones, list)\n        if isinstance(backbones, dict):\n            backbones_list = []\n            for ind in range(num_streams):\n                backbones_list.append(copy.deepcopy(backbones))\n            backbones = backbones_list\n\n        assert len(backbones) == num_streams\n        assert len(suffixes) == num_streams\n\n        self.backbone_list = nn.ModuleList()\n        # Rename the ret_dict with different suffixs.\n        self.suffixes = suffixes\n\n        out_channels = 0\n\n        for backbone_cfg in backbones:\n            out_channels += backbone_cfg['fp_channels'][-1][-1]\n            self.backbone_list.append(build_backbone(backbone_cfg))\n\n        # Feature aggregation layers\n        if aggregation_mlp_channels is None:\n            aggregation_mlp_channels = [\n                out_channels, out_channels // 2,\n                out_channels // len(self.backbone_list)\n            ]\n        else:\n            aggregation_mlp_channels.insert(0, out_channels)\n\n        self.aggregation_layers = nn.Sequential()\n        for i in range(len(aggregation_mlp_channels) - 1):\n            self.aggregation_layers.add_module(\n                f'layer{i}',\n                ConvModule(\n                    aggregation_mlp_channels[i],\n                    aggregation_mlp_channels[i + 1],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg,\n                    bias=True,\n                    inplace=True))\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize the weights of PointNet++ backbone.\"\"\"\n        # Do not initialize the conv layers\n        # to follow the original implementation\n        if isinstance(pretrained, str):\n            from mmdet3d.utils import get_root_logger\n            logger = get_root_logger()\n            load_checkpoint(self, pretrained, strict=False, logger=logger)\n\n    @auto_fp16()\n    def forward(self, points):\n        \"\"\"Forward pass.\n\n        Args:\n            points (torch.Tensor): point coordinates with features,\n                with shape (B, N, 3 + input_feature_dim).\n\n        
Returns:\n            dict[str, list[torch.Tensor]]: Outputs from multiple backbones.\n\n                - fp_xyz[suffix] (list[torch.Tensor]): The coordinates of\n                  each fp features.\n                - fp_features[suffix] (list[torch.Tensor]): The features\n                  from each Feature Propagate Layers.\n                - fp_indices[suffix] (list[torch.Tensor]): Indices of the\n                  input points.\n                - hd_feature (torch.Tensor): The aggregation feature\n                  from multiple backbones.\n        \"\"\"\n        ret = {}\n        fp_features = []\n        for ind in range(len(self.backbone_list)):\n            cur_ret = self.backbone_list[ind](points)\n            cur_suffix = self.suffixes[ind]\n            fp_features.append(cur_ret['fp_features'][-1])\n            if cur_suffix != '':\n                for k in cur_ret.keys():\n                    cur_ret[k + '_' + cur_suffix] = cur_ret.pop(k)\n            ret.update(cur_ret)\n\n        # Combine the features here\n        hd_feature = torch.cat(fp_features, dim=1)\n        hd_feature = self.aggregation_layers(hd_feature)\n        ret['hd_feature'] = hd_feature\n        return ret\n"
  },
  {
    "path": "mmdet3d/models/backbones/nostem_regnet.py",
    "content": "from mmdet.models.backbones import RegNet\nfrom ..builder import BACKBONES\n\n\n@BACKBONES.register_module()\nclass NoStemRegNet(RegNet):\n    \"\"\"RegNet backbone without Stem for 3D detection.\n\n    More details can be found in `paper <https://arxiv.org/abs/2003.13678>`_ .\n\n    Args:\n        arch (dict): The parameter of RegNets.\n            - w0 (int): Initial width.\n            - wa (float): Slope of width.\n            - wm (float): Quantization parameter to quantize the width.\n            - depth (int): Depth of the backbone.\n            - group_w (int): Width of group.\n            - bot_mul (float): Bottleneck ratio, i.e. expansion of bottlneck.\n        strides (Sequence[int]): Strides of the first block of each stage.\n        base_channels (int): Base channels after stem layer.\n        in_channels (int): Number of input image channels. Normally 3.\n        dilations (Sequence[int]): Dilation of each stage.\n        out_indices (Sequence[int]): Output from which stages.\n        style (str): `pytorch` or `caffe`. If set to \"pytorch\", the stride-two\n            layer is the 3x3 conv layer, otherwise the stride-two layer is\n            the first 1x1 conv layer.\n        frozen_stages (int): Stages to be frozen (all param fixed). -1 means\n            not freezing any parameters.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed.\n        zero_init_residual (bool): Whether to use zero init for last norm layer\n            in resblocks to let them behave as identity.\n\n    Example:\n        >>> from mmdet3d.models import NoStemRegNet\n        >>> import torch\n        >>> self = NoStemRegNet(\n                arch=dict(\n                    w0=88,\n                    wa=26.31,\n                    wm=2.25,\n                    group_w=48,\n                    depth=25,\n                    bot_mul=1.0))\n        >>> self.eval()\n        >>> inputs = torch.rand(1, 64, 16, 16)\n        >>> level_outputs = self.forward(inputs)\n        >>> for level_out in level_outputs:\n        ...     print(tuple(level_out.shape))\n        (1, 96, 8, 8)\n        (1, 192, 4, 4)\n        (1, 432, 2, 2)\n        (1, 1008, 1, 1)\n    \"\"\"\n\n    def __init__(self, arch, **kwargs):\n        super(NoStemRegNet, self).__init__(arch, **kwargs)\n\n    def _make_stem_layer(self, in_channels, base_channels):\n        \"\"\"Override the original function that do not initialize a stem layer\n        since 3D detector's voxel encoder works like a stem layer.\"\"\"\n        return\n\n    def forward(self, x):\n        \"\"\"Forward function of backbone.\n\n        Args:\n            x (torch.Tensor): Features in shape (N, C, H, W).\n\n        Returns:\n            tuple[torch.Tensor]: Multi-scale features.\n        \"\"\"\n        outs = []\n        for i, layer_name in enumerate(self.res_layers):\n            res_layer = getattr(self, layer_name)\n            x = res_layer(x)\n            if i in self.out_indices:\n                outs.append(x)\n        return tuple(outs)\n"
  },
  {
    "path": "mmdet3d/models/backbones/pointnet2_sa_msg.py",
    "content": "import torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import auto_fp16\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import build_sa_module\nfrom mmdet.models import BACKBONES\nfrom .base_pointnet import BasePointNet\n\n\n@BACKBONES.register_module()\nclass PointNet2SAMSG(BasePointNet):\n    \"\"\"PointNet2 with Multi-scale grouping.\n\n    Args:\n        in_channels (int): Input channels of point cloud.\n        num_points (tuple[int]): The number of points which each SA\n            module samples.\n        radii (tuple[float]): Sampling radii of each SA module.\n        num_samples (tuple[int]): The number of samples for ball\n            query in each SA module.\n        sa_channels (tuple[tuple[int]]): Out channels of each mlp in SA module.\n        aggregation_channels (tuple[int]): Out channels of aggregation\n            multi-scale grouping features.\n        fps_mods (tuple[int]): Mod of FPS for each SA module.\n        fps_sample_range_lists (tuple[tuple[int]]): The number of sampling\n            points which each SA module samples.\n        dilated_group (tuple[bool]): Whether to use dilated ball query for\n        out_indices (Sequence[int]): Output from which stages.\n        norm_cfg (dict): Config of normalization layer.\n        sa_cfg (dict): Config of set abstraction module, which may contain\n            the following keys and values:\n\n            - pool_mod (str): Pool method ('max' or 'avg') for SA modules.\n            - use_xyz (bool): Whether to use xyz as a part of features.\n            - normalize_xyz (bool): Whether to normalize xyz with radii in\n              each SA module.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 num_points=(2048, 1024, 512, 256),\n                 radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),\n                 num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),\n                 sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),\n                              ((64, 64, 128), (64, 64, 128), (64, 96, 128)),\n                              ((128, 128, 256), (128, 192, 256), (128, 256,\n                                                                  256))),\n                 aggregation_channels=(64, 128, 256),\n                 fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),\n                 fps_sample_range_lists=((-1), (-1), (512, -1)),\n                 dilated_group=(True, True, True),\n                 out_indices=(2, ),\n                 norm_cfg=dict(type='BN2d'),\n                 sa_cfg=dict(\n                     type='PointSAModuleMSG',\n                     pool_mod='max',\n                     use_xyz=True,\n                     normalize_xyz=False)):\n        super().__init__()\n        self.num_sa = len(sa_channels)\n        self.out_indices = out_indices\n        assert max(out_indices) < self.num_sa\n        assert len(num_points) == len(radii) == len(num_samples) == len(\n            sa_channels) == len(aggregation_channels)\n\n        self.SA_modules = nn.ModuleList()\n        self.aggregation_mlps = nn.ModuleList()\n        sa_in_channel = in_channels - 3  # number of channels without xyz\n        skip_channel_list = [sa_in_channel]\n\n        for sa_index in range(self.num_sa):\n            cur_sa_mlps = list(sa_channels[sa_index])\n            sa_out_channel = 0\n            for radius_index in range(len(radii[sa_index])):\n                cur_sa_mlps[radius_index] = [sa_in_channel] + list(\n                    
cur_sa_mlps[radius_index])\n                sa_out_channel += cur_sa_mlps[radius_index][-1]\n\n            if isinstance(fps_mods[sa_index], tuple):\n                cur_fps_mod = list(fps_mods[sa_index])\n            else:\n                cur_fps_mod = list([fps_mods[sa_index]])\n\n            if isinstance(fps_sample_range_lists[sa_index], tuple):\n                cur_fps_sample_range_list = list(\n                    fps_sample_range_lists[sa_index])\n            else:\n                cur_fps_sample_range_list = list(\n                    [fps_sample_range_lists[sa_index]])\n\n            self.SA_modules.append(\n                build_sa_module(\n                    num_point=num_points[sa_index],\n                    radii=radii[sa_index],\n                    sample_nums=num_samples[sa_index],\n                    mlp_channels=cur_sa_mlps,\n                    fps_mod=cur_fps_mod,\n                    fps_sample_range_list=cur_fps_sample_range_list,\n                    dilated_group=dilated_group[sa_index],\n                    norm_cfg=norm_cfg,\n                    cfg=sa_cfg,\n                    bias=True))\n            skip_channel_list.append(sa_out_channel)\n            self.aggregation_mlps.append(\n                ConvModule(\n                    sa_out_channel,\n                    aggregation_channels[sa_index],\n                    conv_cfg=dict(type='Conv1d'),\n                    norm_cfg=dict(type='BN1d'),\n                    kernel_size=1,\n                    bias=True))\n            sa_in_channel = aggregation_channels[sa_index]\n\n    @auto_fp16(apply_to=('points', ))\n    def forward(self, points):\n        \"\"\"Forward pass.\n\n        Args:\n            points (torch.Tensor): point coordinates with features,\n                with shape (B, N, 3 + input_feature_dim).\n\n        Returns:\n            dict[str, torch.Tensor]: Outputs of the last SA module.\n\n                - sa_xyz (torch.Tensor): The coordinates of sa features.\n                - sa_features (torch.Tensor): The features from the\n                    last Set Aggregation Layers.\n                - sa_indices (torch.Tensor): Indices of the \\\n                    input points.\n        \"\"\"\n        xyz, features = self._split_point_feats(points)\n\n        batch, num_points = xyz.shape[:2]\n        indices = xyz.new_tensor(range(num_points)).unsqueeze(0).repeat(\n            batch, 1).long()\n\n        sa_xyz = [xyz]\n        sa_features = [features]\n        sa_indices = [indices]\n\n        out_sa_xyz = []\n        out_sa_features = []\n        out_sa_indices = []\n\n        for i in range(self.num_sa):\n            cur_xyz, cur_features, cur_indices = self.SA_modules[i](\n                sa_xyz[i], sa_features[i])\n            cur_features = self.aggregation_mlps[i](cur_features)\n            sa_xyz.append(cur_xyz)\n            sa_features.append(cur_features)\n            sa_indices.append(\n                torch.gather(sa_indices[-1], 1, cur_indices.long()))\n            if i in self.out_indices:\n                out_sa_xyz.append(sa_xyz[-1])\n                out_sa_features.append(sa_features[-1])\n                out_sa_indices.append(sa_indices[-1])\n\n        return dict(\n            sa_xyz=out_sa_xyz,\n            sa_features=out_sa_features,\n            sa_indices=out_sa_indices)\n"
  },
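The forward pass above takes a (B, N, 3 + C) point tensor, runs it through the multi-scale-grouping SA stages, and returns the xyz/feature/index lists for the stages listed in `out_indices`. As a rough usage sketch (not part of the repository), the backbone can be built through the registry; the stage sizes below are illustrative assumptions chosen only to keep all per-stage arguments the same length, and the SA modules rely on compiled CUDA ops, so a GPU is required:

```python
import torch

from mmdet3d.models import build_backbone  # standard mmdet3d builder API (assumed available here)

# Two SA stages with per-stage arguments of matching length; values are illustrative only.
backbone = build_backbone(
    dict(
        type='PointNet2SAMSG',
        in_channels=4,                      # xyz + one extra feature channel
        num_points=(1024, 256),
        radii=((0.2, 0.4), (0.4, 0.8)),
        num_samples=((16, 32), (16, 32)),
        sa_channels=(((16, 16, 32), (16, 16, 32)),
                     ((64, 64, 128), (64, 64, 128))),
        aggregation_channels=(64, 128),
        fps_mods=('D-FPS', 'D-FPS'),
        fps_sample_range_lists=(-1, -1),
        dilated_group=(False, False),
        out_indices=(1, ))).cuda()

points = torch.rand(2, 2048, 4).cuda()      # (B, N, 3 + C)
out = backbone(points)
# out['sa_xyz'], out['sa_features'], out['sa_indices'] each hold one entry per
# index in out_indices; features have shape (B, aggregation_channels[i], num_points[i]).
```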
  {
    "path": "mmdet3d/models/backbones/pointnet2_sa_ssg.py",
    "content": "import torch\nfrom mmcv.runner import auto_fp16\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import PointFPModule, build_sa_module\nfrom mmdet.models import BACKBONES\nfrom .base_pointnet import BasePointNet\n\n\n@BACKBONES.register_module()\nclass PointNet2SASSG(BasePointNet):\n    \"\"\"PointNet2 with Single-scale grouping.\n\n    Args:\n        in_channels (int): Input channels of point cloud.\n        num_points (tuple[int]): The number of points which each SA\n            module samples.\n        radius (tuple[float]): Sampling radii of each SA module.\n        num_samples (tuple[int]): The number of samples for ball\n            query in each SA module.\n        sa_channels (tuple[tuple[int]]): Out channels of each mlp in SA module.\n        fp_channels (tuple[tuple[int]]): Out channels of each mlp in FP module.\n        norm_cfg (dict): Config of normalization layer.\n        sa_cfg (dict): Config of set abstraction module, which may contain\n            the following keys and values:\n\n            - pool_mod (str): Pool method ('max' or 'avg') for SA modules.\n            - use_xyz (bool): Whether to use xyz as a part of features.\n            - normalize_xyz (bool): Whether to normalize xyz with radii in\n              each SA module.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 num_points=(2048, 1024, 512, 256),\n                 radius=(0.2, 0.4, 0.8, 1.2),\n                 num_samples=(64, 32, 16, 16),\n                 sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                              (128, 128, 256)),\n                 fp_channels=((256, 256), (256, 256)),\n                 norm_cfg=dict(type='BN2d'),\n                 sa_cfg=dict(\n                     type='PointSAModule',\n                     pool_mod='max',\n                     use_xyz=True,\n                     normalize_xyz=True)):\n        super().__init__()\n        self.num_sa = len(sa_channels)\n        self.num_fp = len(fp_channels)\n\n        assert len(num_points) == len(radius) == len(num_samples) == len(\n            sa_channels)\n        assert len(sa_channels) >= len(fp_channels)\n\n        self.SA_modules = nn.ModuleList()\n        sa_in_channel = in_channels - 3  # number of channels without xyz\n        skip_channel_list = [sa_in_channel]\n\n        for sa_index in range(self.num_sa):\n            cur_sa_mlps = list(sa_channels[sa_index])\n            cur_sa_mlps = [sa_in_channel] + cur_sa_mlps\n            sa_out_channel = cur_sa_mlps[-1]\n\n            self.SA_modules.append(\n                build_sa_module(\n                    num_point=num_points[sa_index],\n                    radius=radius[sa_index],\n                    num_sample=num_samples[sa_index],\n                    mlp_channels=cur_sa_mlps,\n                    norm_cfg=norm_cfg,\n                    cfg=sa_cfg))\n            skip_channel_list.append(sa_out_channel)\n            sa_in_channel = sa_out_channel\n\n        self.FP_modules = nn.ModuleList()\n\n        fp_source_channel = skip_channel_list.pop()\n        fp_target_channel = skip_channel_list.pop()\n        for fp_index in range(len(fp_channels)):\n            cur_fp_mlps = list(fp_channels[fp_index])\n            cur_fp_mlps = [fp_source_channel + fp_target_channel] + cur_fp_mlps\n            self.FP_modules.append(PointFPModule(mlp_channels=cur_fp_mlps))\n            if fp_index != len(fp_channels) - 1:\n                fp_source_channel = cur_fp_mlps[-1]\n                
fp_target_channel = skip_channel_list.pop()\n\n    @auto_fp16(apply_to=('points', ))\n    def forward(self, points):\n        \"\"\"Forward pass.\n\n        Args:\n            points (torch.Tensor): point coordinates with features,\n                with shape (B, N, 3 + input_feature_dim).\n\n        Returns:\n            dict[str, list[torch.Tensor]]: Outputs after SA and FP modules.\n\n                - fp_xyz (list[torch.Tensor]): The coordinates of \\\n                    each fp features.\n                - fp_features (list[torch.Tensor]): The features \\\n                    from each Feature Propagate Layers.\n                - fp_indices (list[torch.Tensor]): Indices of the \\\n                    input points.\n        \"\"\"\n        xyz, features = self._split_point_feats(points)\n\n        batch, num_points = xyz.shape[:2]\n        indices = xyz.new_tensor(range(num_points)).unsqueeze(0).repeat(\n            batch, 1).long()\n\n        sa_xyz = [xyz]\n        sa_features = [features]\n        sa_indices = [indices]\n\n        for i in range(self.num_sa):\n            cur_xyz, cur_features, cur_indices = self.SA_modules[i](\n                sa_xyz[i], sa_features[i])\n            sa_xyz.append(cur_xyz)\n            sa_features.append(cur_features)\n            sa_indices.append(\n                torch.gather(sa_indices[-1], 1, cur_indices.long()))\n\n        fp_xyz = [sa_xyz[-1]]\n        fp_features = [sa_features[-1]]\n        fp_indices = [sa_indices[-1]]\n\n        for i in range(self.num_fp):\n            fp_features.append(self.FP_modules[i](\n                sa_xyz[self.num_sa - i - 1], sa_xyz[self.num_sa - i],\n                sa_features[self.num_sa - i - 1], fp_features[-1]))\n            fp_xyz.append(sa_xyz[self.num_sa - i - 1])\n            fp_indices.append(sa_indices[self.num_sa - i - 1])\n\n        ret = dict(\n            fp_xyz=fp_xyz, fp_features=fp_features, fp_indices=fp_indices)\n        return ret\n"
  },
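The SSG variant is an encoder/decoder: the SA stages downsample the cloud and the FP modules propagate features back toward finer resolutions, so each returned list grows by one entry per FP module. A small, hedged sketch of driving it standalone (the point count and feature dimension are arbitrary, and the underlying CUDA ops again need a GPU):

```python
import torch

from mmdet3d.models import build_backbone  # assumed builder entry point, as above

backbone = build_backbone(dict(type='PointNet2SASSG', in_channels=4)).cuda()

points = torch.rand(2, 16384, 4).cuda()     # (B, N, 3 + C)
out = backbone(points)
# With the default 4 SA stages and 2 FP modules, each of out['fp_xyz'],
# out['fp_features'] and out['fp_indices'] has 1 + len(fp_channels) == 3 entries,
# ordered from the coarsest SA output to the finest propagated resolution.
```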
  {
    "path": "mmdet3d/models/backbones/second.py",
    "content": "from mmcv.cnn import build_conv_layer, build_norm_layer\nfrom mmcv.runner import load_checkpoint\nfrom torch import nn as nn\n\nfrom mmdet.models import BACKBONES\n\n\n@BACKBONES.register_module()\nclass SECOND(nn.Module):\n    \"\"\"Backbone network for SECOND/PointPillars/PartA2/MVXNet.\n\n    Args:\n        in_channels (int): Input channels.\n        out_channels (list[int]): Output channels for multi-scale feature maps.\n        layer_nums (list[int]): Number of layers in each stage.\n        layer_strides (list[int]): Strides of each stage.\n        norm_cfg (dict): Config dict of normalization layers.\n        conv_cfg (dict): Config dict of convolutional layers.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=128,\n                 out_channels=[128, 128, 256],\n                 layer_nums=[3, 5, 5],\n                 layer_strides=[2, 2, 2],\n                 norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n                 conv_cfg=dict(type='Conv2d', bias=False)):\n        super(SECOND, self).__init__()\n        assert len(layer_strides) == len(layer_nums)\n        assert len(out_channels) == len(layer_nums)\n\n        in_filters = [in_channels, *out_channels[:-1]]\n        # note that when stride > 1, conv2d with same padding isn't\n        # equal to pad-conv2d. we should use pad-conv2d.\n        blocks = []\n        for i, layer_num in enumerate(layer_nums):\n            block = [\n                build_conv_layer(\n                    conv_cfg,\n                    in_filters[i],\n                    out_channels[i],\n                    3,\n                    stride=layer_strides[i],\n                    padding=1),\n                build_norm_layer(norm_cfg, out_channels[i])[1],\n                nn.ReLU(inplace=True),\n            ]\n            for j in range(layer_num):\n                block.append(\n                    build_conv_layer(\n                        conv_cfg,\n                        out_channels[i],\n                        out_channels[i],\n                        3,\n                        padding=1))\n                block.append(build_norm_layer(norm_cfg, out_channels[i])[1])\n                block.append(nn.ReLU(inplace=True))\n\n            block = nn.Sequential(*block)\n            blocks.append(block)\n\n        self.blocks = nn.ModuleList(blocks)\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize weights of the 2D backbone.\"\"\"\n        # Do not initialize the conv layers\n        # to follow the original implementation\n        if isinstance(pretrained, str):\n            from mmdet3d.utils import get_root_logger\n            logger = get_root_logger()\n            load_checkpoint(self, pretrained, strict=False, logger=logger)\n\n    def forward(self, x):\n        \"\"\"Forward function.\n\n        Args:\n            x (torch.Tensor): Input with shape (N, C, H, W).\n\n        Returns:\n            tuple[torch.Tensor]: Multi-scale features.\n        \"\"\"\n        outs = []\n        for i in range(len(self.blocks)):\n            x = self.blocks[i](x)\n            outs.append(x)\n        return tuple(outs)\n"
  },
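SECOND is a plain 2D convolutional pyramid over a BEV pseudo-image: each stage opens with a strided conv (stride 2 by default) and every stage's output is returned, so the number of feature maps equals len(layer_nums). A CPU-runnable sketch with the default configuration (the 200x176 input size is only an assumed PointPillars-like grid, not something this file fixes):

```python
import torch

from mmdet3d.models import build_backbone  # assumed builder entry point, as above

second = build_backbone(dict(type='SECOND'))   # defaults: in_channels=128, strides (2, 2, 2)

x = torch.rand(1, 128, 200, 176)               # (N, C, H, W) BEV pseudo-image
outs = second(x)
# Three maps at 1/2, 1/4 and 1/8 of the input resolution:
# (1, 128, 100, 88), (1, 128, 50, 44), (1, 256, 25, 22)
```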
  {
    "path": "mmdet3d/models/backbones/swin.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\r\nimport warnings\r\nfrom collections import OrderedDict\r\nfrom copy import deepcopy\r\nfrom typing import Sequence, Iterable, Optional\r\nfrom torch import Tensor\r\n\r\nimport math\r\nimport torch\r\nimport torch.nn as nn\r\nimport torch.nn.functional as F\r\nimport torch.utils.checkpoint as cp\r\nfrom mmcv.cnn import build_norm_layer, constant_init, build_conv_layer, build_activation_layer, xavier_init\r\nfrom mmcv.runner import BaseModule, _load_checkpoint\r\nfrom mmcv.utils import get_logger\r\n\r\nfrom mmdet.models.builder import BACKBONES\r\n\r\nfrom mmdet3d.models.utils.drop import build_dropout\r\nfrom mmdet3d.models.utils.transformer import FFN, to_2tuple, ModuleList\r\nfrom mmdet.utils import get_root_logger\r\n\r\ndef _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float,\r\n                           b: float) -> Tensor:\r\n    # Method based on\r\n    # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf\r\n    # Modified from\r\n    # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py\r\n    def norm_cdf(x):\r\n        # Computes standard normal cumulative distribution function\r\n        return (1. + math.erf(x / math.sqrt(2.))) / 2.\r\n\r\n    if (mean < a - 2 * std) or (mean > b + 2 * std):\r\n        warnings.warn(\r\n            'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. '\r\n            'The distribution of values may be incorrect.',\r\n            stacklevel=2)\r\n\r\n    with torch.no_grad():\r\n        # Values are generated by using a truncated uniform distribution and\r\n        # then using the inverse CDF for the normal distribution.\r\n        # Get upper and lower cdf values\r\n        lower = norm_cdf((a - mean) / std)\r\n        upper = norm_cdf((b - mean) / std)\r\n\r\n        # Uniformly fill tensor with values from [lower, upper], then translate\r\n        # to [2lower-1, 2upper-1].\r\n        tensor.uniform_(2 * lower - 1, 2 * upper - 1)\r\n\r\n        # Use inverse cdf transform for normal distribution to get truncated\r\n        # standard normal\r\n        tensor.erfinv_()\r\n\r\n        # Transform to proper mean, std\r\n        tensor.mul_(std * math.sqrt(2.))\r\n        tensor.add_(mean)\r\n\r\n        # Clamp to ensure it's in the proper range\r\n        tensor.clamp_(min=a, max=b)\r\n        return tensor\r\n\r\n\r\ndef trunc_normal_(tensor: Tensor,\r\n                  mean: float = 0.,\r\n                  std: float = 1.,\r\n                  a: float = -2.,\r\n                  b: float = 2.) -> Tensor:\r\n    r\"\"\"Fills the input Tensor with values drawn from a truncated normal\r\n    distribution. The values are effectively drawn from the normal distribution\r\n    :math:`\\mathcal{N}(\\text{mean}, \\text{std}^2)` with values outside\r\n    :math:`[a, b]` redrawn until they are within the bounds. 
The method used\r\n    for generating the random values works best when :math:`a \\leq \\text{mean}\r\n    \\leq b`.\r\n    Modified from\r\n    https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py\r\n    Args:\r\n        tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`.\r\n        mean (float): the mean of the normal distribution.\r\n        std (float): the standard deviation of the normal distribution.\r\n        a (float): the minimum cutoff value.\r\n        b (float): the maximum cutoff value.\r\n    \"\"\"\r\n    return _no_grad_trunc_normal_(tensor, mean, std, a, b)\r\n\r\ndef trunc_normal_init(module: nn.Module,\r\n                      mean: float = 0,\r\n                      std: float = 1,\r\n                      a: float = -2,\r\n                      b: float = 2,\r\n                      bias: float = 0) -> None:\r\n    if hasattr(module, 'weight') and module.weight is not None:\r\n        trunc_normal_(module.weight, mean, std, a, b)  # type: ignore\r\n    if hasattr(module, 'bias') and module.bias is not None:\r\n        nn.init.constant_(module.bias, bias)  # type: ignore\r\n\r\n\r\nclass AdaptivePadding(nn.Module):\r\n    \"\"\"Applies padding to input (if needed) so that input can get fully covered\r\n    by filter you specified. It support two modes \"same\" and \"corner\". The\r\n    \"same\" mode is same with \"SAME\" padding mode in TensorFlow, pad zero around\r\n    input. The \"corner\"  mode would pad zero to bottom right.\r\n    Args:\r\n        kernel_size (int | tuple): Size of the kernel:\r\n        stride (int | tuple): Stride of the filter. Default: 1:\r\n        dilation (int | tuple): Spacing between kernel elements.\r\n            Default: 1\r\n        padding (str): Support \"same\" and \"corner\", \"corner\" mode\r\n            would pad zero to bottom right, and \"same\" mode would\r\n            pad zero around input. 
Default: \"corner\".\r\n    Example:\r\n        >>> kernel_size = 16\r\n        >>> stride = 16\r\n        >>> dilation = 1\r\n        >>> input = torch.rand(1, 1, 15, 17)\r\n        >>> adap_pad = AdaptivePadding(\r\n        >>>     kernel_size=kernel_size,\r\n        >>>     stride=stride,\r\n        >>>     dilation=dilation,\r\n        >>>     padding=\"corner\")\r\n        >>> out = adap_pad(input)\r\n        >>> assert (out.shape[2], out.shape[3]) == (16, 32)\r\n        >>> input = torch.rand(1, 1, 16, 17)\r\n        >>> out = adap_pad(input)\r\n        >>> assert (out.shape[2], out.shape[3]) == (16, 32)\r\n    \"\"\"\r\n\r\n    def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'):\r\n\r\n        super(AdaptivePadding, self).__init__()\r\n\r\n        assert padding in ('same', 'corner')\r\n\r\n        kernel_size = to_2tuple(kernel_size)\r\n        stride = to_2tuple(stride)\r\n        padding = to_2tuple(padding)\r\n        dilation = to_2tuple(dilation)\r\n\r\n        self.padding = padding\r\n        self.kernel_size = kernel_size\r\n        self.stride = stride\r\n        self.dilation = dilation\r\n\r\n    def get_pad_shape(self, input_shape):\r\n        input_h, input_w = input_shape\r\n        kernel_h, kernel_w = self.kernel_size\r\n        stride_h, stride_w = self.stride\r\n        output_h = math.ceil(input_h / stride_h)\r\n        output_w = math.ceil(input_w / stride_w)\r\n        pad_h = max((output_h - 1) * stride_h +\r\n                    (kernel_h - 1) * self.dilation[0] + 1 - input_h, 0)\r\n        pad_w = max((output_w - 1) * stride_w +\r\n                    (kernel_w - 1) * self.dilation[1] + 1 - input_w, 0)\r\n        return pad_h, pad_w\r\n\r\n    def forward(self, x):\r\n        pad_h, pad_w = self.get_pad_shape(x.size()[-2:])\r\n        if pad_h > 0 or pad_w > 0:\r\n            if self.padding == 'corner':\r\n                x = F.pad(x, [0, pad_w, 0, pad_h])\r\n            elif self.padding == 'same':\r\n                x = F.pad(x, [\r\n                    pad_w // 2, pad_w - pad_w // 2, pad_h // 2,\r\n                    pad_h - pad_h // 2\r\n                ])\r\n        return x\r\n\r\n\r\nclass PatchEmbed(BaseModule):\r\n    \"\"\"Image to Patch Embedding.\r\n    We use a conv layer to implement PatchEmbed.\r\n    Args:\r\n        in_channels (int): The num of input channels. Default: 3\r\n        embed_dims (int): The dimensions of embedding. Default: 768\r\n        conv_type (str): The config dict for embedding\r\n            conv layer type selection. Default: \"Conv2d.\r\n        kernel_size (int): The kernel_size of embedding conv. Default: 16.\r\n        stride (int): The slide stride of embedding conv.\r\n            Default: None (Would be set as `kernel_size`).\r\n        padding (int | tuple | string ): The padding length of\r\n            embedding conv. When it is a string, it means the mode\r\n            of adaptive padding, support \"same\" and \"corner\" now.\r\n            Default: \"corner\".\r\n        dilation (int): The dilation rate of embedding conv. Default: 1.\r\n        bias (bool): Bias of embed conv. Default: True.\r\n        norm_cfg (dict, optional): Config dict for normalization layer.\r\n            Default: None.\r\n        input_size (int | tuple | None): The size of input, which will be\r\n            used to calculate the out size. Only work when `dynamic_size`\r\n            is False. 
Default: None.\r\n        init_cfg (`mmcv.ConfigDict`, optional): The Config for initialization.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    def __init__(\r\n        self,\r\n        in_channels=3,\r\n        embed_dims=768,\r\n        conv_type='Conv2d',\r\n        kernel_size=16,\r\n        stride=16,\r\n        padding='corner',\r\n        dilation=1,\r\n        bias=True,\r\n        norm_cfg=None,\r\n        input_size=None,\r\n        init_cfg=None,\r\n    ):\r\n        super(PatchEmbed, self).__init__(init_cfg=init_cfg)\r\n\r\n        self.embed_dims = embed_dims\r\n        if stride is None:\r\n            stride = kernel_size\r\n\r\n        kernel_size = to_2tuple(kernel_size)\r\n        stride = to_2tuple(stride)\r\n        dilation = to_2tuple(dilation)\r\n\r\n        if isinstance(padding, str):\r\n            self.adap_padding = AdaptivePadding(\r\n                kernel_size=kernel_size,\r\n                stride=stride,\r\n                dilation=dilation,\r\n                padding=padding)\r\n            # disable the padding of conv\r\n            padding = 0\r\n        else:\r\n            self.adap_padding = None\r\n        padding = to_2tuple(padding)\r\n\r\n        self.projection = build_conv_layer(\r\n            dict(type=conv_type),\r\n            in_channels=in_channels,\r\n            out_channels=embed_dims,\r\n            kernel_size=kernel_size,\r\n            stride=stride,\r\n            padding=padding,\r\n            dilation=dilation,\r\n            bias=bias)\r\n\r\n        if norm_cfg is not None:\r\n            self.norm = build_norm_layer(norm_cfg, embed_dims)[1]\r\n        else:\r\n            self.norm = None\r\n\r\n        if input_size:\r\n            input_size = to_2tuple(input_size)\r\n            # `init_out_size` would be used outside to\r\n            # calculate the num_patches\r\n            # when `use_abs_pos_embed` outside\r\n            self.init_input_size = input_size\r\n            if self.adap_padding:\r\n                pad_h, pad_w = self.adap_padding.get_pad_shape(input_size)\r\n                input_h, input_w = input_size\r\n                input_h = input_h + pad_h\r\n                input_w = input_w + pad_w\r\n                input_size = (input_h, input_w)\r\n\r\n            # https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html\r\n            h_out = (input_size[0] + 2 * padding[0] - dilation[0] *\r\n                     (kernel_size[0] - 1) - 1) // stride[0] + 1\r\n            w_out = (input_size[1] + 2 * padding[1] - dilation[1] *\r\n                     (kernel_size[1] - 1) - 1) // stride[1] + 1\r\n            self.init_out_size = (h_out, w_out)\r\n        else:\r\n            self.init_input_size = None\r\n            self.init_out_size = None\r\n\r\n    def forward(self, x):\r\n        \"\"\"\r\n        Args:\r\n            x (Tensor): Has shape (B, C, H, W). 
In most cases, C is 3.\r\n        Returns:\r\n            tuple: Contains merged results and its spatial shape.\r\n                - x (Tensor): Has shape (B, out_h * out_w, embed_dims)\r\n                - out_size (tuple[int]): Spatial shape of x, arrange as\r\n                    (out_h, out_w).\r\n        \"\"\"\r\n\r\n        if self.adap_padding:\r\n            x = self.adap_padding(x)\r\n\r\n        x = self.projection(x)\r\n        out_size = (x.shape[2], x.shape[3])\r\n        x = x.flatten(2).transpose(1, 2)\r\n        if self.norm is not None:\r\n            x = self.norm(x)\r\n        return x, out_size\r\n\r\nclass PatchMerging(BaseModule):\r\n    \"\"\"Merge patch feature map.\r\n    This layer groups the feature map by kernel_size, and applies norm and\r\n    linear layers to the grouped feature map. Our implementation uses\r\n    `nn.Unfold` to merge patches, which is about 25% faster than the original\r\n    implementation. However, pretrained models need to be modified for\r\n    compatibility.\r\n    Args:\r\n        in_channels (int): The num of input channels.\r\n        out_channels (int): The num of output channels.\r\n        kernel_size (int | tuple, optional): the kernel size in the unfold\r\n            layer. Defaults to 2.\r\n        stride (int | tuple, optional): the stride of the sliding blocks in the\r\n            unfold layer. Default: None. (Would be set as `kernel_size`)\r\n        padding (int | tuple | string ): The padding length of\r\n            the unfold layer. When it is a string, it means the mode\r\n            of adaptive padding, support \"same\" and \"corner\" now.\r\n            Default: \"corner\".\r\n        dilation (int | tuple, optional): dilation parameter in the unfold\r\n            layer. 
Default: 1.\r\n        bias (bool, optional): Whether to add bias in linear layer or not.\r\n            Defaults: False.\r\n        norm_cfg (dict, optional): Config dict for normalization layer.\r\n            Default: dict(type='LN').\r\n        init_cfg (dict, optional): The extra config for initialization.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 in_channels,\r\n                 out_channels,\r\n                 kernel_size=2,\r\n                 stride=None,\r\n                 padding='corner',\r\n                 dilation=1,\r\n                 bias=False,\r\n                 norm_cfg=dict(type='LN'),\r\n                 init_cfg=None):\r\n        super().__init__(init_cfg=init_cfg)\r\n        self.in_channels = in_channels\r\n        self.out_channels = out_channels\r\n        if stride:\r\n            stride = stride\r\n        else:\r\n            stride = kernel_size\r\n\r\n        kernel_size = to_2tuple(kernel_size)\r\n        stride = to_2tuple(stride)\r\n        dilation = to_2tuple(dilation)\r\n\r\n        if isinstance(padding, str):\r\n            self.adap_padding = AdaptivePadding(\r\n                kernel_size=kernel_size,\r\n                stride=stride,\r\n                dilation=dilation,\r\n                padding=padding)\r\n            # disable the padding of unfold\r\n            padding = 0\r\n        else:\r\n            self.adap_padding = None\r\n\r\n        padding = to_2tuple(padding)\r\n        self.sampler = nn.Unfold(\r\n            kernel_size=kernel_size,\r\n            dilation=dilation,\r\n            padding=padding,\r\n            stride=stride)\r\n\r\n        sample_dim = kernel_size[0] * kernel_size[1] * in_channels\r\n\r\n        if norm_cfg is not None:\r\n            self.norm = build_norm_layer(norm_cfg, sample_dim)[1]\r\n        else:\r\n            self.norm = None\r\n\r\n        self.reduction = nn.Linear(sample_dim, out_channels, bias=bias)\r\n\r\n    def forward(self, x, input_size):\r\n        \"\"\"\r\n        Args:\r\n            x (Tensor): Has shape (B, H*W, C_in).\r\n            input_size (tuple[int]): The spatial shape of x, arrange as (H, W).\r\n                Default: None.\r\n        Returns:\r\n            tuple: Contains merged results and its spatial shape.\r\n                - x (Tensor): Has shape (B, Merged_H * Merged_W, C_out)\r\n                - out_size (tuple[int]): Spatial shape of x, arrange as\r\n                    (Merged_H, Merged_W).\r\n        \"\"\"\r\n        B, L, C = x.shape\r\n        assert isinstance(input_size, Sequence), f'Expect ' \\\r\n                                                 f'input_size is ' \\\r\n                                                 f'`Sequence` ' \\\r\n                                                 f'but get {input_size}'\r\n\r\n        H, W = input_size\r\n        assert L == H * W, 'input feature has wrong size'\r\n\r\n        x = x.view(B, H, W, C).permute([0, 3, 1, 2])  # B, C, H, W\r\n        # Use nn.Unfold to merge patch. 
About 25% faster than original method,\r\n        # but need to modify pretrained model for compatibility\r\n\r\n        if self.adap_padding:\r\n            x = self.adap_padding(x)\r\n            H, W = x.shape[-2:]\r\n\r\n        x = self.sampler(x)\r\n        # if kernel_size=2 and stride=2, x should has shape (B, 4*C, H/2*W/2)\r\n\r\n        out_h = (H + 2 * self.sampler.padding[0] - self.sampler.dilation[0] *\r\n                 (self.sampler.kernel_size[0] - 1) -\r\n                 1) // self.sampler.stride[0] + 1\r\n        out_w = (W + 2 * self.sampler.padding[1] - self.sampler.dilation[1] *\r\n                 (self.sampler.kernel_size[1] - 1) -\r\n                 1) // self.sampler.stride[1] + 1\r\n\r\n        output_size = (out_h, out_w)\r\n        x = x.transpose(1, 2)  # B, H/2*W/2, 4*C\r\n        x = self.norm(x) if self.norm else x\r\n        x = self.reduction(x)\r\n        return x, output_size\r\n\r\ndef swin_converter(ckpt):\r\n\r\n    new_ckpt = OrderedDict()\r\n\r\n    def correct_unfold_reduction_order(x):\r\n        out_channel, in_channel = x.shape\r\n        x = x.reshape(out_channel, 4, in_channel // 4)\r\n        x = x[:, [0, 2, 1, 3], :].transpose(1,\r\n                                            2).reshape(out_channel, in_channel)\r\n        return x\r\n\r\n    def correct_unfold_norm_order(x):\r\n        in_channel = x.shape[0]\r\n        x = x.reshape(4, in_channel // 4)\r\n        x = x[[0, 2, 1, 3], :].transpose(0, 1).reshape(in_channel)\r\n        return x\r\n\r\n    for k, v in ckpt.items():\r\n        if k.startswith('head'):\r\n            continue\r\n        elif k.startswith('layers'):\r\n            new_v = v\r\n            if 'attn.' in k:\r\n                new_k = k.replace('attn.', 'attn.w_msa.')\r\n            elif 'mlp.' in k:\r\n                if 'mlp.fc1.' in k:\r\n                    new_k = k.replace('mlp.fc1.', 'ffn.layers.0.0.')\r\n                elif 'mlp.fc2.' in k:\r\n                    new_k = k.replace('mlp.fc2.', 'ffn.layers.1.')\r\n                else:\r\n                    new_k = k.replace('mlp.', 'ffn.')\r\n            elif 'downsample' in k:\r\n                new_k = k\r\n                if 'reduction.' in k:\r\n                    new_v = correct_unfold_reduction_order(v)\r\n                elif 'norm.' in k:\r\n                    new_v = correct_unfold_norm_order(v)\r\n            else:\r\n                new_k = k\r\n            new_k = new_k.replace('layers', 'stages', 1)\r\n        elif k.startswith('patch_embed'):\r\n            new_v = v\r\n            if 'proj' in k:\r\n                new_k = k.replace('proj', 'projection')\r\n            else:\r\n                new_k = k\r\n        else:\r\n            new_v = v\r\n            new_k = k\r\n\r\n        new_ckpt['backbone.' + new_k] = new_v\r\n\r\n    return new_ckpt\r\n\r\n\r\nclass WindowMSA(BaseModule):\r\n    \"\"\"Window based multi-head self-attention (W-MSA) module with relative\r\n    position bias.\r\n\r\n    Args:\r\n        embed_dims (int): Number of input channels.\r\n        num_heads (int): Number of attention heads.\r\n        window_size (tuple[int]): The height and width of the window.\r\n        qkv_bias (bool, optional):  If True, add a learnable bias to q, k, v.\r\n            Default: True.\r\n        qk_scale (float | None, optional): Override default qk scale of\r\n            head_dim ** -0.5 if set. 
Default: None.\r\n        attn_drop_rate (float, optional): Dropout ratio of attention weight.\r\n            Default: 0.0\r\n        proj_drop_rate (float, optional): Dropout ratio of output. Default: 0.\r\n        init_cfg (dict | None, optional): The Config for initialization.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 embed_dims,\r\n                 num_heads,\r\n                 window_size,\r\n                 qkv_bias=True,\r\n                 qk_scale=None,\r\n                 attn_drop_rate=0.,\r\n                 proj_drop_rate=0.,\r\n                 init_cfg=None):\r\n\r\n        super().__init__()\r\n        self.embed_dims = embed_dims\r\n        self.window_size = window_size  # Wh, Ww\r\n        self.num_heads = num_heads\r\n        head_embed_dims = embed_dims // num_heads\r\n        self.scale = qk_scale or head_embed_dims**-0.5\r\n        self.init_cfg = init_cfg\r\n\r\n        # define a parameter table of relative position bias\r\n        self.relative_position_bias_table = nn.Parameter(\r\n            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1),\r\n                        num_heads))  # 2*Wh-1 * 2*Ww-1, nH\r\n\r\n        # About 2x faster than original impl\r\n        Wh, Ww = self.window_size\r\n        rel_index_coords = self.double_step_seq(2 * Ww - 1, Wh, 1, Ww)\r\n        rel_position_index = rel_index_coords + rel_index_coords.T\r\n        rel_position_index = rel_position_index.flip(1).contiguous()\r\n        self.register_buffer('relative_position_index', rel_position_index)\r\n\r\n        self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias)\r\n        self.attn_drop = nn.Dropout(attn_drop_rate)\r\n        self.proj = nn.Linear(embed_dims, embed_dims)\r\n        self.proj_drop = nn.Dropout(proj_drop_rate)\r\n\r\n        self.softmax = nn.Softmax(dim=-1)\r\n\r\n    def init_weights(self):\r\n        trunc_normal_(self.relative_position_bias_table, std=0.02)\r\n\r\n    def forward(self, x, mask=None):\r\n        \"\"\"\r\n        Args:\r\n\r\n            x (tensor): input features with shape of (num_windows*B, N, C)\r\n            mask (tensor | None, Optional): mask with shape of (num_windows,\r\n                Wh*Ww, Wh*Ww), value should be between (-inf, 0].\r\n        \"\"\"\r\n        B, N, C = x.shape\r\n        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads,\r\n                                  C // self.num_heads).permute(2, 0, 3, 1, 4)\r\n        # make torchscript happy (cannot use tensor as tuple)\r\n        q, k, v = qkv[0], qkv[1], qkv[2]\r\n\r\n        q = q * self.scale\r\n        attn = (q @ k.transpose(-2, -1))\r\n\r\n        relative_position_bias = self.relative_position_bias_table[\r\n            self.relative_position_index.view(-1)].view(\r\n                self.window_size[0] * self.window_size[1],\r\n                self.window_size[0] * self.window_size[1],\r\n                -1)  # Wh*Ww,Wh*Ww,nH\r\n        relative_position_bias = relative_position_bias.permute(\r\n            2, 0, 1).contiguous()  # nH, Wh*Ww, Wh*Ww\r\n        attn = attn + relative_position_bias.unsqueeze(0)\r\n\r\n        if mask is not None:\r\n            nW = mask.shape[0]\r\n            attn = attn.view(B // nW, nW, self.num_heads, N,\r\n                             N) + mask.unsqueeze(1).unsqueeze(0)\r\n            attn = attn.view(-1, self.num_heads, N, N)\r\n        attn = self.softmax(attn)\r\n\r\n        attn = self.attn_drop(attn)\r\n\r\n        x = (attn @ 
v).transpose(1, 2).reshape(B, N, C)\r\n        x = self.proj(x)\r\n        x = self.proj_drop(x)\r\n        return x\r\n\r\n    @staticmethod\r\n    def double_step_seq(step1, len1, step2, len2):\r\n        seq1 = torch.arange(0, step1 * len1, step1)\r\n        seq2 = torch.arange(0, step2 * len2, step2)\r\n        return (seq1[:, None] + seq2[None, :]).reshape(1, -1)\r\n\r\n\r\nclass ShiftWindowMSA(BaseModule):\r\n    \"\"\"Shifted Window Multihead Self-Attention Module.\r\n\r\n    Args:\r\n        embed_dims (int): Number of input channels.\r\n        num_heads (int): Number of attention heads.\r\n        window_size (int): The height and width of the window.\r\n        shift_size (int, optional): The shift step of each window towards\r\n            right-bottom. If zero, act as regular window-msa. Defaults to 0.\r\n        qkv_bias (bool, optional): If True, add a learnable bias to q, k, v.\r\n            Default: True\r\n        qk_scale (float | None, optional): Override default qk scale of\r\n            head_dim ** -0.5 if set. Defaults: None.\r\n        attn_drop_rate (float, optional): Dropout ratio of attention weight.\r\n            Defaults: 0.\r\n        proj_drop_rate (float, optional): Dropout ratio of output.\r\n            Defaults: 0.\r\n        dropout_layer (dict, optional): The dropout_layer used before output.\r\n            Defaults: dict(type='DropPath', drop_prob=0.).\r\n        init_cfg (dict, optional): The extra config for initialization.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 embed_dims,\r\n                 num_heads,\r\n                 window_size,\r\n                 shift_size=0,\r\n                 qkv_bias=True,\r\n                 qk_scale=None,\r\n                 attn_drop_rate=0,\r\n                 proj_drop_rate=0,\r\n                 dropout_layer=dict(type='DropPath', drop_prob=0.),\r\n                 init_cfg=None):\r\n        super().__init__(init_cfg)\r\n\r\n        self.window_size = window_size\r\n        self.shift_size = shift_size\r\n        assert 0 <= self.shift_size < self.window_size\r\n\r\n        self.w_msa = WindowMSA(\r\n            embed_dims=embed_dims,\r\n            num_heads=num_heads,\r\n            window_size=to_2tuple(window_size),\r\n            qkv_bias=qkv_bias,\r\n            qk_scale=qk_scale,\r\n            attn_drop_rate=attn_drop_rate,\r\n            proj_drop_rate=proj_drop_rate,\r\n            init_cfg=None)\r\n\r\n        self.drop = build_dropout(dropout_layer)\r\n\r\n    def forward(self, query, hw_shape):\r\n        B, L, C = query.shape\r\n        H, W = hw_shape\r\n        assert L == H * W, 'input feature has wrong size'\r\n        query = query.view(B, H, W, C)\r\n\r\n        # pad feature maps to multiples of window size\r\n        pad_r = (self.window_size - W % self.window_size) % self.window_size\r\n        pad_b = (self.window_size - H % self.window_size) % self.window_size\r\n        query = F.pad(query, (0, 0, 0, pad_r, 0, pad_b))\r\n        H_pad, W_pad = query.shape[1], query.shape[2]\r\n\r\n        # cyclic shift\r\n        if self.shift_size > 0:\r\n            shifted_query = torch.roll(\r\n                query,\r\n                shifts=(-self.shift_size, -self.shift_size),\r\n                dims=(1, 2))\r\n\r\n            # calculate attention mask for SW-MSA\r\n            img_mask = torch.zeros((1, H_pad, W_pad, 1), device=query.device)\r\n            h_slices = (slice(0, -self.window_size),\r\n                        
slice(-self.window_size,\r\n                              -self.shift_size), slice(-self.shift_size, None))\r\n            w_slices = (slice(0, -self.window_size),\r\n                        slice(-self.window_size,\r\n                              -self.shift_size), slice(-self.shift_size, None))\r\n            cnt = 0\r\n            for h in h_slices:\r\n                for w in w_slices:\r\n                    img_mask[:, h, w, :] = cnt\r\n                    cnt += 1\r\n\r\n            # nW, window_size, window_size, 1\r\n            mask_windows = self.window_partition(img_mask)\r\n            mask_windows = mask_windows.view(\r\n                -1, self.window_size * self.window_size)\r\n            attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)\r\n            attn_mask = attn_mask.masked_fill(attn_mask != 0,\r\n                                              float(-100.0)).masked_fill(\r\n                                                  attn_mask == 0, float(0.0))\r\n        else:\r\n            shifted_query = query\r\n            attn_mask = None\r\n\r\n        # nW*B, window_size, window_size, C\r\n        query_windows = self.window_partition(shifted_query)\r\n        # nW*B, window_size*window_size, C\r\n        query_windows = query_windows.view(-1, self.window_size**2, C)\r\n\r\n        # W-MSA/SW-MSA (nW*B, window_size*window_size, C)\r\n        attn_windows = self.w_msa(query_windows, mask=attn_mask)\r\n\r\n        # merge windows\r\n        attn_windows = attn_windows.view(-1, self.window_size,\r\n                                         self.window_size, C)\r\n\r\n        # B H' W' C\r\n        shifted_x = self.window_reverse(attn_windows, H_pad, W_pad)\r\n        # reverse cyclic shift\r\n        if self.shift_size > 0:\r\n            x = torch.roll(\r\n                shifted_x,\r\n                shifts=(self.shift_size, self.shift_size),\r\n                dims=(1, 2))\r\n        else:\r\n            x = shifted_x\r\n\r\n        if pad_r > 0 or pad_b:\r\n            x = x[:, :H, :W, :].contiguous()\r\n\r\n        x = x.view(B, H * W, C)\r\n\r\n        x = self.drop(x)\r\n        return x\r\n\r\n    def window_reverse(self, windows, H, W):\r\n        \"\"\"\r\n        Args:\r\n            windows: (num_windows*B, window_size, window_size, C)\r\n            H (int): Height of image\r\n            W (int): Width of image\r\n        Returns:\r\n            x: (B, H, W, C)\r\n        \"\"\"\r\n        window_size = self.window_size\r\n        B = int(windows.shape[0] / (H * W / window_size / window_size))\r\n        x = windows.view(B, H // window_size, W // window_size, window_size,\r\n                         window_size, -1)\r\n        x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)\r\n        return x\r\n\r\n    def window_partition(self, x):\r\n        \"\"\"\r\n        Args:\r\n            x: (B, H, W, C)\r\n        Returns:\r\n            windows: (num_windows*B, window_size, window_size, C)\r\n        \"\"\"\r\n        B, H, W, C = x.shape\r\n        window_size = self.window_size\r\n        x = x.view(B, H // window_size, window_size, W // window_size,\r\n                   window_size, C)\r\n        windows = x.permute(0, 1, 3, 2, 4, 5).contiguous()\r\n        windows = windows.view(-1, window_size, window_size, C)\r\n        return windows\r\n\r\n\r\nclass SwinBlock(BaseModule):\r\n    \"\"\"\"\r\n    Args:\r\n        embed_dims (int): The feature dimension.\r\n        num_heads (int): Parallel attention heads.\r\n        
feedforward_channels (int): The hidden dimension for FFNs.\r\n        window_size (int, optional): The local window scale. Default: 7.\r\n        shift (bool, optional): whether to shift window or not. Default False.\r\n        qkv_bias (bool, optional): enable bias for qkv if True. Default: True.\r\n        qk_scale (float | None, optional): Override default qk scale of\r\n            head_dim ** -0.5 if set. Default: None.\r\n        drop_rate (float, optional): Dropout rate. Default: 0.\r\n        attn_drop_rate (float, optional): Attention dropout rate. Default: 0.\r\n        drop_path_rate (float, optional): Stochastic depth rate. Default: 0.\r\n        act_cfg (dict, optional): The config dict of activation function.\r\n            Default: dict(type='GELU').\r\n        norm_cfg (dict, optional): The config dict of normalization.\r\n            Default: dict(type='LN').\r\n        with_cp (bool, optional): Use checkpoint or not. Using checkpoint\r\n            will save some memory while slowing down the training speed.\r\n            Default: False.\r\n        init_cfg (dict | list | None, optional): The init config.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 embed_dims,\r\n                 num_heads,\r\n                 feedforward_channels,\r\n                 window_size=7,\r\n                 shift=False,\r\n                 qkv_bias=True,\r\n                 qk_scale=None,\r\n                 drop_rate=0.,\r\n                 attn_drop_rate=0.,\r\n                 drop_path_rate=0.,\r\n                 act_cfg=dict(type='GELU'),\r\n                 norm_cfg=dict(type='LN'),\r\n                 with_cp=False,\r\n                 init_cfg=None):\r\n\r\n        super(SwinBlock, self).__init__()\r\n\r\n        self.init_cfg = init_cfg\r\n        self.with_cp = with_cp\r\n\r\n        self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1]\r\n        self.attn = ShiftWindowMSA(\r\n            embed_dims=embed_dims,\r\n            num_heads=num_heads,\r\n            window_size=window_size,\r\n            shift_size=window_size // 2 if shift else 0,\r\n            qkv_bias=qkv_bias,\r\n            qk_scale=qk_scale,\r\n            attn_drop_rate=attn_drop_rate,\r\n            proj_drop_rate=drop_rate,\r\n            dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),\r\n            init_cfg=None)\r\n\r\n        self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1]\r\n        self.ffn = FFN(\r\n            embed_dims=embed_dims,\r\n            feedforward_channels=feedforward_channels,\r\n            num_fcs=2,\r\n            ffn_drop=drop_rate,\r\n            dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),\r\n            act_cfg=act_cfg,\r\n            add_identity=True,\r\n            init_cfg=None)\r\n\r\n    def forward(self, x, hw_shape):\r\n\r\n        def _inner_forward(x):\r\n            identity = x\r\n            x = self.norm1(x)\r\n            x = self.attn(x, hw_shape)\r\n\r\n            x = x + identity\r\n\r\n            identity = x\r\n            x = self.norm2(x)\r\n            x = self.ffn(x, identity=identity)\r\n\r\n            return x\r\n\r\n        if self.with_cp and x.requires_grad:\r\n            x = cp.checkpoint(_inner_forward, x)\r\n        else:\r\n            x = _inner_forward(x)\r\n\r\n        return x\r\n\r\n\r\nclass SwinBlockSequence(BaseModule):\r\n    \"\"\"Implements one stage in Swin Transformer.\r\n\r\n    Args:\r\n        embed_dims (int): The feature dimension.\r\n   
     num_heads (int): Parallel attention heads.\r\n        feedforward_channels (int): The hidden dimension for FFNs.\r\n        depth (int): The number of blocks in this stage.\r\n        window_size (int, optional): The local window scale. Default: 7.\r\n        qkv_bias (bool, optional): enable bias for qkv if True. Default: True.\r\n        qk_scale (float | None, optional): Override default qk scale of\r\n            head_dim ** -0.5 if set. Default: None.\r\n        drop_rate (float, optional): Dropout rate. Default: 0.\r\n        attn_drop_rate (float, optional): Attention dropout rate. Default: 0.\r\n        drop_path_rate (float | list[float], optional): Stochastic depth\r\n            rate. Default: 0.\r\n        downsample (BaseModule | None, optional): The downsample operation\r\n            module. Default: None.\r\n        act_cfg (dict, optional): The config dict of activation function.\r\n            Default: dict(type='GELU').\r\n        norm_cfg (dict, optional): The config dict of normalization.\r\n            Default: dict(type='LN').\r\n        with_cp (bool, optional): Use checkpoint or not. Using checkpoint\r\n            will save some memory while slowing down the training speed.\r\n            Default: False.\r\n        init_cfg (dict | list | None, optional): The init config.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 embed_dims,\r\n                 num_heads,\r\n                 feedforward_channels,\r\n                 depth,\r\n                 window_size=7,\r\n                 qkv_bias=True,\r\n                 qk_scale=None,\r\n                 drop_rate=0.,\r\n                 attn_drop_rate=0.,\r\n                 drop_path_rate=0.,\r\n                 downsample=None,\r\n                 act_cfg=dict(type='GELU'),\r\n                 norm_cfg=dict(type='LN'),\r\n                 with_cp=False,\r\n                 init_cfg=None):\r\n        super().__init__(init_cfg=init_cfg)\r\n\r\n        if isinstance(drop_path_rate, list):\r\n            drop_path_rates = drop_path_rate\r\n            assert len(drop_path_rates) == depth\r\n        else:\r\n            drop_path_rates = [deepcopy(drop_path_rate) for _ in range(depth)]\r\n\r\n        self.blocks = ModuleList()\r\n        for i in range(depth):\r\n            block = SwinBlock(\r\n                embed_dims=embed_dims,\r\n                num_heads=num_heads,\r\n                feedforward_channels=feedforward_channels,\r\n                window_size=window_size,\r\n                shift=False if i % 2 == 0 else True,\r\n                qkv_bias=qkv_bias,\r\n                qk_scale=qk_scale,\r\n                drop_rate=drop_rate,\r\n                attn_drop_rate=attn_drop_rate,\r\n                drop_path_rate=drop_path_rates[i],\r\n                act_cfg=act_cfg,\r\n                norm_cfg=norm_cfg,\r\n                with_cp=with_cp,\r\n                init_cfg=None)\r\n            self.blocks.append(block)\r\n\r\n        self.downsample = downsample\r\n\r\n    def forward(self, x, hw_shape):\r\n        for block in self.blocks:\r\n            x = block(x, hw_shape)\r\n\r\n        if self.downsample:\r\n            x_down, down_hw_shape = self.downsample(x, hw_shape)\r\n            return x_down, down_hw_shape, x, hw_shape\r\n        else:\r\n            return x, hw_shape, x, hw_shape\r\n\r\n\r\n@BACKBONES.register_module()\r\nclass SwinTransformer(BaseModule):\r\n    \"\"\" Swin Transformer\r\n    A PyTorch implement of : `Swin Transformer:\r\n 
   Hierarchical Vision Transformer using Shifted Windows`  -\r\n        https://arxiv.org/abs/2103.14030\r\n\r\n    Inspiration from\r\n    https://github.com/microsoft/Swin-Transformer\r\n\r\n    Args:\r\n        pretrain_img_size (int | tuple[int]): The size of input image when\r\n            pretrain. Defaults: 224.\r\n        in_channels (int): The num of input channels.\r\n            Defaults: 3.\r\n        embed_dims (int): The feature dimension. Default: 96.\r\n        patch_size (int | tuple[int]): Patch size. Default: 4.\r\n        window_size (int): Window size. Default: 7.\r\n        mlp_ratio (int): Ratio of mlp hidden dim to embedding dim.\r\n            Default: 4.\r\n        depths (tuple[int]): Depths of each Swin Transformer stage.\r\n            Default: (2, 2, 6, 2).\r\n        num_heads (tuple[int]): Parallel attention heads of each Swin\r\n            Transformer stage. Default: (3, 6, 12, 24).\r\n        strides (tuple[int]): The patch merging or patch embedding stride of\r\n            each Swin Transformer stage. (In swin, we set kernel size equal to\r\n            stride.) Default: (4, 2, 2, 2).\r\n        out_indices (tuple[int]): Output from which stages.\r\n            Default: (0, 1, 2, 3).\r\n        qkv_bias (bool, optional): If True, add a learnable bias to query, key,\r\n            value. Default: True\r\n        qk_scale (float | None, optional): Override default qk scale of\r\n            head_dim ** -0.5 if set. Default: None.\r\n        patch_norm (bool): If add a norm layer for patch embed and patch\r\n            merging. Default: True.\r\n        drop_rate (float): Dropout rate. Defaults: 0.\r\n        attn_drop_rate (float): Attention dropout rate. Default: 0.\r\n        drop_path_rate (float): Stochastic depth rate. Defaults: 0.1.\r\n        use_abs_pos_embed (bool): If True, add absolute position embedding to\r\n            the patch embedding. Defaults: False.\r\n        act_cfg (dict): Config dict for activation layer.\r\n            Default: dict(type='GELU').\r\n        norm_cfg (dict): Config dict for normalization layer at\r\n            output of backone. Defaults: dict(type='LN').\r\n        with_cp (bool, optional): Use checkpoint or not. Using checkpoint\r\n            will save some memory while slowing down the training speed.\r\n            Default: False.\r\n        pretrained (str, optional): model pretrained path. Default: None.\r\n        convert_weights (bool): The flag indicates whether the\r\n            pre-trained model is from the original repo. 
We may need\r\n            to convert some keys to make it compatible.\r\n            Default: False.\r\n        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).\r\n            Default: -1 (-1 means not freezing any parameters).\r\n        init_cfg (dict, optional): The Config for initialization.\r\n            Defaults to None.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 pretrain_img_size=224,\r\n                 in_channels=3,\r\n                 embed_dims=96,\r\n                 patch_size=4,\r\n                 window_size=7,\r\n                 mlp_ratio=4,\r\n                 depths=(2, 2, 6, 2),\r\n                 num_heads=(3, 6, 12, 24),\r\n                 strides=(4, 2, 2, 2),\r\n                 out_indices=(0, 1, 2, 3),\r\n                 qkv_bias=True,\r\n                 qk_scale=None,\r\n                 patch_norm=True,\r\n                 drop_rate=0.,\r\n                 attn_drop_rate=0.,\r\n                 drop_path_rate=0.1,\r\n                 use_abs_pos_embed=False,\r\n                 act_cfg=dict(type='GELU'),\r\n                 norm_cfg=dict(type='LN'),\r\n                 with_cp=False,\r\n                 pretrained=None,\r\n                 convert_weights=False,\r\n                 frozen_stages=-1,\r\n                 init_cfg=None):\r\n        self.convert_weights = convert_weights\r\n        self.frozen_stages = frozen_stages\r\n        if isinstance(pretrain_img_size, int):\r\n            pretrain_img_size = to_2tuple(pretrain_img_size)\r\n        elif isinstance(pretrain_img_size, tuple):\r\n            if len(pretrain_img_size) == 1:\r\n                pretrain_img_size = to_2tuple(pretrain_img_size[0])\r\n            assert len(pretrain_img_size) == 2, \\\r\n                f'The size of image should have length 1 or 2, ' \\\r\n                f'but got {len(pretrain_img_size)}'\r\n\r\n        assert not (init_cfg and pretrained), \\\r\n            'init_cfg and pretrained cannot be specified at the same time'\r\n        if isinstance(pretrained, str):\r\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\r\n                          'please use \"init_cfg\" instead')\r\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\r\n        elif pretrained is None:\r\n            self.init_cfg = init_cfg\r\n        else:\r\n            raise TypeError('pretrained must be a str or None')\r\n\r\n        super(SwinTransformer, self).__init__(init_cfg=init_cfg)\r\n\r\n        num_layers = len(depths)\r\n        self.out_indices = out_indices\r\n        self.use_abs_pos_embed = use_abs_pos_embed\r\n\r\n        assert strides[0] == patch_size, 'Use non-overlapping patch embed.'\r\n\r\n        self.patch_embed = PatchEmbed(\r\n            in_channels=in_channels,\r\n            embed_dims=embed_dims,\r\n            conv_type='Conv2d',\r\n            kernel_size=patch_size,\r\n            stride=strides[0],\r\n            norm_cfg=norm_cfg if patch_norm else None,\r\n            init_cfg=None)\r\n\r\n        if self.use_abs_pos_embed:\r\n            patch_row = pretrain_img_size[0] // patch_size\r\n            patch_col = pretrain_img_size[1] // patch_size\r\n            self.absolute_pos_embed = nn.Parameter(\r\n                torch.zeros((1, embed_dims, patch_row, patch_col)))\r\n\r\n        self.drop_after_pos = nn.Dropout(p=drop_rate)\r\n\r\n        # set stochastic depth decay rule\r\n        total_depth = sum(depths)\r\n        dpr = [\r\n            x.item() for x 
in torch.linspace(0, drop_path_rate, total_depth)\r\n        ]\r\n\r\n        self.stages = ModuleList()\r\n        in_channels = embed_dims\r\n        for i in range(num_layers):\r\n            if i < num_layers - 1:\r\n                downsample = PatchMerging(\r\n                    in_channels=in_channels,\r\n                    out_channels=2 * in_channels,\r\n                    stride=strides[i + 1],\r\n                    norm_cfg=norm_cfg if patch_norm else None,\r\n                    init_cfg=None)\r\n            else:\r\n                downsample = None\r\n\r\n            stage = SwinBlockSequence(\r\n                embed_dims=in_channels,\r\n                num_heads=num_heads[i],\r\n                feedforward_channels=mlp_ratio * in_channels,\r\n                depth=depths[i],\r\n                window_size=window_size,\r\n                qkv_bias=qkv_bias,\r\n                qk_scale=qk_scale,\r\n                drop_rate=drop_rate,\r\n                attn_drop_rate=attn_drop_rate,\r\n                drop_path_rate=dpr[sum(depths[:i]):sum(depths[:i + 1])],\r\n                downsample=downsample,\r\n                act_cfg=act_cfg,\r\n                norm_cfg=norm_cfg,\r\n                with_cp=with_cp,\r\n                init_cfg=None)\r\n            self.stages.append(stage)\r\n            if downsample:\r\n                in_channels = downsample.out_channels\r\n\r\n        self.num_features = [int(embed_dims * 2**i) for i in range(num_layers)]\r\n        # Add a norm layer for each output\r\n        for i in out_indices:\r\n            layer = build_norm_layer(norm_cfg, self.num_features[i])[1]\r\n            layer_name = f'norm{i}'\r\n            self.add_module(layer_name, layer)\r\n\r\n    def train(self, mode=True):\r\n        \"\"\"Convert the model into training mode while keep layers freezed.\"\"\"\r\n        super(SwinTransformer, self).train(mode)\r\n        self._freeze_stages()\r\n\r\n    def _freeze_stages(self):\r\n        if self.frozen_stages >= 0:\r\n            self.patch_embed.eval()\r\n            for param in self.patch_embed.parameters():\r\n                param.requires_grad = False\r\n            if self.use_abs_pos_embed:\r\n                self.absolute_pos_embed.requires_grad = False\r\n            self.drop_after_pos.eval()\r\n\r\n        for i in range(1, self.frozen_stages + 1):\r\n\r\n            if (i - 1) in self.out_indices:\r\n                norm_layer = getattr(self, f'norm{i-1}')\r\n                norm_layer.eval()\r\n                for param in norm_layer.parameters():\r\n                    param.requires_grad = False\r\n\r\n            m = self.stages[i - 1]\r\n            m.eval()\r\n            for param in m.parameters():\r\n                param.requires_grad = False\r\n\r\n    def init_weights(self, pretrained=None):\r\n        logger = get_root_logger()\r\n        if pretrained is None:\r\n            logger.warn(f'No pre-trained weights for '\r\n                        f'{self.__class__.__name__}, '\r\n                        f'training start from scratch')\r\n            if self.use_abs_pos_embed:\r\n                trunc_normal_(self.absolute_pos_embed, std=0.02)\r\n            for m in self.modules():\r\n                if isinstance(m, nn.Linear):\r\n                    trunc_normal_init(m, std=.02, bias=0.)\r\n                elif isinstance(m, nn.LayerNorm):\r\n                    constant_init(m, 1.0)\r\n        else:\r\n            # assert 'checkpoint' in self.init_cfg, f'Only support ' \\\r\n            #  
                                     f'specify `Pretrained` in ' \\\r\n            #                                       f'`init_cfg` in ' \\\r\n            #                                       f'{self.__class__.__name__} '\r\n\r\n            ckpt = _load_checkpoint(pretrained, logger=logger, map_location='cpu')\r\n            if 'state_dict' in ckpt:\r\n                _state_dict = ckpt['state_dict']\r\n            elif 'model' in ckpt:\r\n                _state_dict = ckpt['model']\r\n            else:\r\n                _state_dict = ckpt\r\n            if self.convert_weights:\r\n                # support loading weights trained with the original repo\r\n                _state_dict = swin_converter(_state_dict)\r\n\r\n            state_dict = OrderedDict()\r\n            for k, v in _state_dict.items():\r\n                if k.startswith('backbone.'):\r\n                    state_dict[k[9:]] = v\r\n\r\n            # strip prefix of state_dict\r\n            if list(state_dict.keys())[0].startswith('module.'):\r\n                state_dict = {k[7:]: v for k, v in state_dict.items()}\r\n\r\n            # reshape absolute position embedding\r\n            if state_dict.get('absolute_pos_embed') is not None:\r\n                absolute_pos_embed = state_dict['absolute_pos_embed']\r\n                N1, L, C1 = absolute_pos_embed.size()\r\n                N2, C2, H, W = self.absolute_pos_embed.size()\r\n                if N1 != N2 or C1 != C2 or L != H * W:\r\n                    logger.warning('Error in loading absolute_pos_embed, pass')\r\n                else:\r\n                    state_dict['absolute_pos_embed'] = absolute_pos_embed.view(\r\n                        N2, H, W, C2).permute(0, 3, 1, 2).contiguous()\r\n\r\n            # interpolate position bias table if needed\r\n            relative_position_bias_table_keys = [\r\n                k for k in state_dict.keys()\r\n                if 'relative_position_bias_table' in k\r\n            ]\r\n            for table_key in relative_position_bias_table_keys:\r\n                table_pretrained = state_dict[table_key]\r\n                table_current = self.state_dict()[table_key]\r\n                L1, nH1 = table_pretrained.size()\r\n                L2, nH2 = table_current.size()\r\n                if nH1 != nH2:\r\n                    logger.warning(f'Error in loading {table_key}, pass')\r\n                elif L1 != L2:\r\n                    S1 = int(L1**0.5)\r\n                    S2 = int(L2**0.5)\r\n                    table_pretrained_resized = F.interpolate(\r\n                        table_pretrained.permute(1, 0).reshape(1, nH1, S1, S1),\r\n                        size=(S2, S2),\r\n                        mode='bicubic')\r\n                    state_dict[table_key] = table_pretrained_resized.view(\r\n                        nH2, L2).permute(1, 0).contiguous()\r\n\r\n            # load state_dict (non-strict, since some keys may be absent)\r\n            self.load_state_dict(state_dict, strict=False)\r\n\r\n    def forward(self, x):\r\n        x, hw_shape = self.patch_embed(x)\r\n\r\n        if self.use_abs_pos_embed:\r\n            h, w = self.absolute_pos_embed.shape[1:3]\r\n            if hw_shape[0] != h or hw_shape[1] != w:\r\n                absolute_pos_embed = F.interpolate(\r\n                    self.absolute_pos_embed,\r\n                    size=hw_shape,\r\n                    mode='bicubic',\r\n                    align_corners=False).flatten(2).transpose(1, 2)\r\n            else:\r\n              
  absolute_pos_embed = self.absolute_pos_embed.flatten(\r\n                    2).transpose(1, 2)\r\n            x = x + absolute_pos_embed\r\n        x = self.drop_after_pos(x)\r\n\r\n        outs = []\r\n        for i, stage in enumerate(self.stages):\r\n            x, hw_shape, out, out_hw_shape = stage(x, hw_shape)\r\n            if i in self.out_indices:\r\n                norm_layer = getattr(self, f'norm{i}')\r\n                out = norm_layer(out)\r\n                out = out.view(-1, *out_hw_shape,\r\n                               self.num_features[i]).permute(0, 3, 1,\r\n                                                             2).contiguous()\r\n                outs.append(out)\r\n\r\n        return outs"
  },
  {
    "path": "mmdet3d/models/builder.py",
    "content": "import warnings\n\nfrom mmdet.models.builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,\n                                  ROI_EXTRACTORS, SHARED_HEADS, build)\nfrom .registry import FUSION_LAYERS, MIDDLE_ENCODERS, VOXEL_ENCODERS\n\n\ndef build_backbone(cfg):\n    \"\"\"Build backbone.\"\"\"\n    return build(cfg, BACKBONES)\n\n\ndef build_neck(cfg):\n    \"\"\"Build neck.\"\"\"\n    return build(cfg, NECKS)\n\n\ndef build_roi_extractor(cfg):\n    \"\"\"Build RoI feature extractor.\"\"\"\n    return build(cfg, ROI_EXTRACTORS)\n\n\ndef build_shared_head(cfg):\n    \"\"\"Build shared head of detector.\"\"\"\n    return build(cfg, SHARED_HEADS)\n\n\ndef build_head(cfg):\n    \"\"\"Build head.\"\"\"\n    return build(cfg, HEADS)\n\n\ndef build_loss(cfg):\n    \"\"\"Build loss function.\"\"\"\n    return build(cfg, LOSSES)\n\n\ndef build_detector(cfg, train_cfg=None, test_cfg=None):\n    \"\"\"Build detector.\"\"\"\n    if train_cfg is not None or test_cfg is not None:\n        warnings.warn(\n            'train_cfg and test_cfg is deprecated, '\n            'please specify them in model', UserWarning)\n    assert cfg.get('train_cfg') is None or train_cfg is None, \\\n        'train_cfg specified in both outer field and model field '\n    assert cfg.get('test_cfg') is None or test_cfg is None, \\\n        'test_cfg specified in both outer field and model field '\n    return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))\n\n\ndef build_voxel_encoder(cfg):\n    \"\"\"Build voxel encoder.\"\"\"\n    return build(cfg, VOXEL_ENCODERS)\n\n\ndef build_middle_encoder(cfg):\n    \"\"\"Build middle level encoder.\"\"\"\n    return build(cfg, MIDDLE_ENCODERS)\n\n\ndef build_fusion_layer(cfg):\n    \"\"\"Build fusion layer.\"\"\"\n    return build(cfg, FUSION_LAYERS)\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/__init__.py",
    "content": "from .anchor3d_head import Anchor3DHead\nfrom .base_conv_bbox_head import BaseConvBboxHead\nfrom .centerpoint_head import CenterHead\nfrom .free_anchor3d_head import FreeAnchor3DHead\nfrom .parta2_rpn_head import PartA2RPNHead\nfrom .shape_aware_head import ShapeAwareHead\nfrom .ssd_3d_head import SSD3DHead\nfrom .vote_head import VoteHead\nfrom .transfusion_head import TransFusionHead\nfrom .sparsefusion_head_deform import SparseFusionHead2D_Deform\n\n__all__ = [\n    'Anchor3DHead', 'FreeAnchor3DHead', 'PartA2RPNHead', 'VoteHead',\n    'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',\n    'TransFusionHead', 'SparseFusionHead2D_Deform'\n]\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/anchor3d_head.py",
    "content": "import numpy as np\nimport torch\nfrom mmcv.cnn import bias_init_with_prob, normal_init\nfrom mmcv.runner import force_fp32\nfrom torch import nn as nn\n\nfrom mmdet3d.core import (PseudoSampler, box3d_multiclass_nms, limit_period,\n                          xywhr2xyxyr)\nfrom mmdet.core import (build_anchor_generator, build_assigner,\n                        build_bbox_coder, build_sampler, multi_apply)\nfrom mmdet.models import HEADS\nfrom ..builder import build_loss\nfrom .train_mixins import AnchorTrainMixin\n\n\n@HEADS.register_module()\nclass Anchor3DHead(nn.Module, AnchorTrainMixin):\n    \"\"\"Anchor head for SECOND/PointPillars/MVXNet/PartA2.\n\n    Args:\n        num_classes (int): Number of classes.\n        in_channels (int): Number of channels in the input feature map.\n        train_cfg (dict): Train configs.\n        test_cfg (dict): Test configs.\n        feat_channels (int): Number of channels of the feature map.\n        use_direction_classifier (bool): Whether to add a direction classifier.\n        anchor_generator(dict): Config dict of anchor generator.\n        assigner_per_size (bool): Whether to do assignment for each separate\n            anchor size.\n        assign_per_class (bool): Whether to do assignment for each class.\n        diff_rad_by_sin (bool): Whether to change the difference into sin\n            difference for box regression loss.\n        dir_offset (float | int): The offset of BEV rotation angles.\n            (TODO: may be moved into box coder)\n        dir_limit_offset (float | int): The limited range of BEV\n            rotation angles. (TODO: may be moved into box coder)\n        bbox_coder (dict): Config dict of box coders.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of localization loss.\n        loss_dir (dict): Config of direction classifier loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 train_cfg,\n                 test_cfg,\n                 feat_channels=256,\n                 use_direction_classifier=True,\n                 anchor_generator=dict(\n                     type='Anchor3DRangeGenerator',\n                     range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],\n                     strides=[2],\n                     sizes=[[1.6, 3.9, 1.56]],\n                     rotations=[0, 1.57],\n                     custom_values=[],\n                     reshape_out=False),\n                 assigner_per_size=False,\n                 assign_per_class=False,\n                 diff_rad_by_sin=True,\n                 dir_offset=0,\n                 dir_limit_offset=1,\n                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 loss_bbox=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n                 loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)):\n        super().__init__()\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.feat_channels = feat_channels\n        self.diff_rad_by_sin = diff_rad_by_sin\n        self.use_direction_classifier = use_direction_classifier\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.assigner_per_size = assigner_per_size\n        self.assign_per_class = assign_per_class\n   
     self.dir_offset = dir_offset\n        self.dir_limit_offset = dir_limit_offset\n        self.fp16_enabled = False\n\n        # build anchor generator\n        self.anchor_generator = build_anchor_generator(anchor_generator)\n        # In 3D detection, the anchor stride is connected with anchor size\n        self.num_anchors = self.anchor_generator.num_base_anchors\n        # build box coder\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.box_code_size = self.bbox_coder.code_size\n\n        # build loss function\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n        self.sampling = loss_cls['type'] not in ['FocalLoss', 'GHMC']\n        if not self.use_sigmoid_cls:\n            self.num_classes += 1\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_dir = build_loss(loss_dir)\n        self.fp16_enabled = False\n\n        self._init_layers()\n        self._init_assigner_sampler()\n\n    def _init_assigner_sampler(self):\n        \"\"\"Initialize the target assigner and sampler of the head.\"\"\"\n        if self.train_cfg is None:\n            return\n\n        if self.sampling:\n            self.bbox_sampler = build_sampler(self.train_cfg.sampler)\n        else:\n            self.bbox_sampler = PseudoSampler()\n        if isinstance(self.train_cfg.assigner, dict):\n            self.bbox_assigner = build_assigner(self.train_cfg.assigner)\n        elif isinstance(self.train_cfg.assigner, list):\n            self.bbox_assigner = [\n                build_assigner(res) for res in self.train_cfg.assigner\n            ]\n\n    def _init_layers(self):\n        \"\"\"Initialize neural network layers of the head.\"\"\"\n        self.cls_out_channels = self.num_anchors * self.num_classes\n        self.conv_cls = nn.Conv2d(self.feat_channels, self.cls_out_channels, 1)\n        self.conv_reg = nn.Conv2d(self.feat_channels,\n                                  self.num_anchors * self.box_code_size, 1)\n        if self.use_direction_classifier:\n            self.conv_dir_cls = nn.Conv2d(self.feat_channels,\n                                          self.num_anchors * 2, 1)\n\n    def init_weights(self):\n        \"\"\"Initialize the weights of head.\"\"\"\n        bias_cls = bias_init_with_prob(0.01)\n        normal_init(self.conv_cls, std=0.01, bias=bias_cls)\n        normal_init(self.conv_reg, std=0.01)\n\n    def forward_single(self, x):\n        \"\"\"Forward function on a single-scale feature map.\n\n        Args:\n            x (torch.Tensor): Input features.\n\n        Returns:\n            tuple[torch.Tensor]: Contain score of each class, bbox \\\n                regression and direction classification predictions.\n        \"\"\"\n        cls_score = self.conv_cls(x)\n        bbox_pred = self.conv_reg(x)\n        dir_cls_preds = None\n        if self.use_direction_classifier:\n            dir_cls_preds = self.conv_dir_cls(x)\n        return cls_score, bbox_pred, dir_cls_preds\n\n    def forward(self, feats):\n        \"\"\"Forward pass.\n\n        Args:\n            feats (list[torch.Tensor]): Multi-level features, e.g.,\n                features produced by FPN.\n\n        Returns:\n            tuple[list[torch.Tensor]]: Multi-level class score, bbox \\\n                and direction predictions.\n        \"\"\"\n        return multi_apply(self.forward_single, feats)\n\n    def get_anchors(self, featmap_sizes, input_metas, device='cuda'):\n        \"\"\"Get anchors according to feature map 
sizes.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            input_metas (list[dict]): contain pcd and img's meta info.\n            device (str): device of current module.\n\n        Returns:\n            list[list[torch.Tensor]]: Anchors of each image, valid flags \\\n                of each image.\n        \"\"\"\n        num_imgs = len(input_metas)\n        # since feature map sizes of all images are the same, we only compute\n        # anchors for one time\n        multi_level_anchors = self.anchor_generator.grid_anchors(\n            featmap_sizes, device=device)\n        anchor_list = [multi_level_anchors for _ in range(num_imgs)]\n        return anchor_list\n\n    def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels,\n                    label_weights, bbox_targets, bbox_weights, dir_targets,\n                    dir_weights, num_total_samples):\n        \"\"\"Calculate loss of Single-level results.\n\n        Args:\n            cls_score (torch.Tensor): Class score in single-level.\n            bbox_pred (torch.Tensor): Bbox prediction in single-level.\n            dir_cls_preds (torch.Tensor): Predictions of direction class\n                in single-level.\n            labels (torch.Tensor): Labels of class.\n            label_weights (torch.Tensor): Weights of class loss.\n            bbox_targets (torch.Tensor): Targets of bbox predictions.\n            bbox_weights (torch.Tensor): Weights of bbox loss.\n            dir_targets (torch.Tensor): Targets of direction predictions.\n            dir_weights (torch.Tensor): Weights of direction loss.\n            num_total_samples (int): The number of valid samples.\n\n        Returns:\n            tuple[torch.Tensor]: Losses of class, bbox \\\n                and direction, respectively.\n        \"\"\"\n        # classification loss\n        if num_total_samples is None:\n            num_total_samples = int(cls_score.shape[0])\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n        cls_score = cls_score.permute(0, 2, 3, 1).reshape(-1, self.num_classes)\n        assert labels.max().item() <= self.num_classes\n        loss_cls = self.loss_cls(\n            cls_score, labels, label_weights, avg_factor=num_total_samples)\n\n        # regression loss\n        bbox_pred = bbox_pred.permute(0, 2, 3,\n                                      1).reshape(-1, self.box_code_size)\n        bbox_targets = bbox_targets.reshape(-1, self.box_code_size)\n        bbox_weights = bbox_weights.reshape(-1, self.box_code_size)\n\n        bg_class_ind = self.num_classes\n        pos_inds = ((labels >= 0)\n                    & (labels < bg_class_ind)).nonzero(\n                        as_tuple=False).reshape(-1)\n        num_pos = len(pos_inds)\n\n        pos_bbox_pred = bbox_pred[pos_inds]\n        pos_bbox_targets = bbox_targets[pos_inds]\n        pos_bbox_weights = bbox_weights[pos_inds]\n\n        # dir loss\n        if self.use_direction_classifier:\n            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).reshape(-1, 2)\n            dir_targets = dir_targets.reshape(-1)\n            dir_weights = dir_weights.reshape(-1)\n            pos_dir_cls_preds = dir_cls_preds[pos_inds]\n            pos_dir_targets = dir_targets[pos_inds]\n            pos_dir_weights = dir_weights[pos_inds]\n\n        if num_pos > 0:\n            code_weight = self.train_cfg.get('code_weight', None)\n            if code_weight:\n                pos_bbox_weights = 
pos_bbox_weights * bbox_weights.new_tensor(\n                    code_weight)\n            if self.diff_rad_by_sin:\n                pos_bbox_pred, pos_bbox_targets = self.add_sin_difference(\n                    pos_bbox_pred, pos_bbox_targets)\n            loss_bbox = self.loss_bbox(\n                pos_bbox_pred,\n                pos_bbox_targets,\n                pos_bbox_weights,\n                avg_factor=num_total_samples)\n\n            # direction classification loss\n            loss_dir = None\n            if self.use_direction_classifier:\n                loss_dir = self.loss_dir(\n                    pos_dir_cls_preds,\n                    pos_dir_targets,\n                    pos_dir_weights,\n                    avg_factor=num_total_samples)\n        else:\n            loss_bbox = pos_bbox_pred.sum()\n            if self.use_direction_classifier:\n                loss_dir = pos_dir_cls_preds.sum()\n\n        return loss_cls, loss_bbox, loss_dir\n\n    @staticmethod\n    def add_sin_difference(boxes1, boxes2):\n        \"\"\"Convert the rotation difference to difference in sine function.\n\n        Args:\n            boxes1 (torch.Tensor): Original Boxes in shape (NxC), where C>=7\n                and the 7th dimension is rotation dimension.\n            boxes2 (torch.Tensor): Target boxes in shape (NxC), where C>=7 and\n                the 7th dimension is rotation dimension.\n\n        Returns:\n            tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th \\\n                dimensions are changed.\n        \"\"\"\n        rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos(\n            boxes2[..., 6:7])\n        rad_tg_encoding = torch.cos(boxes1[..., 6:7]) * torch.sin(boxes2[...,\n                                                                         6:7])\n        boxes1 = torch.cat(\n            [boxes1[..., :6], rad_pred_encoding, boxes1[..., 7:]], dim=-1)\n        boxes2 = torch.cat([boxes2[..., :6], rad_tg_encoding, boxes2[..., 7:]],\n                           dim=-1)\n        return boxes1, boxes2\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             gt_bboxes,\n             gt_labels,\n             input_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Calculate losses.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Multi-level class scores.\n            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.\n            dir_cls_preds (list[torch.Tensor]): Multi-level direction\n                class predictions.\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes\n                of each sample.\n            gt_labels (list[torch.Tensor]): Gt labels of each sample.\n            input_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (None | list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict[str, list[torch.Tensor]]: Classification, bbox, and \\\n                direction losses of each level.\n\n                - loss_cls (list[torch.Tensor]): Classification losses.\n                - loss_bbox (list[torch.Tensor]): Box regression losses.\n                - loss_dir (list[torch.Tensor]): Direction classification \\\n                    losses.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == 
self.anchor_generator.num_levels\n        device = cls_scores[0].device\n        anchor_list = self.get_anchors(\n            featmap_sizes, input_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.anchor_target_3d(\n            anchor_list,\n            gt_bboxes,\n            input_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            num_classes=self.num_classes,\n            label_channels=label_channels,\n            sampling=self.sampling)\n\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         dir_targets_list, dir_weights_list, num_total_pos,\n         num_total_neg) = cls_reg_targets\n        num_total_samples = (\n            num_total_pos + num_total_neg if self.sampling else num_total_pos)\n\n        # num_total_samples = None\n        losses_cls, losses_bbox, losses_dir = multi_apply(\n            self.loss_single,\n            cls_scores,\n            bbox_preds,\n            dir_cls_preds,\n            labels_list,\n            label_weights_list,\n            bbox_targets_list,\n            bbox_weights_list,\n            dir_targets_list,\n            dir_weights_list,\n            num_total_samples=num_total_samples)\n        return dict(\n            loss_cls=losses_cls, loss_bbox=losses_bbox, loss_dir=losses_dir)\n\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   dir_cls_preds,\n                   input_metas,\n                   cfg=None,\n                   rescale=False):\n        \"\"\"Get bboxes of anchor head.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Multi-level class scores.\n            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.\n            dir_cls_preds (list[torch.Tensor]): Multi-level direction\n                class predictions.\n            input_metas (list[dict]): Contain pcd and img's meta info.\n            cfg (None | :obj:`ConfigDict`): Training or testing config.\n            rescale (list[torch.Tensor]): Whether th rescale bbox.\n\n        Returns:\n            list[tuple]: Prediction resultes of batches.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds)\n        assert len(cls_scores) == len(dir_cls_preds)\n        num_levels = len(cls_scores)\n        featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]\n        device = cls_scores[0].device\n        mlvl_anchors = self.anchor_generator.grid_anchors(\n            featmap_sizes, device=device)\n        mlvl_anchors = [\n            anchor.reshape(-1, self.box_code_size) for anchor in mlvl_anchors\n        ]\n\n        result_list = []\n        for img_id in range(len(input_metas)):\n            cls_score_list = [\n                cls_scores[i][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_pred_list = [\n                bbox_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n            dir_cls_pred_list = [\n                dir_cls_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n\n            input_meta = input_metas[img_id]\n            proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,\n                                               dir_cls_pred_list, mlvl_anchors,\n                                               input_meta, cfg, rescale)\n    
        result_list.append(proposals)\n        return result_list\n\n    def get_bboxes_single(self,\n                          cls_scores,\n                          bbox_preds,\n                          dir_cls_preds,\n                          mlvl_anchors,\n                          input_meta,\n                          cfg=None,\n                          rescale=False):\n        \"\"\"Get bboxes of single branch.\n\n        Args:\n            cls_scores (torch.Tensor): Class score in single batch.\n            bbox_preds (torch.Tensor): Bbox prediction in single batch.\n            dir_cls_preds (torch.Tensor): Predictions of direction class\n                in single batch.\n            mlvl_anchors (List[torch.Tensor]): Multi-level anchors\n                in single batch.\n            input_meta (list[dict]): Contain pcd and img's meta info.\n            cfg (None | :obj:`ConfigDict`): Training or testing config.\n            rescale (list[torch.Tensor]): whether th rescale bbox.\n\n        Returns:\n            tuple: Contain predictions of single batch.\n\n                - bboxes (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.\n                - scores (torch.Tensor): Class score of each bbox.\n                - labels (torch.Tensor): Label of each bbox.\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_dir_scores = []\n        for cls_score, bbox_pred, dir_cls_pred, anchors in zip(\n                cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]\n            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)\n            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]\n\n            cls_score = cls_score.permute(1, 2,\n                                          0).reshape(-1, self.num_classes)\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)\n            bbox_pred = bbox_pred.permute(1, 2,\n                                          0).reshape(-1, self.box_code_size)\n\n            nms_pre = cfg.get('nms_pre', -1)\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                if self.use_sigmoid_cls:\n                    max_scores, _ = scores.max(dim=1)\n                else:\n                    max_scores, _ = scores[:, :-1].max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                anchors = anchors[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                scores = scores[topk_inds, :]\n                dir_cls_score = dir_cls_score[topk_inds]\n\n            bboxes = self.bbox_coder.decode(anchors, bbox_pred)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_dir_scores.append(dir_cls_score)\n\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](\n            mlvl_bboxes, box_dim=self.box_code_size).bev)\n        mlvl_scores = torch.cat(mlvl_scores)\n        mlvl_dir_scores = torch.cat(mlvl_dir_scores)\n\n        if self.use_sigmoid_cls:\n            # Add a dummy background class to the front when using sigmoid\n            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n            
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)\n\n        score_thr = cfg.get('score_thr', 0)\n        results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,\n                                       mlvl_scores, score_thr, cfg.max_num,\n                                       cfg, mlvl_dir_scores)\n        bboxes, scores, labels, dir_scores = results\n        if bboxes.shape[0] > 0:\n            dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,\n                                   self.dir_limit_offset, np.pi)\n            bboxes[..., 6] = (\n                dir_rot + self.dir_offset +\n                np.pi * dir_scores.to(bboxes.dtype))\n        bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)\n        return bboxes, scores, labels\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/base_conv_bbox_head.py",
    "content": "from mmcv.cnn import ConvModule\nfrom mmcv.cnn.bricks import build_conv_layer\nfrom torch import nn as nn\n\nfrom mmdet.models.builder import HEADS\n\n\n@HEADS.register_module()\nclass BaseConvBboxHead(nn.Module):\n    r\"\"\"More general bbox head, with shared conv layers and two optional\n    separated branches.\n\n    .. code-block:: none\n\n                     /-> cls convs -> cls_score\n        shared convs\n                     \\-> reg convs -> bbox_pred\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=0,\n                 shared_conv_channels=(),\n                 cls_conv_channels=(),\n                 num_cls_out_channels=0,\n                 reg_conv_channels=(),\n                 num_reg_out_channels=0,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 bias='auto',\n                 *args,\n                 **kwargs):\n        super(BaseConvBboxHead, self).__init__(*args, **kwargs)\n        assert in_channels > 0\n        assert num_cls_out_channels > 0\n        assert num_reg_out_channels > 0\n        self.in_channels = in_channels\n        self.shared_conv_channels = shared_conv_channels\n        self.cls_conv_channels = cls_conv_channels\n        self.num_cls_out_channels = num_cls_out_channels\n        self.reg_conv_channels = reg_conv_channels\n        self.num_reg_out_channels = num_reg_out_channels\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n        self.bias = bias\n\n        # add shared convs\n        if len(self.shared_conv_channels) > 0:\n            self.shared_convs = self._add_conv_branch(\n                self.in_channels, self.shared_conv_channels)\n            out_channels = self.shared_conv_channels[-1]\n        else:\n            out_channels = self.in_channels\n\n        # add cls specific branch\n        prev_channel = out_channels\n        if len(self.cls_conv_channels) > 0:\n            self.cls_convs = self._add_conv_branch(prev_channel,\n                                                   self.cls_conv_channels)\n            prev_channel = self.cls_conv_channels[-1]\n\n        self.conv_cls = build_conv_layer(\n            conv_cfg,\n            in_channels=prev_channel,\n            out_channels=num_cls_out_channels,\n            kernel_size=1)\n        # add reg specific branch\n        prev_channel = out_channels\n        if len(self.reg_conv_channels) > 0:\n            self.reg_convs = self._add_conv_branch(prev_channel,\n                                                   self.reg_conv_channels)\n            prev_channel = self.reg_conv_channels[-1]\n\n        self.conv_reg = build_conv_layer(\n            conv_cfg,\n            in_channels=prev_channel,\n            out_channels=num_reg_out_channels,\n            kernel_size=1)\n\n    def _add_conv_branch(self, in_channels, conv_channels):\n        \"\"\"Add shared or separable branch.\"\"\"\n        conv_spec = [in_channels] + list(conv_channels)\n        # add branch specific conv layers\n        conv_layers = nn.Sequential()\n        for i in range(len(conv_spec) - 1):\n            conv_layers.add_module(\n                f'layer{i}',\n                ConvModule(\n                    conv_spec[i],\n                    conv_spec[i + 1],\n                    kernel_size=1,\n                    padding=0,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n        
            act_cfg=self.act_cfg,\n                    bias=self.bias,\n                    inplace=True))\n        return conv_layers\n\n    def init_weights(self):\n        # conv layers are already initialized by ConvModule\n        pass\n\n    def forward(self, feats):\n        \"\"\"Forward.\n\n        Args:\n            feats (Tensor): Input features\n\n        Returns:\n            Tensor: Class scores predictions\n            Tensor: Regression predictions\n        \"\"\"\n        # shared part\n        if len(self.shared_conv_channels) > 0:\n            x = self.shared_convs(feats)\n\n        # separate branches\n        x_cls = x\n        x_reg = x\n\n        if len(self.cls_conv_channels) > 0:\n            x_cls = self.cls_convs(x_cls)\n        cls_score = self.conv_cls(x_cls)\n\n        if len(self.reg_conv_channels) > 0:\n            x_reg = self.reg_convs(x_reg)\n        bbox_pred = self.conv_reg(x_reg)\n\n        return cls_score, bbox_pred\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/centerpoint_head.py",
    "content": "import copy\nimport numpy as np\nimport torch\nfrom mmcv.cnn import ConvModule, build_conv_layer, kaiming_init\nfrom mmcv.runner import force_fp32\nfrom torch import nn\n\nfrom mmdet3d.core import (circle_nms, draw_heatmap_gaussian, gaussian_radius,\n                          xywhr2xyxyr)\nfrom mmdet3d.models import builder\nfrom mmdet3d.models.builder import HEADS, build_loss\nfrom mmdet3d.models.utils import clip_sigmoid\nfrom mmdet3d.ops.iou3d.iou3d_utils import nms_gpu\nfrom mmdet.core import build_bbox_coder, multi_apply\n\n\n@HEADS.register_module()\nclass SeparateHead(nn.Module):\n    \"\"\"SeparateHead for CenterHead.\n\n    Args:\n        in_channels (int): Input channels for conv_layer.\n        heads (dict): Conv information.\n        head_conv (int): Output channels.\n            Default: 64.\n        final_kernal (int): Kernal size for the last conv layer.\n            Deafult: 1.\n        init_bias (float): Initial bias. Default: -2.19.\n        conv_cfg (dict): Config of conv layer.\n            Default: dict(type='Conv2d')\n        norm_cfg (dict): Config of norm layer.\n            Default: dict(type='BN2d').\n        bias (str): Type of bias. Default: 'auto'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 heads,\n                 head_conv=64,\n                 final_kernel=1,\n                 init_bias=-2.19,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias='auto',\n                 **kwargs):\n        super(SeparateHead, self).__init__()\n\n        self.heads = heads\n        self.init_bias = init_bias\n        for head in self.heads:\n            classes, num_conv = self.heads[head]\n\n            conv_layers = []\n            c_in = in_channels\n            for i in range(num_conv - 1):\n                conv_layers.append(\n                    ConvModule(\n                        c_in,\n                        head_conv,\n                        kernel_size=final_kernel,\n                        stride=1,\n                        padding=final_kernel // 2,\n                        bias=bias,\n                        conv_cfg=conv_cfg,\n                        norm_cfg=norm_cfg))\n                c_in = head_conv\n\n            conv_layers.append(\n                build_conv_layer(\n                    conv_cfg,\n                    head_conv,\n                    classes,\n                    kernel_size=final_kernel,\n                    stride=1,\n                    padding=final_kernel // 2,\n                    bias=True))\n            conv_layers = nn.Sequential(*conv_layers)\n\n            self.__setattr__(head, conv_layers)\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        for head in self.heads:\n            if head == 'heatmap':\n                self.__getattr__(head)[-1].bias.data.fill_(self.init_bias)\n            else:\n                for m in self.__getattr__(head).modules():\n                    if isinstance(m, nn.Conv2d):\n                        kaiming_init(m)\n\n    def forward(self, x):\n        \"\"\"Forward function for SepHead.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128, 128].\n\n        Returns:\n            dict[str: torch.Tensor]: contains the following keys:\n\n                -reg （torch.Tensor): 2D regression value with the \\\n                    shape of [B, 2, H, W].\n                -height (torch.Tensor): 
Height value with the \\\n                    shape of [B, 1, H, W].\n                -dim (torch.Tensor): Size value with the shape \\\n                    of [B, 3, H, W].\n                -rot (torch.Tensor): Rotation value with the \\\n                    shape of [B, 2, H, W].\n                -vel (torch.Tensor): Velocity value with the \\\n                    shape of [B, 2, H, W].\n                -heatmap (torch.Tensor): Heatmap with the shape of \\\n                    [B, N, H, W].\n        \"\"\"\n        ret_dict = dict()\n        for head in self.heads:\n            ret_dict[head] = self.__getattr__(head)(x)\n\n        return ret_dict\n\n\n@HEADS.register_module()\nclass DCNSeparateHead(nn.Module):\n    r\"\"\"DCNSeparateHead for CenterHead.\n\n    .. code-block:: none\n            /-----> DCN for heatmap task -----> heatmap task.\n    feature\n            \\-----> DCN for regression tasks -----> regression tasks\n\n    Args:\n        in_channels (int): Input channels for conv_layer.\n        heads (dict): Conv information.\n        dcn_config (dict): Config of dcn layer.\n        num_cls (int): Output channels.\n            Default: 64.\n        final_kernal (int): Kernal size for the last conv layer.\n            Deafult: 1.\n        init_bias (float): Initial bias. Default: -2.19.\n        conv_cfg (dict): Config of conv layer.\n            Default: dict(type='Conv2d')\n        norm_cfg (dict): Config of norm layer.\n            Default: dict(type='BN2d').\n        bias (str): Type of bias. Default: 'auto'.\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 in_channels,\n                 num_cls,\n                 heads,\n                 dcn_config,\n                 head_conv=64,\n                 final_kernel=1,\n                 init_bias=-2.19,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias='auto',\n                 **kwargs):\n        super(DCNSeparateHead, self).__init__()\n        if 'heatmap' in heads:\n            heads.pop('heatmap')\n        # feature adaptation with dcn\n        # use separate features for classification / regression\n        self.feature_adapt_cls = build_conv_layer(dcn_config)\n\n        self.feature_adapt_reg = build_conv_layer(dcn_config)\n\n        # heatmap prediction head\n        cls_head = [\n            ConvModule(\n                in_channels,\n                head_conv,\n                kernel_size=3,\n                padding=1,\n                conv_cfg=conv_cfg,\n                bias=bias,\n                norm_cfg=norm_cfg),\n            build_conv_layer(\n                conv_cfg,\n                head_conv,\n                num_cls,\n                kernel_size=3,\n                stride=1,\n                padding=1,\n                bias=bias)\n        ]\n        self.cls_head = nn.Sequential(*cls_head)\n        self.init_bias = init_bias\n        # other regression target\n        self.task_head = SeparateHead(\n            in_channels,\n            heads,\n            head_conv=head_conv,\n            final_kernel=final_kernel,\n            bias=bias)\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        self.cls_head[-1].bias.data.fill_(self.init_bias)\n        self.task_head.init_weights()\n\n    def forward(self, x):\n        \"\"\"Forward function for DCNSepHead.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128, 128].\n\n        Returns:\n   
         dict[str: torch.Tensor]: contains the following keys:\n\n                -reg （torch.Tensor): 2D regression value with the \\\n                    shape of [B, 2, H, W].\n                -height (torch.Tensor): Height value with the \\\n                    shape of [B, 1, H, W].\n                -dim (torch.Tensor): Size value with the shape \\\n                    of [B, 3, H, W].\n                -rot (torch.Tensor): Rotation value with the \\\n                    shape of [B, 2, H, W].\n                -vel (torch.Tensor): Velocity value with the \\\n                    shape of [B, 2, H, W].\n                -heatmap (torch.Tensor): Heatmap with the shape of \\\n                    [B, N, H, W].\n        \"\"\"\n        center_feat = self.feature_adapt_cls(x)\n        reg_feat = self.feature_adapt_reg(x)\n\n        cls_score = self.cls_head(center_feat)\n        ret = self.task_head(reg_feat)\n        ret['heatmap'] = cls_score\n\n        return ret\n\n\n@HEADS.register_module()\nclass CenterHead(nn.Module):\n    \"\"\"CenterHead for CenterPoint.\n\n    Args:\n        mode (str): Mode of the head. Default: '3d'.\n        in_channels (list[int] | int): Channels of the input feature map.\n            Default: [128].\n        tasks (list[dict]): Task information including class number\n            and class names. Default: None.\n        dataset (str): Name of the dataset. Default: 'nuscenes'.\n        weight (float): Weight for location loss. Default: 0.25.\n        code_weights (list[int]): Code weights for location loss. Default: [].\n        common_heads (dict): Conv information for common heads.\n            Default: dict().\n        loss_cls (dict): Config of classification loss function.\n            Default: dict(type='GaussianFocalLoss', reduction='mean').\n        loss_bbox (dict): Config of regression loss function.\n            Default: dict(type='L1Loss', reduction='none').\n        separate_head (dict): Config of separate head. Default: dict(\n            type='SeparateHead', init_bias=-2.19, final_kernel=3)\n        share_conv_channel (int): Output channels for share_conv_layer.\n            Default: 64.\n        num_heatmap_convs (int): Number of conv layers for heatmap conv layer.\n            Default: 2.\n        conv_cfg (dict): Config of conv layer.\n            Default: dict(type='Conv2d')\n        norm_cfg (dict): Config of norm layer.\n            Default: dict(type='BN2d').\n        bias (str): Type of bias. 
Default: 'auto'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=[128],\n                 tasks=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 bbox_coder=None,\n                 common_heads=dict(),\n                 loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n                 loss_bbox=dict(\n                     type='L1Loss', reduction='none', loss_weight=0.25),\n                 separate_head=dict(\n                     type='SeparateHead', init_bias=-2.19, final_kernel=3),\n                 share_conv_channel=64,\n                 num_heatmap_convs=2,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias='auto',\n                 norm_bbox=True):\n        super(CenterHead, self).__init__()\n\n        num_classes = [len(t['class_names']) for t in tasks]\n        self.class_names = [t['class_names'] for t in tasks]\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.norm_bbox = norm_bbox\n\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.num_anchor_per_locs = [n for n in num_classes]\n        self.fp16_enabled = False\n\n        # a shared convolution\n        self.shared_conv = ConvModule(\n            in_channels,\n            share_conv_channel,\n            kernel_size=3,\n            padding=1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            bias=bias)\n\n        self.task_heads = nn.ModuleList()\n\n        for num_cls in num_classes:\n            heads = copy.deepcopy(common_heads)\n            heads.update(dict(heatmap=(num_cls, num_heatmap_convs)))\n            separate_head.update(\n                in_channels=share_conv_channel, heads=heads, num_cls=num_cls)\n            self.task_heads.append(builder.build_head(separate_head))\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        for task_head in self.task_heads:\n            task_head.init_weights()\n\n    def forward_single(self, x):\n        \"\"\"Forward function for CenterPoint.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128, 128].\n\n        Returns:\n            list[dict]: Output results for tasks.\n        \"\"\"\n        ret_dicts = []\n\n        x = self.shared_conv(x)\n\n        for task in self.task_heads:\n            ret_dicts.append(task(x))\n\n        return ret_dicts\n\n    def forward(self, feats):\n        \"\"\"Forward pass.\n\n        Args:\n            feats (list[torch.Tensor]): Multi-level features, e.g.,\n                features produced by FPN.\n\n        Returns:\n            tuple(list[dict]): Output results for tasks.\n        \"\"\"\n        return multi_apply(self.forward_single, feats)\n\n    def _gather_feat(self, feat, ind, mask=None):\n        \"\"\"Gather feature map.\n\n        Given feature map and index, return indexed feature map.\n\n        Args:\n            feat (torch.tensor): Feature map with the shape of [B, H*W, 10].\n            ind (torch.Tensor): Index of the ground truth boxes with the\n                shape of [B, max_obj].\n            mask (torch.Tensor): Mask of the feature map with the shape\n                of [B, max_obj]. 
Default: None.\n\n        Returns:\n            torch.Tensor: Feature map after gathering with the shape\n                of [B, max_obj, 10].\n        \"\"\"\n        dim = feat.size(2)\n        ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)\n        feat = feat.gather(1, ind)\n        if mask is not None:\n            mask = mask.unsqueeze(2).expand_as(feat)\n            feat = feat[mask]\n            feat = feat.view(-1, dim)\n        return feat\n\n    def get_targets(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Generate targets.\n\n        Args:\n            gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground\n                truth gt boxes.\n            gt_labels_3d (list[torch.Tensor]): Labels of boxes.\n\n        Returns:\n            Returns:\n                tuple[list[torch.Tensor]]: Tuple of target including \\\n                    the following results in order.\n\n                    - list[torch.Tensor]: Heatmap scores.\n                    - list[torch.Tensor]: Ground truth boxes.\n                    - list[torch.Tensor]: Indexes indicating the \\\n                        position of the valid boxes.\n                    - list[torch.Tensor]: Masks indicating which \\\n                        boxes are valid.\n        \"\"\"\n        heatmaps, anno_boxes, inds, masks = multi_apply(\n            self.get_targets_single, gt_bboxes_3d, gt_labels_3d)\n        # transpose heatmaps, because the dimension of tensors in each task is\n        # different, we have to use numpy instead of torch to do the transpose.\n        heatmaps = np.array(heatmaps).transpose(1, 0).tolist()\n        heatmaps = [torch.stack(hms_) for hms_ in heatmaps]\n        # transpose anno_boxes\n        anno_boxes = np.array(anno_boxes).transpose(1, 0).tolist()\n        anno_boxes = [torch.stack(anno_boxes_) for anno_boxes_ in anno_boxes]\n        # transpose inds\n        inds = np.array(inds).transpose(1, 0).tolist()\n        inds = [torch.stack(inds_) for inds_ in inds]\n        # transpose inds\n        masks = np.array(masks).transpose(1, 0).tolist()\n        masks = [torch.stack(masks_) for masks_ in masks]\n        return heatmaps, anno_boxes, inds, masks\n\n    def get_targets_single(self, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Generate training targets for a single sample.\n\n        Args:\n            gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): Ground truth gt boxes.\n            gt_labels_3d (torch.Tensor): Labels of boxes.\n\n        Returns:\n            tuple[list[torch.Tensor]]: Tuple of target including \\\n                the following results in order.\n\n                - list[torch.Tensor]: Heatmap scores.\n                - list[torch.Tensor]: Ground truth boxes.\n                - list[torch.Tensor]: Indexes indicating the position \\\n                    of the valid boxes.\n                - list[torch.Tensor]: Masks indicating which boxes \\\n                    are valid.\n        \"\"\"\n        device = gt_labels_3d.device\n        gt_bboxes_3d = torch.cat(\n            (gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]),\n            dim=1).to(device)\n        max_objs = self.train_cfg['max_objs'] * self.train_cfg['dense_reg']\n        grid_size = torch.tensor(self.train_cfg['grid_size'])\n        pc_range = torch.tensor(self.train_cfg['point_cloud_range'])\n        voxel_size = torch.tensor(self.train_cfg['voxel_size'])\n\n        feature_map_size = grid_size[:2] // self.train_cfg['out_size_factor']\n\n        # reorganize the gt_dict by tasks\n 
       task_masks = []\n        flag = 0\n        for class_name in self.class_names:\n            task_masks.append([\n                torch.where(gt_labels_3d == class_name.index(i) + flag)\n                for i in class_name\n            ])\n            flag += len(class_name)\n\n        task_boxes = []\n        task_classes = []\n        flag2 = 0\n        for idx, mask in enumerate(task_masks):\n            task_box = []\n            task_class = []\n            for m in mask:\n                task_box.append(gt_bboxes_3d[m])\n                # 0 is background for each task, so we need to add 1 here.\n                task_class.append(gt_labels_3d[m] + 1 - flag2)\n            task_boxes.append(torch.cat(task_box, axis=0).to(device))\n            task_classes.append(torch.cat(task_class).long().to(device))\n            flag2 += len(mask)\n        draw_gaussian = draw_heatmap_gaussian\n        heatmaps, anno_boxes, inds, masks = [], [], [], []\n\n        for idx, task_head in enumerate(self.task_heads):\n            heatmap = gt_bboxes_3d.new_zeros(\n                (len(self.class_names[idx]), feature_map_size[1],\n                 feature_map_size[0]))\n\n            anno_box = gt_bboxes_3d.new_zeros((max_objs, 10),\n                                              dtype=torch.float32)\n\n            ind = gt_labels_3d.new_zeros((max_objs), dtype=torch.int64)\n            mask = gt_bboxes_3d.new_zeros((max_objs), dtype=torch.uint8)\n\n            num_objs = min(task_boxes[idx].shape[0], max_objs)\n\n            for k in range(num_objs):\n                cls_id = task_classes[idx][k] - 1\n\n                width = task_boxes[idx][k][3]\n                length = task_boxes[idx][k][4]\n                width = width / voxel_size[0] / self.train_cfg[\n                    'out_size_factor']\n                length = length / voxel_size[1] / self.train_cfg[\n                    'out_size_factor']\n\n                if width > 0 and length > 0:\n                    radius = gaussian_radius(\n                        (length, width),\n                        min_overlap=self.train_cfg['gaussian_overlap'])\n                    radius = max(self.train_cfg['min_radius'], int(radius))\n\n                    # be really careful for the coordinate system of\n                    # your box annotation.\n                    x, y, z = task_boxes[idx][k][0], task_boxes[idx][k][\n                        1], task_boxes[idx][k][2]\n\n                    coor_x = (\n                        x - pc_range[0]\n                    ) / voxel_size[0] / self.train_cfg['out_size_factor']\n                    coor_y = (\n                        y - pc_range[1]\n                    ) / voxel_size[1] / self.train_cfg['out_size_factor']\n\n                    center = torch.tensor([coor_x, coor_y],\n                                          dtype=torch.float32,\n                                          device=device)\n                    center_int = center.to(torch.int32)\n\n                    # throw out not in range objects to avoid out of array\n                    # area when creating the heatmap\n                    if not (0 <= center_int[0] < feature_map_size[0]\n                            and 0 <= center_int[1] < feature_map_size[1]):\n                        continue\n\n                    draw_gaussian(heatmap[cls_id], center_int, radius)\n\n                    new_idx = k\n                    x, y = center_int[0], center_int[1]\n\n                    assert (y * feature_map_size[0] + x <\n                     
       feature_map_size[0] * feature_map_size[1])\n\n                    ind[new_idx] = y * feature_map_size[0] + x\n                    mask[new_idx] = 1\n                    # TODO: support other outdoor dataset\n                    vx, vy = task_boxes[idx][k][7:]\n                    rot = task_boxes[idx][k][6]\n                    box_dim = task_boxes[idx][k][3:6]\n                    if self.norm_bbox:\n                        box_dim = box_dim.log()\n                    anno_box[new_idx] = torch.cat([\n                        center - torch.tensor([x, y], device=device),\n                        z.unsqueeze(0), box_dim,\n                        torch.sin(rot).unsqueeze(0),\n                        torch.cos(rot).unsqueeze(0),\n                        vx.unsqueeze(0),\n                        vy.unsqueeze(0)\n                    ])\n\n            heatmaps.append(heatmap)\n            anno_boxes.append(anno_box)\n            masks.append(mask)\n            inds.append(ind)\n        return heatmaps, anno_boxes, inds, masks\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def loss(self, gt_bboxes_3d, gt_labels_3d, preds_dicts, **kwargs):\n        \"\"\"Loss function for CenterHead.\n\n        Args:\n            gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground\n                truth gt boxes.\n            gt_labels_3d (list[torch.Tensor]): Labels of boxes.\n            preds_dicts (dict): Output of forward function.\n\n        Returns:\n            dict[str:torch.Tensor]: Loss of heatmap and bbox of each task.\n        \"\"\"\n        heatmaps, anno_boxes, inds, masks = self.get_targets(\n            gt_bboxes_3d, gt_labels_3d)\n        loss_dict = dict()\n        for task_id, preds_dict in enumerate(preds_dicts):\n            # heatmap focal loss\n            preds_dict[0]['heatmap'] = clip_sigmoid(preds_dict[0]['heatmap'])\n            num_pos = heatmaps[task_id].eq(1).float().sum().item()\n            loss_heatmap = self.loss_cls(\n                preds_dict[0]['heatmap'],\n                heatmaps[task_id],\n                avg_factor=max(num_pos, 1))\n            target_box = anno_boxes[task_id]\n            # reconstruct the anno_box from multiple reg heads\n            preds_dict[0]['anno_box'] = torch.cat(\n                (preds_dict[0]['reg'], preds_dict[0]['height'],\n                 preds_dict[0]['dim'], preds_dict[0]['rot'],\n                 preds_dict[0]['vel']),\n                dim=1)\n\n            # Regression loss for dimension, offset, height, rotation\n            ind = inds[task_id]\n            num = masks[task_id].float().sum()\n            pred = preds_dict[0]['anno_box'].permute(0, 2, 3, 1).contiguous()\n            pred = pred.view(pred.size(0), -1, pred.size(3))\n            pred = self._gather_feat(pred, ind)\n            mask = masks[task_id].unsqueeze(2).expand_as(target_box).float()\n            isnotnan = (~torch.isnan(target_box)).float()\n            mask *= isnotnan\n\n            code_weights = self.train_cfg.get('code_weights', None)\n            bbox_weights = mask * mask.new_tensor(code_weights)\n            loss_bbox = self.loss_bbox(\n                pred, target_box, bbox_weights, avg_factor=(num + 1e-4))\n            loss_dict[f'task{task_id}.loss_heatmap'] = loss_heatmap\n            loss_dict[f'task{task_id}.loss_bbox'] = loss_bbox\n        return loss_dict\n\n    def get_bboxes(self, preds_dicts, img_metas, img=None, rescale=False):\n        \"\"\"Generate bboxes from bbox head predictions.\n\n        Args:\n            preds_dicts 
(tuple[list[dict]]): Prediction results.\n            img_metas (list[dict]): Point cloud and image's meta info.\n\n        Returns:\n            list[dict]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        rets = []\n        for task_id, preds_dict in enumerate(preds_dicts):\n            num_class_with_bg = self.num_classes[task_id]\n            batch_size = preds_dict[0]['heatmap'].shape[0]\n            batch_heatmap = preds_dict[0]['heatmap'].sigmoid()\n\n            batch_reg = preds_dict[0]['reg']\n            batch_hei = preds_dict[0]['height']\n\n            if self.norm_bbox:\n                batch_dim = torch.exp(preds_dict[0]['dim'])\n            else:\n                batch_dim = preds_dict[0]['dim']\n\n            batch_rots = preds_dict[0]['rot'][:, 0].unsqueeze(1)\n            batch_rotc = preds_dict[0]['rot'][:, 1].unsqueeze(1)\n\n            if 'vel' in preds_dict[0]:\n                batch_vel = preds_dict[0]['vel']\n            else:\n                batch_vel = None\n            temp = self.bbox_coder.decode(\n                batch_heatmap,\n                batch_rots,\n                batch_rotc,\n                batch_hei,\n                batch_dim,\n                batch_vel,\n                reg=batch_reg,\n                task_id=task_id)\n            assert self.test_cfg['nms_type'] in ['circle', 'rotate']\n            batch_reg_preds = [box['bboxes'] for box in temp]\n            batch_cls_preds = [box['scores'] for box in temp]\n            batch_cls_labels = [box['labels'] for box in temp]\n            if self.test_cfg['nms_type'] == 'circle':\n                ret_task = []\n                for i in range(batch_size):\n                    boxes3d = temp[i]['bboxes']\n                    scores = temp[i]['scores']\n                    labels = temp[i]['labels']\n                    centers = boxes3d[:, [0, 1]]\n                    boxes = torch.cat([centers, scores.view(-1, 1)], dim=1)\n                    keep = torch.tensor(\n                        circle_nms(\n                            boxes.detach().cpu().numpy(),\n                            self.test_cfg['min_radius'][task_id],\n                            post_max_size=self.test_cfg['post_max_size']),\n                        dtype=torch.long,\n                        device=boxes.device)\n\n                    boxes3d = boxes3d[keep]\n                    scores = scores[keep]\n                    labels = labels[keep]\n                    ret = dict(bboxes=boxes3d, scores=scores, labels=labels)\n                    ret_task.append(ret)\n                rets.append(ret_task)\n            else:\n                rets.append(\n                    self.get_task_detections(num_class_with_bg,\n                                             batch_cls_preds, batch_reg_preds,\n                                             batch_cls_labels, img_metas))\n\n        # Merge branches results\n        num_samples = len(rets[0])\n\n        ret_list = []\n        for i in range(num_samples):\n            for k in rets[0][i].keys():\n                if k == 'bboxes':\n                    bboxes = torch.cat([ret[i][k] for ret in rets])\n                    bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 5] * 0.5\n                    bboxes = img_metas[i]['box_type_3d'](\n                        bboxes, self.bbox_coder.code_size)\n                elif k == 'scores':\n                    scores = torch.cat([ret[i][k] for ret in rets])\n                elif k == 'labels':\n                    flag = 0\n              
      for j, num_class in enumerate(self.num_classes):\n                        rets[j][i][k] += flag\n                        flag += num_class\n                    labels = torch.cat([ret[i][k].int() for ret in rets])\n            ret_list.append([bboxes, scores, labels])\n        return ret_list\n\n    def get_task_detections(self, num_class_with_bg, batch_cls_preds,\n                            batch_reg_preds, batch_cls_labels, img_metas):\n        \"\"\"Rotated NMS for each task.\n\n        Args:\n            num_class_with_bg (int): Number of classes for the current task.\n            batch_cls_preds (list[torch.Tensor]): Prediction score with the\n                shape of [N].\n            batch_reg_preds (list[torch.Tensor]): Prediction bbox with the\n                shape of [N, 9].\n            batch_cls_labels (list[torch.Tensor]): Prediction label with the\n                shape of [N].\n            img_metas (list[dict]): Meta information of each sample.\n\n        Returns:\n            list[dict[str: torch.Tensor]]: Contains the following keys:\n\n                -bboxes (torch.Tensor): Prediction bboxes after nms with the \\\n                    shape of [N, 9].\n                -scores (torch.Tensor): Prediction scores after nms with the \\\n                    shape of [N].\n                -labels (torch.Tensor): Prediction labels after nms with the \\\n                    shape of [N].\n        \"\"\"\n        predictions_dicts = []\n        post_center_range = self.test_cfg['post_center_limit_range']\n        if len(post_center_range) > 0:\n            post_center_range = torch.tensor(\n                post_center_range,\n                dtype=batch_reg_preds[0].dtype,\n                device=batch_reg_preds[0].device)\n\n        for i, (box_preds, cls_preds, cls_labels) in enumerate(\n                zip(batch_reg_preds, batch_cls_preds, batch_cls_labels)):\n\n            # Apply NMS in bird's-eye view\n\n            # get the highest score per prediction, then apply NMS\n            # to remove overlapped boxes.\n            if num_class_with_bg == 1:\n                top_scores = cls_preds.squeeze(-1)\n                top_labels = torch.zeros(\n                    cls_preds.shape[0],\n                    device=cls_preds.device,\n                    dtype=torch.long)\n\n            else:\n                top_labels = cls_labels.long()\n                top_scores = cls_preds.squeeze(-1)\n\n            if self.test_cfg['score_threshold'] > 0.0:\n                thresh = torch.tensor(\n                    [self.test_cfg['score_threshold']],\n                    device=cls_preds.device).type_as(cls_preds)\n                top_scores_keep = top_scores >= thresh\n                top_scores = top_scores.masked_select(top_scores_keep)\n\n            if top_scores.shape[0] != 0:\n                if self.test_cfg['score_threshold'] > 0.0:\n                    box_preds = box_preds[top_scores_keep]\n                    top_labels = top_labels[top_scores_keep]\n\n                boxes_for_nms = xywhr2xyxyr(img_metas[i]['box_type_3d'](\n                    box_preds[:, :], self.bbox_coder.code_size).bev)\n                # the NMS in 3D detection just removes overlapping boxes.\n\n                selected = nms_gpu(\n                    boxes_for_nms,\n                    top_scores,\n                    thresh=self.test_cfg['nms_thr'],\n                    pre_maxsize=self.test_cfg['pre_max_size'],\n                    post_max_size=self.test_cfg['post_max_size'])\n            else:\n        
       selected = []\n\n            # if selected is not None:\n            selected_boxes = box_preds[selected]\n            selected_labels = top_labels[selected]\n            selected_scores = top_scores[selected]\n\n            # finally generate predictions.\n            if selected_boxes.shape[0] != 0:\n                box_preds = selected_boxes\n                scores = selected_scores\n                label_preds = selected_labels\n                final_box_preds = box_preds\n                final_scores = scores\n                final_labels = label_preds\n                if post_center_range is not None:\n                    mask = (final_box_preds[:, :3] >=\n                            post_center_range[:3]).all(1)\n                    mask &= (final_box_preds[:, :3] <=\n                             post_center_range[3:]).all(1)\n                    predictions_dict = dict(\n                        bboxes=final_box_preds[mask],\n                        scores=final_scores[mask],\n                        labels=final_labels[mask])\n                else:\n                    predictions_dict = dict(\n                        bboxes=final_box_preds,\n                        scores=final_scores,\n                        labels=final_labels)\n            else:\n                dtype = batch_reg_preds[0].dtype\n                device = batch_reg_preds[0].device\n                predictions_dict = dict(\n                    bboxes=torch.zeros([0, self.bbox_coder.code_size],\n                                       dtype=dtype,\n                                       device=device),\n                    scores=torch.zeros([0], dtype=dtype, device=device),\n                    labels=torch.zeros([0],\n                                       dtype=top_labels.dtype,\n                                       device=device))\n\n            predictions_dicts.append(predictions_dict)\n        return predictions_dicts\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/free_anchor3d_head.py",
    "content": "import torch\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.bbox import bbox_overlaps_nearest_3d\nfrom mmdet.models import HEADS\nfrom .anchor3d_head import Anchor3DHead\nfrom .train_mixins import get_direction_target\n\n\n@HEADS.register_module()\nclass FreeAnchor3DHead(Anchor3DHead):\n    r\"\"\"`FreeAnchor <https://arxiv.org/abs/1909.02466>`_ head for 3D detection.\n\n    Note:\n        This implementation is directly modified from the `mmdet implementation\n        <https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/dense_heads/free_anchor_retina_head.py>`_  # noqa\n        We find it also works on 3D detection with minor modification, i.e.,\n        different hyper-parameters and a additional direction classifier.\n\n    Args:\n        pre_anchor_topk (int): Number of boxes that be token in each bag.\n        bbox_thr (float): The threshold of the saturated linear function. It is\n            usually the same with the IoU threshold used in NMS.\n        gamma (float): Gamma parameter in focal loss.\n        alpha (float): Alpha parameter in focal loss.\n        kwargs (dict): Other arguments are the same as those in :class:`Anchor3DHead`.\n    \"\"\"\n\n    def __init__(self,\n                 pre_anchor_topk=50,\n                 bbox_thr=0.6,\n                 gamma=2.0,\n                 alpha=0.5,\n                 **kwargs):\n        super().__init__(**kwargs)\n        self.pre_anchor_topk = pre_anchor_topk\n        self.bbox_thr = bbox_thr\n        self.gamma = gamma\n        self.alpha = alpha\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             gt_bboxes,\n             gt_labels,\n             input_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Calculate loss of FreeAnchor head.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Classification scores of\n                different samples.\n            bbox_preds (list[torch.Tensor]): Box predictions of\n                different samples\n            dir_cls_preds (list[torch.Tensor]): Direction predictions of\n                different samples\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes.\n            gt_labels (list[torch.Tensor]): Ground truth labels.\n            input_metas (list[dict]): List of input meta information.\n            gt_bboxes_ignore (list[:obj:`BaseInstance3DBoxes`], optional):\n                Ground truth boxes that should be ignored. 
Defaults to None.\n\n        Returns:\n            dict[str, torch.Tensor]: Loss items.\n\n                - positive_bag_loss (torch.Tensor): Loss of positive samples.\n                - negative_bag_loss (torch.Tensor): Loss of negative samples.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.anchor_generator.num_levels\n\n        anchor_list = self.get_anchors(featmap_sizes, input_metas)\n        anchors = [torch.cat(anchor) for anchor in anchor_list]\n\n        # concatenate each level\n        cls_scores = [\n            cls_score.permute(0, 2, 3, 1).reshape(\n                cls_score.size(0), -1, self.num_classes)\n            for cls_score in cls_scores\n        ]\n        bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(\n                bbox_pred.size(0), -1, self.box_code_size)\n            for bbox_pred in bbox_preds\n        ]\n        dir_cls_preds = [\n            dir_cls_pred.permute(0, 2, 3,\n                                 1).reshape(dir_cls_pred.size(0), -1, 2)\n            for dir_cls_pred in dir_cls_preds\n        ]\n\n        cls_scores = torch.cat(cls_scores, dim=1)\n        bbox_preds = torch.cat(bbox_preds, dim=1)\n        dir_cls_preds = torch.cat(dir_cls_preds, dim=1)\n\n        cls_prob = torch.sigmoid(cls_scores)\n        box_prob = []\n        num_pos = 0\n        positive_losses = []\n        for _, (anchors_, gt_labels_, gt_bboxes_, cls_prob_, bbox_preds_,\n                dir_cls_preds_) in enumerate(\n                    zip(anchors, gt_labels, gt_bboxes, cls_prob, bbox_preds,\n                        dir_cls_preds)):\n\n            gt_bboxes_ = gt_bboxes_.tensor.to(anchors_.device)\n\n            with torch.no_grad():\n                # box_localization: a_{j}^{loc}, shape: [j, 4]\n                pred_boxes = self.bbox_coder.decode(anchors_, bbox_preds_)\n\n                # object_box_iou: IoU_{ij}^{loc}, shape: [i, j]\n                object_box_iou = bbox_overlaps_nearest_3d(\n                    gt_bboxes_, pred_boxes)\n\n                # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]\n                t1 = self.bbox_thr\n                t2 = object_box_iou.max(\n                    dim=1, keepdim=True).values.clamp(min=t1 + 1e-12)\n                object_box_prob = ((object_box_iou - t1) / (t2 - t1)).clamp(\n                    min=0, max=1)\n\n                # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j]\n                num_obj = gt_labels_.size(0)\n                indices = torch.stack(\n                    [torch.arange(num_obj).type_as(gt_labels_), gt_labels_],\n                    dim=0)\n\n                object_cls_box_prob = torch.sparse_coo_tensor(\n                    indices, object_box_prob)\n\n                # image_box_iou: P{a_{j} \\in A_{+}}, shape: [c, j]\n                \"\"\"\n                from \"start\" to \"end\" implement:\n                image_box_iou = torch.sparse.max(object_cls_box_prob,\n                                                 dim=0).t()\n\n                \"\"\"\n                # start\n                box_cls_prob = torch.sparse.sum(\n                    object_cls_box_prob, dim=0).to_dense()\n\n                indices = torch.nonzero(box_cls_prob, as_tuple=False).t_()\n                if indices.numel() == 0:\n                    image_box_prob = torch.zeros(\n                        anchors_.size(0),\n                        self.num_classes).type_as(object_box_prob)\n                
else:\n                    nonzero_box_prob = torch.where(\n                        (gt_labels_.unsqueeze(dim=-1) == indices[0]),\n                        object_box_prob[:, indices[1]],\n                        torch.tensor(\n                            [0]).type_as(object_box_prob)).max(dim=0).values\n\n                    # upmap to shape [j, c]\n                    image_box_prob = torch.sparse_coo_tensor(\n                        indices.flip([0]),\n                        nonzero_box_prob,\n                        size=(anchors_.size(0), self.num_classes)).to_dense()\n                # end\n\n                box_prob.append(image_box_prob)\n\n            # construct bags for objects\n            match_quality_matrix = bbox_overlaps_nearest_3d(\n                gt_bboxes_, anchors_)\n            _, matched = torch.topk(\n                match_quality_matrix,\n                self.pre_anchor_topk,\n                dim=1,\n                sorted=False)\n            del match_quality_matrix\n\n            # matched_cls_prob: P_{ij}^{cls}\n            matched_cls_prob = torch.gather(\n                cls_prob_[matched], 2,\n                gt_labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk,\n                                                 1)).squeeze(2)\n\n            # matched_box_prob: P_{ij}^{loc}\n            matched_anchors = anchors_[matched]\n            matched_object_targets = self.bbox_coder.encode(\n                matched_anchors,\n                gt_bboxes_.unsqueeze(dim=1).expand_as(matched_anchors))\n\n            # direction classification loss\n            loss_dir = None\n            if self.use_direction_classifier:\n                # also calculate direction prob: P_{ij}^{dir}\n                matched_dir_targets = get_direction_target(\n                    matched_anchors,\n                    matched_object_targets,\n                    self.dir_offset,\n                    one_hot=False)\n                loss_dir = self.loss_dir(\n                    dir_cls_preds_[matched].transpose(-2, -1),\n                    matched_dir_targets,\n                    reduction_override='none')\n\n            # generate bbox weights\n            if self.diff_rad_by_sin:\n                bbox_preds_[matched], matched_object_targets = \\\n                    self.add_sin_difference(\n                        bbox_preds_[matched], matched_object_targets)\n            bbox_weights = matched_anchors.new_ones(matched_anchors.size())\n            # Use pop is not right, check performance\n            code_weight = self.train_cfg.get('code_weight', None)\n            if code_weight:\n                bbox_weights = bbox_weights * bbox_weights.new_tensor(\n                    code_weight)\n            loss_bbox = self.loss_bbox(\n                bbox_preds_[matched],\n                matched_object_targets,\n                bbox_weights,\n                reduction_override='none').sum(-1)\n\n            if loss_dir is not None:\n                loss_bbox += loss_dir\n            matched_box_prob = torch.exp(-loss_bbox)\n\n            # positive_losses: {-log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) )}\n            num_pos += len(gt_bboxes_)\n            positive_losses.append(\n                self.positive_bag_loss(matched_cls_prob, matched_box_prob))\n\n        positive_loss = torch.cat(positive_losses).sum() / max(1, num_pos)\n\n        # box_prob: P{a_{j} \\in A_{+}}\n        box_prob = torch.stack(box_prob, dim=0)\n\n        # negative_loss:\n        # \\sum_{j}{ FL((1 - P{a_{j} 
\\in A_{+}}) * (1 - P_{j}^{bg})) } / n||B||\n        negative_loss = self.negative_bag_loss(cls_prob, box_prob).sum() / max(\n            1, num_pos * self.pre_anchor_topk)\n\n        losses = {\n            'positive_bag_loss': positive_loss,\n            'negative_bag_loss': negative_loss\n        }\n        return losses\n\n    def positive_bag_loss(self, matched_cls_prob, matched_box_prob):\n        \"\"\"Generate positive bag loss.\n\n        Args:\n            matched_cls_prob (torch.Tensor): Classification probability\n                of matched positive samples.\n            matched_box_prob (torch.Tensor): Bounding box probability\n                of matched positive samples.\n\n        Returns:\n            torch.Tensor: Loss of positive samples.\n        \"\"\"\n        # bag_prob = Mean-max(matched_prob)\n        matched_prob = matched_cls_prob * matched_box_prob\n        weight = 1 / torch.clamp(1 - matched_prob, 1e-12, None)\n        weight /= weight.sum(dim=1).unsqueeze(dim=-1)\n        bag_prob = (weight * matched_prob).sum(dim=1)\n        # positive_bag_loss = -self.alpha * log(bag_prob)\n        bag_prob = bag_prob.clamp(0, 1)  # to avoid bug of BCE, check\n        return self.alpha * F.binary_cross_entropy(\n            bag_prob, torch.ones_like(bag_prob), reduction='none')\n\n    def negative_bag_loss(self, cls_prob, box_prob):\n        \"\"\"Generate negative bag loss.\n\n        Args:\n            cls_prob (torch.Tensor): Classification probability\n                of negative samples.\n            box_prob (torch.Tensor): Bounding box probability\n                of negative samples.\n\n        Returns:\n            torch.Tensor: Loss of negative samples.\n        \"\"\"\n        prob = cls_prob * (1 - box_prob)\n        prob = prob.clamp(0, 1)  # to avoid bug of BCE, check\n        negative_bag_loss = prob**self.gamma * F.binary_cross_entropy(\n            prob, torch.zeros_like(prob), reduction='none')\n        return (1 - self.alpha) * negative_bag_loss\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/parta2_rpn_head.py",
    "content": "from __future__ import division\n\nimport numpy as np\nimport torch\nfrom mmcv.runner import force_fp32\n\nfrom mmdet3d.core import limit_period, xywhr2xyxyr\nfrom mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu\nfrom mmdet.models import HEADS\nfrom .anchor3d_head import Anchor3DHead\n\n\n@HEADS.register_module()\nclass PartA2RPNHead(Anchor3DHead):\n    \"\"\"RPN head for PartA2.\n\n    Note:\n        The main difference between the PartA2 RPN head and the Anchor3DHead\n        lies in their output during inference. PartA2 RPN head further returns\n        the original classification score for the second stage since the bbox\n        head in RoI head does not do classification task.\n\n        Different from RPN heads in 2D detectors, this RPN head does\n        multi-class classification task and uses FocalLoss like the SECOND and\n        PointPillars do. But this head uses class agnostic nms rather than\n        multi-class nms.\n\n    Args:\n        num_classes (int): Number of classes.\n        in_channels (int): Number of channels in the input feature map.\n        train_cfg (dict): Train configs.\n        test_cfg (dict): Test configs.\n        feat_channels (int): Number of channels of the feature map.\n        use_direction_classifier (bool): Whether to add a direction classifier.\n        anchor_generator(dict): Config dict of anchor generator.\n        assigner_per_size (bool): Whether to do assignment for each separate\n            anchor size.\n        assign_per_class (bool): Whether to do assignment for each class.\n        diff_rad_by_sin (bool): Whether to change the difference into sin\n            difference for box regression loss.\n        dir_offset (float | int): The offset of BEV rotation angles\n            (TODO: may be moved into box coder)\n        dir_limit_offset (float | int): The limited range of BEV\n            rotation angles. 
(TODO: may be moved into box coder)\n        bbox_coder (dict): Config dict of box coders.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of localization loss.\n        loss_dir (dict): Config of direction classifier loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 train_cfg,\n                 test_cfg,\n                 feat_channels=256,\n                 use_direction_classifier=True,\n                 anchor_generator=dict(\n                     type='Anchor3DRangeGenerator',\n                     range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],\n                     strides=[2],\n                     sizes=[[1.6, 3.9, 1.56]],\n                     rotations=[0, 1.57],\n                     custom_values=[],\n                     reshape_out=False),\n                 assigner_per_size=False,\n                 assign_per_class=False,\n                 diff_rad_by_sin=True,\n                 dir_offset=0,\n                 dir_limit_offset=1,\n                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 loss_bbox=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n                 loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)):\n        super().__init__(num_classes, in_channels, train_cfg, test_cfg,\n                         feat_channels, use_direction_classifier,\n                         anchor_generator, assigner_per_size, assign_per_class,\n                         diff_rad_by_sin, dir_offset, dir_limit_offset,\n                         bbox_coder, loss_cls, loss_bbox, loss_dir)\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             gt_bboxes,\n             gt_labels,\n             input_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Calculate losses.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Multi-level class scores.\n            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.\n            dir_cls_preds (list[torch.Tensor]): Multi-level direction\n                class predictions.\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes \\\n                of each sample.\n            gt_labels (list[torch.Tensor]): Labels of each sample.\n            input_metas (list[dict]): Point cloud and image's meta info.\n            gt_bboxes_ignore (None | list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict[str, list[torch.Tensor]]: Classification, bbox, and \\\n                direction losses of each level.\n\n                - loss_rpn_cls (list[torch.Tensor]): Classification losses.\n                - loss_rpn_bbox (list[torch.Tensor]): Box regression losses.\n                - loss_rpn_dir (list[torch.Tensor]): Direction classification \\\n                    losses.\n        \"\"\"\n        loss_dict = super().loss(cls_scores, bbox_preds, dir_cls_preds,\n                                 gt_bboxes, gt_labels, input_metas,\n                                 gt_bboxes_ignore)\n        # change the loss key names to avoid conflict\n        return dict(\n            loss_rpn_cls=loss_dict['loss_cls'],\n     
       loss_rpn_bbox=loss_dict['loss_bbox'],\n            loss_rpn_dir=loss_dict['loss_dir'])\n\n    def get_bboxes_single(self,\n                          cls_scores,\n                          bbox_preds,\n                          dir_cls_preds,\n                          mlvl_anchors,\n                          input_meta,\n                          cfg,\n                          rescale=False):\n        \"\"\"Get bboxes of single branch.\n\n        Args:\n            cls_scores (torch.Tensor): Class score in single batch.\n            bbox_preds (torch.Tensor): Bbox prediction in single batch.\n            dir_cls_preds (torch.Tensor): Predictions of direction class\n                in single batch.\n            mlvl_anchors (List[torch.Tensor]): Multi-level anchors\n                in single batch.\n            input_meta (list[dict]): Contain pcd and img's meta info.\n            cfg (None | :obj:`ConfigDict`): Training or testing config.\n            rescale (list[torch.Tensor]): Whether to rescale bbox.\n\n        Returns:\n            dict: Predictions of single batch containing the following keys:\n\n                - boxes_3d (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.\n                - scores_3d (torch.Tensor): Score of each bbox.\n                - labels_3d (torch.Tensor): Label of each bbox.\n                - cls_preds (torch.Tensor): Class score of each bbox.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)\n        mlvl_bboxes = []\n        mlvl_max_scores = []\n        mlvl_label_pred = []\n        mlvl_dir_scores = []\n        mlvl_cls_score = []\n        for cls_score, bbox_pred, dir_cls_pred, anchors in zip(\n                cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]\n            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)\n            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]\n\n            cls_score = cls_score.permute(1, 2,\n                                          0).reshape(-1, self.num_classes)\n\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)\n            bbox_pred = bbox_pred.permute(1, 2,\n                                          0).reshape(-1, self.box_code_size)\n\n            nms_pre = cfg.get('nms_pre', -1)\n            if self.use_sigmoid_cls:\n                max_scores, pred_labels = scores.max(dim=1)\n            else:\n                max_scores, pred_labels = scores[:, :-1].max(dim=1)\n            # get topk\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                topk_scores, topk_inds = max_scores.topk(nms_pre)\n                anchors = anchors[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                max_scores = topk_scores\n                cls_score = scores[topk_inds, :]\n                dir_cls_score = dir_cls_score[topk_inds]\n                pred_labels = pred_labels[topk_inds]\n\n            bboxes = self.bbox_coder.decode(anchors, bbox_pred)\n            mlvl_bboxes.append(bboxes)\n            mlvl_max_scores.append(max_scores)\n            mlvl_cls_score.append(cls_score)\n            mlvl_label_pred.append(pred_labels)\n            mlvl_dir_scores.append(dir_cls_score)\n\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        mlvl_bboxes_for_nms = 
xywhr2xyxyr(input_meta['box_type_3d'](\n            mlvl_bboxes, box_dim=self.box_code_size).bev)\n        mlvl_max_scores = torch.cat(mlvl_max_scores)\n        mlvl_label_pred = torch.cat(mlvl_label_pred)\n        mlvl_dir_scores = torch.cat(mlvl_dir_scores)\n        # shape [k, num_class] before sigmoid\n        # PartA2 needs to keep the raw classification score\n        # because the bbox head in the second stage does not have\n        # a classification branch,\n        # and the RoI head needs this score as the classification score\n        mlvl_cls_score = torch.cat(mlvl_cls_score)\n\n        score_thr = cfg.get('score_thr', 0)\n        result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,\n                                         mlvl_max_scores, mlvl_label_pred,\n                                         mlvl_cls_score, mlvl_dir_scores,\n                                         score_thr, cfg.nms_post, cfg,\n                                         input_meta)\n\n        return result\n\n    def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,\n                           mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,\n                           mlvl_dir_scores, score_thr, max_num, cfg,\n                           input_meta):\n        \"\"\"Class-agnostic NMS for a single batch.\n\n        Args:\n            mlvl_bboxes (torch.Tensor): Bboxes from Multi-level.\n            mlvl_bboxes_for_nms (torch.Tensor): Bboxes for nms\n                (bev or minmax boxes) from Multi-level.\n            mlvl_max_scores (torch.Tensor): Max scores of Multi-level bbox.\n            mlvl_label_pred (torch.Tensor): Class predictions\n                of Multi-level bbox.\n            mlvl_cls_score (torch.Tensor): Class scores of\n                Multi-level bbox.\n            mlvl_dir_scores (torch.Tensor): Direction scores of\n                Multi-level bbox.\n            score_thr (int): Score threshold.\n            max_num (int): Max number of bboxes after nms.\n            cfg (None | :obj:`ConfigDict`): Training or testing config.\n            input_meta (dict): Contain pcd and img's meta info.\n\n        Returns:\n            dict: Predictions of single batch. 
Contain the keys:\n\n                - boxes_3d (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.\n                - scores_3d (torch.Tensor): Score of each bbox.\n                - labels_3d (torch.Tensor): Label of each bbox.\n                - cls_preds (torch.Tensor): Class score of each bbox.\n        \"\"\"\n        bboxes = []\n        scores = []\n        labels = []\n        dir_scores = []\n        cls_scores = []\n        score_thr_inds = mlvl_max_scores > score_thr\n        _scores = mlvl_max_scores[score_thr_inds]\n        _bboxes_for_nms = mlvl_bboxes_for_nms[score_thr_inds, :]\n        if cfg.use_rotate_nms:\n            nms_func = nms_gpu\n        else:\n            nms_func = nms_normal_gpu\n        selected = nms_func(_bboxes_for_nms, _scores, cfg.nms_thr)\n\n        _mlvl_bboxes = mlvl_bboxes[score_thr_inds, :]\n        _mlvl_dir_scores = mlvl_dir_scores[score_thr_inds]\n        _mlvl_label_pred = mlvl_label_pred[score_thr_inds]\n        _mlvl_cls_score = mlvl_cls_score[score_thr_inds]\n\n        if len(selected) > 0:\n            bboxes.append(_mlvl_bboxes[selected])\n            scores.append(_scores[selected])\n            labels.append(_mlvl_label_pred[selected])\n            cls_scores.append(_mlvl_cls_score[selected])\n            dir_scores.append(_mlvl_dir_scores[selected])\n            dir_rot = limit_period(bboxes[-1][..., 6] - self.dir_offset,\n                                   self.dir_limit_offset, np.pi)\n            bboxes[-1][..., 6] = (\n                dir_rot + self.dir_offset +\n                np.pi * dir_scores[-1].to(bboxes[-1].dtype))\n\n        if bboxes:\n            bboxes = torch.cat(bboxes, dim=0)\n            scores = torch.cat(scores, dim=0)\n            cls_scores = torch.cat(cls_scores, dim=0)\n            labels = torch.cat(labels, dim=0)\n            dir_scores = torch.cat(dir_scores, dim=0)\n            if bboxes.shape[0] > max_num:\n                _, inds = scores.sort(descending=True)\n                inds = inds[:max_num]\n                bboxes = bboxes[inds, :]\n                labels = labels[inds]\n                scores = scores[inds]\n                cls_scores = cls_scores[inds]\n            bboxes = input_meta['box_type_3d'](\n                bboxes, box_dim=self.box_code_size)\n            return dict(\n                boxes_3d=bboxes,\n                scores_3d=scores,\n                labels_3d=labels,\n                cls_preds=cls_scores  # raw scores [max_num, cls_num]\n            )\n        else:\n            return dict(\n                boxes_3d=input_meta['box_type_3d'](\n                    mlvl_bboxes.new_zeros([0, self.box_code_size]),\n                    box_dim=self.box_code_size),\n                scores_3d=mlvl_bboxes.new_zeros([0]),\n                labels_3d=mlvl_bboxes.new_zeros([0]),\n                cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]]))\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/shape_aware_head.py",
    "content": "import numpy as np\nimport torch\nfrom mmcv.cnn import ConvModule, bias_init_with_prob, normal_init\nfrom torch import nn as nn\n\nfrom mmdet3d.core import box3d_multiclass_nms, limit_period, xywhr2xyxyr\nfrom mmdet.core import multi_apply\nfrom mmdet.models import HEADS\nfrom ..builder import build_head\nfrom .anchor3d_head import Anchor3DHead\n\n\n@HEADS.register_module()\nclass BaseShapeHead(nn.Module):\n    \"\"\"Base Shape-aware Head in Shape Signature Network.\n\n    Note:\n        This base shape-aware grouping head uses default settings for small\n        objects. For large and huge objects, it is recommended to use\n        heavier heads, like (64, 64, 64) and (128, 128, 64, 64, 64) in\n        shared conv channels, (2, 1, 1) and (2, 1, 2, 1, 1) in shared\n        conv strides. For tiny objects, we can use smaller heads, like\n        (32, 32) channels and (1, 1) strides.\n\n    Args:\n        num_cls (int): Number of classes.\n        num_base_anchors (int): Number of anchors per location.\n        box_code_size (int): The dimension of boxes to be encoded.\n        in_channels (int): Input channels for convolutional layers.\n        shared_conv_channels (tuple): Channels for shared convolutional \\\n            layers. Default: (64, 64). \\\n        shared_conv_strides (tuple): Strides for shared convolutional \\\n            layers. Default: (1, 1).\n        use_direction_classifier (bool, optional): Whether to use direction \\\n            classifier. Default: True.\n        conv_cfg (dict): Config of conv layer. Default: dict(type='Conv2d')\n        norm_cfg (dict): Config of norm layer. Default: dict(type='BN2d').\n        bias (bool|str, optional): Type of bias. Default: False.\n    \"\"\"\n\n    def __init__(self,\n                 num_cls,\n                 num_base_anchors,\n                 box_code_size,\n                 in_channels,\n                 shared_conv_channels=(64, 64),\n                 shared_conv_strides=(1, 1),\n                 use_direction_classifier=True,\n                 conv_cfg=dict(type='Conv2d'),\n                 norm_cfg=dict(type='BN2d'),\n                 bias=False):\n        super().__init__()\n        self.num_cls = num_cls\n        self.num_base_anchors = num_base_anchors\n        self.use_direction_classifier = use_direction_classifier\n        self.box_code_size = box_code_size\n\n        assert len(shared_conv_channels) == len(shared_conv_strides), \\\n            'Lengths of channels and strides list should be equal.'\n\n        self.shared_conv_channels = [in_channels] + list(shared_conv_channels)\n        self.shared_conv_strides = list(shared_conv_strides)\n\n        shared_conv = []\n        for i in range(len(self.shared_conv_strides)):\n            shared_conv.append(\n                ConvModule(\n                    self.shared_conv_channels[i],\n                    self.shared_conv_channels[i + 1],\n                    kernel_size=3,\n                    stride=self.shared_conv_strides[i],\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    bias=bias,\n                    norm_cfg=norm_cfg))\n\n        self.shared_conv = nn.Sequential(*shared_conv)\n\n        out_channels = self.shared_conv_channels[-1]\n        self.conv_cls = nn.Conv2d(out_channels, num_base_anchors * num_cls, 1)\n        self.conv_reg = nn.Conv2d(out_channels,\n                                  num_base_anchors * box_code_size, 1)\n\n        if use_direction_classifier:\n            
self.conv_dir_cls = nn.Conv2d(out_channels, num_base_anchors * 2,\n                                          1)\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        bias_cls = bias_init_with_prob(0.01)\n        # shared conv layers have already been initialized by ConvModule\n        normal_init(self.conv_cls, std=0.01, bias=bias_cls)\n        normal_init(self.conv_reg, std=0.01)\n        if self.use_direction_classifier:\n            normal_init(self.conv_dir_cls, std=0.01, bias=bias_cls)\n\n    def forward(self, x):\n        \"\"\"Forward function for SmallHead.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, C, H, W].\n\n        Returns:\n            dict[torch.Tensor]: Contain score of each class, bbox \\\n                regression and direction classification predictions. \\\n                Note that all the returned tensors are reshaped as \\\n                [bs*num_base_anchors*H*W, num_cls/box_code_size/dir_bins]. \\\n                It is more convenient to concat anchors for different \\\n                classes even though they have different feature map sizes.\n        \"\"\"\n        x = self.shared_conv(x)\n        cls_score = self.conv_cls(x)\n        bbox_pred = self.conv_reg(x)\n        featmap_size = bbox_pred.shape[-2:]\n        H, W = featmap_size\n        B = bbox_pred.shape[0]\n        cls_score = cls_score.view(-1, self.num_base_anchors, self.num_cls, H,\n                                   W).permute(0, 1, 3, 4,\n                                              2).reshape(B, -1, self.num_cls)\n        bbox_pred = bbox_pred.view(-1, self.num_base_anchors,\n                                   self.box_code_size, H, W).permute(\n                                       0, 1, 3, 4,\n                                       2).reshape(B, -1, self.box_code_size)\n\n        dir_cls_preds = None\n        if self.use_direction_classifier:\n            dir_cls_preds = self.conv_dir_cls(x)\n            dir_cls_preds = dir_cls_preds.view(-1, self.num_base_anchors, 2, H,\n                                               W).permute(0, 1, 3, 4,\n                                                          2).reshape(B, -1, 2)\n        ret = dict(\n            cls_score=cls_score,\n            bbox_pred=bbox_pred,\n            dir_cls_preds=dir_cls_preds,\n            featmap_size=featmap_size)\n        return ret\n\n\n@HEADS.register_module()\nclass ShapeAwareHead(Anchor3DHead):\n    \"\"\"Shape-aware grouping head for SSN.\n\n    Args:\n        tasks (dict): Shape-aware groups of multi-class objects.\n        assign_per_class (bool, optional): Whether to do assignment for each \\\n            class. 
Default: True.\n        kwargs (dict): Other arguments are the same as those in \\\n            :class:`Anchor3DHead`.\n    \"\"\"\n\n    def __init__(self, tasks, assign_per_class=True, **kwargs):\n        self.tasks = tasks\n        self.featmap_sizes = []\n        super().__init__(assign_per_class=assign_per_class, **kwargs)\n\n    def _init_layers(self):\n        \"\"\"Initialize neural network layers of the head.\"\"\"\n        self.heads = nn.ModuleList()\n        cls_ptr = 0\n        for task in self.tasks:\n            sizes = self.anchor_generator.sizes[cls_ptr:cls_ptr +\n                                                task['num_class']]\n            num_size = torch.tensor(sizes).reshape(-1, 3).size(0)\n            num_rot = len(self.anchor_generator.rotations)\n            num_base_anchors = num_rot * num_size\n            branch = dict(\n                type='BaseShapeHead',\n                num_cls=self.num_classes,\n                num_base_anchors=num_base_anchors,\n                box_code_size=self.box_code_size,\n                in_channels=self.in_channels,\n                shared_conv_channels=task['shared_conv_channels'],\n                shared_conv_strides=task['shared_conv_strides'])\n            self.heads.append(build_head(branch))\n            cls_ptr += task['num_class']\n\n    def init_weights(self):\n        \"\"\"Initialize the weights of head.\"\"\"\n        for head in self.heads:\n            head.init_weights()\n\n    def forward_single(self, x):\n        \"\"\"Forward function on a single-scale feature map.\n\n        Args:\n            x (torch.Tensor): Input features.\n        Returns:\n            tuple[torch.Tensor]: Contain score of each class, bbox \\\n                regression and direction classification predictions.\n        \"\"\"\n        results = []\n\n        for head in self.heads:\n            results.append(head(x))\n\n        cls_score = torch.cat([result['cls_score'] for result in results],\n                              dim=1)\n        bbox_pred = torch.cat([result['bbox_pred'] for result in results],\n                              dim=1)\n        dir_cls_preds = None\n        if self.use_direction_classifier:\n            dir_cls_preds = torch.cat(\n                [result['dir_cls_preds'] for result in results], dim=1)\n\n        self.featmap_sizes = []\n        for i, task in enumerate(self.tasks):\n            for _ in range(task['num_class']):\n                self.featmap_sizes.append(results[i]['featmap_size'])\n        assert len(self.featmap_sizes) == len(self.anchor_generator.ranges), \\\n            'Length of feature map sizes must be equal to length of ' + \\\n            'different ranges of anchor generator.'\n\n        return cls_score, bbox_pred, dir_cls_preds\n\n    def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels,\n                    label_weights, bbox_targets, bbox_weights, dir_targets,\n                    dir_weights, num_total_samples):\n        \"\"\"Calculate loss of Single-level results.\n\n        Args:\n            cls_score (torch.Tensor): Class score in single-level.\n            bbox_pred (torch.Tensor): Bbox prediction in single-level.\n            dir_cls_preds (torch.Tensor): Predictions of direction class\n                in single-level.\n            labels (torch.Tensor): Labels of class.\n            label_weights (torch.Tensor): Weights of class loss.\n            bbox_targets (torch.Tensor): Targets of bbox predictions.\n            bbox_weights (torch.Tensor): Weights of 
bbox loss.\n            dir_targets (torch.Tensor): Targets of direction predictions.\n            dir_weights (torch.Tensor): Weights of direction loss.\n            num_total_samples (int): The number of valid samples.\n\n        Returns:\n            tuple[torch.Tensor]: Losses of class, bbox \\\n                and direction, respectively.\n        \"\"\"\n        # classification loss\n        if num_total_samples is None:\n            num_total_samples = int(cls_score.shape[0])\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n        cls_score = cls_score.reshape(-1, self.num_classes)\n        loss_cls = self.loss_cls(\n            cls_score, labels, label_weights, avg_factor=num_total_samples)\n\n        # regression loss\n        bbox_targets = bbox_targets.reshape(-1, self.box_code_size)\n        bbox_weights = bbox_weights.reshape(-1, self.box_code_size)\n        code_weight = self.train_cfg.get('code_weight', None)\n\n        if code_weight:\n            bbox_weights = bbox_weights * bbox_weights.new_tensor(code_weight)\n        bbox_pred = bbox_pred.reshape(-1, self.box_code_size)\n        if self.diff_rad_by_sin:\n            bbox_pred, bbox_targets = self.add_sin_difference(\n                bbox_pred, bbox_targets)\n        loss_bbox = self.loss_bbox(\n            bbox_pred,\n            bbox_targets,\n            bbox_weights,\n            avg_factor=num_total_samples)\n\n        # direction classification loss\n        loss_dir = None\n        if self.use_direction_classifier:\n            dir_cls_preds = dir_cls_preds.reshape(-1, 2)\n            dir_targets = dir_targets.reshape(-1)\n            dir_weights = dir_weights.reshape(-1)\n            loss_dir = self.loss_dir(\n                dir_cls_preds,\n                dir_targets,\n                dir_weights,\n                avg_factor=num_total_samples)\n\n        return loss_cls, loss_bbox, loss_dir\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             dir_cls_preds,\n             gt_bboxes,\n             gt_labels,\n             input_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Calculate losses.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Multi-level class scores.\n            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.\n            dir_cls_preds (list[torch.Tensor]): Multi-level direction\n                class predictions.\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Gt bboxes\n                of each sample.\n            gt_labels (list[torch.Tensor]): Gt labels of each sample.\n            input_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (None | list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict[str, list[torch.Tensor]]: Classification, bbox, and \\\n                direction losses of each level.\n\n                - loss_cls (list[torch.Tensor]): Classification losses.\n                - loss_bbox (list[torch.Tensor]): Box regression losses.\n                - loss_dir (list[torch.Tensor]): Direction classification \\\n                    losses.\n        \"\"\"\n        device = cls_scores[0].device\n        anchor_list = self.get_anchors(\n            self.featmap_sizes, input_metas, device=device)\n        cls_reg_targets = self.anchor_target_3d(\n            anchor_list,\n            gt_bboxes,\n            input_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n       
     gt_labels_list=gt_labels,\n            num_classes=self.num_classes,\n            sampling=self.sampling)\n\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         dir_targets_list, dir_weights_list, num_total_pos,\n         num_total_neg) = cls_reg_targets\n        num_total_samples = (\n            num_total_pos + num_total_neg if self.sampling else num_total_pos)\n\n        # num_total_samples = None\n        losses_cls, losses_bbox, losses_dir = multi_apply(\n            self.loss_single,\n            cls_scores,\n            bbox_preds,\n            dir_cls_preds,\n            labels_list,\n            label_weights_list,\n            bbox_targets_list,\n            bbox_weights_list,\n            dir_targets_list,\n            dir_weights_list,\n            num_total_samples=num_total_samples)\n        return dict(\n            loss_cls=losses_cls, loss_bbox=losses_bbox, loss_dir=losses_dir)\n\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   dir_cls_preds,\n                   input_metas,\n                   cfg=None,\n                   rescale=False):\n        \"\"\"Get bboxes of anchor head.\n\n        Args:\n            cls_scores (list[torch.Tensor]): Multi-level class scores.\n            bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.\n            dir_cls_preds (list[torch.Tensor]): Multi-level direction\n                class predictions.\n            input_metas (list[dict]): Contain pcd and img's meta info.\n            cfg (None | :obj:`ConfigDict`): Training or testing config.\n                Default: None.\n            rescale (list[torch.Tensor], optional): Whether to rescale bbox.\n                Default: False.\n\n        Returns:\n            list[tuple]: Prediction results of batches.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds)\n        assert len(cls_scores) == len(dir_cls_preds)\n        num_levels = len(cls_scores)\n        assert num_levels == 1, 'Only support single level inference.'\n        device = cls_scores[0].device\n        mlvl_anchors = self.anchor_generator.grid_anchors(\n            self.featmap_sizes, device=device)\n        # `anchor` is a list of anchors for different classes\n        mlvl_anchors = [torch.cat(anchor, dim=0) for anchor in mlvl_anchors]\n\n        result_list = []\n        for img_id in range(len(input_metas)):\n            cls_score_list = [\n                cls_scores[i][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_pred_list = [\n                bbox_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n            dir_cls_pred_list = [\n                dir_cls_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n\n            input_meta = input_metas[img_id]\n            proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,\n                                               dir_cls_pred_list, mlvl_anchors,\n                                               input_meta, cfg, rescale)\n            result_list.append(proposals)\n        return result_list\n\n    def get_bboxes_single(self,\n                          cls_scores,\n                          bbox_preds,\n                          dir_cls_preds,\n                          mlvl_anchors,\n                          input_meta,\n                          cfg=None,\n                          
rescale=False):\n        \"\"\"Get bboxes of single branch.\n\n        Args:\n            cls_scores (torch.Tensor): Class score in single batch.\n            bbox_preds (torch.Tensor): Bbox prediction in single batch.\n            dir_cls_preds (torch.Tensor): Predictions of direction class\n                in single batch.\n            mlvl_anchors (List[torch.Tensor]): Multi-level anchors\n                in single batch.\n            input_meta (list[dict]): Contain pcd and img's meta info.\n            cfg (None | :obj:`ConfigDict`): Training or testing config.\n            rescale (list[torch.Tensor], optional): whether to rescale bbox. \\\n                Default: False.\n\n        Returns:\n            tuple: Contain predictions of single batch.\n\n                - bboxes (:obj:`BaseInstance3DBoxes`): Predicted 3d bboxes.\n                - scores (torch.Tensor): Class score of each bbox.\n                - labels (torch.Tensor): Label of each bbox.\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_dir_scores = []\n        for cls_score, bbox_pred, dir_cls_pred, anchors in zip(\n                cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):\n            assert cls_score.size()[-2] == bbox_pred.size()[-2]\n            assert cls_score.size()[-2] == dir_cls_pred.size()[-2]\n            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]\n\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)\n\n            nms_pre = cfg.get('nms_pre', -1)\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                if self.use_sigmoid_cls:\n                    max_scores, _ = scores.max(dim=1)\n                else:\n                    max_scores, _ = scores[:, :-1].max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                anchors = anchors[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                scores = scores[topk_inds, :]\n                dir_cls_score = dir_cls_score[topk_inds]\n\n            bboxes = self.bbox_coder.decode(anchors, bbox_pred)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_dir_scores.append(dir_cls_score)\n\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](\n            mlvl_bboxes, box_dim=self.box_code_size).bev)\n        mlvl_scores = torch.cat(mlvl_scores)\n        mlvl_dir_scores = torch.cat(mlvl_dir_scores)\n\n        if self.use_sigmoid_cls:\n            # Add a dummy background class to the front when using sigmoid\n            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)\n\n        score_thr = cfg.get('score_thr', 0)\n        results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,\n                                       mlvl_scores, score_thr, cfg.max_num,\n                                       cfg, mlvl_dir_scores)\n        bboxes, scores, labels, dir_scores = results\n        if bboxes.shape[0] > 0:\n            dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,\n                                   self.dir_limit_offset, np.pi)\n            bboxes[..., 6] = (\n                dir_rot + self.dir_offset +\n                np.pi * 
dir_scores.to(bboxes.dtype))\n        bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)\n        return bboxes, scores, labels\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/sparsefusion_head_deform.py",
    "content": "import copy\r\nimport numpy as np\r\nimport torch\r\nimport functools\r\nimport pickle\r\nimport os\r\nfrom mmcv.cnn import ConvModule, build_conv_layer, kaiming_init\r\nfrom mmcv.runner import force_fp32\r\nfrom torch import nn\r\nimport torch.nn.functional as F\r\nimport time\r\n\r\nfrom mmdet3d.core import (circle_nms, draw_heatmap_gaussian, gaussian_radius,\r\n                          xywhr2xyxyr, limit_period, PseudoSampler, BboxOverlaps3D)\r\nfrom mmdet3d.models.builder import HEADS, build_loss\r\nfrom mmdet3d.ops.iou3d.iou3d_utils import nms_gpu\r\nfrom mmdet3d.models.utils import clip_sigmoid, inverse_sigmoid\r\nfrom mmdet3d.models.fusion_layers import apply_3d_transformation\r\nfrom mmdet.core import build_bbox_coder, multi_apply, build_assigner, build_sampler, AssignResult\r\n\r\nfrom mmdet3d.models.utils import FFN, TransformerDecoderLayer, PositionEmbeddingLearned, PositionEmbeddingLearnedwoNorm,\\\r\n    PointTransformer2D_3D, ImageTransformer_Cam_3D_MS, ProjectionLayerNorm, FusionTransformer2D_3D_Self, \\\r\n    ViewTransformer, DepthEncoderResNet, LayerNorm, ConvLN, FFNLN, normalize_pos\r\n\r\nfrom mmdet3d.models.utils.ops.modules import MSDeformAttn\r\nfrom mmdet3d.models.utils.deformable_decoder import DeformableTransformerDecoderLayer\r\n\r\n\r\n@HEADS.register_module()\r\nclass SparseFusionHead2D_Deform(nn.Module):\r\n    def __init__(self,\r\n                 num_views=0,\r\n                 in_channels_img=64,\r\n                 out_size_factor_img=4,\r\n                 num_proposals=128,\r\n                 num_img_proposals=128,\r\n                 in_channels=128 * 3,\r\n                 hidden_channel=128,\r\n                 num_classes=4,\r\n                 # config for Transformer\r\n                 num_pts_decoder_layers=1,\r\n                 num_img_decoder_layers=1,\r\n                 num_fusion_decoder_layers=1,\r\n                 num_heads=8,\r\n                 initialize_by_heatmap=True,\r\n                 semantic_transfer=True,\r\n                 cross_only=True,\r\n                 range_num=5,\r\n                 cross_heatmap_layer=1,\r\n                 img_heatmap_layer=2,\r\n                 img_reg_layer=3,\r\n                 nms_kernel_size=3,\r\n                 img_nms_kernel_size=3,\r\n                 ffn_channel=256,\r\n                 dropout=0.1,\r\n                 bn_momentum=0.1,\r\n                 activation='relu',\r\n                 # config for FFN\r\n                 common_heads=dict(),\r\n                 conv_cfg=dict(type='Conv1d'),\r\n                 norm_cfg=dict(type='BN1d'),\r\n                 bias='auto',\r\n                 # loss\r\n                 loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\r\n                 loss_bbox=dict(type='L1Loss', reduction='mean'),\r\n                 loss_heatmap=dict(type='GaussianFocalLoss', reduction='mean'),\r\n                 loss_heatmap_2d=dict(type='GaussianFocalLoss', reduction='mean'),\r\n                 loss_center_2d=dict(type='L1Loss', reduction='mean'),\r\n                 # others\r\n                 train_cfg=None,\r\n                 test_cfg=None,\r\n                 bbox_coder=None,\r\n                 bbox_2d_coder=None,\r\n                 use_camera='se',\r\n                 level_num=4,\r\n                 img_reg_bn=False,\r\n                 geometric_transfer=True,\r\n                 view_transform=True,\r\n                 depth_input_channel=2,\r\n                 ):\r\n        super(SparseFusionHead2D_Deform, 
self).__init__()\r\n        self.num_proposals = num_proposals\r\n        self.num_img_proposals = num_img_proposals\r\n        self.num_classes = num_classes\r\n        self.bbox_coder = build_bbox_coder(bbox_coder)\r\n        self.bbox_2d_coder = build_bbox_coder(bbox_2d_coder)\r\n\r\n        self.bn_momentum = bn_momentum\r\n        self.train_cfg = train_cfg\r\n        self.test_cfg = test_cfg\r\n        self.initialize_by_heatmap = initialize_by_heatmap\r\n        self.semantic_transfer = semantic_transfer\r\n        self.cross_only = cross_only\r\n        self.level_num = level_num\r\n        self.in_channels_img = in_channels_img\r\n        self.view_transform = view_transform\r\n        self.range_num = range_num\r\n\r\n        self.loss_cls = build_loss(loss_cls)\r\n        self.loss_bbox = build_loss(loss_bbox)\r\n        self.loss_heatmap = build_loss(loss_heatmap)\r\n        self.loss_heatmap_2d = build_loss(loss_heatmap_2d)\r\n        self.loss_center_2d = build_loss(loss_center_2d)\r\n\r\n        self.num_img_decoder_layers = num_img_decoder_layers\r\n        self.num_pts_decoder_layers = num_pts_decoder_layers\r\n        self.num_fusion_decoder_layers = num_fusion_decoder_layers\r\n        self.hidden_channel = hidden_channel\r\n        self.sampling = False\r\n        self.out_size_factor_img = out_size_factor_img\r\n        self.geometric_transfer = geometric_transfer\r\n\r\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\r\n        if not self.use_sigmoid_cls:\r\n            self.num_classes += 1\r\n\r\n        heads3d = copy.deepcopy(common_heads)\r\n        heads3d.update(dict(heatmap=(self.num_classes, 2)))\r\n        pts_prediction_heads = FFN(hidden_channel, heads3d, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=bias)\r\n\r\n        fusion_heads = dict(center=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2), heatmap=(self.num_classes, 2))\r\n        fusion_prediction_heads = FFN(hidden_channel, fusion_heads, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=bias)\r\n\r\n        heads2d = dict(center_2d=(2, img_reg_layer, img_reg_bn), depth_2d=(1, img_reg_layer, img_reg_bn), cls=(self.num_classes, 2),\r\n                    dim_2d=(3, img_reg_layer, img_reg_bn), rot_2d=(2, img_reg_layer, img_reg_bn), vel_2d=(2, img_reg_layer, img_reg_bn)\r\n                )\r\n\r\n        # img_prediction_heads = FFN(hidden_channel, heads2d, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=bias)\r\n        img_prediction_heads = FFNLN(hidden_channel, heads2d, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=bias)\r\n\r\n        pts_query_pos_embed = [PositionEmbeddingLearned(2, hidden_channel) for _ in range(num_pts_decoder_layers)]\r\n        pts_key_pos_embed = [PositionEmbeddingLearned(2, hidden_channel) for _ in range(num_pts_decoder_layers)]\r\n        self.point_transformer = PointTransformer2D_3D(\r\n            hidden_channel=hidden_channel, num_heads=num_heads, num_decoder_layers=num_pts_decoder_layers,\r\n            prediction_heads=pts_prediction_heads, ffn_channel=ffn_channel, dropout=dropout, activation=activation, test_cfg=test_cfg,\r\n            query_pos=pts_query_pos_embed, key_pos=pts_key_pos_embed\r\n        )\r\n\r\n        img_query_pos_embed = [PositionEmbeddingLearnedwoNorm(2, hidden_channel) for _ in range(num_img_decoder_layers)]\r\n        img_key_pos_embed = [PositionEmbeddingLearnedwoNorm(2, hidden_channel) for _ in range(num_img_decoder_layers)]\r\n\r\n        self.img_transformer = ImageTransformer_Cam_3D_MS(\r\n            
hidden_channel=hidden_channel, num_heads=num_heads, num_decoder_layers=num_img_decoder_layers, out_size_factor_img=out_size_factor_img,\r\n            num_views=num_views, prediction_heads=img_prediction_heads, ffn_channel=ffn_channel, dropout=dropout, activation=activation, test_cfg=test_cfg,\r\n            query_pos=img_query_pos_embed, key_pos=img_key_pos_embed\r\n        )\r\n\r\n        if view_transform:\r\n            heads_view = dict(center_view=(2, 2), height_view=(1, 2), dim_view=(3, 2), rot_view=(2, 2),\r\n                              vel_view=(2, 2), heatmap_view=(self.num_classes, 2))\r\n            view_prediction_heads = FFN(hidden_channel, heads_view, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=bias)\r\n            # view_prediction_heads = FFNLN(hidden_channel, heads_view, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=bias)\r\n\r\n            view_query_pos_embed = PositionEmbeddingLearnedwoNorm(9, hidden_channel)\r\n            view_key_pos_embed = PositionEmbeddingLearnedwoNorm(9, hidden_channel)\r\n\r\n            view_projection = ProjectionLayerNorm(hidden_channel)\r\n\r\n            self.view_transformer = ViewTransformer(\r\n                hidden_channel=hidden_channel, num_heads=num_heads, prediction_heads=view_prediction_heads,\r\n                ffn_channel=ffn_channel, dropout=dropout, activation=activation, test_cfg=test_cfg,\r\n                query_pos=view_query_pos_embed, key_pos=view_key_pos_embed, view_projection=view_projection,\r\n                use_camera=use_camera\r\n            )\r\n\r\n        fusion_query_pos_embed = [PositionEmbeddingLearned(2, hidden_channel) for _ in range(self.num_fusion_decoder_layers)]\r\n        fusion_key_pos_embed = [PositionEmbeddingLearned(2, hidden_channel) for _ in range(self.num_fusion_decoder_layers)]\r\n\r\n        fuse_pts_projection = ProjectionLayerNorm(hidden_channel)\r\n        fuse_img_projection = ProjectionLayerNorm(hidden_channel)\r\n\r\n        self.fusion_transformer = FusionTransformer2D_3D_Self(\r\n            hidden_channel=hidden_channel, num_heads=num_heads, num_decoder_layers=num_fusion_decoder_layers,\r\n            prediction_heads=fusion_prediction_heads, ffn_channel=ffn_channel, dropout=dropout,\r\n            activation=activation, test_cfg=test_cfg, query_pos=fusion_query_pos_embed, key_pos=fusion_query_pos_embed,\r\n            pts_projection=fuse_pts_projection, img_projection=fuse_img_projection,\r\n            num_proposals=num_proposals\r\n        )\r\n\r\n        if self.initialize_by_heatmap and self.semantic_transfer:\r\n            self.heatmap_pts_proj = nn.Sequential(\r\n                nn.Linear(hidden_channel, hidden_channel),\r\n                nn.LayerNorm(hidden_channel)\r\n            )\r\n            self.heatmap_img_proj = nn.Sequential(\r\n                nn.Linear(hidden_channel, hidden_channel),\r\n                nn.LayerNorm(hidden_channel)\r\n            )\r\n            self.cross_heatmap_head = self.build_heatmap_LN(hidden_channel, bias, num_classes, layer_num=cross_heatmap_layer)\r\n\r\n            colattn_query_pos = PositionEmbeddingLearnedwoNorm(3, hidden_channel)\r\n            colattn_key_pos = PositionEmbeddingLearnedwoNorm(2, hidden_channel)\r\n            self.cross_heatmap_decoder = DeformableTransformerDecoderLayer(\r\n                hidden_channel, num_heads, dim_feedforward=ffn_channel, dropout=dropout, activation=activation,\r\n                self_posembed=colattn_query_pos, cross_posembed=colattn_key_pos, cross_only=False\r\n            )\r\n\r\n    
        self.reduce_conv = ConvLN(\r\n                hidden_channel+1, hidden_channel, kernel_size=3, padding=1\r\n            )\r\n\r\n        # a shared convolution\r\n        self.shared_conv = build_conv_layer(\r\n            dict(type='Conv2d'),\r\n            in_channels,\r\n            hidden_channel,\r\n            kernel_size=3,\r\n            padding=1,\r\n            bias=bias,\r\n        )\r\n\r\n        # transformer decoder layers for object query with LiDAR feature\r\n        self.num_views = num_views\r\n        if self.geometric_transfer:\r\n            self.shared_conv_img = nn.Identity()\r\n            blocks = [1] * self.level_num\r\n            assert len(blocks) == self.level_num\r\n            self.depth_resnet = DepthEncoderResNet(depth_input_channel, in_channels_img, hidden_channel, depth_layers=blocks)\r\n\r\n        else:\r\n            self.shared_conv_img = build_conv_layer(\r\n                dict(type='Conv2d'),\r\n                in_channels_img,  # channel of img feature map\r\n                hidden_channel,\r\n                kernel_size=3,\r\n                padding=1,\r\n                bias=bias,\r\n            )\r\n\r\n        # Position Embedding for Cross-Attention, which is re-used during training\r\n        x_size = self.test_cfg['grid_size'][0] // self.test_cfg['out_size_factor']\r\n        y_size = self.test_cfg['grid_size'][1] // self.test_cfg['out_size_factor']\r\n        self.bev_pos = self.create_2D_grid(x_size, y_size)\r\n\r\n        if self.initialize_by_heatmap:\r\n            self.heatmap_head = self.build_heatmap(hidden_channel, bias, num_classes)\r\n            self.img_heatmap_head = nn.ModuleList()\r\n            for lvl in range(self.level_num):\r\n                self.img_heatmap_head.append(self.build_heatmap_LN(hidden_channel, bias, num_classes, layer_num=img_heatmap_layer))\r\n\r\n            self.class_encoding = nn.Conv1d(num_classes, hidden_channel, 1)\r\n            self.img_class_encoding = nn.Conv1d(num_classes, hidden_channel, 1)\r\n        else:\r\n            # query feature\r\n            self.pts_query_feat = nn.Parameter(torch.randn(1, hidden_channel, self.num_proposals))\r\n            self.pts_query_pos = nn.Parameter(torch.rand([1, self.num_proposals, 2])*torch.Tensor([x_size, y_size]).reshape(1, 1, 2), requires_grad=True)\r\n\r\n            self.img_query_feat = nn.Parameter(torch.randn(1, hidden_channel, self.num_img_proposals))\r\n            self.img_query_pos = nn.Parameter(torch.rand([1, self.num_img_proposals, 2]), requires_grad=True)\r\n            self.img_query_pos = inverse_sigmoid(self.img_query_pos)\r\n\r\n        self.nms_kernel_size = nms_kernel_size\r\n        self.img_nms_kernel_size = img_nms_kernel_size\r\n        self.img_feat_pos = None\r\n        self.img_feat_collapsed_pos = None\r\n\r\n        self.init_weights()\r\n        self._init_assigner_sampler()\r\n\r\n    def create_2D_grid(self, x_size, y_size):\r\n        meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]]\r\n        batch_y, batch_x = torch.meshgrid(*[torch.linspace(it[0], it[1], it[2]) for it in meshgrid])\r\n        batch_x = batch_x + 0.5\r\n        batch_y = batch_y + 0.5\r\n        coord_base = torch.cat([batch_x[None], batch_y[None]], dim=0)[None]\r\n        coord_base = coord_base.view(1, 2, -1).permute(0, 2, 1)\r\n        return coord_base\r\n\r\n    def init_bn_momentum(self):\r\n        for m in self.modules():\r\n            if isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):\r\n                m.momentum = 
self.bn_momentum\r\n\r\n    def init_weights(self):\r\n        # initialize transformer\r\n        for m in self.parameters():\r\n            if m.dim() > 1:\r\n                nn.init.xavier_uniform_(m)\r\n        for m in self.modules():\r\n            if isinstance(m, MSDeformAttn):\r\n                m._reset_parameters()\r\n\r\n        self.init_bn_momentum()\r\n\r\n        if self.geometric_transfer:\r\n            level_pos = torch.zeros([self.level_num, self.hidden_channel])\r\n        else:\r\n            level_pos = torch.zeros([self.level_num, self.in_channels_img])\r\n        self.level_pos = nn.Parameter(level_pos, requires_grad=True)\r\n        torch.nn.init.normal_(self.level_pos)\r\n\r\n    def _init_assigner_sampler(self):\r\n        \"\"\"Initialize the target assigner and sampler of the head.\"\"\"\r\n        if self.train_cfg is None:\r\n            return\r\n\r\n        if self.sampling:\r\n            self.bbox_sampler = build_sampler(self.train_cfg.sampler)\r\n        else:\r\n            self.bbox_sampler = PseudoSampler()\r\n        if isinstance(self.train_cfg.assigner, dict):\r\n            self.bbox_assigner = build_assigner(self.train_cfg.assigner)\r\n        elif isinstance(self.train_cfg.assigner, list):\r\n            self.bbox_assigner = [\r\n                build_assigner(res) for res in self.train_cfg.assigner\r\n            ]\r\n        if isinstance(self.train_cfg.assigner_2d, dict):\r\n            self.bbox_assigner_2d = build_assigner(self.train_cfg.assigner_2d)\r\n        elif isinstance(self.train_cfg.assigner_2d, list):\r\n            self.bbox_assigner_2d = [\r\n                build_assigner(res) for res in self.train_cfg.assigner_2d\r\n            ]\r\n\r\n    def forward_single(self, inputs, img_inputs, img_metas, sparse_depth):\r\n        \"\"\"\r\n        Args:\r\n            inputs (torch.Tensor): Input feature map with the shape of\r\n                [B, C, 128(H), 128(W)]. 
(consistent with L748)\r\n            img_inputs (torch.Tensor): Input feature map with the shape of\r\n                [B*num_view, C, image_H, image_W]\r\n\r\n            sparse_depth (torch.Tensor): Input normalized depth with the shape of\r\n                [B, num_views, num_scales, depth_C, depth_H, depth_W]\r\n\r\n        Returns:\r\n            list[dict]: Output results for tasks.\r\n        \"\"\"\r\n\r\n        batch_size = inputs.shape[0]\r\n        sparse_depth = sparse_depth[:, :, 0, :2]\r\n\r\n        if self.geometric_transfer:\r\n            sparse_depth = sparse_depth.view(batch_size*self.num_views, 1, -1, sparse_depth.shape[-2], sparse_depth.shape[-1])\r\n            img_inputs = self.depth_resnet(sparse_depth[:, 0], img_inputs)\r\n\r\n        img_feats = []\r\n        for i in range(self.level_num):\r\n            img_inputs_level = img_inputs[i] + self.level_pos[i].reshape(1, self.level_pos[i].shape[0], 1, 1)\r\n            img_feat = self.shared_conv_img(img_inputs_level)\r\n            img_feats.append(img_feat)\r\n        input_padding_mask = self.construct_input_padding_mask(img_feats, img_metas)\r\n        # input_padding_mask = None\r\n        img_feats_pos = []\r\n        normal_img_feats_pos = []\r\n        for lvl in range(self.level_num):\r\n            h, w = img_feats[lvl].shape[-2], img_feats[lvl].shape[-1]\r\n            img_feat_pos = self.create_2D_grid(h, w).to(img_feats[lvl].device)  # (1, h*w, 2)\r\n            img_feats_pos.append(img_feat_pos)\r\n            normal_img_feat_pos = normalize_pos(img_feat_pos, w, h)  # (1, h*w, 2)\r\n            normal_img_feats_pos.append(normal_img_feat_pos)\r\n        normal_img_feats_pos_stack = torch.cat(normal_img_feats_pos, dim=1)  # (1, h*w (sum), 2)\r\n        self.normal_img_feats_pos_stack = normal_img_feats_pos_stack\r\n        normal_img_feats_pos_repeat = normal_img_feats_pos_stack.repeat(batch_size, 1, 1)\r\n\r\n        proj_matrix = self.construct_projection_matrix(img_metas, normal_img_feats_pos_stack.device)\r\n\r\n        inputs, min_voxel_height, max_voxel_height = inputs[:, :-2], inputs[:, -2], inputs[:, -1]\r\n        lidar_feat = self.shared_conv(inputs)  # [BS, C, H, W]\r\n        #################################\r\n        # image to BEV\r\n        #################################\r\n        lidar_feat_flatten = lidar_feat.view(batch_size, lidar_feat.shape[1], -1)  # [BS, C, H*W]\r\n        bev_pos = self.bev_pos.repeat(batch_size, 1, 1).to(lidar_feat.device)  # [BS, H*W, 2]\r\n        if self.initialize_by_heatmap:\r\n            if self.semantic_transfer:\r\n                img_feat_cross = []\r\n                for level in range(self.level_num):\r\n                    img_feat_cross.append(img_feats[level].clone())\r\n            else:\r\n                img_feat_cross = None\r\n            heatmap, dense_heatmap, pts_top_proposals_class, pts_top_proposals_index = self.generate_heatmap(lidar_feat.clone(), min_voxel_height, max_voxel_height, batch_size, img_metas, proj_matrix['lidar2img_rt'], img_feat_cross, input_padding_mask)\r\n            pts_query_feat = lidar_feat_flatten.gather(\r\n                index=pts_top_proposals_index[:, None, :].expand(-1, lidar_feat_flatten.shape[1], -1), dim=-1\r\n            )  # [BS, C, num_proposals]\r\n            # add category embedding\r\n            one_hot = F.one_hot(pts_top_proposals_class, num_classes=self.num_classes).permute(0, 2, 1)  # [BS, num_classes, num_proposals]\r\n            query_cat_encoding = 
self.class_encoding(one_hot.float())  # [BS, C, num_proposals]\r\n            self.query_labels = pts_top_proposals_class\r\n            pts_query_feat += query_cat_encoding\r\n            pts_query_pos = bev_pos.gather(\r\n                index=pts_top_proposals_index[:, None, :].permute(0, 2, 1).expand(-1, -1, bev_pos.shape[-1]), dim=1\r\n            )  # [BS, num_proposals, 2]\r\n        else:\r\n            pts_query_feat = self.pts_query_feat.repeat(batch_size, 1, 1)  # [BS, C, num_proposals]\r\n            pts_query_pos = self.pts_query_pos.repeat(batch_size, 1, 1).to(lidar_feat.device)  # [BS, num_proposals, 2]\r\n\r\n        if self.initialize_by_heatmap:\r\n            img_feats_heatmap = []\r\n            for lvl in range(self.level_num):\r\n                img_feats_heatmap.append(img_feats[lvl].clone())\r\n\r\n            img_heatmap, img_dense_heatmap, img_top_proposals_class, img_top_proposals_index, img_top_proposals_view_idx, img_top_proposals_pos_id = \\\r\n                self.generate_heatmap_img(img_feats_heatmap, batch_size)\r\n            img_feats_flatten = []\r\n            for lvl in range(self.level_num):\r\n                img_feat = img_feats[lvl]\r\n                h, w = img_feat.shape[-2], img_feat.shape[-1]\r\n                img_feat_flatten = img_feat.reshape(batch_size, self.num_views, self.hidden_channel, h * w)\r\n                img_feat_flatten = img_feat_flatten.permute(0, 2, 1, 3)  # [BS, C, num_view, h*w]\r\n                img_feats_flatten.append(img_feat_flatten)\r\n            img_feat_stack = torch.cat(img_feats_flatten, dim=-1)  # [BS, C, num_view, h*w (sum)]\r\n            img_feat_stack = img_feat_stack.view(batch_size, self.hidden_channel, self.num_views*img_feat_stack.shape[-1])\r\n            normal_img_query_pos = normal_img_feats_pos_repeat.gather(\r\n                index=img_top_proposals_pos_id[:, None, :].permute(0, 2, 1).expand(-1, -1, normal_img_feats_pos_stack.shape[-1]), dim=1\r\n            )  # [BS, num_proposals, 2]\r\n            img_query_feat = img_feat_stack.gather(\r\n                index=img_top_proposals_index[:, None, :].expand(-1, img_feat_stack.shape[1], -1), dim=-1\r\n            )  # [BS, C, num_proposals]\r\n            img_query_view = img_top_proposals_view_idx.clone()  #  [BS, num_proposals]\r\n            one_hot = F.one_hot(img_top_proposals_class, num_classes=self.num_classes).permute(0, 2, 1)  # [BS, num_classes, num_proposals]\r\n            self.img_query_label = img_top_proposals_class\r\n            img_query_cat_encoding = self.img_class_encoding(one_hot.float())  # [BS, C, num_proposals]\r\n            img_query_feat += img_query_cat_encoding\r\n        else:\r\n            img_query_feat = self.img_query_feat.repeat(batch_size, 1, 1)  # [BS, C, num_proposals]\r\n            normal_img_query_pos = self.img_query_pos.repeat(batch_size, 1, 1).to(img_feat.device)  # [BS, num_proposals, 2]\r\n            img_query_pos_view = torch.arange(self.num_img_proposals).reshape(1, -1).repeat(batch_size, 1).to(img_feat.device)\r\n            img_query_view = img_query_pos_view % self.num_views\r\n        view_proj_matrix = self.construction_view_projection_matrix(proj_matrix, img_query_view)\r\n\r\n        #################################\r\n        # transformer decoder layer (LiDAR feature as K,V)\r\n        #################################\r\n        ret_dicts = []\r\n        pts_query_feat, pts_query_pos, pts_ret_dicts = self.point_transformer(pts_query_feat, pts_query_pos, lidar_feat_flatten, bev_pos)\r\n 
       ret_dicts.extend(pts_ret_dicts)\r\n\r\n        #################################\r\n        # transformer decoder layer (img feature as K,V)\r\n        #################################\r\n\r\n        img_query_feat, normal_img_query_pos, img_query_pos_bev, camera_info, img_ret_dicts = \\\r\n            self.img_transformer(img_query_feat, normal_img_query_pos, img_query_view, img_feats, normal_img_feats_pos_stack, view_proj_matrix['lidar2cam_rt'], view_proj_matrix['cam_intrinsic'], img_metas, input_padding_mask)\r\n\r\n        #################################\r\n        # view transformation layer\r\n        #################################\r\n\r\n        if self.view_transform:\r\n            img_query_feat, img_query_pos_bev, view_ret_dicts = self.view_transformer(img_query_feat, img_query_pos_bev, normal_img_query_pos[..., :2], img_ret_dicts, camera_info)\r\n\r\n        img_query_pos_bev = img_query_pos_bev[..., :2]\r\n\r\n        #################################\r\n        # fusion layer\r\n        #################################\r\n\r\n        all_query_feat, all_query_pos, fusion_ret_dicts = self.fusion_transformer(pts_query_feat, pts_query_pos, img_query_feat, img_query_pos_bev)\r\n\r\n        ret_dicts.extend(fusion_ret_dicts)\r\n        if self.initialize_by_heatmap:\r\n            ret_dicts[0]['query_heatmap_score'] = heatmap.gather(index=pts_top_proposals_index[:, None, :].expand(-1, self.num_classes, -1), dim=-1)  # [bs, num_classes, num_proposals]\r\n            ret_dicts[0]['dense_heatmap'] = dense_heatmap\r\n            ret_dicts[0]['img_query_heatmap_score'] = img_heatmap.gather(index=img_top_proposals_index[:, None, :].expand(-1, self.num_classes, -1), dim=-1)  # [bs, num_classes, num_proposals]\r\n            ret_dicts[0]['img_dense_heatmap'] = img_dense_heatmap\r\n\r\n        # return all the layer's results for auxiliary superivison\r\n        new_res = {}\r\n        for key in ret_dicts[0].keys():\r\n            if key not in ['dense_heatmap', 'query_heatmap_score', 'img_query_heatmap_score', 'img_dense_heatmap']:\r\n                new_res[key] = torch.cat([ret_dict[key] for ret_dict in ret_dicts], dim=-1)\r\n            else:\r\n                new_res[key] = ret_dicts[0][key]\r\n        for key in img_ret_dicts[0].keys():\r\n            new_res[key] = torch.cat([ret_dict[key] for ret_dict in img_ret_dicts], dim=-1)\r\n        new_res['view'] = img_query_view.repeat(1, self.num_img_decoder_layers)\r\n        if self.view_transform:\r\n            for key in view_ret_dicts[0].keys():\r\n                new_res[key] = torch.cat([ret_dict[key] for ret_dict in view_ret_dicts], dim=-1)\r\n\r\n        return [new_res]\r\n\r\n    def forward(self, feats, img_feats, img_metas, sparse_depth=None):\r\n        \"\"\"Forward pass.\r\n\r\n        Args:\r\n            feats (list[torch.Tensor]): Multi-level features, e.g.,\r\n                features produced by FPN.\r\n\r\n        Returns:\r\n            tuple(list[dict]): Output results. 
first index by level, second index by layer\r\n        \"\"\"\r\n\r\n        if img_feats is None:\r\n            img_feats = [None]\r\n        else:\r\n            img_feats = [img_feats[:self.level_num]]\r\n        if sparse_depth is None:\r\n            sparse_depth = [None]\r\n        else:\r\n            sparse_depth = [sparse_depth[:, :, :self.level_num]]\r\n        res = multi_apply(self.forward_single, feats, img_feats, [img_metas], sparse_depth)\r\n        assert len(res) == 1, \"only support one level features.\"\r\n        return res\r\n\r\n    def construct_input_padding_mask(self, img_feats, img_metas):\r\n        batch_size = len(img_metas)\r\n        device = img_feats[0].device\r\n        img_h_lvl = []\r\n        img_w_lvl = []\r\n        for img_feat_lvl in img_feats:\r\n            img_h_lvl.append(img_feat_lvl.shape[-2])\r\n            img_w_lvl.append(img_feat_lvl.shape[-1])\r\n        padding_mask = []\r\n        for sample_idx in range(batch_size):\r\n            sample_mask = []\r\n            for view_idx in range(self.num_views):\r\n                view_mask = []\r\n\r\n                if 'valid_shape' in img_metas[sample_idx]:\r\n                    valid_shape = img_metas[sample_idx]['valid_shape'][view_idx] / self.out_size_factor_img\r\n                else:\r\n                    valid_shape = np.array([img_metas[sample_idx]['img_shape'][1], img_metas[sample_idx]['img_shape'][0]]) / self.out_size_factor_img\r\n                for lvl_idx in range(self.level_num):\r\n                    lvl_mask = torch.ones([img_h_lvl[lvl_idx], img_w_lvl[lvl_idx]], dtype=torch.bool, device=device)\r\n                    valid_shape_lvl = valid_shape // (2 ** lvl_idx)\r\n                    valid_w_lvl = int(valid_shape_lvl[0])\r\n                    valid_h_lvl = int(valid_shape_lvl[1])\r\n                    lvl_mask[:valid_h_lvl, :valid_w_lvl] = False\r\n                    view_mask.append(lvl_mask.view(-1))\r\n                view_mask = torch.cat(view_mask)\r\n                sample_mask.append(view_mask)\r\n            sample_mask = torch.stack(sample_mask, dim=0)\r\n            padding_mask.append(sample_mask)\r\n        padding_mask = torch.stack(padding_mask, dim=0)\r\n\r\n        return padding_mask\r\n\r\n    def construction_view_projection_matrix(self, proj_matrix, img_query_view):\r\n        view_proj_matrix = {}\r\n        batch_size = img_query_view.shape[0]\r\n        batch_ids = torch.arange(batch_size)[:, None].repeat(1, self.num_img_proposals)\r\n        batch_ids = batch_ids.to(img_query_view.device)\r\n        for key in proj_matrix:\r\n            view_proj_matrix[key] = proj_matrix[key][batch_ids, img_query_view]\r\n        return view_proj_matrix\r\n\r\n    def construct_projection_matrix(self, img_metas, device):\r\n        batch_size = len(img_metas)\r\n        cam_ints = torch.zeros([batch_size, self.num_views, 4, 4], device=device)\r\n        cam_ints[:, :, 3, 3] = 1\r\n        for sample_id in range(batch_size):\r\n            cam_ints[sample_id, :, :3, :3] = torch.Tensor(img_metas[sample_id]['cam_intrinsic']).to(device)\r\n\r\n        lidar2cam_rt = torch.zeros([batch_size, self.num_views, 4, 4], device=device)\r\n        lidar2cam_rt[:, :, 3, 3] = 1\r\n        for sample_id in range(batch_size):\r\n            lidar2cam_rt[sample_id, :, :3, :3] = torch.Tensor(img_metas[sample_id]['lidar2cam_r']).to(device)\r\n            lidar2cam_rt[sample_id, :, :3, 3] = torch.Tensor(img_metas[sample_id]['lidar2cam_t']).to(device)\r\n\r\n        lidar2img_rt = 
torch.matmul(cam_ints, lidar2cam_rt)\r\n        proj_matrix = {\"cam_intrinsic\": cam_ints, \"lidar2cam_rt\": lidar2cam_rt, \"lidar2img_rt\": lidar2img_rt}\r\n        return proj_matrix\r\n\r\n    def build_heatmap_LN(self, hidden_channel, bias, num_classes, layer_num=2, kernel_size=3):\r\n        layers = []\r\n        for i in range(layer_num-1):\r\n            layers.append(ConvLN(\r\n                hidden_channel,\r\n                hidden_channel,\r\n                kernel_size=kernel_size,\r\n                padding=(kernel_size-1)//2,\r\n            ))\r\n\r\n        layers.append(build_conv_layer(\r\n            dict(type='Conv2d'),\r\n            hidden_channel,\r\n            num_classes,\r\n            kernel_size=kernel_size,\r\n            padding=(kernel_size-1)//2,\r\n            bias=bias,\r\n        ))\r\n        return nn.Sequential(*layers)\r\n\r\n    def build_heatmap(self, hidden_channel, bias, num_classes, layer_num=2, kernel_size=3):\r\n        layers = []\r\n        for i in range(layer_num-1):\r\n            layers.append(ConvModule(\r\n                hidden_channel,\r\n                hidden_channel,\r\n                kernel_size=kernel_size,\r\n                padding=(kernel_size-1)//2,\r\n                bias=bias,\r\n                conv_cfg=dict(type='Conv2d'),\r\n                norm_cfg=dict(type='BN2d'),\r\n            ))\r\n\r\n        layers.append(build_conv_layer(\r\n            dict(type='Conv2d'),\r\n            hidden_channel,\r\n            num_classes,\r\n            kernel_size=kernel_size,\r\n            padding=(kernel_size-1)//2,\r\n            bias=bias,\r\n        ))\r\n        return nn.Sequential(*layers)\r\n\r\n    def generate_heatmap_deform(self, lidar_feat, img_feat, voxel_height, img_metas, lidar2img_rt, input_padding_mask=None):\r\n        # img_feat [bs*num_view, C, img_h, img_w]\r\n        # lidar_feat [BS, C, H, W]\r\n\r\n        batch_size = lidar_feat.shape[0]\r\n        H, W = lidar_feat.shape[2], lidar_feat.shape[3]\r\n        voxel_height = voxel_height.view(batch_size, H*W)\r\n        valid_height_mask = voxel_height > -50\r\n\r\n        level_start_index = [0]\r\n        spatial_shapes = []\r\n        img_feats_flatten = []\r\n\r\n        for lvl in range(self.level_num):\r\n            img_h_lvl, img_w_lvl = img_feat[lvl].shape[-2], img_feat[lvl].shape[-1]\r\n            img_feat[lvl] = self.heatmap_img_proj(img_feat[lvl].permute(0, 2, 3, 1)).permute(0, 3, 1, 2)\r\n            # img_feat[lvl] = self.heatmap_img_proj(img_feat[lvl])\r\n            img_feat[lvl] = img_feat[lvl].view(batch_size, self.num_views, self.hidden_channel, img_h_lvl, img_w_lvl)\r\n            img_feat_flatten = img_feat[lvl].view(batch_size, self.num_views, self.hidden_channel, img_h_lvl*img_w_lvl)\r\n            img_feats_flatten.append(img_feat_flatten)\r\n            level_start_index.append(level_start_index[-1] + img_h_lvl * img_w_lvl)\r\n            spatial_shapes.append([img_h_lvl, img_w_lvl])\r\n\r\n        level_start_index = level_start_index[:-1]\r\n        level_start_index = torch.LongTensor(level_start_index).to(lidar_feat.device)\r\n        spatial_shapes = torch.LongTensor(spatial_shapes).to(lidar_feat.device)\r\n        img_feats_stack = torch.cat(img_feats_flatten, dim=3)  # [bs, num_view, C, h*w (sum)]\r\n        normal_img_feats_pos_stack = self.normal_img_feats_pos_stack  # [1, h*w (sum), 2]\r\n\r\n        lidar_feat = self.heatmap_pts_proj(lidar_feat.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)\r\n        # lidar_feat = 
self.heatmap_pts_proj(lidar_feat)\r\n\r\n        lidar_feat_flatten = lidar_feat.reshape(batch_size, self.hidden_channel, H*W)  # [bs, C, H*W]\r\n        lidar_feat_output = torch.zeros(batch_size, self.hidden_channel, H*W).to(lidar_feat.device)\r\n        lidar_feat_count = torch.zeros(batch_size, 1, H*W).to(lidar_feat.device)\r\n\r\n        bev_pos = self.bev_pos.repeat(batch_size, 1, 1).to(lidar_feat.device)\r\n        query_pos_realmetric = bev_pos.permute(0, 2, 1) * self.test_cfg['out_size_factor'] * \\\r\n                               self.test_cfg['voxel_size'][0] + self.test_cfg['pc_range'][0]  # (bs, 2, H*W)\r\n\r\n        query_pos_3d = torch.cat([query_pos_realmetric, voxel_height[:, None]], dim=1) # (bs, 3, H*W)\r\n        points_4d = torch.cat([query_pos_3d, torch.ones_like(query_pos_3d[:, :1])], dim=1).permute(0, 2, 1)  # (bs, H*W, 4)\r\n        points_2d = torch.matmul(points_4d[:, None], lidar2img_rt.transpose(-1, -2))  # (bs, num_view, H*W, 4)\r\n        points_2d[..., 2] = torch.clamp(points_2d[..., 2], min=1e-5)\r\n        points_2d[..., :2] = points_2d[..., :2] / points_2d[..., 2:3] / self.out_size_factor_img\r\n\r\n        if 'valid_shape' in img_metas[0]:\r\n            valid_shape = []\r\n            for sample_idx in range(batch_size):\r\n                sample_valid_shape = img_metas[sample_idx]['valid_shape'] / self.out_size_factor_img\r\n                valid_shape.append(sample_valid_shape)\r\n            valid_shape = np.array(valid_shape)\r\n            valid_img_w = valid_shape[..., 0]\r\n            valid_img_h = valid_shape[..., 1]\r\n        else:\r\n            valid_img_w = np.full([batch_size, self.num_views], img_feat[0].shape[-1])\r\n            valid_img_h = np.full([batch_size, self.num_views], img_feat[0].shape[-2])\r\n\r\n        valid_img_w = torch.from_numpy(valid_img_w).to(points_2d.device)\r\n        valid_img_h = torch.from_numpy(valid_img_h).to(points_2d.device)\r\n\r\n        img_h, img_w = img_feat[0].shape[-2], img_feat[0].shape[-1]\r\n        center_xs = points_2d[..., 0]  # (bs, num_view, H*W)\r\n        center_ys = points_2d[..., 1]\r\n\r\n        on_the_image = (center_xs >= 0) & (center_xs < valid_img_w[..., None]) & (center_ys >= 0) & \\\r\n                       (center_ys < valid_img_h[..., None]) & valid_height_mask[:, None]  # [bs, num_view, H*W]\r\n        depth = points_2d[..., 2]   # [bs, num_view, H*W]\r\n        depth = torch.log(depth)\r\n\r\n        for sample_idx in range(batch_size):\r\n            on_the_image_sample = on_the_image[sample_idx]  # [num_view, H*W]\r\n            bincount = torch.sum(on_the_image_sample, dim=1)\r\n            max_len = torch.max(bincount)\r\n            sample_query_feature = torch.zeros([self.num_views, self.hidden_channel, max_len], device=points_2d.device)\r\n            sample_query_pos = torch.zeros([self.num_views, max_len, 3], device=points_2d.device)\r\n            sample_reference_points = torch.zeros([self.num_views, max_len, 2], device=points_2d.device)\r\n            sample_padding_mask = torch.zeros([self.num_views, max_len], device=points_2d.device, dtype=torch.bool)\r\n\r\n            for view_idx in range(self.num_views):\r\n                on_the_image_view = on_the_image_sample[view_idx]\r\n                center_xs_view = center_xs[sample_idx, view_idx, on_the_image_view]  # [N, ]\r\n                center_ys_view = center_ys[sample_idx, view_idx, on_the_image_view]  # [N, ]\r\n                reference_points = torch.stack([center_xs_view / img_w, center_ys_view / 
img_h], dim=-1)  # [N, 2]\r\n\r\n                view_count = bincount[view_idx]\r\n                sample_reference_points[view_idx, :view_count] = reference_points\r\n                sample_query_feature[view_idx, :, :view_count] = lidar_feat_flatten[sample_idx, :, on_the_image_view]\r\n                sample_query_pos[view_idx, :view_count, 2] = depth[sample_idx, view_idx, on_the_image_view]\r\n                sample_padding_mask[view_idx, view_count:] = True\r\n\r\n            sample_centers_normal = sample_reference_points * 2 - 1\r\n            sample_query_img_feat = []\r\n            for lvl in range(self.level_num):\r\n                img_feat_lvl = img_feat[lvl][sample_idx]\r\n                img_feat_lvl = F.grid_sample(img_feat_lvl, sample_centers_normal[:, None], mode='bilinear', padding_mode=\"border\", align_corners=False)\r\n                img_feat_lvl = img_feat_lvl[:, :, 0]\r\n                sample_query_img_feat.append(img_feat_lvl)\r\n            sample_query_img_feat = torch.stack(sample_query_img_feat, dim=0)\r\n            sample_query_img_feat = torch.max(sample_query_img_feat, dim=0)[0]  # [num_view, C, max_len]\r\n\r\n            sample_query_feature = sample_query_feature + sample_query_img_feat\r\n            sample_query_pos[..., :2] = inverse_sigmoid(sample_reference_points)\r\n\r\n            sample_reference_points = sample_reference_points[:, :, None].repeat(1, 1, self.level_num, 1)\r\n\r\n            if batch_size == 1: # whether it is doing evaluation or training\r\n                if input_padding_mask is None:\r\n                    sample_input_padding_mask = None\r\n                else:\r\n                    sample_input_padding_mask = input_padding_mask[sample_idx:sample_idx+1]\r\n                output = self.cross_heatmap_decoder(\r\n                    sample_query_feature, img_feats_stack[sample_idx],\r\n                    sample_query_pos, normal_img_feats_pos_stack.repeat(self.num_views, 1, 1),\r\n                    reference_points=sample_reference_points, level_start_index=level_start_index, spatial_shapes=spatial_shapes,\r\n                    query_padding_mask=sample_padding_mask, input_padding_mask=sample_input_padding_mask\r\n                )\r\n            else:\r\n                output = []\r\n                for view_idx in range(self.num_views):\r\n                    view_query_feature = sample_query_feature[view_idx, :, torch.logical_not(sample_padding_mask[view_idx])]\r\n                    view_query_pos = sample_query_pos[view_idx, torch.logical_not(sample_padding_mask[view_idx])]\r\n                    view_reference_points = sample_reference_points[view_idx, torch.logical_not(sample_padding_mask[view_idx])]\r\n\r\n                    if input_padding_mask is None:\r\n                        view_input_padding_mask = None\r\n                    else:\r\n                        view_input_padding_mask = input_padding_mask[sample_idx, view_idx, None]\r\n\r\n                    output_item = self.cross_heatmap_decoder(\r\n                        view_query_feature[None], img_feats_stack[sample_idx, view_idx, None],\r\n                        view_query_pos[None], normal_img_feats_pos_stack,\r\n                        reference_points=view_reference_points[None], level_start_index=level_start_index, spatial_shapes=spatial_shapes,\r\n                        input_padding_mask=view_input_padding_mask\r\n                    )\r\n                    output_item_pad = torch.zeros([output_item.shape[1], 
sample_padding_mask.shape[1]]).type_as(output_item)\r\n                    output_item_pad[:, torch.logical_not(sample_padding_mask[view_idx])] = output_item[0]\r\n                    output.append(output_item_pad)\r\n                output = torch.stack(output, dim=0)\r\n\r\n            for view_idx in range(self.num_views):\r\n                view_count = bincount[view_idx]\r\n                on_the_image_view = on_the_image_sample[view_idx]\r\n                overlap_mask = lidar_feat_count[sample_idx, 0, on_the_image_view] > 0\r\n                output_view = output[view_idx, :, :view_count]\r\n                nonoverlap_mask = torch.logical_not(overlap_mask)\r\n                lidar_feat_output_view = lidar_feat_output[sample_idx, :, on_the_image_view]\r\n                lidar_feat_output_view[:, overlap_mask] = torch.maximum(lidar_feat_output_view[:, overlap_mask], output_view[:, overlap_mask])\r\n                lidar_feat_output_view[:, nonoverlap_mask] = output_view[:, nonoverlap_mask]\r\n                lidar_feat_output[sample_idx, :, on_the_image_view] = lidar_feat_output_view\r\n                lidar_feat_count[sample_idx, :, on_the_image_view] += 1\r\n\r\n        lidar_feat_output = lidar_feat_output.reshape(batch_size, lidar_feat_output.shape[1], H, W)\r\n        # lidar_feat_output = self.reduce_conv(lidar_feat_output)\r\n        lidar_feat_count = lidar_feat_count.reshape(batch_size, 1, H, W)\r\n        lidar_feat_flag = torch.where(lidar_feat_count>0, torch.ones_like(lidar_feat_count), torch.zeros_like(lidar_feat_count))\r\n        lidar_feat_output = lidar_feat_output + (1 - lidar_feat_flag) * lidar_feat\r\n        lidar_feat_output = torch.cat([lidar_feat_output, lidar_feat_flag], dim=1)\r\n        lidar_feat_output = self.reduce_conv(lidar_feat_output)\r\n\r\n        heatmap_output = self.cross_heatmap_head(lidar_feat_output.contiguous())\r\n\r\n        return heatmap_output\r\n\r\n    def generate_heatmap(self, lidar_feat, min_voxel_height, max_voxel_height, batch_size, img_metas, lidar2img_rt, img_feat=None, input_padding_mask=None):\r\n        dense_heatmap = self.heatmap_head(lidar_feat)  # [BS, num_class, H, W]\r\n        if img_feat is None:\r\n            heatmap = dense_heatmap.detach().sigmoid()  # [BS, num_class, H, W]\r\n        else:\r\n            voxel_height = (min_voxel_height + max_voxel_height) / 2\r\n            dense_heatmap_cross = self.generate_heatmap_deform(lidar_feat, img_feat, voxel_height, img_metas, lidar2img_rt, input_padding_mask)\r\n\r\n            if self.cross_only:\r\n                heatmap = dense_heatmap_cross.detach().sigmoid()\r\n            else:\r\n                heatmap = (dense_heatmap.detach().sigmoid() + dense_heatmap_cross.detach().sigmoid()) / 2\r\n            dense_heatmap = dense_heatmap_cross\r\n        padding = self.nms_kernel_size // 2\r\n        local_max = torch.zeros_like(heatmap)\r\n        # equals to nms radius = voxel_size * out_size_factor * kernel_size\r\n        local_max_inner = F.max_pool2d(heatmap, kernel_size=self.nms_kernel_size, stride=1, padding=0)\r\n        local_max[:, :, padding:(-padding), padding:(-padding)] = local_max_inner\r\n        ## for Pedestrian & Traffic_cone in nuScenes\r\n        if self.test_cfg['dataset'] == 'nuScenes':\r\n            local_max[:, 8, ] = F.max_pool2d(heatmap[:, 8], kernel_size=1, stride=1, padding=0)\r\n            local_max[:, 9, ] = F.max_pool2d(heatmap[:, 9], kernel_size=1, stride=1, padding=0)\r\n        elif self.test_cfg['dataset'] == 'Waymo':  # for 
Pedestrian & Cyclist in Waymo\r\n            local_max[:, 1, ] = F.max_pool2d(heatmap[:, 1], kernel_size=1, stride=1, padding=0)\r\n            local_max[:, 2, ] = F.max_pool2d(heatmap[:, 2], kernel_size=1, stride=1, padding=0)\r\n        heatmap = heatmap * (heatmap == local_max)  # [BS, num_class, H, W]\r\n        heatmap = heatmap.view(batch_size, heatmap.shape[1], -1)  # [BS, num_class, H*W]\r\n\r\n        # top #num_proposals among all classes\r\n        top_proposals = heatmap.reshape(batch_size, -1).argsort(dim=-1, descending=True)[..., :self.num_proposals]  # [BS, num_proposals]\r\n\r\n        top_proposals_class = top_proposals // heatmap.shape[-1]  # [BS, num_proposals]\r\n        top_proposals_index = top_proposals % heatmap.shape[-1]  # [BS, num_proposals]\r\n        return heatmap, dense_heatmap, top_proposals_class, top_proposals_index\r\n\r\n    def generate_heatmap_img(self, img_feats, batch_size):\r\n\r\n        img_dense_heatmaps = []\r\n        img_heatmaps = []\r\n        for lvl in range(self.level_num):\r\n\r\n            # img_dense_heatmap = self.img_heatmap_head(img_feats[lvl])  # [BS*num_view, num_class, h, w]\r\n            img_dense_heatmap = self.img_heatmap_head[lvl](img_feats[lvl])  # [BS*num_view, num_class, h, w]\r\n\r\n            img_heatmap = img_dense_heatmap.detach().sigmoid()  # [BS*num_view, num_class, h, w]\r\n            padding = self.img_nms_kernel_size // 2\r\n            local_max = torch.zeros_like(img_heatmap)\r\n            # equals to nms radius = voxel_size * out_size_factor * kernel_size\r\n            local_max_inner = F.max_pool2d(img_heatmap, kernel_size=self.img_nms_kernel_size, stride=1, padding=0)\r\n            local_max[:, :, padding:(-padding), padding:(-padding)] = local_max_inner\r\n            img_heatmap = img_heatmap * (img_heatmap == local_max)  # [BS*num_view, num_class, h, w]\r\n            img_heatmap = img_heatmap.view(batch_size, self.num_views, img_heatmap.shape[1], -1)  # [BS, num_views, num_class, h*w]\r\n            img_heatmap = img_heatmap.permute(0, 2, 1, 3) # [BS, num_class, num_views, h*w]\r\n            img_heatmaps.append(img_heatmap)\r\n\r\n            img_dense_heatmap = img_dense_heatmap.view(batch_size, self.num_views, img_dense_heatmap.shape[1],\r\n                        img_dense_heatmap.shape[2], img_dense_heatmap.shape[3])  # [BS, num_views, num_class, h, w]\r\n            img_dense_heatmap = img_dense_heatmap.permute(0, 2, 1, 3, 4)  # [BS, num_class, num_views, h, w]\r\n            img_dense_heatmap = img_dense_heatmap.view(batch_size, self.num_classes, self.num_views, img_dense_heatmap.shape[-2]*img_dense_heatmap.shape[-1])\r\n            img_dense_heatmaps.append(img_dense_heatmap)\r\n\r\n        img_heatmap_stack = torch.cat(img_heatmaps, dim=3)  # [BS, num_class, num_views, h*w (sum)]\r\n        # top #num_proposals among all classes\r\n        top_proposals = img_heatmap_stack.view(batch_size, -1).argsort(dim=-1, descending=True)[..., :self.num_img_proposals]  # [BS, num_proposals]\r\n        top_proposals_class = top_proposals // (img_heatmap_stack.shape[-1]*img_heatmap_stack.shape[-2])  # [BS, num_proposals]\r\n\r\n        top_proposals_view_index = top_proposals % (img_heatmap_stack.shape[-1]*img_heatmap_stack.shape[-2]) // img_heatmap_stack.shape[-1]  # [BS, num_proposals]\r\n        top_proposals_pos_index = top_proposals % img_heatmap_stack.shape[-1]  # [BS, num_proposals]\r\n        top_proposals_index = top_proposals % (img_heatmap_stack.shape[-1]*img_heatmap_stack.shape[-2])  # [BS, 
num_proposals]\r\n\r\n        img_heatmap_stack = img_heatmap_stack.contiguous().view(batch_size, img_heatmap_stack.shape[1], -1)\r\n        img_dense_heatmaps_stack = torch.cat(img_dense_heatmaps, dim=-1)\r\n\r\n        return img_heatmap_stack, img_dense_heatmaps_stack, top_proposals_class, top_proposals_index, top_proposals_view_index, top_proposals_pos_index\r\n\r\n    def get_targets(self, gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels, gt_img_centers_view, gt_bboxes_cam_view, gt_visible, gt_bboxes_lidar_view, preds_dict, img_metas):\r\n        \"\"\"Generate training targets.\r\n\r\n        Args:\r\n            gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): Ground truth gt boxes.\r\n            gt_labels_3d (torch.Tensor): Labels of boxes.\r\n            preds_dicts (tuple of dict): first index by layer (default 1)\r\n        Returns:\r\n            tuple[torch.Tensor]: Tuple of target including \\\r\n                the following results in order.\r\n\r\n                - torch.Tensor: classification target.  [BS, num_proposals]\r\n                - torch.Tensor: classification weights (mask)  [BS, num_proposals]\r\n                - torch.Tensor: regression target. [BS, num_proposals, 8]\r\n                - torch.Tensor: regression weights. [BS, num_proposals, 8]\r\n        \"\"\"\r\n        # change preds_dict into list of dict (index by batch_id)\r\n        # preds_dict[0]['center'].shape [bs, 3, num_proposal]\r\n        list_of_pred_dict = []\r\n        for batch_idx in range(len(gt_bboxes_3d)):\r\n            pred_dict = {}\r\n            for key in preds_dict[0].keys():\r\n                pred_dict[key] = preds_dict[0][key][batch_idx:batch_idx + 1]\r\n            list_of_pred_dict.append(pred_dict)\r\n\r\n        assert len(gt_bboxes_3d) == len(list_of_pred_dict)\r\n\r\n        res_tuple = multi_apply(self.get_targets_single, gt_bboxes_3d, gt_labels_3d, gt_visible, list_of_pred_dict, np.arange(len(gt_labels_3d)))\r\n\r\n        labels = torch.cat(res_tuple[0], dim=0)\r\n        label_weights = torch.cat(res_tuple[1], dim=0)\r\n        bbox_targets = torch.cat(res_tuple[2], dim=0)\r\n        bbox_weights = torch.cat(res_tuple[3], dim=0)\r\n        ious = torch.cat(res_tuple[4], dim=0)\r\n        num_pos_layer = np.concatenate(res_tuple[5], axis=0)  # [BS, num_layer]\r\n        # matched_ious = np.mean(res_tuple[6])\r\n        matched_ious = torch.cat(res_tuple[6], dim=0)\r\n\r\n        res_tuple_2d = multi_apply(self.get_targets_single_2d, gt_bboxes, gt_labels, gt_img_centers_view, gt_bboxes_cam_view, gt_bboxes_lidar_view, list_of_pred_dict, img_metas, np.arange(len(gt_bboxes)))\r\n        labels_2d = torch.cat(res_tuple_2d[0], dim=0)\r\n        label_weights_2d = torch.cat(res_tuple_2d[1], dim=0)\r\n        bbox_targets_2d = torch.cat(res_tuple_2d[2], dim=0)\r\n        bbox_weights_2d = torch.cat(res_tuple_2d[3], dim=0)\r\n        ious_2d = torch.cat(res_tuple_2d[4], dim=0)\r\n        num_pos_layer_2d = np.concatenate(res_tuple_2d[5], axis=0)  # [BS, num_layer]\r\n        matched_ious_2d = torch.cat(res_tuple_2d[6], dim=0)\r\n\r\n        if self.view_transform:\r\n            res_tuple_view = multi_apply(self.get_targets_single_view, gt_bboxes_3d, gt_labels_3d, gt_visible, list_of_pred_dict, np.arange(len(gt_bboxes)))\r\n            labels_view = torch.cat(res_tuple_view[0], dim=0)\r\n            label_weights_view = torch.cat(res_tuple_view[1], dim=0)\r\n            bbox_targets_view = torch.cat(res_tuple_view[2], dim=0)\r\n            bbox_weights_view = 
torch.cat(res_tuple_view[3], dim=0)\r\n            ious_view = torch.cat(res_tuple_view[4], dim=0)\r\n            num_pos_layer_view = np.concatenate(res_tuple_view[5], axis=0)  # [BS, num_layer]\r\n            matched_ious_view = torch.cat(res_tuple_view[6], dim=0)\r\n\r\n        if self.initialize_by_heatmap:\r\n            heatmap = torch.cat(res_tuple[7], dim=0)\r\n            heatmap_2d = torch.cat(res_tuple_2d[7], dim=0)\r\n            if self.view_transform:\r\n                return labels, label_weights, bbox_targets, bbox_weights, ious, num_pos_layer, matched_ious, heatmap, \\\r\n                   labels_2d, label_weights_2d, bbox_targets_2d, bbox_weights_2d, ious_2d, num_pos_layer_2d, \\\r\n                   matched_ious_2d, heatmap_2d, labels_view, label_weights_view, bbox_targets_view, bbox_weights_view, \\\r\n                   ious_view, num_pos_layer_view, matched_ious_view\r\n            else:\r\n                return labels, label_weights, bbox_targets, bbox_weights, ious, num_pos_layer, matched_ious, heatmap, \\\r\n                   labels_2d, label_weights_2d, bbox_targets_2d, bbox_weights_2d, ious_2d, num_pos_layer_2d, \\\r\n                   matched_ious_2d, heatmap_2d\r\n        else:\r\n            if self.view_transform:\r\n                return labels, label_weights, bbox_targets, bbox_weights, ious, num_pos_layer, matched_ious, \\\r\n                    labels_2d, label_weights_2d, bbox_targets_2d, bbox_weights_2d, ious_2d, num_pos_layer_2d, matched_ious_2d, \\\r\n                    labels_view, label_weights_view, bbox_targets_view, bbox_weights_view, ious_view, num_pos_layer_view, \\\r\n                    matched_ious_view\r\n            else:\r\n                return labels, label_weights, bbox_targets, bbox_weights, ious, num_pos_layer, matched_ious, \\\r\n                    labels_2d, label_weights_2d, bbox_targets_2d, bbox_weights_2d, ious_2d, num_pos_layer_2d, matched_ious_2d,\r\n\r\n    def get_targets_single_2d(self, gt_bboxes, gt_labels, gt_centers_2d, gt_bboxes_cam_view, gt_bboxes_lidar_view, preds_dict, img_metas, batch_idx):\r\n        num_proposals = preds_dict['cls'].shape[-1]\r\n        loc_cam_3d = copy.deepcopy(preds_dict['loc_cam_3d'].detach())\r\n        dim = copy.deepcopy(preds_dict['dim_2d'].detach())\r\n        rot = copy.deepcopy(preds_dict['rot_2d'].detach())\r\n        if 'vel_2d' in preds_dict.keys():\r\n            vel = copy.deepcopy(preds_dict['vel_2d'].detach())\r\n        else:\r\n            vel = None\r\n        view = copy.deepcopy(preds_dict['view'].detach())[0] # [num_proposals, ]\r\n        score = copy.deepcopy(preds_dict['cls'].detach())\r\n\r\n        bboxes_dict = self.bbox_2d_coder.decode(score, rot, dim, loc_cam_3d, vel)\r\n        bboxes_3d_tensor = bboxes_dict[0]['bboxes']\r\n\r\n        gt_bboxes_3d_tensor = gt_bboxes_cam_view.tensor.to(score.device)\r\n        gt_bboxes_lidar_view_tensor = gt_bboxes_lidar_view.tensor.to(score.device)\r\n\r\n        assert gt_bboxes_lidar_view_tensor.shape[0] == gt_bboxes_3d_tensor.shape[0]\r\n\r\n        img_shape = img_metas['pad_shape']\r\n        img_scale =[img_shape[1], img_shape[0], img_shape[1], img_shape[0]]\r\n\r\n        img_scale = torch.Tensor(img_scale).to(score.device).unsqueeze(0)\r\n        gt_centers_2d = gt_centers_2d.float()\r\n        normal_gt_centers = gt_centers_2d[..., :2] / img_scale[..., :2]\r\n        normal_gt_bboxes = gt_bboxes.float() / img_scale\r\n\r\n        assign_result_list = []\r\n        for idx_layer in 
range(self.num_img_decoder_layers):\r\n            bboxes_tensor_layer = bboxes_3d_tensor[idx_layer*self.num_img_proposals:(idx_layer+1)*self.num_img_proposals, :]  # [num_proposals, 10]\r\n            score_layer = score[..., idx_layer*self.num_img_proposals:(idx_layer+1)*self.num_img_proposals]  # [1, num_class, num_proposal]\r\n            view_layer = view[idx_layer*self.num_img_proposals:(idx_layer+1)*self.num_img_proposals]  # [num_proposals]\r\n\r\n            assign_result = self.bbox_assigner_2d.assign(bboxes_tensor_layer, gt_bboxes_3d_tensor, gt_labels, score_layer, view_layer, self.train_cfg)\r\n            assign_result_list.append(assign_result)\r\n\r\n        # combine assign result of each layer\r\n        assign_result_ensemble = AssignResult(\r\n            num_gts=sum([res.num_gts for res in assign_result_list]),\r\n            gt_inds=torch.cat([res.gt_inds for res in assign_result_list]),\r\n            max_overlaps=torch.cat([res.max_overlaps for res in assign_result_list]),\r\n            labels=torch.cat([res.labels for res in assign_result_list]),\r\n        )\r\n        sampling_result = self.bbox_sampler.sample(assign_result_ensemble, bboxes_3d_tensor, gt_bboxes_3d_tensor)\r\n        pos_inds = sampling_result.pos_inds\r\n        neg_inds = sampling_result.neg_inds\r\n        assert len(pos_inds) + len(neg_inds) == num_proposals\r\n\r\n        start = 0\r\n        pos_num_layers = []\r\n        for idx_layer in range(self.num_img_decoder_layers):\r\n            layer_num_proposal = self.num_img_proposals\r\n            layer_mask = torch.logical_and(pos_inds>=start, pos_inds<start+layer_num_proposal)\r\n            pos_inds_layer = pos_inds[layer_mask]\r\n            count = pos_inds_layer.shape[0]\r\n            pos_num_layers.append(count)\r\n            start += layer_num_proposal\r\n        pos_num_layers = np.array(pos_num_layers)\r\n        assert np.sum(pos_num_layers) == pos_inds.shape[0]\r\n\r\n        # create target for loss computation\r\n        bbox_targets = torch.zeros([num_proposals, self.bbox_2d_coder.code_size]).to(score.device)\r\n        bbox_weights = torch.zeros([num_proposals, self.bbox_2d_coder.code_size]).to(score.device)\r\n        view_targets = score.new_zeros(num_proposals, dtype=torch.long)\r\n        ious = assign_result_ensemble.max_overlaps\r\n        ious = torch.clamp(ious, min=0.0, max=1.0)\r\n        labels = score.new_zeros(num_proposals, dtype=torch.long)\r\n        label_weights = score.new_ones(num_proposals, dtype=torch.long)\r\n        center_targets = torch.zeros([num_proposals, 2]).to(score.device)\r\n        center_weights = torch.zeros([num_proposals, 2]).to(score.device)\r\n        depth_labels = score.new_zeros(num_proposals)\r\n        depth_weights = score.new_zeros(num_proposals, dtype=torch.long)\r\n\r\n        bbox_lidar_targets = torch.zeros([self.num_img_proposals, self.bbox_coder.code_size]).to(score.device)\r\n        bbox_lidar_weights = torch.zeros([self.num_img_proposals, self.bbox_coder.code_size]).to(score.device)\r\n        labels_lidar = score.new_zeros(self.num_img_proposals, dtype=torch.long)\r\n        label_lidar_weights = score.new_ones(self.num_img_proposals, dtype=torch.long)\r\n        pos_inds_lastlayer = pos_inds[layer_mask] - (self.num_img_decoder_layers - 1) * self.num_img_proposals\r\n        pos_assigned_gt_inds_lastlayer = sampling_result.pos_assigned_gt_inds[layer_mask]\r\n\r\n        ious_lidar = torch.zeros_like(bbox_lidar_targets[:, 0]) - 1\r\n\r\n        if gt_labels is not None: 
 # default label is -1\r\n            labels += self.num_classes\r\n            labels_lidar += self.num_classes\r\n\r\n        # both pos and neg have classification loss, only pos has regression and iou loss\r\n        if len(pos_inds) > 0:\r\n            # bbox_targets[pos_inds, :] = sampling_result.pos_gt_bboxes\r\n\r\n            bbox_weights[pos_inds, :] = 1.0\r\n            pos_gt_bboxes = sampling_result.pos_gt_bboxes\r\n\r\n            pos_bbox_targets = self.bbox_2d_coder.encode(pos_gt_bboxes)\r\n            bbox_targets[pos_inds, :pos_bbox_targets.shape[1]] = pos_bbox_targets\r\n\r\n            view_targets[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds, 1]\r\n\r\n            if gt_labels is None:\r\n                labels[pos_inds] = 1\r\n            else:\r\n                labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds, 0]\r\n            if self.train_cfg.pos_weight <= 0:\r\n                label_weights[pos_inds] = 1.0\r\n            else:\r\n                label_weights[pos_inds] = self.train_cfg.pos_weight\r\n\r\n            center_targets[pos_inds, :] = normal_gt_centers[sampling_result.pos_assigned_gt_inds, :2]\r\n            center_weights[pos_inds] = 1.0\r\n\r\n            depth = gt_centers_2d[sampling_result.pos_assigned_gt_inds, 2]\r\n            depth_labels[pos_inds] = depth\r\n            depth_weights[pos_inds] = 1\r\n\r\n            view_mask_ignore = view_targets != view\r\n            bbox_weights[view_mask_ignore, :] = 0\r\n            label_weights[view_mask_ignore] = 0\r\n\r\n            if len(neg_inds) > 0:\r\n                label_weights[neg_inds] = 1.0\r\n\r\n            bbox_targets[:, :2] = center_targets\r\n            bbox_targets[:, 2] = depth_labels\r\n\r\n        # # compute dense heatmap targets\r\n        if self.initialize_by_heatmap:\r\n            device = labels.device\r\n            feature_map_size = (img_shape[1] // self.out_size_factor_img, img_shape[0] // self.out_size_factor_img)\r\n\r\n            w, h = feature_map_size\r\n            heatmaps = []\r\n            for lvl in range(self.level_num):\r\n                heatmaps.append(score.new_zeros(self.num_classes, self.num_views, h, w))\r\n                h = h // 2\r\n                w = w // 2\r\n\r\n            for idx in range(len(gt_bboxes)):\r\n                width = gt_bboxes[idx][2]\r\n                length = gt_bboxes[idx][3]\r\n\r\n                max_l = max(length, width)\r\n\r\n                width = width / self.out_size_factor_img\r\n                length = length / self.out_size_factor_img\r\n                view_id = gt_labels[idx][1]\r\n                if width > 0 and length > 0:\r\n                    radius = gaussian_radius((length, width), min_overlap=self.train_cfg['gaussian_overlap_2d'])\r\n                    radius = max(self.train_cfg['min_radius'], radius)\r\n                    radius = min(self.train_cfg['max_radius'], radius)\r\n\r\n                    x, y = gt_centers_2d[idx][0], gt_centers_2d[idx][1]\r\n                    # x, y = gt_bboxes[idx][0], gt_bboxes[idx][1]\r\n\r\n                    coor_x = x / self.out_size_factor_img\r\n                    coor_y = y / self.out_size_factor_img\r\n\r\n                    center = torch.tensor([coor_x, coor_y], dtype=torch.float32, device=device)\r\n\r\n                    if self.level_num == 4:\r\n                        if max_l < 48:\r\n                            lvl = 0\r\n                        elif max_l < 96:\r\n                            lvl = 1\r\n     
                       center = center / 2\r\n                            radius = radius / 2\r\n                        elif max_l < 192:\r\n                            lvl = 2\r\n                            center = center / 4\r\n                            radius = radius / 4\r\n                        else:\r\n                            lvl = 3\r\n                            center = center / 8\r\n                            radius = radius / 8\r\n                    elif self.level_num == 3:\r\n                        if max_l < 48:\r\n                            lvl = 0\r\n                        elif max_l < 96:\r\n                            lvl = 1\r\n                            center = center / 2\r\n                            radius = radius / 2\r\n                        else:\r\n                            lvl = 2\r\n                            center = center / 4\r\n                            radius = radius / 4\r\n                    elif self.level_num == 2:\r\n                        if max_l < 96:\r\n                            lvl = 0\r\n                        else:\r\n                            lvl = 1\r\n                            center = center / 2\r\n                            radius = radius / 2\r\n                    else:\r\n                        assert self.level_num == 1\r\n                        lvl = 0\r\n\r\n                    center_int = center.to(torch.int32)\r\n                    radius = int(radius)\r\n\r\n                    draw_heatmap_gaussian(heatmaps[lvl][gt_labels[idx][0], view_id], center_int, radius)\r\n\r\n            for lvl in range(self.level_num):\r\n                heatmaps[lvl] = heatmaps[lvl].view(self.num_classes, self.num_views, heatmaps[lvl].shape[-2]*heatmaps[lvl].shape[-1])\r\n            heatmap = torch.cat(heatmaps, dim=-1)\r\n            matched_ious = torch.ones_like(ious) * -1\r\n            matched_ious[pos_inds] = ious[pos_inds]\r\n\r\n            return labels[None], label_weights[None], bbox_targets[None], bbox_weights[None], ious[None], pos_num_layers[None], matched_ious[None], heatmap[None], labels_lidar[None], label_lidar_weights[None], bbox_lidar_targets[None], bbox_lidar_weights[None], ious_lidar[None]\r\n        else:\r\n            matched_ious = torch.ones_like(ious) * -1\r\n            matched_ious[pos_inds] = ious[pos_inds]\r\n            return labels[None], label_weights[None], bbox_targets[None], bbox_weights[None], ious[None], pos_num_layers[None], matched_ious[None], labels_lidar[None], label_lidar_weights[None], bbox_lidar_targets[None], bbox_lidar_weights[None], ious_lidar[None]\r\n\r\n    def get_targets_single(self, gt_bboxes_3d, gt_labels_3d, gt_visible, preds_dict, batch_idx):\r\n        \"\"\"Generate training targets for a single sample.\r\n\r\n        Args:\r\n            gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): Ground truth gt boxes.\r\n            gt_labels_3d (torch.Tensor): Labels of boxes.\r\n            gt_bboxes (:obj:`LiDARInstance3DBoxes`): Ground truth gt boxes 2d.\r\n            gt_labels (torch.Tensor): Labels of boxes 2d.\r\n            preds_dict (dict): dict of prediction result for a single sample\r\n        Returns:\r\n            tuple[torch.Tensor]: Tuple of target including \\\r\n                the following results in order.\r\n\r\n                - torch.Tensor: classification target.  [1, num_proposals]\r\n                - torch.Tensor: classification weights (mask)  [1, num_proposals]\r\n                - torch.Tensor: regression target. 
[1, num_proposals, 8]\r\n                - torch.Tensor: regression weights. [1, num_proposals, 8]\r\n                - torch.Tensor: iou target. [1, num_proposals]\r\n                - int: number of positive proposals\r\n        \"\"\"\r\n        num_proposals = preds_dict['center'].shape[-1]\r\n\r\n        # get pred boxes, carefully ! donot change the network outputs\r\n        score = copy.deepcopy(preds_dict['heatmap'].detach())\r\n        center = copy.deepcopy(preds_dict['center'].detach())\r\n        height = copy.deepcopy(preds_dict['height'].detach())\r\n        dim = copy.deepcopy(preds_dict['dim'].detach())\r\n        rot = copy.deepcopy(preds_dict['rot'].detach())\r\n        if 'vel' in preds_dict.keys():\r\n            vel = copy.deepcopy(preds_dict['vel'].detach())\r\n        else:\r\n            vel = None\r\n\r\n        boxes_dict = self.bbox_coder.decode(score, rot, dim, center, height, vel)  # decode the prediction to real world metric bbox\r\n        bboxes_tensor = boxes_dict[0]['bboxes']\r\n        gt_bboxes_tensor = gt_bboxes_3d.tensor.to(score.device)\r\n\r\n        num_fusion_decoder_layers = self.num_fusion_decoder_layers\r\n\r\n        num_layer = self.num_pts_decoder_layers + num_fusion_decoder_layers\r\n\r\n        start = 0\r\n        pos_inds = []\r\n        neg_inds = []\r\n        pos_gt_bboxes = []\r\n        pos_gt_labels = []\r\n        ious = []\r\n        for idx_layer in range(num_layer):\r\n            layer_num_proposal = self.get_layer_num_proposal(idx_layer)\r\n\r\n            bboxes_tensor_layer = bboxes_tensor[start:start + layer_num_proposal, :]\r\n            score_layer = score[..., start:start + layer_num_proposal]\r\n\r\n            gt_bboxes_tensor_layer = gt_bboxes_tensor\r\n            gt_labels_3d_layer = gt_labels_3d\r\n\r\n            if self.train_cfg.assigner.type == 'HungarianAssigner3D':\r\n                assign_result = self.bbox_assigner.assign(bboxes_tensor_layer, gt_bboxes_tensor_layer, gt_labels_3d_layer, score_layer, self.train_cfg)\r\n            elif self.train_cfg.assigner.type == 'HeuristicAssigner':\r\n                assign_result = self.bbox_assigner.assign(bboxes_tensor_layer, gt_bboxes_tensor_layer, None, gt_labels_3d_layer, self.query_labels[batch_idx])\r\n            else:\r\n                raise NotImplementedError\r\n            # assign_result_list.append(assign_result)\r\n\r\n            sampling_result_layer = self.bbox_sampler.sample(assign_result, bboxes_tensor_layer, gt_bboxes_tensor_layer)\r\n            pos_inds_layer = sampling_result_layer.pos_inds + start\r\n            neg_inds_layer = sampling_result_layer.neg_inds + start\r\n\r\n            pos_inds.append(pos_inds_layer)\r\n            neg_inds.append(neg_inds_layer)\r\n\r\n            pos_gt_bboxes_layer = sampling_result_layer.pos_gt_bboxes\r\n            pos_gt_labels_layer = gt_labels_3d_layer[sampling_result_layer.pos_assigned_gt_inds]\r\n\r\n            pos_gt_bboxes.append(pos_gt_bboxes_layer)\r\n            pos_gt_labels.append(pos_gt_labels_layer)\r\n\r\n            ious_layer = assign_result.max_overlaps\r\n            ious.append(ious_layer)\r\n            start += layer_num_proposal\r\n\r\n\r\n        pos_inds = torch.cat(pos_inds)\r\n        neg_inds = torch.cat(neg_inds)\r\n\r\n\r\n        pos_gt_bboxes = torch.cat(pos_gt_bboxes, dim=0)\r\n        pos_gt_labels = torch.cat(pos_gt_labels, dim=0)\r\n        assert len(pos_inds) + len(neg_inds) == num_proposals\r\n\r\n        start = 0\r\n        pos_num_layers = []\r\n        for 
idx_layer in range(num_layer):\r\n            layer_num_proposal = self.get_layer_num_proposal(idx_layer)\r\n            count = pos_inds[torch.logical_and(pos_inds>=start, pos_inds<start+layer_num_proposal)].shape[0]\r\n            pos_num_layers.append(count)\r\n            start += layer_num_proposal\r\n        pos_num_layers = np.array(pos_num_layers)\r\n        assert np.sum(pos_num_layers) == pos_inds.shape[0]\r\n\r\n        # create target for loss computation\r\n        bbox_targets = torch.zeros([num_proposals, self.bbox_coder.code_size]).to(center.device)\r\n        bbox_weights = torch.zeros([num_proposals, self.bbox_coder.code_size]).to(center.device)\r\n        ious = torch.cat(ious)\r\n        ious = torch.clamp(ious, min=0.0, max=1.0)\r\n        labels = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)\r\n        label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)\r\n\r\n        if gt_labels_3d is not None:  # default label is -1\r\n            labels += self.num_classes\r\n\r\n        # both pos and neg have classification loss, only pos has regression and iou loss\r\n        if len(neg_inds) > 0:\r\n            label_weights[neg_inds] = 1.0\r\n\r\n        if len(pos_inds) > 0:\r\n            pos_bbox_targets = self.bbox_coder.encode(pos_gt_bboxes)\r\n\r\n            bbox_targets[pos_inds, :] = pos_bbox_targets\r\n            bbox_weights[pos_inds, :] = 1.0\r\n\r\n            if gt_labels_3d is None:\r\n                labels[pos_inds] = 1\r\n            else:\r\n                labels[pos_inds] = pos_gt_labels\r\n            if self.train_cfg.pos_weight <= 0:\r\n                label_weights[pos_inds] = 1.0\r\n            else:\r\n                label_weights[pos_inds] = self.train_cfg.pos_weight\r\n\r\n        # # compute dense heatmap targets\r\n        if self.initialize_by_heatmap:\r\n            device = labels.device\r\n            gt_bboxes_3d = torch.cat([gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]], dim=1).to(device)\r\n            grid_size = torch.tensor(self.train_cfg['grid_size'])\r\n            pc_range = torch.tensor(self.train_cfg['point_cloud_range'])\r\n            voxel_size = torch.tensor(self.train_cfg['voxel_size'])\r\n            feature_map_size = grid_size[:2] // self.train_cfg['out_size_factor']  # [x_len, y_len]\r\n            heatmap = gt_bboxes_3d.new_zeros(self.num_classes, feature_map_size[1], feature_map_size[0])\r\n            for idx in range(len(gt_bboxes_3d)):\r\n                width = gt_bboxes_3d[idx][3]\r\n                length = gt_bboxes_3d[idx][4]\r\n                width = width / voxel_size[0] / self.train_cfg['out_size_factor']\r\n                length = length / voxel_size[1] / self.train_cfg['out_size_factor']\r\n                if width > 0 and length > 0:\r\n                    radius = gaussian_radius((length, width), min_overlap=self.train_cfg['gaussian_overlap'])\r\n                    radius = max(self.train_cfg['min_radius'], int(radius))\r\n                    x, y = gt_bboxes_3d[idx][0], gt_bboxes_3d[idx][1]\r\n\r\n                    coor_x = (x - pc_range[0]) / voxel_size[0] / self.train_cfg['out_size_factor']\r\n                    coor_y = (y - pc_range[1]) / voxel_size[1] / self.train_cfg['out_size_factor']\r\n\r\n                    center_img = torch.tensor([coor_x, coor_y], dtype=torch.float32, device=device)\r\n                    center_int = center_img.to(torch.int32)\r\n                    draw_heatmap_gaussian(heatmap[gt_labels_3d[idx]], center_int, 
radius)\r\n\r\n            matched_ious = torch.ones_like(ious) * -1\r\n            matched_ious[pos_inds] = ious[pos_inds]\r\n            return labels[None], label_weights[None], bbox_targets[None], bbox_weights[None], ious[None], pos_num_layers[None], matched_ious[None], heatmap[None]\r\n        else:\r\n            matched_ious = torch.ones_like(ious) * -1\r\n            matched_ious[pos_inds] = ious[pos_inds]\r\n            return labels[None], label_weights[None], bbox_targets[None], bbox_weights[None], ious[None], pos_num_layers[None], matched_ious[None]\r\n\r\n    def get_targets_single_view(self, gt_bboxes_3d, gt_labels_3d, gt_visible_3d, preds_dict, batch_idx):\r\n        \"\"\"Generate training targets for a single sample.\r\n\r\n        Args:\r\n            gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): Ground truth boxes.\r\n            gt_labels_3d (torch.Tensor): Labels of the boxes.\r\n            gt_visible_3d (torch.Tensor): Visibility of the LiDAR boxes in the camera views (1 if visible).\r\n            preds_dict (dict): Dict of prediction results for a single sample.\r\n            batch_idx (int): Index of the sample within the batch.\r\n        Returns:\r\n            tuple[torch.Tensor]: Tuple of target including \\\r\n                the following results in order.\r\n\r\n                - torch.Tensor: classification target.  [1, num_proposals]\r\n                - torch.Tensor: classification weights (mask)  [1, num_proposals]\r\n                - torch.Tensor: regression target. [1, num_proposals, 8]\r\n                - torch.Tensor: regression weights. [1, num_proposals, 8]\r\n                - torch.Tensor: iou target. [1, num_proposals]\r\n                - int: number of positive proposals\r\n        \"\"\"\r\n        num_proposals = preds_dict['center_view'].shape[-1]\r\n\r\n        # get pred boxes, carefully ! 
donot change the network outputs\r\n        score = copy.deepcopy(preds_dict['heatmap_view'].detach())\r\n        center = copy.deepcopy(preds_dict['center_view'].detach())\r\n        height = copy.deepcopy(preds_dict['height_view'].detach())\r\n        dim = copy.deepcopy(preds_dict['dim_view'].detach())\r\n        rot = copy.deepcopy(preds_dict['rot_view'].detach())\r\n        if 'vel_view' in preds_dict.keys():\r\n            vel = copy.deepcopy(preds_dict['vel_view'].detach())\r\n        else:\r\n            vel = None\r\n\r\n        boxes_dict = self.bbox_coder.decode(score, rot, dim, center, height, vel)  # decode the prediction to real world metric bbox\r\n        bboxes_tensor = boxes_dict[0]['bboxes']\r\n\r\n        assert gt_visible_3d.shape[0] == gt_bboxes_3d.tensor.shape[0] == gt_labels_3d.shape[0]\r\n        gt_mask = gt_visible_3d == 1\r\n        gt_bboxes_3d = gt_bboxes_3d[gt_mask]\r\n        gt_labels_3d = gt_labels_3d[gt_mask]\r\n        gt_bboxes_tensor = gt_bboxes_3d.tensor.to(score.device)\r\n\r\n        num_layer = 1\r\n        assign_result_list = []\r\n        start = 0\r\n        for idx_layer in range(num_layer):\r\n            layer_num_proposal = self.get_layer_num_proposal(idx_layer)\r\n\r\n            bboxes_tensor_layer = bboxes_tensor[start:start + layer_num_proposal, :]\r\n            score_layer = score[..., start:start + layer_num_proposal]\r\n            start += layer_num_proposal\r\n\r\n            if self.train_cfg.assigner.type == 'HungarianAssigner3D':\r\n                assign_result = self.bbox_assigner.assign(bboxes_tensor_layer, gt_bboxes_tensor, gt_labels_3d, score_layer, self.train_cfg)\r\n            elif self.train_cfg.assigner.type == 'HeuristicAssigner':\r\n                assign_result = self.bbox_assigner.assign(bboxes_tensor_layer, gt_bboxes_tensor, None, gt_labels_3d, self.query_labels[batch_idx])\r\n            else:\r\n                raise NotImplementedError\r\n            assign_result_list.append(assign_result)\r\n\r\n        # combine assign result of each layer\r\n        assign_result_ensemble = AssignResult(\r\n            num_gts=sum([res.num_gts for res in assign_result_list]),\r\n            gt_inds=torch.cat([res.gt_inds for res in assign_result_list]),\r\n            max_overlaps=torch.cat([res.max_overlaps for res in assign_result_list]),\r\n            labels=torch.cat([res.labels for res in assign_result_list]),\r\n        )\r\n        sampling_result = self.bbox_sampler.sample(assign_result_ensemble, bboxes_tensor, gt_bboxes_tensor)\r\n        pos_inds = sampling_result.pos_inds\r\n        neg_inds = sampling_result.neg_inds\r\n        assert len(pos_inds) + len(neg_inds) == num_proposals\r\n\r\n        start = 0\r\n        pos_num_layers = []\r\n        for idx_layer in range(num_layer):\r\n            layer_num_proposal = self.get_layer_num_proposal(idx_layer)\r\n            count = pos_inds[torch.logical_and(pos_inds>=start, pos_inds<start+layer_num_proposal)].shape[0]\r\n            pos_num_layers.append(count)\r\n            start += layer_num_proposal\r\n        pos_num_layers = np.array(pos_num_layers)\r\n        assert np.sum(pos_num_layers) == pos_inds.shape[0]\r\n\r\n        # create target for loss computation\r\n        bbox_targets = torch.zeros([num_proposals, self.bbox_coder.code_size]).to(center.device)\r\n        bbox_weights = torch.zeros([num_proposals, self.bbox_coder.code_size]).to(center.device)\r\n        ious = assign_result_ensemble.max_overlaps\r\n        ious = torch.clamp(ious, min=0.0, 
max=1.0)\r\n        labels = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)\r\n        label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)\r\n\r\n        if gt_labels_3d is not None:  # default label is -1\r\n            labels += self.num_classes\r\n\r\n        # both pos and neg have classification loss, only pos has regression and iou loss\r\n        if len(pos_inds) > 0:\r\n            pos_bbox_targets = self.bbox_coder.encode(sampling_result.pos_gt_bboxes)\r\n\r\n            bbox_targets[pos_inds, :] = pos_bbox_targets\r\n            bbox_weights[pos_inds, :] = 1.0\r\n\r\n            if gt_labels_3d is None:\r\n                labels[pos_inds] = 1\r\n            else:\r\n                labels[pos_inds] = gt_labels_3d[sampling_result.pos_assigned_gt_inds]\r\n            if self.train_cfg.pos_weight <= 0:\r\n                label_weights[pos_inds] = 1.0\r\n            else:\r\n                label_weights[pos_inds] = self.train_cfg.pos_weight\r\n\r\n        if len(neg_inds) > 0:\r\n            label_weights[neg_inds] = 1.0\r\n\r\n        matched_ious = torch.ones_like(ious) * -1\r\n        matched_ious[pos_inds] = ious[pos_inds]\r\n\r\n        return labels[None], label_weights[None], bbox_targets[None], bbox_weights[None], ious[None], pos_num_layers[None], matched_ious[None]\r\n\r\n    @force_fp32(apply_to=('preds_dicts'))\r\n    def loss(self, gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels, gt_pts_centers_view, gt_img_centers_view, gt_bboxes_cam_view, gt_visible_3d, gt_bboxes_lidar_view, img_metas, preds_dicts, **kwargs):\r\n        \"\"\"Loss function for CenterHead.\r\n\r\n        Args:\r\n            **The followings are in the same order of \"gt_bboxes_3d\" :**\r\n            gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground\r\n                truth gt boxes.\r\n            gt_labels_3d (list[torch.Tensor]): Labels of boxes.\r\n            gt_visible_3d (list[torch.Tensor]): visibility of LiDAR boxes for camera\r\n\r\n            **The followings are in the same order of \"gt_bboxes\":**\r\n            gt_bboxes (list[torch.Tensor]): Ground truth of projected 2d boxes.\r\n            (one LiDAR box may be projected to zero/one/two camera views, so \"gt_bboxes\" has different number with \"gt_bboxes_3d\")\r\n            gt_labels (list[torch.Tensor]): Labels and camera view ids of projected 2d boxes.\r\n            gt_pts_centers_view (list[torch.Tensor]): 3D center of each boxes in the LiDAR coordinate\r\n            gt_img_centers_view (list[torch.Tensor]): 3D center of each boxes in the corresponding camera coordinate\r\n            gt_bboxes_cam_view (list[:obj:`CameraInstance3DBoxes`]): ground truth boxes in the corresponding camera coordinate\r\n            gt_bboxes_lidar_view (list[:obj:`LiDARInstance3DBoxes`]): ground truth boxes in the LiDAR coordinate\r\n\r\n            preds_dicts (list[list[dict]]): Output of forward function.\r\n\r\n        Returns:\r\n            dict[str:torch.Tensor]: Loss of heatmap and bbox of each task.\r\n        \"\"\"\r\n        if self.initialize_by_heatmap:\r\n            if self.view_transform:\r\n                labels, label_weights, bbox_targets, bbox_weights, ious, num_pos_layer, matched_ious, heatmap, \\\r\n                labels_2d, label_weights_2d, bbox_targets_2d, bbox_weights_2d, ious_2d, num_pos_layer_2d, \\\r\n                matched_ious_2d, heatmap_2d, labels_view, label_weights_view, bbox_targets_view, bbox_weights_view, ious_view, \\\r\n                num_pos_layer_view, 
matched_ious_view = self.get_targets(gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels, gt_img_centers_view, gt_bboxes_cam_view, gt_visible_3d, gt_bboxes_lidar_view, preds_dicts[0], img_metas)\r\n            else:\r\n                labels, label_weights, bbox_targets, bbox_weights, ious, num_pos_layer, matched_ious, heatmap, \\\r\n                labels_2d, label_weights_2d, bbox_targets_2d, bbox_weights_2d, ious_2d, num_pos_layer_2d, \\\r\n                matched_ious_2d, heatmap_2d = self.get_targets(gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels, gt_img_centers_view, gt_bboxes_cam_view, gt_visible_3d, gt_bboxes_lidar_view, preds_dicts[0], img_metas)\r\n        else:\r\n            if self.view_transform:\r\n                labels, label_weights, bbox_targets, bbox_weights, ious, num_pos_layer, matched_ious, \\\r\n                labels_2d, label_weights_2d, bbox_targets_2d, bbox_weights_2d, ious_2d, num_pos_layer_2d, \\\r\n                matched_ious_2d, labels_view, label_weights_view, bbox_targets_view, bbox_weights_view, ious_view, \\\r\n                num_pos_layer_view, matched_ious_view = self.get_targets(gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels, gt_img_centers_view, gt_bboxes_cam_view, gt_visible_3d, preds_dicts[0], img_metas)\r\n            else:\r\n                labels, label_weights, bbox_targets, bbox_weights, ious, num_pos_layer, matched_ious, \\\r\n                labels_2d, label_weights_2d, bbox_targets_2d, bbox_weights_2d, ious_2d, num_pos_layer_2d, matched_ious_2d = \\\r\n                    self.get_targets(gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels, gt_img_centers_view, gt_bboxes_cam_view, gt_visible_3d, preds_dicts[0], img_metas)        # if hasattr(self, 'on_the_image_mask'):\r\n\r\n        preds_dict = preds_dicts[0][0]\r\n        loss_dict = dict()\r\n\r\n        if self.initialize_by_heatmap:\r\n            # compute heatmap loss\r\n            loss_heatmap = self.loss_heatmap(clip_sigmoid(preds_dict['dense_heatmap']), heatmap, avg_factor=max(heatmap.eq(1).float().sum().item(), 1))\r\n            if 'valid_shape' in img_metas[0].keys():\r\n                bs = heatmap_2d.shape[0]\r\n                num_view = heatmap_2d.shape[2]\r\n                # heatmap_2d_weight = torch.zeros_like(heatmap_2d)\r\n                heatmaps_2d_weight = []\r\n                img_w, img_h = self.test_cfg['img_scale']\r\n                img_w = img_w // self.out_size_factor_img\r\n                img_h = img_h // self.out_size_factor_img\r\n                for lvl in range(self.level_num):\r\n                    heatmap_2d_weight = torch.zeros(heatmap_2d.shape[0], self.num_classes, self.num_views, img_h, img_w).to(heatmap_2d.device)\r\n                    heatmaps_2d_weight.append(heatmap_2d_weight)\r\n                    img_h = img_h // 2\r\n                    img_w = img_w // 2\r\n\r\n                for sample_idx in range(bs):\r\n                    for view_idx in range(num_view):\r\n                        valid_shape = img_metas[sample_idx]['valid_shape'][view_idx] / self.out_size_factor_img\r\n                        red_width = int(valid_shape[0])\r\n                        red_height = int(valid_shape[1])\r\n                        for lvl in range(self.level_num):\r\n                            heatmaps_2d_weight[lvl][sample_idx, :, view_idx, :red_height, :red_width] = 1\r\n                            red_width = red_width // 2\r\n                            red_height = red_height // 2\r\n\r\n                for lvl in range(self.level_num):\r\n    
                heatmaps_2d_weight[lvl] = heatmaps_2d_weight[lvl].view(heatmaps_2d_weight[lvl].shape[0], self.num_classes, self.num_views, heatmaps_2d_weight[lvl].shape[-2]*heatmaps_2d_weight[lvl].shape[-1])\r\n                heatmap_2d_weight = torch.cat(heatmaps_2d_weight, dim=-1)\r\n\r\n                loss_heatmap_2d = self.loss_heatmap_2d(clip_sigmoid(preds_dict['img_dense_heatmap']), heatmap_2d, weight=heatmap_2d_weight, avg_factor=max(heatmap_2d.eq(1).float().sum().item(), 1))\r\n            else:\r\n                loss_heatmap_2d = self.loss_heatmap_2d(clip_sigmoid(preds_dict['img_dense_heatmap']), heatmap_2d, avg_factor=max(heatmap_2d.eq(1).float().sum().item(), 1))\r\n\r\n            loss_dict['loss_heatmap'] = loss_heatmap\r\n            loss_dict['loss_heatmap_2d'] = loss_heatmap_2d\r\n\r\n        # compute loss for each layer\r\n        start = 0\r\n        num_pos_layer = np.sum(num_pos_layer, axis=0)\r\n        num_pos_layer_2d = np.sum(num_pos_layer_2d, axis=0)\r\n        if self.view_transform:\r\n            num_pos_layer_view = np.sum(num_pos_layer_view, axis=0)\r\n\r\n        num_fusion_decoder_layers = self.num_fusion_decoder_layers\r\n\r\n        num_layer = self.num_pts_decoder_layers + num_fusion_decoder_layers\r\n        for idx_layer in range(num_layer):\r\n            layer_num_proposals = self.get_layer_num_proposal(idx_layer)\r\n            if idx_layer < self.num_pts_decoder_layers:\r\n                prefix = f'layer_pts_{idx_layer}'\r\n            else:\r\n                prefix = f'layer_fusion_{idx_layer-self.num_pts_decoder_layers}'\r\n\r\n            layer_labels = labels[..., start:start + layer_num_proposals].reshape(-1)\r\n            layer_label_weights = label_weights[..., start:start + layer_num_proposals].reshape(-1)\r\n            layer_score = preds_dict['heatmap'][..., start:start + layer_num_proposals]\r\n            layer_cls_score = layer_score.permute(0, 2, 1).reshape(-1, self.num_classes)\r\n            layer_loss_cls = self.loss_cls(layer_cls_score, layer_labels, layer_label_weights, avg_factor=max(num_pos_layer[idx_layer], 1))\r\n\r\n            layer_center = preds_dict['center'][..., start:start + layer_num_proposals]\r\n            layer_height = preds_dict['height'][..., start:start + layer_num_proposals]\r\n            layer_rot = preds_dict['rot'][..., start:start + layer_num_proposals]\r\n            layer_dim = preds_dict['dim'][..., start:start + layer_num_proposals]\r\n            preds = torch.cat([layer_center, layer_height, layer_dim, layer_rot], dim=1).permute(0, 2, 1)  # [BS, num_proposals, code_size]\r\n            if 'vel' in preds_dict.keys():\r\n                layer_vel = preds_dict['vel'][..., start:start + layer_num_proposals]\r\n                preds = torch.cat([layer_center, layer_height, layer_dim, layer_rot, layer_vel], dim=1).permute(0, 2, 1)  # [BS, num_proposals, code_size]\r\n            code_weights = self.train_cfg.get('code_weights', None)\r\n            layer_bbox_weights = bbox_weights[:, start:start + layer_num_proposals, :]\r\n            layer_reg_weights = layer_bbox_weights * layer_bbox_weights.new_tensor(code_weights)\r\n            layer_bbox_targets = bbox_targets[:, start:start + layer_num_proposals, :]\r\n            layer_loss_bbox = self.loss_bbox(preds, layer_bbox_targets, layer_reg_weights, avg_factor=max(num_pos_layer[idx_layer], 1))\r\n\r\n            layer_match_ious = matched_ious[..., start:start + layer_num_proposals]\r\n            layer_match_ious = 
torch.sum(layer_match_ious*(layer_match_ious>=0), dim=-1) / torch.sum(layer_match_ious>=0, dim=-1)\r\n            layer_match_ious = torch.mean(layer_match_ious)\r\n            start += layer_num_proposals\r\n\r\n            loss_dict[f'{prefix}_loss_cls'] = layer_loss_cls\r\n            loss_dict[f'{prefix}_loss_bbox'] = layer_loss_bbox\r\n            loss_dict[f'{prefix}_matched_ious'] = layer_match_ious\r\n\r\n        start = 0\r\n        for idx_layer in range(self.num_img_decoder_layers):\r\n            prefix = f'layer_img_{idx_layer}'\r\n            layer_num_proposals = self.num_img_proposals\r\n            layer_labels_2d = labels_2d[..., start:start + layer_num_proposals].reshape(-1)\r\n            layer_label_weights_2d = label_weights_2d[..., start:start + layer_num_proposals].reshape(-1)\r\n            layer_score_2d = preds_dict['cls'][..., start:start + layer_num_proposals]\r\n            layer_cls_score_2d = layer_score_2d.permute(0, 2, 1).reshape(-1, self.num_classes)\r\n            layer_loss_cls_2d = self.loss_cls(layer_cls_score_2d, layer_labels_2d, layer_label_weights_2d, avg_factor=max(num_pos_layer_2d[idx_layer], 1))\r\n            preds_2d_center = preds_dict['center_2d'][..., start:start + layer_num_proposals]  # [bs, 2, num_proposal]\r\n            preds_2d_depth = preds_dict['depth_2d'][..., start:start + layer_num_proposals]  # [bs, 1, num_proposal]\r\n            preds_2d_dim = preds_dict['dim_2d'][..., start:start + layer_num_proposals]  # [bs, 3, num_proposal]\r\n            preds_2d_rot = preds_dict['rot_2d'][..., start:start + layer_num_proposals]  # [bs, 2, num_proposal]\r\n            preds_2d_vel = preds_dict['vel_2d'][..., start:start + layer_num_proposals]  # [bs, 2, num_proposal]\r\n            preds_2d = torch.cat([preds_2d_center, preds_2d_depth[:, :1], preds_2d_dim, preds_2d_rot, preds_2d_vel], dim=1).permute(0, 2, 1)  # [bs, num_proposal, 10]\r\n            layer_bbox_targets_2d = bbox_targets_2d[:, start:start + layer_num_proposals, :preds_2d.shape[2]]\r\n            layer_reg_weights_2d = bbox_weights_2d[:, start:start + layer_num_proposals, :preds_2d.shape[2]]\r\n            code_weights = self.train_cfg.get('img_code_weights', None)\r\n            layer_reg_weights_2d = layer_reg_weights_2d * layer_reg_weights_2d.new_tensor(code_weights)\r\n            layer_loss_center_2d = self.loss_center_2d(preds_2d[...,:2], layer_bbox_targets_2d[...,:2], layer_reg_weights_2d[...,:2], avg_factor=max(num_pos_layer_2d[idx_layer], 1))\r\n\r\n            layer_loss_depth_2d = self.loss_bbox(preds_2d[...,2:3], layer_bbox_targets_2d[...,2:3], layer_reg_weights_2d[...,2:3], avg_factor=max(num_pos_layer_2d[idx_layer], 1))\r\n            layer_loss_dim_2d = self.loss_bbox(preds_2d[...,3:6], layer_bbox_targets_2d[...,3:6], layer_reg_weights_2d[...,3:6], avg_factor=max(num_pos_layer_2d[idx_layer], 1))\r\n            layer_loss_rot_2d = self.loss_bbox(preds_2d[...,6:8], layer_bbox_targets_2d[...,6:8], layer_reg_weights_2d[...,6:8], avg_factor=max(num_pos_layer_2d[idx_layer], 1))\r\n            layer_match_ious_2d = matched_ious_2d[..., start:start + layer_num_proposals]\r\n            layer_match_ious_2d = torch.sum(layer_match_ious_2d*(layer_match_ious_2d>=0), dim=-1) / (torch.sum(layer_match_ious_2d>=0, dim=-1) + 1e-2)\r\n            layer_match_ious_2d = torch.mean(layer_match_ious_2d)\r\n            start += layer_num_proposals\r\n            loss_dict[f'{prefix}_loss_cls_2d'] = layer_loss_cls_2d\r\n            loss_dict[f'{prefix}_loss_center_2d'] = 
layer_loss_center_2d\r\n            loss_dict[f'{prefix}_loss_depth_2d'] = layer_loss_depth_2d\r\n            loss_dict[f'{prefix}_loss_dim_2d'] = layer_loss_dim_2d\r\n            loss_dict[f'{prefix}_loss_rot_2d'] = layer_loss_rot_2d\r\n            if preds_2d.shape[-1] > 8:\r\n                layer_loss_vel_2d = self.loss_bbox(preds_2d[...,8:10], layer_bbox_targets_2d[...,8:10], layer_reg_weights_2d[...,8:10], avg_factor=max(num_pos_layer_2d[idx_layer], 1))\r\n                loss_dict[f'{prefix}_loss_vel_2d'] = layer_loss_vel_2d\r\n            else:\r\n                layer_loss_vel_2d = 0\r\n            loss_dict[f'{prefix}_matched_ious_2d'] = layer_match_ious_2d\r\n            loss_dict[f'{prefix}_reg_bbox_2d'] = (layer_loss_center_2d+layer_loss_depth_2d+layer_loss_dim_2d+layer_loss_rot_2d+layer_loss_vel_2d).detach()\r\n        if self.view_transform:\r\n            layer_labels_view = labels_view.reshape(-1)\r\n            layer_label_weights_view = label_weights_view.reshape(-1)\r\n            layer_cls_score = preds_dict['heatmap_view'].permute(0, 2, 1).reshape(-1, self.num_classes)\r\n            layer_loss_cls_view = self.loss_cls(\r\n                layer_cls_score, layer_labels_view, layer_label_weights_view, avg_factor=max(num_pos_layer_view[0], 1)\r\n            )\r\n            layer_center_view = preds_dict['center_view']\r\n            layer_height_view = preds_dict['height_view']\r\n            layer_rot_view = preds_dict['rot_view']\r\n            layer_dim_view = preds_dict['dim_view']\r\n            preds_view = torch.cat([layer_center_view, layer_height_view, layer_dim_view, layer_rot_view],\r\n                                   dim=1).permute(0, 2, 1)  # [BS, num_proposals, code_size]\r\n            if 'vel' in preds_dict.keys():\r\n                layer_vel_view = preds_dict['vel_view']\r\n                preds_view = torch.cat([layer_center_view, layer_height_view, layer_dim_view, layer_rot_view, layer_vel_view],\r\n                                  dim=1).permute(0, 2, 1)  # [BS, num_proposals, code_size]\r\n            code_weights = self.train_cfg.get('code_weights', None)\r\n            layer_reg_weights_view = bbox_weights_view * bbox_weights_view.new_tensor(code_weights)\r\n            layer_loss_bbox_view = self.loss_bbox(preds_view, bbox_targets_view, layer_reg_weights_view, avg_factor=max(num_pos_layer_view[0], 1))\r\n\r\n            layer_match_ious_view = matched_ious_view\r\n            layer_match_ious_view = torch.sum(layer_match_ious_view * (layer_match_ious_view >= 0), dim=-1) / torch.sum(\r\n                layer_match_ious_view >= 0, dim=-1)\r\n            layer_match_ious_view = torch.mean(layer_match_ious_view)\r\n            loss_dict['view_loss_cls'] = layer_loss_cls_view\r\n\r\n            loss_dict['view_loss_bbox'] = layer_loss_bbox_view\r\n            loss_dict['view_matched_ious'] = layer_match_ious_view\r\n\r\n        return loss_dict\r\n\r\n    def get_bboxes(self, preds_dicts, img_metas, img=None, rescale=False, for_roi=False):\r\n        \"\"\"Generate bboxes from bbox head predictions.\r\n\r\n        Args:\r\n            preds_dicts (tuple[list[dict]]): Prediction results.\r\n\r\n        Returns:\r\n            list[list[dict]]: Decoded bbox, scores and labels for each layer & each batch\r\n        \"\"\"\r\n        rets = []\r\n        for id, preds_dict in enumerate(preds_dicts):\r\n            layer_num_proposal = self.num_proposals + self.num_img_proposals\r\n            batch_size = preds_dict[0]['heatmap'].shape[0]\r\n\r\n      
      batch_score_raw = preds_dict[0]['heatmap'][..., -layer_num_proposal:].sigmoid()\r\n\r\n            one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1)\r\n            query_heatmap_score = preds_dict[0]['query_heatmap_score'] * one_hot\r\n            one_hot_img = F.one_hot(self.img_query_label, num_classes=self.num_classes).permute(0, 2, 1)\r\n            img_query_label_decoder = torch.max(preds_dict[0]['cls'], dim=1)[1]\r\n            one_hot_img_decoder = F.one_hot(img_query_label_decoder, num_classes=self.num_classes).permute(0, 2, 1)\r\n            img_query_heatmap_score = preds_dict[0]['img_query_heatmap_score'] * one_hot_img * one_hot_img_decoder * 0.5\r\n            query_heatmap_score = torch.cat([query_heatmap_score, img_query_heatmap_score], dim=2)\r\n\r\n\r\n            batch_score = batch_score_raw * query_heatmap_score\r\n\r\n            batch_center = preds_dict[0]['center'][..., -layer_num_proposal:]\r\n            batch_height = preds_dict[0]['height'][..., -layer_num_proposal:]\r\n            batch_dim = preds_dict[0]['dim'][..., -layer_num_proposal:]\r\n            batch_rot = preds_dict[0]['rot'][..., -layer_num_proposal:]\r\n\r\n            batch_vel = None\r\n            if 'vel' in preds_dict[0]:\r\n                batch_vel = preds_dict[0]['vel'][..., -layer_num_proposal:]\r\n\r\n            temp = self.bbox_coder.decode(batch_score, batch_rot, batch_dim, batch_center, batch_height, batch_vel, filter=True)\r\n\r\n            if self.test_cfg['dataset'] == 'nuScenes':\r\n                self.tasks = [\r\n                    dict(num_class=1, class_names=['car'], indices=[0], radius=0.35),\r\n                    dict(num_class=1, class_names=['truck'], indices=[1], radius=0.35),\r\n                    dict(num_class=1, class_names=['construction_vehicle'], indices=[2], radius=0.35),\r\n                    dict(num_class=1, class_names=['bus'], indices=[3], radius=0.35),\r\n                    dict(num_class=1, class_names=['trailer'], indices=[4], radius=0.35),\r\n                    dict(num_class=1, class_names=['barrier'], indices=[5], radius=0.175),\r\n                    dict(num_class=1, class_names=['motorcycle'], indices=[6], radius=0.1),\r\n                    dict(num_class=1, class_names=['bicycle'], indices=[7], radius=-1),\r\n\r\n                    dict(num_class=1, class_names=['pedestrian'], indices=[8], radius=0.1),\r\n                    dict(num_class=1, class_names=['traffic_cone'], indices=[9], radius=0.1),\r\n                ]\r\n\r\n                # self.tasks = [\r\n                #     dict(num_class=8, class_names=[], indices=[0, 1, 2, 3, 4, 5, 6, 7], radius=-1),\r\n                #     dict(num_class=1, class_names=['pedestrian'], indices=[8], radius=0.175),\r\n                #     dict(num_class=1, class_names=['traffic_cone'], indices=[9], radius=0.175),\r\n                # ]\r\n            elif self.test_cfg['dataset'] == 'Waymo':\r\n                self.tasks = [\r\n                    dict(num_class=1, class_names=['Car'], indices=[0], radius=0.7),\r\n                    dict(num_class=1, class_names=['Pedestrian'], indices=[1], radius=0.7),\r\n                    dict(num_class=1, class_names=['Cyclist'], indices=[2], radius=0.7),\r\n                ]\r\n\r\n            ret_layer = []\r\n            for i in range(batch_size):\r\n                boxes3d = temp[i]['bboxes']\r\n                scores = temp[i]['scores']\r\n                labels = temp[i]['labels']\r\n\r\n                ## 
adopt circle nms for different categories\r\n                if self.test_cfg['nms_type'] != None:\r\n                    keep_mask = torch.zeros_like(scores)\r\n                    for task in self.tasks:\r\n                        task_mask = torch.zeros_like(scores)\r\n                        for cls_idx in task['indices']:\r\n                            task_mask += labels == cls_idx\r\n                        task_mask = task_mask.bool()\r\n                        if task['radius'] > 0 and task_mask.sum() > 0:\r\n                            if self.test_cfg['nms_type'] == 'circle':\r\n                                boxes_for_nms = torch.cat([boxes3d[task_mask][:, :2], scores[:, None][task_mask]], dim=1)\r\n                                task_keep_indices = torch.tensor(\r\n                                    circle_nms(\r\n                                        boxes_for_nms.detach().cpu().numpy(),\r\n                                        task['radius'],\r\n                                        # 5,\r\n                                        post_max_size=500\r\n                                    )\r\n                                )\r\n                            else:\r\n                                boxes_for_nms = xywhr2xyxyr(img_metas[i]['box_type_3d'](boxes3d[task_mask][:, :7], 7).bev)\r\n                                top_scores = scores[task_mask]\r\n\r\n                                task_keep_indices = nms_gpu(\r\n                                    boxes_for_nms,\r\n                                    top_scores,\r\n                                    thresh=task['radius'],\r\n                                    # pre_maxsize=self.test_cfg['pre_maxsize'],\r\n                                    # post_max_size=self.test_cfg['post_maxsize'],\r\n                                )\r\n                        else:\r\n                            task_keep_indices = torch.arange(task_mask.sum())\r\n                        if task_keep_indices.shape[0] != 0:\r\n                            keep_indices = torch.where(task_mask != 0)[0][task_keep_indices]\r\n                            keep_mask[keep_indices] = 1\r\n                    keep_mask = keep_mask.bool()\r\n                    ret = dict(bboxes=boxes3d[keep_mask], scores=scores[keep_mask], labels=labels[keep_mask])\r\n                else:  # no nms\r\n                    ret = dict(bboxes=boxes3d, scores=scores, labels=labels)\r\n                ret_layer.append(ret)\r\n            rets.append(ret_layer)\r\n        assert len(rets) == 1\r\n        assert len(rets[0]) == 1\r\n\r\n        res = [[\r\n            img_metas[0]['box_type_3d'](rets[0][0]['bboxes'], box_dim=rets[0][0]['bboxes'].shape[-1]),\r\n            rets[0][0]['scores'],\r\n            rets[0][0]['labels'].int()\r\n        ]]\r\n        return res\r\n\r\n    def get_layer_num_proposal(self, idx_layer):\r\n        if idx_layer >= self.num_pts_decoder_layers:\r\n            layer_num_proposal = self.num_proposals + self.num_img_proposals\r\n        else:\r\n            layer_num_proposal = self.num_proposals\r\n\r\n        return layer_num_proposal"
  },
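  {
    "path": "examples/heatmap_level_target_sketch.py",
    "content": "'''An illustrative sketch (not part of the original model code).\n\nIt mirrors, in simplified form, how the fusion head above builds its multi-level\n2D heatmap targets: each ground-truth box is assigned to a pyramid level by its\nlonger side (thresholds 48 / 96 / 192 px), the center and gaussian radius are\ndownscaled to that level, and a gaussian peak is splatted onto the per-class,\nper-view heatmap. The helper functions, the file path, and the numbers in the\ndemo are assumptions chosen for illustration; the actual head relies on the\nmmdet3d gaussian utilities and the values in its training config.\n'''\nimport torch\n\n\ndef assign_level(max_side, level_num=4):\n    # Same thresholds as the head: <48 px -> level 0, <96 -> 1, <192 -> 2, else 3.\n    thresholds = [48, 96, 192][:max(level_num - 1, 0)]\n    for lvl, thr in enumerate(thresholds):\n        if max_side < thr:\n            return lvl\n    return level_num - 1\n\n\ndef draw_gaussian(heatmap, center, radius):\n    # Splat an isotropic gaussian peak (max value 1) centered at center onto a\n    # single-channel heatmap, keeping the element-wise maximum in place.\n    h, w = heatmap.shape\n    cx, cy = float(center[0]), float(center[1])\n    sigma = max(float(radius), 1.0) / 3.0\n    ys, xs = torch.meshgrid(torch.arange(h, dtype=torch.float32),\n                            torch.arange(w, dtype=torch.float32),\n                            indexing='ij')\n    gauss = torch.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2 * sigma ** 2))\n    torch.maximum(heatmap, gauss, out=heatmap)\n    return heatmap\n\n\nif __name__ == '__main__':\n    num_classes, num_views, level_num, stride = 10, 6, 4, 4\n    base_h, base_w = 112, 200  # stride-4 map of a hypothetical 448 x 800 image\n    heatmaps, h, w = [], base_h, base_w\n    for _ in range(level_num):\n        heatmaps.append(torch.zeros(num_classes, num_views, h, w))\n        h, w = h // 2, w // 2\n\n    # One hypothetical box: class 3, camera view 1, 60 x 30 px, center (400, 180).\n    cls_id, view_id = 3, 1\n    box_w, box_h, center_x, center_y = 60.0, 30.0, 400.0, 180.0\n\n    lvl = assign_level(max(box_w, box_h), level_num)\n    scale = 2 ** lvl\n    center = torch.tensor([center_x, center_y]) / stride / scale\n    radius = max(2, int(0.15 * max(box_w, box_h) / stride / scale))  # crude stand-in radius\n    draw_gaussian(heatmaps[lvl][cls_id, view_id], center, radius)\n    print('assigned level:', lvl, 'peak value:', heatmaps[lvl][cls_id, view_id].max().item())\n"
  },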
  {
    "path": "mmdet3d/models/dense_heads/ssd_3d_head.py",
    "content": "import torch\nfrom mmcv.ops.nms import batched_nms\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.bbox.structures import (DepthInstance3DBoxes,\n                                          LiDARInstance3DBoxes,\n                                          rotation_3d_in_axis)\nfrom mmdet3d.models.builder import build_loss\nfrom mmdet.core import multi_apply\nfrom mmdet.models import HEADS\nfrom .vote_head import VoteHead\n\n\n@HEADS.register_module()\nclass SSD3DHead(VoteHead):\n    r\"\"\"Bbox head of `3DSSD <https://arxiv.org/abs/2002.10187>`_.\n\n    Args:\n        num_classes (int): The number of class.\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and\n            decoding boxes.\n        in_channels (int): The number of input feature channel.\n        train_cfg (dict): Config for training.\n        test_cfg (dict): Config for testing.\n        vote_module_cfg (dict): Config of VoteModule for point-wise votes.\n        vote_aggregation_cfg (dict): Config of vote aggregation layer.\n        pred_layer_cfg (dict): Config of classfication and regression\n            prediction layers.\n        conv_cfg (dict): Config of convolution in prediction layer.\n        norm_cfg (dict): Config of BN in prediction layer.\n        act_cfg (dict): Config of activation in prediction layer.\n        objectness_loss (dict): Config of objectness loss.\n        center_loss (dict): Config of center loss.\n        dir_class_loss (dict): Config of direction classification loss.\n        dir_res_loss (dict): Config of direction residual regression loss.\n        size_res_loss (dict): Config of size residual regression loss.\n        corner_loss (dict): Config of bbox corners regression loss.\n        vote_loss (dict): Config of candidate points regression loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 bbox_coder,\n                 in_channels=256,\n                 train_cfg=None,\n                 test_cfg=None,\n                 vote_module_cfg=None,\n                 vote_aggregation_cfg=None,\n                 pred_layer_cfg=None,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 objectness_loss=None,\n                 center_loss=None,\n                 dir_class_loss=None,\n                 dir_res_loss=None,\n                 size_res_loss=None,\n                 corner_loss=None,\n                 vote_loss=None):\n        super(SSD3DHead, self).__init__(\n            num_classes,\n            bbox_coder,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            vote_module_cfg=vote_module_cfg,\n            vote_aggregation_cfg=vote_aggregation_cfg,\n            pred_layer_cfg=pred_layer_cfg,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            objectness_loss=objectness_loss,\n            center_loss=center_loss,\n            dir_class_loss=dir_class_loss,\n            dir_res_loss=dir_res_loss,\n            size_class_loss=None,\n            size_res_loss=size_res_loss,\n            semantic_loss=None)\n\n        self.corner_loss = build_loss(corner_loss)\n        self.vote_loss = build_loss(vote_loss)\n        self.num_candidates = vote_module_cfg['num_points']\n\n    def _get_cls_out_channels(self):\n        \"\"\"Return the channel number of classification outputs.\"\"\"\n        # Class numbers (k) + objectness (1)\n    
    return self.num_classes\n\n    def _get_reg_out_channels(self):\n        \"\"\"Return the channel number of regression outputs.\"\"\"\n        # Bbox classification and regression\n        # (center residual (3), size regression (3)\n        # heading class+residual (num_dir_bins*2)),\n        return 3 + 3 + self.num_dir_bins * 2\n\n    def _extract_input(self, feat_dict):\n        \"\"\"Extract inputs from features dictionary.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            torch.Tensor: Coordinates of input points.\n            torch.Tensor: Features of input points.\n            torch.Tensor: Indices of input points.\n        \"\"\"\n        seed_points = feat_dict['sa_xyz'][-1]\n        seed_features = feat_dict['sa_features'][-1]\n        seed_indices = feat_dict['sa_indices'][-1]\n\n        return seed_points, seed_features, seed_indices\n\n    @force_fp32(apply_to=('bbox_preds', ))\n    def loss(self,\n             bbox_preds,\n             points,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             pts_semantic_mask=None,\n             pts_instance_mask=None,\n             img_metas=None,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of SSD3DHead.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \\\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (None | list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (None | list[torch.Tensor]): Point-wise\n                instance mask.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (None | list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict: Losses of 3DSSD.\n        \"\"\"\n        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,\n                                   pts_semantic_mask, pts_instance_mask,\n                                   bbox_preds)\n        (vote_targets, center_targets, size_res_targets, dir_class_targets,\n         dir_res_targets, mask_targets, centerness_targets, corner3d_targets,\n         vote_mask, positive_mask, negative_mask, centerness_weights,\n         box_loss_weights, heading_res_loss_weight) = targets\n\n        # calculate centerness loss\n        centerness_loss = self.objectness_loss(\n            bbox_preds['obj_scores'].transpose(2, 1),\n            centerness_targets,\n            weight=centerness_weights)\n\n        # calculate center loss\n        center_loss = self.center_loss(\n            bbox_preds['center_offset'],\n            center_targets,\n            weight=box_loss_weights.unsqueeze(-1))\n\n        # calculate direction class loss\n        dir_class_loss = self.dir_class_loss(\n            bbox_preds['dir_class'].transpose(1, 2),\n            dir_class_targets,\n            weight=box_loss_weights)\n\n        # calculate direction residual loss\n        dir_res_loss = self.dir_res_loss(\n            bbox_preds['dir_res_norm'],\n            dir_res_targets.unsqueeze(-1).repeat(1, 1, self.num_dir_bins),\n            weight=heading_res_loss_weight)\n\n        # calculate size residual loss\n        size_loss = self.size_res_loss(\n            bbox_preds['size'],\n            
size_res_targets,\n            weight=box_loss_weights.unsqueeze(-1))\n\n        # calculate corner loss\n        one_hot_dir_class_targets = dir_class_targets.new_zeros(\n            bbox_preds['dir_class'].shape)\n        one_hot_dir_class_targets.scatter_(2, dir_class_targets.unsqueeze(-1),\n                                           1)\n        pred_bbox3d = self.bbox_coder.decode(\n            dict(\n                center=bbox_preds['center'],\n                dir_res=bbox_preds['dir_res'],\n                dir_class=one_hot_dir_class_targets,\n                size=bbox_preds['size']))\n        pred_bbox3d = pred_bbox3d.reshape(-1, pred_bbox3d.shape[-1])\n        pred_bbox3d = img_metas[0]['box_type_3d'](\n            pred_bbox3d.clone(),\n            box_dim=pred_bbox3d.shape[-1],\n            with_yaw=self.bbox_coder.with_rot,\n            origin=(0.5, 0.5, 0.5))\n        pred_corners3d = pred_bbox3d.corners.reshape(-1, 8, 3)\n        corner_loss = self.corner_loss(\n            pred_corners3d,\n            corner3d_targets.reshape(-1, 8, 3),\n            weight=box_loss_weights.view(-1, 1, 1))\n\n        # calculate vote loss\n        vote_loss = self.vote_loss(\n            bbox_preds['vote_offset'].transpose(1, 2),\n            vote_targets,\n            weight=vote_mask.unsqueeze(-1))\n\n        losses = dict(\n            centerness_loss=centerness_loss,\n            center_loss=center_loss,\n            dir_class_loss=dir_class_loss,\n            dir_res_loss=dir_res_loss,\n            size_res_loss=size_loss,\n            corner_loss=corner_loss,\n            vote_loss=vote_loss)\n\n        return losses\n\n    def get_targets(self,\n                    points,\n                    gt_bboxes_3d,\n                    gt_labels_3d,\n                    pts_semantic_mask=None,\n                    pts_instance_mask=None,\n                    bbox_preds=None):\n        \"\"\"Generate targets of ssd3d head.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \\\n                bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): Labels of each batch.\n            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance\n                label of each batch.\n            bbox_preds (torch.Tensor): Bounding box predictions of ssd3d head.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of ssd3d head.\n        \"\"\"\n        # find empty example\n        for index in range(len(gt_labels_3d)):\n            if len(gt_labels_3d[index]) == 0:\n                fake_box = gt_bboxes_3d[index].tensor.new_zeros(\n                    1, gt_bboxes_3d[index].tensor.shape[-1])\n                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)\n                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)\n\n        if pts_semantic_mask is None:\n            pts_semantic_mask = [None for i in range(len(gt_labels_3d))]\n            pts_instance_mask = [None for i in range(len(gt_labels_3d))]\n\n        aggregated_points = [\n            bbox_preds['aggregated_points'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        seed_points = [\n            bbox_preds['seed_points'][i, :self.num_candidates].detach()\n            for i in range(len(gt_labels_3d))\n        ]\n\n        (vote_targets, 
center_targets, size_res_targets, dir_class_targets,\n         dir_res_targets, mask_targets, centerness_targets, corner3d_targets,\n         vote_mask, positive_mask, negative_mask) = multi_apply(\n             self.get_targets_single, points, gt_bboxes_3d, gt_labels_3d,\n             pts_semantic_mask, pts_instance_mask, aggregated_points,\n             seed_points)\n\n        center_targets = torch.stack(center_targets)\n        positive_mask = torch.stack(positive_mask)\n        negative_mask = torch.stack(negative_mask)\n        dir_class_targets = torch.stack(dir_class_targets)\n        dir_res_targets = torch.stack(dir_res_targets)\n        size_res_targets = torch.stack(size_res_targets)\n        mask_targets = torch.stack(mask_targets)\n        centerness_targets = torch.stack(centerness_targets).detach()\n        corner3d_targets = torch.stack(corner3d_targets)\n        vote_targets = torch.stack(vote_targets)\n        vote_mask = torch.stack(vote_mask)\n\n        center_targets -= bbox_preds['aggregated_points']\n\n        centerness_weights = (positive_mask +\n                              negative_mask).unsqueeze(-1).repeat(\n                                  1, 1, self.num_classes).float()\n        centerness_weights = centerness_weights / \\\n            (centerness_weights.sum() + 1e-6)\n        vote_mask = vote_mask / (vote_mask.sum() + 1e-6)\n\n        box_loss_weights = positive_mask / (positive_mask.sum() + 1e-6)\n\n        batch_size, proposal_num = dir_class_targets.shape[:2]\n        heading_label_one_hot = dir_class_targets.new_zeros(\n            (batch_size, proposal_num, self.num_dir_bins))\n        heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1)\n        heading_res_loss_weight = heading_label_one_hot * \\\n            box_loss_weights.unsqueeze(-1)\n\n        return (vote_targets, center_targets, size_res_targets,\n                dir_class_targets, dir_res_targets, mask_targets,\n                centerness_targets, corner3d_targets, vote_mask, positive_mask,\n                negative_mask, centerness_weights, box_loss_weights,\n                heading_res_loss_weight)\n\n    def get_targets_single(self,\n                           points,\n                           gt_bboxes_3d,\n                           gt_labels_3d,\n                           pts_semantic_mask=None,\n                           pts_instance_mask=None,\n                           aggregated_points=None,\n                           seed_points=None):\n        \"\"\"Generate targets of ssd3d head for single batch.\n\n        Args:\n            points (torch.Tensor): Points of each batch.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \\\n                boxes of each batch.\n            gt_labels_3d (torch.Tensor): Labels of each batch.\n            pts_semantic_mask (None | torch.Tensor): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | torch.Tensor): Point-wise instance\n                label of each batch.\n            aggregated_points (torch.Tensor): Aggregated points from\n                candidate points layer.\n            seed_points (torch.Tensor): Seed points of candidate points.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of ssd3d head.\n        \"\"\"\n        assert self.bbox_coder.with_rot or pts_semantic_mask is not None\n        gt_bboxes_3d = gt_bboxes_3d.to(points.device)\n        valid_gt = gt_labels_3d != -1\n        gt_bboxes_3d = gt_bboxes_3d[valid_gt]\n        
gt_labels_3d = gt_labels_3d[valid_gt]\n\n        # Generate fake GT for empty scene\n        if valid_gt.sum() == 0:\n            vote_targets = points.new_zeros(self.num_candidates, 3)\n            center_targets = points.new_zeros(self.num_candidates, 3)\n            size_res_targets = points.new_zeros(self.num_candidates, 3)\n            dir_class_targets = points.new_zeros(\n                self.num_candidates, dtype=torch.int64)\n            dir_res_targets = points.new_zeros(self.num_candidates)\n            mask_targets = points.new_zeros(\n                self.num_candidates, dtype=torch.int64)\n            centerness_targets = points.new_zeros(self.num_candidates,\n                                                  self.num_classes)\n            corner3d_targets = points.new_zeros(self.num_candidates, 8, 3)\n            vote_mask = points.new_zeros(self.num_candidates, dtype=torch.bool)\n            positive_mask = points.new_zeros(\n                self.num_candidates, dtype=torch.bool)\n            negative_mask = points.new_ones(\n                self.num_candidates, dtype=torch.bool)\n            return (vote_targets, center_targets, size_res_targets,\n                    dir_class_targets, dir_res_targets, mask_targets,\n                    centerness_targets, corner3d_targets, vote_mask,\n                    positive_mask, negative_mask)\n\n        gt_corner3d = gt_bboxes_3d.corners\n\n        (center_targets, size_targets, dir_class_targets,\n         dir_res_targets) = self.bbox_coder.encode(gt_bboxes_3d, gt_labels_3d)\n\n        points_mask, assignment = self._assign_targets_by_points_inside(\n            gt_bboxes_3d, aggregated_points)\n\n        center_targets = center_targets[assignment]\n        size_res_targets = size_targets[assignment]\n        mask_targets = gt_labels_3d[assignment]\n        dir_class_targets = dir_class_targets[assignment]\n        dir_res_targets = dir_res_targets[assignment]\n        corner3d_targets = gt_corner3d[assignment]\n\n        top_center_targets = center_targets.clone()\n        top_center_targets[:, 2] += size_res_targets[:, 2]\n        dist = torch.norm(aggregated_points - top_center_targets, dim=1)\n        dist_mask = dist < self.train_cfg.pos_distance_thr\n        positive_mask = (points_mask.max(1)[0] > 0) * dist_mask\n        negative_mask = (points_mask.max(1)[0] == 0)\n\n        # Centerness loss targets\n        canonical_xyz = aggregated_points - center_targets\n        if self.bbox_coder.with_rot:\n            # TODO: Align points rotation implementation of\n            # LiDARInstance3DBoxes and DepthInstance3DBoxes\n            canonical_xyz = rotation_3d_in_axis(\n                canonical_xyz.unsqueeze(0).transpose(0, 1),\n                -gt_bboxes_3d.yaw[assignment], 2).squeeze(1)\n        distance_front = torch.clamp(\n            size_res_targets[:, 0] - canonical_xyz[:, 0], min=0)\n        distance_back = torch.clamp(\n            size_res_targets[:, 0] + canonical_xyz[:, 0], min=0)\n        distance_left = torch.clamp(\n            size_res_targets[:, 1] - canonical_xyz[:, 1], min=0)\n        distance_right = torch.clamp(\n            size_res_targets[:, 1] + canonical_xyz[:, 1], min=0)\n        distance_top = torch.clamp(\n            size_res_targets[:, 2] - canonical_xyz[:, 2], min=0)\n        distance_bottom = torch.clamp(\n            size_res_targets[:, 2] + canonical_xyz[:, 2], min=0)\n\n        centerness_l = torch.min(distance_front, distance_back) / torch.max(\n            distance_front, 
distance_back)\n        centerness_w = torch.min(distance_left, distance_right) / torch.max(\n            distance_left, distance_right)\n        centerness_h = torch.min(distance_bottom, distance_top) / torch.max(\n            distance_bottom, distance_top)\n        centerness_targets = torch.clamp(\n            centerness_l * centerness_w * centerness_h, min=0)\n        centerness_targets = centerness_targets.pow(1 / 3.0)\n        centerness_targets = torch.clamp(centerness_targets, min=0, max=1)\n\n        proposal_num = centerness_targets.shape[0]\n        one_hot_centerness_targets = centerness_targets.new_zeros(\n            (proposal_num, self.num_classes))\n        one_hot_centerness_targets.scatter_(1, mask_targets.unsqueeze(-1), 1)\n        centerness_targets = centerness_targets.unsqueeze(\n            1) * one_hot_centerness_targets\n\n        # Vote loss targets\n        enlarged_gt_bboxes_3d = gt_bboxes_3d.enlarged_box(\n            self.train_cfg.expand_dims_length)\n        enlarged_gt_bboxes_3d.tensor[:, 2] -= self.train_cfg.expand_dims_length\n        vote_mask, vote_assignment = self._assign_targets_by_points_inside(\n            enlarged_gt_bboxes_3d, seed_points)\n\n        vote_targets = gt_bboxes_3d.gravity_center\n        vote_targets = vote_targets[vote_assignment] - seed_points\n        vote_mask = vote_mask.max(1)[0] > 0\n\n        return (vote_targets, center_targets, size_res_targets,\n                dir_class_targets, dir_res_targets, mask_targets,\n                centerness_targets, corner3d_targets, vote_mask, positive_mask,\n                negative_mask)\n\n    def get_bboxes(self, points, bbox_preds, input_metas, rescale=False):\n        \"\"\"Generate bboxes from sdd3d head predictions.\n\n        Args:\n            points (torch.Tensor): Input points.\n            bbox_preds (dict): Predictions from sdd3d head.\n            input_metas (list[dict]): Point cloud and image's meta info.\n            rescale (bool): Whether to rescale bboxes.\n\n        Returns:\n            list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.\n        \"\"\"\n        # decode boxes\n        sem_scores = F.sigmoid(bbox_preds['obj_scores']).transpose(1, 2)\n        obj_scores = sem_scores.max(-1)[0]\n        bbox3d = self.bbox_coder.decode(bbox_preds)\n\n        batch_size = bbox3d.shape[0]\n        results = list()\n\n        for b in range(batch_size):\n            bbox_selected, score_selected, labels = self.multiclass_nms_single(\n                obj_scores[b], sem_scores[b], bbox3d[b], points[b, ..., :3],\n                input_metas[b])\n            bbox = input_metas[b]['box_type_3d'](\n                bbox_selected.clone(),\n                box_dim=bbox_selected.shape[-1],\n                with_yaw=self.bbox_coder.with_rot)\n            results.append((bbox, score_selected, labels))\n\n        return results\n\n    def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,\n                              input_meta):\n        \"\"\"Multi-class nms in single batch.\n\n        Args:\n            obj_scores (torch.Tensor): Objectness score of bounding boxes.\n            sem_scores (torch.Tensor): semantic class score of bounding boxes.\n            bbox (torch.Tensor): Predicted bounding boxes.\n            points (torch.Tensor): Input points.\n            input_meta (dict): Point cloud and image's meta info.\n\n        Returns:\n            tuple[torch.Tensor]: Bounding boxes, scores and labels.\n        \"\"\"\n        num_bbox = bbox.shape[0]\n  
      bbox = input_meta['box_type_3d'](\n            bbox.clone(),\n            box_dim=bbox.shape[-1],\n            with_yaw=self.bbox_coder.with_rot,\n            origin=(0.5, 0.5, 1.0))\n\n        if isinstance(bbox, LiDARInstance3DBoxes):\n            box_idx = bbox.points_in_boxes(points)\n            box_indices = box_idx.new_zeros([num_bbox + 1])\n            box_idx[box_idx == -1] = num_bbox\n            box_indices.scatter_add_(0, box_idx.long(),\n                                     box_idx.new_ones(box_idx.shape))\n            box_indices = box_indices[:-1]\n            nonempty_box_mask = box_indices >= 0\n        elif isinstance(bbox, DepthInstance3DBoxes):\n            box_indices = bbox.points_in_boxes(points)\n            nonempty_box_mask = box_indices.T.sum(1) >= 0\n        else:\n            raise NotImplementedError('Unsupported bbox type!')\n\n        corner3d = bbox.corners\n        minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))\n        minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]\n        minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]\n\n        bbox_classes = torch.argmax(sem_scores, -1)\n        nms_selected = batched_nms(\n            minmax_box3d[nonempty_box_mask][:, [0, 1, 3, 4]],\n            obj_scores[nonempty_box_mask], bbox_classes[nonempty_box_mask],\n            self.test_cfg.nms_cfg)[1]\n\n        if nms_selected.shape[0] > self.test_cfg.max_output_num:\n            nms_selected = nms_selected[:self.test_cfg.max_output_num]\n\n        # filter empty boxes and boxes with low score\n        scores_mask = (obj_scores >= self.test_cfg.score_thr)\n        nonempty_box_inds = torch.nonzero(\n            nonempty_box_mask, as_tuple=False).flatten()\n        nonempty_mask = torch.zeros_like(bbox_classes).scatter(\n            0, nonempty_box_inds[nms_selected], 1)\n        selected = (nonempty_mask.bool() & scores_mask.bool())\n\n        if self.test_cfg.per_class_proposal:\n            bbox_selected, score_selected, labels = [], [], []\n            for k in range(sem_scores.shape[-1]):\n                bbox_selected.append(bbox[selected].tensor)\n                score_selected.append(obj_scores[selected])\n                labels.append(\n                    torch.zeros_like(bbox_classes[selected]).fill_(k))\n            bbox_selected = torch.cat(bbox_selected, 0)\n            score_selected = torch.cat(score_selected, 0)\n            labels = torch.cat(labels, 0)\n        else:\n            bbox_selected = bbox[selected].tensor\n            score_selected = obj_scores[selected]\n            labels = bbox_classes[selected]\n\n        return bbox_selected, score_selected, labels\n\n    def _assign_targets_by_points_inside(self, bboxes_3d, points):\n        \"\"\"Compute assignment by checking whether point is inside bbox.\n\n        Args:\n            bboxes_3d (BaseInstance3DBoxes): Instance of bounding boxes.\n            points (torch.Tensor): Points of a batch.\n\n        Returns:\n            tuple[torch.Tensor]: Flags indicating whether each point is\n                inside bbox and the index of box where each point are in.\n        \"\"\"\n        # TODO: align points_in_boxes function in each box_structures\n        num_bbox = bboxes_3d.tensor.shape[0]\n        if isinstance(bboxes_3d, LiDARInstance3DBoxes):\n            assignment = bboxes_3d.points_in_boxes(points).long()\n            points_mask = assignment.new_zeros(\n                [assignment.shape[0], num_bbox + 1])\n            assignment[assignment == -1] = 
num_bbox\n            points_mask.scatter_(1, assignment.unsqueeze(1), 1)\n            points_mask = points_mask[:, :-1]\n            assignment[assignment == num_bbox] = num_bbox - 1\n        elif isinstance(bboxes_3d, DepthInstance3DBoxes):\n            points_mask = bboxes_3d.points_in_boxes(points)\n            assignment = points_mask.argmax(dim=-1)\n        else:\n            raise NotImplementedError('Unsupported bbox type!')\n\n        return points_mask, assignment\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/train_mixins.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet3d.core import limit_period\nfrom mmdet.core import images_to_levels, multi_apply\n\n\nclass AnchorTrainMixin(object):\n    \"\"\"Mixin class for target assigning of dense heads.\"\"\"\n\n    def anchor_target_3d(self,\n                         anchor_list,\n                         gt_bboxes_list,\n                         input_metas,\n                         gt_bboxes_ignore_list=None,\n                         gt_labels_list=None,\n                         label_channels=1,\n                         num_classes=1,\n                         sampling=True):\n        \"\"\"Compute regression and classification targets for anchors.\n\n        Args:\n            anchor_list (list[list]): Multi level anchors of each image.\n            gt_bboxes_list (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                bboxes of each image.\n            input_metas (list[dict]): Meta info of each image.\n            gt_bboxes_ignore_list (None | list): Ignore list of gt bboxes.\n            gt_labels_list (list[torch.Tensor]): Gt labels of batches.\n            label_channels (int): The channel of labels.\n            num_classes (int): The number of classes.\n            sampling (bool): Whether to sample anchors.\n\n        Returns:\n            tuple (list, list, list, list, list, list, int, int):\n                Anchor targets, including labels, label weights,\n                bbox targets, bbox weights, direction targets,\n                direction weights, number of postive anchors and\n                number of negative anchors.\n        \"\"\"\n        num_imgs = len(input_metas)\n        assert len(anchor_list) == num_imgs\n\n        if isinstance(anchor_list[0][0], list):\n            # sizes of anchors are different\n            # anchor number of a single level\n            num_level_anchors = [\n                sum([anchor.size(0) for anchor in anchors])\n                for anchors in anchor_list[0]\n            ]\n            for i in range(num_imgs):\n                anchor_list[i] = anchor_list[i][0]\n        else:\n            # anchor number of multi levels\n            num_level_anchors = [\n                anchors.view(-1, self.box_code_size).size(0)\n                for anchors in anchor_list[0]\n            ]\n            # concat all level anchors and flags to a single tensor\n            for i in range(num_imgs):\n                anchor_list[i] = torch.cat(anchor_list[i])\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n\n        (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,\n         all_dir_targets, all_dir_weights, pos_inds_list,\n         neg_inds_list) = multi_apply(\n             self.anchor_target_3d_single,\n             anchor_list,\n             gt_bboxes_list,\n             gt_bboxes_ignore_list,\n             gt_labels_list,\n             input_metas,\n             label_channels=label_channels,\n             num_classes=num_classes,\n             sampling=sampling)\n\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in 
neg_inds_list])\n        # split targets to a list w.r.t. multiple levels\n        labels_list = images_to_levels(all_labels, num_level_anchors)\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_anchors)\n        bbox_targets_list = images_to_levels(all_bbox_targets,\n                                             num_level_anchors)\n        bbox_weights_list = images_to_levels(all_bbox_weights,\n                                             num_level_anchors)\n        dir_targets_list = images_to_levels(all_dir_targets, num_level_anchors)\n        dir_weights_list = images_to_levels(all_dir_weights, num_level_anchors)\n        return (labels_list, label_weights_list, bbox_targets_list,\n                bbox_weights_list, dir_targets_list, dir_weights_list,\n                num_total_pos, num_total_neg)\n\n    def anchor_target_3d_single(self,\n                                anchors,\n                                gt_bboxes,\n                                gt_bboxes_ignore,\n                                gt_labels,\n                                input_meta,\n                                label_channels=1,\n                                num_classes=1,\n                                sampling=True):\n        \"\"\"Compute targets of anchors in single batch.\n\n        Args:\n            anchors (torch.Tensor): Concatenated multi-level anchor.\n            gt_bboxes (:obj:`BaseInstance3DBoxes`): Gt bboxes.\n            gt_bboxes_ignore (torch.Tensor): Ignored gt bboxes.\n            gt_labels (torch.Tensor): Gt class labels.\n            input_meta (dict): Meta info of each image.\n            label_channels (int): The channel of labels.\n            num_classes (int): The number of classes.\n            sampling (bool): Whether to sample anchors.\n\n        Returns:\n            tuple[torch.Tensor]: Anchor targets.\n        \"\"\"\n        if isinstance(self.bbox_assigner,\n                      list) and (not isinstance(anchors, list)):\n            feat_size = anchors.size(0) * anchors.size(1) * anchors.size(2)\n            rot_angles = anchors.size(-2)\n            assert len(self.bbox_assigner) == anchors.size(-3)\n            (total_labels, total_label_weights, total_bbox_targets,\n             total_bbox_weights, total_dir_targets, total_dir_weights,\n             total_pos_inds, total_neg_inds) = [], [], [], [], [], [], [], []\n            current_anchor_num = 0\n            for i, assigner in enumerate(self.bbox_assigner):\n                current_anchors = anchors[..., i, :, :].reshape(\n                    -1, self.box_code_size)\n                current_anchor_num += current_anchors.size(0)\n                if self.assign_per_class:\n                    gt_per_cls = (gt_labels == i)\n                    anchor_targets = self.anchor_target_single_assigner(\n                        assigner, current_anchors, gt_bboxes[gt_per_cls, :],\n                        gt_bboxes_ignore, gt_labels[gt_per_cls], input_meta,\n                        num_classes, sampling)\n                else:\n                    anchor_targets = self.anchor_target_single_assigner(\n                        assigner, current_anchors, gt_bboxes, gt_bboxes_ignore,\n                        gt_labels, input_meta, num_classes, sampling)\n\n                (labels, label_weights, bbox_targets, bbox_weights,\n                 dir_targets, dir_weights, pos_inds, neg_inds) = anchor_targets\n                
total_labels.append(labels.reshape(feat_size, 1, rot_angles))\n                total_label_weights.append(\n                    label_weights.reshape(feat_size, 1, rot_angles))\n                total_bbox_targets.append(\n                    bbox_targets.reshape(feat_size, 1, rot_angles,\n                                         anchors.size(-1)))\n                total_bbox_weights.append(\n                    bbox_weights.reshape(feat_size, 1, rot_angles,\n                                         anchors.size(-1)))\n                total_dir_targets.append(\n                    dir_targets.reshape(feat_size, 1, rot_angles))\n                total_dir_weights.append(\n                    dir_weights.reshape(feat_size, 1, rot_angles))\n                total_pos_inds.append(pos_inds)\n                total_neg_inds.append(neg_inds)\n\n            total_labels = torch.cat(total_labels, dim=-2).reshape(-1)\n            total_label_weights = torch.cat(\n                total_label_weights, dim=-2).reshape(-1)\n            total_bbox_targets = torch.cat(\n                total_bbox_targets, dim=-3).reshape(-1, anchors.size(-1))\n            total_bbox_weights = torch.cat(\n                total_bbox_weights, dim=-3).reshape(-1, anchors.size(-1))\n            total_dir_targets = torch.cat(\n                total_dir_targets, dim=-2).reshape(-1)\n            total_dir_weights = torch.cat(\n                total_dir_weights, dim=-2).reshape(-1)\n            total_pos_inds = torch.cat(total_pos_inds, dim=0).reshape(-1)\n            total_neg_inds = torch.cat(total_neg_inds, dim=0).reshape(-1)\n            return (total_labels, total_label_weights, total_bbox_targets,\n                    total_bbox_weights, total_dir_targets, total_dir_weights,\n                    total_pos_inds, total_neg_inds)\n        elif isinstance(self.bbox_assigner, list) and isinstance(\n                anchors, list):\n            # class-aware anchors with different feature map sizes\n            assert len(self.bbox_assigner) == len(anchors), \\\n                'The number of bbox assigners and anchors should be the same.'\n            (total_labels, total_label_weights, total_bbox_targets,\n             total_bbox_weights, total_dir_targets, total_dir_weights,\n             total_pos_inds, total_neg_inds) = [], [], [], [], [], [], [], []\n            current_anchor_num = 0\n            for i, assigner in enumerate(self.bbox_assigner):\n                current_anchors = anchors[i]\n                current_anchor_num += current_anchors.size(0)\n                if self.assign_per_class:\n                    gt_per_cls = (gt_labels == i)\n                    anchor_targets = self.anchor_target_single_assigner(\n                        assigner, current_anchors, gt_bboxes[gt_per_cls, :],\n                        gt_bboxes_ignore, gt_labels[gt_per_cls], input_meta,\n                        num_classes, sampling)\n                else:\n                    anchor_targets = self.anchor_target_single_assigner(\n                        assigner, current_anchors, gt_bboxes, gt_bboxes_ignore,\n                        gt_labels, input_meta, num_classes, sampling)\n\n                (labels, label_weights, bbox_targets, bbox_weights,\n                 dir_targets, dir_weights, pos_inds, neg_inds) = anchor_targets\n                total_labels.append(labels)\n                total_label_weights.append(label_weights)\n                total_bbox_targets.append(\n                    bbox_targets.reshape(-1, anchors[i].size(-1)))\n      
          total_bbox_weights.append(\n                    bbox_weights.reshape(-1, anchors[i].size(-1)))\n                total_dir_targets.append(dir_targets)\n                total_dir_weights.append(dir_weights)\n                total_pos_inds.append(pos_inds)\n                total_neg_inds.append(neg_inds)\n\n            total_labels = torch.cat(total_labels, dim=0)\n            total_label_weights = torch.cat(total_label_weights, dim=0)\n            total_bbox_targets = torch.cat(total_bbox_targets, dim=0)\n            total_bbox_weights = torch.cat(total_bbox_weights, dim=0)\n            total_dir_targets = torch.cat(total_dir_targets, dim=0)\n            total_dir_weights = torch.cat(total_dir_weights, dim=0)\n            total_pos_inds = torch.cat(total_pos_inds, dim=0)\n            total_neg_inds = torch.cat(total_neg_inds, dim=0)\n            return (total_labels, total_label_weights, total_bbox_targets,\n                    total_bbox_weights, total_dir_targets, total_dir_weights,\n                    total_pos_inds, total_neg_inds)\n        else:\n            return self.anchor_target_single_assigner(self.bbox_assigner,\n                                                      anchors, gt_bboxes,\n                                                      gt_bboxes_ignore,\n                                                      gt_labels, input_meta,\n                                                      num_classes, sampling)\n\n    def anchor_target_single_assigner(self,\n                                      bbox_assigner,\n                                      anchors,\n                                      gt_bboxes,\n                                      gt_bboxes_ignore,\n                                      gt_labels,\n                                      input_meta,\n                                      num_classes=1,\n                                      sampling=True):\n        \"\"\"Assign anchors and encode positive anchors.\n\n        Args:\n            bbox_assigner (BaseAssigner): assign positive and negative boxes.\n            anchors (torch.Tensor): Concatenated multi-level anchor.\n            gt_bboxes (:obj:`BaseInstance3DBoxes`): Gt bboxes.\n            gt_bboxes_ignore (torch.Tensor): Ignored gt bboxes.\n            gt_labels (torch.Tensor): Gt class labels.\n            input_meta (dict): Meta info of each image.\n            num_classes (int): The number of classes.\n            sampling (bool): Whether to sample anchors.\n\n        Returns:\n            tuple[torch.Tensor]: Anchor targets.\n        \"\"\"\n        anchors = anchors.reshape(-1, anchors.size(-1))\n        num_valid_anchors = anchors.shape[0]\n        bbox_targets = torch.zeros_like(anchors)\n        bbox_weights = torch.zeros_like(anchors)\n        dir_targets = anchors.new_zeros((anchors.shape[0]), dtype=torch.long)\n        dir_weights = anchors.new_zeros((anchors.shape[0]), dtype=torch.float)\n        labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)\n        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)\n        if len(gt_bboxes) > 0:\n            if not isinstance(gt_bboxes, torch.Tensor):\n                gt_bboxes = gt_bboxes.tensor.to(anchors.device)\n            assign_result = bbox_assigner.assign(anchors, gt_bboxes,\n                                                 gt_bboxes_ignore, gt_labels)\n            sampling_result = self.bbox_sampler.sample(assign_result, anchors,\n                                                       gt_bboxes)\n  
          pos_inds = sampling_result.pos_inds\n            neg_inds = sampling_result.neg_inds\n        else:\n            pos_inds = torch.nonzero(\n                anchors.new_zeros((anchors.shape[0], ), dtype=torch.bool) > 0,\n                as_tuple=False).squeeze(-1).unique()\n            neg_inds = torch.nonzero(\n                anchors.new_zeros((anchors.shape[0], ), dtype=torch.bool) == 0,\n                as_tuple=False).squeeze(-1).unique()\n\n        if gt_labels is not None:\n            labels += num_classes\n        if len(pos_inds) > 0:\n            pos_bbox_targets = self.bbox_coder.encode(\n                sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)\n            pos_dir_targets = get_direction_target(\n                sampling_result.pos_bboxes,\n                pos_bbox_targets,\n                self.dir_offset,\n                one_hot=False)\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n            bbox_weights[pos_inds, :] = 1.0\n            dir_targets[pos_inds] = pos_dir_targets\n            dir_weights[pos_inds] = 1.0\n\n            if gt_labels is None:\n                labels[pos_inds] = 1\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n        return (labels, label_weights, bbox_targets, bbox_weights, dir_targets,\n                dir_weights, pos_inds, neg_inds)\n\n\ndef get_direction_target(anchors,\n                         reg_targets,\n                         dir_offset=0,\n                         num_bins=2,\n                         one_hot=True):\n    \"\"\"Encode direction to 0 ~ num_bins-1.\n\n    Args:\n        anchors (torch.Tensor): Concatenated multi-level anchor.\n        reg_targets (torch.Tensor): Bbox regression targets.\n        dir_offset (int): Direction offset.\n        num_bins (int): Number of bins to divide 2*PI.\n        one_hot (bool): Whether to encode as one hot.\n\n    Returns:\n        torch.Tensor: Encoded direction targets.\n    \"\"\"\n    rot_gt = reg_targets[..., 6] + anchors[..., 6]\n    offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi)\n    dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()\n    dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)\n    if one_hot:\n        dir_targets = torch.zeros(\n            *list(dir_cls_targets.shape),\n            num_bins,\n            dtype=anchors.dtype,\n            device=dir_cls_targets.device)\n        # scatter one-hot values along the last (direction-bin) dimension;\n        # Tensor.scatter_ requires the dim argument to be given first.\n        dir_targets.scatter_(-1, dir_cls_targets.unsqueeze(dim=-1).long(), 1.0)\n        dir_cls_targets = dir_targets\n    return dir_cls_targets\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/transfusion_head.py",
    "content": "import copy\nimport numpy as np\nimport torch\nfrom mmcv.cnn import ConvModule, build_conv_layer, kaiming_init\nfrom mmcv.runner import force_fp32\nfrom torch import nn\nimport torch.nn.functional as F\nfrom torch.nn.parameter import Parameter\nfrom torch.nn import Linear\nfrom torch.nn.init import xavier_uniform_, constant_\n\nfrom mmdet3d.core import (circle_nms, draw_heatmap_gaussian, gaussian_radius,\n                          xywhr2xyxyr, limit_period, PseudoSampler)\nfrom mmdet3d.core.bbox.structures import rotation_3d_in_axis\nfrom mmdet3d.core import Box3DMode, LiDARInstance3DBoxes\nfrom mmdet3d.models import builder\nfrom mmdet3d.models.builder import HEADS, build_loss\nfrom mmdet3d.models.utils import clip_sigmoid\nfrom mmdet3d.models.fusion_layers import apply_3d_transformation\nfrom mmdet3d.ops.iou3d.iou3d_utils import nms_gpu\nfrom mmdet.core import build_bbox_coder, multi_apply, build_assigner, build_sampler, AssignResult\nfrom mmdet3d.ops.roiaware_pool3d import points_in_boxes_batch\n\n\nclass PositionEmbeddingLearned(nn.Module):\n    \"\"\"\n    Absolute pos embedding, learned.\n    \"\"\"\n\n    def __init__(self, input_channel, num_pos_feats=288):\n        super().__init__()\n        self.position_embedding_head = nn.Sequential(\n            nn.Conv1d(input_channel, num_pos_feats, kernel_size=1),\n            nn.BatchNorm1d(num_pos_feats),\n            nn.ReLU(inplace=True),\n            nn.Conv1d(num_pos_feats, num_pos_feats, kernel_size=1))\n\n    def forward(self, xyz):\n        xyz = xyz.transpose(1, 2).contiguous()\n        position_embedding = self.position_embedding_head(xyz)\n        return position_embedding\n\n\nclass TransformerDecoderLayer(nn.Module):\n    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=\"relu\",\n                 self_posembed=None, cross_posembed=None, cross_only=False):\n        super().__init__()\n        self.cross_only = cross_only\n        if not self.cross_only:\n            self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)\n        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout)\n        # Implementation of Feedforward model\n        self.linear1 = nn.Linear(d_model, dim_feedforward)\n        self.dropout = nn.Dropout(dropout)\n        self.linear2 = nn.Linear(dim_feedforward, d_model)\n\n        self.norm1 = nn.LayerNorm(d_model)\n        self.norm2 = nn.LayerNorm(d_model)\n        self.norm3 = nn.LayerNorm(d_model)\n        self.dropout1 = nn.Dropout(dropout)\n        self.dropout2 = nn.Dropout(dropout)\n        self.dropout3 = nn.Dropout(dropout)\n\n        def _get_activation_fn(activation):\n            \"\"\"Return an activation function given a string\"\"\"\n            if activation == \"relu\":\n                return F.relu\n            if activation == \"gelu\":\n                return F.gelu\n            if activation == \"glu\":\n                return F.glu\n            raise RuntimeError(F\"activation should be relu/gelu, not {activation}.\")\n\n        self.activation = _get_activation_fn(activation)\n\n        self.self_posembed = self_posembed\n        self.cross_posembed = cross_posembed\n\n    def with_pos_embed(self, tensor, pos_embed):\n        return tensor if pos_embed is None else tensor + pos_embed\n\n    def forward(self, query, key, query_pos, key_pos, attn_mask=None):\n        \"\"\"\n        :param query: B C Pq\n        :param key: B C Pk\n        :param query_pos: B Pq 3/6\n        :param key_pos: B Pk 3/6\n    
    :param value_pos: [B Pq 3/6]\n        :return:\n        \"\"\"\n        # NxCxP to PxNxC\n        if self.self_posembed is not None:\n            query_pos_embed = self.self_posembed(query_pos).permute(2, 0, 1)\n        else:\n            query_pos_embed = None\n        if self.cross_posembed is not None:\n            key_pos_embed = self.cross_posembed(key_pos).permute(2, 0, 1)\n        else:\n            key_pos_embed = None\n\n        query = query.permute(2, 0, 1)\n        key = key.permute(2, 0, 1)\n\n        if not self.cross_only:\n            q = k = v = self.with_pos_embed(query, query_pos_embed)\n            query2 = self.self_attn(q, k, value=v)[0]\n            query = query + self.dropout1(query2)\n            query = self.norm1(query)\n\n        query2 = self.multihead_attn(query=self.with_pos_embed(query, query_pos_embed),\n                                     key=self.with_pos_embed(key, key_pos_embed),\n                                     value=self.with_pos_embed(key, key_pos_embed), attn_mask=attn_mask)[0]\n        query = query + self.dropout2(query2)\n        query = self.norm2(query)\n\n        query2 = self.linear2(self.dropout(self.activation(self.linear1(query))))\n        query = query + self.dropout3(query2)\n        query = self.norm3(query)\n\n        # NxCxP to PxNxC\n        query = query.permute(1, 2, 0)\n        return query\n\n\nclass MultiheadAttention(nn.Module):\n    r\"\"\"Allows the model to jointly attend to information\n    from different representation subspaces.\n    See reference: Attention Is All You Need\n    .. math::\n        \\text{MultiHead}(Q, K, V) = \\text{Concat}(head_1,\\dots,head_h)W^O\n        \\text{where} head_i = \\text{Attention}(QW_i^Q, KW_i^K, VW_i^V)\n    Args:\n        embed_dim: total dimension of the model.\n        num_heads: parallel attention heads.\n        dropout: a Dropout layer on attn_output_weights. Default: 0.0.\n        bias: add bias as module parameter. Default: True.\n        add_bias_kv: add bias to the key and value sequences at dim=0.\n        add_zero_attn: add a new batch of zeros to the key and\n                       value sequences at dim=1.\n        kdim: total number of features in key. Default: None.\n        vdim: total number of features in key. 
Default: None.\n        Note: if kdim and vdim are None, they will be set to embed_dim such that\n        query, key, and value have the same number of features.\n    Examples::\n        >>> multihead_attn = nn.MultiheadAttention(embed_dim, num_heads)\n        >>> attn_output, attn_output_weights = multihead_attn(query, key, value)\n    \"\"\"\n\n    def __init__(self, embed_dim, num_heads, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None,\n                 vdim=None):\n        super(MultiheadAttention, self).__init__()\n        self.embed_dim = embed_dim\n        self.kdim = kdim if kdim is not None else embed_dim\n        self.vdim = vdim if vdim is not None else embed_dim\n        self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim\n\n        self.num_heads = num_heads\n        self.dropout = dropout\n        self.head_dim = embed_dim // num_heads\n        assert self.head_dim * num_heads == self.embed_dim, \"embed_dim must be divisible by num_heads\"\n\n        self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim))\n\n        if self._qkv_same_embed_dim is False:\n            self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))\n            self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))\n            self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))\n\n        if bias:\n            self.in_proj_bias = Parameter(torch.empty(3 * embed_dim))\n        else:\n            self.register_parameter('in_proj_bias', None)\n        self.out_proj = Linear(embed_dim, embed_dim, bias=bias)\n\n        if add_bias_kv:\n            self.bias_k = Parameter(torch.empty(1, 1, embed_dim))\n            self.bias_v = Parameter(torch.empty(1, 1, embed_dim))\n        else:\n            self.bias_k = self.bias_v = None\n\n        self.add_zero_attn = add_zero_attn\n\n        self._reset_parameters()\n\n    def _reset_parameters(self):\n        if self._qkv_same_embed_dim:\n            xavier_uniform_(self.in_proj_weight)\n        else:\n            xavier_uniform_(self.q_proj_weight)\n            xavier_uniform_(self.k_proj_weight)\n            xavier_uniform_(self.v_proj_weight)\n\n        if self.in_proj_bias is not None:\n            constant_(self.in_proj_bias, 0.)\n            constant_(self.out_proj.bias, 0.)\n        if self.bias_k is not None:\n            xavier_normal_(self.bias_k)\n        if self.bias_v is not None:\n            xavier_normal_(self.bias_v)\n\n    def forward(self, query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None):\n        r\"\"\"\n    Args:\n        query, key, value: map a query and a set of key-value pairs to an output.\n            See \"Attention Is All You Need\" for more details.\n        key_padding_mask: if provided, specified padding elements in the key will\n            be ignored by the attention. This is an binary mask. When the value is True,\n            the corresponding value on the attention layer will be filled with -inf.\n        need_weights: output attn_output_weights.\n        attn_mask: mask that prevents attention to certain positions. This is an additive mask\n            (i.e. 
the values will be added to the attention layer).\n    Shape:\n        - Inputs:\n        - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is\n          the embedding dimension.\n        - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is\n          the embedding dimension.\n        - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is\n          the embedding dimension.\n        - key_padding_mask: :math:`(N, S)`, ByteTensor, where N is the batch size, S is the source sequence length.\n        - attn_mask: :math:`(L, S)` where L is the target sequence length, S is the source sequence length.\n        - Outputs:\n        - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size,\n          E is the embedding dimension.\n        - attn_output_weights: :math:`(N, L, S)` where N is the batch size,\n          L is the target sequence length, S is the source sequence length.\n        \"\"\"\n        if hasattr(self, '_qkv_same_embed_dim') and self._qkv_same_embed_dim is False:\n            return multi_head_attention_forward(\n                query, key, value, self.embed_dim, self.num_heads,\n                self.in_proj_weight, self.in_proj_bias,\n                self.bias_k, self.bias_v, self.add_zero_attn,\n                self.dropout, self.out_proj.weight, self.out_proj.bias,\n                training=self.training,\n                key_padding_mask=key_padding_mask, need_weights=need_weights,\n                attn_mask=attn_mask, use_separate_proj_weight=True,\n                q_proj_weight=self.q_proj_weight, k_proj_weight=self.k_proj_weight,\n                v_proj_weight=self.v_proj_weight)\n        else:\n            if not hasattr(self, '_qkv_same_embed_dim'):\n                warnings.warn('A new version of MultiheadAttention module has been implemented. 
\\\n                    Please re-train your model with the new module',\n                              UserWarning)\n\n            return multi_head_attention_forward(\n                query, key, value, self.embed_dim, self.num_heads,\n                self.in_proj_weight, self.in_proj_bias,\n                self.bias_k, self.bias_v, self.add_zero_attn,\n                self.dropout, self.out_proj.weight, self.out_proj.bias,\n                training=self.training,\n                key_padding_mask=key_padding_mask, need_weights=need_weights,\n                attn_mask=attn_mask)\n\n\ndef multi_head_attention_forward(query,  # type: Tensor\n                                 key,  # type: Tensor\n                                 value,  # type: Tensor\n                                 embed_dim_to_check,  # type: int\n                                 num_heads,  # type: int\n                                 in_proj_weight,  # type: Tensor\n                                 in_proj_bias,  # type: Tensor\n                                 bias_k,  # type: Optional[Tensor]\n                                 bias_v,  # type: Optional[Tensor]\n                                 add_zero_attn,  # type: bool\n                                 dropout_p,  # type: float\n                                 out_proj_weight,  # type: Tensor\n                                 out_proj_bias,  # type: Tensor\n                                 training=True,  # type: bool\n                                 key_padding_mask=None,  # type: Optional[Tensor]\n                                 need_weights=True,  # type: bool\n                                 attn_mask=None,  # type: Optional[Tensor]\n                                 use_separate_proj_weight=False,  # type: bool\n                                 q_proj_weight=None,  # type: Optional[Tensor]\n                                 k_proj_weight=None,  # type: Optional[Tensor]\n                                 v_proj_weight=None,  # type: Optional[Tensor]\n                                 static_k=None,  # type: Optional[Tensor]\n                                 static_v=None,  # type: Optional[Tensor]\n                                 ):\n    # type: (...) -> Tuple[Tensor, Optional[Tensor]]\n    r\"\"\"\n    Args:\n        query, key, value: map a query and a set of key-value pairs to an output.\n            See \"Attention Is All You Need\" for more details.\n        embed_dim_to_check: total dimension of the model.\n        num_heads: parallel attention heads.\n        in_proj_weight, in_proj_bias: input projection weight and bias.\n        bias_k, bias_v: bias of the key and value sequences to be added at dim=0.\n        add_zero_attn: add a new batch of zeros to the key and\n                       value sequences at dim=1.\n        dropout_p: probability of an element to be zeroed.\n        out_proj_weight, out_proj_bias: the output projection weight and bias.\n        training: apply dropout if is ``True``.\n        key_padding_mask: if provided, specified padding elements in the key will\n            be ignored by the attention. This is an binary mask. When the value is True,\n            the corresponding value on the attention layer will be filled with -inf.\n        need_weights: output attn_output_weights.\n        attn_mask: mask that prevents attention to certain positions. This is an additive mask\n            (i.e. the values will be added to the attention layer).\n        use_separate_proj_weight: the function accept the proj. 
weights for query, key,\n            and value in differnt forms. If false, in_proj_weight will be used, which is\n            a combination of q_proj_weight, k_proj_weight, v_proj_weight.\n        q_proj_weight, k_proj_weight, v_proj_weight, in_proj_bias: input projection weight and bias.\n        static_k, static_v: static key and value used for attention operators.\n    Shape:\n        Inputs:\n        - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is\n          the embedding dimension.\n        - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is\n          the embedding dimension.\n        - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is\n          the embedding dimension.\n        - key_padding_mask: :math:`(N, S)`, ByteTensor, where N is the batch size, S is the source sequence length.\n        - attn_mask: :math:`(L, S)` where L is the target sequence length, S is the source sequence length.\n        - static_k: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length,\n          N is the batch size, E is the embedding dimension. E/num_heads is the head dimension.\n        - static_v: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length,\n          N is the batch size, E is the embedding dimension. E/num_heads is the head dimension.\n        Outputs:\n        - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size,\n          E is the embedding dimension.\n        - attn_output_weights: :math:`(N, L, S)` where N is the batch size,\n          L is the target sequence length, S is the source sequence length.\n    \"\"\"\n\n    qkv_same = torch.equal(query, key) and torch.equal(key, value)\n    kv_same = torch.equal(key, value)\n\n    tgt_len, bsz, embed_dim = query.size()\n    assert embed_dim == embed_dim_to_check\n    assert list(query.size()) == [tgt_len, bsz, embed_dim]\n    assert key.size() == value.size()\n\n    head_dim = embed_dim // num_heads\n    assert head_dim * num_heads == embed_dim, \"embed_dim must be divisible by num_heads\"\n    scaling = float(head_dim) ** -0.5\n\n    if use_separate_proj_weight is not True:\n        if qkv_same:\n            # self-attention\n            q, k, v = F.linear(query, in_proj_weight, in_proj_bias).chunk(3, dim=-1)\n\n        elif kv_same:\n            # encoder-decoder attention\n            # This is inline in_proj function with in_proj_weight and in_proj_bias\n            _b = in_proj_bias\n            _start = 0\n            _end = embed_dim\n            _w = in_proj_weight[_start:_end, :]\n            if _b is not None:\n                _b = _b[_start:_end]\n            q = F.linear(query, _w, _b)\n\n            if key is None:\n                assert value is None\n                k = None\n                v = None\n            else:\n\n                # This is inline in_proj function with in_proj_weight and in_proj_bias\n                _b = in_proj_bias\n                _start = embed_dim\n                _end = None\n                _w = in_proj_weight[_start:, :]\n                if _b is not None:\n                    _b = _b[_start:]\n                k, v = F.linear(key, _w, _b).chunk(2, dim=-1)\n\n        else:\n            # This is inline in_proj function with in_proj_weight and in_proj_bias\n            _b = in_proj_bias\n            _start = 0\n            _end = embed_dim\n            _w = in_proj_weight[_start:_end, 
:]\n            if _b is not None:\n                _b = _b[_start:_end]\n            q = F.linear(query, _w, _b)\n\n            # This is inline in_proj function with in_proj_weight and in_proj_bias\n            _b = in_proj_bias\n            _start = embed_dim\n            _end = embed_dim * 2\n            _w = in_proj_weight[_start:_end, :]\n            if _b is not None:\n                _b = _b[_start:_end]\n            k = F.linear(key, _w, _b)\n\n            # This is inline in_proj function with in_proj_weight and in_proj_bias\n            _b = in_proj_bias\n            _start = embed_dim * 2\n            _end = None\n            _w = in_proj_weight[_start:, :]\n            if _b is not None:\n                _b = _b[_start:]\n            v = F.linear(value, _w, _b)\n    else:\n        q_proj_weight_non_opt = torch.jit._unwrap_optional(q_proj_weight)\n        len1, len2 = q_proj_weight_non_opt.size()\n        assert len1 == embed_dim and len2 == query.size(-1)\n\n        k_proj_weight_non_opt = torch.jit._unwrap_optional(k_proj_weight)\n        len1, len2 = k_proj_weight_non_opt.size()\n        assert len1 == embed_dim and len2 == key.size(-1)\n\n        v_proj_weight_non_opt = torch.jit._unwrap_optional(v_proj_weight)\n        len1, len2 = v_proj_weight_non_opt.size()\n        assert len1 == embed_dim and len2 == value.size(-1)\n\n        if in_proj_bias is not None:\n            q = F.linear(query, q_proj_weight_non_opt, in_proj_bias[0:embed_dim])\n            k = F.linear(key, k_proj_weight_non_opt, in_proj_bias[embed_dim:(embed_dim * 2)])\n            v = F.linear(value, v_proj_weight_non_opt, in_proj_bias[(embed_dim * 2):])\n        else:\n            q = F.linear(query, q_proj_weight_non_opt, in_proj_bias)\n            k = F.linear(key, k_proj_weight_non_opt, in_proj_bias)\n            v = F.linear(value, v_proj_weight_non_opt, in_proj_bias)\n    q = q * scaling\n\n    if bias_k is not None and bias_v is not None:\n        if static_k is None and static_v is None:\n            k = torch.cat([k, bias_k.repeat(1, bsz, 1)])\n            v = torch.cat([v, bias_v.repeat(1, bsz, 1)])\n            if attn_mask is not None:\n                attn_mask = torch.cat([attn_mask,\n                                       torch.zeros((attn_mask.size(0), 1),\n                                                   dtype=attn_mask.dtype,\n                                                   device=attn_mask.device)], dim=1)\n            if key_padding_mask is not None:\n                key_padding_mask = torch.cat(\n                    [key_padding_mask, torch.zeros((key_padding_mask.size(0), 1),\n                                                   dtype=key_padding_mask.dtype,\n                                                   device=key_padding_mask.device)], dim=1)\n        else:\n            assert static_k is None, \"bias cannot be added to static key.\"\n            assert static_v is None, \"bias cannot be added to static value.\"\n    else:\n        assert bias_k is None\n        assert bias_v is None\n\n    q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1)\n    if k is not None:\n        k = k.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1)\n    if v is not None:\n        v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1)\n\n    if static_k is not None:\n        assert static_k.size(0) == bsz * num_heads\n        assert static_k.size(2) == head_dim\n        k = static_k\n\n    if static_v is not None:\n        assert 
static_v.size(0) == bsz * num_heads\n        assert static_v.size(2) == head_dim\n        v = static_v\n\n    src_len = k.size(1)\n\n    if key_padding_mask is not None:\n        assert key_padding_mask.size(0) == bsz\n        assert key_padding_mask.size(1) == src_len\n\n    if add_zero_attn:\n        src_len += 1\n        k = torch.cat([k, torch.zeros((k.size(0), 1) + k.size()[2:], dtype=k.dtype, device=k.device)], dim=1)\n        v = torch.cat([v, torch.zeros((v.size(0), 1) + v.size()[2:], dtype=v.dtype, device=v.device)], dim=1)\n        if attn_mask is not None:\n            if len(attn_mask.shape) == 2:\n                attn_mask = torch.cat([attn_mask, torch.zeros((attn_mask.size(0), 1),\n                                                          dtype=attn_mask.dtype,\n                                                          device=attn_mask.device)], dim=1)\n            else:\n                attn_mask = torch.cat([attn_mask, torch.zeros((attn_mask.size(0), attn_mask.size(1), 1),\n                                                          dtype=attn_mask.dtype,\n                                                          device=attn_mask.device)], dim=2)\n        if key_padding_mask is not None:\n            key_padding_mask = torch.cat(\n                [key_padding_mask, torch.zeros((key_padding_mask.size(0), 1),\n                                               dtype=key_padding_mask.dtype,\n                                               device=key_padding_mask.device)], dim=1)\n\n    attn_output_weights = torch.bmm(q, k.transpose(1, 2))\n    assert list(attn_output_weights.size()) == [bsz * num_heads, tgt_len, src_len]\n\n    if attn_mask is not None:\n        if len(attn_mask.shape) == 2:\n            attn_mask = attn_mask.unsqueeze(0)\n        else:\n            attn_mask = attn_mask.unsqueeze(1).repeat(1, num_heads, 1, 1)\n            attn_mask = attn_mask.reshape(attn_mask.size(0)*num_heads, attn_mask.size(2), attn_mask.size(3))\n        attn_output_weights += attn_mask\n\n    if key_padding_mask is not None:\n        attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)\n        attn_output_weights = attn_output_weights.masked_fill(\n            key_padding_mask.unsqueeze(1).unsqueeze(2),\n            float('-inf'),\n        )\n        attn_output_weights = attn_output_weights.view(bsz * num_heads, tgt_len, src_len)\n\n    attn_output_weights = F.softmax(\n        attn_output_weights, dim=-1)\n    attn_output_weights = F.dropout(attn_output_weights, p=dropout_p, training=training)\n\n    attn_output = torch.bmm(attn_output_weights, v)\n    assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim]\n    attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)\n    attn_output = F.linear(attn_output, out_proj_weight, out_proj_bias)\n\n    if need_weights:\n        # average attention weights over heads\n        attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)\n        return attn_output, attn_output_weights.sum(dim=1) / num_heads\n    else:\n        return attn_output, None\n\n\nclass FFN(nn.Module):\n    def __init__(self,\n                 in_channels,\n                 heads,\n                 head_conv=64,\n                 final_kernel=1,\n                 init_bias=-2.19,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 bias='auto',\n                 **kwargs):\n        super(FFN, self).__init__()\n\n        
self.heads = heads\n        self.init_bias = init_bias\n        for head in self.heads:\n            classes, num_conv = self.heads[head]\n\n            conv_layers = []\n            c_in = in_channels\n            for i in range(num_conv - 1):\n                conv_layers.append(\n                    ConvModule(\n                        c_in,\n                        head_conv,\n                        kernel_size=final_kernel,\n                        stride=1,\n                        padding=final_kernel // 2,\n                        bias=bias,\n                        conv_cfg=conv_cfg,\n                        norm_cfg=norm_cfg))\n                c_in = head_conv\n\n            conv_layers.append(\n                build_conv_layer(\n                    conv_cfg,\n                    head_conv,\n                    classes,\n                    kernel_size=final_kernel,\n                    stride=1,\n                    padding=final_kernel // 2,\n                    bias=True))\n            conv_layers = nn.Sequential(*conv_layers)\n\n            self.__setattr__(head, conv_layers)\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        for head in self.heads:\n            if head == 'heatmap':\n                self.__getattr__(head)[-1].bias.data.fill_(self.init_bias)\n            else:\n                for m in self.__getattr__(head).modules():\n                    if isinstance(m, nn.Conv2d):\n                        kaiming_init(m)\n\n    def forward(self, x):\n        \"\"\"Forward function for SepHead.\n\n        Args:\n            x (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128, 128].\n\n        Returns:\n            dict[str: torch.Tensor]: contains the following keys:\n\n                -reg （torch.Tensor): 2D regression value with the \\\n                    shape of [B, 2, H, W].\n                -height (torch.Tensor): Height value with the \\\n                    shape of [B, 1, H, W].\n                -dim (torch.Tensor): Size value with the shape \\\n                    of [B, 3, H, W].\n                -rot (torch.Tensor): Rotation value with the \\\n                    shape of [B, 1, H, W].\n                -vel (torch.Tensor): Velocity value with the \\\n                    shape of [B, 2, H, W].\n                -heatmap (torch.Tensor): Heatmap with the shape of \\\n                    [B, N, H, W].\n        \"\"\"\n        ret_dict = dict()\n        for head in self.heads:\n            ret_dict[head] = self.__getattr__(head)(x)\n\n        return ret_dict\n\n\n@HEADS.register_module()\nclass TransFusionHead(nn.Module):\n    def __init__(self,\n                 fuse_img=False,\n                 num_views=0,\n                 in_channels_img=64,\n                 out_size_factor_img=4,\n                 num_proposals=128,\n                 auxiliary=True,\n                 in_channels=128 * 3,\n                 hidden_channel=128,\n                 num_classes=4,\n                 # config for Transformer\n                 num_decoder_layers=3,\n                 num_heads=8,\n                 learnable_query_pos=False,\n                 initialize_by_heatmap=False,\n                 nms_kernel_size=1,\n                 ffn_channel=256,\n                 dropout=0.1,\n                 bn_momentum=0.1,\n                 activation='relu',\n                 # config for FFN\n                 common_heads=dict(),\n                 num_heatmap_convs=2,\n                 conv_cfg=dict(type='Conv1d'),\n         
        norm_cfg=dict(type='BN1d'),\n                 bias='auto',\n                 # loss\n                 loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n                 loss_iou=dict(type='VarifocalLoss', use_sigmoid=True, iou_weighted=True, reduction='mean'),\n                 loss_bbox=dict(type='L1Loss', reduction='mean'),\n                 loss_heatmap=dict(type='GaussianFocalLoss', reduction='mean'),\n                 # others\n                 train_cfg=None,\n                 test_cfg=None,\n                 bbox_coder=None,\n                 ):\n        super(TransFusionHead, self).__init__()\n\n        self.num_classes = num_classes\n        self.num_proposals = num_proposals\n        self.auxiliary = auxiliary\n        self.in_channels = in_channels\n        self.num_heads = num_heads\n        self.num_decoder_layers = num_decoder_layers\n        self.bn_momentum = bn_momentum\n        self.learnable_query_pos = learnable_query_pos\n        self.initialize_by_heatmap = initialize_by_heatmap\n        self.nms_kernel_size = nms_kernel_size\n        if self.initialize_by_heatmap is True:\n            assert self.learnable_query_pos is False, \"initialized by heatmap is conflicting with learnable query position\"\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n        if not self.use_sigmoid_cls:\n            self.num_classes += 1\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_iou = build_loss(loss_iou)\n        self.loss_heatmap = build_loss(loss_heatmap)\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.sampling = False\n\n        # a shared convolution\n        self.shared_conv = build_conv_layer(\n            dict(type='Conv2d'),\n            in_channels,\n            hidden_channel,\n            kernel_size=3,\n            padding=1,\n            bias=bias,\n        )\n\n        if self.initialize_by_heatmap:\n            layers = []\n            layers.append(ConvModule(\n                hidden_channel,\n                hidden_channel,\n                kernel_size=3,\n                padding=1,\n                bias=bias,\n                conv_cfg=dict(type='Conv2d'),\n                norm_cfg=dict(type='BN2d'),\n            ))\n            layers.append(build_conv_layer(\n                dict(type='Conv2d'),\n                hidden_channel,\n                num_classes,\n                kernel_size=3,\n                padding=1,\n                bias=bias,\n            ))\n            self.heatmap_head = nn.Sequential(*layers)\n            self.class_encoding = nn.Conv1d(num_classes, hidden_channel, 1)\n        else:\n            # query feature\n            self.query_feat = nn.Parameter(torch.randn(1, hidden_channel, self.num_proposals))\n            self.query_pos = nn.Parameter(torch.rand([1, self.num_proposals, 2]), requires_grad=learnable_query_pos)\n\n        # transformer decoder layers for object query with LiDAR feature\n        self.decoder = nn.ModuleList()\n        for i in range(self.num_decoder_layers):\n            self.decoder.append(\n                TransformerDecoderLayer(\n                    hidden_channel, num_heads, ffn_channel, dropout, activation,\n                    self_posembed=PositionEmbeddingLearned(2, hidden_channel),\n                    cross_posembed=PositionEmbeddingLearned(2, hidden_channel),\n                ))\n\n        # Prediction Head\n   
     self.prediction_heads = nn.ModuleList()\n        for i in range(self.num_decoder_layers):\n            heads = copy.deepcopy(common_heads)\n            heads.update(dict(heatmap=(self.num_classes, num_heatmap_convs)))\n            self.prediction_heads.append(FFN(hidden_channel, heads, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=bias))\n\n        self.fuse_img = fuse_img\n        if self.fuse_img:\n            self.num_views = num_views\n            self.out_size_factor_img = out_size_factor_img\n            self.shared_conv_img = build_conv_layer(\n                dict(type='Conv2d'),\n                in_channels_img,  # channel of img feature map\n                hidden_channel,\n                kernel_size=3,\n                padding=1,\n                bias=bias,\n            )\n            if self.initialize_by_heatmap:\n                self.heatmap_head_img = copy.deepcopy(self.heatmap_head)\n            # transformer decoder layers for img fusion\n            self.decoder.append(\n                TransformerDecoderLayer(\n                    hidden_channel, num_heads, ffn_channel, dropout, activation,\n                    self_posembed=PositionEmbeddingLearned(2, hidden_channel),\n                    cross_posembed=PositionEmbeddingLearned(2, hidden_channel),\n                ))\n            # cross-attention only layers for projecting img feature onto BEV\n            for i in range(num_views):\n                self.decoder.append(\n                    TransformerDecoderLayer(\n                        hidden_channel, num_heads, ffn_channel, dropout, activation,\n                        self_posembed=PositionEmbeddingLearned(2, hidden_channel),\n                        cross_posembed=PositionEmbeddingLearned(2, hidden_channel),\n                        cross_only=True,\n                    ))\n            self.fc = nn.Sequential(*[nn.Conv1d(hidden_channel, hidden_channel, kernel_size=1)])\n\n            heads = copy.deepcopy(common_heads)\n            heads.update(dict(heatmap=(self.num_classes, num_heatmap_convs)))\n            self.prediction_heads.append(FFN(hidden_channel * 2, heads, conv_cfg=conv_cfg, norm_cfg=norm_cfg, bias=bias))\n\n        self.init_weights()\n        self._init_assigner_sampler()\n\n        # Position Embedding for Cross-Attention, which is re-used during training\n        x_size = self.test_cfg['grid_size'][0] // self.test_cfg['out_size_factor']\n        y_size = self.test_cfg['grid_size'][1] // self.test_cfg['out_size_factor']\n        self.bev_pos = self.create_2D_grid(x_size, y_size)\n\n        self.img_feat_pos = None\n        self.img_feat_collapsed_pos = None\n\n    def create_2D_grid(self, x_size, y_size):\n        meshgrid = [[0, x_size - 1, x_size], [0, y_size - 1, y_size]]\n        batch_y, batch_x = torch.meshgrid(*[torch.linspace(it[0], it[1], it[2]) for it in meshgrid])\n        batch_x = batch_x + 0.5\n        batch_y = batch_y + 0.5\n        coord_base = torch.cat([batch_x[None], batch_y[None]], dim=0)[None]\n        coord_base = coord_base.view(1, 2, -1).permute(0, 2, 1)\n        return coord_base\n\n    def init_weights(self):\n        # initialize transformer\n        for m in self.decoder.parameters():\n            if m.dim() > 1:\n                nn.init.xavier_uniform_(m)\n        if hasattr(self, 'query'):\n            nn.init.xavier_normal_(self.query)\n        self.init_bn_momentum()\n\n    def init_bn_momentum(self):\n        for m in self.modules():\n            if isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):\n             
   m.momentum = self.bn_momentum\n\n    def _init_assigner_sampler(self):\n        \"\"\"Initialize the target assigner and sampler of the head.\"\"\"\n        if self.train_cfg is None:\n            return\n\n        if self.sampling:\n            self.bbox_sampler = build_sampler(self.train_cfg.sampler)\n        else:\n            self.bbox_sampler = PseudoSampler()\n        if isinstance(self.train_cfg.assigner, dict):\n            self.bbox_assigner = build_assigner(self.train_cfg.assigner)\n        elif isinstance(self.train_cfg.assigner, list):\n            self.bbox_assigner = [\n                build_assigner(res) for res in self.train_cfg.assigner\n            ]\n\n    def forward_single(self, inputs, img_inputs, img_metas):\n        \"\"\"Forward function for TransFusionHead.\n\n        Args:\n            inputs (torch.Tensor): Input feature map with the shape of\n                [B, 512, 128(H), 128(W)]. (consistent with L748)\n\n        Returns:\n            list[dict]: Output results for tasks.\n        \"\"\"\n        batch_size = inputs.shape[0]\n        lidar_feat = self.shared_conv(inputs)\n\n        #################################\n        # image to BEV\n        #################################\n        lidar_feat_flatten = lidar_feat.view(batch_size, lidar_feat.shape[1], -1)  # [BS, C, H*W]\n        bev_pos = self.bev_pos.repeat(batch_size, 1, 1).to(lidar_feat.device)\n\n        if self.fuse_img:\n            img_feat = self.shared_conv_img(img_inputs)  # [BS * n_views, C, H, W]\n\n            img_h, img_w, num_channel = img_inputs.shape[-2], img_inputs.shape[-1], img_feat.shape[1]\n            raw_img_feat = img_feat.view(batch_size, self.num_views, num_channel, img_h, img_w).permute(0, 2, 3, 1, 4) # [BS, C, H, n_views, W]\n            img_feat = raw_img_feat.reshape(batch_size, num_channel, img_h, img_w * self.num_views)  # [BS, C, H, n_views*W]\n            img_feat_collapsed = img_feat.max(2).values\n            img_feat_collapsed = self.fc(img_feat_collapsed).view(batch_size, num_channel, img_w * self.num_views)\n\n            # positional encoding for image guided query initialization\n            if self.img_feat_collapsed_pos is None:\n                img_feat_collapsed_pos = self.img_feat_collapsed_pos = self.create_2D_grid(1, img_feat_collapsed.shape[-1]).to(img_feat.device)\n            else:\n                img_feat_collapsed_pos = self.img_feat_collapsed_pos\n\n            bev_feat = lidar_feat_flatten\n            for idx_view in range(self.num_views):\n                bev_feat = self.decoder[2 + idx_view](bev_feat, img_feat_collapsed[..., img_w * idx_view:img_w * (idx_view + 1)], bev_pos, img_feat_collapsed_pos[:, img_w * idx_view:img_w * (idx_view + 1)])\n\n        #################################\n        # image guided query initialization\n        #################################\n        if self.initialize_by_heatmap:\n            dense_heatmap = self.heatmap_head(lidar_feat)\n            dense_heatmap_img = None\n            if self.fuse_img:\n                dense_heatmap_img = self.heatmap_head_img(bev_feat.view(lidar_feat.shape))  # [BS, num_classes, H, W]\n                heatmap = (dense_heatmap.detach().sigmoid() + dense_heatmap_img.detach().sigmoid()) / 2\n            else:\n                heatmap = dense_heatmap.detach().sigmoid()\n            padding = self.nms_kernel_size // 2\n            local_max = torch.zeros_like(heatmap)\n            # equals to nms radius = voxel_size * out_size_factor * kernel_size\n            local_max_inner = 
F.max_pool2d(heatmap, kernel_size=self.nms_kernel_size, stride=1, padding=0)\n            local_max[:, :, padding:(-padding), padding:(-padding)] = local_max_inner\n            ## for Pedestrian & Traffic_cone in nuScenes\n            if self.test_cfg['dataset'] == 'nuScenes':\n                local_max[:, 8, ] = F.max_pool2d(heatmap[:, 8], kernel_size=1, stride=1, padding=0)\n                local_max[:, 9, ] = F.max_pool2d(heatmap[:, 9], kernel_size=1, stride=1, padding=0)\n            elif self.test_cfg['dataset'] == 'Waymo':  # for Pedestrian & Cyclist in Waymo\n                local_max[:, 1, ] = F.max_pool2d(heatmap[:, 1], kernel_size=1, stride=1, padding=0)\n                local_max[:, 2, ] = F.max_pool2d(heatmap[:, 2], kernel_size=1, stride=1, padding=0)\n            heatmap = heatmap * (heatmap == local_max)\n            heatmap = heatmap.view(batch_size, heatmap.shape[1], -1)\n\n            # top #num_proposals among all classes\n            top_proposals = heatmap.view(batch_size, -1).argsort(dim=-1, descending=True)[..., :self.num_proposals]\n            top_proposals_class = top_proposals // heatmap.shape[-1]\n            top_proposals_index = top_proposals % heatmap.shape[-1]\n            query_feat = lidar_feat_flatten.gather(index=top_proposals_index[:, None, :].expand(-1, lidar_feat_flatten.shape[1], -1), dim=-1)\n            self.query_labels = top_proposals_class\n\n            # add category embedding\n            one_hot = F.one_hot(top_proposals_class, num_classes=self.num_classes).permute(0, 2, 1)\n            query_cat_encoding = self.class_encoding(one_hot.float())\n            query_feat += query_cat_encoding\n\n            query_pos = bev_pos.gather(index=top_proposals_index[:, None, :].permute(0, 2, 1).expand(-1, -1, bev_pos.shape[-1]), dim=1)\n        else:\n            query_feat = self.query_feat.repeat(batch_size, 1, 1)  # [BS, C, num_proposals]\n            query_pos = self.query_pos.repeat(batch_size, 1, 1).to(lidar_feat.device)  # [BS, num_proposals, 2]\n\n        #################################\n        # transformer decoder layer (LiDAR feature as K,V)\n        #################################\n        ret_dicts = []\n        for i in range(self.num_decoder_layers):\n            prefix = 'last_' if (i == self.num_decoder_layers - 1) else f'{i}head_'\n\n            # Transformer Decoder Layer\n            # :param query: B C Pq    :param query_pos: B Pq 3/6\n            query_feat = self.decoder[i](query_feat, lidar_feat_flatten, query_pos, bev_pos)\n\n            # Prediction\n            res_layer = self.prediction_heads[i](query_feat)\n            res_layer['center'] = res_layer['center'] + query_pos.permute(0, 2, 1)\n            first_res_layer = res_layer\n            if not self.fuse_img:\n                ret_dicts.append(res_layer)\n\n            # for next level positional embedding\n            query_pos = res_layer['center'].detach().clone().permute(0, 2, 1)\n\n        #################################\n        # transformer decoder layer (img feature as K,V)\n        #################################\n        if self.fuse_img:\n            # positional encoding for image fusion\n            img_feat = raw_img_feat.permute(0, 3, 1, 2, 4) # [BS, n_views, C, H, W]\n            img_feat_flatten = img_feat.view(batch_size, self.num_views, num_channel, -1)  # [BS, n_views, C, H*W]\n            if self.img_feat_pos is None:\n                (h, w) = img_inputs.shape[-2], img_inputs.shape[-1]\n                img_feat_pos = self.img_feat_pos = 
self.create_2D_grid(h, w).to(img_feat_flatten.device)\n            else:\n                img_feat_pos = self.img_feat_pos\n\n            prev_query_feat = query_feat.detach().clone()\n            query_feat = torch.zeros_like(query_feat)  # create new container for img query feature\n            query_pos_realmetric = query_pos.permute(0, 2, 1) * self.test_cfg['out_size_factor'] * self.test_cfg['voxel_size'][0] + self.test_cfg['pc_range'][0]\n            query_pos_3d = torch.cat([query_pos_realmetric, res_layer['height']], dim=1).detach().clone()\n            if 'vel' in res_layer:\n                vel = copy.deepcopy(res_layer['vel'].detach())\n            else:\n                vel = None\n            pred_boxes = self.bbox_coder.decode(\n                copy.deepcopy(res_layer['heatmap'].detach()),\n                copy.deepcopy(res_layer['rot'].detach()),\n                copy.deepcopy(res_layer['dim'].detach()),\n                copy.deepcopy(res_layer['center'].detach()),\n                copy.deepcopy(res_layer['height'].detach()),\n                vel,\n            )\n\n            on_the_image_mask = torch.ones([batch_size, self.num_proposals]).to(query_pos_3d.device) * -1\n\n            for sample_idx in range(batch_size if self.fuse_img else 0):\n                lidar2img_rt = query_pos_3d.new_tensor(img_metas[sample_idx]['lidar2img'])\n                img_scale_factor = (\n                    query_pos_3d.new_tensor(img_metas[sample_idx]['scale_factor'][:2]\n                                            if 'scale_factor' in img_metas[sample_idx].keys() else [1.0, 1.0])\n                )\n                img_flip = img_metas[sample_idx]['flip'] if 'flip' in img_metas[sample_idx].keys() else False\n                img_crop_offset = (\n                    query_pos_3d.new_tensor(img_metas[sample_idx]['img_crop_offset'])\n                    if 'img_crop_offset' in img_metas[sample_idx].keys() else 0)\n                img_shape = img_metas[sample_idx]['img_shape'][:2]\n                img_pad_shape = img_metas[sample_idx]['input_shape'][:2]\n                boxes = LiDARInstance3DBoxes(pred_boxes[sample_idx]['bboxes'][:, :7], box_dim=7)\n                query_pos_3d_with_corners = torch.cat([query_pos_3d[sample_idx], boxes.corners.permute(2, 0, 1).view(3, -1)], dim=-1)  # [3, num_proposals] + [3, num_proposals*8]\n                # transform point clouds back to original coordinate system by reverting the data augmentation\n                if batch_size == 1:  # skip during inference to save time\n                    points = query_pos_3d_with_corners.T\n                else:\n                    points = apply_3d_transformation(query_pos_3d_with_corners.T, 'LIDAR', img_metas[sample_idx], reverse=True).detach()\n                num_points = points.shape[0]\n\n                for view_idx in range(self.num_views):\n                    pts_4d = torch.cat([points, points.new_ones(size=(num_points, 1))], dim=-1)\n                    pts_2d = pts_4d @ lidar2img_rt[view_idx].t()\n\n                    pts_2d[:, 2] = torch.clamp(pts_2d[:, 2], min=1e-5)\n                    pts_2d[:, 0] /= pts_2d[:, 2]\n                    pts_2d[:, 1] /= pts_2d[:, 2]\n\n                    # img transformation: scale -> crop -> flip\n                    # the image is resized by img_scale_factor\n                    img_coors = pts_2d[:, 0:2] * img_scale_factor  # Nx2\n                    img_coors -= img_crop_offset\n\n                    # grid sample, the valid grid range should be in [-1,1]\n          
          coor_x, coor_y = torch.split(img_coors, 1, dim=1)  # each is Nx1\n\n                    if img_flip:\n                        # by default we take it as horizontal flip\n                        # use img_shape before padding for flip\n                        orig_h, orig_w = img_shape\n                        coor_x = orig_w - coor_x\n\n                    coor_x, coor_corner_x = coor_x[0:self.num_proposals, :], coor_x[self.num_proposals:, :]\n                    coor_y, coor_corner_y = coor_y[0:self.num_proposals, :], coor_y[self.num_proposals:, :]\n                    coor_corner_x = coor_corner_x.reshape(self.num_proposals, 8, 1)\n                    coor_corner_y = coor_corner_y.reshape(self.num_proposals, 8, 1)\n                    coor_corner_xy = torch.cat([coor_corner_x, coor_corner_y], dim=-1)\n\n                    h, w = img_pad_shape\n                    on_the_image = (coor_x > 0) * (coor_x < w) * (coor_y > 0) * (coor_y < h)\n                    on_the_image = on_the_image.squeeze()\n                    # skip the following computation if no object query fall on current image\n                    if on_the_image.sum() <= 1:\n                        continue\n                    on_the_image_mask[sample_idx, on_the_image] = view_idx\n\n                    # add spatial constraint\n                    center_ys = (coor_y[on_the_image] / self.out_size_factor_img)\n                    center_xs = (coor_x[on_the_image] / self.out_size_factor_img)\n                    centers = torch.cat([center_xs, center_ys], dim=-1).int()  # center on the feature map\n                    corners = (coor_corner_xy[on_the_image].max(1).values - coor_corner_xy[on_the_image].min(1).values) / self.out_size_factor_img\n                    radius = torch.ceil(corners.norm(dim=-1, p=2) / 2).int()  # radius of the minimum circumscribed circle of the wireframe\n                    sigma = (radius * 2 + 1) / 6.0\n                    distance = (centers[:, None, :] - (img_feat_pos - 0.5)).norm(dim=-1) ** 2\n                    gaussian_mask = (-distance / (2 * sigma[:, None] ** 2)).exp()\n                    gaussian_mask[gaussian_mask < torch.finfo(torch.float32).eps] = 0\n                    attn_mask = gaussian_mask\n\n                    query_feat_view = prev_query_feat[sample_idx, :, on_the_image]\n                    query_pos_view = torch.cat([center_xs, center_ys], dim=-1)\n                    query_feat_view = self.decoder[self.num_decoder_layers](query_feat_view[None], img_feat_flatten[sample_idx:sample_idx + 1, view_idx], query_pos_view[None], img_feat_pos, attn_mask=attn_mask.log())\n                    query_feat[sample_idx, :, on_the_image] = query_feat_view.clone()\n\n            self.on_the_image_mask = (on_the_image_mask != -1)\n            res_layer = self.prediction_heads[self.num_decoder_layers](torch.cat([query_feat, prev_query_feat], dim=1))\n            res_layer['center'] = res_layer['center'] + query_pos.permute(0, 2, 1)\n            for key, value in res_layer.items():\n                pred_dim = value.shape[1]\n                res_layer[key][~self.on_the_image_mask.unsqueeze(1).repeat(1, pred_dim, 1)] = first_res_layer[key][~self.on_the_image_mask.unsqueeze(1).repeat(1, pred_dim, 1)]\n            ret_dicts.append(res_layer)\n\n        if self.initialize_by_heatmap:\n            ret_dicts[0]['query_heatmap_score'] = heatmap.gather(index=top_proposals_index[:, None, :].expand(-1, self.num_classes, -1), dim=-1)  # [bs, num_classes, num_proposals]\n            if 
self.fuse_img:\n                ret_dicts[0]['dense_heatmap'] = dense_heatmap_img\n            else:\n                ret_dicts[0]['dense_heatmap'] = dense_heatmap\n\n        if self.auxiliary is False:\n            # only return the results of last decoder layer\n            return [ret_dicts[-1]]\n\n        # return all the layers' results for auxiliary supervision\n        new_res = {}\n        for key in ret_dicts[0].keys():\n            if key not in ['dense_heatmap', 'dense_heatmap_old', 'query_heatmap_score']:\n                new_res[key] = torch.cat([ret_dict[key] for ret_dict in ret_dicts], dim=-1)\n            else:\n                new_res[key] = ret_dicts[0][key]\n        return [new_res]\n\n    def forward(self, feats, img_feats, img_metas):\n        \"\"\"Forward pass.\n\n        Args:\n            feats (list[torch.Tensor]): Multi-level features, e.g.,\n                features produced by FPN.\n\n        Returns:\n            tuple(list[dict]): Output results, first indexed by level, then by layer.\n        \"\"\"\n        if img_feats is None:\n            img_feats = [None]\n        res = multi_apply(self.forward_single, feats, img_feats, [img_metas])\n        assert len(res) == 1, \"only supports one level of features.\"\n        return res\n\n    def get_targets(self, gt_bboxes_3d, gt_labels_3d, preds_dict):\n        \"\"\"Generate training targets.\n\n        Args:\n            gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): Ground truth boxes.\n            gt_labels_3d (torch.Tensor): Labels of boxes.\n            preds_dict (tuple of dict): first indexed by layer (default 1)\n        Returns:\n            tuple[torch.Tensor]: Tuple of target including \\\n                the following results in order.\n\n                - torch.Tensor: classification target.  [BS, num_proposals]\n                - torch.Tensor: classification weights (mask)  [BS, num_proposals]\n                - torch.Tensor: regression target. [BS, num_proposals, 8]\n                - torch.Tensor: regression weights. 
[BS, num_proposals, 8]\n        \"\"\"\n        # change preds_dict into list of dict (index by batch_id)\n        # preds_dict[0]['center'].shape [bs, 3, num_proposal]\n        list_of_pred_dict = []\n        for batch_idx in range(len(gt_bboxes_3d)):\n            pred_dict = {}\n            for key in preds_dict[0].keys():\n                pred_dict[key] = preds_dict[0][key][batch_idx:batch_idx + 1]\n            list_of_pred_dict.append(pred_dict)\n\n        assert len(gt_bboxes_3d) == len(list_of_pred_dict)\n\n        res_tuple = multi_apply(self.get_targets_single, gt_bboxes_3d, gt_labels_3d, list_of_pred_dict, np.arange(len(gt_labels_3d)))\n        labels = torch.cat(res_tuple[0], dim=0)\n        label_weights = torch.cat(res_tuple[1], dim=0)\n        bbox_targets = torch.cat(res_tuple[2], dim=0)\n        bbox_weights = torch.cat(res_tuple[3], dim=0)\n        ious = torch.cat(res_tuple[4], dim=0)\n        num_pos = np.sum(res_tuple[5])\n        matched_ious = np.mean(res_tuple[6])\n        if self.initialize_by_heatmap:\n            heatmap = torch.cat(res_tuple[7], dim=0)\n            return labels, label_weights, bbox_targets, bbox_weights, ious, num_pos, matched_ious, heatmap\n        else:\n            return labels, label_weights, bbox_targets, bbox_weights, ious, num_pos, matched_ious\n\n    def get_targets_single(self, gt_bboxes_3d, gt_labels_3d, preds_dict, batch_idx):\n        \"\"\"Generate training targets for a single sample.\n\n        Args:\n            gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): Ground truth gt boxes.\n            gt_labels_3d (torch.Tensor): Labels of boxes.\n            preds_dict (dict): dict of prediction result for a single sample\n        Returns:\n            tuple[torch.Tensor]: Tuple of target including \\\n                the following results in order.\n\n                - torch.Tensor: classification target.  [1, num_proposals]\n                - torch.Tensor: classification weights (mask)  [1, num_proposals]\n                - torch.Tensor: regression target. [1, num_proposals, 8]\n                - torch.Tensor: regression weights. [1, num_proposals, 8]\n                - torch.Tensor: iou target. [1, num_proposals]\n                - int: number of positive proposals\n        \"\"\"\n        num_proposals = preds_dict['center'].shape[-1]\n\n        # get pred boxes, carefully ! 
donot change the network outputs\n        score = copy.deepcopy(preds_dict['heatmap'].detach())\n        center = copy.deepcopy(preds_dict['center'].detach())\n        height = copy.deepcopy(preds_dict['height'].detach())\n        dim = copy.deepcopy(preds_dict['dim'].detach())\n        rot = copy.deepcopy(preds_dict['rot'].detach())\n        if 'vel' in preds_dict.keys():\n            vel = copy.deepcopy(preds_dict['vel'].detach())\n        else:\n            vel = None\n\n        boxes_dict = self.bbox_coder.decode(score, rot, dim, center, height, vel)  # decode the prediction to real world metric bbox\n        bboxes_tensor = boxes_dict[0]['bboxes']\n        gt_bboxes_tensor = gt_bboxes_3d.tensor.to(score.device)\n        # each layer should do label assign seperately.\n        if self.auxiliary:\n            num_layer = self.num_decoder_layers\n        else:\n            num_layer = 1\n\n        assign_result_list = []\n        for idx_layer in range(num_layer):\n            bboxes_tensor_layer = bboxes_tensor[self.num_proposals * idx_layer:self.num_proposals * (idx_layer + 1), :]\n            score_layer = score[..., self.num_proposals * idx_layer:self.num_proposals * (idx_layer + 1)]\n\n            if self.train_cfg.assigner.type == 'HungarianAssigner3D':\n                assign_result = self.bbox_assigner.assign(bboxes_tensor_layer, gt_bboxes_tensor, gt_labels_3d, score_layer, self.train_cfg)\n            elif self.train_cfg.assigner.type == 'HeuristicAssigner':\n                assign_result = self.bbox_assigner.assign(bboxes_tensor_layer, gt_bboxes_tensor, None, gt_labels_3d, self.query_labels[batch_idx])\n            else:\n                raise NotImplementedError\n            assign_result_list.append(assign_result)\n\n        # combine assign result of each layer\n        assign_result_ensemble = AssignResult(\n            num_gts=sum([res.num_gts for res in assign_result_list]),\n            gt_inds=torch.cat([res.gt_inds for res in assign_result_list]),\n            max_overlaps=torch.cat([res.max_overlaps for res in assign_result_list]),\n            labels=torch.cat([res.labels for res in assign_result_list]),\n        )\n        sampling_result = self.bbox_sampler.sample(assign_result_ensemble, bboxes_tensor, gt_bboxes_tensor)\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        assert len(pos_inds) + len(neg_inds) == num_proposals\n\n        # create target for loss computation\n        bbox_targets = torch.zeros([num_proposals, self.bbox_coder.code_size]).to(center.device)\n        bbox_weights = torch.zeros([num_proposals, self.bbox_coder.code_size]).to(center.device)\n        ious = assign_result_ensemble.max_overlaps\n        ious = torch.clamp(ious, min=0.0, max=1.0)\n        labels = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)\n        label_weights = bboxes_tensor.new_zeros(num_proposals, dtype=torch.long)\n\n        if gt_labels_3d is not None:  # default label is -1\n            labels += self.num_classes\n\n        # both pos and neg have classification loss, only pos has regression and iou loss\n        if len(pos_inds) > 0:\n            pos_bbox_targets = self.bbox_coder.encode(sampling_result.pos_gt_bboxes)\n\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n            bbox_weights[pos_inds, :] = 1.0\n\n            if gt_labels_3d is None:\n                labels[pos_inds] = 1\n            else:\n                labels[pos_inds] = gt_labels_3d[sampling_result.pos_assigned_gt_inds]\n         
   if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        # # compute dense heatmap targets\n        if self.initialize_by_heatmap:\n            device = labels.device\n            gt_bboxes_3d = torch.cat([gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]], dim=1).to(device)\n            grid_size = torch.tensor(self.train_cfg['grid_size'])\n            pc_range = torch.tensor(self.train_cfg['point_cloud_range'])\n            voxel_size = torch.tensor(self.train_cfg['voxel_size'])\n            feature_map_size = grid_size[:2] // self.train_cfg['out_size_factor']  # [x_len, y_len]\n            heatmap = gt_bboxes_3d.new_zeros(self.num_classes, feature_map_size[1], feature_map_size[0])\n            for idx in range(len(gt_bboxes_3d)):\n                width = gt_bboxes_3d[idx][3]\n                length = gt_bboxes_3d[idx][4]\n                width = width / voxel_size[0] / self.train_cfg['out_size_factor']\n                length = length / voxel_size[1] / self.train_cfg['out_size_factor']\n                if width > 0 and length > 0:\n                    radius = gaussian_radius((length, width), min_overlap=self.train_cfg['gaussian_overlap'])\n                    radius = max(self.train_cfg['min_radius'], int(radius))\n                    x, y = gt_bboxes_3d[idx][0], gt_bboxes_3d[idx][1]\n\n                    coor_x = (x - pc_range[0]) / voxel_size[0] / self.train_cfg['out_size_factor']\n                    coor_y = (y - pc_range[1]) / voxel_size[1] / self.train_cfg['out_size_factor']\n\n                    center = torch.tensor([coor_x, coor_y], dtype=torch.float32, device=device)\n                    center_int = center.to(torch.int32)\n                    draw_heatmap_gaussian(heatmap[gt_labels_3d[idx]], center_int, radius)\n\n            mean_iou = ious[pos_inds].sum() / max(len(pos_inds), 1)\n            return labels[None], label_weights[None], bbox_targets[None], bbox_weights[None], ious[None], int(pos_inds.shape[0]), float(mean_iou), heatmap[None]\n\n        else:\n            mean_iou = ious[pos_inds].sum() / max(len(pos_inds), 1)\n            return labels[None], label_weights[None], bbox_targets[None], bbox_weights[None], ious[None], int(pos_inds.shape[0]), float(mean_iou)\n\n    @force_fp32(apply_to=('preds_dicts'))\n    def loss(self, gt_bboxes_3d, gt_labels_3d, preds_dicts, **kwargs):\n        \"\"\"Loss function for CenterHead.\n\n        Args:\n            gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground\n                truth gt boxes.\n            gt_labels_3d (list[torch.Tensor]): Labels of boxes.\n            preds_dicts (list[list[dict]]): Output of forward function.\n\n        Returns:\n            dict[str:torch.Tensor]: Loss of heatmap and bbox of each task.\n        \"\"\"\n        if self.initialize_by_heatmap:\n            labels, label_weights, bbox_targets, bbox_weights, ious, num_pos, matched_ious, heatmap = self.get_targets(gt_bboxes_3d, gt_labels_3d, preds_dicts[0])\n        else:\n            labels, label_weights, bbox_targets, bbox_weights, ious, num_pos, matched_ious = self.get_targets(gt_bboxes_3d, gt_labels_3d, preds_dicts[0])\n        if hasattr(self, 'on_the_image_mask'):\n            label_weights = label_weights * self.on_the_image_mask\n            bbox_weights = bbox_weights * self.on_the_image_mask[:, :, None]\n            num_pos 
= bbox_weights.max(-1).values.sum()\n        preds_dict = preds_dicts[0][0]\n        loss_dict = dict()\n\n        if self.initialize_by_heatmap:\n            # compute heatmap loss\n            loss_heatmap = self.loss_heatmap(clip_sigmoid(preds_dict['dense_heatmap']), heatmap, avg_factor=max(heatmap.eq(1).float().sum().item(), 1))\n            loss_dict['loss_heatmap'] = loss_heatmap\n\n        # compute loss for each layer\n        for idx_layer in range(self.num_decoder_layers if self.auxiliary else 1):\n            if idx_layer == self.num_decoder_layers - 1 or (idx_layer == 0 and self.auxiliary is False):\n                prefix = 'layer_-1'\n            else:\n                prefix = f'layer_{idx_layer}'\n\n            layer_labels = labels[..., idx_layer * self.num_proposals:(idx_layer + 1) * self.num_proposals].reshape(-1)\n            layer_label_weights = label_weights[..., idx_layer * self.num_proposals:(idx_layer + 1) * self.num_proposals].reshape(-1)\n            layer_score = preds_dict['heatmap'][..., idx_layer * self.num_proposals:(idx_layer + 1) * self.num_proposals]\n            layer_cls_score = layer_score.permute(0, 2, 1).reshape(-1, self.num_classes)\n            layer_loss_cls = self.loss_cls(layer_cls_score, layer_labels, layer_label_weights, avg_factor=max(num_pos, 1))\n\n            layer_center = preds_dict['center'][..., idx_layer * self.num_proposals:(idx_layer + 1) * self.num_proposals]\n            layer_height = preds_dict['height'][..., idx_layer * self.num_proposals:(idx_layer + 1) * self.num_proposals]\n            layer_rot = preds_dict['rot'][..., idx_layer * self.num_proposals:(idx_layer + 1) * self.num_proposals]\n            layer_dim = preds_dict['dim'][..., idx_layer * self.num_proposals:(idx_layer + 1) * self.num_proposals]\n            preds = torch.cat([layer_center, layer_height, layer_dim, layer_rot], dim=1).permute(0, 2, 1)  # [BS, num_proposals, code_size]\n            if 'vel' in preds_dict.keys():\n                layer_vel = preds_dict['vel'][..., idx_layer * self.num_proposals:(idx_layer + 1) * self.num_proposals]\n                preds = torch.cat([layer_center, layer_height, layer_dim, layer_rot, layer_vel], dim=1).permute(0, 2, 1)  # [BS, num_proposals, code_size]\n            code_weights = self.train_cfg.get('code_weights', None)\n            layer_bbox_weights = bbox_weights[:, idx_layer * self.num_proposals:(idx_layer + 1) * self.num_proposals, :]\n            layer_reg_weights = layer_bbox_weights * layer_bbox_weights.new_tensor(code_weights)\n            layer_bbox_targets = bbox_targets[:, idx_layer * self.num_proposals:(idx_layer + 1) * self.num_proposals, :]\n            layer_loss_bbox = self.loss_bbox(preds, layer_bbox_targets, layer_reg_weights, avg_factor=max(num_pos, 1))\n\n            # layer_iou = preds_dict['iou'][..., idx_layer*self.num_proposals:(idx_layer+1)*self.num_proposals].squeeze(1)\n            # layer_iou_target = ious[..., idx_layer*self.num_proposals:(idx_layer+1)*self.num_proposals]\n            # layer_loss_iou = self.loss_iou(layer_iou, layer_iou_target, layer_bbox_weights.max(-1).values, avg_factor=max(num_pos, 1))\n\n            loss_dict[f'{prefix}_loss_cls'] = layer_loss_cls\n            loss_dict[f'{prefix}_loss_bbox'] = layer_loss_bbox\n            # loss_dict[f'{prefix}_loss_iou'] = layer_loss_iou\n\n        loss_dict[f'matched_ious'] = layer_loss_cls.new_tensor(matched_ious)\n\n        return loss_dict\n\n    def get_bboxes(self, preds_dicts, img_metas, img=None, rescale=False, 
for_roi=False):\n        \"\"\"Generate bboxes from bbox head predictions.\n\n        Args:\n            preds_dicts (tuple[list[dict]]): Prediction results.\n\n        Returns:\n            list[list[dict]]: Decoded bbox, scores and labels for each layer & each batch\n        \"\"\"\n        rets = []\n        for layer_id, preds_dict in enumerate(preds_dicts):\n            batch_size = preds_dict[0]['heatmap'].shape[0]\n            batch_score = preds_dict[0]['heatmap'][..., -self.num_proposals:].sigmoid()\n            # if self.loss_iou.loss_weight != 0:\n            #    batch_score = torch.sqrt(batch_score * preds_dict[0]['iou'][..., -self.num_proposals:].sigmoid())\n            one_hot = F.one_hot(self.query_labels, num_classes=self.num_classes).permute(0, 2, 1)\n            batch_score = batch_score * preds_dict[0]['query_heatmap_score'] * one_hot\n\n            batch_center = preds_dict[0]['center'][..., -self.num_proposals:]\n            batch_height = preds_dict[0]['height'][..., -self.num_proposals:]\n            batch_dim = preds_dict[0]['dim'][..., -self.num_proposals:]\n            batch_rot = preds_dict[0]['rot'][..., -self.num_proposals:]\n            batch_vel = None\n            if 'vel' in preds_dict[0]:\n                batch_vel = preds_dict[0]['vel'][..., -self.num_proposals:]\n\n            temp = self.bbox_coder.decode(batch_score, batch_rot, batch_dim, batch_center, batch_height, batch_vel, filter=True)\n\n            if self.test_cfg['dataset'] == 'nuScenes':\n                self.tasks = [\n                    dict(num_class=8, class_names=[], indices=[0, 1, 2, 3, 4, 5, 6, 7], radius=-1),\n                    dict(num_class=1, class_names=['pedestrian'], indices=[8], radius=0.175),\n                    dict(num_class=1, class_names=['traffic_cone'], indices=[9], radius=0.175),\n                ]\n            elif self.test_cfg['dataset'] == 'Waymo':\n                self.tasks = [\n                    dict(num_class=1, class_names=['Car'], indices=[0], radius=0.7),\n                    dict(num_class=1, class_names=['Pedestrian'], indices=[1], radius=0.7),\n                    dict(num_class=1, class_names=['Cyclist'], indices=[2], radius=0.7),\n                ]\n\n            ret_layer = []\n            for i in range(batch_size):\n                boxes3d = temp[i]['bboxes']\n                scores = temp[i]['scores']\n                labels = temp[i]['labels']\n                ## adopt circle nms for different categories\n                if self.test_cfg['nms_type'] != None:\n                    keep_mask = torch.zeros_like(scores)\n                    for task in self.tasks:\n                        task_mask = torch.zeros_like(scores)\n                        for cls_idx in task['indices']:\n                            task_mask += labels == cls_idx\n                        task_mask = task_mask.bool()\n                        if task['radius'] > 0:\n                            if self.test_cfg['nms_type'] == 'circle':\n                                boxes_for_nms = torch.cat([boxes3d[task_mask][:, :2], scores[:, None][task_mask]], dim=1)\n                                task_keep_indices = torch.tensor(\n                                    circle_nms(\n                                        boxes_for_nms.detach().cpu().numpy(),\n                                        task['radius'],\n                                    )\n                                )\n                            else:\n                                boxes_for_nms = 
xywhr2xyxyr(img_metas[i]['box_type_3d'](boxes3d[task_mask][:, :7], 7).bev)\n                                top_scores = scores[task_mask]\n                                task_keep_indices = nms_gpu(\n                                    boxes_for_nms,\n                                    top_scores,\n                                    thresh=task['radius'],\n                                    pre_maxsize=self.test_cfg['pre_maxsize'],\n                                    post_max_size=self.test_cfg['post_maxsize'],\n                                )\n                        else:\n                            task_keep_indices = torch.arange(task_mask.sum())\n                        if task_keep_indices.shape[0] != 0:\n                            keep_indices = torch.where(task_mask != 0)[0][task_keep_indices]\n                            keep_mask[keep_indices] = 1\n                    keep_mask = keep_mask.bool()\n                    ret = dict(bboxes=boxes3d[keep_mask], scores=scores[keep_mask], labels=labels[keep_mask])\n                else:  # no nms\n                    ret = dict(bboxes=boxes3d, scores=scores, labels=labels)\n                ret_layer.append(ret)\n            rets.append(ret_layer)\n        assert len(rets) == 1\n        assert len(rets[0]) == 1\n        res = [[\n            img_metas[0]['box_type_3d'](rets[0][0]['bboxes'], box_dim=rets[0][0]['bboxes'].shape[-1]),\n            rets[0][0]['scores'],\n            rets[0][0]['labels'].int()\n        ]]\n        return res\n"
  },
  {
    "path": "mmdet3d/models/dense_heads/vote_head.py",
    "content": "import numpy as np\nimport torch\nfrom mmcv.runner import force_fp32\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.post_processing import aligned_3d_nms\nfrom mmdet3d.models.builder import build_loss\nfrom mmdet3d.models.losses import chamfer_distance\nfrom mmdet3d.models.model_utils import VoteModule\nfrom mmdet3d.ops import build_sa_module, furthest_point_sample\nfrom mmdet.core import build_bbox_coder, multi_apply\nfrom mmdet.models import HEADS\nfrom .base_conv_bbox_head import BaseConvBboxHead\n\n\n@HEADS.register_module()\nclass VoteHead(nn.Module):\n    r\"\"\"Bbox head of `Votenet <https://arxiv.org/abs/1904.09664>`_.\n\n    Args:\n        num_classes (int): The number of class.\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and\n            decoding boxes.\n        train_cfg (dict): Config for training.\n        test_cfg (dict): Config for testing.\n        vote_module_cfg (dict): Config of VoteModule for point-wise votes.\n        vote_aggregation_cfg (dict): Config of vote aggregation layer.\n        pred_layer_cfg (dict): Config of classfication and regression\n            prediction layers.\n        conv_cfg (dict): Config of convolution in prediction layer.\n        norm_cfg (dict): Config of BN in prediction layer.\n        objectness_loss (dict): Config of objectness loss.\n        center_loss (dict): Config of center loss.\n        dir_class_loss (dict): Config of direction classification loss.\n        dir_res_loss (dict): Config of direction residual regression loss.\n        size_class_loss (dict): Config of size classification loss.\n        size_res_loss (dict): Config of size residual regression loss.\n        semantic_loss (dict): Config of point-wise semantic segmentation loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 bbox_coder,\n                 train_cfg=None,\n                 test_cfg=None,\n                 vote_module_cfg=None,\n                 vote_aggregation_cfg=None,\n                 pred_layer_cfg=None,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 objectness_loss=None,\n                 center_loss=None,\n                 dir_class_loss=None,\n                 dir_res_loss=None,\n                 size_class_loss=None,\n                 size_res_loss=None,\n                 semantic_loss=None,\n                 iou_loss=None):\n        super(VoteHead, self).__init__()\n        self.num_classes = num_classes\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.gt_per_seed = vote_module_cfg['gt_per_seed']\n        self.num_proposal = vote_aggregation_cfg['num_point']\n\n        self.objectness_loss = build_loss(objectness_loss)\n        self.center_loss = build_loss(center_loss)\n        self.dir_res_loss = build_loss(dir_res_loss)\n        self.dir_class_loss = build_loss(dir_class_loss)\n        self.size_res_loss = build_loss(size_res_loss)\n        if size_class_loss is not None:\n            self.size_class_loss = build_loss(size_class_loss)\n        if semantic_loss is not None:\n            self.semantic_loss = build_loss(semantic_loss)\n        if iou_loss is not None:\n            self.iou_loss = build_loss(iou_loss)\n        else:\n            self.iou_loss = None\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.num_sizes = self.bbox_coder.num_sizes\n        self.num_dir_bins = 
self.bbox_coder.num_dir_bins\n\n        self.vote_module = VoteModule(**vote_module_cfg)\n        self.vote_aggregation = build_sa_module(vote_aggregation_cfg)\n        self.fp16_enabled = False\n\n        # Bbox classification and regression\n        self.conv_pred = BaseConvBboxHead(\n            **pred_layer_cfg,\n            num_cls_out_channels=self._get_cls_out_channels(),\n            num_reg_out_channels=self._get_reg_out_channels())\n\n    def init_weights(self):\n        \"\"\"Initialize weights of VoteHead.\"\"\"\n        pass\n\n    def _get_cls_out_channels(self):\n        \"\"\"Return the channel number of classification outputs.\"\"\"\n        # Class numbers (k) + objectness (2)\n        return self.num_classes + 2\n\n    def _get_reg_out_channels(self):\n        \"\"\"Return the channel number of regression outputs.\"\"\"\n        # Center residual (3),\n        # heading class+residual (num_dir_bins*2),\n        # size class+residual (num_sizes*4)\n        return 3 + self.num_dir_bins * 2 + self.num_sizes * 4\n\n    def _extract_input(self, feat_dict):\n        \"\"\"Extract inputs from features dictionary.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n\n        Returns:\n            torch.Tensor: Coordinates of input points.\n            torch.Tensor: Features of input points.\n            torch.Tensor: Indices of input points.\n        \"\"\"\n\n        # for imvotenet\n        if 'seed_points' in feat_dict and \\\n           'seed_features' in feat_dict and \\\n           'seed_indices' in feat_dict:\n            seed_points = feat_dict['seed_points']\n            seed_features = feat_dict['seed_features']\n            seed_indices = feat_dict['seed_indices']\n        # for votenet\n        else:\n            seed_points = feat_dict['fp_xyz'][-1]\n            seed_features = feat_dict['fp_features'][-1]\n            seed_indices = feat_dict['fp_indices'][-1]\n\n        return seed_points, seed_features, seed_indices\n\n    def forward(self, feat_dict, sample_mod):\n        \"\"\"Forward pass.\n\n        Note:\n            The forward of VoteHead is divided into 4 steps:\n\n                1. Generate vote_points from seed_points.\n                2. Aggregate vote_points.\n                3. Predict bbox and score.\n                4. Decode predictions.\n\n        Args:\n            feat_dict (dict): Feature dict from backbone.\n            sample_mod (str): Sample mode for vote aggregation layer.\n                Valid modes are \"vote\", \"seed\", \"random\" and \"spec\".\n\n        Returns:\n            dict: Predictions of vote head.\n        \"\"\"\n        assert sample_mod in ['vote', 'seed', 'random', 'spec']\n\n        seed_points, seed_features, seed_indices = self._extract_input(\n            feat_dict)\n\n        # 1. generate vote_points from seed_points\n        vote_points, vote_features, vote_offset = self.vote_module(\n            seed_points, seed_features)\n        results = dict(\n            seed_points=seed_points,\n            seed_indices=seed_indices,\n            vote_points=vote_points,\n            vote_features=vote_features,\n            vote_offset=vote_offset)\n\n        # 2. 
aggregate vote_points\n        if sample_mod == 'vote':\n            # use fps in vote_aggregation\n            aggregation_inputs = dict(\n                points_xyz=vote_points, features=vote_features)\n        elif sample_mod == 'seed':\n            # FPS on seed and choose the votes corresponding to the seeds\n            sample_indices = furthest_point_sample(seed_points,\n                                                   self.num_proposal)\n            aggregation_inputs = dict(\n                points_xyz=vote_points,\n                features=vote_features,\n                indices=sample_indices)\n        elif sample_mod == 'random':\n            # Random sampling from the votes\n            batch_size, num_seed = seed_points.shape[:2]\n            sample_indices = seed_points.new_tensor(\n                torch.randint(0, num_seed, (batch_size, self.num_proposal)),\n                dtype=torch.int32)\n            aggregation_inputs = dict(\n                points_xyz=vote_points,\n                features=vote_features,\n                indices=sample_indices)\n        elif sample_mod == 'spec':\n            # Specify the new center in vote_aggregation\n            aggregation_inputs = dict(\n                points_xyz=seed_points,\n                features=seed_features,\n                target_xyz=vote_points)\n        else:\n            raise NotImplementedError(\n                f'Sample mode {sample_mod} is not supported!')\n\n        vote_aggregation_ret = self.vote_aggregation(**aggregation_inputs)\n        aggregated_points, features, aggregated_indices = vote_aggregation_ret\n\n        results['aggregated_points'] = aggregated_points\n        results['aggregated_features'] = features\n        results['aggregated_indices'] = aggregated_indices\n\n        # 3. predict bbox and score\n        cls_predictions, reg_predictions = self.conv_pred(features)\n\n        # 4. 
decode predictions\n        decode_res = self.bbox_coder.split_pred(cls_predictions,\n                                                reg_predictions,\n                                                aggregated_points)\n\n        results.update(decode_res)\n\n        return results\n\n    @force_fp32(apply_to=('bbox_preds', ))\n    def loss(self,\n             bbox_preds,\n             points,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             pts_semantic_mask=None,\n             pts_instance_mask=None,\n             img_metas=None,\n             gt_bboxes_ignore=None,\n             ret_target=False):\n        \"\"\"Compute loss.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of vote head.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \\\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (None | list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (None | list[torch.Tensor]): Point-wise\n                instance mask.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (None | list[torch.Tensor]): Specify\n                which bounding.\n            ret_target (Bool): Return targets or not.\n\n        Returns:\n            dict: Losses of Votenet.\n        \"\"\"\n        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,\n                                   pts_semantic_mask, pts_instance_mask,\n                                   bbox_preds)\n        (vote_targets, vote_target_masks, size_class_targets, size_res_targets,\n         dir_class_targets, dir_res_targets, center_targets,\n         assigned_center_targets, mask_targets, valid_gt_masks,\n         objectness_targets, objectness_weights, box_loss_weights,\n         valid_gt_weights) = targets\n\n        # calculate vote loss\n        vote_loss = self.vote_module.get_loss(bbox_preds['seed_points'],\n                                              bbox_preds['vote_points'],\n                                              bbox_preds['seed_indices'],\n                                              vote_target_masks, vote_targets)\n\n        # calculate objectness loss\n        objectness_loss = self.objectness_loss(\n            bbox_preds['obj_scores'].transpose(2, 1),\n            objectness_targets,\n            weight=objectness_weights)\n\n        # calculate center loss\n        source2target_loss, target2source_loss = self.center_loss(\n            bbox_preds['center'],\n            center_targets,\n            src_weight=box_loss_weights,\n            dst_weight=valid_gt_weights)\n        center_loss = source2target_loss + target2source_loss\n\n        # calculate direction class loss\n        dir_class_loss = self.dir_class_loss(\n            bbox_preds['dir_class'].transpose(2, 1),\n            dir_class_targets,\n            weight=box_loss_weights)\n\n        # calculate direction residual loss\n        batch_size, proposal_num = size_class_targets.shape[:2]\n        heading_label_one_hot = vote_targets.new_zeros(\n            (batch_size, proposal_num, self.num_dir_bins))\n        heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1)\n        dir_res_norm = torch.sum(\n            bbox_preds['dir_res_norm'] * heading_label_one_hot, -1)\n        dir_res_loss = self.dir_res_loss(\n            
dir_res_norm, dir_res_targets, weight=box_loss_weights)\n\n        # calculate size class loss\n        size_class_loss = self.size_class_loss(\n            bbox_preds['size_class'].transpose(2, 1),\n            size_class_targets,\n            weight=box_loss_weights)\n\n        # calculate size residual loss\n        one_hot_size_targets = vote_targets.new_zeros(\n            (batch_size, proposal_num, self.num_sizes))\n        one_hot_size_targets.scatter_(2, size_class_targets.unsqueeze(-1), 1)\n        one_hot_size_targets_expand = one_hot_size_targets.unsqueeze(\n            -1).repeat(1, 1, 1, 3).contiguous()\n        size_residual_norm = torch.sum(\n            bbox_preds['size_res_norm'] * one_hot_size_targets_expand, 2)\n        box_loss_weights_expand = box_loss_weights.unsqueeze(-1).repeat(\n            1, 1, 3)\n        size_res_loss = self.size_res_loss(\n            size_residual_norm,\n            size_res_targets,\n            weight=box_loss_weights_expand)\n\n        # calculate semantic loss\n        semantic_loss = self.semantic_loss(\n            bbox_preds['sem_scores'].transpose(2, 1),\n            mask_targets,\n            weight=box_loss_weights)\n\n        losses = dict(\n            vote_loss=vote_loss,\n            objectness_loss=objectness_loss,\n            semantic_loss=semantic_loss,\n            center_loss=center_loss,\n            dir_class_loss=dir_class_loss,\n            dir_res_loss=dir_res_loss,\n            size_class_loss=size_class_loss,\n            size_res_loss=size_res_loss)\n\n        if self.iou_loss:\n            corners_pred = self.bbox_coder.decode_corners(\n                bbox_preds['center'], size_residual_norm,\n                one_hot_size_targets_expand)\n            corners_target = self.bbox_coder.decode_corners(\n                assigned_center_targets, size_res_targets,\n                one_hot_size_targets_expand)\n            iou_loss = self.iou_loss(\n                corners_pred, corners_target, weight=box_loss_weights)\n            losses['iou_loss'] = iou_loss\n\n        if ret_target:\n            losses['targets'] = targets\n\n        return losses\n\n    def get_targets(self,\n                    points,\n                    gt_bboxes_3d,\n                    gt_labels_3d,\n                    pts_semantic_mask=None,\n                    pts_instance_mask=None,\n                    bbox_preds=None):\n        \"\"\"Generate targets of vote head.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \\\n                bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): Labels of each batch.\n            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance\n                label of each batch.\n            bbox_preds (torch.Tensor): Bounding box predictions of vote head.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of vote head.\n        \"\"\"\n        # find empty example\n        valid_gt_masks = list()\n        gt_num = list()\n        for index in range(len(gt_labels_3d)):\n            if len(gt_labels_3d[index]) == 0:\n                fake_box = gt_bboxes_3d[index].tensor.new_zeros(\n                    1, gt_bboxes_3d[index].tensor.shape[-1])\n                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)\n                
gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)\n                valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))\n                gt_num.append(1)\n            else:\n                valid_gt_masks.append(gt_labels_3d[index].new_ones(\n                    gt_labels_3d[index].shape))\n                gt_num.append(gt_labels_3d[index].shape[0])\n        max_gt_num = max(gt_num)\n\n        if pts_semantic_mask is None:\n            pts_semantic_mask = [None for i in range(len(gt_labels_3d))]\n            pts_instance_mask = [None for i in range(len(gt_labels_3d))]\n\n        aggregated_points = [\n            bbox_preds['aggregated_points'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        (vote_targets, vote_target_masks, size_class_targets, size_res_targets,\n         dir_class_targets, dir_res_targets, center_targets,\n         assigned_center_targets, mask_targets, objectness_targets,\n         objectness_masks) = multi_apply(self.get_targets_single, points,\n                                         gt_bboxes_3d, gt_labels_3d,\n                                         pts_semantic_mask, pts_instance_mask,\n                                         aggregated_points)\n\n        # pad targets as original code of votenet.\n        for index in range(len(gt_labels_3d)):\n            pad_num = max_gt_num - gt_labels_3d[index].shape[0]\n            center_targets[index] = F.pad(center_targets[index],\n                                          (0, 0, 0, pad_num))\n            valid_gt_masks[index] = F.pad(valid_gt_masks[index], (0, pad_num))\n\n        vote_targets = torch.stack(vote_targets)\n        vote_target_masks = torch.stack(vote_target_masks)\n        center_targets = torch.stack(center_targets)\n        valid_gt_masks = torch.stack(valid_gt_masks)\n\n        assigned_center_targets = torch.stack(assigned_center_targets)\n        objectness_targets = torch.stack(objectness_targets)\n        objectness_weights = torch.stack(objectness_masks)\n        objectness_weights /= (torch.sum(objectness_weights) + 1e-6)\n        box_loss_weights = objectness_targets.float() / (\n            torch.sum(objectness_targets).float() + 1e-6)\n        valid_gt_weights = valid_gt_masks.float() / (\n            torch.sum(valid_gt_masks.float()) + 1e-6)\n        dir_class_targets = torch.stack(dir_class_targets)\n        dir_res_targets = torch.stack(dir_res_targets)\n        size_class_targets = torch.stack(size_class_targets)\n        size_res_targets = torch.stack(size_res_targets)\n        mask_targets = torch.stack(mask_targets)\n\n        return (vote_targets, vote_target_masks, size_class_targets,\n                size_res_targets, dir_class_targets, dir_res_targets,\n                center_targets, assigned_center_targets, mask_targets,\n                valid_gt_masks, objectness_targets, objectness_weights,\n                box_loss_weights, valid_gt_weights)\n\n    def get_targets_single(self,\n                           points,\n                           gt_bboxes_3d,\n                           gt_labels_3d,\n                           pts_semantic_mask=None,\n                           pts_instance_mask=None,\n                           aggregated_points=None):\n        \"\"\"Generate targets of vote head for single batch.\n\n        Args:\n            points (torch.Tensor): Points of each batch.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \\\n                boxes of each batch.\n            gt_labels_3d (torch.Tensor): Labels of 
each batch.\n            pts_semantic_mask (None | torch.Tensor): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | torch.Tensor): Point-wise instance\n                label of each batch.\n            aggregated_points (torch.Tensor): Aggregated points from\n                vote aggregation layer.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of vote head.\n        \"\"\"\n        assert self.bbox_coder.with_rot or pts_semantic_mask is not None\n\n        gt_bboxes_3d = gt_bboxes_3d.to(points.device)\n\n        # generate votes target\n        num_points = points.shape[0]\n        if self.bbox_coder.with_rot:\n            vote_targets = points.new_zeros([num_points, 3 * self.gt_per_seed])\n            vote_target_masks = points.new_zeros([num_points],\n                                                 dtype=torch.long)\n            vote_target_idx = points.new_zeros([num_points], dtype=torch.long)\n            box_indices_all = gt_bboxes_3d.points_in_boxes(points)\n            for i in range(gt_labels_3d.shape[0]):\n                box_indices = box_indices_all[:, i]\n                indices = torch.nonzero(\n                    box_indices, as_tuple=False).squeeze(-1)\n                selected_points = points[indices]\n                vote_target_masks[indices] = 1\n                vote_targets_tmp = vote_targets[indices]\n                votes = gt_bboxes_3d.gravity_center[i].unsqueeze(\n                    0) - selected_points[:, :3]\n\n                for j in range(self.gt_per_seed):\n                    column_indices = torch.nonzero(\n                        vote_target_idx[indices] == j,\n                        as_tuple=False).squeeze(-1)\n                    vote_targets_tmp[column_indices,\n                                     int(j * 3):int(j * 3 +\n                                                    3)] = votes[column_indices]\n                    if j == 0:\n                        vote_targets_tmp[column_indices] = votes[\n                            column_indices].repeat(1, self.gt_per_seed)\n\n                vote_targets[indices] = vote_targets_tmp\n                vote_target_idx[indices] = torch.clamp(\n                    vote_target_idx[indices] + 1, max=2)\n        elif pts_semantic_mask is not None:\n            vote_targets = points.new_zeros([num_points, 3])\n            vote_target_masks = points.new_zeros([num_points],\n                                                 dtype=torch.long)\n\n            for i in torch.unique(pts_instance_mask):\n                indices = torch.nonzero(\n                    pts_instance_mask == i, as_tuple=False).squeeze(-1)\n                if pts_semantic_mask[indices[0]] < self.num_classes:\n                    selected_points = points[indices, :3]\n                    center = 0.5 * (\n                        selected_points.min(0)[0] + selected_points.max(0)[0])\n                    vote_targets[indices, :] = center - selected_points\n                    vote_target_masks[indices] = 1\n            vote_targets = vote_targets.repeat((1, self.gt_per_seed))\n        else:\n            raise NotImplementedError\n\n        (center_targets, size_class_targets, size_res_targets,\n         dir_class_targets,\n         dir_res_targets) = self.bbox_coder.encode(gt_bboxes_3d, gt_labels_3d)\n\n        proposal_num = aggregated_points.shape[0]\n        distance1, _, assignment, _ = chamfer_distance(\n            aggregated_points.unsqueeze(0),\n            
center_targets.unsqueeze(0),\n            reduction='none')\n        assignment = assignment.squeeze(0)\n        euclidean_distance1 = torch.sqrt(distance1.squeeze(0) + 1e-6)\n\n        objectness_targets = points.new_zeros((proposal_num), dtype=torch.long)\n        objectness_targets[\n            euclidean_distance1 < self.train_cfg['pos_distance_thr']] = 1\n\n        objectness_masks = points.new_zeros((proposal_num))\n        objectness_masks[\n            euclidean_distance1 < self.train_cfg['pos_distance_thr']] = 1.0\n        objectness_masks[\n            euclidean_distance1 > self.train_cfg['neg_distance_thr']] = 1.0\n\n        dir_class_targets = dir_class_targets[assignment]\n        dir_res_targets = dir_res_targets[assignment]\n        dir_res_targets /= (np.pi / self.num_dir_bins)\n        size_class_targets = size_class_targets[assignment]\n        size_res_targets = size_res_targets[assignment]\n\n        one_hot_size_targets = gt_bboxes_3d.tensor.new_zeros(\n            (proposal_num, self.num_sizes))\n        one_hot_size_targets.scatter_(1, size_class_targets.unsqueeze(-1), 1)\n        one_hot_size_targets = one_hot_size_targets.unsqueeze(-1).repeat(\n            1, 1, 3)\n        mean_sizes = size_res_targets.new_tensor(\n            self.bbox_coder.mean_sizes).unsqueeze(0)\n        pos_mean_sizes = torch.sum(one_hot_size_targets * mean_sizes, 1)\n        size_res_targets /= pos_mean_sizes\n\n        mask_targets = gt_labels_3d[assignment]\n        assigned_center_targets = center_targets[assignment]\n\n        return (vote_targets, vote_target_masks, size_class_targets,\n                size_res_targets, dir_class_targets,\n                dir_res_targets, center_targets, assigned_center_targets,\n                mask_targets.long(), objectness_targets, objectness_masks)\n\n    def get_bboxes(self,\n                   points,\n                   bbox_preds,\n                   input_metas,\n                   rescale=False,\n                   use_nms=True):\n        \"\"\"Generate bboxes from vote head predictions.\n\n        Args:\n            points (torch.Tensor): Input points.\n            bbox_preds (dict): Predictions from vote head.\n            input_metas (list[dict]): Point cloud and image's meta info.\n            rescale (bool): Whether to rescale bboxes.\n            use_nms (bool): Whether to apply NMS, skip nms postprocessing\n                while using vote head in rpn stage.\n\n        Returns:\n            list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.\n        \"\"\"\n        # decode boxes\n        obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]\n        sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)\n        bbox3d = self.bbox_coder.decode(bbox_preds)\n\n        if use_nms:\n            batch_size = bbox3d.shape[0]\n            results = list()\n            for b in range(batch_size):\n                bbox_selected, score_selected, labels = \\\n                    self.multiclass_nms_single(obj_scores[b], sem_scores[b],\n                                               bbox3d[b], points[b, ..., :3],\n                                               input_metas[b])\n                bbox = input_metas[b]['box_type_3d'](\n                    bbox_selected,\n                    box_dim=bbox_selected.shape[-1],\n                    with_yaw=self.bbox_coder.with_rot)\n                results.append((bbox, score_selected, labels))\n\n            return results\n        else:\n            return bbox3d\n\n    def 
multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,\n                              input_meta):\n        \"\"\"Multi-class nms in single batch.\n\n        Args:\n            obj_scores (torch.Tensor): Objectness score of bounding boxes.\n            sem_scores (torch.Tensor): semantic class score of bounding boxes.\n            bbox (torch.Tensor): Predicted bounding boxes.\n            points (torch.Tensor): Input points.\n            input_meta (dict): Point cloud and image's meta info.\n\n        Returns:\n            tuple[torch.Tensor]: Bounding boxes, scores and labels.\n        \"\"\"\n        bbox = input_meta['box_type_3d'](\n            bbox,\n            box_dim=bbox.shape[-1],\n            with_yaw=self.bbox_coder.with_rot,\n            origin=(0.5, 0.5, 0.5))\n        box_indices = bbox.points_in_boxes(points)\n\n        corner3d = bbox.corners\n        minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))\n        minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]\n        minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]\n\n        nonempty_box_mask = box_indices.T.sum(1) > 5\n\n        bbox_classes = torch.argmax(sem_scores, -1)\n        nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask],\n                                      obj_scores[nonempty_box_mask],\n                                      bbox_classes[nonempty_box_mask],\n                                      self.test_cfg.nms_thr)\n\n        # filter empty boxes and boxes with low score\n        scores_mask = (obj_scores > self.test_cfg.score_thr)\n        nonempty_box_inds = torch.nonzero(\n            nonempty_box_mask, as_tuple=False).flatten()\n        nonempty_mask = torch.zeros_like(bbox_classes).scatter(\n            0, nonempty_box_inds[nms_selected], 1)\n        selected = (nonempty_mask.bool() & scores_mask.bool())\n\n        if self.test_cfg.per_class_proposal:\n            bbox_selected, score_selected, labels = [], [], []\n            for k in range(sem_scores.shape[-1]):\n                bbox_selected.append(bbox[selected].tensor)\n                score_selected.append(obj_scores[selected] *\n                                      sem_scores[selected][:, k])\n                labels.append(\n                    torch.zeros_like(bbox_classes[selected]).fill_(k))\n            bbox_selected = torch.cat(bbox_selected, 0)\n            score_selected = torch.cat(score_selected, 0)\n            labels = torch.cat(labels, 0)\n        else:\n            bbox_selected = bbox[selected].tensor\n            score_selected = obj_scores[selected]\n            labels = bbox_classes[selected]\n\n        return bbox_selected, score_selected, labels\n"
  },
  {
    "path": "mmdet3d/models/detectors/__init__.py",
    "content": "from .base import Base3DDetector\nfrom .centerpoint import CenterPoint\nfrom .dynamic_voxelnet import DynamicVoxelNet\nfrom .h3dnet import H3DNet\nfrom .imvotenet import ImVoteNet\nfrom .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN\nfrom .mvx_two_stage import MVXTwoStageDetector\nfrom .parta2 import PartA2\nfrom .ssd3dnet import SSD3DNet\nfrom .votenet import VoteNet\nfrom .voxelnet import VoxelNet\nfrom .transfusion import TransFusionDetector\nfrom .sparsefusion import SparseFusionDetector\n\n__all__ = [\n    'Base3DDetector',\n    'VoxelNet',\n    'DynamicVoxelNet',\n    'MVXTwoStageDetector',\n    'DynamicMVXFasterRCNN',\n    'MVXFasterRCNN',\n    'PartA2',\n    'VoteNet',\n    'H3DNet',\n    'CenterPoint',\n    'SSD3DNet',\n    'ImVoteNet',\n    'TransFusionDetector',\n    'SparseFusionDetector',\n]\n"
  },
  {
    "path": "mmdet3d/models/detectors/base.py",
    "content": "import mmcv\nimport torch\nfrom mmcv.parallel import DataContainer as DC\nfrom mmcv.runner import auto_fp16\nfrom os import path as osp\n\nfrom mmdet3d.core import Box3DMode, Coord3DMode, show_result\nfrom mmdet.models.detectors import BaseDetector\n\n\nclass Base3DDetector(BaseDetector):\n    \"\"\"Base class for detectors.\"\"\"\n\n    def forward_test(self, points, img_metas, img=None, **kwargs):\n        \"\"\"\n        Args:\n            points (list[torch.Tensor]): the outer list indicates test-time\n                augmentations and inner torch.Tensor should have a shape NxC,\n                which contains all points in the batch.\n            img_metas (list[list[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch\n            img (list[torch.Tensor], optional): the outer\n                list indicates test-time augmentations and inner\n                torch.Tensor should have a shape NxCxHxW, which contains\n                all images in the batch. Defaults to None.\n        \"\"\"\n        for var, name in [(points, 'points'), (img_metas, 'img_metas')]:\n            if not isinstance(var, list):\n                raise TypeError('{} must be a list, but got {}'.format(\n                    name, type(var)))\n\n        num_augs = len(points)\n        if num_augs != len(img_metas):\n            raise ValueError(\n                'num of augmentations ({}) != num of image meta ({})'.format(\n                    len(points), len(img_metas)))\n\n        if num_augs == 1:\n            img = [img] if img is None else img\n            return self.simple_test(points[0], img_metas[0], img[0], **kwargs)\n        else:\n            return self.aug_test(points, img_metas, img, **kwargs)\n\n    @auto_fp16(apply_to=('img', 'points'))\n    def forward(self, return_loss=True, **kwargs):\n        \"\"\"Calls either forward_train or forward_test depending on whether\n        return_loss=True.\n\n        Note this setting will change the expected inputs. When\n        `return_loss=True`, img and img_metas are single-nested (i.e.\n        torch.Tensor and list[dict]), and when `resturn_loss=False`, img and\n        img_metas should be double nested (i.e.  
list[torch.Tensor],\n        list[list[dict]]), with the outer list indicating test time\n        augmentations.\n        \"\"\"\n        if return_loss:\n            return self.forward_train(**kwargs)\n        else:\n            return self.forward_test(**kwargs)\n\n    def show_results(self, data, result, out_dir):\n        \"\"\"Results visualization.\n\n        Args:\n            data (list[dict]): Input points and the information of the sample.\n            result (list[dict]): Prediction results.\n            out_dir (str): Output directory of visualization result.\n        \"\"\"\n        for batch_id in range(len(result)):\n            if isinstance(data['points'][0], DC):\n                points = data['points'][0]._data[0][batch_id].numpy()\n            elif mmcv.is_list_of(data['points'][0], torch.Tensor):\n                points = data['points'][0][batch_id]\n            else:\n                raise ValueError(\n                    f\"Unsupported data type {type(data['points'][0])} \"\n                    f'for visualization!')\n            if isinstance(data['img_metas'][0], DC):\n                pts_filename = data['img_metas'][0]._data[0][batch_id][\n                    'pts_filename']\n                box_mode_3d = data['img_metas'][0]._data[0][batch_id][\n                    'box_mode_3d']\n            elif mmcv.is_list_of(data['img_metas'][0], dict):\n                pts_filename = data['img_metas'][0][batch_id]['pts_filename']\n                box_mode_3d = data['img_metas'][0][batch_id]['box_mode_3d']\n            else:\n                raise ValueError(\n                    f\"Unsupported data type {type(data['img_metas'][0])} \"\n                    f'for visualization!')\n            file_name = osp.split(pts_filename)[-1].split('.')[0]\n\n            assert out_dir is not None, 'Expect out_dir, got none.'\n\n            pred_bboxes = result[batch_id]['boxes_3d']\n\n            # for now we convert points and bbox into depth mode\n            if (box_mode_3d == Box3DMode.CAM) or (box_mode_3d\n                                                  == Box3DMode.LIDAR):\n                points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,\n                                                   Coord3DMode.DEPTH)\n                pred_bboxes = Box3DMode.convert(pred_bboxes, box_mode_3d,\n                                                Box3DMode.DEPTH)\n            elif box_mode_3d != Box3DMode.DEPTH:\n                raise ValueError(\n                    f'Unsupported box_mode_3d {box_mode_3d} for conversion!')\n            pred_bboxes = pred_bboxes.tensor.cpu().numpy()\n            show_result(points, None, pred_bboxes, out_dir, file_name)\n"
  },
  {
    "path": "mmdet3d/models/detectors/centerpoint.py",
    "content": "import torch\n\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet.models import DETECTORS\nfrom .mvx_two_stage import MVXTwoStageDetector\n\n\n@DETECTORS.register_module()\nclass CenterPoint(MVXTwoStageDetector):\n    \"\"\"Base class of Multi-modality VoxelNet.\"\"\"\n\n    def __init__(self,\n                 pts_voxel_layer=None,\n                 pts_voxel_encoder=None,\n                 pts_middle_encoder=None,\n                 pts_fusion_layer=None,\n                 img_backbone=None,\n                 pts_backbone=None,\n                 img_neck=None,\n                 pts_neck=None,\n                 pts_bbox_head=None,\n                 img_roi_head=None,\n                 img_rpn_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(CenterPoint,\n              self).__init__(pts_voxel_layer, pts_voxel_encoder,\n                             pts_middle_encoder, pts_fusion_layer,\n                             img_backbone, pts_backbone, img_neck, pts_neck,\n                             pts_bbox_head, img_roi_head, img_rpn_head,\n                             train_cfg, test_cfg, pretrained)\n\n    def extract_pts_feat(self, pts, img_feats, img_metas):\n        \"\"\"Extract features of points.\"\"\"\n        if not self.with_pts_bbox:\n            return None\n        voxels, num_points, coors = self.voxelize(pts)\n\n        voxel_features = self.pts_voxel_encoder(voxels, num_points, coors)\n        batch_size = coors[-1, 0] + 1\n        x = self.pts_middle_encoder(voxel_features, coors, batch_size)\n        x = self.pts_backbone(x)\n        if self.with_pts_neck:\n            x = self.pts_neck(x)\n        return x\n\n    def forward_pts_train(self,\n                          pts_feats,\n                          gt_bboxes_3d,\n                          gt_labels_3d,\n                          img_metas,\n                          gt_bboxes_ignore=None):\n        \"\"\"Forward function for point cloud branch.\n\n        Args:\n            pts_feats (list[torch.Tensor]): Features of point cloud branch\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\n                boxes of each sampole\n            img_metas (list[dict]): Meta information of samples.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. 
Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        outs = self.pts_bbox_head(pts_feats)\n        loss_inputs = [gt_bboxes_3d, gt_labels_3d, outs]\n        losses = self.pts_bbox_head.loss(*loss_inputs)\n        return losses\n\n    def simple_test_pts(self, x, img_metas, rescale=False):\n        \"\"\"Test function of point cloud branch.\"\"\"\n        outs = self.pts_bbox_head(x)\n        bbox_list = self.pts_bbox_head.get_bboxes(\n            outs, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def aug_test_pts(self, feats, img_metas, rescale=False):\n        \"\"\"Test function of point cloud branch with augmentation.\n\n        The function implementation process is as follows:\n\n            - step 1: map features back for double-flip augmentation.\n            - step 2: merge all features and generate boxes.\n            - step 3: map boxes back for scale augmentation.\n            - step 4: merge results.\n\n        Args:\n            feats (list[torch.Tensor]): Feature of point cloud.\n            img_metas (list[dict]): Meta information of samples.\n            rescale (bool): Whether to rescale bboxes. Default: False.\n\n        Returns:\n            dict: Returned bboxes consist of the following keys:\n\n                - boxes_3d (:obj:`LiDARInstance3DBoxes`): Predicted bboxes.\n                - scores_3d (torch.Tensor): Scores of predicted boxes.\n                - labels_3d (torch.Tensor): Labels of predicted boxes.\n        \"\"\"\n        # only support aug_test for one sample\n        outs_list = []\n        for x, img_meta in zip(feats, img_metas):\n            outs = self.pts_bbox_head(x)\n            # merge augmented outputs before decoding bboxes\n            for task_id, out in enumerate(outs):\n                for key in out[0].keys():\n                    if img_meta[0]['pcd_horizontal_flip']:\n                        outs[task_id][0][key] = torch.flip(\n                            outs[task_id][0][key], dims=[2])\n                        if key == 'reg':\n                            outs[task_id][0][key][:, 1, ...] = 1 - outs[\n                                task_id][0][key][:, 1, ...]\n                        elif key == 'rot':\n                            outs[task_id][0][\n                                key][:, 1,\n                                     ...] = -outs[task_id][0][key][:, 1, ...]\n                        elif key == 'vel':\n                            outs[task_id][0][\n                                key][:, 1,\n                                     ...] = -outs[task_id][0][key][:, 1, ...]\n                    if img_meta[0]['pcd_vertical_flip']:\n                        outs[task_id][0][key] = torch.flip(\n                            outs[task_id][0][key], dims=[3])\n                        if key == 'reg':\n                            outs[task_id][0][key][:, 0, ...] = 1 - outs[\n                                task_id][0][key][:, 0, ...]\n                        elif key == 'rot':\n                            outs[task_id][0][\n                                key][:, 0,\n                                     ...] = -outs[task_id][0][key][:, 0, ...]\n                        elif key == 'vel':\n                            outs[task_id][0][\n                                key][:, 0,\n                                     ...] 
= -outs[task_id][0][key][:, 0, ...]\n\n            outs_list.append(outs)\n\n        preds_dicts = dict()\n        scale_img_metas = []\n\n        # concat outputs sharing the same pcd_scale_factor\n        for i, (img_meta, outs) in enumerate(zip(img_metas, outs_list)):\n            pcd_scale_factor = img_meta[0]['pcd_scale_factor']\n            if pcd_scale_factor not in preds_dicts.keys():\n                preds_dicts[pcd_scale_factor] = outs\n                scale_img_metas.append(img_meta)\n            else:\n                for task_id, out in enumerate(outs):\n                    for key in out[0].keys():\n                        preds_dicts[pcd_scale_factor][task_id][0][key] += out[\n                            0][key]\n\n        aug_bboxes = []\n\n        for pcd_scale_factor, preds_dict in preds_dicts.items():\n            for task_id, pred_dict in enumerate(preds_dict):\n                # merge outputs with different flips before decoding bboxes\n                for key in pred_dict[0].keys():\n                    preds_dict[task_id][0][key] /= len(outs_list) / len(\n                        preds_dicts.keys())\n            bbox_list = self.pts_bbox_head.get_bboxes(\n                preds_dict, img_metas[0], rescale=rescale)\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        if len(preds_dicts.keys()) > 1:\n            # merge outputs with different scales after decoding bboxes\n            merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, scale_img_metas,\n                                                self.pts_bbox_head.test_cfg)\n            return merged_bboxes\n        else:\n            for key in bbox_list[0].keys():\n                bbox_list[0][key] = bbox_list[0][key].to('cpu')\n            return bbox_list[0]\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test function with augmentation.\"\"\"\n        img_feats, pts_feats = self.extract_feats(points, img_metas, imgs)\n        bbox_list = dict()\n        if pts_feats and self.with_pts_bbox:\n            pts_bbox = self.aug_test_pts(pts_feats, img_metas, rescale)\n            bbox_list.update(pts_bbox=pts_bbox)\n        return [bbox_list]\n"
  },
  {
    "path": "mmdet3d/models/detectors/dynamic_voxelnet.py",
    "content": "import torch\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet.models import DETECTORS\nfrom .voxelnet import VoxelNet\n\n\n@DETECTORS.register_module()\nclass DynamicVoxelNet(VoxelNet):\n    r\"\"\"VoxelNet using `dynamic voxelization <https://arxiv.org/abs/1910.06528>`_.\n    \"\"\"\n\n    def __init__(self,\n                 voxel_layer,\n                 voxel_encoder,\n                 middle_encoder,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(DynamicVoxelNet, self).__init__(\n            voxel_layer=voxel_layer,\n            voxel_encoder=voxel_encoder,\n            middle_encoder=middle_encoder,\n            backbone=backbone,\n            neck=neck,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n        )\n\n    def extract_feat(self, points, img_metas):\n        \"\"\"Extract features from points.\"\"\"\n        voxels, coors = self.voxelize(points)\n        voxel_features, feature_coors = self.voxel_encoder(voxels, coors)\n        batch_size = coors[-1, 0].item() + 1\n        x = self.middle_encoder(voxel_features, feature_coors, batch_size)\n        x = self.backbone(x)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    @torch.no_grad()\n    @force_fp32()\n    def voxelize(self, points):\n        \"\"\"Apply dynamic voxelization to points.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n\n        Returns:\n            tuple[torch.Tensor]: Concatenated points and coordinates.\n        \"\"\"\n        coors = []\n        # dynamic voxelization only provide a coors mapping\n        for res in points:\n            res_coors = self.voxel_layer(res)\n            coors.append(res_coors)\n        points = torch.cat(points, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n        return points, coors_batch\n"
  },
  {
    "path": "mmdet3d/models/detectors/h3dnet.py",
    "content": "import torch\n\nfrom mmdet3d.core import merge_aug_bboxes_3d\nfrom mmdet.models import DETECTORS\nfrom .two_stage import TwoStage3DDetector\n\n\n@DETECTORS.register_module()\nclass H3DNet(TwoStage3DDetector):\n    r\"\"\"H3DNet model.\n\n    Please refer to the `paper <https://arxiv.org/abs/2006.05682>`_\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 rpn_head=None,\n                 roi_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(H3DNet, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained)\n\n    def forward_train(self,\n                      points,\n                      img_metas,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      pts_semantic_mask=None,\n                      pts_instance_mask=None,\n                      gt_bboxes_ignore=None):\n        \"\"\"Forward of training.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            img_metas (list): Image metas.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.\n            pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | list[torch.Tensor]): point-wise instance\n                label of each batch.\n            gt_bboxes_ignore (None | list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict: Losses.\n        \"\"\"\n        points_cat = torch.stack(points)\n\n        feats_dict = self.extract_feat(points_cat)\n        feats_dict['fp_xyz'] = [feats_dict['fp_xyz_net0'][-1]]\n        feats_dict['fp_features'] = [feats_dict['hd_feature']]\n        feats_dict['fp_indices'] = [feats_dict['fp_indices_net0'][-1]]\n\n        losses = dict()\n        if self.with_rpn:\n            rpn_outs = self.rpn_head(feats_dict, self.train_cfg.rpn.sample_mod)\n            feats_dict.update(rpn_outs)\n\n            rpn_loss_inputs = (points, gt_bboxes_3d, gt_labels_3d,\n                               pts_semantic_mask, pts_instance_mask, img_metas)\n            rpn_losses = self.rpn_head.loss(\n                rpn_outs,\n                *rpn_loss_inputs,\n                gt_bboxes_ignore=gt_bboxes_ignore,\n                ret_target=True)\n            feats_dict['targets'] = rpn_losses.pop('targets')\n            losses.update(rpn_losses)\n\n            # Generate rpn proposals\n            proposal_cfg = self.train_cfg.get('rpn_proposal',\n                                              self.test_cfg.rpn)\n            proposal_inputs = (points, rpn_outs, img_metas)\n            proposal_list = self.rpn_head.get_bboxes(\n                *proposal_inputs, use_nms=proposal_cfg.use_nms)\n            feats_dict['proposal_list'] = proposal_list\n        else:\n            raise NotImplementedError\n\n        roi_losses = self.roi_head.forward_train(feats_dict, img_metas, points,\n                                                 gt_bboxes_3d, gt_labels_3d,\n                                                 pts_semantic_mask,\n                                                 
pts_instance_mask,\n                                                 gt_bboxes_ignore)\n        losses.update(roi_losses)\n\n        return losses\n\n    def simple_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Forward of testing.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n            img_metas (list): Image metas.\n            rescale (bool): Whether to rescale results.\n\n        Returns:\n            list: Predicted 3d boxes.\n        \"\"\"\n        points_cat = torch.stack(points)\n\n        feats_dict = self.extract_feat(points_cat)\n        feats_dict['fp_xyz'] = [feats_dict['fp_xyz_net0'][-1]]\n        feats_dict['fp_features'] = [feats_dict['hd_feature']]\n        feats_dict['fp_indices'] = [feats_dict['fp_indices_net0'][-1]]\n\n        if self.with_rpn:\n            proposal_cfg = self.test_cfg.rpn\n            rpn_outs = self.rpn_head(feats_dict, proposal_cfg.sample_mod)\n            feats_dict.update(rpn_outs)\n            # Generate rpn proposals\n            proposal_list = self.rpn_head.get_bboxes(\n                points, rpn_outs, img_metas, use_nms=proposal_cfg.use_nms)\n            feats_dict['proposal_list'] = proposal_list\n        else:\n            raise NotImplementedError\n\n        return self.roi_head.simple_test(\n            feats_dict, img_metas, points_cat, rescale=rescale)\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test with augmentation.\"\"\"\n        points_cat = [torch.stack(pts) for pts in points]\n        feats_dict = self.extract_feats(points_cat, img_metas)\n        for feat_dict in feats_dict:\n            feat_dict['fp_xyz'] = [feat_dict['fp_xyz_net0'][-1]]\n            feat_dict['fp_features'] = [feat_dict['hd_feature']]\n            feat_dict['fp_indices'] = [feat_dict['fp_indices_net0'][-1]]\n\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for feat_dict, pts_cat, img_meta in zip(feats_dict, points_cat,\n                                                img_metas):\n            if self.with_rpn:\n                proposal_cfg = self.test_cfg.rpn\n                rpn_outs = self.rpn_head(feat_dict, proposal_cfg.sample_mod)\n                feat_dict.update(rpn_outs)\n                # Generate rpn proposals\n                proposal_list = self.rpn_head.get_bboxes(\n                    points, rpn_outs, img_metas, use_nms=proposal_cfg.use_nms)\n                feat_dict['proposal_list'] = proposal_list\n            else:\n                raise NotImplementedError\n\n            bbox_results = self.roi_head.simple_test(\n                feat_dict,\n                self.test_cfg.rcnn.sample_mod,\n                img_meta,\n                pts_cat,\n                rescale=rescale)\n            aug_bboxes.append(bbox_results)\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.bbox_head.test_cfg)\n\n        return [merged_bboxes]\n\n    def extract_feats(self, points, img_metas):\n        \"\"\"Extract features of multiple samples.\"\"\"\n        return [\n            self.extract_feat(pts, img_meta)\n            for pts, img_meta in zip(points, img_metas)\n        ]\n"
  },
  {
    "path": "mmdet3d/models/detectors/imvotenet.py",
    "content": "import numpy as np\nimport torch\nfrom torch import nn as nn\n\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet3d.models.utils import MLP\nfrom mmdet.models import DETECTORS\nfrom .. import builder\nfrom .base import Base3DDetector\n\n\ndef sample_valid_seeds(mask, num_sampled_seed=1024):\n    \"\"\"Randomly sample seeds from all imvotes.\n\n    Args:\n        mask (torch.Tensor): Bool tensor in shape (\n            seed_num*max_imvote_per_pixel), indicates\n            whether this imvote corresponds to a 2D bbox.\n        num_sampled_seed (int): How many to sample from all imvotes.\n\n    Returns:\n        torch.Tensor: Indices with shape (num_sampled_seed).\n    \"\"\"\n    device = mask.device\n    batch_size = mask.shape[0]\n    sample_inds = mask.new_zeros((batch_size, num_sampled_seed),\n                                 dtype=torch.int64)\n    for bidx in range(batch_size):\n        # return index of non zero elements\n        valid_inds = torch.nonzero(mask[bidx, :]).squeeze(-1)\n        if len(valid_inds) < num_sampled_seed:\n            # compute set t1 - t2\n            t1 = torch.arange(num_sampled_seed, device=device)\n            t2 = valid_inds % num_sampled_seed\n            combined = torch.cat((t1, t2))\n            uniques, counts = combined.unique(return_counts=True)\n            difference = uniques[counts == 1]\n\n            rand_inds = torch.randperm(\n                len(difference),\n                device=device)[:num_sampled_seed - len(valid_inds)]\n            cur_sample_inds = difference[rand_inds]\n            cur_sample_inds = torch.cat((valid_inds, cur_sample_inds))\n        else:\n            rand_inds = torch.randperm(\n                len(valid_inds), device=device)[:num_sampled_seed]\n            cur_sample_inds = valid_inds[rand_inds]\n        sample_inds[bidx, :] = cur_sample_inds\n    return sample_inds\n\n\n@DETECTORS.register_module()\nclass ImVoteNet(Base3DDetector):\n    r\"\"\"`ImVoteNet <https://arxiv.org/abs/2001.10692>`_ for 3D detection.\"\"\"\n\n    def __init__(self,\n                 pts_backbone=None,\n                 pts_bbox_heads=None,\n                 pts_neck=None,\n                 img_backbone=None,\n                 img_neck=None,\n                 img_roi_head=None,\n                 img_rpn_head=None,\n                 img_mlp=None,\n                 freeze_img_branch=False,\n                 fusion_layer=None,\n                 num_sampled_seed=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n\n        super(ImVoteNet, self).__init__()\n\n        # point branch\n        if pts_backbone is not None:\n            self.pts_backbone = builder.build_backbone(pts_backbone)\n        if pts_neck is not None:\n            self.pts_neck = builder.build_neck(pts_neck)\n        if pts_bbox_heads is not None:\n            pts_bbox_head_common = pts_bbox_heads.common\n            pts_bbox_head_common.update(\n                train_cfg=train_cfg.pts if train_cfg is not None else None)\n            pts_bbox_head_common.update(test_cfg=test_cfg.pts)\n            pts_bbox_head_joint = pts_bbox_head_common.copy()\n            pts_bbox_head_joint.update(pts_bbox_heads.joint)\n            pts_bbox_head_pts = pts_bbox_head_common.copy()\n            pts_bbox_head_pts.update(pts_bbox_heads.pts)\n            pts_bbox_head_img = pts_bbox_head_common.copy()\n            pts_bbox_head_img.update(pts_bbox_heads.img)\n\n            
self.pts_bbox_head_joint = builder.build_head(pts_bbox_head_joint)\n            self.pts_bbox_head_pts = builder.build_head(pts_bbox_head_pts)\n            self.pts_bbox_head_img = builder.build_head(pts_bbox_head_img)\n            self.pts_bbox_heads = [\n                self.pts_bbox_head_joint, self.pts_bbox_head_pts,\n                self.pts_bbox_head_img\n            ]\n            self.loss_weights = pts_bbox_heads.loss_weights\n\n        # image branch\n        if img_backbone:\n            self.img_backbone = builder.build_backbone(img_backbone)\n        if img_neck is not None:\n            self.img_neck = builder.build_neck(img_neck)\n        if img_rpn_head is not None:\n            rpn_train_cfg = train_cfg.img_rpn if train_cfg \\\n                is not None else None\n            img_rpn_head_ = img_rpn_head.copy()\n            img_rpn_head_.update(\n                train_cfg=rpn_train_cfg, test_cfg=test_cfg.img_rpn)\n            self.img_rpn_head = builder.build_head(img_rpn_head_)\n        if img_roi_head is not None:\n            rcnn_train_cfg = train_cfg.img_rcnn if train_cfg \\\n                is not None else None\n            img_roi_head.update(\n                train_cfg=rcnn_train_cfg, test_cfg=test_cfg.img_rcnn)\n            self.img_roi_head = builder.build_head(img_roi_head)\n\n        # fusion\n        if fusion_layer is not None:\n            self.fusion_layer = builder.build_fusion_layer(fusion_layer)\n            self.max_imvote_per_pixel = fusion_layer.max_imvote_per_pixel\n\n        self.freeze_img_branch = freeze_img_branch\n        if freeze_img_branch:\n            self.freeze_img_branch_params()\n\n        if img_mlp is not None:\n            self.img_mlp = MLP(**img_mlp)\n\n        self.num_sampled_seed = num_sampled_seed\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.init_weights(pretrained=pretrained)\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize model weights.\"\"\"\n        super(ImVoteNet, self).init_weights(pretrained)\n        if pretrained is None:\n            img_pretrained = None\n            pts_pretrained = None\n        elif isinstance(pretrained, dict):\n            img_pretrained = pretrained.get('img', None)\n            pts_pretrained = pretrained.get('pts', None)\n        else:\n            raise ValueError(\n                f'pretrained should be a dict, got {type(pretrained)}')\n        if self.with_img_backbone:\n            self.img_backbone.init_weights(pretrained=img_pretrained)\n        if self.with_img_neck:\n            if isinstance(self.img_neck, nn.Sequential):\n                for m in self.img_neck:\n                    m.init_weights()\n            else:\n                self.img_neck.init_weights()\n\n        if self.with_img_roi_head:\n            self.img_roi_head.init_weights(img_pretrained)\n        if self.with_img_rpn:\n            self.img_rpn_head.init_weights()\n        if self.with_pts_backbone:\n            self.pts_backbone.init_weights(pretrained=pts_pretrained)\n        if self.with_pts_bbox:\n            self.pts_bbox_head.init_weights()\n        if self.with_pts_neck:\n            if isinstance(self.pts_neck, nn.Sequential):\n                for m in self.pts_neck:\n                    m.init_weights()\n            else:\n                self.pts_neck.init_weights()\n\n    def freeze_img_branch_params(self):\n        \"\"\"Freeze all image branch parameters.\"\"\"\n        if self.with_img_bbox_head:\n            for param in 
self.img_bbox_head.parameters():\n                param.requires_grad = False\n        if self.with_img_backbone:\n            for param in self.img_backbone.parameters():\n                param.requires_grad = False\n        if self.with_img_neck:\n            for param in self.img_neck.parameters():\n                param.requires_grad = False\n        if self.with_img_rpn:\n            for param in self.img_rpn_head.parameters():\n                param.requires_grad = False\n        if self.with_img_roi_head:\n            for param in self.img_roi_head.parameters():\n                param.requires_grad = False\n\n    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,\n                              missing_keys, unexpected_keys, error_msgs):\n        \"\"\"Overload in order to load img network ckpts into img branch.\"\"\"\n        module_names = ['backbone', 'neck', 'roi_head', 'rpn_head']\n        for key in list(state_dict):\n            for module_name in module_names:\n                if key.startswith(module_name) and ('img_' +\n                                                    key) not in state_dict:\n                    state_dict['img_' + key] = state_dict.pop(key)\n\n        super()._load_from_state_dict(state_dict, prefix, local_metadata,\n                                      strict, missing_keys, unexpected_keys,\n                                      error_msgs)\n\n    def train(self, mode=True):\n        \"\"\"Overload in order to keep image branch modules in eval mode.\"\"\"\n        super(ImVoteNet, self).train(mode)\n        if self.freeze_img_branch:\n            if self.with_img_bbox_head:\n                self.img_bbox_head.eval()\n            if self.with_img_backbone:\n                self.img_backbone.eval()\n            if self.with_img_neck:\n                self.img_neck.eval()\n            if self.with_img_rpn:\n                self.img_rpn_head.eval()\n            if self.with_img_roi_head:\n                self.img_roi_head.eval()\n\n    @property\n    def with_img_bbox(self):\n        \"\"\"bool: Whether the detector has a 2D image box head.\"\"\"\n        return ((hasattr(self, 'img_roi_head') and self.img_roi_head.with_bbox)\n                or (hasattr(self, 'img_bbox_head')\n                    and self.img_bbox_head is not None))\n\n    @property\n    def with_img_bbox_head(self):\n        \"\"\"bool: Whether the detector has a 2D image box head (not roi).\"\"\"\n        return hasattr(self,\n                       'img_bbox_head') and self.img_bbox_head is not None\n\n    @property\n    def with_img_backbone(self):\n        \"\"\"bool: Whether the detector has a 2D image backbone.\"\"\"\n        return hasattr(self, 'img_backbone') and self.img_backbone is not None\n\n    @property\n    def with_img_neck(self):\n        \"\"\"bool: Whether the detector has a neck in image branch.\"\"\"\n        return hasattr(self, 'img_neck') and self.img_neck is not None\n\n    @property\n    def with_img_rpn(self):\n        \"\"\"bool: Whether the detector has a 2D RPN in image detector branch.\"\"\"\n        return hasattr(self, 'img_rpn_head') and self.img_rpn_head is not None\n\n    @property\n    def with_img_roi_head(self):\n        \"\"\"bool: Whether the detector has a RoI Head in image branch.\"\"\"\n        return hasattr(self, 'img_roi_head') and self.img_roi_head is not None\n\n    @property\n    def with_pts_bbox(self):\n        \"\"\"bool: Whether the detector has a 3D box head.\"\"\"\n        return hasattr(self,\n            
           'pts_bbox_head') and self.pts_bbox_head is not None\n\n    @property\n    def with_pts_backbone(self):\n        \"\"\"bool: Whether the detector has a 3D backbone.\"\"\"\n        return hasattr(self, 'pts_backbone') and self.pts_backbone is not None\n\n    @property\n    def with_pts_neck(self):\n        \"\"\"bool: Whether the detector has a neck in 3D detector branch.\"\"\"\n        return hasattr(self, 'pts_neck') and self.pts_neck is not None\n\n    def extract_feat(self, imgs):\n        \"\"\"Just to inherit from abstract method.\"\"\"\n        pass\n\n    def extract_img_feat(self, img):\n        \"\"\"Directly extract features from the img backbone+neck.\"\"\"\n        x = self.img_backbone(img)\n        if self.with_img_neck:\n            x = self.img_neck(x)\n        return x\n\n    def extract_img_feats(self, imgs):\n        \"\"\"Extract features from multiple images.\n\n        Args:\n            imgs (list[torch.Tensor]): A list of images. The images are\n                augmented from the same image but in different ways.\n\n        Returns:\n            list[torch.Tensor]: Features of different images\n        \"\"\"\n\n        assert isinstance(imgs, list)\n        return [self.extract_img_feat(img) for img in imgs]\n\n    def extract_pts_feat(self, pts):\n        \"\"\"Extract features of points.\"\"\"\n        x = self.pts_backbone(pts)\n        if self.with_pts_neck:\n            x = self.pts_neck(x)\n\n        seed_points = x['fp_xyz'][-1]\n        seed_features = x['fp_features'][-1]\n        seed_indices = x['fp_indices'][-1]\n\n        return (seed_points, seed_features, seed_indices)\n\n    def extract_pts_feats(self, pts):\n        \"\"\"Extract features of points from multiple samples.\"\"\"\n        assert isinstance(pts, list)\n        return [self.extract_pts_feat(pt) for pt in pts]\n\n    @torch.no_grad()\n    def extract_bboxes_2d(self,\n                          img,\n                          img_metas,\n                          train=True,\n                          bboxes_2d=None,\n                          **kwargs):\n        \"\"\"Extract bounding boxes from 2d detector.\n\n        Args:\n            img (torch.Tensor): of shape (N, C, H, W) encoding input images.\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): Image meta info.\n            train (bool): train-time or not.\n            bboxes_2d (list[torch.Tensor]): provided 2d bboxes,\n                not supported yet.\n\n        Return:\n            list[torch.Tensor]: a list of processed 2d bounding boxes.\n        \"\"\"\n        if bboxes_2d is None:\n            x = self.extract_img_feat(img)\n            proposal_list = self.img_rpn_head.simple_test_rpn(x, img_metas)\n            rets = self.img_roi_head.simple_test(\n                x, proposal_list, img_metas, rescale=False)\n\n            rets_processed = []\n            for ret in rets:\n                tmp = np.concatenate(ret, axis=0)\n                sem_class = img.new_zeros((len(tmp)))\n                start = 0\n                for i, bboxes in enumerate(ret):\n                    sem_class[start:start + len(bboxes)] = i\n                    start += len(bboxes)\n                ret = img.new_tensor(tmp)\n\n                # append class index\n                ret = torch.cat([ret, sem_class[:, None]], dim=-1)\n                inds = torch.argsort(ret[:, 4], descending=True)\n                ret = ret.index_select(0, inds)\n\n                # drop half bboxes 
during training for better generalization\n                if train:\n                    rand_drop = torch.randperm(len(ret))[:(len(ret) + 1) // 2]\n                    rand_drop = torch.sort(rand_drop)[0]\n                    ret = ret[rand_drop]\n\n                rets_processed.append(ret.float())\n            return rets_processed\n        else:\n            rets_processed = []\n            for ret in bboxes_2d:\n                if len(ret) > 0 and train:\n                    rand_drop = torch.randperm(len(ret))[:(len(ret) + 1) // 2]\n                    rand_drop = torch.sort(rand_drop)[0]\n                    ret = ret[rand_drop]\n                rets_processed.append(ret.float())\n            return rets_processed\n\n    def forward_train(self,\n                      points=None,\n                      img=None,\n                      img_metas=None,\n                      gt_bboxes=None,\n                      gt_labels=None,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      proposals=None,\n                      calib=None,\n                      bboxes_2d=None,\n                      gt_bboxes_3d=None,\n                      gt_labels_3d=None,\n                      pts_semantic_mask=None,\n                      pts_instance_mask=None,\n                      **kwargs):\n        \"\"\"Forwarding of train for image branch pretrain or stage 2 train.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            img (torch.Tensor): of shape (N, C, H, W) encoding input images.\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): list of image and point cloud meta info\n                dict. For example, keys include 'ori_shape', 'img_norm_cfg',\n                and 'transformation_3d_flow'. 
For details on the values of\n                the keys see `mmdet/datasets/pipelines/formatting.py:Collect`.\n            gt_bboxes (list[torch.Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[torch.Tensor]): class indices for each\n                2d bounding box.\n            gt_bboxes_ignore (None | list[torch.Tensor]): specify which\n                2d bounding boxes can be ignored when computing the loss.\n            gt_masks (None | torch.Tensor): true segmentation masks for each\n                2d bbox, used if the architecture supports a segmentation task.\n            proposals: override rpn proposals (2d) with custom proposals.\n                Use when `with_rpn` is False.\n            calib (dict[str, torch.Tensor]): camera calibration matrices,\n                Rt and K.\n            bboxes_2d (list[torch.Tensor]): provided 2d bboxes,\n                not supported yet.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): 3d gt bboxes.\n            gt_labels_3d (list[torch.Tensor]): gt class labels for 3d bboxes.\n            pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | list[torch.Tensor]): point-wise instance\n                label of each batch.\n\n        Returns:\n            dict[str, torch.Tensor]: a dictionary of loss components.\n        \"\"\"\n        if points is None:\n            x = self.extract_img_feat(img)\n            losses = dict()\n\n            # RPN forward and loss\n            if self.with_img_rpn:\n                proposal_cfg = self.train_cfg.get('img_rpn_proposal',\n                                                  self.test_cfg.img_rpn)\n                rpn_losses, proposal_list = self.img_rpn_head.forward_train(\n                    x,\n                    img_metas,\n                    gt_bboxes,\n                    gt_labels=None,\n                    gt_bboxes_ignore=gt_bboxes_ignore,\n                    proposal_cfg=proposal_cfg)\n                losses.update(rpn_losses)\n            else:\n                proposal_list = proposals\n\n            roi_losses = self.img_roi_head.forward_train(\n                x, img_metas, proposal_list, gt_bboxes, gt_labels,\n                gt_bboxes_ignore, gt_masks, **kwargs)\n            losses.update(roi_losses)\n            return losses\n        else:\n            bboxes_2d = self.extract_bboxes_2d(\n                img, img_metas, bboxes_2d=bboxes_2d, **kwargs)\n\n            points = torch.stack(points)\n            seeds_3d, seed_3d_features, seed_indices = \\\n                self.extract_pts_feat(points)\n\n            img_features, masks = self.fusion_layer(img, bboxes_2d, seeds_3d,\n                                                    img_metas, calib)\n\n            inds = sample_valid_seeds(masks, self.num_sampled_seed)\n            batch_size, img_feat_size = img_features.shape[:2]\n            pts_feat_size = seed_3d_features.shape[1]\n            inds_img = inds.view(batch_size, 1,\n                                 -1).expand(-1, img_feat_size, -1)\n            img_features = img_features.gather(-1, inds_img)\n            inds = inds % inds.shape[1]\n            inds_seed_xyz = inds.view(batch_size, -1, 1).expand(-1, -1, 3)\n            seeds_3d = seeds_3d.gather(1, inds_seed_xyz)\n            inds_seed_feats = inds.view(batch_size, 1,\n                                        
-1).expand(-1, pts_feat_size, -1)\n            seed_3d_features = seed_3d_features.gather(-1, inds_seed_feats)\n            seed_indices = seed_indices.gather(1, inds)\n\n            img_features = self.img_mlp(img_features)\n            fused_features = torch.cat([seed_3d_features, img_features], dim=1)\n\n            feat_dict_joint = dict(\n                seed_points=seeds_3d,\n                seed_features=fused_features,\n                seed_indices=seed_indices)\n            feat_dict_pts = dict(\n                seed_points=seeds_3d,\n                seed_features=seed_3d_features,\n                seed_indices=seed_indices)\n            feat_dict_img = dict(\n                seed_points=seeds_3d,\n                seed_features=img_features,\n                seed_indices=seed_indices)\n\n            loss_inputs = (points, gt_bboxes_3d, gt_labels_3d,\n                           pts_semantic_mask, pts_instance_mask, img_metas)\n            bbox_preds_joints = self.pts_bbox_head_joint(\n                feat_dict_joint, self.train_cfg.pts.sample_mod)\n            bbox_preds_pts = self.pts_bbox_head_pts(\n                feat_dict_pts, self.train_cfg.pts.sample_mod)\n            bbox_preds_img = self.pts_bbox_head_img(\n                feat_dict_img, self.train_cfg.pts.sample_mod)\n            losses_towers = []\n            losses_joint = self.pts_bbox_head_joint.loss(\n                bbox_preds_joints,\n                *loss_inputs,\n                gt_bboxes_ignore=gt_bboxes_ignore)\n            losses_pts = self.pts_bbox_head_pts.loss(\n                bbox_preds_pts,\n                *loss_inputs,\n                gt_bboxes_ignore=gt_bboxes_ignore)\n            losses_img = self.pts_bbox_head_img.loss(\n                bbox_preds_img,\n                *loss_inputs,\n                gt_bboxes_ignore=gt_bboxes_ignore)\n            losses_towers.append(losses_joint)\n            losses_towers.append(losses_pts)\n            losses_towers.append(losses_img)\n            combined_losses = dict()\n            for loss_term in losses_joint:\n                if 'loss' in loss_term:\n                    combined_losses[loss_term] = 0\n                    for i in range(len(losses_towers)):\n                        combined_losses[loss_term] += \\\n                            losses_towers[i][loss_term] * \\\n                            self.loss_weights[i]\n                else:\n                    # only save the metric of the joint head\n                    # if it is not a loss\n                    combined_losses[loss_term] = \\\n                        losses_towers[0][loss_term]\n\n            return combined_losses\n\n    def forward_test(self,\n                     points=None,\n                     img_metas=None,\n                     img=None,\n                     calib=None,\n                     bboxes_2d=None,\n                     **kwargs):\n        \"\"\"Forwarding of test for image branch pretrain or stage 2 train.\n\n        Args:\n            points (list[list[torch.Tensor]], optional): the outer\n                list indicates test-time augmentations and the inner\n                list contains all points in the batch, where each Tensor\n                should have a shape NxC. 
Defaults to None.\n            img_metas (list[list[dict]], optional): the outer list\n                indicates test-time augs (multiscale, flip, etc.)\n                and the inner list indicates images in a batch.\n                Defaults to None.\n            img (list[list[torch.Tensor]], optional): the outer\n                list indicates test-time augmentations and inner Tensor\n                should have a shape NxCxHxW, which contains all images\n                in the batch. Defaults to None. Defaults to None.\n            calibs (list[dict[str, torch.Tensor]], optional): camera\n                calibration matrices, Rt and K.\n                List indicates test-time augs. Defaults to None.\n            bboxes_2d (list[list[torch.Tensor]], optional):\n                Provided 2d bboxes, not supported yet. Defaults to None.\n\n        Returns:\n            list[list[torch.Tensor]]|list[dict]: Predicted 2d or 3d boxes.\n        \"\"\"\n        if points is None:\n            for var, name in [(img, 'img'), (img_metas, 'img_metas')]:\n                if not isinstance(var, list):\n                    raise TypeError(\n                        f'{name} must be a list, but got {type(var)}')\n\n            num_augs = len(img)\n            if num_augs != len(img_metas):\n                raise ValueError(f'num of augmentations ({len(img)}) '\n                                 f'!= num of image meta ({len(img_metas)})')\n\n            if num_augs == 1:\n                # proposals (List[List[Tensor]]): the outer list indicates\n                # test-time augs (multiscale, flip, etc.) and the inner list\n                # indicates images in a batch.\n                # The Tensor should have a shape Px4, where P is the number of\n                # proposals.\n                if 'proposals' in kwargs:\n                    kwargs['proposals'] = kwargs['proposals'][0]\n                return self.simple_test_img_only(\n                    img=img[0], img_metas=img_metas[0], **kwargs)\n            else:\n                assert img[0].size(0) == 1, 'aug test does not support ' \\\n                                         'inference with batch size ' \\\n                                         f'{img[0].size(0)}'\n                # TODO: support test augmentation for predefined proposals\n                assert 'proposals' not in kwargs\n                return self.aug_test_img_only(\n                    img=img, img_metas=img_metas, **kwargs)\n\n        else:\n            for var, name in [(points, 'points'), (img_metas, 'img_metas')]:\n                if not isinstance(var, list):\n                    raise TypeError('{} must be a list, but got {}'.format(\n                        name, type(var)))\n\n            num_augs = len(points)\n            if num_augs != len(img_metas):\n                raise ValueError(\n                    'num of augmentations ({}) != num of image meta ({})'.\n                    format(len(points), len(img_metas)))\n\n            if num_augs == 1:\n                return self.simple_test(\n                    points[0],\n                    img_metas[0],\n                    img[0],\n                    calibs=calib[0],\n                    bboxes_2d=bboxes_2d[0] if bboxes_2d is not None else None,\n                    **kwargs)\n            else:\n                return self.aug_test(points, img_metas, img, calib, bboxes_2d,\n                                     **kwargs)\n\n    def simple_test_img_only(self,\n                             img,\n              
               img_metas,\n                             proposals=None,\n                             rescale=False):\n        \"\"\"Test without augmentation, image network pretrain. May refer to\n        https://github.com/open-\n        mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py  #\n        noqa.\n\n        Args:\n            img (torch.Tensor): Should have a shape NxCxHxW, which contains\n                all images in the batch.\n            img_metas (list[dict]):\n            proposals (list[Tensor], optional): override rpn proposals\n                with custom proposals. Defaults to None.\n            rescale (bool, optional): Whether or not rescale bboxes to the\n                original shape of input image. Defaults to False.\n\n        Returns:\n            list[list[torch.Tensor]]: Predicted 2d boxes.\n        \"\"\"\n        assert self.with_img_bbox, 'Img bbox head must be implemented.'\n        assert self.with_img_backbone, 'Img backbone must be implemented.'\n        assert self.with_img_rpn, 'Img rpn must be implemented.'\n        assert self.with_img_roi_head, 'Img roi head must be implemented.'\n\n        x = self.extract_img_feat(img)\n\n        if proposals is None:\n            proposal_list = self.img_rpn_head.simple_test_rpn(x, img_metas)\n        else:\n            proposal_list = proposals\n\n        ret = self.img_roi_head.simple_test(\n            x, proposal_list, img_metas, rescale=rescale)\n\n        return ret\n\n    def simple_test(self,\n                    points=None,\n                    img_metas=None,\n                    img=None,\n                    calibs=None,\n                    bboxes_2d=None,\n                    rescale=False,\n                    **kwargs):\n        \"\"\"Test without augmentation, stage 2.\n\n        Args:\n            points (list[torch.Tensor], optional): Elements in the list\n                should have a shape NxC, the list indicates all point-clouds\n                in the batch. Defaults to None.\n            img_metas (list[dict], optional): List indicates\n                images in a batch. Defaults to None.\n            img (torch.Tensor, optional): Should have a shape NxCxHxW,\n                which contains all images in the batch. Defaults to None.\n            calibs (dict[str, torch.Tensor], optional): camera\n                calibration matrices, Rt and K. Defaults to None.\n            bboxes_2d (list[torch.Tensor], optional):\n                Provided 2d bboxes, not supported yet. 
Defaults to None.\n            rescale (bool, optional): Whether or not rescale bboxes.\n                Defaults to False.\n\n        Returns:\n            list[dict]: Predicted 3d boxes.\n        \"\"\"\n        bboxes_2d = self.extract_bboxes_2d(\n            img, img_metas, train=False, bboxes_2d=bboxes_2d, **kwargs)\n\n        points = torch.stack(points)\n        seeds_3d, seed_3d_features, seed_indices = \\\n            self.extract_pts_feat(points)\n\n        img_features, masks = self.fusion_layer(img, bboxes_2d, seeds_3d,\n                                                img_metas, calibs)\n\n        inds = sample_valid_seeds(masks, self.num_sampled_seed)\n        batch_size, img_feat_size = img_features.shape[:2]\n        pts_feat_size = seed_3d_features.shape[1]\n        inds_img = inds.view(batch_size, 1, -1).expand(-1, img_feat_size, -1)\n        img_features = img_features.gather(-1, inds_img)\n        inds = inds % inds.shape[1]\n        inds_seed_xyz = inds.view(batch_size, -1, 1).expand(-1, -1, 3)\n        seeds_3d = seeds_3d.gather(1, inds_seed_xyz)\n        inds_seed_feats = inds.view(batch_size, 1,\n                                    -1).expand(-1, pts_feat_size, -1)\n        seed_3d_features = seed_3d_features.gather(-1, inds_seed_feats)\n        seed_indices = seed_indices.gather(1, inds)\n\n        img_features = self.img_mlp(img_features)\n\n        fused_features = torch.cat([seed_3d_features, img_features], dim=1)\n\n        feat_dict = dict(\n            seed_points=seeds_3d,\n            seed_features=fused_features,\n            seed_indices=seed_indices)\n        bbox_preds = self.pts_bbox_head_joint(feat_dict,\n                                              self.test_cfg.pts.sample_mod)\n        bbox_list = self.pts_bbox_head_joint.get_bboxes(\n            points, bbox_preds, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def aug_test_img_only(self, img, img_metas, rescale=False):\n        \"\"\"Test function with augmentation, image network pretrain. May refer\n        to https://github.com/open-\n        mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py  #\n        noqa.\n\n        Args:\n            img (list[list[torch.Tensor]], optional): the outer\n                list indicates test-time augmentations and inner Tensor\n                should have a shape NxCxHxW, which contains all images\n                in the batch. Defaults to None.\n            img_metas (list[list[dict]], optional): the outer list\n                indicates test-time augs (multiscale, flip, etc.)\n                and the inner list indicates images in a batch.\n                Defaults to None.\n            rescale (bool, optional): Whether or not rescale bboxes to the\n                original shape of input image. 
If rescale is False, then\n                returned bboxes and masks will fit the scale of imgs[0].\n                Defaults to False.\n\n        Returns:\n            list[list[torch.Tensor]]: Predicted 2d boxes.\n        \"\"\"\n        assert self.with_img_bbox, 'Img bbox head must be implemented.'\n        assert self.with_img_backbone, 'Img backbone must be implemented.'\n        assert self.with_img_rpn, 'Img rpn must be implemented.'\n        assert self.with_img_roi_head, 'Img roi head must be implemented.'\n\n        x = self.extract_img_feats(img)\n        proposal_list = self.img_rpn_head.aug_test_rpn(x, img_metas)\n\n        return self.img_roi_head.aug_test(\n            x, proposal_list, img_metas, rescale=rescale)\n\n    def aug_test(self,\n                 points=None,\n                 img_metas=None,\n                 imgs=None,\n                 calibs=None,\n                 bboxes_2d=None,\n                 rescale=False,\n                 **kwargs):\n        \"\"\"Test function with augmentation, stage 2.\n\n        Args:\n            points (list[list[torch.Tensor]], optional): the outer\n                list indicates test-time augmentations and the inner\n                list contains all points in the batch, where each Tensor\n                should have a shape NxC. Defaults to None.\n            img_metas (list[list[dict]], optional): the outer list\n                indicates test-time augs (multiscale, flip, etc.)\n                and the inner list indicates images in a batch.\n                Defaults to None.\n            imgs (list[list[torch.Tensor]], optional): the outer\n                list indicates test-time augmentations and inner Tensor\n                should have a shape NxCxHxW, which contains all images\n                in the batch. Defaults to None.\n            calibs (list[dict[str, torch.Tensor]], optional): camera\n                calibration matrices, Rt and K.\n                List indicates test-time augs. Defaults to None.\n            bboxes_2d (list[list[torch.Tensor]], optional):\n                Provided 2d bboxes, not supported yet. 
Defaults to None.\n            rescale (bool, optional): Whether or not rescale bboxes.\n                Defaults to False.\n\n        Returns:\n            list[dict]: Predicted 3d boxes.\n        \"\"\"\n        points_cat = [torch.stack(pts) for pts in points]\n        feats = self.extract_pts_feats(points_cat, img_metas)\n\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for x, pts_cat, img_meta, bbox_2d, img, calib in zip(\n                feats, points_cat, img_metas, bboxes_2d, imgs, calibs):\n\n            bbox_2d = self.extract_bboxes_2d(\n                img, img_metas, train=False, bboxes_2d=bbox_2d, **kwargs)\n\n            seeds_3d, seed_3d_features, seed_indices = x\n\n            img_features, masks = self.fusion_layer(img, bbox_2d, seeds_3d,\n                                                    img_metas, calib)\n\n            inds = sample_valid_seeds(masks, self.num_sampled_seed)\n            batch_size, img_feat_size = img_features.shape[:2]\n            pts_feat_size = seed_3d_features.shape[1]\n            inds_img = inds.view(batch_size, 1,\n                                 -1).expand(-1, img_feat_size, -1)\n            img_features = img_features.gather(-1, inds_img)\n            inds = inds % inds.shape[1]\n            inds_seed_xyz = inds.view(batch_size, -1, 1).expand(-1, -1, 3)\n            seeds_3d = seeds_3d.gather(1, inds_seed_xyz)\n            inds_seed_feats = inds.view(batch_size, 1,\n                                        -1).expand(-1, pts_feat_size, -1)\n            seed_3d_features = seed_3d_features.gather(-1, inds_seed_feats)\n            seed_indices = seed_indices.gather(1, inds)\n\n            img_features = self.img_mlp(img_features)\n\n            fused_features = torch.cat([seed_3d_features, img_features], dim=1)\n\n            feat_dict = dict(\n                seed_points=seeds_3d,\n                seed_features=fused_features,\n                seed_indices=seed_indices)\n            bbox_preds = self.pts_bbox_head_joint(feat_dict,\n                                                  self.test_cfg.pts.sample_mod)\n            bbox_list = self.pts_bbox_head_joint.get_bboxes(\n                pts_cat, bbox_preds, img_metas, rescale=rescale)\n\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.bbox_head.test_cfg)\n\n        return [merged_bboxes]\n"
  },
  {
    "path": "mmdet3d/models/detectors/mvx_faster_rcnn.py",
    "content": "import torch\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet.models import DETECTORS\nfrom .mvx_two_stage import MVXTwoStageDetector\n\n\n@DETECTORS.register_module()\nclass MVXFasterRCNN(MVXTwoStageDetector):\n    \"\"\"Multi-modality VoxelNet using Faster R-CNN.\"\"\"\n\n    def __init__(self, **kwargs):\n        super(MVXFasterRCNN, self).__init__(**kwargs)\n\n\n@DETECTORS.register_module()\nclass DynamicMVXFasterRCNN(MVXTwoStageDetector):\n    \"\"\"Multi-modality VoxelNet using Faster R-CNN and dynamic voxelization.\"\"\"\n\n    def __init__(self, **kwargs):\n        super(DynamicMVXFasterRCNN, self).__init__(**kwargs)\n\n    @torch.no_grad()\n    @force_fp32()\n    def voxelize(self, points):\n        \"\"\"Apply dynamic voxelization to points.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n\n        Returns:\n            tuple[torch.Tensor]: Concatenated points and coordinates.\n        \"\"\"\n        coors = []\n        # dynamic voxelization only provide a coors mapping\n        for res in points:\n            res_coors = self.pts_voxel_layer(res)\n            coors.append(res_coors)\n        points = torch.cat(points, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n        return points, coors_batch\n\n    def extract_pts_feat(self, points, img_feats, img_metas):\n        \"\"\"Extract point features.\"\"\"\n        if not self.with_pts_bbox:\n            return None\n        voxels, coors = self.voxelize(points)\n        voxel_features, feature_coors = self.pts_voxel_encoder(\n            voxels, coors, points, img_feats, img_metas)\n        batch_size = coors[-1, 0] + 1\n        x = self.pts_middle_encoder(voxel_features, feature_coors, batch_size)\n        x = self.pts_backbone(x)\n        if self.with_pts_neck:\n            x = self.pts_neck(x)\n        return x\n"
  },
  {
    "path": "mmdet3d/models/detectors/mvx_two_stage.py",
    "content": "import mmcv\nimport torch\nfrom mmcv.parallel import DataContainer as DC\nfrom mmcv.runner import force_fp32\nfrom os import path as osp\nfrom torch import nn as nn\nfrom torch.nn import functional as F\nimport time\n\nfrom mmdet3d.core import (Box3DMode, Coord3DMode, bbox3d2result,\n                          merge_aug_bboxes_3d, show_result)\nfrom mmdet3d.ops import Voxelization\nfrom mmdet.core import multi_apply\nfrom mmdet.models import DETECTORS\nfrom .. import builder\nfrom .base import Base3DDetector\n\n\n@DETECTORS.register_module()\nclass MVXTwoStageDetector(Base3DDetector):\n    \"\"\"Base class of Multi-modality VoxelNet.\"\"\"\n\n    def __init__(self,\n                 freeze_img=True,\n                 freeze_img_head=False,\n                 pts_voxel_layer=None,\n                 pts_voxel_encoder=None,\n                 pts_middle_encoder=None,\n                 pts_fusion_layer=None,\n                 img_backbone=None,\n                 pts_backbone=None,\n                 img_neck=None,\n                 pts_neck=None,\n                 pts_bbox_head=None,\n                 img_roi_head=None,\n                 img_rpn_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 ):\n        super(MVXTwoStageDetector, self).__init__()\n\n        self.freeze_img = freeze_img\n        self.freeze_img_head = freeze_img_head\n        if pts_voxel_layer:\n            self.pts_voxel_layer = Voxelization(**pts_voxel_layer)\n        if pts_voxel_encoder:\n            self.pts_voxel_encoder = builder.build_voxel_encoder(\n                pts_voxel_encoder)\n        if pts_middle_encoder:\n            self.pts_middle_encoder = builder.build_middle_encoder(\n                pts_middle_encoder)\n        if pts_backbone:\n            self.pts_backbone = builder.build_backbone(pts_backbone)\n        if pts_fusion_layer:\n            self.pts_fusion_layer = builder.build_fusion_layer(\n                pts_fusion_layer)\n        if pts_neck is not None:\n            self.pts_neck = builder.build_neck(pts_neck)\n        if pts_bbox_head:\n            pts_train_cfg = train_cfg.pts if train_cfg else None\n            pts_bbox_head.update(train_cfg=pts_train_cfg)\n            pts_test_cfg = test_cfg.pts if test_cfg else None\n            pts_bbox_head.update(test_cfg=pts_test_cfg)\n            self.pts_bbox_head = builder.build_head(pts_bbox_head)\n\n        if img_backbone:\n            self.img_backbone = builder.build_backbone(img_backbone)\n        if img_neck is not None:\n            self.img_neck = builder.build_neck(img_neck)\n        if img_rpn_head is not None:\n            self.img_rpn_head = builder.build_head(img_rpn_head)\n        if img_roi_head is not None:\n            self.img_roi_head = builder.build_head(img_roi_head)\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.init_weights(pretrained=pretrained)\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize model weights.\"\"\"\n        super(MVXTwoStageDetector, self).init_weights(pretrained)\n        if pretrained is None:\n            img_pretrained = None\n            pts_pretrained = None\n        elif isinstance(pretrained, dict):\n            img_pretrained = pretrained.get('img', None)\n            pts_pretrained = pretrained.get('pts', None)\n        else:\n            raise ValueError(\n                f'pretrained should be a dict, got {type(pretrained)}')\n        if 
self.with_img_backbone:\n            self.img_backbone.init_weights(pretrained=img_pretrained)\n        if self.with_pts_backbone:\n            self.pts_backbone.init_weights(pretrained=pts_pretrained)\n        if self.with_img_neck:\n            if isinstance(self.img_neck, nn.Sequential):\n                for m in self.img_neck:\n                    m.init_weights()\n            else:\n                self.img_neck.init_weights()\n\n        if self.with_img_roi_head:\n            self.img_roi_head.init_weights(img_pretrained)\n        if self.with_img_rpn:\n            self.img_rpn_head.init_weights()\n        if self.with_pts_bbox:\n            self.pts_bbox_head.init_weights()\n        if self.with_pts_roi_head:\n            self.pts_roi_head.init_weights()\n\n        if self.freeze_img:\n            if self.with_img_backbone:\n                for param in self.img_backbone.parameters():\n                    param.requires_grad = False\n            if self.with_img_neck:\n                for param in self.img_neck.parameters():\n                    param.requires_grad = False\n\n    @property\n    def with_pts_roi_head(self):\n        \"\"\"bool: Whether the detector has a roi head in pts branch.\"\"\"\n        return hasattr(self,\n                       'pts_roi_head') and self.pts_roi_head is not None\n\n    @property\n    def with_img_shared_head(self):\n        \"\"\"bool: Whether the detector has a shared head in image branch.\"\"\"\n        return hasattr(self,\n                       'img_shared_head') and self.img_shared_head is not None\n\n    @property\n    def with_pts_bbox(self):\n        \"\"\"bool: Whether the detector has a 3D box head.\"\"\"\n        return hasattr(self,\n                       'pts_bbox_head') and self.pts_bbox_head is not None\n\n    @property\n    def with_img_bbox(self):\n        \"\"\"bool: Whether the detector has a 2D image box head.\"\"\"\n        return hasattr(self,\n                       'img_bbox_head') and self.img_bbox_head is not None\n\n    @property\n    def with_img_backbone(self):\n        \"\"\"bool: Whether the detector has a 2D image backbone.\"\"\"\n        return hasattr(self, 'img_backbone') and self.img_backbone is not None\n\n    @property\n    def with_pts_backbone(self):\n        \"\"\"bool: Whether the detector has a 3D backbone.\"\"\"\n        return hasattr(self, 'pts_backbone') and self.pts_backbone is not None\n\n    @property\n    def with_fusion(self):\n        \"\"\"bool: Whether the detector has a fusion layer.\"\"\"\n        return hasattr(self,\n                       'pts_fusion_layer') and self.fusion_layer is not None\n\n    @property\n    def with_img_neck(self):\n        \"\"\"bool: Whether the detector has a neck in image branch.\"\"\"\n        return hasattr(self, 'img_neck') and self.img_neck is not None\n\n    @property\n    def with_pts_neck(self):\n        \"\"\"bool: Whether the detector has a neck in 3D detector branch.\"\"\"\n        return hasattr(self, 'pts_neck') and self.pts_neck is not None\n\n    @property\n    def with_img_rpn(self):\n        \"\"\"bool: Whether the detector has a 2D RPN in image detector branch.\"\"\"\n        return hasattr(self, 'img_rpn_head') and self.img_rpn_head is not None\n\n    @property\n    def with_img_roi_head(self):\n        \"\"\"bool: Whether the detector has a RoI Head in image branch.\"\"\"\n        return hasattr(self, 'img_roi_head') and self.img_roi_head is not None\n\n    @property\n    def with_voxel_encoder(self):\n        \"\"\"bool: Whether the 
detector has a voxel encoder.\"\"\"\n        return hasattr(self,\n                       'voxel_encoder') and self.voxel_encoder is not None\n\n    @property\n    def with_middle_encoder(self):\n        \"\"\"bool: Whether the detector has a middle encoder.\"\"\"\n        return hasattr(self,\n                       'middle_encoder') and self.middle_encoder is not None\n\n    def extract_img_feat(self, img, img_metas):\n        \"\"\"Extract features of images.\"\"\"\n        if self.with_img_backbone and img is not None:\n            input_shape = img.shape[-2:]\n            # update real input shape of each single img\n            for img_meta in img_metas:\n                img_meta.update(input_shape=input_shape)\n\n            if img.dim() == 5 and img.size(0) == 1:\n                img.squeeze_(0)\n            elif img.dim() == 5 and img.size(0) > 1:\n                B, N, C, H, W = img.size()\n                img = img.view(B * N, C, H, W)\n            img_feats = self.img_backbone(img.float())\n        else:\n            return None\n        if self.with_img_neck:\n            img_feats = self.img_neck(img_feats)\n\n        return img_feats\n\n    def extract_pts_feat(self, pts, img_feats, img_metas):\n        \"\"\"Extract features of points.\"\"\"\n        if not self.with_pts_bbox:\n            return None\n        voxels, num_points, coors = self.voxelize(pts)\n        voxel_features = self.pts_voxel_encoder(voxels, num_points, coors,\n                                                )\n        batch_size = coors[-1, 0] + 1\n        x = self.pts_middle_encoder(voxel_features, coors, batch_size)\n        x = self.pts_backbone(x)\n        if self.with_pts_neck:\n            x = self.pts_neck(x)\n        return x\n\n    def extract_feat(self, points, img, img_metas):\n        \"\"\"Extract features from images and points.\"\"\"\n\n        img_feats = self.extract_img_feat(img, img_metas)\n        pts_feats = self.extract_pts_feat(points, img_feats, img_metas)\n        return (img_feats, pts_feats)\n\n    @torch.no_grad()\n    @force_fp32()\n    def voxelize(self, points):\n        \"\"\"Apply dynamic voxelization to points.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n\n        Returns:\n            tuple[torch.Tensor]: Concatenated points, number of points\n                per voxel, and coordinates.\n        \"\"\"\n        voxels, coors, num_points = [], [], []\n        for res in points:\n            res_voxels, res_coors, res_num_points = self.pts_voxel_layer(res)\n            voxels.append(res_voxels)\n            coors.append(res_coors)\n            num_points.append(res_num_points)\n        voxels = torch.cat(voxels, dim=0)\n        num_points = torch.cat(num_points, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n        return voxels, num_points, coors_batch\n\n    def forward_train(self,\n                      points=None,\n                      img_metas=None,\n                      gt_bboxes_3d=None,\n                      gt_labels_3d=None,\n                      gt_labels=None,\n                      gt_bboxes=None,\n                      img=None,\n                      proposals=None,\n                      gt_bboxes_ignore=None):\n        \"\"\"Forward training function.\n\n        Args:\n            points (list[torch.Tensor], 
optional): Points of each sample.\n                Defaults to None.\n            img_metas (list[dict], optional): Meta information of each sample.\n                Defaults to None.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional):\n                Ground truth 3D boxes. Defaults to None.\n            gt_labels_3d (list[torch.Tensor], optional): Ground truth labels\n                of 3D boxes. Defaults to None.\n            gt_labels (list[torch.Tensor], optional): Ground truth labels\n                of 2D boxes in images. Defaults to None.\n            gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in\n                images. Defaults to None.\n            img (torch.Tensor, optional): Images of each sample with shape\n                (N, C, H, W). Defaults to None.\n            proposals (list[torch.Tensor], optional): Predicted proposals\n                used for training Fast RCNN. Defaults to None.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                2D boxes in images to be ignored. Defaults to None.\n\n        Returns:\n            dict: Losses of different branches.\n        \"\"\"\n        img_feats, pts_feats = self.extract_feat(\n            points, img=img, img_metas=img_metas)\n        losses = dict()\n        if pts_feats:\n            losses_pts = self.forward_pts_train(pts_feats, img_feats, gt_bboxes_3d,\n                                                gt_labels_3d, img_metas,\n                                                gt_bboxes_ignore)\n            losses.update(losses_pts)\n        if img_feats:\n            losses_img = self.forward_img_train(\n                img_feats,\n                img_metas=img_metas,\n                gt_bboxes=gt_bboxes,\n                gt_labels=gt_labels,\n                gt_bboxes_ignore=gt_bboxes_ignore,\n                proposals=proposals)\n            losses.update(losses_img)\n        return losses\n\n    def forward_pts_train(self,\n                          pts_feats,\n                          img_feats,\n                          gt_bboxes_3d,\n                          gt_labels_3d,\n                          img_metas,\n                          gt_bboxes_ignore=None):\n        \"\"\"Forward function for point cloud branch.\n\n        Args:\n            pts_feats (list[torch.Tensor]): Features of point cloud branch\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\n                boxes of each sample.\n            img_metas (list[dict]): Meta information of samples.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. 
Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        outs = self.pts_bbox_head(pts_feats, img_feats, img_metas)\n        loss_inputs = [gt_bboxes_3d, gt_labels_3d, outs]\n        losses = self.pts_bbox_head.loss(*loss_inputs)\n        return losses\n\n    def forward_img_train(self,\n                          x,\n                          img_metas,\n                          gt_bboxes,\n                          gt_labels,\n                          gt_bboxes_ignore=None,\n                          proposals=None,\n                          **kwargs):\n        \"\"\"Forward function for image branch.\n\n        This function works similar to the forward function of Faster R-CNN.\n\n        Args:\n            x (list[torch.Tensor]): Image features of shape (B, C, H, W)\n                of multiple levels.\n            img_metas (list[dict]): Meta information of images.\n            gt_bboxes (list[torch.Tensor]): Ground truth boxes of each image\n                sample.\n            gt_labels (list[torch.Tensor]): Ground truth labels of boxes.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. Defaults to None.\n            proposals (list[torch.Tensor], optional): Proposals of each sample.\n                Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        losses = dict()\n        # RPN forward and loss\n        if self.with_img_rpn:\n            rpn_outs = self.img_rpn_head(x)\n            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_metas,\n                                          self.train_cfg.img_rpn)\n            rpn_losses = self.img_rpn_head.loss(\n                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n            losses.update(rpn_losses)\n\n            proposal_cfg = self.train_cfg.get('img_rpn_proposal',\n                                              self.test_cfg.img_rpn)\n            proposal_inputs = rpn_outs + (img_metas, proposal_cfg)\n            proposal_list = self.img_rpn_head.get_bboxes(*proposal_inputs)\n        else:\n            proposal_list = proposals\n\n        # bbox head forward and loss\n        if self.with_img_bbox:\n            # bbox head forward and loss\n            img_roi_losses = self.img_roi_head.forward_train(\n                x, img_metas, proposal_list, gt_bboxes, gt_labels,\n                gt_bboxes_ignore, **kwargs)\n            losses.update(img_roi_losses)\n\n        return losses\n\n    def simple_test_img(self, x, img_metas, proposals=None, rescale=False):\n        \"\"\"Test without augmentation.\"\"\"\n        if proposals is None:\n            proposal_list = self.simple_test_rpn(x, img_metas,\n                                                 self.test_cfg.img_rpn)\n        else:\n            proposal_list = proposals\n\n        return self.img_roi_head.simple_test(\n            x, proposal_list, img_metas, rescale=rescale)\n\n    def simple_test_rpn(self, x, img_metas, rpn_test_cfg):\n        \"\"\"RPN test function.\"\"\"\n        rpn_outs = self.img_rpn_head(x)\n        proposal_inputs = rpn_outs + (img_metas, rpn_test_cfg)\n        proposal_list = self.img_rpn_head.get_bboxes(*proposal_inputs)\n        return proposal_list\n\n    def simple_test_pts(self, x, x_img, img_metas, rescale=False):\n        \"\"\"Test function of point cloud branch.\"\"\"\n        outs = self.pts_bbox_head(x, x_img, img_metas)\n        bbox_list = 
self.pts_bbox_head.get_bboxes(\n            outs, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def simple_test(self, points, img_metas, img=None, rescale=False):\n        \"\"\"Test function without augmentation.\"\"\"\n        img_feats, pts_feats = self.extract_feat(\n            points, img=img, img_metas=img_metas)\n\n        bbox_list = [dict() for i in range(len(img_metas))]\n        if pts_feats and self.with_pts_bbox:\n            bbox_pts = self.simple_test_pts(\n                pts_feats, img_feats, img_metas, rescale=rescale)\n            for result_dict, pts_bbox in zip(bbox_list, bbox_pts):\n                result_dict['pts_bbox'] = pts_bbox\n        if img_feats and self.with_img_bbox:\n            bbox_img = self.simple_test_img(\n                img_feats, img_metas, rescale=rescale)\n            for result_dict, img_bbox in zip(bbox_list, bbox_img):\n                result_dict['img_bbox'] = img_bbox\n        return bbox_list\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test function with augmentation.\"\"\"\n        img_feats, pts_feats = self.extract_feats(points, img_metas, imgs)\n\n        bbox_list = dict()\n        if pts_feats and self.with_pts_bbox:\n            bbox_pts = self.aug_test_pts(pts_feats, img_metas, rescale)\n            bbox_list.update(pts_bbox=bbox_pts)\n        return [bbox_list]\n\n    def extract_feats(self, points, img_metas, imgs=None):\n        \"\"\"Extract point and image features of multiple samples.\"\"\"\n        if imgs is None:\n            imgs = [None] * len(img_metas)\n        img_feats, pts_feats = multi_apply(self.extract_feat, points, imgs,\n                                           img_metas)\n        return img_feats, pts_feats\n\n    def aug_test_pts(self, feats, img_metas, rescale=False):\n        \"\"\"Test function of point cloud branch with augmentation.\"\"\"\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for x, img_meta in zip(feats, img_metas):\n            outs = self.pts_bbox_head(x)\n            bbox_list = self.pts_bbox_head.get_bboxes(\n                *outs, img_meta, rescale=rescale)\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.pts_bbox_head.test_cfg)\n        return merged_bboxes\n\n    def show_results(self, data, result, out_dir):\n        \"\"\"Results visualization.\n\n        Args:\n            data (dict): Input points and the information of the sample.\n            result (dict): Prediction results.\n            out_dir (str): Output directory of visualization result.\n        \"\"\"\n        for batch_id in range(len(result)):\n            if isinstance(data['points'][0], DC):\n                points = data['points'][0]._data[0][batch_id].numpy()\n            elif mmcv.is_list_of(data['points'][0], torch.Tensor):\n                points = data['points'][0][batch_id]\n            else:\n                raise ValueError(\n                    f\"Unsupported data type {type(data['points'][0])} \"\n                    f'for visualization!')\n            if isinstance(data['img_metas'][0], DC):\n                pts_filename = data['img_metas'][0]._data[0][batch_id][\n                    'pts_filename']\n                box_mode_3d = data['img_metas'][0]._data[0][batch_id][\n                    'box_mode_3d']\n            elif mmcv.is_list_of(data['img_metas'][0], dict):\n                pts_filename = data['img_metas'][0][batch_id]['pts_filename']\n                box_mode_3d = data['img_metas'][0][batch_id]['box_mode_3d']\n            else:\n                raise ValueError(\n                    f\"Unsupported data type {type(data['img_metas'][0])} \"\n                    f'for visualization!')\n            file_name = osp.split(pts_filename)[-1].split('.')[0]\n\n            assert out_dir is not None, 'Expect out_dir, got none.'\n            inds = result[batch_id]['pts_bbox']['scores_3d'] > 0.1\n            pred_bboxes = result[batch_id]['pts_bbox']['boxes_3d'][inds]\n\n            # for now we convert points and bbox into depth mode\n            if (box_mode_3d == Box3DMode.CAM) or (box_mode_3d\n                                                  == Box3DMode.LIDAR):\n                points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,\n                                                   Coord3DMode.DEPTH)\n                pred_bboxes = Box3DMode.convert(pred_bboxes, box_mode_3d,\n                                                Box3DMode.DEPTH)\n            elif box_mode_3d != Box3DMode.DEPTH:\n                raise ValueError(\n                    f'Unsupported box_mode_3d {box_mode_3d} for conversion!')\n\n            pred_bboxes = pred_bboxes.tensor.cpu().numpy()\n            show_result(points, None, pred_bboxes, out_dir, file_name)\n\n"
  },
  {
    "path": "mmdet3d/models/detectors/parta2.py",
    "content": "import torch\nfrom torch.nn import functional as F\n\nfrom mmdet3d.ops import Voxelization\nfrom mmdet.models import DETECTORS\nfrom .. import builder\nfrom .two_stage import TwoStage3DDetector\n\n\n@DETECTORS.register_module()\nclass PartA2(TwoStage3DDetector):\n    r\"\"\"Part-A2 detector.\n\n    Please refer to the `paper <https://arxiv.org/abs/1907.03670>`_\n    \"\"\"\n\n    def __init__(self,\n                 voxel_layer,\n                 voxel_encoder,\n                 middle_encoder,\n                 backbone,\n                 neck=None,\n                 rpn_head=None,\n                 roi_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(PartA2, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n        )\n        self.voxel_layer = Voxelization(**voxel_layer)\n        self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder)\n        self.middle_encoder = builder.build_middle_encoder(middle_encoder)\n\n    def extract_feat(self, points, img_metas):\n        \"\"\"Extract features from points.\"\"\"\n        voxel_dict = self.voxelize(points)\n        voxel_features = self.voxel_encoder(voxel_dict['voxels'],\n                                            voxel_dict['num_points'],\n                                            voxel_dict['coors'])\n        batch_size = voxel_dict['coors'][-1, 0].item() + 1\n        feats_dict = self.middle_encoder(voxel_features, voxel_dict['coors'],\n                                         batch_size)\n        x = self.backbone(feats_dict['spatial_features'])\n        if self.with_neck:\n            neck_feats = self.neck(x)\n            feats_dict.update({'neck_feats': neck_feats})\n        return feats_dict, voxel_dict\n\n    @torch.no_grad()\n    def voxelize(self, points):\n        \"\"\"Apply hard voxelization to points.\"\"\"\n        voxels, coors, num_points, voxel_centers = [], [], [], []\n        for res in points:\n            res_voxels, res_coors, res_num_points = self.voxel_layer(res)\n            res_voxel_centers = (\n                res_coors[:, [2, 1, 0]] + 0.5) * res_voxels.new_tensor(\n                    self.voxel_layer.voxel_size) + res_voxels.new_tensor(\n                        self.voxel_layer.point_cloud_range[0:3])\n            voxels.append(res_voxels)\n            coors.append(res_coors)\n            num_points.append(res_num_points)\n            voxel_centers.append(res_voxel_centers)\n\n        voxels = torch.cat(voxels, dim=0)\n        num_points = torch.cat(num_points, dim=0)\n        voxel_centers = torch.cat(voxel_centers, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n\n        voxel_dict = dict(\n            voxels=voxels,\n            num_points=num_points,\n            coors=coors_batch,\n            voxel_centers=voxel_centers)\n        return voxel_dict\n\n    def forward_train(self,\n                      points,\n                      img_metas,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      gt_bboxes_ignore=None,\n                      proposals=None):\n        \"\"\"Training 
forward function.\n\n        Args:\n            points (list[torch.Tensor]): Point cloud of each sample.\n            img_metas (list[dict]): Meta information of each sample.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\n                boxes of each sample.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        feats_dict, voxels_dict = self.extract_feat(points, img_metas)\n\n        losses = dict()\n\n        if self.with_rpn:\n            rpn_outs = self.rpn_head(feats_dict['neck_feats'])\n            rpn_loss_inputs = rpn_outs + (gt_bboxes_3d, gt_labels_3d,\n                                          img_metas)\n            rpn_losses = self.rpn_head.loss(\n                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n            losses.update(rpn_losses)\n\n            proposal_cfg = self.train_cfg.get('rpn_proposal',\n                                              self.test_cfg.rpn)\n            proposal_inputs = rpn_outs + (img_metas, proposal_cfg)\n            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)\n        else:\n            proposal_list = proposals\n\n        roi_losses = self.roi_head.forward_train(feats_dict, voxels_dict,\n                                                 img_metas, proposal_list,\n                                                 gt_bboxes_3d, gt_labels_3d)\n\n        losses.update(roi_losses)\n\n        return losses\n\n    def simple_test(self, points, img_metas, proposals=None, rescale=False):\n        \"\"\"Test function without augmentation.\"\"\"\n        feats_dict, voxels_dict = self.extract_feat(points, img_metas)\n\n        if self.with_rpn:\n            rpn_outs = self.rpn_head(feats_dict['neck_feats'])\n            proposal_cfg = self.test_cfg.rpn\n            bbox_inputs = rpn_outs + (img_metas, proposal_cfg)\n            proposal_list = self.rpn_head.get_bboxes(*bbox_inputs)\n        else:\n            proposal_list = proposals\n\n        return self.roi_head.simple_test(feats_dict, voxels_dict, img_metas,\n                                         proposal_list)\n"
  },
  {
    "path": "mmdet3d/models/detectors/single_stage.py",
    "content": "from torch import nn as nn\n\nfrom mmdet.models import DETECTORS, build_backbone, build_head, build_neck\nfrom .base import Base3DDetector\n\n\n@DETECTORS.register_module()\nclass SingleStage3DDetector(Base3DDetector):\n    \"\"\"SingleStage3DDetector.\n\n    This class serves as a base class for single-stage 3D detectors.\n\n    Args:\n        backbone (dict): Config dict of detector's backbone.\n        neck (dict, optional): Config dict of neck. Defaults to None.\n        bbox_head (dict, optional): Config dict of box head. Defaults to None.\n        train_cfg (dict, optional): Config dict of training hyper-parameters.\n            Defaults to None.\n        test_cfg (dict, optional): Config dict of test hyper-parameters.\n            Defaults to None.\n        pretrained (str, optional): Path of pretrained models.\n            Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(SingleStage3DDetector, self).__init__()\n        self.backbone = build_backbone(backbone)\n        if neck is not None:\n            self.neck = build_neck(neck)\n        bbox_head.update(train_cfg=train_cfg)\n        bbox_head.update(test_cfg=test_cfg)\n        self.bbox_head = build_head(bbox_head)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.init_weights(pretrained=pretrained)\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize weights of detector.\"\"\"\n        super(SingleStage3DDetector, self).init_weights(pretrained)\n        self.backbone.init_weights(pretrained=pretrained)\n        if self.with_neck:\n            if isinstance(self.neck, nn.Sequential):\n                for m in self.neck:\n                    m.init_weights()\n            else:\n                self.neck.init_weights()\n        self.bbox_head.init_weights()\n\n    def extract_feat(self, points, img_metas=None):\n        \"\"\"Directly extract features from the backbone+neck.\n\n        Args:\n            points (torch.Tensor): Input points.\n        \"\"\"\n        x = self.backbone(points)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def extract_feats(self, points, img_metas):\n        \"\"\"Extract features of multiple samples.\"\"\"\n        return [\n            self.extract_feat(pts, img_meta)\n            for pts, img_meta in zip(points, img_metas)\n        ]\n"
  },
  {
    "path": "mmdet3d/models/detectors/sparsefusion.py",
    "content": "import mmcv\r\nimport torch\r\nfrom mmcv.parallel import DataContainer as DC\r\nfrom mmcv.runner import force_fp32\r\nfrom os import path as osp\r\nfrom torch import nn as nn\r\nfrom torch.nn import functional as F\r\nimport numpy as np\r\nimport time\r\n\r\nfrom mmdet3d.core import (Box3DMode, Coord3DMode, bbox3d2result,\r\n                          merge_aug_bboxes_3d, show_result)\r\nfrom mmdet3d.ops import Voxelization\r\nfrom mmdet.core import multi_apply\r\nfrom mmdet.models import DETECTORS\r\nfrom .. import builder\r\nfrom .mvx_two_stage import MVXTwoStageDetector\r\nfrom mmdet3d.ops import Voxelization\r\n\r\n@DETECTORS.register_module()\r\nclass SparseFusionDetector(MVXTwoStageDetector):\r\n    \"\"\"Base class of Multi-modality VoxelNet.\"\"\"\r\n\r\n    def __init__(self, **kwargs):\r\n        super(SparseFusionDetector, self).__init__(**kwargs)\r\n\r\n        self.freeze_img = kwargs.get('freeze_img', True)\r\n        self.freeze_img_head = kwargs.get('freeze_img_head', False)\r\n\r\n        self.init_weights(pretrained=kwargs.get('pretrained', None))\r\n\r\n    def init_weights(self, pretrained=None):\r\n        \"\"\"Initialize model weights.\"\"\"\r\n        super(SparseFusionDetector, self).init_weights(pretrained)\r\n\r\n        if self.freeze_img:\r\n            if self.with_img_backbone:\r\n                for param in self.img_backbone.parameters():\r\n                    param.requires_grad = False\r\n            if self.with_img_neck:\r\n                for param in self.img_neck.parameters():\r\n                    param.requires_grad = False\r\n            if self.freeze_img_head:\r\n                for param in self.pts_bbox_head.img_transformer.parameters():\r\n                    param.requires_grad = False\r\n                for param in self.pts_bbox_head.shared_conv_img.parameters():\r\n                    param.requires_grad = False\r\n                for param in self.pts_bbox_head.img_heatmap_head.parameters():\r\n                    param.requires_grad = False\r\n\r\n    def extract_img_feat(self, img, img_metas):\r\n        \"\"\"Extract features of images.\"\"\"\r\n        if self.with_img_backbone and img is not None:\r\n            input_shape = img.shape[-2:]\r\n            # update real input shape of each single img\r\n            for img_meta in img_metas:\r\n                img_meta.update(input_shape=input_shape)\r\n\r\n            if img.dim() == 5 and img.size(0) == 1:\r\n                img.squeeze_(0)\r\n            elif img.dim() == 5 and img.size(0) > 1:\r\n                B, N, C, H, W = img.size()\r\n                img = img.view(B * N, C, H, W)\r\n\r\n            img_feats = self.img_backbone(img.float())\r\n        else:\r\n            return None\r\n        if self.with_img_neck:\r\n            img_feats = self.img_neck(img_feats)\r\n\r\n        return img_feats\r\n\r\n    def extract_voxel_heights(self, voxels, coors):\r\n        batch_size = coors[-1, 0].item() + 1\r\n        grid_size = self.test_cfg['pts']['grid_size']\r\n        out_size_factor = self.test_cfg['pts']['out_size_factor']\r\n\r\n        height_num = grid_size[2]\r\n        x_num = grid_size[0] // out_size_factor\r\n        y_num = grid_size[1] // out_size_factor\r\n\r\n        voxels_ = voxels[:, :, 2].clone()\r\n        voxels_[voxels_==0] = 100\r\n        min_voxel = torch.min(voxels_, dim=-1)[0]\r\n        voxels_[voxels_==100] = -200\r\n        max_voxel = torch.max(voxels_, dim=-1)[0]\r\n\r\n        min_voxel_height = torch.zeros((batch_size, 
y_num, x_num, out_size_factor*out_size_factor)).to(voxels.device) + 100\r\n        max_voxel_height = torch.zeros((batch_size, y_num, x_num, out_size_factor*out_size_factor)).to(voxels.device) - 200\r\n\r\n        batch_ids = coors[:, 0].long()\r\n        height_ids = coors[:, 1].long()\r\n        y_ids = (coors[:, 2] // out_size_factor).long()\r\n        x_ids = (coors[:, 3] // out_size_factor).long()\r\n        y_offsets = (coors[:, 2] % out_size_factor).long()\r\n        x_offsets = (coors[:, 3] % out_size_factor).long()\r\n\r\n        for hid in range(height_num):\r\n            height_mask = height_ids == hid\r\n            batch_mask = batch_ids[height_mask]\r\n            y_ids_mask = y_ids[height_mask]\r\n            x_ids_mask = x_ids[height_mask]\r\n            y_offsets_mask = y_offsets[height_mask]\r\n            x_offsets_mask = x_offsets[height_mask]\r\n\r\n            min_voxel_height[batch_mask, y_ids_mask, x_ids_mask, y_offsets_mask * out_size_factor + x_offsets_mask] = torch.minimum(min_voxel_height[batch_mask, y_ids_mask, x_ids_mask, y_offsets_mask * out_size_factor + x_offsets_mask], min_voxel[height_mask])\r\n            max_voxel_height[batch_mask, y_ids_mask, x_ids_mask, y_offsets_mask * out_size_factor + x_offsets_mask] = torch.maximum(max_voxel_height[batch_mask, y_ids_mask, x_ids_mask, y_offsets_mask * out_size_factor + x_offsets_mask], max_voxel[height_mask])\r\n\r\n        min_voxel_height = torch.min(min_voxel_height, dim=-1)[0]\r\n        max_voxel_height = torch.max(max_voxel_height, dim=-1)[0]\r\n\r\n        return min_voxel_height, max_voxel_height\r\n\r\n    def extract_pts_feat(self, pts, img_feats, img_metas):\r\n        \"\"\"Extract features of points.\"\"\"\r\n        if not self.with_pts_bbox:\r\n            return None\r\n        voxels, num_points, coors, min_voxel_height, max_voxel_height = self.voxelize(pts)\r\n\r\n        voxel_features = self.pts_voxel_encoder(voxels, num_points, coors)\r\n        batch_size = coors[-1, 0] + 1\r\n        x = self.pts_middle_encoder(voxel_features, coors, batch_size)\r\n        x = self.pts_backbone(x)\r\n        if self.with_pts_neck:\r\n            x = self.pts_neck(x)\r\n\r\n        min_voxel_height = min_voxel_height[:, None]\r\n        max_voxel_height = max_voxel_height[:, None]\r\n\r\n        x[0] = torch.cat([x[0], min_voxel_height, max_voxel_height], dim=1)\r\n        return x\r\n\r\n    @torch.no_grad()\r\n    @force_fp32()\r\n    def voxelize(self, points):\r\n        \"\"\"Apply dynamic voxelization to points.\r\n\r\n        Args:\r\n            points (list[torch.Tensor]): Points of each sample.\r\n\r\n        Returns:\r\n            tuple[torch.Tensor]: Concatenated points, number of points\r\n                per voxel, and coordinates.\r\n        \"\"\"\r\n        voxels, coors, num_points = [], [], []\r\n        for res in points:\r\n            res_voxels, res_coors, res_num_points = self.pts_voxel_layer(res)\r\n            voxels.append(res_voxels)\r\n            coors.append(res_coors)\r\n            num_points.append(res_num_points)\r\n        voxels = torch.cat(voxels, dim=0)\r\n        num_points = torch.cat(num_points, dim=0)\r\n        coors_batch = []\r\n        for i, coor in enumerate(coors):\r\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\r\n            coors_batch.append(coor_pad)\r\n        coors_batch = torch.cat(coors_batch, dim=0)\r\n\r\n        min_voxel_height, max_voxel_height = self.extract_voxel_heights(voxels, coors_batch)\r\n\r\n        return 
voxels, num_points, coors_batch, min_voxel_height, max_voxel_height\r\n\r\n    def forward_train(self,\r\n                      points=None,\r\n                      img_metas=None,\r\n                      gt_bboxes_3d=None,\r\n                      gt_labels_3d=None,\r\n                      gt_labels=None,\r\n                      gt_bboxes=None,\r\n                      gt_pts_centers_view=None,\r\n                      gt_img_centers_view=None,\r\n                      gt_bboxes_cam_view=None,\r\n                      img=None,\r\n                      sparse_depth=None,\r\n                      gt_visible_3d=None,\r\n                      gt_bboxes_lidar_view=None,\r\n                      proposals=None,\r\n                      gt_bboxes_ignore=None):\r\n        \"\"\"Forward training function.\r\n\r\n        Args:\r\n            points (list[torch.Tensor], optional): Points of each sample.\r\n                Defaults to None.\r\n            img_metas (list[dict], optional): Meta information of each sample.\r\n                Defaults to None.\r\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional):\r\n                Ground truth 3D boxes. Defaults to None.\r\n            gt_labels_3d (list[torch.Tensor], optional): Ground truth labels\r\n                of 3D boxes. Defaults to None.\r\n            gt_labels (list[torch.Tensor], optional): Ground truth labels\r\n                of 2D boxes in images. Defaults to None.\r\n            gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in\r\n                images. Defaults to None.\r\n            img (torch.Tensor optional): Images of each sample with shape\r\n                (N, C, H, W). Defaults to None.\r\n            proposals ([list[torch.Tensor], optional): Predicted proposals\r\n                used for training Fast RCNN. Defaults to None.\r\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\r\n                2D boxes in images to be ignored. 
Defaults to None.\r\n\r\n        Returns:\r\n            dict: Losses of different branches.\r\n        \"\"\"\r\n        img_feats, pts_feats = self.extract_feat(\r\n            points, img=img, img_metas=img_metas)\r\n        losses = dict()\r\n        if pts_feats:\r\n            losses_pts = self.forward_pts_train(\r\n                pts_feats, img_feats, gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels, gt_pts_centers_view, gt_img_centers_view, gt_bboxes_cam_view, img_metas, gt_bboxes_ignore, sparse_depth, gt_visible_3d, gt_bboxes_lidar_view\r\n            )\r\n            losses.update(losses_pts)\r\n        if img_feats:\r\n            losses_img = self.forward_img_train(\r\n                img_feats,\r\n                img_metas=img_metas,\r\n                gt_bboxes=gt_bboxes,\r\n                gt_labels=gt_labels,\r\n                gt_bboxes_ignore=gt_bboxes_ignore,\r\n                proposals=proposals)\r\n            losses.update(losses_img)\r\n        return losses\r\n\r\n    def forward_pts_train(self,\r\n                          pts_feats,\r\n                          img_feats,\r\n                          gt_bboxes_3d,\r\n                          gt_labels_3d,\r\n                          gt_bboxes,\r\n                          gt_labels,\r\n                          gt_pts_centers_view,\r\n                          gt_img_centers_view,\r\n                          gt_bboxes_cam_view,\r\n                          img_metas,\r\n                          gt_bboxes_ignore=None,\r\n                          sparse_depth=None,\r\n                          gt_visible_3d=None,\r\n                          gt_bboxes_lidar_view=None):\r\n        \"\"\"Forward function for point cloud branch.\r\n\r\n        Args:\r\n            pts_feats (list[torch.Tensor]): Features of point cloud branch\r\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\r\n                boxes for each sample.\r\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\r\n                boxes of each sample.\r\n            img_metas (list[dict]): Meta information of samples.\r\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\r\n                boxes to be ignored. 
Defaults to None.\r\n\r\n        Returns:\r\n            dict: Losses of each branch.\r\n        \"\"\"\r\n        outs = self.pts_bbox_head(pts_feats, img_feats, img_metas, sparse_depth)\r\n\r\n        loss_inputs = [gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels, gt_pts_centers_view, gt_img_centers_view, gt_bboxes_cam_view, gt_visible_3d, gt_bboxes_lidar_view, img_metas, outs]\r\n        losses = self.pts_bbox_head.loss(*loss_inputs)\r\n        return losses\r\n\r\n    def simple_test_pts(self, x, x_img, img_metas, rescale=False, sparse_depth=None):\r\n        \"\"\"Test function of point cloud branch.\"\"\"\r\n\r\n        outs = self.pts_bbox_head(x, x_img, img_metas, sparse_depth)\r\n\r\n        bbox_list = self.pts_bbox_head.get_bboxes(\r\n            outs, img_metas, rescale=rescale)\r\n\r\n        bbox_results = [\r\n            bbox3d2result(bboxes, scores, labels)\r\n            for bboxes, scores, labels in bbox_list\r\n        ]\r\n        return bbox_results\r\n\r\n    def simple_test(self, points, img_metas, img=None, sparse_depth=None, rescale=False):\r\n        \"\"\"Test function without augmentation.\"\"\"\r\n        img_feats, pts_feats = self.extract_feat(\r\n            points, img=img, img_metas=img_metas)\r\n\r\n        bbox_list = [dict() for i in range(len(img_metas))]\r\n        if pts_feats and self.with_pts_bbox:\r\n            bbox_pts = self.simple_test_pts(\r\n                pts_feats, img_feats, img_metas, rescale=rescale, sparse_depth=sparse_depth)\r\n            for result_dict, pts_bbox in zip(bbox_list, bbox_pts):\r\n                result_dict['pts_bbox'] = pts_bbox\r\n        if img_feats and self.with_img_bbox:\r\n            bbox_img = self.simple_test_img(\r\n                img_feats, img_metas, rescale=rescale)\r\n            for result_dict, img_bbox in zip(bbox_list, bbox_img):\r\n                result_dict['img_bbox'] = img_bbox\r\n\r\n        return bbox_list\r\n\r\n    def forward_test(self, points, img_metas, img=None, sparse_depth=None, **kwargs):\r\n        \"\"\"\r\n        Args:\r\n            points (list[torch.Tensor]): the outer list indicates test-time\r\n                augmentations and inner torch.Tensor should have a shape NxC,\r\n                which contains all points in the batch.\r\n            img_metas (list[list[dict]]): the outer list indicates test-time\r\n                augs (multiscale, flip, etc.) and the inner list indicates\r\n                images in a batch.\r\n            img (list[torch.Tensor], optional): the outer\r\n                list indicates test-time augmentations and inner\r\n                torch.Tensor should have a shape NxCxHxW, which contains\r\n                all images in the batch. Defaults to None.\r\n            sparse_depth (list[torch.Tensor], optional): the outer list\r\n                indicates test-time augmentations. Defaults to None.\r\n        \"\"\"\r\n        for var, name in [(points, 'points'), (img_metas, 'img_metas')]:\r\n            if not isinstance(var, list):\r\n                raise TypeError('{} must be a list, but got {}'.format(\r\n                    name, type(var)))\r\n\r\n        num_augs = len(points)\r\n        if num_augs != len(img_metas):\r\n            raise ValueError(\r\n                'num of augmentations ({}) != num of image meta ({})'.format(\r\n                    len(points), len(img_metas)))\r\n\r\n        if num_augs == 1:\r\n            img = [img] if img is None else img\r\n            sparse_depth = [sparse_depth] if sparse_depth is None else sparse_depth\r\n            return self.simple_test(points[0], img_metas[0], img[0], sparse_depth[0], **kwargs)\r\n        else:\r\n            return self.aug_test(points, img_metas, img, **kwargs)"
  },
  {
    "path": "mmdet3d/models/detectors/ssd3dnet.py",
    "content": "from mmdet.models import DETECTORS\nfrom .votenet import VoteNet\n\n\n@DETECTORS.register_module()\nclass SSD3DNet(VoteNet):\n    \"\"\"3DSSDNet model.\n\n    https://arxiv.org/abs/2002.10187.pdf\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(SSD3DNet, self).__init__(\n            backbone=backbone,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained)\n"
  },
  {
    "path": "mmdet3d/models/detectors/transfusion.py",
    "content": "import mmcv\nimport torch\nimport time\nfrom mmcv.parallel import DataContainer as DC\nfrom mmcv.runner import force_fp32\nfrom os import path as osp\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core import (Box3DMode, Coord3DMode, bbox3d2result,\n                          merge_aug_bboxes_3d, show_result)\nfrom mmdet3d.ops import Voxelization\nfrom mmdet.core import multi_apply\nfrom mmdet.models import DETECTORS\nfrom .. import builder\nfrom .mvx_two_stage import MVXTwoStageDetector\n\n\n@DETECTORS.register_module()\nclass TransFusionDetector(MVXTwoStageDetector):\n    \"\"\"Base class of Multi-modality VoxelNet.\"\"\"\n\n    def __init__(self, **kwargs):\n        super(TransFusionDetector, self).__init__(**kwargs)\n\n        self.freeze_img = kwargs.get('freeze_img', True)\n        self.init_weights(pretrained=kwargs.get('pretrained', None))\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize model weights.\"\"\"\n        super(TransFusionDetector, self).init_weights(pretrained)\n\n        if self.freeze_img:\n            if self.with_img_backbone:\n                for param in self.img_backbone.parameters():\n                    param.requires_grad = False\n            if self.with_img_neck:\n                for param in self.img_neck.parameters():\n                    param.requires_grad = False\n\n    def extract_img_feat(self, img, img_metas):\n        \"\"\"Extract features of images.\"\"\"\n        if self.with_img_backbone and img is not None:\n            input_shape = img.shape[-2:]\n            # update real input shape of each single img\n            for img_meta in img_metas:\n                img_meta.update(input_shape=input_shape)\n\n            if img.dim() == 5 and img.size(0) == 1:\n                img.squeeze_(0)\n            elif img.dim() == 5 and img.size(0) > 1:\n                B, N, C, H, W = img.size()\n                img = img.view(B * N, C, H, W)\n            img_feats = self.img_backbone(img.float())\n        else:\n            return None\n        if self.with_img_neck:\n            img_feats = self.img_neck(img_feats)\n\n        return img_feats\n\n    def extract_pts_feat(self, pts, img_feats, img_metas):\n        \"\"\"Extract features of points.\"\"\"\n        if not self.with_pts_bbox:\n            return None\n        voxels, num_points, coors = self.voxelize(pts)\n\n        voxel_features = self.pts_voxel_encoder(voxels, num_points, coors,\n                                                )\n        batch_size = coors[-1, 0] + 1\n        x = self.pts_middle_encoder(voxel_features, coors, batch_size)\n        x = self.pts_backbone(x)\n        if self.with_pts_neck:\n            x = self.pts_neck(x)\n\n        return x\n\n    @torch.no_grad()\n    @force_fp32()\n    def voxelize(self, points):\n        \"\"\"Apply dynamic voxelization to points.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n\n        Returns:\n            tuple[torch.Tensor]: Concatenated points, number of points\n                per voxel, and coordinates.\n        \"\"\"\n        voxels, coors, num_points = [], [], []\n        for res in points:\n            res_voxels, res_coors, res_num_points = self.pts_voxel_layer(res)\n            voxels.append(res_voxels)\n            coors.append(res_coors)\n            num_points.append(res_num_points)\n        voxels = torch.cat(voxels, dim=0)\n        num_points = torch.cat(num_points, dim=0)\n        coors_batch = []\n        for 
i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n        return voxels, num_points, coors_batch\n\n    def forward_train(self,\n                      points=None,\n                      img_metas=None,\n                      gt_bboxes_3d=None,\n                      gt_labels_3d=None,\n                      gt_labels=None,\n                      gt_bboxes=None,\n                      img=None,\n                      proposals=None,\n                      gt_bboxes_ignore=None):\n        \"\"\"Forward training function.\n\n        Args:\n            points (list[torch.Tensor], optional): Points of each sample.\n                Defaults to None.\n            img_metas (list[dict], optional): Meta information of each sample.\n                Defaults to None.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional):\n                Ground truth 3D boxes. Defaults to None.\n            gt_labels_3d (list[torch.Tensor], optional): Ground truth labels\n                of 3D boxes. Defaults to None.\n            gt_labels (list[torch.Tensor], optional): Ground truth labels\n                of 2D boxes in images. Defaults to None.\n            gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in\n                images. Defaults to None.\n            img (torch.Tensor optional): Images of each sample with shape\n                (N, C, H, W). Defaults to None.\n            proposals ([list[torch.Tensor], optional): Predicted proposals\n                used for training Fast RCNN. Defaults to None.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                2D boxes in images to be ignored. Defaults to None.\n\n        Returns:\n            dict: Losses of different branches.\n        \"\"\"\n        img_feats, pts_feats = self.extract_feat(\n            points, img=img, img_metas=img_metas)\n        losses = dict()\n        if pts_feats:\n            losses_pts = self.forward_pts_train(pts_feats, img_feats, gt_bboxes_3d,\n                                                gt_labels_3d, img_metas,\n                                                gt_bboxes_ignore)\n            losses.update(losses_pts)\n        if img_feats:\n            losses_img = self.forward_img_train(\n                img_feats,\n                img_metas=img_metas,\n                gt_bboxes=gt_bboxes,\n                gt_labels=gt_labels,\n                gt_bboxes_ignore=gt_bboxes_ignore,\n                proposals=proposals)\n            losses.update(losses_img)\n        return losses\n\n    def forward_pts_train(self,\n                          pts_feats,\n                          img_feats,\n                          gt_bboxes_3d,\n                          gt_labels_3d,\n                          img_metas,\n                          gt_bboxes_ignore=None):\n        \"\"\"Forward function for point cloud branch.\n\n        Args:\n            pts_feats (list[torch.Tensor]): Features of point cloud branch\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\n                boxes of each sampole\n            img_metas (list[dict]): Meta information of samples.\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. 
Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        outs = self.pts_bbox_head(pts_feats, img_feats, img_metas)\n        loss_inputs = [gt_bboxes_3d, gt_labels_3d, outs]\n        losses = self.pts_bbox_head.loss(*loss_inputs)\n        return losses\n\n    def simple_test_pts(self, x, x_img, img_metas, rescale=False):\n        \"\"\"Test function of point cloud branch.\"\"\"\n        outs = self.pts_bbox_head(x, x_img, img_metas)\n        bbox_list = self.pts_bbox_head.get_bboxes(\n            outs, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def simple_test(self, points, img_metas, img=None, rescale=False):\n        \"\"\"Test function without augmentation.\"\"\"\n        img_feats, pts_feats = self.extract_feat(\n            points, img=img, img_metas=img_metas)\n\n        bbox_list = [dict() for i in range(len(img_metas))]\n        if pts_feats and self.with_pts_bbox:\n            bbox_pts = self.simple_test_pts(\n                pts_feats, img_feats, img_metas, rescale=rescale)\n            for result_dict, pts_bbox in zip(bbox_list, bbox_pts):\n                result_dict['pts_bbox'] = pts_bbox\n        if img_feats and self.with_img_bbox:\n            bbox_img = self.simple_test_img(\n                img_feats, img_metas, rescale=rescale)\n            for result_dict, img_bbox in zip(bbox_list, bbox_img):\n                result_dict['img_bbox'] = img_bbox\n\n        return bbox_list\n"
  },
  {
    "path": "mmdet3d/models/detectors/two_stage.py",
    "content": "from mmdet.models import DETECTORS, TwoStageDetector\nfrom .base import Base3DDetector\n\n\n@DETECTORS.register_module()\nclass TwoStage3DDetector(Base3DDetector, TwoStageDetector):\n    \"\"\"Base class of two-stage 3D detector.\n\n    It inherits original ``:class:TwoStageDetector`` and\n    ``:class:Base3DDetector``. This class could serve as a base class for all\n    two-stage 3D detectors.\n    \"\"\"\n\n    def __init__(self, **kwargs):\n        super(TwoStage3DDetector, self).__init__(**kwargs)\n"
  },
  {
    "path": "mmdet3d/models/detectors/votenet.py",
    "content": "import torch\n\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet.models import DETECTORS\nfrom .single_stage import SingleStage3DDetector\n\n\n@DETECTORS.register_module()\nclass VoteNet(SingleStage3DDetector):\n    r\"\"\"`VoteNet <https://arxiv.org/pdf/1904.09664.pdf>`_ for 3D detection.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(VoteNet, self).__init__(\n            backbone=backbone,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained)\n\n    def forward_train(self,\n                      points,\n                      img_metas,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      pts_semantic_mask=None,\n                      pts_instance_mask=None,\n                      gt_bboxes_ignore=None):\n        \"\"\"Forward of training.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            img_metas (list): Image metas.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.\n            pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | list[torch.Tensor]): point-wise instance\n                label of each batch.\n            gt_bboxes_ignore (None | list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict: Losses.\n        \"\"\"\n        points_cat = torch.stack(points)\n\n        x = self.extract_feat(points_cat)\n        bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod)\n        loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask,\n                       pts_instance_mask, img_metas)\n        losses = self.bbox_head.loss(\n            bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        return losses\n\n    def simple_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Forward of testing.\n\n        Args:\n            points (list[torch.Tensor]): Points of each sample.\n            img_metas (list): Image metas.\n            rescale (bool): Whether to rescale results.\n\n        Returns:\n            list: Predicted 3d boxes.\n        \"\"\"\n        points_cat = torch.stack(points)\n\n        x = self.extract_feat(points_cat)\n        bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)\n        bbox_list = self.bbox_head.get_bboxes(\n            points_cat, bbox_preds, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test with augmentation.\"\"\"\n        points_cat = [torch.stack(pts) for pts in points]\n        feats = self.extract_feats(points_cat, img_metas)\n\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for x, pts_cat, img_meta in zip(feats, points_cat, img_metas):\n            bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)\n            bbox_list = self.bbox_head.get_bboxes(\n                pts_cat, bbox_preds, img_meta, 
rescale=rescale)\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.bbox_head.test_cfg)\n\n        return [merged_bboxes]\n"
  },
  {
    "path": "mmdet3d/models/detectors/voxelnet.py",
    "content": "import torch\nfrom mmcv.runner import force_fp32\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d\nfrom mmdet3d.ops import Voxelization\nfrom mmdet.models import DETECTORS\nfrom .. import builder\nfrom .single_stage import SingleStage3DDetector\n\n\n@DETECTORS.register_module()\nclass VoxelNet(SingleStage3DDetector):\n    r\"\"\"`VoxelNet <https://arxiv.org/abs/1711.06396>`_ for 3D detection.\"\"\"\n\n    def __init__(self,\n                 voxel_layer,\n                 voxel_encoder,\n                 middle_encoder,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(VoxelNet, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            bbox_head=bbox_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n        )\n        self.voxel_layer = Voxelization(**voxel_layer)\n        self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder)\n        self.middle_encoder = builder.build_middle_encoder(middle_encoder)\n\n    def extract_feat(self, points, img_metas):\n        \"\"\"Extract features from points.\"\"\"\n        voxels, num_points, coors = self.voxelize(points)\n        voxel_features = self.voxel_encoder(voxels, num_points, coors)\n        batch_size = coors[-1, 0].item() + 1\n        x = self.middle_encoder(voxel_features, coors, batch_size)\n        x = self.backbone(x)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    @torch.no_grad()\n    @force_fp32()\n    def voxelize(self, points):\n        \"\"\"Apply hard voxelization to points.\"\"\"\n        voxels, coors, num_points = [], [], []\n        for res in points:\n            res_voxels, res_coors, res_num_points = self.voxel_layer(res)\n            voxels.append(res_voxels)\n            coors.append(res_coors)\n            num_points.append(res_num_points)\n        voxels = torch.cat(voxels, dim=0)\n        num_points = torch.cat(num_points, dim=0)\n        coors_batch = []\n        for i, coor in enumerate(coors):\n            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)\n            coors_batch.append(coor_pad)\n        coors_batch = torch.cat(coors_batch, dim=0)\n        return voxels, num_points, coors_batch\n\n    def forward_train(self,\n                      points,\n                      img_metas,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      gt_bboxes_ignore=None):\n        \"\"\"Training forward function.\n\n        Args:\n            points (list[torch.Tensor]): Point cloud of each sample.\n            img_metas (list[dict]): Meta information of each sample\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes for each sample.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels for\n                boxes of each sampole\n            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth\n                boxes to be ignored. 
Defaults to None.\n\n        Returns:\n            dict: Losses of each branch.\n        \"\"\"\n        x = self.extract_feat(points, img_metas)\n        outs = self.bbox_head(x)\n        loss_inputs = outs + (gt_bboxes_3d, gt_labels_3d, img_metas)\n        losses = self.bbox_head.loss(\n            *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        return losses\n\n    def simple_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test function without augmentation.\"\"\"\n        x = self.extract_feat(points, img_metas)\n        outs = self.bbox_head(x)\n        bbox_list = self.bbox_head.get_bboxes(\n            *outs, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def aug_test(self, points, img_metas, imgs=None, rescale=False):\n        \"\"\"Test function with augmentation.\"\"\"\n        feats = self.extract_feats(points, img_metas)\n\n        # only support aug_test for one sample\n        aug_bboxes = []\n        for x, img_meta in zip(feats, img_metas):\n            outs = self.bbox_head(x)\n            bbox_list = self.bbox_head.get_bboxes(\n                *outs, img_meta, rescale=rescale)\n            bbox_list = [\n                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)\n                for bboxes, scores, labels in bbox_list\n            ]\n            aug_bboxes.append(bbox_list[0])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,\n                                            self.bbox_head.test_cfg)\n\n        return [merged_bboxes]\n"
  },
  {
    "path": "mmdet3d/models/fusion_layers/__init__.py",
    "content": "from .coord_transform import (apply_3d_transformation, bbox_2d_transform,\n                              coord_2d_transform)\nfrom .point_fusion import PointFusion\nfrom .vote_fusion import VoteFusion\n\n__all__ = [\n    'PointFusion', 'VoteFusion', 'apply_3d_transformation',\n    'bbox_2d_transform', 'coord_2d_transform'\n]\n"
  },
  {
    "path": "mmdet3d/models/fusion_layers/coord_transform.py",
    "content": "import torch\nfrom functools import partial\n\nfrom mmdet3d.core.points import get_points_type\n\n\ndef apply_3d_transformation(pcd, coords_type, img_meta, reverse=False):\n    \"\"\"Apply transformation to input point cloud.\n\n    Args:\n        pcd (torch.Tensor): The point cloud to be transformed.\n        coords_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'\n        img_meta(dict): Meta info regarding data transformation.\n        reverse (bool): Reversed transformation or not.\n\n    Note:\n        The elements in img_meta['transformation_3d_flow']:\n        \"T\" stands for translation;\n        \"S\" stands for scale;\n        \"R\" stands for rotation;\n        \"HF\" stands for horizontal flip;\n        \"VF\" stands for vertical flip.\n\n    Returns:\n        torch.Tensor: The transformed point cloud.\n    \"\"\"\n\n    dtype = pcd.dtype\n    device = pcd.device\n\n    pcd_rotate_mat = (\n        torch.tensor(img_meta['pcd_rotation'], dtype=dtype, device=device)\n        if 'pcd_rotation' in img_meta else torch.eye(\n            3, dtype=dtype, device=device))\n\n    pcd_scale_factor = (\n        img_meta['pcd_scale_factor'] if 'pcd_scale_factor' in img_meta else 1.)\n\n    pcd_trans_factor = (\n        torch.tensor(img_meta['pcd_trans'], dtype=dtype, device=device)\n        if 'pcd_trans' in img_meta else torch.zeros(\n            (3), dtype=dtype, device=device))\n\n    pcd_horizontal_flip = img_meta[\n        'pcd_horizontal_flip'] if 'pcd_horizontal_flip' in \\\n        img_meta else False\n\n    pcd_vertical_flip = img_meta[\n        'pcd_vertical_flip'] if 'pcd_vertical_flip' in \\\n        img_meta else False\n\n    flow = img_meta['transformation_3d_flow'] \\\n        if 'transformation_3d_flow' in img_meta else []\n\n    pcd = pcd.clone()  # prevent inplace modification\n    pcd = get_points_type(coords_type)(pcd)\n\n    horizontal_flip_func = partial(pcd.flip, bev_direction='horizontal') \\\n        if pcd_horizontal_flip else lambda: None\n    vertical_flip_func = partial(pcd.flip, bev_direction='vertical') \\\n        if pcd_vertical_flip else lambda: None\n    if reverse:\n        scale_func = partial(pcd.scale, scale_factor=1.0 / pcd_scale_factor)\n        translate_func = partial(pcd.translate, trans_vector=-pcd_trans_factor)\n        # pcd_rotate_mat @ pcd_rotate_mat.inverse() is not\n        # exactly an identity matrix\n        # use angle to create the inverse rot matrix neither.\n        rotate_func = partial(pcd.rotate, rotation=pcd_rotate_mat.inverse())\n\n        # reverse the pipeline\n        flow = flow[::-1]\n    else:\n        scale_func = partial(pcd.scale, scale_factor=pcd_scale_factor)\n        translate_func = partial(pcd.translate, trans_vector=pcd_trans_factor)\n        rotate_func = partial(pcd.rotate, rotation=pcd_rotate_mat)\n\n    flow_mapping = {\n        'T': translate_func,\n        'S': scale_func,\n        'R': rotate_func,\n        'HF': horizontal_flip_func,\n        'VF': vertical_flip_func\n    }\n    for op in flow:\n        assert op in flow_mapping, f'This 3D data '\\\n            f'transformation op ({op}) is not supported'\n        func = flow_mapping[op]\n        func()\n\n    return pcd.coord\n\n\ndef extract_2d_info(img_meta, tensor):\n    \"\"\"Extract image augmentation information from img_meta.\n\n    Args:\n        img_meta(dict): Meta info regarding data transformation.\n        tensor(torch.Tensor): Input tensor used to create new ones.\n\n    Returns:\n        (int, int, int, int, torch.Tensor, bool, 
torch.Tensor):\n            The extracted information.\n    \"\"\"\n    img_shape = img_meta['img_shape']\n    ori_shape = img_meta['ori_shape']\n    img_h, img_w, _ = img_shape\n    ori_h, ori_w, _ = ori_shape\n\n    img_scale_factor = (\n        tensor.new_tensor(img_meta['scale_factor'][:2])\n        if 'scale_factor' in img_meta else tensor.new_tensor([1.0, 1.0]))\n    img_flip = img_meta['flip'] if 'flip' in img_meta else False\n    img_crop_offset = (\n        tensor.new_tensor(img_meta['img_crop_offset'])\n        if 'img_crop_offset' in img_meta else tensor.new_tensor([0.0, 0.0]))\n\n    return (img_h, img_w, ori_h, ori_w, img_scale_factor, img_flip,\n            img_crop_offset)\n\n\ndef bbox_2d_transform(img_meta, bbox_2d, ori2new):\n    \"\"\"Transform 2d bbox according to img_meta.\n\n    Args:\n        img_meta(dict): Meta info regarding data transformation.\n        bbox_2d (torch.Tensor): Shape (..., >4)\n            The input 2d bboxes to transform.\n        ori2new (bool): Origin img coord system to new or not.\n\n    Returns:\n        torch.Tensor: The transformed 2d bboxes.\n    \"\"\"\n\n    img_h, img_w, ori_h, ori_w, img_scale_factor, img_flip, \\\n        img_crop_offset = extract_2d_info(img_meta, bbox_2d)\n\n    bbox_2d_new = bbox_2d.clone()\n\n    if ori2new:\n        bbox_2d_new[:, 0] = bbox_2d_new[:, 0] * img_scale_factor[0]\n        bbox_2d_new[:, 2] = bbox_2d_new[:, 2] * img_scale_factor[0]\n        bbox_2d_new[:, 1] = bbox_2d_new[:, 1] * img_scale_factor[1]\n        bbox_2d_new[:, 3] = bbox_2d_new[:, 3] * img_scale_factor[1]\n\n        bbox_2d_new[:, 0] = bbox_2d_new[:, 0] + img_crop_offset[0]\n        bbox_2d_new[:, 2] = bbox_2d_new[:, 2] + img_crop_offset[0]\n        bbox_2d_new[:, 1] = bbox_2d_new[:, 1] + img_crop_offset[1]\n        bbox_2d_new[:, 3] = bbox_2d_new[:, 3] + img_crop_offset[1]\n\n        if img_flip:\n            bbox_2d_r = img_w - bbox_2d_new[:, 0]\n            bbox_2d_l = img_w - bbox_2d_new[:, 2]\n            bbox_2d_new[:, 0] = bbox_2d_l\n            bbox_2d_new[:, 2] = bbox_2d_r\n    else:\n        if img_flip:\n            bbox_2d_r = img_w - bbox_2d_new[:, 0]\n            bbox_2d_l = img_w - bbox_2d_new[:, 2]\n            bbox_2d_new[:, 0] = bbox_2d_l\n            bbox_2d_new[:, 2] = bbox_2d_r\n\n        bbox_2d_new[:, 0] = bbox_2d_new[:, 0] - img_crop_offset[0]\n        bbox_2d_new[:, 2] = bbox_2d_new[:, 2] - img_crop_offset[0]\n        bbox_2d_new[:, 1] = bbox_2d_new[:, 1] - img_crop_offset[1]\n        bbox_2d_new[:, 3] = bbox_2d_new[:, 3] - img_crop_offset[1]\n\n        bbox_2d_new[:, 0] = bbox_2d_new[:, 0] / img_scale_factor[0]\n        bbox_2d_new[:, 2] = bbox_2d_new[:, 2] / img_scale_factor[0]\n        bbox_2d_new[:, 1] = bbox_2d_new[:, 1] / img_scale_factor[1]\n        bbox_2d_new[:, 3] = bbox_2d_new[:, 3] / img_scale_factor[1]\n\n    return bbox_2d_new\n\n\ndef coord_2d_transform(img_meta, coord_2d, ori2new):\n    \"\"\"Transform 2d pixel coordinates according to img_meta.\n\n    Args:\n        img_meta(dict): Meta info regarding data transformation.\n        coord_2d (torch.Tensor): Shape (..., 2)\n            The input 2d coords to transform.\n        ori2new (bool): Origin img coord system to new or not.\n\n    Returns:\n        torch.Tensor: The transformed 2d coordinates.\n    \"\"\"\n\n    img_h, img_w, ori_h, ori_w, img_scale_factor, img_flip, \\\n        img_crop_offset = extract_2d_info(img_meta, coord_2d)\n\n    coord_2d_new = coord_2d.clone()\n\n    if ori2new:\n        # TODO here we assume this order of 
transformation\n        coord_2d_new[..., 0] = coord_2d_new[..., 0] * img_scale_factor[0]\n        coord_2d_new[..., 1] = coord_2d_new[..., 1] * img_scale_factor[1]\n\n        coord_2d_new[..., 0] += img_crop_offset[0]\n        coord_2d_new[..., 1] += img_crop_offset[1]\n\n        # flip uv coordinates and bbox\n        if img_flip:\n            coord_2d_new[..., 0] = img_w - coord_2d_new[..., 0]\n    else:\n        if img_flip:\n            coord_2d_new[..., 0] = img_w - coord_2d_new[..., 0]\n\n        coord_2d_new[..., 0] -= img_crop_offset[0]\n        coord_2d_new[..., 1] -= img_crop_offset[1]\n\n        coord_2d_new[..., 0] = coord_2d_new[..., 0] / img_scale_factor[0]\n        coord_2d_new[..., 1] = coord_2d_new[..., 1] / img_scale_factor[1]\n\n    return coord_2d_new\n"
  },
  {
    "path": "mmdet3d/models/fusion_layers/point_fusion.py",
    "content": "import torch\nfrom mmcv.cnn import ConvModule, xavier_init\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom ..registry import FUSION_LAYERS\nfrom . import apply_3d_transformation\n\n\ndef point_sample(\n    img_meta,\n    img_features,\n    points,\n    lidar2img_rt,\n    img_scale_factor,\n    img_crop_offset,\n    img_flip,\n    img_pad_shape,\n    img_shape,\n    aligned=True,\n    padding_mode='zeros',\n    align_corners=True,\n):\n    \"\"\"Obtain image features using points.\n\n    Args:\n        img_meta (dict): Meta info.\n        img_features (torch.Tensor): 1 x C x H x W image features.\n        points (torch.Tensor): Nx3 point cloud in LiDAR coordinates.\n        lidar2img_rt (torch.Tensor): 4x4 transformation matrix.\n        img_scale_factor (torch.Tensor): Scale factor with shape of \\\n            (w_scale, h_scale).\n        img_crop_offset (torch.Tensor): Crop offset used to crop \\\n            image during data augmentation with shape of (w_offset, h_offset).\n        img_flip (bool): Whether the image is flipped.\n        img_pad_shape (tuple[int]): int tuple indicates the h & w after\n            padding, this is necessary to obtain features in feature map.\n        img_shape (tuple[int]): int tuple indicates the h & w before padding\n            after scaling, this is necessary for flipping coordinates.\n        aligned (bool, optional): Whether use bilinear interpolation when\n            sampling image features for each point. Defaults to True.\n        padding_mode (str, optional): Padding mode when padding values for\n            features of out-of-image points. Defaults to 'zeros'.\n        align_corners (bool, optional): Whether to align corners when\n            sampling image features for each point. 
Defaults to True.\n\n    Returns:\n        torch.Tensor: NxC image features sampled by point coordinates.\n    \"\"\"\n\n    # apply transformation based on info in img_meta\n    points = apply_3d_transformation(points, 'LIDAR', img_meta, reverse=True)\n\n    # project points from velo coordinate to camera coordinate\n    num_points = points.shape[0]\n    pts_4d = torch.cat([points, points.new_ones(size=(num_points, 1))], dim=-1)\n    pts_2d = pts_4d @ lidar2img_rt.t()\n\n    # cam_points is Tensor of Nx4 whose last column is 1\n    # transform camera coordinate to image coordinate\n\n    pts_2d[:, 2] = torch.clamp(pts_2d[:, 2], min=1e-5)\n    pts_2d[:, 0] /= pts_2d[:, 2]\n    pts_2d[:, 1] /= pts_2d[:, 2]\n\n    # img transformation: scale -> crop -> flip\n    # the image is resized by img_scale_factor\n    img_coors = pts_2d[:, 0:2] * img_scale_factor  # Nx2\n    img_coors -= img_crop_offset\n\n    # grid sample, the valid grid range should be in [-1,1]\n    coor_x, coor_y = torch.split(img_coors, 1, dim=1)  # each is Nx1\n\n    if img_flip:\n        # by default we take it as horizontal flip\n        # use img_shape before padding for flip\n        orig_h, orig_w = img_shape\n        coor_x = orig_w - coor_x\n\n    h, w = img_pad_shape\n    coor_y = coor_y / h * 2 - 1\n    coor_x = coor_x / w * 2 - 1\n    grid = torch.cat([coor_x, coor_y],\n                     dim=1).unsqueeze(0).unsqueeze(0)  # Nx2 -> 1x1xNx2\n\n    # align_corner=True provides higher performance\n    mode = 'bilinear' if aligned else 'nearest'\n    point_features = F.grid_sample(\n        img_features,\n        grid,\n        mode=mode,\n        padding_mode=padding_mode,\n        align_corners=align_corners)  # 1xCx1xN feats\n\n    return point_features.squeeze().t()\n\n\n@FUSION_LAYERS.register_module()\nclass PointFusion(nn.Module):\n    \"\"\"Fuse image features from multi-scale features.\n\n    Args:\n        img_channels (list[int] | int): Channels of image features.\n            It could be a list if the input is multi-scale image features.\n        pts_channels (int): Channels of point features\n        mid_channels (int): Channels of middle layers\n        out_channels (int): Channels of output fused features\n        img_levels (int, optional): Number of image levels. Defaults to 3.\n        conv_cfg (dict, optional): Dict config of conv layers of middle\n            layers. Defaults to None.\n        norm_cfg (dict, optional): Dict config of norm layers of middle\n            layers. Defaults to None.\n        act_cfg (dict, optional): Dict config of activatation layers.\n            Defaults to None.\n        activate_out (bool, optional): Whether to apply relu activation\n            to output features. Defaults to True.\n        fuse_out (bool, optional): Whether apply conv layer to the fused\n            features. Defaults to False.\n        dropout_ratio (int, float, optional): Dropout ratio of image\n            features to prevent overfitting. Defaults to 0.\n        aligned (bool, optional): Whether apply aligned feature fusion.\n            Defaults to True.\n        align_corners (bool, optional): Whether to align corner when\n            sampling features according to points. Defaults to True.\n        padding_mode (str, optional): Mode used to pad the features of\n            points that do not have corresponding image features.\n            Defaults to 'zeros'.\n        lateral_conv (bool, optional): Whether to apply lateral convs\n            to image features. 
Defaults to True.\n    \"\"\"\n\n    def __init__(self,\n                 img_channels,\n                 pts_channels,\n                 mid_channels,\n                 out_channels,\n                 img_levels=3,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 act_cfg=None,\n                 activate_out=True,\n                 fuse_out=False,\n                 dropout_ratio=0,\n                 aligned=True,\n                 align_corners=True,\n                 padding_mode='zeros',\n                 lateral_conv=True):\n        super(PointFusion, self).__init__()\n        if isinstance(img_levels, int):\n            img_levels = [img_levels]\n        if isinstance(img_channels, int):\n            img_channels = [img_channels] * len(img_levels)\n        assert isinstance(img_levels, list)\n        assert isinstance(img_channels, list)\n        assert len(img_channels) == len(img_levels)\n\n        self.img_levels = img_levels\n        self.act_cfg = act_cfg\n        self.activate_out = activate_out\n        self.fuse_out = fuse_out\n        self.dropout_ratio = dropout_ratio\n        self.img_channels = img_channels\n        self.aligned = aligned\n        self.align_corners = align_corners\n        self.padding_mode = padding_mode\n\n        self.lateral_convs = None\n        if lateral_conv:\n            self.lateral_convs = nn.ModuleList()\n            for i in range(len(img_channels)):\n                l_conv = ConvModule(\n                    img_channels[i],\n                    mid_channels,\n                    3,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=self.act_cfg,\n                    inplace=False)\n                self.lateral_convs.append(l_conv)\n            self.img_transform = nn.Sequential(\n                nn.Linear(mid_channels * len(img_channels), out_channels),\n                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),\n            )\n        else:\n            self.img_transform = nn.Sequential(\n                nn.Linear(sum(img_channels), out_channels),\n                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),\n            )\n        self.pts_transform = nn.Sequential(\n            nn.Linear(pts_channels, out_channels),\n            nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),\n        )\n\n        if self.fuse_out:\n            self.fuse_conv = nn.Sequential(\n                nn.Linear(mid_channels, out_channels),\n                # For pts the BN is initialized differently by default\n                # TODO: check whether this is necessary\n                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),\n                nn.ReLU(inplace=False))\n\n        self.init_weights()\n\n    # default init_weights for conv(msra) and norm in ConvModule\n    def init_weights(self):\n        \"\"\"Initialize the weights of modules.\"\"\"\n        for m in self.modules():\n            if isinstance(m, (nn.Conv2d, nn.Linear)):\n                xavier_init(m, distribution='uniform')\n\n    def forward(self, img_feats, pts, pts_feats, img_metas):\n        \"\"\"Forward function.\n\n        Args:\n            img_feats (list[torch.Tensor]): Image features.\n            pts: [list[torch.Tensor]]: A batch of points with shape N x 3.\n            pts_feats (torch.Tensor): A tensor consist of point features of the\n                total batch.\n            img_metas (list[dict]): Meta information of 
images.\n\n        Returns:\n            torch.Tensor: Fused features of each point.\n        \"\"\"\n        img_pts = self.obtain_mlvl_feats(img_feats, pts, img_metas)\n        img_pre_fuse = self.img_transform(img_pts)\n        if self.training and self.dropout_ratio > 0:\n            img_pre_fuse = F.dropout(img_pre_fuse, self.dropout_ratio)\n        pts_pre_fuse = self.pts_transform(pts_feats)\n\n        fuse_out = img_pre_fuse + pts_pre_fuse\n        if self.activate_out:\n            fuse_out = F.relu(fuse_out)\n        if self.fuse_out:\n            fuse_out = self.fuse_conv(fuse_out)\n\n        return fuse_out\n\n    def obtain_mlvl_feats(self, img_feats, pts, img_metas):\n        \"\"\"Obtain multi-level features for each point.\n\n        Args:\n            img_feats (list(torch.Tensor)): Multi-scale image features produced\n                by image backbone in shape (N, C, H, W).\n            pts (list[torch.Tensor]): Points of each sample.\n            img_metas (list[dict]): Meta information for each sample.\n\n        Returns:\n            torch.Tensor: Corresponding image features of each point.\n        \"\"\"\n        if self.lateral_convs is not None:\n            img_ins = [\n                lateral_conv(img_feats[i])\n                for i, lateral_conv in zip(self.img_levels, self.lateral_convs)\n            ]\n        else:\n            img_ins = img_feats\n        img_feats_per_point = []\n        # Sample multi-level features\n        for i in range(len(img_metas)):\n            mlvl_img_feats = []\n            for level in range(len(self.img_levels)):\n                mlvl_img_feats.append(\n                    self.sample_single(img_ins[level][i:i + 1], pts[i][:, :3],\n                                       img_metas[i]))\n            mlvl_img_feats = torch.cat(mlvl_img_feats, dim=-1)\n            img_feats_per_point.append(mlvl_img_feats)\n\n        img_pts = torch.cat(img_feats_per_point, dim=0)\n        return img_pts\n\n    def sample_single(self, img_feats, pts, img_meta):\n        \"\"\"Sample features from single level image feature map.\n\n        Args:\n            img_feats (torch.Tensor): Image feature map in shape\n                (1, C, H, W).\n            pts (torch.Tensor): Points of a single sample.\n            img_meta (dict): Meta information of the single sample.\n\n        Returns:\n            torch.Tensor: Single level image features of each point.\n        \"\"\"\n        # TODO: image transformation also extracted\n        img_scale_factor = (\n            pts.new_tensor(img_meta['scale_factor'][:2])\n            if 'scale_factor' in img_meta.keys() else 1)\n        img_flip = img_meta['flip'] if 'flip' in img_meta.keys() else False\n        img_crop_offset = (\n            pts.new_tensor(img_meta['img_crop_offset'])\n            if 'img_crop_offset' in img_meta.keys() else 0)\n        img_pts = point_sample(\n            img_meta,\n            img_feats,\n            pts,\n            pts.new_tensor(img_meta['lidar2img']),\n            img_scale_factor,\n            img_crop_offset,\n            img_flip=img_flip,\n            img_pad_shape=img_meta['input_shape'][:2],\n            img_shape=img_meta['img_shape'][:2],\n            aligned=self.aligned,\n            padding_mode=self.padding_mode,\n            align_corners=self.align_corners,\n        )\n        return img_pts\n"
  },
  {
    "path": "mmdet3d/models/fusion_layers/vote_fusion.py",
    "content": "import torch\nfrom torch import nn as nn\n\nfrom mmdet3d.core.bbox import Coord3DMode, points_cam2img\nfrom ..registry import FUSION_LAYERS\nfrom . import apply_3d_transformation, bbox_2d_transform, coord_2d_transform\n\nEPS = 1e-6\n\n\n@FUSION_LAYERS.register_module()\nclass VoteFusion(nn.Module):\n    \"\"\"Fuse 2d features from 3d seeds.\n\n    Args:\n        num_classes (int): number of classes.\n        max_imvote_per_pixel (int): max number of imvotes.\n    \"\"\"\n\n    def __init__(self, num_classes=10, max_imvote_per_pixel=3):\n        super(VoteFusion, self).__init__()\n        self.num_classes = num_classes\n        self.max_imvote_per_pixel = max_imvote_per_pixel\n\n    def forward(self, imgs, bboxes_2d_rescaled, seeds_3d_depth, img_metas,\n                calibs):\n        \"\"\"Forward function.\n\n        Args:\n            imgs (list[torch.Tensor]): Image features.\n            bboxes_2d_rescaled (list[torch.Tensor]): 2D bboxes.\n            seeds_3d_depth (torch.Tensor): 3D seeds.\n            img_metas (list[dict]): Meta information of images.\n            calibs: Camera calibration information of the images.\n\n        Returns:\n            torch.Tensor: Concatenated cues of each point.\n            torch.Tensor: Validity mask of each feature.\n        \"\"\"\n        img_features = []\n        masks = []\n        for i, data in enumerate(\n                zip(imgs, bboxes_2d_rescaled, seeds_3d_depth, img_metas)):\n            img, bbox_2d_rescaled, seed_3d_depth, img_meta = data\n            bbox_num = bbox_2d_rescaled.shape[0]\n            seed_num = seed_3d_depth.shape[0]\n\n            img_shape = img_meta['img_shape']\n            img_h, img_w, _ = img_shape\n\n            # first reverse the data transformations\n            xyz_depth = apply_3d_transformation(\n                seed_3d_depth, 'DEPTH', img_meta, reverse=True)\n\n            # then convert from depth coords to camera coords\n            xyz_cam = Coord3DMode.convert_point(\n                xyz_depth,\n                Coord3DMode.DEPTH,\n                Coord3DMode.CAM,\n                rt_mat=calibs['Rt'][i])\n\n            # project to 2d to get image coords (uv)\n            uv_origin = points_cam2img(xyz_cam, calibs['K'][i])\n            uv_origin = (uv_origin - 1).round()\n\n            # rescale 2d coordinates and bboxes\n            uv_rescaled = coord_2d_transform(img_meta, uv_origin, True)\n            bbox_2d_origin = bbox_2d_transform(img_meta, bbox_2d_rescaled,\n                                               False)\n\n            if bbox_num == 0:\n                imvote_num = seed_num * self.max_imvote_per_pixel\n\n                # use zero features\n                two_cues = torch.zeros((15, imvote_num),\n                                       device=seed_3d_depth.device)\n                mask_zero = torch.zeros(\n                    imvote_num - seed_num, device=seed_3d_depth.device).bool()\n                mask_one = torch.ones(\n                    seed_num, device=seed_3d_depth.device).bool()\n                mask = torch.cat([mask_one, mask_zero], dim=0)\n            else:\n                # expand bboxes and seeds\n                bbox_expanded = bbox_2d_origin.view(1, bbox_num, -1).expand(\n                    seed_num, -1, -1)\n                seed_2d_expanded = uv_origin.view(seed_num, 1,\n                                                  -1).expand(-1, bbox_num, -1)\n                seed_2d_expanded_x, seed_2d_expanded_y = \\\n                    
seed_2d_expanded.split(1, dim=-1)\n\n                bbox_expanded_l, bbox_expanded_t, bbox_expanded_r, \\\n                    bbox_expanded_b, bbox_expanded_conf, bbox_expanded_cls = \\\n                    bbox_expanded.split(1, dim=-1)\n                bbox_expanded_midx = (bbox_expanded_l + bbox_expanded_r) / 2\n                bbox_expanded_midy = (bbox_expanded_t + bbox_expanded_b) / 2\n\n                seed_2d_in_bbox_x = (seed_2d_expanded_x > bbox_expanded_l) * \\\n                    (seed_2d_expanded_x < bbox_expanded_r)\n                seed_2d_in_bbox_y = (seed_2d_expanded_y > bbox_expanded_t) * \\\n                    (seed_2d_expanded_y < bbox_expanded_b)\n                seed_2d_in_bbox = seed_2d_in_bbox_x * seed_2d_in_bbox_y\n\n                # semantic cues, dim=class_num\n                sem_cue = torch.zeros_like(bbox_expanded_conf).expand(\n                    -1, -1, self.num_classes)\n                sem_cue = sem_cue.scatter(-1, bbox_expanded_cls.long(),\n                                          bbox_expanded_conf)\n\n                # bbox center - uv\n                delta_u = bbox_expanded_midx - seed_2d_expanded_x\n                delta_v = bbox_expanded_midy - seed_2d_expanded_y\n\n                seed_3d_expanded = seed_3d_depth.view(seed_num, 1, -1).expand(\n                    -1, bbox_num, -1)\n\n                z_cam = xyz_cam[..., 2:3].view(seed_num, 1,\n                                               1).expand(-1, bbox_num, -1)\n\n                delta_u = delta_u * z_cam / calibs['K'][i, 0, 0]\n                delta_v = delta_v * z_cam / calibs['K'][i, 0, 0]\n\n                imvote = torch.cat(\n                    [delta_u, delta_v,\n                     torch.zeros_like(delta_v)], dim=-1).view(-1, 3)\n\n                # convert from camera coords to depth coords\n                imvote = Coord3DMode.convert_point(\n                    imvote.view((-1, 3)),\n                    Coord3DMode.CAM,\n                    Coord3DMode.DEPTH,\n                    rt_mat=calibs['Rt'][i])\n\n                # apply transformation to lifted imvotes\n                imvote = apply_3d_transformation(\n                    imvote, 'DEPTH', img_meta, reverse=False)\n\n                seed_3d_expanded = seed_3d_expanded.reshape(imvote.shape)\n\n                # ray angle\n                ray_angle = seed_3d_expanded + imvote\n                ray_angle /= torch.sqrt(torch.sum(ray_angle**2, -1) +\n                                        EPS).unsqueeze(-1)\n\n                # imvote lifted to 3d\n                xz = ray_angle[:, [0, 2]] / (ray_angle[:, [1]] + EPS) \\\n                    * seed_3d_expanded[:, [1]] - seed_3d_expanded[:, [0, 2]]\n\n                # geometric cues, dim=5\n                geo_cue = torch.cat([xz, ray_angle],\n                                    dim=-1).view(seed_num, -1, 5)\n\n                two_cues = torch.cat([geo_cue, sem_cue], dim=-1)\n                # mask to 0 if seed not in bbox\n                two_cues = two_cues * seed_2d_in_bbox.float()\n\n                feature_size = two_cues.shape[-1]\n                # if bbox number is too small, append zeros\n                if bbox_num < self.max_imvote_per_pixel:\n                    append_num = self.max_imvote_per_pixel - bbox_num\n                    append_zeros = torch.zeros(\n                        (seed_num, append_num, 1),\n                        device=seed_2d_in_bbox.device).bool()\n                    seed_2d_in_bbox = torch.cat(\n                        
[seed_2d_in_bbox, append_zeros], dim=1)\n                    append_zeros = torch.zeros(\n                        (seed_num, append_num, feature_size),\n                        device=two_cues.device)\n                    two_cues = torch.cat([two_cues, append_zeros], dim=1)\n                    append_zeros = torch.zeros((seed_num, append_num, 1),\n                                               device=two_cues.device)\n                    bbox_expanded_conf = torch.cat(\n                        [bbox_expanded_conf, append_zeros], dim=1)\n\n                # sort the valid seed-bbox pair according to confidence\n                pair_score = seed_2d_in_bbox.float() + bbox_expanded_conf\n                # and find the largests\n                mask, indices = pair_score.topk(\n                    self.max_imvote_per_pixel,\n                    dim=1,\n                    largest=True,\n                    sorted=True)\n\n                indices_img = indices.expand(-1, -1, feature_size)\n                two_cues = two_cues.gather(dim=1, index=indices_img)\n                two_cues = two_cues.transpose(1, 0)\n                two_cues = two_cues.reshape(-1, feature_size).transpose(\n                    1, 0).contiguous()\n\n                # since conf is ~ (0, 1), floor gives us validity\n                mask = mask.floor().int()\n                mask = mask.transpose(1, 0).reshape(-1).bool()\n\n            # clear the padding\n            img = img[:, :img_shape[0], :img_shape[1]]\n            img_flatten = img.reshape(3, -1).float()\n            img_flatten /= 255.\n\n            # take the normalized pixel value as texture cue\n            uv_flatten = uv_rescaled[:, 1].round() * \\\n                img_shape[1] + uv_rescaled[:, 0].round()\n            uv_expanded = uv_flatten.unsqueeze(0).expand(3, -1).long()\n            txt_cue = torch.gather(img_flatten, dim=-1, index=uv_expanded)\n            txt_cue = txt_cue.unsqueeze(1).expand(-1,\n                                                  self.max_imvote_per_pixel,\n                                                  -1).reshape(3, -1)\n\n            # append texture cue\n            img_feature = torch.cat([two_cues, txt_cue], dim=0)\n            img_features.append(img_feature)\n            masks.append(mask)\n\n        return torch.stack(img_features, 0), torch.stack(masks, 0)\n"
  },
  {
    "path": "mmdet3d/models/losses/__init__.py",
    "content": "from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy\nfrom .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss\nfrom .chamfer_distance import ChamferDistance, chamfer_distance\nfrom .uncertainty_loss import LaplaceL1Loss\n\n__all__ = [\n    'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance',\n    'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss',\n    'LaplaceL1Loss'\n]\n"
  },
  {
    "path": "mmdet3d/models/losses/axis_aligned_iou_loss.py",
    "content": "import torch\nfrom torch import nn as nn\n\nfrom mmdet.models.builder import LOSSES\nfrom mmdet.models.losses.utils import weighted_loss\nfrom ...core.bbox import AxisAlignedBboxOverlaps3D\n\n\n@weighted_loss\ndef axis_aligned_iou_loss(pred, target):\n    \"\"\"Calculate the IoU loss (1-IoU) of two set of axis aligned bounding\n    boxes. Note that predictions and targets are one-to-one corresponded.\n\n    Args:\n        pred (torch.Tensor): Bbox predictions with shape [..., 3].\n        target (torch.Tensor): Bbox targets (gt) with shape [..., 3].\n\n    Returns:\n        torch.Tensor: IoU loss between predictions and targets.\n    \"\"\"\n\n    axis_aligned_iou = AxisAlignedBboxOverlaps3D()(\n        pred, target, is_aligned=True)\n    iou_loss = 1 - axis_aligned_iou\n    return iou_loss\n\n\n@LOSSES.register_module()\nclass AxisAlignedIoULoss(nn.Module):\n    \"\"\"Calculate the IoU loss (1-IoU) of axis aligned bounding boxes.\n\n    Args:\n        reduction (str): Method to reduce losses.\n            The valid reduction method are none, sum or mean.\n        loss_weight (float, optional): Weight of loss. Defaults to 1.0.\n    \"\"\"\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super(AxisAlignedIoULoss, self).__init__()\n        assert reduction in ['none', 'sum', 'mean']\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        \"\"\"Forward function of loss calculation.\n\n        Args:\n            pred (torch.Tensor): Bbox predictions with shape [..., 3].\n            target (torch.Tensor): Bbox targets (gt) with shape [..., 3].\n            weight (torch.Tensor|float, optional): Weight of loss. \\\n                Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): Method to reduce losses.\n                The valid reduction method are 'none', 'sum' or 'mean'.\n                Defaults to None.\n\n        Returns:\n            torch.Tensor: IoU loss between predictions and targets.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if (weight is not None) and (not torch.any(weight > 0)) and (\n                reduction != 'none'):\n            return (pred * weight).sum()\n        return axis_aligned_iou_loss(\n            pred,\n            target,\n            weight=weight,\n            avg_factor=avg_factor,\n            reduction=reduction) * self.loss_weight\n"
  },
  {
    "path": "mmdet3d/models/losses/chamfer_distance.py",
    "content": "import torch\nfrom torch import nn as nn\nfrom torch.nn.functional import l1_loss, mse_loss, smooth_l1_loss\n\nfrom mmdet.models.builder import LOSSES\n\n\ndef chamfer_distance(src,\n                     dst,\n                     src_weight=1.0,\n                     dst_weight=1.0,\n                     criterion_mode='l2',\n                     reduction='mean'):\n    \"\"\"Calculate Chamfer Distance of two sets.\n\n    Args:\n        src (torch.Tensor): Source set with shape [B, N, C] to\n            calculate Chamfer Distance.\n        dst (torch.Tensor): Destination set with shape [B, M, C] to\n            calculate Chamfer Distance.\n        src_weight (torch.Tensor or float): Weight of source loss.\n        dst_weight (torch.Tensor or float): Weight of destination loss.\n        criterion_mode (str): Criterion mode to calculate distance.\n            The valid modes are smooth_l1, l1 or l2.\n        reduction (str): Method to reduce losses.\n            The valid reduction method are 'none', 'sum' or 'mean'.\n\n    Returns:\n        tuple: Source and Destination loss with the corresponding indices.\n\n            - loss_src (torch.Tensor): The min distance \\\n                from source to destination.\n            - loss_dst (torch.Tensor): The min distance \\\n                from destination to source.\n            - indices1 (torch.Tensor): Index the min distance point \\\n                for each point in source to destination.\n            - indices2 (torch.Tensor): Index the min distance point \\\n                for each point in destination to source.\n    \"\"\"\n\n    if criterion_mode == 'smooth_l1':\n        criterion = smooth_l1_loss\n    elif criterion_mode == 'l1':\n        criterion = l1_loss\n    elif criterion_mode == 'l2':\n        criterion = mse_loss\n    else:\n        raise NotImplementedError\n\n    src_expand = src.unsqueeze(2).repeat(1, 1, dst.shape[1], 1)\n    dst_expand = dst.unsqueeze(1).repeat(1, src.shape[1], 1, 1)\n\n    distance = criterion(src_expand, dst_expand, reduction='none').sum(-1)\n    src2dst_distance, indices1 = torch.min(distance, dim=2)  # (B,N)\n    dst2src_distance, indices2 = torch.min(distance, dim=1)  # (B,M)\n\n    loss_src = (src2dst_distance * src_weight)\n    loss_dst = (dst2src_distance * dst_weight)\n\n    if reduction == 'sum':\n        loss_src = torch.sum(loss_src)\n        loss_dst = torch.sum(loss_dst)\n    elif reduction == 'mean':\n        loss_src = torch.mean(loss_src)\n        loss_dst = torch.mean(loss_dst)\n    elif reduction == 'none':\n        pass\n    else:\n        raise NotImplementedError\n\n    return loss_src, loss_dst, indices1, indices2\n\n\n@LOSSES.register_module()\nclass ChamferDistance(nn.Module):\n    \"\"\"Calculate Chamfer Distance of two sets.\n\n    Args:\n        mode (str): Criterion mode to calculate distance.\n            The valid modes are smooth_l1, l1 or l2.\n        reduction (str): Method to reduce losses.\n            The valid reduction method are none, sum or mean.\n        loss_src_weight (float): Weight of loss_source.\n        loss_dst_weight (float): Weight of loss_target.\n    \"\"\"\n\n    def __init__(self,\n                 mode='l2',\n                 reduction='mean',\n                 loss_src_weight=1.0,\n                 loss_dst_weight=1.0):\n        super(ChamferDistance, self).__init__()\n\n        assert mode in ['smooth_l1', 'l1', 'l2']\n        assert reduction in ['none', 'sum', 'mean']\n        self.mode = mode\n        self.reduction = 
reduction\n        self.loss_src_weight = loss_src_weight\n        self.loss_dst_weight = loss_dst_weight\n\n    def forward(self,\n                source,\n                target,\n                src_weight=1.0,\n                dst_weight=1.0,\n                reduction_override=None,\n                return_indices=False,\n                **kwargs):\n        \"\"\"Forward function of loss calculation.\n\n        Args:\n            source (torch.Tensor): Source set with shape [B, N, C] to\n                calculate Chamfer Distance.\n            target (torch.Tensor): Destination set with shape [B, M, C] to\n                calculate Chamfer Distance.\n            src_weight (torch.Tensor | float, optional):\n                Weight of source loss. Defaults to 1.0.\n            dst_weight (torch.Tensor | float, optional):\n                Weight of destination loss. Defaults to 1.0.\n            reduction_override (str, optional): Method to reduce losses.\n                The valid reduction method are 'none', 'sum' or 'mean'.\n                Defaults to None.\n            return_indices (bool, optional): Whether to return indices.\n                Defaults to False.\n\n        Returns:\n            tuple[torch.Tensor]: If ``return_indices=True``, return losses of \\\n                source and target with their corresponding indices in the \\\n                order of ``(loss_source, loss_target, indices1, indices2)``. \\\n                If ``return_indices=False``, return \\\n                ``(loss_source, loss_target)``.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        loss_source, loss_target, indices1, indices2 = chamfer_distance(\n            source, target, src_weight, dst_weight, self.mode, reduction)\n\n        loss_source *= self.loss_src_weight\n        loss_target *= self.loss_dst_weight\n\n        if return_indices:\n            return loss_source, loss_target, indices1, indices2\n        else:\n            return loss_source, loss_target\n"
  },
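  {
    "path": "examples/chamfer_distance_usage.py",
    "content": "# Hypothetical usage sketch (not part of the original codebase): a minimal\n# example of the Chamfer distance loss defined above. The file path, tensor\n# shapes and loss weights are illustrative assumptions only.\nimport torch\n\nfrom mmdet3d.models.builder import build_loss\n\n# Two random point sets: 4 samples with 128 source and 256 target points.\nsource = torch.rand(4, 128, 3)\ntarget = torch.rand(4, 256, 3)\n\n# Build the registered loss from a config dict, as model configs would.\nchamfer = build_loss(\n    dict(\n        type='ChamferDistance',\n        mode='l2',\n        reduction='sum',\n        loss_src_weight=10.0,\n        loss_dst_weight=10.0))\n\n# By default the reduced source->target and target->source terms are returned.\nloss_source, loss_target = chamfer(source, target)\n\n# With return_indices=True the nearest-neighbour indices are also returned.\nloss_source, loss_target, idx_src, idx_dst = chamfer(\n    source, target, return_indices=True)\nassert idx_src.shape == (4, 128) and idx_dst.shape == (4, 256)\n"
  },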
  {
    "path": "mmdet3d/models/losses/uncertainty_loss.py",
    "content": "import torch\r\nfrom torch import nn as nn\r\n\r\nfrom mmdet.models.builder import LOSSES\r\nfrom mmdet.models.losses.utils import weighted_loss\r\n\r\n@weighted_loss\r\ndef laplacian_aleatoric_uncertainty_loss(pred, target):\r\n    '''\r\n    References:\r\n        MonoPair: Monocular 3D Object Detection Using Pairwise Spatial Relationships, CVPR'20\r\n        Geometry and Uncertainty in Deep Learning for Computer Vision, University of Cambridge\r\n    '''\r\n\r\n    log_variance = pred[..., 1:]\r\n    pred = pred[..., :1]\r\n    if target.numel() == 0:\r\n        return pred.sum() * 0\r\n    assert pred.size() == target.size()\r\n    assert pred.size() == log_variance.size()\r\n\r\n    loss = 1.4142 * torch.exp(-log_variance) * torch.abs(pred - target) + log_variance\r\n    return loss\r\n\r\n\r\n@LOSSES.register_module()\r\nclass LaplaceL1Loss(nn.Module):\r\n    \"\"\"L1 loss.\r\n    Args:\r\n        reduction (str, optional): The method to reduce the loss.\r\n            Options are \"none\", \"mean\" and \"sum\".\r\n        loss_weight (float, optional): The weight of loss.\r\n    \"\"\"\r\n\r\n    def __init__(self, reduction='mean', loss_weight=1.0):\r\n        super(LaplaceL1Loss, self).__init__()\r\n        self.reduction = reduction\r\n        self.loss_weight = loss_weight\r\n\r\n    def forward(self,\r\n                pred,\r\n                target,\r\n                weight=None,\r\n                avg_factor=None,\r\n                reduction_override=None):\r\n        \"\"\"Forward function.\r\n        Args:\r\n            pred (torch.Tensor): The prediction.\r\n            target (torch.Tensor): The learning target of the prediction.\r\n            weight (torch.Tensor, optional): The weight of loss for each\r\n                prediction. Defaults to None.\r\n            avg_factor (int, optional): Average factor that is used to average\r\n                the loss. Defaults to None.\r\n            reduction_override (str, optional): The reduction method used to\r\n                override the original reduction method of the loss.\r\n                Defaults to None.\r\n        \"\"\"\r\n        assert reduction_override in (None, 'none', 'mean', 'sum')\r\n        reduction = (\r\n            reduction_override if reduction_override else self.reduction)\r\n        loss = laplacian_aleatoric_uncertainty_loss(pred, target, weight=weight, reduction=reduction, avg_factor=avg_factor)\r\n        loss_bbox = self.loss_weight * loss\r\n        return loss_bbox"
  },
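  {
    "path": "examples/laplace_l1_loss_usage.py",
    "content": "# Hypothetical usage sketch (not part of the original codebase) for the\n# LaplaceL1Loss defined above. Shapes are illustrative assumptions: the last\n# dimension of pred packs the regressed value and its log variance, while the\n# target holds only the value.\nimport torch\n\nfrom mmdet3d.models.builder import build_loss\n\nlaplace_l1 = build_loss(\n    dict(type='LaplaceL1Loss', reduction='mean', loss_weight=1.0))\n\npred = torch.randn(16, 2)    # [:, :1] value, [:, 1:] log variance\ntarget = torch.randn(16, 1)  # ground-truth value only\n\nloss = laplace_l1(pred, target)\nassert loss.dim() == 0  # scalar after 'mean' reduction\n"
  },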
  {
    "path": "mmdet3d/models/middle_encoders/__init__.py",
    "content": "from .pillar_scatter import PointPillarsScatter\nfrom .sparse_encoder import SparseEncoder\nfrom .sparse_unet import SparseUNet\n\n__all__ = ['PointPillarsScatter', 'SparseEncoder', 'SparseUNet']\n"
  },
  {
    "path": "mmdet3d/models/middle_encoders/pillar_scatter.py",
    "content": "import torch\nfrom mmcv.runner import auto_fp16\nfrom torch import nn\n\nfrom ..registry import MIDDLE_ENCODERS\n\n\n@MIDDLE_ENCODERS.register_module()\nclass PointPillarsScatter(nn.Module):\n    \"\"\"Point Pillar's Scatter.\n\n    Converts learned features from dense tensor to sparse pseudo image.\n\n    Args:\n        in_channels (int): Channels of input features.\n        output_shape (list[int]): Required output shape of features.\n    \"\"\"\n\n    def __init__(self, in_channels, output_shape):\n        super().__init__()\n        self.output_shape = output_shape\n        self.ny = output_shape[0]\n        self.nx = output_shape[1]\n        self.in_channels = in_channels\n        self.fp16_enabled = False\n\n    @auto_fp16(apply_to=('voxel_features', ))\n    def forward(self, voxel_features, coors, batch_size=None):\n        \"\"\"Foraward function to scatter features.\"\"\"\n        # TODO: rewrite the function in a batch manner\n        # no need to deal with different batch cases\n        if batch_size is not None:\n            return self.forward_batch(voxel_features, coors, batch_size)\n        else:\n            return self.forward_single(voxel_features, coors)\n\n    def forward_single(self, voxel_features, coors):\n        \"\"\"Scatter features of single sample.\n\n        Args:\n            voxel_features (torch.Tensor): Voxel features in shape (N, M, C).\n            coors (torch.Tensor): Coordinates of each voxel.\n                The first column indicates the sample ID.\n        \"\"\"\n        # Create the canvas for this sample\n        canvas = torch.zeros(\n            self.in_channels,\n            self.nx * self.ny,\n            dtype=voxel_features.dtype,\n            device=voxel_features.device)\n\n        indices = coors[:, 1] * self.nx + coors[:, 2]\n        indices = indices.long()\n        voxels = voxel_features.t()\n        # Now scatter the blob back to the canvas.\n        canvas[:, indices] = voxels\n        # Undo the column stacking to final 4-dim tensor\n        canvas = canvas.view(1, self.in_channels, self.ny, self.nx)\n        return [canvas]\n\n    def forward_batch(self, voxel_features, coors, batch_size):\n        \"\"\"Scatter features of single sample.\n\n        Args:\n            voxel_features (torch.Tensor): Voxel features in shape (N, M, C).\n            coors (torch.Tensor): Coordinates of each voxel in shape (N, 4).\n                The first column indicates the sample ID.\n            batch_size (int): Number of samples in the current batch.\n        \"\"\"\n        # batch_canvas will be the final output.\n        batch_canvas = []\n        for batch_itt in range(batch_size):\n            # Create the canvas for this sample\n            canvas = torch.zeros(\n                self.in_channels,\n                self.nx * self.ny,\n                dtype=voxel_features.dtype,\n                device=voxel_features.device)\n\n            # Only include non-empty pillars\n            batch_mask = coors[:, 0] == batch_itt\n            this_coors = coors[batch_mask, :]\n            indices = this_coors[:, 2] * self.nx + this_coors[:, 3]\n            indices = indices.type(torch.long)\n            voxels = voxel_features[batch_mask, :]\n            voxels = voxels.t()\n\n            # Now scatter the blob back to the canvas.\n            canvas[:, indices] = voxels\n\n            # Append to a list for later stacking.\n            batch_canvas.append(canvas)\n\n        # Stack to 3-dim tensor (batch-size, in_channels, 
nrows*ncols)\n        batch_canvas = torch.stack(batch_canvas, 0)\n\n        # Undo the column stacking to final 4-dim tensor\n        batch_canvas = batch_canvas.view(batch_size, self.in_channels, self.ny,\n                                         self.nx)\n\n        return batch_canvas\n"
  },
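  {
    "path": "examples/point_pillars_scatter_usage.py",
    "content": "# Hypothetical usage sketch (not part of the original codebase) for\n# PointPillarsScatter defined above. The grid size and channel count are\n# illustrative assumptions; coordinates follow the (batch_idx, z, y, x)\n# layout expected by forward_batch.\nimport torch\n\nfrom mmdet3d.models.middle_encoders import PointPillarsScatter\n\n# 64-channel BEV canvas with ny=496 rows and nx=432 columns.\nscatter = PointPillarsScatter(in_channels=64, output_shape=[496, 432])\n\nnum_pillars, batch_size = 1000, 2\nvoxel_features = torch.rand(num_pillars, 64)\ncoors = torch.stack(\n    [\n        torch.randint(0, batch_size, (num_pillars, )),  # batch index\n        torch.zeros(num_pillars, dtype=torch.long),  # z index (single layer)\n        torch.randint(0, 496, (num_pillars, )),  # y index\n        torch.randint(0, 432, (num_pillars, )),  # x index\n    ],\n    dim=1)\n\nbev_map = scatter(voxel_features, coors, batch_size)\nassert bev_map.shape == (batch_size, 64, 496, 432)\n"
  },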
  {
    "path": "mmdet3d/models/middle_encoders/sparse_encoder.py",
    "content": "from mmcv.runner import auto_fp16\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule\nfrom mmdet3d.ops import spconv as spconv\nfrom ..registry import MIDDLE_ENCODERS\n\n\n@MIDDLE_ENCODERS.register_module()\nclass SparseEncoder(nn.Module):\n    r\"\"\"Sparse encoder for SECOND and Part-A2.\n\n    Args:\n        in_channels (int): The number of input channels.\n        sparse_shape (list[int]): The sparse shape of input tensor.\n        order (list[str]): Order of conv module. Defaults to ('conv',\n            'norm', 'act').\n        norm_cfg (dict): Config of normalization layer. Defaults to\n            dict(type='BN1d', eps=1e-3, momentum=0.01).\n        base_channels (int): Out channels for conv_input layer.\n            Defaults to 16.\n        output_channels (int): Out channels for conv_out layer.\n            Defaults to 128.\n        encoder_channels (tuple[tuple[int]]):\n            Convolutional channels of each encode block.\n        encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.\n            Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).\n        block_type (str): Type of the block to use. Defaults to 'conv_module'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 sparse_shape,\n                 order=('conv', 'norm', 'act'),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 base_channels=16,\n                 output_channels=128,\n                 encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,\n                                                                        64)),\n                 encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,\n                                                                 1)),\n                 block_type='conv_module'):\n        super().__init__()\n        assert block_type in ['conv_module', 'basicblock']\n        self.sparse_shape = sparse_shape\n        self.in_channels = in_channels\n        self.order = order\n        self.base_channels = base_channels\n        self.output_channels = output_channels\n        self.encoder_channels = encoder_channels\n        self.encoder_paddings = encoder_paddings\n        self.stage_num = len(self.encoder_channels)\n        self.fp16_enabled = False\n        # Spconv init all weight on its own\n\n        assert isinstance(order, tuple) and len(order) == 3\n        assert set(order) == {'conv', 'norm', 'act'}\n\n        if self.order[0] != 'conv':  # pre activate\n            self.conv_input = make_sparse_convmodule(\n                in_channels,\n                self.base_channels,\n                3,\n                norm_cfg=norm_cfg,\n                padding=1,\n                indice_key='subm1',\n                conv_type='SubMConv3d',\n                order=('conv', ))\n        else:  # post activate\n            self.conv_input = make_sparse_convmodule(\n                in_channels,\n                self.base_channels,\n                3,\n                norm_cfg=norm_cfg,\n                padding=1,\n                indice_key='subm1',\n                conv_type='SubMConv3d')\n\n        encoder_out_channels = self.make_encoder_layers(\n            make_sparse_convmodule,\n            norm_cfg,\n            self.base_channels,\n            block_type=block_type)\n\n        self.conv_out = make_sparse_convmodule(\n            encoder_out_channels,\n            self.output_channels,\n            
kernel_size=(3, 1, 1),\n            stride=(2, 1, 1),\n            norm_cfg=norm_cfg,\n            padding=0,\n            indice_key='spconv_down2',\n            conv_type='SparseConv3d')\n\n    @auto_fp16(apply_to=('voxel_features', ))\n    def forward(self, voxel_features, coors, batch_size):\n        \"\"\"Forward of SparseEncoder.\n\n        Args:\n            voxel_features (torch.float32): Voxel features in shape (N, C).\n            coors (torch.int32): Coordinates in shape (N, 4), \\\n                the columns in the order of (batch_idx, z_idx, y_idx, x_idx).\n            batch_size (int): Batch size.\n\n        Returns:\n            dict: Backbone features.\n        \"\"\"\n        coors = coors.int()\n        input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,\n                                                  self.sparse_shape,\n                                                  batch_size)\n        x = self.conv_input(input_sp_tensor)\n\n        encode_features = []\n        for encoder_layer in self.encoder_layers:\n            x = encoder_layer(x)\n            encode_features.append(x)\n\n        # for detection head\n        # [200, 176, 5] -> [200, 176, 2]\n        out = self.conv_out(encode_features[-1])\n        spatial_features = out.dense()\n\n        N, C, D, H, W = spatial_features.shape\n        spatial_features = spatial_features.view(N, C * D, H, W)\n\n        return spatial_features\n\n    def make_encoder_layers(self,\n                            make_block,\n                            norm_cfg,\n                            in_channels,\n                            block_type='conv_module',\n                            conv_cfg=dict(type='SubMConv3d')):\n        \"\"\"make encoder layers using sparse convs.\n\n        Args:\n            make_block (method): A bounded function to build blocks.\n            norm_cfg (dict[str]): Config of normalization layer.\n            in_channels (int): The number of encoder input channels.\n            block_type (str): Type of the block to use. Defaults to\n                'conv_module'.\n            conv_cfg (dict): Config of conv layer. 
Defaults to\n                dict(type='SubMConv3d').\n\n        Returns:\n            int: The number of encoder output channels.\n        \"\"\"\n        assert block_type in ['conv_module', 'basicblock']\n        self.encoder_layers = spconv.SparseSequential()\n\n        for i, blocks in enumerate(self.encoder_channels):\n            blocks_list = []\n            for j, out_channels in enumerate(tuple(blocks)):\n                padding = tuple(self.encoder_paddings[i])[j]\n                # each stage started with a spconv layer\n                # except the first stage\n                if i != 0 and j == 0 and block_type == 'conv_module':\n                    blocks_list.append(\n                        make_block(\n                            in_channels,\n                            out_channels,\n                            3,\n                            norm_cfg=norm_cfg,\n                            stride=2,\n                            padding=padding,\n                            indice_key=f'spconv{i + 1}',\n                            conv_type='SparseConv3d'))\n                elif block_type == 'basicblock':\n                    if j == len(blocks) - 1 and i != len(\n                            self.encoder_channels) - 1:\n                        blocks_list.append(\n                            make_block(\n                                in_channels,\n                                out_channels,\n                                3,\n                                norm_cfg=norm_cfg,\n                                stride=2,\n                                padding=padding,\n                                indice_key=f'spconv{i + 1}',\n                                conv_type='SparseConv3d'))\n                    else:\n                        blocks_list.append(\n                            SparseBasicBlock(\n                                out_channels,\n                                out_channels,\n                                norm_cfg=norm_cfg,\n                                conv_cfg=conv_cfg))\n                else:\n                    blocks_list.append(\n                        make_block(\n                            in_channels,\n                            out_channels,\n                            3,\n                            norm_cfg=norm_cfg,\n                            padding=padding,\n                            indice_key=f'subm{i + 1}',\n                            conv_type='SubMConv3d'))\n                in_channels = out_channels\n            stage_name = f'encoder_layer{i + 1}'\n            stage_layers = spconv.SparseSequential(*blocks_list)\n            self.encoder_layers.add_module(stage_name, stage_layers)\n        return out_channels\n"
  },
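  {
    "path": "examples/sparse_encoder_config.py",
    "content": "# Hypothetical config sketch (not part of the original codebase) showing how\n# the SparseEncoder above is typically configured. The voxel-grid shape and\n# input channels are illustrative assumptions in the spirit of a SECOND-style\n# KITTI setup; the remaining values mirror the defaults documented above.\nmiddle_encoder = dict(\n    type='SparseEncoder',\n    in_channels=4,  # e.g. x, y, z, intensity per voxel\n    sparse_shape=[41, 1600, 1408],  # (D, H, W) of the voxel grid\n    order=('conv', 'norm', 'act'),\n    base_channels=16,\n    output_channels=128,\n    encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)),\n    encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)),\n    block_type='conv_module')\n"
  },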
  {
    "path": "mmdet3d/models/middle_encoders/sparse_unet.py",
    "content": "import torch\nfrom mmcv.runner import auto_fp16\nfrom torch import nn as nn\n\nfrom mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule\nfrom mmdet3d.ops import spconv as spconv\nfrom ..registry import MIDDLE_ENCODERS\n\n\n@MIDDLE_ENCODERS.register_module()\nclass SparseUNet(nn.Module):\n    r\"\"\"SparseUNet for PartA^2.\n\n    See the `paper <https://arxiv.org/abs/1907.03670>`_ for more detials.\n\n    Args:\n        in_channels (int): The number of input channels.\n        sparse_shape (list[int]): The sparse shape of input tensor.\n        norm_cfg (dict): Config of normalization layer.\n        base_channels (int): Out channels for conv_input layer.\n        output_channels (int): Out channels for conv_out layer.\n        encoder_channels (tuple[tuple[int]]):\n            Convolutional channels of each encode block.\n        encoder_paddings (tuple[tuple[int]]): Paddings of each encode block.\n        decoder_channels (tuple[tuple[int]]):\n            Convolutional channels of each decode block.\n        decoder_paddings (tuple[tuple[int]]): Paddings of each decode block.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 sparse_shape,\n                 order=('conv', 'norm', 'act'),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 base_channels=16,\n                 output_channels=128,\n                 encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,\n                                                                        64)),\n                 encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,\n                                                                 1)),\n                 decoder_channels=((64, 64, 64), (64, 64, 32), (32, 32, 16),\n                                   (16, 16, 16)),\n                 decoder_paddings=((1, 0), (1, 0), (0, 0), (0, 1))):\n        super().__init__()\n        self.sparse_shape = sparse_shape\n        self.in_channels = in_channels\n        self.order = order\n        self.base_channels = base_channels\n        self.output_channels = output_channels\n        self.encoder_channels = encoder_channels\n        self.encoder_paddings = encoder_paddings\n        self.decoder_channels = decoder_channels\n        self.decoder_paddings = decoder_paddings\n        self.stage_num = len(self.encoder_channels)\n        self.fp16_enabled = False\n        # Spconv init all weight on its own\n\n        assert isinstance(order, tuple) and len(order) == 3\n        assert set(order) == {'conv', 'norm', 'act'}\n\n        if self.order[0] != 'conv':  # pre activate\n            self.conv_input = make_sparse_convmodule(\n                in_channels,\n                self.base_channels,\n                3,\n                norm_cfg=norm_cfg,\n                padding=1,\n                indice_key='subm1',\n                conv_type='SubMConv3d',\n                order=('conv', ))\n        else:  # post activate\n            self.conv_input = make_sparse_convmodule(\n                in_channels,\n                self.base_channels,\n                3,\n                norm_cfg=norm_cfg,\n                padding=1,\n                indice_key='subm1',\n                conv_type='SubMConv3d')\n\n        encoder_out_channels = self.make_encoder_layers(\n            make_sparse_convmodule, norm_cfg, self.base_channels)\n        self.make_decoder_layers(make_sparse_convmodule, norm_cfg,\n                                 encoder_out_channels)\n\n       
 self.conv_out = make_sparse_convmodule(\n            encoder_out_channels,\n            self.output_channels,\n            kernel_size=(3, 1, 1),\n            stride=(2, 1, 1),\n            norm_cfg=norm_cfg,\n            padding=0,\n            indice_key='spconv_down2',\n            conv_type='SparseConv3d')\n\n    @auto_fp16(apply_to=('voxel_features', ))\n    def forward(self, voxel_features, coors, batch_size):\n        \"\"\"Forward of SparseUNet.\n\n        Args:\n            voxel_features (torch.float32): Voxel features in shape [N, C].\n            coors (torch.int32): Coordinates in shape [N, 4],\n                the columns in the order of (batch_idx, z_idx, y_idx, x_idx).\n            batch_size (int): Batch size.\n\n        Returns:\n            dict[str, torch.Tensor]: Backbone features.\n        \"\"\"\n        coors = coors.int()\n        input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,\n                                                  self.sparse_shape,\n                                                  batch_size)\n        x = self.conv_input(input_sp_tensor)\n\n        encode_features = []\n        for encoder_layer in self.encoder_layers:\n            x = encoder_layer(x)\n            encode_features.append(x)\n\n        # for detection head\n        # [200, 176, 5] -> [200, 176, 2]\n        out = self.conv_out(encode_features[-1])\n        spatial_features = out.dense()\n\n        N, C, D, H, W = spatial_features.shape\n        spatial_features = spatial_features.view(N, C * D, H, W)\n\n        # for segmentation head, with output shape:\n        # [400, 352, 11] <- [200, 176, 5]\n        # [800, 704, 21] <- [400, 352, 11]\n        # [1600, 1408, 41] <- [800, 704, 21]\n        # [1600, 1408, 41] <- [1600, 1408, 41]\n        decode_features = []\n        x = encode_features[-1]\n        for i in range(self.stage_num, 0, -1):\n            x = self.decoder_layer_forward(encode_features[i - 1], x,\n                                           getattr(self, f'lateral_layer{i}'),\n                                           getattr(self, f'merge_layer{i}'),\n                                           getattr(self, f'upsample_layer{i}'))\n            decode_features.append(x)\n\n        seg_features = decode_features[-1].features\n\n        ret = dict(\n            spatial_features=spatial_features, seg_features=seg_features)\n\n        return ret\n\n    def decoder_layer_forward(self, x_lateral, x_bottom, lateral_layer,\n                              merge_layer, upsample_layer):\n        \"\"\"Forward of upsample and residual block.\n\n        Args:\n            x_lateral (:obj:`SparseConvTensor`): Lateral tensor.\n            x_bottom (:obj:`SparseConvTensor`): Feature from bottom layer.\n            lateral_layer (SparseBasicBlock): Convolution for lateral tensor.\n            merge_layer (SparseSequential): Convolution for merging features.\n            upsample_layer (SparseSequential): Convolution for upsampling.\n\n        Returns:\n            :obj:`SparseConvTensor`: Upsampled feature.\n        \"\"\"\n        x = lateral_layer(x_lateral)\n        x.features = torch.cat((x_bottom.features, x.features), dim=1)\n        x_merge = merge_layer(x)\n        x = self.reduce_channel(x, x_merge.features.shape[1])\n        x.features = x_merge.features + x.features\n        x = upsample_layer(x)\n        return x\n\n    @staticmethod\n    def reduce_channel(x, out_channels):\n        \"\"\"reduce channel for element-wise addition.\n\n        Args:\n            
x (:obj:`SparseConvTensor`): Sparse tensor, ``x.features``\n                are in shape (N, C1).\n            out_channels (int): The number of channel after reduction.\n\n        Returns:\n            :obj:`SparseConvTensor`: Channel reduced feature.\n        \"\"\"\n        features = x.features\n        n, in_channels = features.shape\n        assert (in_channels % out_channels\n                == 0) and (in_channels >= out_channels)\n\n        x.features = features.view(n, out_channels, -1).sum(dim=2)\n        return x\n\n    def make_encoder_layers(self, make_block, norm_cfg, in_channels):\n        \"\"\"make encoder layers using sparse convs.\n\n        Args:\n            make_block (method): A bounded function to build blocks.\n            norm_cfg (dict[str]): Config of normalization layer.\n            in_channels (int): The number of encoder input channels.\n\n        Returns:\n            int: The number of encoder output channels.\n        \"\"\"\n        self.encoder_layers = spconv.SparseSequential()\n\n        for i, blocks in enumerate(self.encoder_channels):\n            blocks_list = []\n            for j, out_channels in enumerate(tuple(blocks)):\n                padding = tuple(self.encoder_paddings[i])[j]\n                # each stage started with a spconv layer\n                # except the first stage\n                if i != 0 and j == 0:\n                    blocks_list.append(\n                        make_block(\n                            in_channels,\n                            out_channels,\n                            3,\n                            norm_cfg=norm_cfg,\n                            stride=2,\n                            padding=padding,\n                            indice_key=f'spconv{i + 1}',\n                            conv_type='SparseConv3d'))\n                else:\n                    blocks_list.append(\n                        make_block(\n                            in_channels,\n                            out_channels,\n                            3,\n                            norm_cfg=norm_cfg,\n                            padding=padding,\n                            indice_key=f'subm{i + 1}',\n                            conv_type='SubMConv3d'))\n                in_channels = out_channels\n            stage_name = f'encoder_layer{i + 1}'\n            stage_layers = spconv.SparseSequential(*blocks_list)\n            self.encoder_layers.add_module(stage_name, stage_layers)\n        return out_channels\n\n    def make_decoder_layers(self, make_block, norm_cfg, in_channels):\n        \"\"\"make decoder layers using sparse convs.\n\n        Args:\n            make_block (method): A bounded function to build blocks.\n            norm_cfg (dict[str]): Config of normalization layer.\n            in_channels (int): The number of encoder input channels.\n\n        Returns:\n            int: The number of encoder output channels.\n        \"\"\"\n        block_num = len(self.decoder_channels)\n        for i, block_channels in enumerate(self.decoder_channels):\n            paddings = self.decoder_paddings[i]\n            setattr(\n                self, f'lateral_layer{block_num - i}',\n                SparseBasicBlock(\n                    in_channels,\n                    block_channels[0],\n                    conv_cfg=dict(\n                        type='SubMConv3d', indice_key=f'subm{block_num - i}'),\n                    norm_cfg=norm_cfg))\n            setattr(\n                self, f'merge_layer{block_num - i}',\n                
make_block(\n                    in_channels * 2,\n                    block_channels[1],\n                    3,\n                    norm_cfg=norm_cfg,\n                    padding=paddings[0],\n                    indice_key=f'subm{block_num - i}',\n                    conv_type='SubMConv3d'))\n            if block_num - i != 1:\n                setattr(\n                    self, f'upsample_layer{block_num - i}',\n                    make_block(\n                        in_channels,\n                        block_channels[2],\n                        3,\n                        norm_cfg=norm_cfg,\n                        indice_key=f'spconv{block_num - i}',\n                        conv_type='SparseInverseConv3d'))\n            else:\n                # use submanifold conv instead of inverse conv\n                # in the last block\n                setattr(\n                    self, f'upsample_layer{block_num - i}',\n                    make_block(\n                        in_channels,\n                        block_channels[2],\n                        3,\n                        norm_cfg=norm_cfg,\n                        padding=paddings[1],\n                        indice_key='subm1',\n                        conv_type='SubMConv3d'))\n            in_channels = block_channels[2]\n"
  },
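  {
    "path": "examples/sparse_unet_config.py",
    "content": "# Hypothetical config sketch (not part of the original codebase) for the\n# SparseUNet above, as used by a PartA2-style detector. The grid shape and\n# input channels are illustrative assumptions; the other arguments fall back\n# to the defaults documented above. The forward pass returns a dict with a\n# dense BEV map ('spatial_features') for the RPN and per-voxel features\n# ('seg_features') for the point-wise segmentation head.\nmiddle_encoder = dict(\n    type='SparseUNet',\n    in_channels=4,\n    sparse_shape=[41, 1600, 1408])\n"
  },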
  {
    "path": "mmdet3d/models/model_utils/__init__.py",
    "content": "from .vote_module import VoteModule\n\n__all__ = ['VoteModule']\n"
  },
  {
    "path": "mmdet3d/models/model_utils/vote_module.py",
    "content": "import torch\nfrom mmcv import is_tuple_of\nfrom mmcv.cnn import ConvModule\nfrom torch import nn as nn\n\nfrom mmdet3d.models.builder import build_loss\n\n\nclass VoteModule(nn.Module):\n    \"\"\"Vote module.\n\n    Generate votes from seed point features.\n\n    Args:\n        in_channels (int): Number of channels of seed point features.\n        vote_per_seed (int): Number of votes generated from each seed point.\n        gt_per_seed (int): Number of ground truth votes generated\n            from each seed point.\n        num_points (int): Number of points to be used for voting.\n        conv_channels (tuple[int]): Out channels of vote\n            generating convolution.\n        conv_cfg (dict): Config of convolution.\n            Default: dict(type='Conv1d').\n        norm_cfg (dict): Config of normalization.\n            Default: dict(type='BN1d').\n        norm_feats (bool): Whether to normalize features.\n            Default: True.\n        with_res_feat (bool): Whether to predict residual features.\n            Default: True.\n        vote_xyz_range (list[float], None): The range of points translation.\n        vote_loss (dict): Config of vote loss.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 vote_per_seed=1,\n                 gt_per_seed=3,\n                 num_points=-1,\n                 conv_channels=(16, 16),\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU'),\n                 norm_feats=True,\n                 with_res_feat=True,\n                 vote_xyz_range=None,\n                 vote_loss=None):\n        super().__init__()\n        self.in_channels = in_channels\n        self.vote_per_seed = vote_per_seed\n        self.gt_per_seed = gt_per_seed\n        self.num_points = num_points\n        self.norm_feats = norm_feats\n        self.with_res_feat = with_res_feat\n\n        assert vote_xyz_range is None or is_tuple_of(vote_xyz_range, float)\n        self.vote_xyz_range = vote_xyz_range\n\n        if vote_loss is not None:\n            self.vote_loss = build_loss(vote_loss)\n\n        prev_channels = in_channels\n        vote_conv_list = list()\n        for k in range(len(conv_channels)):\n            vote_conv_list.append(\n                ConvModule(\n                    prev_channels,\n                    conv_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg,\n                    bias=True,\n                    inplace=True))\n            prev_channels = conv_channels[k]\n        self.vote_conv = nn.Sequential(*vote_conv_list)\n\n        # conv_out predicts coordinate and residual features\n        if with_res_feat:\n            out_channel = (3 + in_channels) * self.vote_per_seed\n        else:\n            out_channel = 3 * self.vote_per_seed\n        self.conv_out = nn.Conv1d(prev_channels, out_channel, 1)\n\n    def forward(self, seed_points, seed_feats):\n        \"\"\"forward.\n\n        Args:\n            seed_points (torch.Tensor): Coordinate of the seed\n                points in shape (B, N, 3).\n            seed_feats (torch.Tensor): Features of the seed points in shape\n                (B, C, N).\n\n        Returns:\n            tuple[torch.Tensor]:\n\n                - vote_points: Voted xyz based on the seed points \\\n                    with shape (B, M, 3), 
``M=num_seed*vote_per_seed``.\n                - vote_features: Voted features based on the seed points with \\\n                    shape (B, C, M) where ``M=num_seed*vote_per_seed``, \\\n                    ``C=vote_feature_dim``.\n        \"\"\"\n        if self.num_points != -1:\n            assert self.num_points < seed_points.shape[1], \\\n                f'Number of vote points ({self.num_points}) should be '\\\n                f'smaller than seed points size ({seed_points.shape[1]})'\n            seed_points = seed_points[:, :self.num_points]\n            seed_feats = seed_feats[..., :self.num_points]\n\n        batch_size, feat_channels, num_seed = seed_feats.shape\n        num_vote = num_seed * self.vote_per_seed\n        x = self.vote_conv(seed_feats)\n        # (batch_size, (3+out_dim)*vote_per_seed, num_seed)\n        votes = self.conv_out(x)\n\n        votes = votes.transpose(2, 1).view(batch_size, num_seed,\n                                           self.vote_per_seed, -1)\n\n        offset = votes[:, :, :, 0:3]\n        if self.vote_xyz_range is not None:\n            limited_offset_list = []\n            for axis in range(len(self.vote_xyz_range)):\n                limited_offset_list.append(offset[..., axis].clamp(\n                    min=-self.vote_xyz_range[axis],\n                    max=self.vote_xyz_range[axis]))\n            limited_offset = torch.stack(limited_offset_list, -1)\n            vote_points = (seed_points.unsqueeze(2) +\n                           limited_offset).contiguous()\n        else:\n            vote_points = (seed_points.unsqueeze(2) + offset).contiguous()\n        vote_points = vote_points.view(batch_size, num_vote, 3)\n        offset = offset.reshape(batch_size, num_vote, 3).transpose(2, 1)\n\n        if self.with_res_feat:\n            res_feats = votes[:, :, :, 3:]\n            vote_feats = (seed_feats.transpose(2, 1).unsqueeze(2) +\n                          res_feats).contiguous()\n            vote_feats = vote_feats.view(batch_size,\n                                         num_vote, feat_channels).transpose(\n                                             2, 1).contiguous()\n\n            if self.norm_feats:\n                features_norm = torch.norm(vote_feats, p=2, dim=1)\n                vote_feats = vote_feats.div(features_norm.unsqueeze(1))\n        else:\n            vote_feats = seed_feats\n        return vote_points, vote_feats, offset\n\n    def get_loss(self, seed_points, vote_points, seed_indices,\n                 vote_targets_mask, vote_targets):\n        \"\"\"Calculate loss of voting module.\n\n        Args:\n            seed_points (torch.Tensor): Coordinate of the seed points.\n            vote_points (torch.Tensor): Coordinate of the vote points.\n            seed_indices (torch.Tensor): Indices of seed points in raw points.\n            vote_targets_mask (torch.Tensor): Mask of valid vote targets.\n            vote_targets (torch.Tensor): Targets of votes.\n\n        Returns:\n            torch.Tensor: Weighted vote loss.\n        \"\"\"\n        batch_size, num_seed = seed_points.shape[:2]\n\n        seed_gt_votes_mask = torch.gather(vote_targets_mask, 1,\n                                          seed_indices).float()\n\n        seed_indices_expand = seed_indices.unsqueeze(-1).repeat(\n            1, 1, 3 * self.gt_per_seed)\n        seed_gt_votes = torch.gather(vote_targets, 1, seed_indices_expand)\n        seed_gt_votes += seed_points.repeat(1, 1, self.gt_per_seed)\n\n        weight = seed_gt_votes_mask / 
(torch.sum(seed_gt_votes_mask) + 1e-6)\n        distance = self.vote_loss(\n            vote_points.view(batch_size * num_seed, -1, 3),\n            seed_gt_votes.view(batch_size * num_seed, -1, 3),\n            dst_weight=weight.view(batch_size * num_seed, 1))[1]\n        vote_loss = torch.sum(torch.min(distance, dim=1)[0])\n\n        return vote_loss\n"
  },
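  {
    "path": "examples/vote_module_usage.py",
    "content": "# Hypothetical usage sketch (not part of the original codebase) for the\n# VoteModule defined above. Channel sizes and the VoteNet-style Chamfer vote\n# loss config are illustrative assumptions. Each of the N seed points casts\n# vote_per_seed votes; the module returns voted coordinates, voted features\n# and the raw xyz offsets.\nimport torch\n\nfrom mmdet3d.models.model_utils import VoteModule\n\nvote_module = VoteModule(\n    in_channels=256,\n    vote_per_seed=1,\n    gt_per_seed=3,\n    conv_channels=(256, 256),\n    vote_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='none',\n        loss_dst_weight=10.0))\n\nbatch_size, num_seed = 2, 128\nseed_points = torch.rand(batch_size, num_seed, 3)\nseed_feats = torch.rand(batch_size, 256, num_seed)\n\nvote_points, vote_feats, offset = vote_module(seed_points, seed_feats)\nassert vote_points.shape == (batch_size, num_seed, 3)\nassert vote_feats.shape == (batch_size, 256, num_seed)\nassert offset.shape == (batch_size, 3, num_seed)\n"
  },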
  {
    "path": "mmdet3d/models/necks/__init__.py",
    "content": "from mmdet.models.necks.fpn import FPN\nfrom .second_fpn import SECONDFPN\n\n__all__ = ['FPN', 'SECONDFPN']\n"
  },
  {
    "path": "mmdet3d/models/necks/second_fpn.py",
    "content": "import numpy as np\nimport torch\nfrom mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,\n                      constant_init, is_norm, kaiming_init)\nfrom mmcv.runner import auto_fp16\nfrom torch import nn as nn\n\nfrom mmdet.models import NECKS\n\n\n@NECKS.register_module()\nclass SECONDFPN(nn.Module):\n    \"\"\"FPN used in SECOND/PointPillars/PartA2/MVXNet.\n\n    Args:\n        in_channels (list[int]): Input channels of multi-scale feature maps.\n        out_channels (list[int]): Output channels of feature maps.\n        upsample_strides (list[int]): Strides used to upsample the\n            feature maps.\n        norm_cfg (dict): Config dict of normalization layers.\n        upsample_cfg (dict): Config dict of upsample layers.\n        conv_cfg (dict): Config dict of conv layers.\n        use_conv_for_no_stride (bool): Whether to use conv when stride is 1.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=[128, 128, 256],\n                 out_channels=[256, 256, 256],\n                 upsample_strides=[1, 2, 4],\n                 norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n                 upsample_cfg=dict(type='deconv', bias=False),\n                 conv_cfg=dict(type='Conv2d', bias=False),\n                 use_conv_for_no_stride=False):\n        # if for GroupNorm,\n        # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True)\n        super(SECONDFPN, self).__init__()\n        assert len(out_channels) == len(upsample_strides) == len(in_channels)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.fp16_enabled = False\n\n        deblocks = []\n        for i, out_channel in enumerate(out_channels):\n            stride = upsample_strides[i]\n            if stride > 1 or (stride == 1 and not use_conv_for_no_stride):\n                upsample_layer = build_upsample_layer(\n                    upsample_cfg,\n                    in_channels=in_channels[i],\n                    out_channels=out_channel,\n                    kernel_size=upsample_strides[i],\n                    stride=upsample_strides[i])\n            else:\n                stride = np.round(1 / stride).astype(np.int64)\n                upsample_layer = build_conv_layer(\n                    conv_cfg,\n                    in_channels=in_channels[i],\n                    out_channels=out_channel,\n                    kernel_size=stride,\n                    stride=stride)\n\n            deblock = nn.Sequential(upsample_layer,\n                                    build_norm_layer(norm_cfg, out_channel)[1],\n                                    nn.ReLU(inplace=True))\n            deblocks.append(deblock)\n        self.deblocks = nn.ModuleList(deblocks)\n\n    def init_weights(self):\n        \"\"\"Initialize weights of FPN.\"\"\"\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                kaiming_init(m)\n            elif is_norm(m):\n                constant_init(m, 1)\n\n    @auto_fp16()\n    def forward(self, x):\n        \"\"\"Forward function.\n\n        Args:\n            x (torch.Tensor): 4D Tensor in (N, C, H, W) shape.\n\n        Returns:\n            list[torch.Tensor]: Multi-level feature maps.\n        \"\"\"\n        assert len(x) == len(self.in_channels)\n        ups = [deblock(x[i]) for i, deblock in enumerate(self.deblocks)]\n\n        if len(ups) > 1:\n            out = torch.cat(ups, dim=1)\n        else:\n            out = ups[0]\n        return 
[out]\n"
  },
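  {
    "path": "examples/second_fpn_usage.py",
    "content": "# Hypothetical usage sketch (not part of the original codebase) for the\n# SECONDFPN defined above. Spatial sizes are illustrative assumptions. Each\n# input level is upsampled by its stride and the results are concatenated,\n# so all upsampled levels must end up at the same resolution.\nimport torch\n\nfrom mmdet3d.models.necks import SECONDFPN\n\nneck = SECONDFPN(\n    in_channels=[128, 128, 256],\n    out_channels=[256, 256, 256],\n    upsample_strides=[1, 2, 4])\nneck.init_weights()\n\nfeats = [\n    torch.rand(2, 128, 100, 88),  # stride 1 -> 100 x 88\n    torch.rand(2, 128, 50, 44),  # stride 2 -> 100 x 88\n    torch.rand(2, 256, 25, 22),  # stride 4 -> 100 x 88\n]\nouts = neck(feats)\nassert len(outs) == 1 and outs[0].shape == (2, 768, 100, 88)\n"
  },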
  {
    "path": "mmdet3d/models/registry.py",
    "content": "from mmcv.utils import Registry\n\nVOXEL_ENCODERS = Registry('voxel_encoder')\nMIDDLE_ENCODERS = Registry('middle_encoder')\nFUSION_LAYERS = Registry('fusion_layer')\n\n# ACTIVATION_LAYERS = Registry('activation layer')\nDROPOUT_LAYERS = Registry('drop out layers')\nPOSITIONAL_ENCODING = Registry('position encoding')\nATTENTION = Registry('attention')\nFEEDFORWARD_NETWORK = Registry('feed-forward Network')\nTRANSFORMER_LAYER = Registry('transformerLayer')\nTRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence')\n"
  },
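  {
    "path": "examples/custom_middle_encoder.py",
    "content": "# Hypothetical sketch (not part of the original codebase) of registering a\n# custom middle encoder; 'MyMiddleEncoder' is an illustrative name. A module\n# decorated this way can then be built from a config dict such as\n# dict(type='MyMiddleEncoder', in_channels=64), mirroring how\n# PointPillarsScatter and SparseEncoder register themselves.\nfrom torch import nn as nn\n\nfrom mmdet3d.models.registry import MIDDLE_ENCODERS\n\n\n@MIDDLE_ENCODERS.register_module()\nclass MyMiddleEncoder(nn.Module):\n\n    def __init__(self, in_channels):\n        super().__init__()\n        self.in_channels = in_channels\n\n    def forward(self, voxel_features, coors, batch_size):\n        # Placeholder: a real encoder would scatter/convolve the features.\n        return voxel_features\n"
  },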
  {
    "path": "mmdet3d/models/roi_heads/__init__.py",
    "content": "from .base_3droi_head import Base3DRoIHead\nfrom .bbox_heads import PartA2BboxHead\nfrom .h3d_roi_head import H3DRoIHead\nfrom .mask_heads import PointwiseSemanticHead, PrimitiveHead\nfrom .part_aggregation_roi_head import PartAggregationROIHead\nfrom .roi_extractors import Single3DRoIAwareExtractor, SingleRoIExtractor\n\n__all__ = [\n    'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead',\n    'Single3DRoIAwareExtractor', 'PartA2BboxHead', 'SingleRoIExtractor',\n    'H3DRoIHead', 'PrimitiveHead'\n]\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/base_3droi_head.py",
    "content": "from abc import ABCMeta, abstractmethod\nfrom torch import nn as nn\n\n\nclass Base3DRoIHead(nn.Module, metaclass=ABCMeta):\n    \"\"\"Base class for 3d RoIHeads.\"\"\"\n\n    def __init__(self,\n                 bbox_head=None,\n                 mask_roi_extractor=None,\n                 mask_head=None,\n                 train_cfg=None,\n                 test_cfg=None):\n        super(Base3DRoIHead, self).__init__()\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        if bbox_head is not None:\n            self.init_bbox_head(bbox_head)\n\n        if mask_head is not None:\n            self.init_mask_head(mask_roi_extractor, mask_head)\n\n        self.init_assigner_sampler()\n\n    @property\n    def with_bbox(self):\n        \"\"\"bool: whether the RoIHead has box head\"\"\"\n        return hasattr(self, 'bbox_head') and self.bbox_head is not None\n\n    @property\n    def with_mask(self):\n        \"\"\"bool: whether the RoIHead has mask head\"\"\"\n        return hasattr(self, 'mask_head') and self.mask_head is not None\n\n    @abstractmethod\n    def init_weights(self, pretrained):\n        \"\"\"Initialize the module with pre-trained weights.\"\"\"\n        pass\n\n    @abstractmethod\n    def init_bbox_head(self):\n        \"\"\"Initialize the box head.\"\"\"\n        pass\n\n    @abstractmethod\n    def init_mask_head(self):\n        \"\"\"Initialize maek head.\"\"\"\n        pass\n\n    @abstractmethod\n    def init_assigner_sampler(self):\n        \"\"\"Initialize assigner and sampler.\"\"\"\n        pass\n\n    @abstractmethod\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      proposal_list,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      **kwargs):\n        \"\"\"Forward function during training.\n\n        Args:\n            x (dict): Contains features from the first stage.\n            img_metas (list[dict]): Meta info of each image.\n            proposal_list (list[dict]): Proposal information from rpn.\n            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]):\n                GT bboxes of each sample. The bboxes are encapsulated\n                by 3D box structures.\n            gt_labels (list[torch.LongTensor]): GT labels of each sample.\n            gt_bboxes_ignore (list[torch.Tensor], optional):\n                Ground truth boxes to be ignored.\n\n        Returns:\n            dict[str, torch.Tensor]: Losses from each head.\n        \"\"\"\n        pass\n\n    def simple_test(self,\n                    x,\n                    proposal_list,\n                    img_metas,\n                    proposals=None,\n                    rescale=False,\n                    **kwargs):\n        \"\"\"Test without augmentation.\"\"\"\n        pass\n\n    def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n        pass\n"
  },
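  {
    "path": "examples/dummy_roi_head.py",
    "content": "# Hypothetical sketch (not part of the original codebase) of a minimal\n# Base3DRoIHead subclass; 'DummyRoIHead' and its behaviour are illustrative\n# assumptions. It only shows which abstract hooks a concrete 3D RoI head has\n# to implement before it can be instantiated.\nfrom mmdet3d.models.roi_heads import Base3DRoIHead\n\n\nclass DummyRoIHead(Base3DRoIHead):\n\n    def init_weights(self, pretrained=None):\n        pass\n\n    def init_bbox_head(self, bbox_head):\n        # A real head would build the bbox head from its config here.\n        self.bbox_head = bbox_head\n\n    def init_mask_head(self, mask_roi_extractor, mask_head):\n        self.mask_head = mask_head\n\n    def init_assigner_sampler(self):\n        self.bbox_assigner = None\n        self.bbox_sampler = None\n\n    def forward_train(self, x, img_metas, proposal_list, gt_bboxes, gt_labels,\n                      gt_bboxes_ignore=None, **kwargs):\n        return dict()\n\n\nhead = DummyRoIHead(train_cfg=None, test_cfg=None)\nassert not head.with_bbox and not head.with_mask\n"
  },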
  {
    "path": "mmdet3d/models/roi_heads/bbox_heads/__init__.py",
    "content": "from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,\n                                               DoubleConvFCBBoxHead,\n                                               Shared2FCBBoxHead,\n                                               Shared4Conv1FCBBoxHead)\nfrom .h3d_bbox_head import H3DBboxHead\nfrom .parta2_bbox_head import PartA2BboxHead\n\n__all__ = [\n    'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',\n    'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead',\n    'H3DBboxHead'\n]\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py",
    "content": "import torch\nfrom mmcv.cnn import ConvModule\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes\nfrom mmdet3d.core.post_processing import aligned_3d_nms\nfrom mmdet3d.models.builder import build_loss\nfrom mmdet3d.models.losses import chamfer_distance\nfrom mmdet3d.ops import build_sa_module\nfrom mmdet.core import build_bbox_coder, multi_apply\nfrom mmdet.models import HEADS\n\n\n@HEADS.register_module()\nclass H3DBboxHead(nn.Module):\n    r\"\"\"Bbox head of `H3DNet <https://arxiv.org/abs/2006.05682>`_.\n\n    Args:\n        num_classes (int): The number of classes.\n        suface_matching_cfg (dict): Config for suface primitive matching.\n        line_matching_cfg (dict): Config for line primitive matching.\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and\n            decoding boxes.\n        train_cfg (dict): Config for training.\n        test_cfg (dict): Config for testing.\n        gt_per_seed (int): Number of ground truth votes generated\n            from each seed point.\n        num_proposal (int): Number of proposal votes generated.\n        feat_channels (tuple[int]): Convolution channels of\n            prediction layer.\n        primitive_feat_refine_streams (int): The number of mlps to\n            refine primitive feature.\n        primitive_refine_channels (tuple[int]): Convolution channels of\n            prediction layer.\n        upper_thresh (float): Threshold for line matching.\n        surface_thresh (float): Threshold for suface matching.\n        line_thresh (float): Threshold for line matching.\n        conv_cfg (dict): Config of convolution in prediction layer.\n        norm_cfg (dict): Config of BN in prediction layer.\n        objectness_loss (dict): Config of objectness loss.\n        center_loss (dict): Config of center loss.\n        dir_class_loss (dict): Config of direction classification loss.\n        dir_res_loss (dict): Config of direction residual regression loss.\n        size_class_loss (dict): Config of size classification loss.\n        size_res_loss (dict): Config of size residual regression loss.\n        semantic_loss (dict): Config of point-wise semantic segmentation loss.\n        cues_objectness_loss (dict): Config of cues objectness loss.\n        cues_semantic_loss (dict): Config of cues semantic loss.\n        proposal_objectness_loss (dict): Config of proposal objectness\n            loss.\n        primitive_center_loss (dict): Config of primitive center regression\n            loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 suface_matching_cfg,\n                 line_matching_cfg,\n                 bbox_coder,\n                 train_cfg=None,\n                 test_cfg=None,\n                 gt_per_seed=1,\n                 num_proposal=256,\n                 feat_channels=(128, 128),\n                 primitive_feat_refine_streams=2,\n                 primitive_refine_channels=[128, 128, 128],\n                 upper_thresh=100.0,\n                 surface_thresh=0.5,\n                 line_thresh=0.5,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 objectness_loss=None,\n                 center_loss=None,\n                 dir_class_loss=None,\n                 dir_res_loss=None,\n                 size_class_loss=None,\n                 size_res_loss=None,\n                 semantic_loss=None,\n                 
cues_objectness_loss=None,\n                 cues_semantic_loss=None,\n                 proposal_objectness_loss=None,\n                 primitive_center_loss=None):\n        super(H3DBboxHead, self).__init__()\n        self.num_classes = num_classes\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.gt_per_seed = gt_per_seed\n        self.num_proposal = num_proposal\n        self.with_angle = bbox_coder['with_rot']\n        self.upper_thresh = upper_thresh\n        self.surface_thresh = surface_thresh\n        self.line_thresh = line_thresh\n\n        self.objectness_loss = build_loss(objectness_loss)\n        self.center_loss = build_loss(center_loss)\n        self.dir_class_loss = build_loss(dir_class_loss)\n        self.dir_res_loss = build_loss(dir_res_loss)\n        self.size_class_loss = build_loss(size_class_loss)\n        self.size_res_loss = build_loss(size_res_loss)\n        self.semantic_loss = build_loss(semantic_loss)\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.num_sizes = self.bbox_coder.num_sizes\n        self.num_dir_bins = self.bbox_coder.num_dir_bins\n\n        self.cues_objectness_loss = build_loss(cues_objectness_loss)\n        self.cues_semantic_loss = build_loss(cues_semantic_loss)\n        self.proposal_objectness_loss = build_loss(proposal_objectness_loss)\n        self.primitive_center_loss = build_loss(primitive_center_loss)\n\n        assert suface_matching_cfg['mlp_channels'][-1] == \\\n            line_matching_cfg['mlp_channels'][-1]\n\n        # surface center matching\n        self.surface_center_matcher = build_sa_module(suface_matching_cfg)\n        # line center matching\n        self.line_center_matcher = build_sa_module(line_matching_cfg)\n\n        # Compute the matching scores\n        matching_feat_dims = suface_matching_cfg['mlp_channels'][-1]\n        self.matching_conv = ConvModule(\n            matching_feat_dims,\n            matching_feat_dims,\n            1,\n            padding=0,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            bias=True,\n            inplace=True)\n        self.matching_pred = nn.Conv1d(matching_feat_dims, 2, 1)\n\n        # Compute the semantic matching scores\n        self.semantic_matching_conv = ConvModule(\n            matching_feat_dims,\n            matching_feat_dims,\n            1,\n            padding=0,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            bias=True,\n            inplace=True)\n        self.semantic_matching_pred = nn.Conv1d(matching_feat_dims, 2, 1)\n\n        # Surface feature aggregation\n        self.surface_feats_aggregation = list()\n        for k in range(primitive_feat_refine_streams):\n            self.surface_feats_aggregation.append(\n                ConvModule(\n                    matching_feat_dims,\n                    matching_feat_dims,\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    bias=True,\n                    inplace=True))\n        self.surface_feats_aggregation = nn.Sequential(\n            *self.surface_feats_aggregation)\n\n        # Line feature aggregation\n        self.line_feats_aggregation = list()\n        for k in range(primitive_feat_refine_streams):\n            self.line_feats_aggregation.append(\n                ConvModule(\n                    matching_feat_dims,\n                    matching_feat_dims,\n                    1,\n           
         padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    bias=True,\n                    inplace=True))\n        self.line_feats_aggregation = nn.Sequential(\n            *self.line_feats_aggregation)\n\n        # surface center(6) + line center(12)\n        prev_channel = 18 * matching_feat_dims\n        self.bbox_pred = nn.ModuleList()\n        for k in range(len(primitive_refine_channels)):\n            self.bbox_pred.append(\n                ConvModule(\n                    prev_channel,\n                    primitive_refine_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    bias=True,\n                    inplace=False))\n            prev_channel = primitive_refine_channels[k]\n\n        # Final object detection\n        # Objectness scores (2), center residual (3),\n        # heading class+residual (num_heading_bin*2), size class +\n        # residual(num_size_cluster*4)\n        conv_out_channel = (2 + 3 + bbox_coder['num_dir_bins'] * 2 +\n                            bbox_coder['num_sizes'] * 4 + self.num_classes)\n        self.bbox_pred.append(nn.Conv1d(prev_channel, conv_out_channel, 1))\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize the weights in detector.\n\n        Args:\n            pretrained (str, optional): Path to pre-trained weights.\n                Defaults to None.\n        \"\"\"\n        pass\n\n    def forward(self, feats_dict, sample_mod):\n        \"\"\"Forward pass.\n\n        Args:\n            feats_dict (dict): Feature dict from backbone.\n            sample_mod (str): Sample mode for vote aggregation layer.\n                valid modes are \"vote\", \"seed\" and \"random\".\n\n        Returns:\n            dict: Predictions of vote head.\n        \"\"\"\n        ret_dict = {}\n        aggregated_points = feats_dict['aggregated_points']\n        original_feature = feats_dict['aggregated_features']\n        batch_size = original_feature.shape[0]\n        object_proposal = original_feature.shape[2]\n\n        # Extract surface center, features and semantic predictions\n        z_center = feats_dict['pred_z_center']\n        xy_center = feats_dict['pred_xy_center']\n        z_semantic = feats_dict['sem_cls_scores_z']\n        xy_semantic = feats_dict['sem_cls_scores_xy']\n        z_feature = feats_dict['aggregated_features_z']\n        xy_feature = feats_dict['aggregated_features_xy']\n        # Extract line points and features\n        line_center = feats_dict['pred_line_center']\n        line_feature = feats_dict['aggregated_features_line']\n\n        surface_center_pred = torch.cat((z_center, xy_center), dim=1)\n        ret_dict['surface_center_pred'] = surface_center_pred\n        ret_dict['surface_sem_pred'] = torch.cat((z_semantic, xy_semantic),\n                                                 dim=1)\n\n        # Extract the surface and line centers of rpn proposals\n        rpn_proposals = feats_dict['proposal_list']\n        rpn_proposals_bbox = DepthInstance3DBoxes(\n            rpn_proposals.reshape(-1, 7).clone(),\n            box_dim=rpn_proposals.shape[-1],\n            with_yaw=self.with_angle,\n            origin=(0.5, 0.5, 0.5))\n\n        obj_surface_center, obj_line_center = \\\n            rpn_proposals_bbox.get_surface_line_center()\n        obj_surface_center = obj_surface_center.reshape(\n            batch_size, -1, 6, 
3).transpose(1, 2).reshape(batch_size, -1, 3)\n        obj_line_center = obj_line_center.reshape(batch_size, -1, 12,\n                                                  3).transpose(1, 2).reshape(\n                                                      batch_size, -1, 3)\n        ret_dict['surface_center_object'] = obj_surface_center\n        ret_dict['line_center_object'] = obj_line_center\n\n        # aggregate primitive z and xy features to rpn proposals\n        surface_center_feature_pred = torch.cat((z_feature, xy_feature), dim=2)\n        surface_center_feature_pred = torch.cat(\n            (surface_center_feature_pred.new_zeros(\n                (batch_size, 6, surface_center_feature_pred.shape[2])),\n             surface_center_feature_pred),\n            dim=1)\n\n        surface_xyz, surface_features, _ = self.surface_center_matcher(\n            surface_center_pred,\n            surface_center_feature_pred,\n            target_xyz=obj_surface_center)\n\n        # aggregate primitive line features to rpn proposals\n        line_feature = torch.cat((line_feature.new_zeros(\n            (batch_size, 12, line_feature.shape[2])), line_feature),\n                                 dim=1)\n        line_xyz, line_features, _ = self.line_center_matcher(\n            line_center, line_feature, target_xyz=obj_line_center)\n\n        # combine the surface and line features\n        combine_features = torch.cat((surface_features, line_features), dim=2)\n\n        matching_features = self.matching_conv(combine_features)\n        matching_score = self.matching_pred(matching_features)\n        ret_dict['matching_score'] = matching_score.transpose(2, 1)\n\n        semantic_matching_features = self.semantic_matching_conv(\n            combine_features)\n        semantic_matching_score = self.semantic_matching_pred(\n            semantic_matching_features)\n        ret_dict['semantic_matching_score'] = \\\n            semantic_matching_score.transpose(2, 1)\n\n        surface_features = self.surface_feats_aggregation(surface_features)\n        line_features = self.line_feats_aggregation(line_features)\n\n        # Combine all surface and line features\n        surface_features = surface_features.view(batch_size, -1,\n                                                 object_proposal)\n        line_features = line_features.view(batch_size, -1, object_proposal)\n\n        combine_feature = torch.cat((surface_features, line_features), dim=1)\n\n        # Final bbox predictions\n        bbox_predictions = self.bbox_pred[0](combine_feature)\n        bbox_predictions += original_feature\n        for conv_module in self.bbox_pred[1:]:\n            bbox_predictions = conv_module(bbox_predictions)\n\n        refine_decode_res = self.bbox_coder.split_pred(\n            bbox_predictions[:, :self.num_classes + 2],\n            bbox_predictions[:, self.num_classes + 2:], aggregated_points)\n        for key in refine_decode_res.keys():\n            ret_dict[key + '_optimized'] = refine_decode_res[key]\n        return ret_dict\n\n    def loss(self,\n             bbox_preds,\n             points,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             pts_semantic_mask=None,\n             pts_instance_mask=None,\n             img_metas=None,\n             rpn_targets=None,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of h3d bbox head.\n            points (list[torch.Tensor]): Input points.\n            
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \\\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (None | list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (None | list[torch.Tensor]): Point-wise\n                instance mask.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            rpn_targets (tuple): Targets generated by rpn head.\n            gt_bboxes_ignore (None | list[torch.Tensor]): Specify\n                which bounding boxes to ignore.\n\n        Returns:\n            dict: Losses of H3DNet.\n        \"\"\"\n        (vote_targets, vote_target_masks, size_class_targets, size_res_targets,\n         dir_class_targets, dir_res_targets, center_targets, mask_targets,\n         valid_gt_masks, objectness_targets, objectness_weights,\n         box_loss_weights, valid_gt_weights) = rpn_targets\n\n        losses = {}\n\n        # calculate refined proposal loss\n        refined_proposal_loss = self.get_proposal_stage_loss(\n            bbox_preds,\n            size_class_targets,\n            size_res_targets,\n            dir_class_targets,\n            dir_res_targets,\n            center_targets,\n            mask_targets,\n            objectness_targets,\n            objectness_weights,\n            box_loss_weights,\n            valid_gt_weights,\n            suffix='_optimized')\n        for key in refined_proposal_loss.keys():\n            losses[key + '_optimized'] = refined_proposal_loss[key]\n\n        bbox3d_optimized = self.bbox_coder.decode(\n            bbox_preds, suffix='_optimized')\n\n        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,\n                                   pts_semantic_mask, pts_instance_mask,\n                                   bbox_preds)\n\n        (cues_objectness_label, cues_sem_label, proposal_objectness_label,\n         cues_mask, cues_match_mask, proposal_objectness_mask,\n         cues_matching_label, obj_surface_line_center) = targets\n\n        # match scores for each geometric primitive\n        objectness_scores = bbox_preds['matching_score']\n        # match scores for the semantics of primitives\n        objectness_scores_sem = bbox_preds['semantic_matching_score']\n\n        primitive_objectness_loss = self.cues_objectness_loss(\n            objectness_scores.transpose(2, 1),\n            cues_objectness_label,\n            weight=cues_mask,\n            avg_factor=cues_mask.sum() + 1e-6)\n\n        primitive_sem_loss = self.cues_semantic_loss(\n            objectness_scores_sem.transpose(2, 1),\n            cues_sem_label,\n            weight=cues_mask,\n            avg_factor=cues_mask.sum() + 1e-6)\n\n        objectness_scores = bbox_preds['obj_scores_optimized']\n        objectness_loss_refine = self.proposal_objectness_loss(\n            objectness_scores.transpose(2, 1), proposal_objectness_label)\n        primitive_matching_loss = (objectness_loss_refine *\n                                   cues_match_mask).sum() / (\n                                       cues_match_mask.sum() + 1e-6) * 0.5\n        primitive_sem_matching_loss = (\n            objectness_loss_refine * proposal_objectness_mask).sum() / (\n                proposal_objectness_mask.sum() + 1e-6) * 0.5\n\n        # Get the object surface center here\n        batch_size, object_proposal = bbox3d_optimized.shape[:2]\n        refined_bbox = DepthInstance3DBoxes(\n            
bbox3d_optimized.reshape(-1, 7).clone(),\n            box_dim=bbox3d_optimized.shape[-1],\n            with_yaw=self.with_angle,\n            origin=(0.5, 0.5, 0.5))\n\n        pred_obj_surface_center, pred_obj_line_center = \\\n            refined_bbox.get_surface_line_center()\n        pred_obj_surface_center = pred_obj_surface_center.reshape(\n            batch_size, -1, 6, 3).transpose(1, 2).reshape(batch_size, -1, 3)\n        pred_obj_line_center = pred_obj_line_center.reshape(\n            batch_size, -1, 12, 3).transpose(1, 2).reshape(batch_size, -1, 3)\n        pred_surface_line_center = torch.cat(\n            (pred_obj_surface_center, pred_obj_line_center), 1)\n\n        square_dist = self.primitive_center_loss(pred_surface_line_center,\n                                                 obj_surface_line_center)\n\n        match_dist = torch.sqrt(square_dist.sum(dim=-1) + 1e-6)\n        primitive_centroid_reg_loss = torch.sum(\n            match_dist * cues_matching_label) / (\n                cues_matching_label.sum() + 1e-6)\n\n        refined_loss = dict(\n            primitive_objectness_loss=primitive_objectness_loss,\n            primitive_sem_loss=primitive_sem_loss,\n            primitive_matching_loss=primitive_matching_loss,\n            primitive_sem_matching_loss=primitive_sem_matching_loss,\n            primitive_centroid_reg_loss=primitive_centroid_reg_loss)\n\n        losses.update(refined_loss)\n\n        return losses\n\n    def get_bboxes(self,\n                   points,\n                   bbox_preds,\n                   input_metas,\n                   rescale=False,\n                   suffix=''):\n        \"\"\"Generate bboxes from vote head predictions.\n\n        Args:\n            points (torch.Tensor): Input points.\n            bbox_preds (dict): Predictions from vote head.\n            input_metas (list[dict]): Point cloud and image's meta info.\n            rescale (bool): Whether to rescale bboxes.\n\n        Returns:\n            list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.\n        \"\"\"\n        # decode boxes\n        obj_scores = F.softmax(\n            bbox_preds['obj_scores' + suffix], dim=-1)[..., -1]\n\n        sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)\n\n        prediction_collection = {}\n        prediction_collection['center'] = bbox_preds['center' + suffix]\n        prediction_collection['dir_class'] = bbox_preds['dir_class']\n        prediction_collection['dir_res'] = bbox_preds['dir_res' + suffix]\n        prediction_collection['size_class'] = bbox_preds['size_class']\n        prediction_collection['size_res'] = bbox_preds['size_res' + suffix]\n\n        bbox3d = self.bbox_coder.decode(prediction_collection)\n\n        batch_size = bbox3d.shape[0]\n        results = list()\n        for b in range(batch_size):\n            bbox_selected, score_selected, labels = self.multiclass_nms_single(\n                obj_scores[b], sem_scores[b], bbox3d[b], points[b, ..., :3],\n                input_metas[b])\n            bbox = input_metas[b]['box_type_3d'](\n                bbox_selected,\n                box_dim=bbox_selected.shape[-1],\n                with_yaw=self.bbox_coder.with_rot)\n            results.append((bbox, score_selected, labels))\n\n        return results\n\n    def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,\n                              input_meta):\n        \"\"\"Multi-class nms in single batch.\n\n        Args:\n            obj_scores (torch.Tensor): Objectness score of 
bounding boxes.\n            sem_scores (torch.Tensor): semantic class score of bounding boxes.\n            bbox (torch.Tensor): Predicted bounding boxes.\n            points (torch.Tensor): Input points.\n            input_meta (dict): Point cloud and image's meta info.\n\n        Returns:\n            tuple[torch.Tensor]: Bounding boxes, scores and labels.\n        \"\"\"\n        bbox = input_meta['box_type_3d'](\n            bbox,\n            box_dim=bbox.shape[-1],\n            with_yaw=self.bbox_coder.with_rot,\n            origin=(0.5, 0.5, 0.5))\n        box_indices = bbox.points_in_boxes(points)\n\n        corner3d = bbox.corners\n        minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))\n        minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]\n        minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]\n\n        nonempty_box_mask = box_indices.T.sum(1) > 5\n\n        bbox_classes = torch.argmax(sem_scores, -1)\n        nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask],\n                                      obj_scores[nonempty_box_mask],\n                                      bbox_classes[nonempty_box_mask],\n                                      self.test_cfg.nms_thr)\n\n        # filter empty boxes and boxes with low score\n        scores_mask = (obj_scores > self.test_cfg.score_thr)\n        nonempty_box_inds = torch.nonzero(\n            nonempty_box_mask, as_tuple=False).flatten()\n        nonempty_mask = torch.zeros_like(bbox_classes).scatter(\n            0, nonempty_box_inds[nms_selected], 1)\n        selected = (nonempty_mask.bool() & scores_mask.bool())\n\n        if self.test_cfg.per_class_proposal:\n            bbox_selected, score_selected, labels = [], [], []\n            for k in range(sem_scores.shape[-1]):\n                bbox_selected.append(bbox[selected].tensor)\n                score_selected.append(obj_scores[selected] *\n                                      sem_scores[selected][:, k])\n                labels.append(\n                    torch.zeros_like(bbox_classes[selected]).fill_(k))\n            bbox_selected = torch.cat(bbox_selected, 0)\n            score_selected = torch.cat(score_selected, 0)\n            labels = torch.cat(labels, 0)\n        else:\n            bbox_selected = bbox[selected].tensor\n            score_selected = obj_scores[selected]\n            labels = bbox_classes[selected]\n\n        return bbox_selected, score_selected, labels\n\n    def get_proposal_stage_loss(self,\n                                bbox_preds,\n                                size_class_targets,\n                                size_res_targets,\n                                dir_class_targets,\n                                dir_res_targets,\n                                center_targets,\n                                mask_targets,\n                                objectness_targets,\n                                objectness_weights,\n                                box_loss_weights,\n                                valid_gt_weights,\n                                suffix=''):\n        \"\"\"Compute loss for the aggregation module.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of vote head.\n            size_class_targets (torch.Tensor): Ground truth \\\n                size class of each prediction bounding box.\n            size_res_targets (torch.Tensor): Ground truth \\\n                size residual of each prediction bounding box.\n            dir_class_targets (torch.Tensor): 
Ground truth \\\n                direction class of each prediction bounding box.\n            dir_res_targets (torch.Tensor): Ground truth \\\n                direction residual of each prediction bounding box.\n            center_targets (torch.Tensor): Ground truth center \\\n                of each prediction bounding box.\n            mask_targets (torch.Tensor): Validation of each \\\n                prediction bounding box.\n            objectness_targets (torch.Tensor): Ground truth \\\n                objectness label of each prediction bounding box.\n            objectness_weights (torch.Tensor): Weights of objectness \\\n                loss for each prediction bounding box.\n            box_loss_weights (torch.Tensor): Weights of regression \\\n                loss for each prediction bounding box.\n            valid_gt_weights (torch.Tensor): Validation of each \\\n                ground truth bounding box.\n\n        Returns:\n            dict: Losses of aggregation module.\n        \"\"\"\n        # calculate objectness loss\n        objectness_loss = self.objectness_loss(\n            bbox_preds['obj_scores' + suffix].transpose(2, 1),\n            objectness_targets,\n            weight=objectness_weights)\n\n        # calculate center loss\n        source2target_loss, target2source_loss = self.center_loss(\n            bbox_preds['center' + suffix],\n            center_targets,\n            src_weight=box_loss_weights,\n            dst_weight=valid_gt_weights)\n        center_loss = source2target_loss + target2source_loss\n\n        # calculate direction class loss\n        dir_class_loss = self.dir_class_loss(\n            bbox_preds['dir_class' + suffix].transpose(2, 1),\n            dir_class_targets,\n            weight=box_loss_weights)\n\n        # calculate direction residual loss\n        batch_size, proposal_num = size_class_targets.shape[:2]\n        heading_label_one_hot = dir_class_targets.new_zeros(\n            (batch_size, proposal_num, self.num_dir_bins))\n        heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1)\n        dir_res_norm = (bbox_preds['dir_res_norm' + suffix] *\n                        heading_label_one_hot).sum(dim=-1)\n        dir_res_loss = self.dir_res_loss(\n            dir_res_norm, dir_res_targets, weight=box_loss_weights)\n\n        # calculate size class loss\n        size_class_loss = self.size_class_loss(\n            bbox_preds['size_class' + suffix].transpose(2, 1),\n            size_class_targets,\n            weight=box_loss_weights)\n\n        # calculate size residual loss\n        one_hot_size_targets = box_loss_weights.new_zeros(\n            (batch_size, proposal_num, self.num_sizes))\n        one_hot_size_targets.scatter_(2, size_class_targets.unsqueeze(-1), 1)\n        one_hot_size_targets_expand = one_hot_size_targets.unsqueeze(\n            -1).repeat(1, 1, 1, 3)\n        size_residual_norm = (bbox_preds['size_res_norm' + suffix] *\n                              one_hot_size_targets_expand).sum(dim=2)\n        box_loss_weights_expand = box_loss_weights.unsqueeze(-1).repeat(\n            1, 1, 3)\n        size_res_loss = self.size_res_loss(\n            size_residual_norm,\n            size_res_targets,\n            weight=box_loss_weights_expand)\n\n        # calculate semantic loss\n        semantic_loss = self.semantic_loss(\n            bbox_preds['sem_scores' + suffix].transpose(2, 1),\n            mask_targets,\n            weight=box_loss_weights)\n\n        losses = dict(\n            
objectness_loss=objectness_loss,\n            semantic_loss=semantic_loss,\n            center_loss=center_loss,\n            dir_class_loss=dir_class_loss,\n            dir_res_loss=dir_res_loss,\n            size_class_loss=size_class_loss,\n            size_res_loss=size_res_loss)\n\n        return losses\n\n    def get_targets(self,\n                    points,\n                    gt_bboxes_3d,\n                    gt_labels_3d,\n                    pts_semantic_mask=None,\n                    pts_instance_mask=None,\n                    bbox_preds=None):\n        \"\"\"Generate targets of proposal module.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \\\n                bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): Labels of each batch.\n            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance\n                label of each batch.\n            bbox_preds (torch.Tensor): Bounding box predictions of vote head.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of proposal module.\n        \"\"\"\n        # find empty example\n        valid_gt_masks = list()\n        gt_num = list()\n        for index in range(len(gt_labels_3d)):\n            if len(gt_labels_3d[index]) == 0:\n                fake_box = gt_bboxes_3d[index].tensor.new_zeros(\n                    1, gt_bboxes_3d[index].tensor.shape[-1])\n                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)\n                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)\n                valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))\n                gt_num.append(1)\n            else:\n                valid_gt_masks.append(gt_labels_3d[index].new_ones(\n                    gt_labels_3d[index].shape))\n                gt_num.append(gt_labels_3d[index].shape[0])\n\n        if pts_semantic_mask is None:\n            pts_semantic_mask = [None for i in range(len(gt_labels_3d))]\n            pts_instance_mask = [None for i in range(len(gt_labels_3d))]\n\n        aggregated_points = [\n            bbox_preds['aggregated_points'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        surface_center_pred = [\n            bbox_preds['surface_center_pred'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        line_center_pred = [\n            bbox_preds['pred_line_center'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        surface_center_object = [\n            bbox_preds['surface_center_object'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        line_center_object = [\n            bbox_preds['line_center_object'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        surface_sem_pred = [\n            bbox_preds['surface_sem_pred'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        line_sem_pred = [\n            bbox_preds['sem_cls_scores_line'][i]\n            for i in range(len(gt_labels_3d))\n        ]\n\n        (cues_objectness_label, cues_sem_label, proposal_objectness_label,\n         cues_mask, cues_match_mask, proposal_objectness_mask,\n         cues_matching_label, obj_surface_line_center) = multi_apply(\n             self.get_targets_single, points, gt_bboxes_3d, gt_labels_3d,\n             
pts_semantic_mask, pts_instance_mask, aggregated_points,\n             surface_center_pred, line_center_pred, surface_center_object,\n             line_center_object, surface_sem_pred, line_sem_pred)\n\n        cues_objectness_label = torch.stack(cues_objectness_label)\n        cues_sem_label = torch.stack(cues_sem_label)\n        proposal_objectness_label = torch.stack(proposal_objectness_label)\n        cues_mask = torch.stack(cues_mask)\n        cues_match_mask = torch.stack(cues_match_mask)\n        proposal_objectness_mask = torch.stack(proposal_objectness_mask)\n        cues_matching_label = torch.stack(cues_matching_label)\n        obj_surface_line_center = torch.stack(obj_surface_line_center)\n\n        return (cues_objectness_label, cues_sem_label,\n                proposal_objectness_label, cues_mask, cues_match_mask,\n                proposal_objectness_mask, cues_matching_label,\n                obj_surface_line_center)\n\n    def get_targets_single(self,\n                           points,\n                           gt_bboxes_3d,\n                           gt_labels_3d,\n                           pts_semantic_mask=None,\n                           pts_instance_mask=None,\n                           aggregated_points=None,\n                           pred_surface_center=None,\n                           pred_line_center=None,\n                           pred_obj_surface_center=None,\n                           pred_obj_line_center=None,\n                           pred_surface_sem=None,\n                           pred_line_sem=None):\n        \"\"\"Generate targets for primitive cues for single batch.\n\n        Args:\n            points (torch.Tensor): Points of each batch.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \\\n                boxes of each batch.\n            gt_labels_3d (torch.Tensor): Labels of each batch.\n            pts_semantic_mask (None | torch.Tensor): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | torch.Tensor): Point-wise instance\n                label of each batch.\n            aggregated_points (torch.Tensor): Aggregated points from\n                vote aggregation layer.\n            pred_surface_center (torch.Tensor): Prediction of surface center.\n            pred_line_center (torch.Tensor): Prediction of line center.\n            pred_obj_surface_center (torch.Tensor): Objectness prediction \\\n                of surface center.\n            pred_obj_line_center (torch.Tensor): Objectness prediction of \\\n                line center.\n            pred_surface_sem (torch.Tensor): Semantic prediction of \\\n                surface center.\n            pred_line_sem (torch.Tensor): Semantic prediction of line center.\n        Returns:\n            tuple[torch.Tensor]: Targets for primitive cues.\n        \"\"\"\n        device = points.device\n        gt_bboxes_3d = gt_bboxes_3d.to(device)\n        num_proposals = aggregated_points.shape[0]\n        gt_center = gt_bboxes_3d.gravity_center\n\n        dist1, dist2, ind1, _ = chamfer_distance(\n            aggregated_points.unsqueeze(0),\n            gt_center.unsqueeze(0),\n            reduction='none')\n        # Set assignment\n        object_assignment = ind1.squeeze(0)\n\n        # Generate objectness label and mask\n        # objectness_label: 1 if pred object center is within\n        # self.train_cfg['near_threshold'] of any GT object\n        # objectness_mask: 0 if pred object center is in gray\n        # zone 
(DONOTCARE), 1 otherwise\n        euclidean_dist1 = torch.sqrt(dist1.squeeze(0) + 1e-6)\n        proposal_objectness_label = euclidean_dist1.new_zeros(\n            num_proposals, dtype=torch.long)\n        proposal_objectness_mask = euclidean_dist1.new_zeros(num_proposals)\n\n        gt_sem = gt_labels_3d[object_assignment]\n\n        obj_surface_center, obj_line_center = \\\n            gt_bboxes_3d.get_surface_line_center()\n        obj_surface_center = obj_surface_center.reshape(-1, 6,\n                                                        3).transpose(0, 1)\n        obj_line_center = obj_line_center.reshape(-1, 12, 3).transpose(0, 1)\n        obj_surface_center = obj_surface_center[:, object_assignment].reshape(\n            1, -1, 3)\n        obj_line_center = obj_line_center[:,\n                                          object_assignment].reshape(1, -1, 3)\n\n        surface_sem = torch.argmax(pred_surface_sem, dim=1).float()\n        line_sem = torch.argmax(pred_line_sem, dim=1).float()\n\n        dist_surface, _, surface_ind, _ = chamfer_distance(\n            obj_surface_center,\n            pred_surface_center.unsqueeze(0),\n            reduction='none')\n        dist_line, _, line_ind, _ = chamfer_distance(\n            obj_line_center, pred_line_center.unsqueeze(0), reduction='none')\n\n        surface_sel = pred_surface_center[surface_ind.squeeze(0)]\n        line_sel = pred_line_center[line_ind.squeeze(0)]\n        surface_sel_sem = surface_sem[surface_ind.squeeze(0)]\n        line_sel_sem = line_sem[line_ind.squeeze(0)]\n\n        surface_sel_sem_gt = gt_sem.repeat(6).float()\n        line_sel_sem_gt = gt_sem.repeat(12).float()\n\n        euclidean_dist_surface = torch.sqrt(dist_surface.squeeze(0) + 1e-6)\n        euclidean_dist_line = torch.sqrt(dist_line.squeeze(0) + 1e-6)\n        objectness_label_surface = euclidean_dist_line.new_zeros(\n            num_proposals * 6, dtype=torch.long)\n        objectness_mask_surface = euclidean_dist_line.new_zeros(num_proposals *\n                                                                6)\n        objectness_label_line = euclidean_dist_line.new_zeros(\n            num_proposals * 12, dtype=torch.long)\n        objectness_mask_line = euclidean_dist_line.new_zeros(num_proposals *\n                                                             12)\n        objectness_label_surface_sem = euclidean_dist_line.new_zeros(\n            num_proposals * 6, dtype=torch.long)\n        objectness_label_line_sem = euclidean_dist_line.new_zeros(\n            num_proposals * 12, dtype=torch.long)\n\n        euclidean_dist_obj_surface = torch.sqrt((\n            (pred_obj_surface_center - surface_sel)**2).sum(dim=-1) + 1e-6)\n        euclidean_dist_obj_line = torch.sqrt(\n            torch.sum((pred_obj_line_center - line_sel)**2, dim=-1) + 1e-6)\n\n        # Objectness score just with centers\n        proposal_objectness_label[\n            euclidean_dist1 < self.train_cfg['near_threshold']] = 1\n        proposal_objectness_mask[\n            euclidean_dist1 < self.train_cfg['near_threshold']] = 1\n        proposal_objectness_mask[\n            euclidean_dist1 > self.train_cfg['far_threshold']] = 1\n\n        objectness_label_surface[\n            (euclidean_dist_obj_surface <\n             self.train_cfg['label_surface_threshold']) *\n            (euclidean_dist_surface <\n             self.train_cfg['mask_surface_threshold'])] = 1\n        objectness_label_surface_sem[\n            (euclidean_dist_obj_surface <\n             
self.train_cfg['label_surface_threshold']) *\n            (euclidean_dist_surface < self.train_cfg['mask_surface_threshold'])\n            * (surface_sel_sem == surface_sel_sem_gt)] = 1\n\n        objectness_label_line[\n            (euclidean_dist_obj_line < self.train_cfg['label_line_threshold'])\n            *\n            (euclidean_dist_line < self.train_cfg['mask_line_threshold'])] = 1\n        objectness_label_line_sem[\n            (euclidean_dist_obj_line < self.train_cfg['label_line_threshold'])\n            * (euclidean_dist_line < self.train_cfg['mask_line_threshold']) *\n            (line_sel_sem == line_sel_sem_gt)] = 1\n\n        objectness_label_surface_obj = proposal_objectness_label.repeat(6)\n        objectness_mask_surface_obj = proposal_objectness_mask.repeat(6)\n        objectness_label_line_obj = proposal_objectness_label.repeat(12)\n        objectness_mask_line_obj = proposal_objectness_mask.repeat(12)\n\n        objectness_mask_surface = objectness_mask_surface_obj\n        objectness_mask_line = objectness_mask_line_obj\n\n        cues_objectness_label = torch.cat(\n            (objectness_label_surface, objectness_label_line), 0)\n        cues_sem_label = torch.cat(\n            (objectness_label_surface_sem, objectness_label_line_sem), 0)\n        cues_mask = torch.cat((objectness_mask_surface, objectness_mask_line),\n                              0)\n\n        objectness_label_surface *= objectness_label_surface_obj\n        objectness_label_line *= objectness_label_line_obj\n        cues_matching_label = torch.cat(\n            (objectness_label_surface, objectness_label_line), 0)\n\n        objectness_label_surface_sem *= objectness_label_surface_obj\n        objectness_label_line_sem *= objectness_label_line_obj\n\n        cues_match_mask = (torch.sum(\n            cues_objectness_label.view(18, num_proposals), dim=0) >=\n                           1).float()\n\n        obj_surface_line_center = torch.cat(\n            (obj_surface_center, obj_line_center), 1).squeeze(0)\n\n        return (cues_objectness_label, cues_sem_label,\n                proposal_objectness_label, cues_mask, cues_match_mask,\n                proposal_objectness_mask, cues_matching_label,\n                obj_surface_line_center)\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py",
    "content": "import numpy as np\nimport torch\nfrom mmcv.cnn import ConvModule, normal_init, xavier_init\nfrom torch import nn as nn\n\nfrom mmdet3d.core.bbox.structures import (LiDARInstance3DBoxes,\n                                          rotation_3d_in_axis, xywhr2xyxyr)\nfrom mmdet3d.models.builder import build_loss\nfrom mmdet3d.ops import make_sparse_convmodule\nfrom mmdet3d.ops import spconv as spconv\nfrom mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu\nfrom mmdet.core import build_bbox_coder, multi_apply\nfrom mmdet.models import HEADS\n\n\n@HEADS.register_module()\nclass PartA2BboxHead(nn.Module):\n    \"\"\"PartA2 RoI head.\n\n    Args:\n        num_classes (int): The number of classes to prediction.\n        seg_in_channels (int): Input channels of segmentation\n            convolution layer.\n        part_in_channels (int): Input channels of part convolution layer.\n        seg_conv_channels (list(int)): Out channels of each\n            segmentation convolution layer.\n        part_conv_channels (list(int)): Out channels of each\n            part convolution layer.\n        merge_conv_channels (list(int)): Out channels of each\n            feature merged convolution layer.\n        down_conv_channels (list(int)): Out channels of each\n            downsampled convolution layer.\n        shared_fc_channels (list(int)): Out channels of each shared fc layer.\n        cls_channels (list(int)): Out channels of each classification layer.\n        reg_channels (list(int)): Out channels of each regression layer.\n        dropout_ratio (float): Dropout ratio of classification and\n            regression layers.\n        roi_feat_size (int): The size of pooled roi features.\n        with_corner_loss (bool): Whether to use corner loss or not.\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for box head.\n        conv_cfg (dict): Config dict of convolutional layers\n        norm_cfg (dict): Config dict of normalization layers\n        loss_bbox (dict): Config dict of box regression loss.\n        loss_cls (dict): Config dict of classifacation loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 seg_in_channels,\n                 part_in_channels,\n                 seg_conv_channels=None,\n                 part_conv_channels=None,\n                 merge_conv_channels=None,\n                 down_conv_channels=None,\n                 shared_fc_channels=None,\n                 cls_channels=None,\n                 reg_channels=None,\n                 dropout_ratio=0.1,\n                 roi_feat_size=14,\n                 with_corner_loss=True,\n                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 loss_bbox=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     reduction='none',\n                     loss_weight=1.0)):\n        super(PartA2BboxHead, self).__init__()\n        self.num_classes = num_classes\n        self.with_corner_loss = with_corner_loss\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_cls = build_loss(loss_cls)\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n\n        assert down_conv_channels[-1] == 
shared_fc_channels[0]\n\n        # init layers\n        part_channel_last = part_in_channels\n        part_conv = []\n        for i, channel in enumerate(part_conv_channels):\n            part_conv.append(\n                make_sparse_convmodule(\n                    part_channel_last,\n                    channel,\n                    3,\n                    padding=1,\n                    norm_cfg=norm_cfg,\n                    indice_key=f'rcnn_part{i}',\n                    conv_type='SubMConv3d'))\n            part_channel_last = channel\n        self.part_conv = spconv.SparseSequential(*part_conv)\n\n        seg_channel_last = seg_in_channels\n        seg_conv = []\n        for i, channel in enumerate(seg_conv_channels):\n            seg_conv.append(\n                make_sparse_convmodule(\n                    seg_channel_last,\n                    channel,\n                    3,\n                    padding=1,\n                    norm_cfg=norm_cfg,\n                    indice_key=f'rcnn_seg{i}',\n                    conv_type='SubMConv3d'))\n            seg_channel_last = channel\n        self.seg_conv = spconv.SparseSequential(*seg_conv)\n\n        self.conv_down = spconv.SparseSequential()\n\n        merge_conv_channel_last = part_channel_last + seg_channel_last\n        merge_conv = []\n        for i, channel in enumerate(merge_conv_channels):\n            merge_conv.append(\n                make_sparse_convmodule(\n                    merge_conv_channel_last,\n                    channel,\n                    3,\n                    padding=1,\n                    norm_cfg=norm_cfg,\n                    indice_key='rcnn_down0'))\n            merge_conv_channel_last = channel\n\n        down_conv_channel_last = merge_conv_channel_last\n        conv_down = []\n        for i, channel in enumerate(down_conv_channels):\n            conv_down.append(\n                make_sparse_convmodule(\n                    down_conv_channel_last,\n                    channel,\n                    3,\n                    padding=1,\n                    norm_cfg=norm_cfg,\n                    indice_key='rcnn_down1'))\n            down_conv_channel_last = channel\n\n        self.conv_down.add_module('merge_conv',\n                                  spconv.SparseSequential(*merge_conv))\n        self.conv_down.add_module(\n            'max_pool3d', spconv.SparseMaxPool3d(kernel_size=2, stride=2))\n        self.conv_down.add_module('down_conv',\n                                  spconv.SparseSequential(*conv_down))\n\n        shared_fc_list = []\n        pool_size = roi_feat_size // 2\n        pre_channel = shared_fc_channels[0] * pool_size**3\n        for k in range(1, len(shared_fc_channels)):\n            shared_fc_list.append(\n                ConvModule(\n                    pre_channel,\n                    shared_fc_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    inplace=True))\n            pre_channel = shared_fc_channels[k]\n\n            if k != len(shared_fc_channels) - 1 and dropout_ratio > 0:\n                shared_fc_list.append(nn.Dropout(dropout_ratio))\n\n        self.shared_fc = nn.Sequential(*shared_fc_list)\n\n        # Classification layer\n        channel_in = shared_fc_channels[-1]\n        cls_channel = 1\n        cls_layers = []\n        pre_channel = channel_in\n        for k in range(0, len(cls_channels)):\n            cls_layers.append(\n             
   ConvModule(\n                    pre_channel,\n                    cls_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    inplace=True))\n            pre_channel = cls_channels[k]\n        cls_layers.append(\n            ConvModule(\n                pre_channel,\n                cls_channel,\n                1,\n                padding=0,\n                conv_cfg=conv_cfg,\n                act_cfg=None))\n        if dropout_ratio >= 0:\n            cls_layers.insert(1, nn.Dropout(dropout_ratio))\n\n        self.conv_cls = nn.Sequential(*cls_layers)\n\n        # Regression layer\n        reg_layers = []\n        pre_channel = channel_in\n        for k in range(0, len(reg_channels)):\n            reg_layers.append(\n                ConvModule(\n                    pre_channel,\n                    reg_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    inplace=True))\n            pre_channel = reg_channels[k]\n        reg_layers.append(\n            ConvModule(\n                pre_channel,\n                self.bbox_coder.code_size,\n                1,\n                padding=0,\n                conv_cfg=conv_cfg,\n                act_cfg=None))\n        if dropout_ratio >= 0:\n            reg_layers.insert(1, nn.Dropout(dropout_ratio))\n\n        self.conv_reg = nn.Sequential(*reg_layers)\n\n        self.init_weights()\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the bbox head.\"\"\"\n        for m in self.modules():\n            if isinstance(m, (nn.Conv2d, nn.Conv1d)):\n                xavier_init(m, distribution='uniform')\n\n        normal_init(self.conv_reg[-1].conv, mean=0, std=0.001)\n\n    def forward(self, seg_feats, part_feats):\n        \"\"\"Forward pass.\n\n        Args:\n            seg_feats (torch.Tensor): Point-wise semantic features.\n            part_feats (torch.Tensor): Point-wise part prediction features.\n\n        Returns:\n            tuple[torch.Tensor]: Score of class and bbox predictions.\n        \"\"\"\n        # (B * N, out_x, out_y, out_z, 4)\n        rcnn_batch_size = part_feats.shape[0]\n\n        # transform to sparse tensors\n        sparse_shape = part_feats.shape[1:4]\n        # (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx]\n        sparse_idx = part_feats.sum(dim=-1).nonzero(as_tuple=False)\n\n        part_features = part_feats[sparse_idx[:, 0], sparse_idx[:, 1],\n                                   sparse_idx[:, 2], sparse_idx[:, 3]]\n        seg_features = seg_feats[sparse_idx[:, 0], sparse_idx[:, 1],\n                                 sparse_idx[:, 2], sparse_idx[:, 3]]\n        coords = sparse_idx.int()\n        part_features = spconv.SparseConvTensor(part_features, coords,\n                                                sparse_shape, rcnn_batch_size)\n        seg_features = spconv.SparseConvTensor(seg_features, coords,\n                                               sparse_shape, rcnn_batch_size)\n\n        # forward rcnn network\n        x_part = self.part_conv(part_features)\n        x_rpn = self.seg_conv(seg_features)\n\n        merged_feature = torch.cat((x_rpn.features, x_part.features),\n                                   dim=1)  # (N, C)\n        shared_feature = spconv.SparseConvTensor(merged_feature, coords,\n                                                 sparse_shape, 
rcnn_batch_size)\n\n        x = self.conv_down(shared_feature)\n\n        shared_feature = x.dense().view(rcnn_batch_size, -1, 1)\n\n        shared_feature = self.shared_fc(shared_feature)\n\n        cls_score = self.conv_cls(shared_feature).transpose(\n            1, 2).contiguous().squeeze(dim=1)  # (B, 1)\n        bbox_pred = self.conv_reg(shared_feature).transpose(\n            1, 2).contiguous().squeeze(dim=1)  # (B, C)\n\n        return cls_score, bbox_pred\n\n    def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,\n             pos_gt_bboxes, reg_mask, label_weights, bbox_weights):\n        \"\"\"Compute losses.\n\n        Args:\n            cls_score (torch.Tensor): Scores of each roi.\n            bbox_pred (torch.Tensor): Predictions of bboxes.\n            rois (torch.Tensor): Roi bboxes.\n            labels (torch.Tensor): Labels of class.\n            bbox_targets (torch.Tensor): Target of positive bboxes.\n            pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.\n            reg_mask (torch.Tensor): Mask for positive bboxes.\n            label_weights (torch.Tensor): Weights of class loss.\n            bbox_weights (torch.Tensor): Weights of bbox loss.\n\n        Returns:\n            dict: Computed losses.\n\n                - loss_cls (torch.Tensor): Loss of classes.\n                - loss_bbox (torch.Tensor): Loss of bboxes.\n                - loss_corner (torch.Tensor): Loss of corners.\n        \"\"\"\n        losses = dict()\n        rcnn_batch_size = cls_score.shape[0]\n\n        # calculate class loss\n        cls_flat = cls_score.view(-1)\n        loss_cls = self.loss_cls(cls_flat, labels, label_weights)\n        losses['loss_cls'] = loss_cls\n\n        # calculate regression loss\n        code_size = self.bbox_coder.code_size\n        pos_inds = (reg_mask > 0)\n        if not pos_inds.any():\n            # fake a part loss\n            losses['loss_bbox'] = loss_cls.new_tensor(0)\n            if self.with_corner_loss:\n                losses['loss_corner'] = loss_cls.new_tensor(0)\n        else:\n            pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds]\n            bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(\n                1, pos_bbox_pred.shape[-1])\n            loss_bbox = self.loss_bbox(\n                pos_bbox_pred.unsqueeze(dim=0), bbox_targets.unsqueeze(dim=0),\n                bbox_weights_flat.unsqueeze(dim=0))\n            losses['loss_bbox'] = loss_bbox\n\n            if self.with_corner_loss:\n                pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]\n                pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)\n                batch_anchors = pos_roi_boxes3d.clone().detach()\n                pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)\n                roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)\n                batch_anchors[..., 0:3] = 0\n                # decode boxes\n                pred_boxes3d = self.bbox_coder.decode(\n                    batch_anchors,\n                    pos_bbox_pred.view(-1, code_size)).view(-1, code_size)\n\n                pred_boxes3d[..., 0:3] = rotation_3d_in_axis(\n                    pred_boxes3d[..., 0:3].unsqueeze(1),\n                    (pos_rois_rotation + np.pi / 2),\n                    axis=2).squeeze(1)\n\n                pred_boxes3d[:, 0:3] += roi_xyz\n\n                # calculate corner loss\n                loss_corner = self.get_corner_loss_lidar(\n                    pred_boxes3d, 
pos_gt_bboxes)\n                losses['loss_corner'] = loss_corner\n\n        return losses\n\n    def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):\n        \"\"\"Generate targets.\n\n        Args:\n            sampling_results (list[:obj:`SamplingResult`]):\n                Sampled results from rois.\n            rcnn_train_cfg (:obj:`ConfigDict`): Training config of rcnn.\n            concat (bool): Whether to concatenate targets between batches.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of boxes and class prediction.\n        \"\"\"\n        pos_bboxes_list = [res.pos_bboxes for res in sampling_results]\n        pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]\n        iou_list = [res.iou for res in sampling_results]\n        targets = multi_apply(\n            self._get_target_single,\n            pos_bboxes_list,\n            pos_gt_bboxes_list,\n            iou_list,\n            cfg=rcnn_train_cfg)\n\n        (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n         bbox_weights) = targets\n\n        if concat:\n            label = torch.cat(label, 0)\n            bbox_targets = torch.cat(bbox_targets, 0)\n            pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0)\n            reg_mask = torch.cat(reg_mask, 0)\n\n            label_weights = torch.cat(label_weights, 0)\n            label_weights /= torch.clamp(label_weights.sum(), min=1.0)\n\n            bbox_weights = torch.cat(bbox_weights, 0)\n            bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0)\n\n        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n                bbox_weights)\n\n    def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):\n        \"\"\"Generate training targets for a single sample.\n\n        Args:\n            pos_bboxes (torch.Tensor): Positive boxes with shape\n                (N, 7).\n            pos_gt_bboxes (torch.Tensor): Ground truth boxes with shape\n                (M, 7).\n            ious (torch.Tensor): IoU between `pos_bboxes` and `pos_gt_bboxes`\n                in shape (N, M).\n            cfg (dict): Training configs.\n\n        Returns:\n            tuple[torch.Tensor]: Target for positive boxes.\n                (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n                bbox_weights)\n        \"\"\"\n        cls_pos_mask = ious > cfg.cls_pos_thr\n        cls_neg_mask = ious < cfg.cls_neg_thr\n        interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)\n\n        # iou regression target\n        label = (cls_pos_mask > 0).float()\n        label[interval_mask] = ious[interval_mask] * 2 - 0.5\n        # label weights\n        label_weights = (label >= 0).float()\n\n        # box regression target\n        reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()\n        reg_mask[0:pos_gt_bboxes.size(0)] = 1\n        bbox_weights = (reg_mask > 0).float()\n        if reg_mask.bool().any():\n            pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()\n            roi_center = pos_bboxes[..., 0:3]\n            roi_ry = pos_bboxes[..., 6] % (2 * np.pi)\n\n            # canonical transformation\n            pos_gt_bboxes_ct[..., 0:3] -= roi_center\n            pos_gt_bboxes_ct[..., 6] -= roi_ry\n            pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(\n                pos_gt_bboxes_ct[..., 0:3].unsqueeze(1),\n                -(roi_ry + np.pi / 2),\n                axis=2).squeeze(1)\n\n            # flip orientation if rois have opposite 
orientation\n            ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi)  # 0 ~ 2pi\n            opposite_flag = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)\n            ry_label[opposite_flag] = (ry_label[opposite_flag] + np.pi) % (\n                2 * np.pi)  # (0 ~ pi/2, 3pi/2 ~ 2pi)\n            flag = ry_label > np.pi\n            ry_label[flag] = ry_label[flag] - np.pi * 2  # (-pi/2, pi/2)\n            ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)\n            pos_gt_bboxes_ct[..., 6] = ry_label\n\n            rois_anchor = pos_bboxes.clone().detach()\n            rois_anchor[:, 0:3] = 0\n            rois_anchor[:, 6] = 0\n            bbox_targets = self.bbox_coder.encode(rois_anchor,\n                                                  pos_gt_bboxes_ct)\n        else:\n            # no fg bbox\n            bbox_targets = pos_gt_bboxes.new_empty((0, 7))\n\n        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,\n                bbox_weights)\n\n    def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1):\n        \"\"\"Calculate corner loss of given boxes.\n\n        Args:\n            pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).\n            gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).\n\n        Returns:\n            torch.FloatTensor: Calculated corner loss in shape (N).\n        \"\"\"\n        assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]\n\n        # This is a little bit hack here because we assume the box for\n        # Part-A2 is in LiDAR coordinates\n        gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d)\n        pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners\n        gt_box_corners = gt_boxes_structure.corners\n\n        # This flip only changes the heading direction of GT boxes\n        gt_bbox3d_flip = gt_boxes_structure.clone()\n        gt_bbox3d_flip.tensor[:, 6] += np.pi\n        gt_box_corners_flip = gt_bbox3d_flip.corners\n\n        corner_dist = torch.min(\n            torch.norm(pred_box_corners - gt_box_corners, dim=2),\n            torch.norm(pred_box_corners - gt_box_corners_flip,\n                       dim=2))  # (N, 8)\n        # huber loss\n        abs_error = torch.abs(corner_dist)\n        quadratic = torch.clamp(abs_error, max=delta)\n        linear = (abs_error - quadratic)\n        corner_loss = 0.5 * quadratic**2 + delta * linear\n\n        return corner_loss.mean(dim=1)\n\n    def get_bboxes(self,\n                   rois,\n                   cls_score,\n                   bbox_pred,\n                   class_labels,\n                   class_pred,\n                   img_metas,\n                   cfg=None):\n        \"\"\"Generate bboxes from bbox head predictions.\n\n        Args:\n            rois (torch.Tensor): Roi bounding boxes.\n            cls_score (torch.Tensor): Scores of bounding boxes.\n            bbox_pred (torch.Tensor): Bounding boxes predictions\n            class_labels (torch.Tensor): Label of classes\n            class_pred (torch.Tensor): Score for nms.\n            img_metas (list[dict]): Point cloud and image's meta info.\n            cfg (:obj:`ConfigDict`): Testing config.\n\n        Returns:\n            list[tuple]: Decoded bbox, scores and labels after nms.\n        \"\"\"\n        roi_batch_id = rois[..., 0]\n        roi_boxes = rois[..., 1:]  # boxes without batch id\n        batch_size = int(roi_batch_id.max().item() + 1)\n\n        # decode boxes\n        roi_ry = roi_boxes[..., 6].view(-1)\n    
    roi_xyz = roi_boxes[..., 0:3].view(-1, 3)\n        local_roi_boxes = roi_boxes.clone().detach()\n        local_roi_boxes[..., 0:3] = 0\n        rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)\n        rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(\n            rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2),\n            axis=2).squeeze(1)\n        rcnn_boxes3d[:, 0:3] += roi_xyz\n\n        # post processing\n        result_list = []\n        for batch_id in range(batch_size):\n            cur_class_labels = class_labels[batch_id]\n            cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1)\n\n            cur_box_prob = class_pred[batch_id]\n            cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]\n            selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,\n                                            cfg.score_thr, cfg.nms_thr,\n                                            img_metas[batch_id],\n                                            cfg.use_rotate_nms)\n            selected_bboxes = cur_rcnn_boxes3d[selected]\n            selected_label_preds = cur_class_labels[selected]\n            selected_scores = cur_cls_score[selected]\n\n            result_list.append(\n                (img_metas[batch_id]['box_type_3d'](selected_bboxes,\n                                                    self.bbox_coder.code_size),\n                 selected_scores, selected_label_preds))\n        return result_list\n\n    def multi_class_nms(self,\n                        box_probs,\n                        box_preds,\n                        score_thr,\n                        nms_thr,\n                        input_meta,\n                        use_rotate_nms=True):\n        \"\"\"Multi-class NMS for box head.\n\n        Note:\n            This function has large overlap with the `box3d_multiclass_nms`\n            implemented in `mmdet3d.core.post_processing`. 
We are considering\n            merging these two functions in the future.\n\n        Args:\n            box_probs (torch.Tensor): Predicted boxes probabilities in\n                shape (N, num_classes).\n            box_preds (torch.Tensor): Predicted boxes in shape (N, 7+C).\n            score_thr (float): Threshold of scores.\n            nms_thr (float): Threshold for NMS.\n            input_meta (dict): Meta information of the current sample.\n            use_rotate_nms (bool, optional): Whether to use rotated nms.\n                Defaults to True.\n\n        Returns:\n            torch.Tensor: Selected indices.\n        \"\"\"\n        if use_rotate_nms:\n            nms_func = nms_gpu\n        else:\n            nms_func = nms_normal_gpu\n\n        assert box_probs.shape[\n            1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}'\n        selected_list = []\n        selected_labels = []\n        boxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](\n            box_preds, self.bbox_coder.code_size).bev)\n\n        score_thresh = score_thr if isinstance(\n            score_thr, list) else [score_thr for x in range(self.num_classes)]\n        nms_thresh = nms_thr if isinstance(\n            nms_thr, list) else [nms_thr for x in range(self.num_classes)]\n        for k in range(0, self.num_classes):\n            class_scores_keep = box_probs[:, k] >= score_thresh[k]\n\n            if class_scores_keep.int().sum() > 0:\n                original_idxs = class_scores_keep.nonzero(\n                    as_tuple=False).view(-1)\n                cur_boxes_for_nms = boxes_for_nms[class_scores_keep]\n                cur_rank_scores = box_probs[class_scores_keep, k]\n\n                cur_selected = nms_func(cur_boxes_for_nms, cur_rank_scores,\n                                        nms_thresh[k])\n\n                if cur_selected.shape[0] == 0:\n                    continue\n                selected_list.append(original_idxs[cur_selected])\n                selected_labels.append(\n                    torch.full([cur_selected.shape[0]],\n                               k + 1,\n                               dtype=torch.int64,\n                               device=box_preds.device))\n\n        selected = torch.cat(\n            selected_list, dim=0) if len(selected_list) > 0 else []\n        return selected\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/h3d_roi_head.py",
    "content": "from mmdet3d.core.bbox import bbox3d2result\nfrom mmdet.models import HEADS\nfrom ..builder import build_head\nfrom .base_3droi_head import Base3DRoIHead\n\n\n@HEADS.register_module()\nclass H3DRoIHead(Base3DRoIHead):\n    \"\"\"H3D roi head for H3DNet.\n\n    Args:\n        primitive_list (List): Configs of primitive heads.\n        bbox_head (ConfigDict): Config of bbox_head.\n        train_cfg (ConfigDict): Training config.\n        test_cfg (ConfigDict): Testing config.\n    \"\"\"\n\n    def __init__(self,\n                 primitive_list,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None):\n        super(H3DRoIHead, self).__init__(\n            bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg)\n        # Primitive module\n        assert len(primitive_list) == 3\n        self.primitive_z = build_head(primitive_list[0])\n        self.primitive_xy = build_head(primitive_list[1])\n        self.primitive_line = build_head(primitive_list[2])\n\n    def init_weights(self, pretrained):\n        \"\"\"Initialize weights, skip since ``H3DROIHead`` does not need to\n        initialize weights.\"\"\"\n        pass\n\n    def init_mask_head(self):\n        \"\"\"Initialize mask head, skip since ``H3DROIHead`` does not have\n        one.\"\"\"\n        pass\n\n    def init_bbox_head(self, bbox_head):\n        \"\"\"Initialize box head.\"\"\"\n        bbox_head['train_cfg'] = self.train_cfg\n        bbox_head['test_cfg'] = self.test_cfg\n        self.bbox_head = build_head(bbox_head)\n\n    def init_assigner_sampler(self):\n        \"\"\"Initialize assigner and sampler.\"\"\"\n        pass\n\n    def forward_train(self,\n                      feats_dict,\n                      img_metas,\n                      points,\n                      gt_bboxes_3d,\n                      gt_labels_3d,\n                      pts_semantic_mask,\n                      pts_instance_mask,\n                      gt_bboxes_ignore=None):\n        \"\"\"Training forward function of PartAggregationROIHead.\n\n        Args:\n            feats_dict (dict): Contains features from the first stage.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \\\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (None | list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (None | list[torch.Tensor]): Point-wise\n                instance mask.\n            gt_bboxes_ignore (None | list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict: losses from each head.\n        \"\"\"\n        losses = dict()\n\n        sample_mod = self.train_cfg.sample_mod\n        assert sample_mod in ['vote', 'seed', 'random']\n        result_z = self.primitive_z(feats_dict, sample_mod)\n        feats_dict.update(result_z)\n\n        result_xy = self.primitive_xy(feats_dict, sample_mod)\n        feats_dict.update(result_xy)\n\n        result_line = self.primitive_line(feats_dict, sample_mod)\n        feats_dict.update(result_line)\n\n        primitive_loss_inputs = (feats_dict, points, gt_bboxes_3d,\n                                 gt_labels_3d, pts_semantic_mask,\n                                 pts_instance_mask, img_metas,\n                                 
gt_bboxes_ignore)\n\n        loss_z = self.primitive_z.loss(*primitive_loss_inputs)\n        losses.update(loss_z)\n\n        loss_xy = self.primitive_xy.loss(*primitive_loss_inputs)\n        losses.update(loss_xy)\n\n        loss_line = self.primitive_line.loss(*primitive_loss_inputs)\n        losses.update(loss_line)\n\n        targets = feats_dict.pop('targets')\n\n        bbox_results = self.bbox_head(feats_dict, sample_mod)\n\n        feats_dict.update(bbox_results)\n        bbox_loss = self.bbox_head.loss(feats_dict, points, gt_bboxes_3d,\n                                        gt_labels_3d, pts_semantic_mask,\n                                        pts_instance_mask, img_metas, targets,\n                                        gt_bboxes_ignore)\n        losses.update(bbox_loss)\n\n        return losses\n\n    def simple_test(self, feats_dict, img_metas, points, rescale=False):\n        \"\"\"Simple testing forward function of H3DRoIHead.\n\n        Note:\n            This function assumes that the batch size is 1.\n\n        Args:\n            feats_dict (dict): Contains features from the first stage.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            points (torch.Tensor): Input points.\n            rescale (bool): Whether to rescale results.\n\n        Returns:\n            list[dict]: Bbox results of one frame.\n        \"\"\"\n        sample_mod = self.test_cfg.sample_mod\n        assert sample_mod in ['vote', 'seed', 'random']\n\n        result_z = self.primitive_z(feats_dict, sample_mod)\n        feats_dict.update(result_z)\n\n        result_xy = self.primitive_xy(feats_dict, sample_mod)\n        feats_dict.update(result_xy)\n\n        result_line = self.primitive_line(feats_dict, sample_mod)\n        feats_dict.update(result_line)\n\n        bbox_preds = self.bbox_head(feats_dict, sample_mod)\n        feats_dict.update(bbox_preds)\n        bbox_list = self.bbox_head.get_bboxes(\n            points,\n            feats_dict,\n            img_metas,\n            rescale=rescale,\n            suffix='_optimized')\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/mask_heads/__init__.py",
    "content": "from .pointwise_semantic_head import PointwiseSemanticHead\nfrom .primitive_head import PrimitiveHead\n\n__all__ = ['PointwiseSemanticHead', 'PrimitiveHead']\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py",
    "content": "import torch\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.core.bbox.structures import rotation_3d_in_axis\nfrom mmdet3d.models.builder import build_loss\nfrom mmdet.core import multi_apply\nfrom mmdet.models import HEADS\n\n\n@HEADS.register_module()\nclass PointwiseSemanticHead(nn.Module):\n    \"\"\"Semantic segmentation head for point-wise segmentation.\n\n    Predict point-wise segmentation and part regression results for PartA2.\n    See `paper <https://arxiv.org/abs/1907.03670>`_ for more detials.\n\n    Args:\n        in_channels (int): The number of input channel.\n        num_classes (int): The number of class.\n        extra_width (float): Boxes enlarge width.\n        loss_seg (dict): Config of segmentation loss.\n        loss_part (dict): Config of part prediction loss.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 num_classes=3,\n                 extra_width=0.2,\n                 seg_score_thr=0.3,\n                 loss_seg=dict(\n                     type='FocalLoss',\n                     use_sigmoid=True,\n                     reduction='sum',\n                     gamma=2.0,\n                     alpha=0.25,\n                     loss_weight=1.0),\n                 loss_part=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0)):\n        super(PointwiseSemanticHead, self).__init__()\n        self.extra_width = extra_width\n        self.num_classes = num_classes\n        self.seg_score_thr = seg_score_thr\n        self.seg_cls_layer = nn.Linear(in_channels, 1, bias=True)\n        self.seg_reg_layer = nn.Linear(in_channels, 3, bias=True)\n\n        self.loss_seg = build_loss(loss_seg)\n        self.loss_part = build_loss(loss_part)\n\n    def forward(self, x):\n        \"\"\"Forward pass.\n\n        Args:\n            x (torch.Tensor): Features from the first stage.\n\n        Returns:\n            dict: Part features, segmentation and part predictions.\n\n                - seg_preds (torch.Tensor): Segment predictions.\n                - part_preds (torch.Tensor): Part predictions.\n                - part_feats (torch.Tensor): Feature predictions.\n        \"\"\"\n        seg_preds = self.seg_cls_layer(x)  # (N, 1)\n        part_preds = self.seg_reg_layer(x)  # (N, 3)\n\n        seg_scores = torch.sigmoid(seg_preds).detach()\n        seg_mask = (seg_scores > self.seg_score_thr)\n\n        part_offsets = torch.sigmoid(part_preds).clone().detach()\n        part_offsets[seg_mask.view(-1) == 0] = 0\n        part_feats = torch.cat((part_offsets, seg_scores),\n                               dim=-1)  # shape (npoints, 4)\n        return dict(\n            seg_preds=seg_preds, part_preds=part_preds, part_feats=part_feats)\n\n    def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"generate segmentation and part prediction targets for a single\n        sample.\n\n        Args:\n            voxel_centers (torch.Tensor): The center of voxels in shape \\\n                (voxel_num, 3).\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \\\n                shape (box_num, 7).\n            gt_labels_3d (torch.Tensor): Class labels of ground truths in \\\n                shape (box_num).\n\n        Returns:\n            tuple[torch.Tensor]: Segmentation targets with shape [voxel_num] \\\n                part prediction targets with shape [voxel_num, 3]\n  
      \"\"\"\n        gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)\n        enlarged_gt_boxes = gt_bboxes_3d.enlarged_box(self.extra_width)\n\n        part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),\n                                               dtype=torch.float32)\n        box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)\n        enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(\n            voxel_centers).long()\n\n        gt_labels_pad = F.pad(\n            gt_labels_3d, (1, 0), mode='constant', value=self.num_classes)\n        seg_targets = gt_labels_pad[(box_idx.long() + 1)]\n        fg_pt_flag = box_idx > -1\n        ignore_flag = fg_pt_flag ^ (enlarge_box_idx > -1)\n        seg_targets[ignore_flag] = -1\n\n        for k in range(len(gt_bboxes_3d)):\n            k_box_flag = box_idx == k\n            # no point in current box (caused by velodyne reduce)\n            if not k_box_flag.any():\n                continue\n            fg_voxels = voxel_centers[k_box_flag]\n            transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k]\n            transformed_voxels = rotation_3d_in_axis(\n                transformed_voxels.unsqueeze(0),\n                -gt_bboxes_3d.yaw[k].view(1),\n                axis=2)\n            part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d.dims[\n                k] + voxel_centers.new_tensor([0.5, 0.5, 0])\n\n        part_targets = torch.clamp(part_targets, min=0)\n        return seg_targets, part_targets\n\n    def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"generate segmentation and part prediction targets.\n\n        Args:\n            voxel_centers (torch.Tensor): The center of voxels in shape \\\n                (voxel_num, 3).\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \\\n                shape (box_num, 7).\n            gt_labels_3d (torch.Tensor): Class labels of ground truths in \\\n                shape (box_num).\n\n        Returns:\n            dict: Prediction targets\n\n                - seg_targets (torch.Tensor): Segmentation targets \\\n                    with shape [voxel_num].\n                - part_targets (torch.Tensor): Part prediction targets \\\n                    with shape [voxel_num, 3].\n        \"\"\"\n        batch_size = len(gt_labels_3d)\n        voxel_center_list = []\n        for idx in range(batch_size):\n            coords_idx = voxels_dict['coors'][:, 0] == idx\n            voxel_center_list.append(voxels_dict['voxel_centers'][coords_idx])\n\n        seg_targets, part_targets = multi_apply(self.get_targets_single,\n                                                voxel_center_list,\n                                                gt_bboxes_3d, gt_labels_3d)\n        seg_targets = torch.cat(seg_targets, dim=0)\n        part_targets = torch.cat(part_targets, dim=0)\n        return dict(seg_targets=seg_targets, part_targets=part_targets)\n\n    def loss(self, semantic_results, semantic_targets):\n        \"\"\"Calculate point-wise segmentation and part prediction losses.\n\n        Args:\n            semantic_results (dict): Results from semantic head.\n\n                - seg_preds: Segmentation predictions.\n                - part_preds: Part predictions.\n\n            semantic_targets (dict): Targets of semantic results.\n\n                - seg_preds: Segmentation targets.\n                - part_preds: Part targets.\n\n        Returns:\n            dict: Loss of segmentation and part 
prediction.\n\n                - loss_seg (torch.Tensor): Segmentation prediction loss.\n                - loss_part (torch.Tensor): Part prediction loss.\n        \"\"\"\n        seg_preds = semantic_results['seg_preds']\n        part_preds = semantic_results['part_preds']\n        seg_targets = semantic_targets['seg_targets']\n        part_targets = semantic_targets['part_targets']\n\n        pos_mask = (seg_targets > -1) & (seg_targets < self.num_classes)\n        binary_seg_target = pos_mask.long()\n        pos = pos_mask.float()\n        neg = (seg_targets == self.num_classes).float()\n        seg_weights = pos + neg\n        pos_normalizer = pos.sum()\n        seg_weights = seg_weights / torch.clamp(pos_normalizer, min=1.0)\n        loss_seg = self.loss_seg(seg_preds, binary_seg_target, seg_weights)\n\n        if pos_normalizer > 0:\n            loss_part = self.loss_part(part_preds[pos_mask],\n                                       part_targets[pos_mask])\n        else:\n            # fake a part loss\n            loss_part = loss_seg.new_tensor(0)\n\n        return dict(loss_seg=loss_seg, loss_part=loss_part)\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/mask_heads/primitive_head.py",
    "content": "import torch\nfrom mmcv.cnn import ConvModule\nfrom torch import nn as nn\nfrom torch.nn import functional as F\n\nfrom mmdet3d.models.builder import build_loss\nfrom mmdet3d.models.model_utils import VoteModule\nfrom mmdet3d.ops import build_sa_module, furthest_point_sample\nfrom mmdet.core import multi_apply\nfrom mmdet.models import HEADS\n\n\n@HEADS.register_module()\nclass PrimitiveHead(nn.Module):\n    r\"\"\"Primitive head of `H3DNet <https://arxiv.org/abs/2006.05682>`_.\n\n    Args:\n        num_dims (int): The dimension of primitive semantic information.\n        num_classes (int): The number of class.\n        primitive_mode (str): The mode of primitive module,\n            avaliable mode ['z', 'xy', 'line'].\n        bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and\n            decoding boxes.\n        train_cfg (dict): Config for training.\n        test_cfg (dict): Config for testing.\n        vote_module_cfg (dict): Config of VoteModule for point-wise votes.\n        vote_aggregation_cfg (dict): Config of vote aggregation layer.\n        feat_channels (tuple[int]): Convolution channels of\n            prediction layer.\n        upper_thresh (float): Threshold for line matching.\n        surface_thresh (float): Threshold for suface matching.\n        conv_cfg (dict): Config of convolution in prediction layer.\n        norm_cfg (dict): Config of BN in prediction layer.\n        objectness_loss (dict): Config of objectness loss.\n        center_loss (dict): Config of center loss.\n        semantic_loss (dict): Config of point-wise semantic segmentation loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_dims,\n                 num_classes,\n                 primitive_mode,\n                 train_cfg=None,\n                 test_cfg=None,\n                 vote_module_cfg=None,\n                 vote_aggregation_cfg=None,\n                 feat_channels=(128, 128),\n                 upper_thresh=100.0,\n                 surface_thresh=0.5,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 objectness_loss=None,\n                 center_loss=None,\n                 semantic_reg_loss=None,\n                 semantic_cls_loss=None):\n        super(PrimitiveHead, self).__init__()\n        assert primitive_mode in ['z', 'xy', 'line']\n        # The dimension of primitive semantic information.\n        self.num_dims = num_dims\n        self.num_classes = num_classes\n        self.primitive_mode = primitive_mode\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.gt_per_seed = vote_module_cfg['gt_per_seed']\n        self.num_proposal = vote_aggregation_cfg['num_point']\n        self.upper_thresh = upper_thresh\n        self.surface_thresh = surface_thresh\n\n        self.objectness_loss = build_loss(objectness_loss)\n        self.center_loss = build_loss(center_loss)\n        self.semantic_reg_loss = build_loss(semantic_reg_loss)\n        self.semantic_cls_loss = build_loss(semantic_cls_loss)\n\n        assert vote_aggregation_cfg['mlp_channels'][0] == vote_module_cfg[\n            'in_channels']\n\n        # Primitive existence flag prediction\n        self.flag_conv = ConvModule(\n            vote_module_cfg['conv_channels'][-1],\n            vote_module_cfg['conv_channels'][-1] // 2,\n            1,\n            padding=0,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            bias=True,\n            inplace=True)\n        
        self.flag_pred = torch.nn.Conv1d(\n            vote_module_cfg['conv_channels'][-1] // 2, 2, 1)\n\n        self.vote_module = VoteModule(**vote_module_cfg)\n        self.vote_aggregation = build_sa_module(vote_aggregation_cfg)\n\n        prev_channel = vote_aggregation_cfg['mlp_channels'][-1]\n        conv_pred_list = list()\n        for k in range(len(feat_channels)):\n            conv_pred_list.append(\n                ConvModule(\n                    prev_channel,\n                    feat_channels[k],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    bias=True,\n                    inplace=True))\n            prev_channel = feat_channels[k]\n        self.conv_pred = nn.Sequential(*conv_pred_list)\n\n        conv_out_channel = 3 + num_dims + num_classes\n        self.conv_pred.add_module('conv_out',\n                                  nn.Conv1d(prev_channel, conv_out_channel, 1))\n\n    def init_weights(self):\n        \"\"\"Initialize weights of PrimitiveHead.\"\"\"\n        pass\n\n    def forward(self, feats_dict, sample_mod):\n        \"\"\"Forward pass.\n\n        Args:\n            feats_dict (dict): Feature dict from backbone.\n            sample_mod (str): Sample mode for vote aggregation layer.\n                Valid modes are \"vote\", \"seed\" and \"random\".\n\n        Returns:\n            dict: Predictions of primitive head.\n        \"\"\"\n        assert sample_mod in ['vote', 'seed', 'random']\n\n        seed_points = feats_dict['fp_xyz_net0'][-1]\n        seed_features = feats_dict['hd_feature']\n        results = {}\n\n        primitive_flag = self.flag_conv(seed_features)\n        primitive_flag = self.flag_pred(primitive_flag)\n\n        results['pred_flag_' + self.primitive_mode] = primitive_flag\n\n        # 1. generate vote_points from seed_points\n        vote_points, vote_features, _ = self.vote_module(\n            seed_points, seed_features)\n        results['vote_' + self.primitive_mode] = vote_points\n        results['vote_features_' + self.primitive_mode] = vote_features\n\n        # 2. aggregate vote_points\n        if sample_mod == 'vote':\n            # use fps in vote_aggregation\n            sample_indices = None\n        elif sample_mod == 'seed':\n            # FPS on seed and choose the votes corresponding to the seeds\n            sample_indices = furthest_point_sample(seed_points,\n                                                   self.num_proposal)\n        elif sample_mod == 'random':\n            # Random sampling from the votes\n            batch_size, num_seed = seed_points.shape[:2]\n            sample_indices = torch.randint(\n                0,\n                num_seed, (batch_size, self.num_proposal),\n                dtype=torch.int32,\n                device=seed_points.device)\n        else:\n            raise NotImplementedError('Unsupported sample mode!')\n\n        vote_aggregation_ret = self.vote_aggregation(vote_points,\n                                                     vote_features,\n                                                     sample_indices)\n        aggregated_points, features, aggregated_indices = vote_aggregation_ret\n        results['aggregated_points_' + self.primitive_mode] = aggregated_points\n        results['aggregated_features_' + self.primitive_mode] = features\n        results['aggregated_indices_' +\n                self.primitive_mode] = aggregated_indices\n\n        # 3. 
predict primitive offsets and semantic information\n        predictions = self.conv_pred(features)\n\n        # 4. decode predictions\n        decode_ret = self.primitive_decode_scores(predictions,\n                                                  aggregated_points)\n        results.update(decode_ret)\n\n        center, pred_ind = self.get_primitive_center(\n            primitive_flag, decode_ret['center_' + self.primitive_mode])\n\n        results['pred_' + self.primitive_mode + '_ind'] = pred_ind\n        results['pred_' + self.primitive_mode + '_center'] = center\n        return results\n\n    def loss(self,\n             bbox_preds,\n             points,\n             gt_bboxes_3d,\n             gt_labels_3d,\n             pts_semantic_mask=None,\n             pts_instance_mask=None,\n             img_metas=None,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss.\n\n        Args:\n            bbox_preds (dict): Predictions from forward of primitive head.\n            points (list[torch.Tensor]): Input points.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \\\n                bboxes of each sample.\n            gt_labels_3d (list[torch.Tensor]): Labels of each sample.\n            pts_semantic_mask (None | list[torch.Tensor]): Point-wise\n                semantic mask.\n            pts_instance_mask (None | list[torch.Tensor]): Point-wise\n                instance mask.\n            img_metas (list[dict]): Contain pcd and img's meta info.\n            gt_bboxes_ignore (None | list[torch.Tensor]): Specify\n                which bounding.\n\n        Returns:\n            dict: Losses of Primitive Head.\n        \"\"\"\n        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,\n                                   pts_semantic_mask, pts_instance_mask,\n                                   bbox_preds)\n\n        (point_mask, point_offset, gt_primitive_center, gt_primitive_semantic,\n         gt_sem_cls_label, gt_primitive_mask) = targets\n\n        losses = {}\n        # Compute the loss of primitive existence flag\n        pred_flag = bbox_preds['pred_flag_' + self.primitive_mode]\n        flag_loss = self.objectness_loss(pred_flag, gt_primitive_mask.long())\n        losses['flag_loss_' + self.primitive_mode] = flag_loss\n\n        # calculate vote loss\n        vote_loss = self.vote_module.get_loss(\n            bbox_preds['seed_points'],\n            bbox_preds['vote_' + self.primitive_mode],\n            bbox_preds['seed_indices'], point_mask, point_offset)\n        losses['vote_loss_' + self.primitive_mode] = vote_loss\n\n        num_proposal = bbox_preds['aggregated_points_' +\n                                  self.primitive_mode].shape[1]\n        primitive_center = bbox_preds['center_' + self.primitive_mode]\n        if self.primitive_mode != 'line':\n            primitive_semantic = bbox_preds['size_residuals_' +\n                                            self.primitive_mode].contiguous()\n        else:\n            primitive_semantic = None\n        semancitc_scores = bbox_preds['sem_cls_scores_' +\n                                      self.primitive_mode].transpose(2, 1)\n\n        gt_primitive_mask = gt_primitive_mask / \\\n            (gt_primitive_mask.sum() + 1e-6)\n        center_loss, size_loss, sem_cls_loss = self.compute_primitive_loss(\n            primitive_center, primitive_semantic, semancitc_scores,\n            num_proposal, gt_primitive_center, gt_primitive_semantic,\n            gt_sem_cls_label, 
gt_primitive_mask)\n        losses['center_loss_' + self.primitive_mode] = center_loss\n        losses['size_loss_' + self.primitive_mode] = size_loss\n        losses['sem_loss_' + self.primitive_mode] = sem_cls_loss\n\n        return losses\n\n    def get_targets(self,\n                    points,\n                    gt_bboxes_3d,\n                    gt_labels_3d,\n                    pts_semantic_mask=None,\n                    pts_instance_mask=None,\n                    bbox_preds=None):\n        \"\"\"Generate targets of primitive head.\n\n        Args:\n            points (list[torch.Tensor]): Points of each batch.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \\\n                bboxes of each batch.\n            gt_labels_3d (list[torch.Tensor]): Labels of each batch.\n            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance\n                label of each batch.\n            bbox_preds (dict): Predictions from forward of primitive head.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of primitive head.\n        \"\"\"\n        for index in range(len(gt_labels_3d)):\n            if len(gt_labels_3d[index]) == 0:\n                fake_box = gt_bboxes_3d[index].tensor.new_zeros(\n                    1, gt_bboxes_3d[index].tensor.shape[-1])\n                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)\n                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)\n\n        if pts_semantic_mask is None:\n            pts_semantic_mask = [None for i in range(len(gt_labels_3d))]\n            pts_instance_mask = [None for i in range(len(gt_labels_3d))]\n\n        (point_mask, point_sem,\n         point_offset) = multi_apply(self.get_targets_single, points,\n                                     gt_bboxes_3d, gt_labels_3d,\n                                     pts_semantic_mask, pts_instance_mask)\n\n        point_mask = torch.stack(point_mask)\n        point_sem = torch.stack(point_sem)\n        point_offset = torch.stack(point_offset)\n\n        batch_size = point_mask.shape[0]\n        num_proposal = bbox_preds['aggregated_points_' +\n                                  self.primitive_mode].shape[1]\n        num_seed = bbox_preds['seed_points'].shape[1]\n        seed_inds = bbox_preds['seed_indices'].long()\n        seed_inds_expand = seed_inds.view(batch_size, num_seed,\n                                          1).repeat(1, 1, 3)\n        seed_gt_votes = torch.gather(point_offset, 1, seed_inds_expand)\n        seed_gt_votes += bbox_preds['seed_points']\n        gt_primitive_center = seed_gt_votes.view(batch_size * num_proposal, 1,\n                                                 3)\n\n        seed_inds_expand_sem = seed_inds.view(batch_size, num_seed, 1).repeat(\n            1, 1, 4 + self.num_dims)\n        seed_gt_sem = torch.gather(point_sem, 1, seed_inds_expand_sem)\n        gt_primitive_semantic = seed_gt_sem[:, :, 3:3 + self.num_dims].view(\n            batch_size * num_proposal, 1, self.num_dims).contiguous()\n\n        gt_sem_cls_label = seed_gt_sem[:, :, -1].long()\n\n        gt_votes_mask = torch.gather(point_mask, 1, seed_inds)\n\n        return (point_mask, point_offset, gt_primitive_center,\n                gt_primitive_semantic, gt_sem_cls_label, gt_votes_mask)\n\n    def get_targets_single(self,\n                           points,\n                           
gt_bboxes_3d,\n                           gt_labels_3d,\n                           pts_semantic_mask=None,\n                           pts_instance_mask=None):\n        \"\"\"Generate targets of primitive head for single batch.\n\n        Args:\n            points (torch.Tensor): Points of each batch.\n            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \\\n                boxes of each batch.\n            gt_labels_3d (torch.Tensor): Labels of each batch.\n            pts_semantic_mask (None | torch.Tensor): Point-wise semantic\n                label of each batch.\n            pts_instance_mask (None | torch.Tensor): Point-wise instance\n                label of each batch.\n\n        Returns:\n            tuple[torch.Tensor]: Targets of primitive head.\n        \"\"\"\n        gt_bboxes_3d = gt_bboxes_3d.to(points.device)\n        num_points = points.shape[0]\n\n        point_mask = points.new_zeros(num_points)\n        # Offset to the primitive center\n        point_offset = points.new_zeros([num_points, 3])\n        # Semantic information of primitive center\n        point_sem = points.new_zeros([num_points, 3 + self.num_dims + 1])\n\n        # Generate pts_semantic_mask and pts_instance_mask when they are None\n        if pts_semantic_mask is None or pts_instance_mask is None:\n            points2box_mask = gt_bboxes_3d.points_in_boxes(points)\n            assignment = points2box_mask.argmax(1)\n            background_mask = points2box_mask.max(1)[0] == 0\n\n            if pts_semantic_mask is None:\n                pts_semantic_mask = gt_labels_3d[assignment]\n                pts_semantic_mask[background_mask] = self.num_classes\n\n            if pts_instance_mask is None:\n                pts_instance_mask = assignment\n                pts_instance_mask[background_mask] = gt_labels_3d.shape[0]\n\n        instance_flag = torch.nonzero(\n            pts_semantic_mask != self.num_classes, as_tuple=False).squeeze(1)\n        instance_labels = pts_instance_mask[instance_flag].unique()\n\n        with_yaw = gt_bboxes_3d.with_yaw\n        for i, i_instance in enumerate(instance_labels):\n            indices = instance_flag[pts_instance_mask[instance_flag] ==\n                                    i_instance]\n            coords = points[indices, :3]\n            cur_cls_label = pts_semantic_mask[indices][0]\n\n            # Bbox Corners\n            cur_corners = gt_bboxes_3d.corners[i]\n\n            plane_lower_temp = points.new_tensor(\n                [0, 0, 1, -cur_corners[7, -1]])\n            upper_points = cur_corners[[1, 2, 5, 6]]\n            refined_distance = (upper_points * plane_lower_temp[:3]).sum(dim=1)\n\n            if self.check_horizon(upper_points) and \\\n                    plane_lower_temp[0] + plane_lower_temp[1] < \\\n                    self.train_cfg['lower_thresh']:\n                plane_lower = points.new_tensor(\n                    [0, 0, 1, plane_lower_temp[-1]])\n                plane_upper = points.new_tensor(\n                    [0, 0, 1, -torch.mean(refined_distance)])\n            else:\n                raise NotImplementedError('Only horizontal plane is support!')\n\n            if self.check_dist(plane_upper, upper_points) is False:\n                raise NotImplementedError(\n                    'Mean distance to plane should be lower than thresh!')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_lower, coords)\n\n            # Get bottom four 
lines\n            if self.primitive_mode == 'line':\n                point2line_matching = self.match_point2line(\n                    coords[selected], cur_corners, with_yaw, mode='bottom')\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_line_targets(point_mask,\n                                                        point_offset,\n                                                        point_sem,\n                                                        coords[selected],\n                                                        indices[selected],\n                                                        cur_cls_label,\n                                                        point2line_matching,\n                                                        cur_corners,\n                                                        [1, 1, 0, 0],\n                                                        with_yaw,\n                                                        mode='bottom')\n\n            # Set the surface labels here\n            if self.primitive_mode == 'z' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    point2plane_dist[selected].var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(point_mask,\n                                                           point_offset,\n                                                           point_sem,\n                                                           coords[selected],\n                                                           indices[selected],\n                                                           cur_cls_label,\n                                                           cur_corners,\n                                                           with_yaw,\n                                                           mode='bottom')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_upper, coords)\n\n            # Get top four lines\n            if self.primitive_mode == 'line':\n                point2line_matching = self.match_point2line(\n                    coords[selected], cur_corners, with_yaw, mode='top')\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_line_targets(point_mask,\n                                                        point_offset,\n                                                        point_sem,\n                                                        coords[selected],\n                                                        indices[selected],\n                                                        cur_cls_label,\n                                                        point2line_matching,\n                                                        cur_corners,\n                                                        [1, 1, 0, 0],\n                                                        with_yaw,\n                                                        mode='top')\n\n            if self.primitive_mode == 'z' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    point2plane_dist[selected].var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = 
\\\n                    self._assign_primitive_surface_targets(point_mask,\n                                                           point_offset,\n                                                           point_sem,\n                                                           coords[selected],\n                                                           indices[selected],\n                                                           cur_cls_label,\n                                                           cur_corners,\n                                                           with_yaw,\n                                                           mode='top')\n\n            # Get left two lines\n            plane_left_temp = self._get_plane_fomulation(\n                cur_corners[2] - cur_corners[3],\n                cur_corners[3] - cur_corners[0], cur_corners[0])\n\n            right_points = cur_corners[[4, 5, 7, 6]]\n            plane_left_temp /= torch.norm(plane_left_temp[:3])\n            refined_distance = (right_points * plane_left_temp[:3]).sum(dim=1)\n\n            if plane_left_temp[2] < self.train_cfg['lower_thresh']:\n                plane_left = plane_left_temp\n                plane_right = points.new_tensor([\n                    plane_left_temp[0], plane_left_temp[1], plane_left_temp[2],\n                    -refined_distance.mean()\n                ])\n            else:\n                raise NotImplementedError(\n                    'Normal vector of the plane should be horizontal!')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_left, coords)\n\n            # Get left four lines\n            if self.primitive_mode == 'line':\n                point2line_matching = self.match_point2line(\n                    coords[selected], cur_corners, with_yaw, mode='left')\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_line_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        point2line_matching[2:], cur_corners, [2, 2],\n                        with_yaw, mode='left')\n\n            if self.primitive_mode == 'xy' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    point2plane_dist[selected].var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        cur_corners, with_yaw, mode='left')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_right, coords)\n\n            # Get right four lines\n            if self.primitive_mode == 'line':\n                point2line_matching = self.match_point2line(\n                    coords[selected], cur_corners, with_yaw, mode='right')\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_line_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        point2line_matching[2:], cur_corners, [2, 2],\n                       
 with_yaw, mode='right')\n\n            if self.primitive_mode == 'xy' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    point2plane_dist[selected].var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        cur_corners, with_yaw, mode='right')\n\n            plane_front_temp = self._get_plane_fomulation(\n                cur_corners[0] - cur_corners[4],\n                cur_corners[4] - cur_corners[5], cur_corners[5])\n\n            back_points = cur_corners[[3, 2, 7, 6]]\n            plane_front_temp /= torch.norm(plane_front_temp[:3])\n            refined_distance = (back_points * plane_front_temp[:3]).sum(dim=1)\n\n            if plane_front_temp[2] < self.train_cfg['lower_thresh']:\n                plane_front = plane_front_temp\n                plane_back = points.new_tensor([\n                    plane_front_temp[0], plane_front_temp[1],\n                    plane_front_temp[2], -torch.mean(refined_distance)\n                ])\n            else:\n                raise NotImplementedError(\n                    'Normal vector of the plane should be horizontal!')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_front, coords)\n\n            if self.primitive_mode == 'xy' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    (point2plane_dist[selected]).var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        cur_corners, with_yaw, mode='front')\n\n            # Get the boundary points here\n            point2plane_dist, selected = self.match_point2plane(\n                plane_back, coords)\n\n            if self.primitive_mode == 'xy' and \\\n                    selected.sum() > self.train_cfg['num_point'] and \\\n                    point2plane_dist[selected].var() < \\\n                    self.train_cfg['var_thresh']:\n\n                point_mask, point_offset, point_sem = \\\n                    self._assign_primitive_surface_targets(\n                        point_mask, point_offset, point_sem,\n                        coords[selected], indices[selected], cur_cls_label,\n                        cur_corners, with_yaw, mode='back')\n\n        return (point_mask, point_sem, point_offset)\n\n    def primitive_decode_scores(self, predictions, aggregated_points):\n        \"\"\"Decode predicted parts to primitive head.\n\n        Args:\n            predictions (torch.Tensor): primitive pridictions of each batch.\n            aggregated_points (torch.Tensor): The aggregated points\n                of vote stage.\n\n        Returns:\n            Dict: Predictions of primitive head, including center,\n                semantic size and semantic scores.\n        \"\"\"\n\n        ret_dict = {}\n        pred_transposed = predictions.transpose(2, 1)\n\n        center = aggregated_points + pred_transposed[:, :, 0:3]\n        ret_dict['center_' + 
self.primitive_mode] = center\n\n        if self.primitive_mode in ['z', 'xy']:\n            ret_dict['size_residuals_' + self.primitive_mode] = \\\n                pred_transposed[:, :, 3:3 + self.num_dims]\n\n        ret_dict['sem_cls_scores_' + self.primitive_mode] = \\\n            pred_transposed[:, :, 3 + self.num_dims:]\n\n        return ret_dict\n\n    def check_horizon(self, points):\n        \"\"\"Check whether is a horizontal plane.\n\n        Args:\n            points (torch.Tensor): Points of input.\n\n        Returns:\n            Bool: Flag of result.\n        \"\"\"\n        return (points[0][-1] == points[1][-1]) and \\\n               (points[1][-1] == points[2][-1]) and \\\n               (points[2][-1] == points[3][-1])\n\n    def check_dist(self, plane_equ, points):\n        \"\"\"Whether the mean of points to plane distance is lower than thresh.\n\n        Args:\n            plane_equ (torch.Tensor): Plane to be checked.\n            points (torch.Tensor): Points to be checked.\n\n        Returns:\n            Tuple: Flag of result.\n        \"\"\"\n        return (points[:, 2] +\n                plane_equ[-1]).sum() / 4.0 < self.train_cfg['lower_thresh']\n\n    def point2line_dist(self, points, pts_a, pts_b):\n        \"\"\"Calculate the distance from point to line.\n\n        Args:\n            points (torch.Tensor): Points of input.\n            pts_a (torch.Tensor): Point on the specific line.\n            pts_b (torch.Tensor): Point on the specific line.\n\n        Returns:\n            torch.Tensor: Distance between each point to line.\n        \"\"\"\n        line_a2b = pts_b - pts_a\n        line_a2pts = points - pts_a\n        length = (line_a2pts * line_a2b.view(1, 3)).sum(1) / \\\n            line_a2b.norm()\n        dist = (line_a2pts.norm(dim=1)**2 - length**2).sqrt()\n\n        return dist\n\n    def match_point2line(self, points, corners, with_yaw, mode='bottom'):\n        \"\"\"Match points to corresponding line.\n\n        Args:\n            points (torch.Tensor): Points of input.\n            corners (torch.Tensor): Eight corners of a bounding box.\n            with_yaw (Bool): Whether the boundind box is with rotation.\n            mode (str, optional): Specify which line should be matched,\n                available mode are ('bottom', 'top', 'left', 'right').\n                Defaults to 'bottom'.\n\n        Returns:\n            Tuple: Flag of matching correspondence.\n        \"\"\"\n        if with_yaw:\n            corners_pair = {\n                'bottom': [[0, 3], [4, 7], [0, 4], [3, 7]],\n                'top': [[1, 2], [5, 6], [1, 5], [2, 6]],\n                'left': [[0, 1], [3, 2], [0, 1], [3, 2]],\n                'right': [[4, 5], [7, 6], [4, 5], [7, 6]]\n            }\n            selected_list = []\n            for pair_index in corners_pair[mode]:\n                selected = self.point2line_dist(\n                    points, corners[pair_index[0]], corners[pair_index[1]]) \\\n                    < self.train_cfg['line_thresh']\n                selected_list.append(selected)\n        else:\n            xmin, ymin, _ = corners.min(0)[0]\n            xmax, ymax, _ = corners.max(0)[0]\n            sel1 = torch.abs(points[:, 0] -\n                             xmin) < self.train_cfg['line_thresh']\n            sel2 = torch.abs(points[:, 0] -\n                             xmax) < self.train_cfg['line_thresh']\n            sel3 = torch.abs(points[:, 1] -\n                             ymin) < self.train_cfg['line_thresh']\n            sel4 
= torch.abs(points[:, 1] -\n                             ymax) < self.train_cfg['line_thresh']\n            selected_list = [sel1, sel2, sel3, sel4]\n        return selected_list\n\n    def match_point2plane(self, plane, points):\n        \"\"\"Match points to plane.\n\n        Args:\n            plane (torch.Tensor): Equation of the plane.\n            points (torch.Tensor): Points of input.\n\n        Returns:\n            Tuple: Distance of each point to the plane and\n                flag of matching correspondence.\n        \"\"\"\n        point2plane_dist = torch.abs((points * plane[:3]).sum(dim=1) +\n                                     plane[-1])\n        min_dist = point2plane_dist.min()\n        selected = torch.abs(point2plane_dist -\n                             min_dist) < self.train_cfg['dist_thresh']\n        return point2plane_dist, selected\n\n    def compute_primitive_loss(self, primitive_center, primitive_semantic,\n                               semantic_scores, num_proposal,\n                               gt_primitive_center, gt_primitive_semantic,\n                               gt_sem_cls_label, gt_primitive_mask):\n        \"\"\"Compute loss of primitive module.\n\n        Args:\n            primitive_center (torch.Tensor): Pridictions of primitive center.\n            primitive_semantic (torch.Tensor): Pridictions of primitive\n                semantic.\n            semantic_scores (torch.Tensor): Pridictions of primitive\n                semantic scores.\n            num_proposal (int): The number of primitive proposal.\n            gt_primitive_center (torch.Tensor): Ground truth of\n                primitive center.\n            gt_votes_sem (torch.Tensor): Ground truth of primitive semantic.\n            gt_sem_cls_label (torch.Tensor): Ground truth of primitive\n                semantic class.\n            gt_primitive_mask (torch.Tensor): Ground truth of primitive mask.\n\n        Returns:\n            Tuple: Loss of primitive module.\n        \"\"\"\n        batch_size = primitive_center.shape[0]\n        vote_xyz_reshape = primitive_center.view(batch_size * num_proposal, -1,\n                                                 3)\n\n        center_loss = self.center_loss(\n            vote_xyz_reshape,\n            gt_primitive_center,\n            dst_weight=gt_primitive_mask.view(batch_size * num_proposal, 1))[1]\n\n        if self.primitive_mode != 'line':\n            size_xyz_reshape = primitive_semantic.view(\n                batch_size * num_proposal, -1, self.num_dims).contiguous()\n            size_loss = self.semantic_reg_loss(\n                size_xyz_reshape,\n                gt_primitive_semantic,\n                dst_weight=gt_primitive_mask.view(batch_size * num_proposal,\n                                                  1))[1]\n        else:\n            size_loss = center_loss.new_tensor(0.0)\n\n        # Semantic cls loss\n        sem_cls_loss = self.semantic_cls_loss(\n            semantic_scores, gt_sem_cls_label, weight=gt_primitive_mask)\n\n        return center_loss, size_loss, sem_cls_loss\n\n    def get_primitive_center(self, pred_flag, center):\n        \"\"\"Generate primitive center from predictions.\n\n        Args:\n            pred_flag (torch.Tensor): Scores of primitive center.\n            center (torch.Tensor): Pridictions of primitive center.\n\n        Returns:\n            Tuple: Primitive center and the prediction indices.\n        \"\"\"\n        ind_normal = F.softmax(pred_flag, dim=1)\n        pred_indices = 
(ind_normal[:, 1, :] >\n                        self.surface_thresh).detach().float()\n        selected = (ind_normal[:, 1, :] <=\n                    self.surface_thresh).detach().float()\n        offset = torch.ones_like(center) * self.upper_thresh\n        center = center + offset * selected.unsqueeze(-1)\n        return center, pred_indices\n\n    def _assign_primitive_line_targets(self,\n                                       point_mask,\n                                       point_offset,\n                                       point_sem,\n                                       coords,\n                                       indices,\n                                       cls_label,\n                                       point2line_matching,\n                                       corners,\n                                       center_axises,\n                                       with_yaw,\n                                       mode='bottom'):\n        \"\"\"Generate targets of line primitive.\n\n        Args:\n            point_mask (torch.Tensor): Tensor to store the ground\n                truth of mask.\n            point_offset (torch.Tensor): Tensor to store the ground\n                truth of offset.\n            point_sem (torch.Tensor): Tensor to store the ground\n                truth of semantic.\n            coords (torch.Tensor): The selected points.\n            indices (torch.Tensor): Indices of the selected points.\n            cls_label (int): Class label of the ground truth bounding box.\n            point2line_matching (torch.Tensor): Flag indicate that\n                matching line of each point.\n            corners (torch.Tensor): Corners of the ground truth bounding box.\n            center_axises (list[int]): Indicate in which axis the line center\n                should be refined.\n            with_yaw (Bool): Whether the boundind box is with rotation.\n            mode (str, optional): Specify which line should be matched,\n                available mode are ('bottom', 'top', 'left', 'right').\n                Defaults to 'bottom'.\n\n        Returns:\n            Tuple: Targets of the line primitive.\n        \"\"\"\n        corners_pair = {\n            'bottom': [[0, 3], [4, 7], [0, 4], [3, 7]],\n            'top': [[1, 2], [5, 6], [1, 5], [2, 6]],\n            'left': [[0, 1], [3, 2]],\n            'right': [[4, 5], [7, 6]]\n        }\n        corners_pair = corners_pair[mode]\n        assert len(corners_pair) == len(point2line_matching) == len(\n            center_axises)\n        for line_select, center_axis, pair_index in zip(\n                point2line_matching, center_axises, corners_pair):\n            if line_select.sum() > self.train_cfg['num_point_line']:\n                point_mask[indices[line_select]] = 1.0\n\n                if with_yaw:\n                    line_center = (corners[pair_index[0]] +\n                                   corners[pair_index[1]]) / 2\n                else:\n                    line_center = coords[line_select].mean(dim=0)\n                    line_center[center_axis] = corners[:, center_axis].mean()\n\n                point_offset[indices[line_select]] = \\\n                    line_center - coords[line_select]\n                point_sem[indices[line_select]] = \\\n                    point_sem.new_tensor([line_center[0], line_center[1],\n                                          line_center[2], cls_label])\n        return point_mask, point_offset, point_sem\n\n    def 
_assign_primitive_surface_targets(self,\n                                          point_mask,\n                                          point_offset,\n                                          point_sem,\n                                          coords,\n                                          indices,\n                                          cls_label,\n                                          corners,\n                                          with_yaw,\n                                          mode='bottom'):\n        \"\"\"Generate targets for primitive z and primitive xy.\n\n        Args:\n            point_mask (torch.Tensor): Tensor to store the ground\n                truth of mask.\n            point_offset (torch.Tensor): Tensor to store the ground\n                truth of offset.\n            point_sem (torch.Tensor): Tensor to store the ground\n                truth of semantic.\n            coords (torch.Tensor): The selected points.\n            indices (torch.Tensor): Indices of the selected points.\n            cls_label (int): Class label of the ground truth bounding box.\n            corners (torch.Tensor): Corners of the ground truth bounding box.\n            with_yaw (Bool): Whether the boundind box is with rotation.\n            mode (str, optional): Specify which line should be matched,\n                available mode are ('bottom', 'top', 'left', 'right',\n                'front', 'back').\n                Defaults to 'bottom'.\n\n        Returns:\n            Tuple: Targets of the center primitive.\n        \"\"\"\n        point_mask[indices] = 1.0\n        corners_pair = {\n            'bottom': [0, 7],\n            'top': [1, 6],\n            'left': [0, 1],\n            'right': [4, 5],\n            'front': [0, 1],\n            'back': [3, 2]\n        }\n        pair_index = corners_pair[mode]\n        if self.primitive_mode == 'z':\n            if with_yaw:\n                center = (corners[pair_index[0]] +\n                          corners[pair_index[1]]) / 2.0\n                center[2] = coords[:, 2].mean()\n                point_sem[indices] = point_sem.new_tensor([\n                    center[0], center[1],\n                    center[2], (corners[4] - corners[0]).norm(),\n                    (corners[3] - corners[0]).norm(), cls_label\n                ])\n            else:\n                center = point_mask.new_tensor([\n                    corners[:, 0].mean(), corners[:, 1].mean(),\n                    coords[:, 2].mean()\n                ])\n                point_sem[indices] = point_sem.new_tensor([\n                    center[0], center[1], center[2],\n                    corners[:, 0].max() - corners[:, 0].min(),\n                    corners[:, 1].max() - corners[:, 1].min(), cls_label\n                ])\n        elif self.primitive_mode == 'xy':\n            if with_yaw:\n                center = coords.mean(0)\n                center[2] = (corners[pair_index[0], 2] +\n                             corners[pair_index[1], 2]) / 2.0\n                point_sem[indices] = point_sem.new_tensor([\n                    center[0], center[1], center[2],\n                    corners[pair_index[1], 2] - corners[pair_index[0], 2],\n                    cls_label\n                ])\n            else:\n                center = point_mask.new_tensor([\n                    coords[:, 0].mean(), coords[:, 1].mean(),\n                    corners[:, 2].mean()\n                ])\n                point_sem[indices] = point_sem.new_tensor([\n               
     center[0], center[1], center[2],\n                    corners[:, 2].max() - corners[:, 2].min(), cls_label\n                ])\n        point_offset[indices] = center - coords\n        return point_mask, point_offset, point_sem\n\n    def _get_plane_fomulation(self, vector1, vector2, point):\n        \"\"\"Compute the equation of the plane.\n\n        Args:\n            vector1 (torch.Tensor): Parallel vector of the plane.\n            vector2 (torch.Tensor): Parallel vector of the plane.\n            point (torch.Tensor): Point on the plane.\n\n        Returns:\n            torch.Tensor: Equation of the plane.\n        \"\"\"\n        surface_norm = torch.cross(vector1, vector2)\n        surface_dis = -torch.dot(surface_norm, point)\n        plane = point.new_tensor(\n            [surface_norm[0], surface_norm[1], surface_norm[2], surface_dis])\n        return plane\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/part_aggregation_roi_head.py",
    "content": "from torch.nn import functional as F\n\nfrom mmdet3d.core import AssignResult\nfrom mmdet3d.core.bbox import bbox3d2result, bbox3d2roi\nfrom mmdet.core import build_assigner, build_sampler\nfrom mmdet.models import HEADS\nfrom ..builder import build_head, build_roi_extractor\nfrom .base_3droi_head import Base3DRoIHead\n\n\n@HEADS.register_module()\nclass PartAggregationROIHead(Base3DRoIHead):\n    \"\"\"Part aggregation roi head for PartA2.\n\n    Args:\n        semantic_head (ConfigDict): Config of semantic head.\n        num_classes (int): The number of classes.\n        seg_roi_extractor (ConfigDict): Config of seg_roi_extractor.\n        part_roi_extractor (ConfigDict): Config of part_roi_extractor.\n        bbox_head (ConfigDict): Config of bbox_head.\n        train_cfg (ConfigDict): Training config.\n        test_cfg (ConfigDict): Testing config.\n    \"\"\"\n\n    def __init__(self,\n                 semantic_head,\n                 num_classes=3,\n                 seg_roi_extractor=None,\n                 part_roi_extractor=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None):\n        super(PartAggregationROIHead, self).__init__(\n            bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg)\n        self.num_classes = num_classes\n        assert semantic_head is not None\n        self.semantic_head = build_head(semantic_head)\n\n        if seg_roi_extractor is not None:\n            self.seg_roi_extractor = build_roi_extractor(seg_roi_extractor)\n        if part_roi_extractor is not None:\n            self.part_roi_extractor = build_roi_extractor(part_roi_extractor)\n\n        self.init_assigner_sampler()\n\n    def init_weights(self, pretrained):\n        \"\"\"Initialize weights, skip since ``PartAggregationROIHead`` does not\n        need to initialize weights.\"\"\"\n        pass\n\n    def init_mask_head(self):\n        \"\"\"Initialize mask head, skip since ``PartAggregationROIHead`` does not\n        have one.\"\"\"\n        pass\n\n    def init_bbox_head(self, bbox_head):\n        \"\"\"Initialize box head.\"\"\"\n        self.bbox_head = build_head(bbox_head)\n\n    def init_assigner_sampler(self):\n        \"\"\"Initialize assigner and sampler.\"\"\"\n        self.bbox_assigner = None\n        self.bbox_sampler = None\n        if self.train_cfg:\n            if isinstance(self.train_cfg.assigner, dict):\n                self.bbox_assigner = build_assigner(self.train_cfg.assigner)\n            elif isinstance(self.train_cfg.assigner, list):\n                self.bbox_assigner = [\n                    build_assigner(res) for res in self.train_cfg.assigner\n                ]\n            self.bbox_sampler = build_sampler(self.train_cfg.sampler)\n\n    @property\n    def with_semantic(self):\n        \"\"\"bool: whether the head has semantic branch\"\"\"\n        return hasattr(self,\n                       'semantic_head') and self.semantic_head is not None\n\n    def forward_train(self, feats_dict, voxels_dict, img_metas, proposal_list,\n                      gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Training forward function of PartAggregationROIHead.\n\n        Args:\n            feats_dict (dict): Contains features from the first stage.\n            voxels_dict (dict): Contains information of voxels.\n            img_metas (list[dict]): Meta info of each image.\n            proposal_list (list[dict]): Proposal information from rpn.\n                The dictionary should contain the 
following keys:\n\n                - boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes\n                - labels_3d (torch.Tensor): Labels of proposals\n                - cls_preds (torch.Tensor): Original scores of proposals\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]):\n                GT bboxes of each sample. The bboxes are encapsulated\n                by 3D box structures.\n            gt_labels_3d (list[LongTensor]): GT labels of each sample.\n\n        Returns:\n            dict: losses from each head.\n\n                - loss_semantic (torch.Tensor): loss of semantic head\n                - loss_bbox (torch.Tensor): loss of bboxes\n        \"\"\"\n        losses = dict()\n        if self.with_semantic:\n            semantic_results = self._semantic_forward_train(\n                feats_dict['seg_features'], voxels_dict, gt_bboxes_3d,\n                gt_labels_3d)\n            losses.update(semantic_results['loss_semantic'])\n\n        sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,\n                                                 gt_labels_3d)\n        if self.with_bbox:\n            bbox_results = self._bbox_forward_train(\n                feats_dict['seg_features'], semantic_results['part_feats'],\n                voxels_dict, sample_results)\n            losses.update(bbox_results['loss_bbox'])\n\n        return losses\n\n    def simple_test(self, feats_dict, voxels_dict, img_metas, proposal_list,\n                    **kwargs):\n        \"\"\"Simple testing forward function of PartAggregationROIHead.\n\n        Note:\n            This function assumes that the batch size is 1\n\n        Args:\n            feats_dict (dict): Contains features from the first stage.\n            voxels_dict (dict): Contains information of voxels.\n            img_metas (list[dict]): Meta info of each image.\n            proposal_list (list[dict]): Proposal information from rpn.\n\n        Returns:\n            dict: Bbox results of one frame.\n        \"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n        assert self.with_semantic\n\n        semantic_results = self.semantic_head(feats_dict['seg_features'])\n\n        rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])\n        labels_3d = [res['labels_3d'] for res in proposal_list]\n        cls_preds = [res['cls_preds'] for res in proposal_list]\n        bbox_results = self._bbox_forward(feats_dict['seg_features'],\n                                          semantic_results['part_feats'],\n                                          voxels_dict, rois)\n\n        bbox_list = self.bbox_head.get_bboxes(\n            rois,\n            bbox_results['cls_score'],\n            bbox_results['bbox_pred'],\n            labels_3d,\n            cls_preds,\n            img_metas,\n            cfg=self.test_cfg)\n\n        bbox_results = [\n            bbox3d2result(bboxes, scores, labels)\n            for bboxes, scores, labels in bbox_list\n        ]\n        return bbox_results\n\n    def _bbox_forward_train(self, seg_feats, part_feats, voxels_dict,\n                            sampling_results):\n        \"\"\"Forward training function of roi_extractor and bbox_head.\n\n        Args:\n            seg_feats (torch.Tensor): Point-wise semantic features.\n            part_feats (torch.Tensor): Point-wise part prediction features.\n            voxels_dict (dict): Contains information of voxels.\n            sampling_results (:obj:`SamplingResult`): Sampled results used\n    
            for training.\n\n        Returns:\n            dict: Forward results including losses and predictions.\n        \"\"\"\n        rois = bbox3d2roi([res.bboxes for res in sampling_results])\n        bbox_results = self._bbox_forward(seg_feats, part_feats, voxels_dict,\n                                          rois)\n\n        bbox_targets = self.bbox_head.get_targets(sampling_results,\n                                                  self.train_cfg)\n        loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],\n                                        bbox_results['bbox_pred'], rois,\n                                        *bbox_targets)\n\n        bbox_results.update(loss_bbox=loss_bbox)\n        return bbox_results\n\n    def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):\n        \"\"\"Forward function of roi_extractor and bbox_head used in both\n        training and testing.\n\n        Args:\n            seg_feats (torch.Tensor): Point-wise semantic features.\n            part_feats (torch.Tensor): Point-wise part prediction features.\n            voxels_dict (dict): Contains information of voxels.\n            rois (Tensor): Roi boxes.\n\n        Returns:\n            dict: Contains predictions of bbox_head and\n                features of roi_extractor.\n        \"\"\"\n        pooled_seg_feats = self.seg_roi_extractor(seg_feats,\n                                                  voxels_dict['voxel_centers'],\n                                                  voxels_dict['coors'][..., 0],\n                                                  rois)\n        pooled_part_feats = self.part_roi_extractor(\n            part_feats, voxels_dict['voxel_centers'],\n            voxels_dict['coors'][..., 0], rois)\n        cls_score, bbox_pred = self.bbox_head(pooled_seg_feats,\n                                              pooled_part_feats)\n\n        bbox_results = dict(\n            cls_score=cls_score,\n            bbox_pred=bbox_pred,\n            pooled_seg_feats=pooled_seg_feats,\n            pooled_part_feats=pooled_part_feats)\n        return bbox_results\n\n    def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):\n        \"\"\"Assign and sample proposals for training.\n\n        Args:\n            proposal_list (list[dict]): Proposals produced by RPN.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels\n\n        Returns:\n            list[:obj:`SamplingResult`]: Sampled results of each training\n                sample.\n        \"\"\"\n        sampling_results = []\n        # bbox assign\n        for batch_idx in range(len(proposal_list)):\n            cur_proposal_list = proposal_list[batch_idx]\n            cur_boxes = cur_proposal_list['boxes_3d']\n            cur_labels_3d = cur_proposal_list['labels_3d']\n            cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)\n            cur_gt_labels = gt_labels_3d[batch_idx]\n\n            batch_num_gts = 0\n            # 0 is bg\n            batch_gt_indis = cur_gt_labels.new_full((len(cur_boxes), ), 0)\n            batch_max_overlaps = cur_boxes.tensor.new_zeros(len(cur_boxes))\n            # -1 is bg\n            batch_gt_labels = cur_gt_labels.new_full((len(cur_boxes), ), -1)\n\n            # each class may have its own assigner\n            if isinstance(self.bbox_assigner, list):\n                for i, assigner in enumerate(self.bbox_assigner):\n         
           gt_per_cls = (cur_gt_labels == i)\n                    pred_per_cls = (cur_labels_3d == i)\n                    cur_assign_res = assigner.assign(\n                        cur_boxes.tensor[pred_per_cls],\n                        cur_gt_bboxes.tensor[gt_per_cls],\n                        gt_labels=cur_gt_labels[gt_per_cls])\n                    # gather assign_results in different class into one result\n                    batch_num_gts += cur_assign_res.num_gts\n                    # gt inds (1-based)\n                    gt_inds_arange_pad = gt_per_cls.nonzero(\n                        as_tuple=False).view(-1) + 1\n                    # pad 0 for indice unassigned\n                    gt_inds_arange_pad = F.pad(\n                        gt_inds_arange_pad, (1, 0), mode='constant', value=0)\n                    # pad -1 for indice ignore\n                    gt_inds_arange_pad = F.pad(\n                        gt_inds_arange_pad, (1, 0), mode='constant', value=-1)\n                    # convert to 0~gt_num+2 for indices\n                    gt_inds_arange_pad += 1\n                    # now 0 is bg, >1 is fg in batch_gt_indis\n                    batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[\n                        cur_assign_res.gt_inds + 1] - 1\n                    batch_max_overlaps[\n                        pred_per_cls] = cur_assign_res.max_overlaps\n                    batch_gt_labels[pred_per_cls] = cur_assign_res.labels\n\n                assign_result = AssignResult(batch_num_gts, batch_gt_indis,\n                                             batch_max_overlaps,\n                                             batch_gt_labels)\n            else:  # for single class\n                assign_result = self.bbox_assigner.assign(\n                    cur_boxes.tensor,\n                    cur_gt_bboxes.tensor,\n                    gt_labels=cur_gt_labels)\n            # sample boxes\n            sampling_result = self.bbox_sampler.sample(assign_result,\n                                                       cur_boxes.tensor,\n                                                       cur_gt_bboxes.tensor,\n                                                       cur_gt_labels)\n            sampling_results.append(sampling_result)\n        return sampling_results\n\n    def _semantic_forward_train(self, x, voxels_dict, gt_bboxes_3d,\n                                gt_labels_3d):\n        \"\"\"Train semantic head.\n\n        Args:\n            x (torch.Tensor): Point-wise semantic features for segmentation\n            voxels_dict (dict): Contains information of voxels.\n            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth\n                boxes.\n            gt_labels_3d (list[torch.Tensor]): Ground truth labels\n\n        Returns:\n            dict: Segmentation results including losses\n        \"\"\"\n        semantic_results = self.semantic_head(x)\n        semantic_targets = self.semantic_head.get_targets(\n            voxels_dict, gt_bboxes_3d, gt_labels_3d)\n        loss_semantic = self.semantic_head.loss(semantic_results,\n                                                semantic_targets)\n        semantic_results.update(loss_semantic=loss_semantic)\n        return semantic_results\n"
  },
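Both the training path (`_bbox_forward_train`) and the test path above pack per-sample proposals into a single `rois` tensor with `bbox3d2roi` before pooling. A minimal sketch of that packing convention, with a hypothetical `pack_rois` helper standing in for the repo's `bbox3d2roi`:

```python
import torch

def pack_rois(bbox_list):
    # Prepend a batch-index column to each sample's (N_i, 7) boxes and
    # concatenate, giving one (sum N_i, 8) tensor that still records which
    # sample every roi came from.
    rois = []
    for batch_idx, bboxes in enumerate(bbox_list):
        batch_col = bboxes.new_full((bboxes.size(0), 1), batch_idx)
        rois.append(torch.cat([batch_col, bboxes], dim=-1))
    return torch.cat(rois, dim=0)

proposals_b0 = torch.rand(3, 7)   # (x, y, z, dx, dy, dz, yaw) per proposal
proposals_b1 = torch.rand(2, 7)
rois = pack_rois([proposals_b0, proposals_b1])
print(rois.shape)                 # torch.Size([5, 8]); rois[:, 0] is the batch index
```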
  {
    "path": "mmdet3d/models/roi_heads/roi_extractors/__init__.py",
    "content": "from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor\nfrom .single_roiaware_extractor import Single3DRoIAwareExtractor\n\n__all__ = ['SingleRoIExtractor', 'Single3DRoIAwareExtractor']\n"
  },
  {
    "path": "mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py",
    "content": "import torch\nfrom torch import nn as nn\n\nfrom mmdet3d import ops\nfrom mmdet.models.builder import ROI_EXTRACTORS\n\n\n@ROI_EXTRACTORS.register_module()\nclass Single3DRoIAwareExtractor(nn.Module):\n    \"\"\"Point-wise roi-aware Extractor.\n\n    Extract Point-wise roi features.\n\n    Args:\n        roi_layer (dict): The config of roi layer.\n    \"\"\"\n\n    def __init__(self, roi_layer=None):\n        super(Single3DRoIAwareExtractor, self).__init__()\n        self.roi_layer = self.build_roi_layers(roi_layer)\n\n    def build_roi_layers(self, layer_cfg):\n        \"\"\"Build roi layers using `layer_cfg`\"\"\"\n        cfg = layer_cfg.copy()\n        layer_type = cfg.pop('type')\n        assert hasattr(ops, layer_type)\n        layer_cls = getattr(ops, layer_type)\n        roi_layers = layer_cls(**cfg)\n        return roi_layers\n\n    def forward(self, feats, coordinate, batch_inds, rois):\n        \"\"\"Extract point-wise roi features.\n\n        Args:\n            feats (torch.FloatTensor): Point-wise features with\n                shape (batch, npoints, channels) for pooling.\n            coordinate (torch.FloatTensor): Coordinate of each point.\n            batch_inds (torch.LongTensor): Indicate the batch of each point.\n            rois (torch.FloatTensor): Roi boxes with batch indices.\n\n        Returns:\n            torch.FloatTensor: Pooled features\n        \"\"\"\n        pooled_roi_feats = []\n        for batch_idx in range(int(batch_inds.max()) + 1):\n            roi_inds = (rois[..., 0].int() == batch_idx)\n            coors_inds = (batch_inds.int() == batch_idx)\n            pooled_roi_feat = self.roi_layer(rois[..., 1:][roi_inds],\n                                             coordinate[coors_inds],\n                                             feats[coors_inds])\n            pooled_roi_feats.append(pooled_roi_feat)\n        pooled_roi_feats = torch.cat(pooled_roi_feats, 0)\n        return pooled_roi_feats\n"
  },
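The extractor's forward loop keys everything on batch indices: points are selected through `batch_inds`, rois through their first column, and `rois[..., 1:]` drops that column before the pooling op. A tiny illustration of just the masking step (the actual `roi_layer`, built from `ops`, is not invoked here):

```python
import torch

# Toy inputs: two rois for batch 0, one for batch 1; five points split 2/3.
rois = torch.tensor([[0., 1., 1., 1., 2., 2., 2., 0.1],
                     [0., 3., 3., 1., 2., 2., 2., 0.0],
                     [1., 0., 0., 0., 1., 1., 1., 0.0]])  # column 0 = batch index
batch_inds = torch.tensor([0, 0, 1, 1, 1])                # one entry per point

for batch_idx in range(int(batch_inds.max()) + 1):
    roi_mask = rois[..., 0].int() == batch_idx
    point_mask = batch_inds.int() == batch_idx
    # rois[roi_mask][:, 1:] is what would be handed to the pooling layer
    print(batch_idx, tuple(rois[roi_mask][:, 1:].shape), int(point_mask.sum()))
```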
  {
    "path": "mmdet3d/models/utils/__init__.py",
    "content": "from .clip_sigmoid import clip_sigmoid\nfrom .inverse_sigmoid import inverse_sigmoid\nfrom .mlp import MLP\nfrom .transformerdecoder import PositionEmbeddingLearned, TransformerDecoderLayer, MultiheadAttention, PositionEmbeddingLearnedwoNorm\nfrom .ffn import FFN, FFNLN\nfrom .projection import ProjectionLayerNorm\nfrom .sparsefusion_models import PointTransformer2D_3D, FusionTransformer2D_3D_Self, ImageTransformer_Cam_3D_MS, ViewTransformer\n\nfrom .drop import Dropout, DropPath, build_dropout\nfrom .deformable_decoder import DeformableTransformerDecoderLayer\nfrom .depth_encoder import DepthEncoderResNet\nfrom .network_modules import LayerNorm, ConvLN, denormalize_pos, normalize_pos\n\n__all__ = ['clip_sigmoid', \"MLP\", 'PositionEmbeddingLearned', 'TransformerDecoderLayer', 'MultiheadAttention',\n           'FFN', 'inverse_sigmoid',  'PointTransformer2D_3D', 'FFNLN', 'PositionEmbeddingLearnedwoNorm',\n           'ProjectionLayerNorm', 'FusionTransformer2D_3D_Self',\n           'Dropout', 'DropPath', 'build_dropout',\n           'DeformableTransformerDecoderLayer' 'ImageTransformer_Cam_3D_MS',\n           'ViewTransformer', 'DepthEncoderResNet',\n           'LayerNorm', 'ConvLN', \"normalize_pos\", \"denormalize_pos\"\n]\n"
  },
  {
    "path": "mmdet3d/models/utils/clip_sigmoid.py",
    "content": "import torch\n\n\ndef clip_sigmoid(x, eps=1e-4):\n    \"\"\"Sigmoid function for input feature.\n\n    Args:\n        x (torch.Tensor): Input feature map with the shape of [B, N, H, W].\n        eps (float): Lower bound of the range to be clamped to. Defaults\n            to 1e-4.\n\n    Returns:\n        torch.Tensor: Feature map after sigmoid.\n    \"\"\"\n    y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps)\n    return y\n"
  },
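A quick numeric check of why the clamp matters when the clipped sigmoid feeds a log-based loss (e.g. a Gaussian-focal-style heatmap loss, which is an assumption here): a saturated logit makes `log(1 - p)` diverge without the `eps` bound.

```python
import torch

x = torch.tensor([20.0])                            # strongly positive logit
p_raw = torch.sigmoid(x)                            # rounds to exactly 1.0 in fp32
p_clip = torch.clamp(torch.sigmoid(x), min=1e-4, max=1 - 1e-4)
print(torch.log(1 - p_raw))                         # tensor([-inf])
print(torch.log(1 - p_clip))                        # finite, about log(1e-4) = -9.21
```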
  {
    "path": "mmdet3d/models/utils/deformable_decoder.py",
    "content": "import copy\r\nimport numpy as np\r\nimport torch\r\nfrom torch import nn\r\nimport torch.nn.functional as F\r\nfrom torch.nn import Linear\r\nimport math\r\nimport warnings\r\nfrom typing import Optional, no_type_check\r\nfrom torch.autograd.function import Function, once_differentiable\r\n\r\nfrom mmdet3d.models.utils import MultiheadAttention\r\nfrom mmcv.runner import BaseModule\r\nfrom mmcv import deprecated_api_warning\r\nfrom mmcv.cnn import constant_init, xavier_init\r\nfrom mmcv.runner import BaseModule\r\nfrom mmdet3d.models.utils.ops.modules import MSDeformAttn\r\n\r\n\r\nclass DeformableTransformerDecoderLayer(nn.Module):\r\n    def __init__(self, d_model, nhead, level_num=4, dim_feedforward=2048, dropout=0.1, activation=\"relu\",\r\n                 self_posembed=None, cross_posembed=None, cross_only=False, n_points=4):\r\n        super().__init__()\r\n        self.cross_only = cross_only\r\n        if not self.cross_only:\r\n            self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)\r\n        self.multihead_attn = MSDeformAttn(d_model, level_num, nhead, n_points)\r\n\r\n        # Implementation of Feedforward model\r\n        self.linear1 = nn.Linear(d_model, dim_feedforward)\r\n        self.dropout = nn.Dropout(dropout)\r\n        self.linear2 = nn.Linear(dim_feedforward, d_model)\r\n\r\n        self.norm1 = nn.LayerNorm(d_model)\r\n        self.norm2 = nn.LayerNorm(d_model)\r\n        self.norm3 = nn.LayerNorm(d_model)\r\n        self.dropout1 = nn.Dropout(dropout)\r\n        self.dropout2 = nn.Dropout(dropout)\r\n        self.dropout3 = nn.Dropout(dropout)\r\n\r\n        def _get_activation_fn(activation):\r\n            \"\"\"Return an activation function given a string\"\"\"\r\n            if activation == \"relu\":\r\n                return F.relu\r\n            if activation == \"gelu\":\r\n                return F.gelu\r\n            if activation == \"glu\":\r\n                return F.glu\r\n            raise RuntimeError(F\"activation should be relu/gelu, not {activation}.\")\r\n\r\n        self.activation = _get_activation_fn(activation)\r\n\r\n        self.self_posembed = self_posembed\r\n        self.cross_posembed = cross_posembed\r\n\r\n    def with_pos_embed(self, tensor, pos_embed):\r\n        return tensor if pos_embed is None else tensor + pos_embed\r\n\r\n    def forward(self, query, key, query_pos, key_pos, reference_points, level_start_index, spatial_shapes, query_padding_mask=None, input_padding_mask=None):\r\n\r\n        \"\"\"\r\n        :param query: B C Pq\r\n        :param key: B C Pk\r\n        :param query_pos: B Pq 3/6\r\n        :param key_pos: B Pk 3/6\r\n        :param value_pos: [B Pq 3/6]\r\n        :return:\r\n        \"\"\"\r\n        # NxCxP to PxNxC\r\n        if self.self_posembed is not None:\r\n            query_pos_embed = self.self_posembed(query_pos).permute(2, 0, 1)\r\n        else:\r\n            query_pos_embed = None\r\n        if self.cross_posembed is not None:\r\n            key_pos_embed = self.cross_posembed(key_pos).permute(2, 0, 1)\r\n        else:\r\n            key_pos_embed = None\r\n\r\n        query = query.permute(2, 0, 1)\r\n        key = key.permute(2, 0, 1)\r\n\r\n        if not self.cross_only:\r\n            q = k = v = self.with_pos_embed(query, query_pos_embed)\r\n            query2 = self.self_attn(q, k, value=v, key_padding_mask=query_padding_mask)[0]\r\n            query = query + self.dropout1(query2)\r\n            query = self.norm1(query)\r\n\r\n        query_d 
= self.with_pos_embed(query, query_pos_embed)\r\n        input_flatten_d = self.with_pos_embed(key, key_pos_embed)\r\n        query2 = self.multihead_attn(query=query_d.permute(1, 0, 2),\r\n                    input_flatten=input_flatten_d.permute(1, 0, 2), reference_points=reference_points,\r\n                    input_spatial_shapes=spatial_shapes, input_level_start_index=level_start_index,\r\n                    input_padding_mask=input_padding_mask\r\n                )\r\n\r\n\r\n        query2 = query2.permute(1, 0, 2)\r\n        query = query + self.dropout2(query2)\r\n        query = self.norm2(query)\r\n\r\n        query2 = self.linear2(self.dropout(self.activation(self.linear1(query))))\r\n        query = query + self.dropout3(query2)\r\n        query = self.norm3(query)\r\n\r\n        # NxCxP to PxNxC\r\n        query = query.permute(1, 2, 0)\r\n        return query\r\n"
  },
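The layer above moves between three layouts: inputs arrive channel-first as (B, C, P), self-attention runs on (P, B, C), and `MSDeformAttn` consumes (B, P, C) together with the flattened-level bookkeeping tensors. A hedged shape sketch of the expected inputs; the sizes are illustrative and no forward call is made, since the cross-attention depends on the compiled MultiScaleDeformableAttention extension:

```python
import torch

B, C, num_query = 2, 256, 200
spatial_shapes = torch.tensor([[10, 10], [5, 5]])      # (H_l, W_l) per feature level
level_start_index = torch.tensor([0, 100])             # prefix sums of H_l * W_l
num_key = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())   # 125

query = torch.rand(B, C, num_query)                    # object queries, (B, C, Pq)
key = torch.rand(B, C, num_key)                        # flattened multi-level features, (B, C, Pk)
query_pos = torch.rand(B, num_query, 2)                # consumed by self_posembed
key_pos = torch.rand(B, num_key, 2)                    # consumed by cross_posembed
reference_points = torch.rand(B, num_query, 2, 2)      # (B, Pq, n_levels, 2), normalized to [0, 1]
# forward(query, key, query_pos, key_pos, reference_points,
#         level_start_index, spatial_shapes) returns a (B, C, Pq) tensor.
```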
  {
    "path": "mmdet3d/models/utils/depth_encoder.py",
    "content": "import torch\r\nimport torch.nn as nn\r\nfrom mmdet.models.backbones.resnet import BasicBlock\r\n\r\nfrom mmdet3d.models.utils.network_modules import LayerNorm\r\nfrom mmcv.cnn import ConvModule\r\n\r\n\r\nclass DepthEncoderResNet(nn.Module):\r\n    def __init__(self, input_channel, input_channel_img, hidden_channel, depth_layers):\r\n        super().__init__()\r\n\r\n        self.depth_layers = depth_layers\r\n\r\n        self.conv_depth = nn.Sequential(\r\n            nn.Conv2d(input_channel, hidden_channel, kernel_size=3, padding=1, bias=True),\r\n            nn.BatchNorm2d(hidden_channel),\r\n            nn.ReLU(inplace=True)\r\n        )\r\n\r\n        self.inplanes = hidden_channel\r\n        self._norm_layer = nn.BatchNorm2d\r\n\r\n        self.layers = nn.ModuleList()\r\n        self.fuse_layers = nn.ModuleList()\r\n        self.output_layers = nn.ModuleList()\r\n        for i in range(len(depth_layers)):\r\n            if i == 0:\r\n                stride = 1\r\n            else:\r\n                stride = 2\r\n\r\n            self.layers.append(self._make_layer(BasicBlock, hidden_channel, depth_layers[i], stride=stride))\r\n            self.fuse_layers.append(nn.Conv2d(input_channel_img+hidden_channel, hidden_channel, kernel_size=3, padding=1))\r\n\r\n\r\n    def _make_layer(self, block, planes, blocks, stride=1):\r\n        norm_layer = self._norm_layer\r\n        downsample = None\r\n        if stride != 1 or self.inplanes != planes * block.expansion:\r\n            downsample = nn.Sequential(\r\n                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride),\r\n                norm_layer(planes * block.expansion),\r\n            )\r\n\r\n        layers = []\r\n        layers.append(block(self.inplanes, planes, stride=stride, downsample=downsample))\r\n        self.inplanes = planes * block.expansion\r\n        for _ in range(1, blocks):\r\n            layers.append(block(self.inplanes, planes))\r\n\r\n        return nn.Sequential(*layers)\r\n\r\n    def forward(self, sparse_depth, img_inputs):\r\n        depth = self.conv_depth(sparse_depth)\r\n\r\n        img_outputs = []\r\n        for i in range(len(img_inputs)):\r\n            depth = self.layers[i](depth)\r\n            depth = torch.cat([depth, img_inputs[i]], dim=1)\r\n            depth = self.fuse_layers[i](depth)\r\n            img_outputs.append(depth.clone())\r\n\r\n        return img_outputs\r\n"
  },
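A usage sketch for `DepthEncoderResNet`, assuming mmdet/mmdet3d are importable; the channel counts and map sizes below are illustrative. After the first level, each stage downsamples the depth branch by 2, concatenates the matching image feature map, and fuses back to `hidden_channel`:

```python
import torch
from mmdet3d.models.utils import DepthEncoderResNet

encoder = DepthEncoderResNet(input_channel=1, input_channel_img=256,
                             hidden_channel=256, depth_layers=[2, 2, 2])
sparse_depth = torch.rand(2, 1, 64, 176)          # e.g. a projected LiDAR depth map
img_feats = [torch.rand(2, 256, 64, 176),         # image features whose strides
             torch.rand(2, 256, 32, 88),          # match the depth branch stages
             torch.rand(2, 256, 16, 44)]
fused = encoder(sparse_depth, img_feats)
print([tuple(f.shape) for f in fused])            # three (2, 256, H_l, W_l) maps
```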
  {
    "path": "mmdet3d/models/utils/drop.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\r\nfrom typing import Any, Dict, Optional\r\n\r\nimport torch\r\nimport torch.nn as nn\r\n\r\nfrom mmcv import build_from_cfg\r\nfrom mmdet3d.models.registry import DROPOUT_LAYERS\r\n\r\n\r\ndef drop_path(x: torch.Tensor,\r\n              drop_prob: float = 0.,\r\n              training: bool = False) -> torch.Tensor:\r\n    \"\"\"Drop paths (Stochastic Depth) per sample (when applied in main path of\r\n    residual blocks).\r\n\r\n    We follow the implementation\r\n    https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py  # noqa: E501\r\n    \"\"\"\r\n    if drop_prob == 0. or not training:\r\n        return x\r\n    keep_prob = 1 - drop_prob\r\n    # handle tensors with different dimensions, not just 4D tensors.\r\n    shape = (x.shape[0], ) + (1, ) * (x.ndim - 1)\r\n    random_tensor = keep_prob + torch.rand(\r\n        shape, dtype=x.dtype, device=x.device)\r\n    output = x.div(keep_prob) * random_tensor.floor()\r\n    return output\r\n\r\n\r\n@DROPOUT_LAYERS.register_module()\r\nclass DropPath(nn.Module):\r\n    \"\"\"Drop paths (Stochastic Depth) per sample  (when applied in main path of\r\n    residual blocks).\r\n\r\n    We follow the implementation\r\n    https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py  # noqa: E501\r\n\r\n    Args:\r\n        drop_prob (float): Probability of the path to be zeroed. Default: 0.1\r\n    \"\"\"\r\n\r\n    def __init__(self, drop_prob: float = 0.1):\r\n        super().__init__()\r\n        self.drop_prob = drop_prob\r\n\r\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\r\n        return drop_path(x, self.drop_prob, self.training)\r\n\r\n\r\n@DROPOUT_LAYERS.register_module()\r\nclass Dropout(nn.Dropout):\r\n    \"\"\"A wrapper for ``torch.nn.Dropout``, We rename the ``p`` of\r\n    ``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with\r\n    ``DropPath``\r\n\r\n    Args:\r\n        drop_prob (float): Probability of the elements to be\r\n            zeroed. Default: 0.5.\r\n        inplace (bool):  Do the operation inplace or not. Default: False.\r\n    \"\"\"\r\n\r\n    def __init__(self, drop_prob: float = 0.5, inplace: bool = False):\r\n        super().__init__(p=drop_prob, inplace=inplace)\r\n\r\n\r\ndef build_dropout(cfg: Dict, default_args: Optional[Dict] = None) -> Any:\r\n    \"\"\"Builder for drop out layers.\"\"\"\r\n    return build_from_cfg(cfg, DROPOUT_LAYERS, default_args)"
  },
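A numeric check of the `drop_path` scaling above: dividing the kept samples by `keep_prob` preserves the expected activation, which is what lets stochastic depth be disabled at inference without any rescaling.

```python
import torch

torch.manual_seed(0)
x = torch.ones(10000, 8)
keep_prob = 0.9
mask = (keep_prob + torch.rand(x.shape[0], 1)).floor()   # 1 with prob keep_prob, else 0
out = x.div(keep_prob) * mask
print(mask.mean().item())    # about 0.9: fraction of samples kept
print(out.mean().item())     # about 1.0: expectation matches the input
```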
  {
    "path": "mmdet3d/models/utils/ffn.py",
    "content": "import copy\r\nimport numpy as np\r\nimport torch\r\nfrom mmcv.cnn import ConvModule, build_conv_layer, kaiming_init\r\nfrom mmcv.runner import force_fp32\r\nfrom torch import nn\r\nimport torch.nn.functional as F\r\nfrom torch.nn.parameter import Parameter\r\nfrom torch.nn import Linear\r\nfrom torch.nn.init import xavier_uniform_, constant_\r\n\r\n\r\nclass FFN(nn.Module):\r\n    def __init__(self,\r\n                 in_channels,\r\n                 heads,\r\n                 head_conv=64,\r\n                 final_kernel=1,\r\n                 init_bias=-2.19,\r\n                 conv_cfg=dict(type='Conv1d'),\r\n                 norm_cfg=dict(type='BN1d'),\r\n                 bias='auto',\r\n                 **kwargs):\r\n        super(FFN, self).__init__()\r\n\r\n        self.heads = heads\r\n        self.init_bias = init_bias\r\n        for head in self.heads:\r\n            if len(self.heads[head]) == 2:\r\n                classes, num_conv = self.heads[head]\r\n                need_bn = True\r\n            else:\r\n                classes, num_conv, need_bn = self.heads[head]\r\n\r\n            conv_layers = []\r\n            c_in = in_channels\r\n            for i in range(num_conv - 1):\r\n                if need_bn:\r\n                    conv_layers.append(\r\n                        ConvModule(\r\n                            c_in,\r\n                            head_conv,\r\n                            kernel_size=final_kernel,\r\n                            stride=1,\r\n                            padding=final_kernel // 2,\r\n                            bias=bias,\r\n                            conv_cfg=conv_cfg,\r\n                            norm_cfg=norm_cfg))\r\n                else:\r\n                    conv_layers.append(\r\n                        ConvModule(\r\n                            c_in,\r\n                            head_conv,\r\n                            kernel_size=final_kernel,\r\n                            stride=1,\r\n                            padding=final_kernel // 2,\r\n                            bias=bias,\r\n                            conv_cfg=conv_cfg,\r\n                            norm_cfg=None))\r\n                c_in = head_conv\r\n\r\n            conv_layers.append(\r\n                build_conv_layer(\r\n                    conv_cfg,\r\n                    head_conv,\r\n                    classes,\r\n                    kernel_size=final_kernel,\r\n                    stride=1,\r\n                    padding=final_kernel // 2,\r\n                    bias=True))\r\n            conv_layers = nn.Sequential(*conv_layers)\r\n\r\n            self.__setattr__(head, conv_layers)\r\n\r\n    def init_weights(self):\r\n        \"\"\"Initialize weights.\"\"\"\r\n        for head in self.heads:\r\n            if 'heatmap' in head or 'cls' in head:\r\n                self.__getattr__(head)[-1].bias.data.fill_(self.init_bias)\r\n            else:\r\n                for m in self.__getattr__(head).modules():\r\n                    if isinstance(m, nn.Conv2d):\r\n                        kaiming_init(m)\r\n\r\n    def forward(self, x):\r\n        \"\"\"Forward function for SepHead.\r\n\r\n        Args:\r\n            x (torch.Tensor): Input feature map with the shape of\r\n                [B, 512, 128, 128].\r\n\r\n        Returns:\r\n            dict[str: torch.Tensor]: contains the following keys:\r\n\r\n                -reg （torch.Tensor): 2D regression value with the \\\r\n                    shape of [B, 2, H, W].\r\n           
     -height (torch.Tensor): Height value with the \\\r\n                    shape of [B, 1, H, W].\r\n                -dim (torch.Tensor): Size value with the shape \\\r\n                    of [B, 3, H, W].\r\n                -rot (torch.Tensor): Rotation value with the \\\r\n                    shape of [B, 1, H, W].\r\n                -vel (torch.Tensor): Velocity value with the \\\r\n                    shape of [B, 2, H, W].\r\n                -heatmap (torch.Tensor): Heatmap with the shape of \\\r\n                    [B, N, H, W].\r\n        \"\"\"\r\n        ret_dict = dict()\r\n        for head in self.heads:\r\n            ret_dict[head] = self.__getattr__(head)(x)\r\n\r\n        return ret_dict\r\n\r\n\r\nclass FFNLN(nn.Module):\r\n    def __init__(self,\r\n                 in_channels,\r\n                 heads,\r\n                 head_conv=64,\r\n                 init_bias=-2.19,\r\n                 **kwargs):\r\n        super(FFNLN, self).__init__()\r\n\r\n        self.heads = heads\r\n        self.init_bias = init_bias\r\n        for head in self.heads:\r\n            if len(self.heads[head]) == 2:\r\n                classes, num_conv = self.heads[head]\r\n                need_norm = True\r\n            else:\r\n                classes, num_conv, need_norm = self.heads[head]\r\n\r\n            conv_layers = []\r\n            c_in = in_channels\r\n            for i in range(num_conv - 1):\r\n                if need_norm:\r\n                    conv_layers.append(\r\n                        nn.Linear(\r\n                            c_in,\r\n                            head_conv,\r\n                            bias=False,\r\n                        )\r\n                    )\r\n                    conv_layers.append(nn.LayerNorm(head_conv))\r\n                else:\r\n                    conv_layers.append(\r\n                        nn.Linear(\r\n                            c_in,\r\n                            head_conv,\r\n                            bias=True,\r\n                        )\r\n                    )\r\n                conv_layers.append(nn.ReLU(inplace=True))\r\n                c_in = head_conv\r\n\r\n            conv_layers.append(\r\n                nn.Linear(\r\n                    head_conv,\r\n                    classes,\r\n                    bias=True,\r\n                )\r\n            )\r\n            conv_layers = nn.Sequential(*conv_layers)\r\n\r\n            self.__setattr__(head, conv_layers)\r\n\r\n    def init_weights(self):\r\n        \"\"\"Initialize weights.\"\"\"\r\n        for head in self.heads:\r\n            if 'heatmap' in head or 'cls' in head:\r\n                self.__getattr__(head)[-1].bias.data.fill_(self.init_bias)\r\n            else:\r\n                for m in self.__getattr__(head).modules():\r\n                    if isinstance(m, nn.Linear):\r\n                        kaiming_init(m)\r\n\r\n    def forward(self, x):\r\n        \"\"\"Forward function for SepHead.\r\n\r\n        Args:\r\n            x (torch.Tensor): Input feature map with the shape of\r\n                [B, 512, 128, 128].\r\n\r\n        Returns:\r\n            dict[str: torch.Tensor]: contains the following keys:\r\n\r\n                -reg （torch.Tensor): 2D regression value with the \\\r\n                    shape of [B, 2, H, W].\r\n                -height (torch.Tensor): Height value with the \\\r\n                    shape of [B, 1, H, W].\r\n                -dim (torch.Tensor): Size value with the shape \\\r\n                    of [B, 3, H, 
W].\r\n                -rot (torch.Tensor): Rotation value with the \\\r\n                    shape of [B, 1, H, W].\r\n                -vel (torch.Tensor): Velocity value with the \\\r\n                    shape of [B, 2, H, W].\r\n                -heatmap (torch.Tensor): Heatmap with the shape of \\\r\n                    [B, N, H, W].\r\n        \"\"\"\r\n        ret_dict = dict()\r\n        x = x.permute(0, 2, 1).contiguous()\r\n        for head in self.heads:\r\n            ret_dict[head] = self.__getattr__(head)(x)\r\n            ret_dict[head] = ret_dict[head].permute(0, 2, 1).contiguous()\r\n\r\n        return ret_dict\r\n\r\nclass FFNReg(nn.Module):\r\n    def __init__(self,\r\n                 in_channels,\r\n                 heads,\r\n                 head_conv=64,\r\n                 init_bias=-2.19,\r\n                 **kwargs):\r\n        super(FFNReg, self).__init__()\r\n\r\n        self.heads = heads\r\n        self.init_bias = init_bias\r\n        for head in self.heads:\r\n            classes, num_conv = self.heads[head]\r\n\r\n            conv_layers = []\r\n            c_in = in_channels\r\n            for i in range(num_conv - 1):\r\n                conv_layers.append(\r\n                    nn.Linear(\r\n                        c_in,\r\n                        head_conv,\r\n                        bias=False,\r\n                    )\r\n                )\r\n                if head == \"heatmap\" or head == \"cls\":\r\n                    conv_layers.append(nn.LayerNorm(head_conv))\r\n                conv_layers.append(nn.ReLU(inplace=True))\r\n                c_in = head_conv\r\n\r\n            conv_layers.append(\r\n                nn.Linear(\r\n                    head_conv,\r\n                    classes,\r\n                    bias=True,\r\n                )\r\n            )\r\n            conv_layers = nn.Sequential(*conv_layers)\r\n\r\n            self.__setattr__(head, conv_layers)\r\n\r\n    def init_weights(self):\r\n        \"\"\"Initialize weights.\"\"\"\r\n        for head in self.heads:\r\n            if head == 'heatmap' or head == 'cls':\r\n                self.__getattr__(head)[-1].bias.data.fill_(self.init_bias)\r\n            else:\r\n                for m in self.__getattr__(head).modules():\r\n                    if isinstance(m, nn.Linear):\r\n                        kaiming_init(m)\r\n\r\n    def forward(self, x):\r\n        \"\"\"Forward function for SepHead.\r\n\r\n        Args:\r\n            x (torch.Tensor): Input feature map with the shape of\r\n                [B, 512, 128, 128].\r\n\r\n        Returns:\r\n            dict[str: torch.Tensor]: contains the following keys:\r\n\r\n                -reg （torch.Tensor): 2D regression value with the \\\r\n                    shape of [B, 2, H, W].\r\n                -height (torch.Tensor): Height value with the \\\r\n                    shape of [B, 1, H, W].\r\n                -dim (torch.Tensor): Size value with the shape \\\r\n                    of [B, 3, H, W].\r\n                -rot (torch.Tensor): Rotation value with the \\\r\n                    shape of [B, 1, H, W].\r\n                -vel (torch.Tensor): Velocity value with the \\\r\n                    shape of [B, 2, H, W].\r\n                -heatmap (torch.Tensor): Heatmap with the shape of \\\r\n                    [B, N, H, W].\r\n        \"\"\"\r\n        ret_dict = dict()\r\n        x = x.permute(0, 2, 1).contiguous()\r\n        for head in self.heads:\r\n            ret_dict[head] = self.__getattr__(head)(x)\r\n            
ret_dict[head] = ret_dict[head].permute(0, 2, 1).contiguous()\r\n\r\n        if 'bbox_3d' in ret_dict:\r\n            ret_dict['center'] = ret_dict['bbox_3d'][:, 0:2]\r\n            ret_dict['dim'] = ret_dict['bbox_3d'][:, 2:5]\r\n            ret_dict['height'] = ret_dict['bbox_3d'][:, 5:6]\r\n            ret_dict['rot'] = ret_dict['bbox_3d'][:, 6:8]\r\n            ret_dict['vel'] = ret_dict['bbox_3d'][:, 8:10]\r\n            del ret_dict['bbox_3d']\r\n        return ret_dict"
  },
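A usage sketch for `FFN`, assuming mmcv/mmdet3d are importable. Each key of `heads` maps to `(out_channels, num_conv)`; the head names and channel counts below are illustrative rather than a config taken from the repo:

```python
import torch
from mmdet3d.models.utils import FFN

heads = dict(center=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), heatmap=(10, 2))
ffn = FFN(in_channels=128, heads=heads, head_conv=64)
ffn.init_weights()                       # fills the heatmap head's bias with init_bias
x = torch.rand(4, 128, 200)              # (B, C, num_proposals), matching the Conv1d layers
out = ffn(x)
print({name: tuple(t.shape) for name, t in out.items()})   # e.g. 'center': (4, 2, 200)
```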
  {
    "path": "mmdet3d/models/utils/inverse_sigmoid.py",
    "content": "import torch\r\n\r\ndef inverse_sigmoid(x, eps=1e-5):\r\n    x = x.clamp(min=0, max=1)\r\n    x1 = x.clamp(min=eps)\r\n    x2 = (1 - x).clamp(min=eps)\r\n    return torch.log(x1/x2)"
  },
  {
    "path": "mmdet3d/models/utils/mlp.py",
    "content": "from mmcv.cnn import ConvModule\nfrom torch import nn as nn\n\n\nclass MLP(nn.Module):\n    \"\"\"A simple MLP module.\n\n    Pass features (B, C, N) through an MLP.\n\n    Args:\n        in_channels (int): Number of channels of input features.\n            Default: 18.\n        conv_channels (tuple[int]): Out channels of the convolution.\n            Default: (256, 256).\n        conv_cfg (dict): Config of convolution.\n            Default: dict(type='Conv1d').\n        norm_cfg (dict): Config of normalization.\n            Default: dict(type='BN1d').\n        act_cfg (dict): Config of activation.\n            Default: dict(type='ReLU').\n    \"\"\"\n\n    def __init__(self,\n                 in_channel=18,\n                 conv_channels=(256, 256),\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=dict(type='BN1d'),\n                 act_cfg=dict(type='ReLU')):\n        super().__init__()\n        self.mlp = nn.Sequential()\n        prev_channels = in_channel\n        for i, conv_channel in enumerate(conv_channels):\n            self.mlp.add_module(\n                f'layer{i}',\n                ConvModule(\n                    prev_channels,\n                    conv_channels[i],\n                    1,\n                    padding=0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg,\n                    bias=True,\n                    inplace=True))\n            prev_channels = conv_channels[i]\n\n    def forward(self, img_features):\n        return self.mlp(img_features)\n"
  },
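A usage sketch for `MLP`, assuming mmcv/mmdet3d are importable: the module is a stack of 1x1 Conv1d + BN1d + ReLU blocks, so it maps (B, in_channel, N) features to (B, conv_channels[-1], N):

```python
import torch
from mmdet3d.models.utils import MLP

mlp = MLP(in_channel=18, conv_channels=(256, 256))
feats = torch.rand(2, 18, 1024)          # e.g. per-point image features
print(mlp(feats).shape)                  # torch.Size([2, 256, 1024])
```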
  {
    "path": "mmdet3d/models/utils/network_modules.py",
    "content": "import torch\r\nimport torch.nn as nn\r\nimport torch.nn.functional as F\r\n\r\nfrom .inverse_sigmoid import inverse_sigmoid\r\n\r\ndef denormalize_pos(normal_pos, x_max, y_max, sigmoid=True):\r\n    max_xy = torch.Tensor([x_max, y_max]).to(normal_pos.device).view(1, 1, 2)\r\n    if sigmoid:\r\n        pos = normal_pos.sigmoid() * max_xy\r\n    else:\r\n        pos = normal_pos * max_xy\r\n    return pos\r\n\r\n\r\ndef normalize_pos(pos, x_max, y_max):\r\n    max_xy = torch.Tensor([x_max, y_max]).to(pos.device).view(1, 1, 2)\r\n    normal_pos = pos / max_xy\r\n    return inverse_sigmoid(normal_pos)\r\n\r\n\r\nclass LayerNorm(nn.Module):\r\n    r\"\"\" LayerNorm that supports two data formats: channels_last (default) or channels_first.\r\n    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with\r\n    shape (batch_size, height, width, channels) while channels_first corresponds to inputs\r\n    with shape (batch_size, channels, height, width).\r\n    \"\"\"\r\n\r\n    def __init__(self, normalized_shape, eps=1e-6, data_format=\"channels_last\"):\r\n        super().__init__()\r\n        self.weight = nn.Parameter(torch.ones(normalized_shape))\r\n        self.bias = nn.Parameter(torch.zeros(normalized_shape))\r\n        self.eps = eps\r\n        self.data_format = data_format\r\n        if self.data_format not in [\"channels_last\", \"channels_first\"]:\r\n            raise NotImplementedError\r\n        self.normalized_shape = (normalized_shape,)\r\n\r\n    def forward(self, x):\r\n        if self.data_format == \"channels_last\":\r\n            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)\r\n        elif self.data_format == \"channels_first\":\r\n            u = x.mean(1, keepdim=True)\r\n            s = (x - u).pow(2).mean(1, keepdim=True)\r\n            x = (x - u) / torch.sqrt(s + self.eps)\r\n            x = self.weight[:, None, None] * x + self.bias[:, None, None]\r\n            return x\r\n\r\nclass ConvLN(nn.Module):\r\n    def __init__(self, input_channel, hidden_channel, kernel_size=3,  stride=1, padding=1, require_act=True):\r\n        super().__init__()\r\n        if require_act:\r\n            self.module = nn.Sequential(\r\n                nn.Conv2d(input_channel, hidden_channel, kernel_size=kernel_size, stride=stride, padding=padding),\r\n                LayerNorm(hidden_channel, data_format=\"channels_first\"),\r\n                nn.ReLU()\r\n            )\r\n        else:\r\n            self.module = nn.Sequential(\r\n                nn.Conv2d(input_channel, hidden_channel, kernel_size=kernel_size, stride=stride, padding=padding),\r\n                LayerNorm(hidden_channel, data_format=\"channels_first\"),\r\n            )\r\n\r\n    def forward(self, x):\r\n        # [bs, C, H, W]\r\n        x = self.module(x)\r\n        return x\r\n\r\nclass SE_Block(nn.Module):\r\n    def __init__(self, c):\r\n        super().__init__()\r\n        self.att = nn.Sequential(\r\n            nn.AdaptiveAvgPool2d(1),\r\n            nn.Conv2d(c, c, kernel_size=1, stride=1),\r\n            nn.Sigmoid()\r\n        )\r\n    def forward(self, x):\r\n        return x * self.att(x)\r\n"
  },
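An equivalence check for the `channels_first` branch of the custom `LayerNorm`, assuming mmdet3d is importable: it should agree with `torch.nn.functional.layer_norm` applied after moving channels to the last dimension.

```python
import torch
import torch.nn.functional as F
from mmdet3d.models.utils import LayerNorm

ln = LayerNorm(16, data_format="channels_first")
x = torch.rand(2, 16, 8, 8)
ref = F.layer_norm(x.permute(0, 2, 3, 1), (16,), ln.weight, ln.bias, ln.eps)
print(torch.allclose(ln(x), ref.permute(0, 3, 1, 2), atol=1e-5))   # True
```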
  {
    "path": "mmdet3d/models/utils/ops/functions/__init__.py",
    "content": "# ------------------------------------------------------------------------------------------------\n# Deformable DETR\n# Copyright (c) 2020 SenseTime. All Rights Reserved.\n# Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n# ------------------------------------------------------------------------------------------------\n# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n# ------------------------------------------------------------------------------------------------\n\nfrom .ms_deform_attn_func import MSDeformAttnFunction\n\n__all__ = ['MSDeformAttnFunction']\n"
  },
  {
    "path": "mmdet3d/models/utils/ops/functions/ms_deform_attn_func.py",
    "content": "# ------------------------------------------------------------------------------------------------\n# Deformable DETR\n# Copyright (c) 2020 SenseTime. All Rights Reserved.\n# Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n# ------------------------------------------------------------------------------------------------\n# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n# ------------------------------------------------------------------------------------------------\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport torch\nimport torch.nn.functional as F\nfrom torch.autograd import Function\nfrom torch.autograd.function import once_differentiable\n\nimport MultiScaleDeformableAttention as MSDA\n\n\nclass MSDeformAttnFunction(Function):\n    @staticmethod\n    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):\n        ctx.im2col_step = im2col_step\n        output = MSDA.ms_deform_attn_forward(\n            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)\n        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)\n        return output\n\n    @staticmethod\n    @once_differentiable\n    def backward(ctx, grad_output):\n        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors\n        grad_value, grad_sampling_loc, grad_attn_weight = \\\n            MSDA.ms_deform_attn_backward(\n                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)\n\n        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None\n\n\ndef ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights):\n    # for debug and test only,\n    # need to use cuda version instead\n    N_, S_, M_, D_ = value.shape\n    _, Lq_, M_, L_, P_, _ = sampling_locations.shape\n    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)\n    sampling_grids = 2 * sampling_locations - 1\n    sampling_value_list = []\n    for lid_, (H_, W_) in enumerate(value_spatial_shapes):\n        # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_\n        value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)\n        # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2\n        sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)\n        # N_*M_, D_, Lq_, P_\n        sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_,\n                                          mode='bilinear', padding_mode='zeros', align_corners=False)\n        sampling_value_list.append(sampling_value_l_)\n    # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_)\n    attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_)\n    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_)\n    return output.transpose(1, 2).contiguous()\n"
  },
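A shape sketch for the reference implementation `ms_deform_attn_core_pytorch`. The module imports the compiled `MultiScaleDeformableAttention` extension at import time, so even this pure-PyTorch path is only reachable after the extension from `setup.py` has been built; the sizes below are illustrative:

```python
import torch
from mmdet3d.models.utils.ops.functions.ms_deform_attn_func import \
    ms_deform_attn_core_pytorch

N, M, D = 2, 8, 32                                   # batch, heads, channels per head
spatial_shapes = torch.tensor([[10, 10], [5, 5]])    # two feature levels
S = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())   # 125 keys in total
Lq, L, P = 50, 2, 4                                  # queries, levels, sampling points

value = torch.rand(N, S, M, D)
sampling_locations = torch.rand(N, Lq, M, L, P, 2)   # normalized to [0, 1]
attention_weights = torch.rand(N, Lq, M, L, P).flatten(-2).softmax(-1).view(N, Lq, M, L, P)
out = ms_deform_attn_core_pytorch(value, spatial_shapes, sampling_locations, attention_weights)
print(out.shape)                                     # torch.Size([2, 50, 256]), i.e. (N, Lq, M * D)
```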
  {
    "path": "mmdet3d/models/utils/ops/make.sh",
    "content": "#!/usr/bin/env bash\n# ------------------------------------------------------------------------------------------------\n# Deformable DETR\n# Copyright (c) 2020 SenseTime. All Rights Reserved.\n# Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n# ------------------------------------------------------------------------------------------------\n# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n# ------------------------------------------------------------------------------------------------\n\npython setup.py build install\n"
  },
  {
    "path": "mmdet3d/models/utils/ops/modules/__init__.py",
    "content": "# ------------------------------------------------------------------------------------------------\n# Deformable DETR\n# Copyright (c) 2020 SenseTime. All Rights Reserved.\n# Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n# ------------------------------------------------------------------------------------------------\n# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n# ------------------------------------------------------------------------------------------------\n\nfrom .ms_deform_attn import MSDeformAttn\n"
  },
  {
    "path": "mmdet3d/models/utils/ops/modules/ms_deform_attn.py",
    "content": "# ------------------------------------------------------------------------------------------------\n# Deformable DETR\n# Copyright (c) 2020 SenseTime. All Rights Reserved.\n# Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n# ------------------------------------------------------------------------------------------------\n# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n# ------------------------------------------------------------------------------------------------\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport warnings\nimport math\n\nimport torch\nfrom torch import nn\nimport torch.nn.functional as F\nfrom torch.nn.init import xavier_uniform_, constant_\n\nfrom ..functions import MSDeformAttnFunction\n\n\ndef _is_power_of_2(n):\n    if (not isinstance(n, int)) or (n < 0):\n        raise ValueError(\"invalid input for _is_power_of_2: {} (type: {})\".format(n, type(n)))\n    return (n & (n-1) == 0) and n != 0\n\n\nclass MSDeformAttn(nn.Module):\n    def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):\n        \"\"\"\n        Multi-Scale Deformable Attention Module\n        :param d_model      hidden dimension\n        :param n_levels     number of feature levels\n        :param n_heads      number of attention heads\n        :param n_points     number of sampling points per attention head per feature level\n        \"\"\"\n        super().__init__()\n        if d_model % n_heads != 0:\n            raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads))\n        _d_per_head = d_model // n_heads\n        # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation\n        if not _is_power_of_2(_d_per_head):\n            warnings.warn(\"You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 \"\n                          \"which is more efficient in our CUDA implementation.\")\n\n        self.im2col_step = 64\n\n        self.d_model = d_model\n        self.n_levels = n_levels\n        self.n_heads = n_heads\n        self.n_points = n_points\n\n        self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2)\n        self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points)\n        self.value_proj = nn.Linear(d_model, d_model)\n        self.output_proj = nn.Linear(d_model, d_model)\n\n        self._reset_parameters()\n\n    def _reset_parameters(self):\n        constant_(self.sampling_offsets.weight.data, 0.)\n        thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)\n        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)\n        grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2).repeat(1, self.n_levels, self.n_points, 1)\n        for i in range(self.n_points):\n            grid_init[:, :, i, :] *= i + 1\n        with torch.no_grad():\n            self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1))\n        constant_(self.attention_weights.weight.data, 0.)\n        constant_(self.attention_weights.bias.data, 0.)\n        xavier_uniform_(self.value_proj.weight.data)\n        constant_(self.value_proj.bias.data, 0.)\n        xavier_uniform_(self.output_proj.weight.data)\n        constant_(self.output_proj.bias.data, 0.)\n\n    def forward(self, 
query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):\n        \"\"\"\n        :param query                       (N, Length_{query}, C)\n        :param reference_points            (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area\n                                        or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes\n        :param input_flatten               (N, \\sum_{l=0}^{L-1} H_l \\cdot W_l, C)\n        :param input_spatial_shapes        (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})]\n        :param input_level_start_index     (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}]\n        :param input_padding_mask          (N, \\sum_{l=0}^{L-1} H_l \\cdot W_l), True for padding elements, False for non-padding elements\n\n        :return output                     (N, Length_{query}, C)\n        \"\"\"\n\n        N, Len_q, _ = query.shape\n        N, Len_in, _ = input_flatten.shape\n        assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in\n\n        value = self.value_proj(input_flatten)\n        if input_padding_mask is not None:\n            value = value.masked_fill(input_padding_mask[..., None], float(0))\n        value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)\n        sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2)\n        attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)\n        attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)\n        # N, Len_q, n_heads, n_levels, n_points, 2\n        if reference_points.shape[-1] == 2:\n            offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1)\n            sampling_locations = reference_points[:, :, None, :, None, :] \\\n                                 + sampling_offsets / offset_normalizer[None, None, None, :, None, :]\n        elif reference_points.shape[-1] == 4:\n            sampling_locations = reference_points[:, :, None, :, None, :2] \\\n                                 + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5\n        else:\n            raise ValueError(\n                'Last dim of reference_points must be 2 or 4, but get {} instead.'.format(reference_points.shape[-1]))\n        output = MSDeformAttnFunction.apply(\n            value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights, self.im2col_step)\n        output = self.output_proj(output)\n        return output\n"
  },
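A usage sketch for `MSDeformAttn`, under the assumption that the CUDA extension has been built (`python setup.py build install`, see `make.sh`) and a GPU is available; the level sizes and query count below are illustrative:

```python
import torch
from mmdet3d.models.utils.ops.modules import MSDeformAttn

attn = MSDeformAttn(d_model=256, n_levels=2, n_heads=8, n_points=4).cuda()
spatial_shapes = torch.tensor([[32, 32], [16, 16]], device='cuda')
areas = spatial_shapes[:, 0] * spatial_shapes[:, 1]
level_start_index = torch.cat([areas.new_zeros(1), areas.cumsum(0)[:-1]])   # [0, 1024]
query = torch.rand(2, 100, 256, device='cuda')
input_flatten = torch.rand(2, int(areas.sum()), 256, device='cuda')         # (N, 1280, C)
reference_points = torch.rand(2, 100, 2, 2, device='cuda')                  # (N, Len_q, n_levels, 2)
out = attn(query, reference_points, input_flatten, spatial_shapes, level_start_index)
print(out.shape)   # torch.Size([2, 100, 256])
```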
  {
    "path": "mmdet3d/models/utils/ops/setup.py",
    "content": "# ------------------------------------------------------------------------------------------------\n# Deformable DETR\n# Copyright (c) 2020 SenseTime. All Rights Reserved.\n# Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n# ------------------------------------------------------------------------------------------------\n# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n# ------------------------------------------------------------------------------------------------\n\nimport os\nimport glob\n\nimport torch\n\nfrom torch.utils.cpp_extension import CUDA_HOME\nfrom torch.utils.cpp_extension import CppExtension\nfrom torch.utils.cpp_extension import CUDAExtension\n\nfrom setuptools import find_packages\nfrom setuptools import setup\n\nrequirements = [\"torch\", \"torchvision\"]\n\ndef get_extensions():\n    this_dir = os.path.dirname(os.path.abspath(__file__))\n    extensions_dir = os.path.join(this_dir, \"src\")\n\n    main_file = glob.glob(os.path.join(extensions_dir, \"*.cpp\"))\n    source_cpu = glob.glob(os.path.join(extensions_dir, \"cpu\", \"*.cpp\"))\n    source_cuda = glob.glob(os.path.join(extensions_dir, \"cuda\", \"*.cu\"))\n\n    sources = main_file + source_cpu\n    extension = CppExtension\n    extra_compile_args = {\"cxx\": []}\n    define_macros = []\n\n    if torch.cuda.is_available() and CUDA_HOME is not None:\n        extension = CUDAExtension\n        sources += source_cuda\n        define_macros += [(\"WITH_CUDA\", None)]\n        extra_compile_args[\"nvcc\"] = [\n            \"-DCUDA_HAS_FP16=1\",\n            \"-D__CUDA_NO_HALF_OPERATORS__\",\n            \"-D__CUDA_NO_HALF_CONVERSIONS__\",\n            \"-D__CUDA_NO_HALF2_OPERATORS__\",\n        ]\n    else:\n        raise NotImplementedError('Cuda is not availabel')\n\n    sources = [os.path.join(extensions_dir, s) for s in sources]\n    include_dirs = [extensions_dir]\n    ext_modules = [\n        extension(\n            \"MultiScaleDeformableAttention\",\n            sources,\n            include_dirs=include_dirs,\n            define_macros=define_macros,\n            extra_compile_args=extra_compile_args,\n        )\n    ]\n    return ext_modules\n\nsetup(\n    name=\"MultiScaleDeformableAttention\",\n    version=\"1.0\",\n    author=\"Weijie Su\",\n    url=\"https://github.com/fundamentalvision/Deformable-DETR\",\n    description=\"PyTorch Wrapper for CUDA Functions of Multi-Scale Deformable Attention\",\n    packages=find_packages(exclude=(\"configs\", \"tests\",)),\n    ext_modules=get_extensions(),\n    cmdclass={\"build_ext\": torch.utils.cpp_extension.BuildExtension},\n)\n"
  },
  {
    "path": "mmdet3d/models/utils/ops/src/cpu/ms_deform_attn_cpu.cpp",
    "content": "/*!\n**************************************************************************************************\n* Deformable DETR\n* Copyright (c) 2020 SenseTime. All Rights Reserved.\n* Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n**************************************************************************************************\n* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#include <vector>\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n\n\nat::Tensor\nms_deform_attn_cpu_forward(\n    const at::Tensor &value, \n    const at::Tensor &spatial_shapes,\n    const at::Tensor &level_start_index,\n    const at::Tensor &sampling_loc,\n    const at::Tensor &attn_weight,\n    const int im2col_step)\n{\n    AT_ERROR(\"Not implement on cpu\");\n}\n\nstd::vector<at::Tensor>\nms_deform_attn_cpu_backward(\n    const at::Tensor &value, \n    const at::Tensor &spatial_shapes,\n    const at::Tensor &level_start_index,\n    const at::Tensor &sampling_loc,\n    const at::Tensor &attn_weight,\n    const at::Tensor &grad_output,\n    const int im2col_step)\n{\n    AT_ERROR(\"Not implement on cpu\");\n}\n\n"
  },
  {
    "path": "mmdet3d/models/utils/ops/src/cpu/ms_deform_attn_cpu.h",
    "content": "/*!\n**************************************************************************************************\n* Deformable DETR\n* Copyright (c) 2020 SenseTime. All Rights Reserved.\n* Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n**************************************************************************************************\n* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#pragma once\n#include <torch/extension.h>\n\nat::Tensor\nms_deform_attn_cpu_forward(\n    const at::Tensor &value, \n    const at::Tensor &spatial_shapes,\n    const at::Tensor &level_start_index,\n    const at::Tensor &sampling_loc,\n    const at::Tensor &attn_weight,\n    const int im2col_step);\n\nstd::vector<at::Tensor>\nms_deform_attn_cpu_backward(\n    const at::Tensor &value, \n    const at::Tensor &spatial_shapes,\n    const at::Tensor &level_start_index,\n    const at::Tensor &sampling_loc,\n    const at::Tensor &attn_weight,\n    const at::Tensor &grad_output,\n    const int im2col_step);\n\n\n"
  },
  {
    "path": "mmdet3d/models/utils/ops/src/cuda/ms_deform_attn_cuda.cu",
    "content": "/*!\n**************************************************************************************************\n* Deformable DETR\n* Copyright (c) 2020 SenseTime. All Rights Reserved.\n* Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n**************************************************************************************************\n* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#include <vector>\n#include \"cuda/ms_deform_im2col_cuda.cuh\"\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n#include <cuda.h>\n#include <cuda_runtime.h>\n\n\nat::Tensor ms_deform_attn_cuda_forward(\n    const at::Tensor &value, \n    const at::Tensor &spatial_shapes,\n    const at::Tensor &level_start_index,\n    const at::Tensor &sampling_loc,\n    const at::Tensor &attn_weight,\n    const int im2col_step)\n{\n    AT_ASSERTM(value.is_contiguous(), \"value tensor has to be contiguous\");\n    AT_ASSERTM(spatial_shapes.is_contiguous(), \"spatial_shapes tensor has to be contiguous\");\n    AT_ASSERTM(level_start_index.is_contiguous(), \"level_start_index tensor has to be contiguous\");\n    AT_ASSERTM(sampling_loc.is_contiguous(), \"sampling_loc tensor has to be contiguous\");\n    AT_ASSERTM(attn_weight.is_contiguous(), \"attn_weight tensor has to be contiguous\");\n\n    AT_ASSERTM(value.type().is_cuda(), \"value must be a CUDA tensor\");\n    AT_ASSERTM(spatial_shapes.type().is_cuda(), \"spatial_shapes must be a CUDA tensor\");\n    AT_ASSERTM(level_start_index.type().is_cuda(), \"level_start_index must be a CUDA tensor\");\n    AT_ASSERTM(sampling_loc.type().is_cuda(), \"sampling_loc must be a CUDA tensor\");\n    AT_ASSERTM(attn_weight.type().is_cuda(), \"attn_weight must be a CUDA tensor\");\n\n    const int batch = value.size(0);\n    const int spatial_size = value.size(1);\n    const int num_heads = value.size(2);\n    const int channels = value.size(3);\n\n    const int num_levels = spatial_shapes.size(0);\n\n    const int num_query = sampling_loc.size(1);\n    const int num_point = sampling_loc.size(4);\n\n    const int im2col_step_ = std::min(batch, im2col_step);\n\n    AT_ASSERTM(batch % im2col_step_ == 0, \"batch(%d) must divide im2col_step(%d)\", batch, im2col_step_);\n    \n    auto output = at::zeros({batch, num_query, num_heads, channels}, value.options());\n\n    const int batch_n = im2col_step_;\n    auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});\n    auto per_value_size = spatial_size * num_heads * channels;\n    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;\n    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;\n    for (int n = 0; n < batch/im2col_step_; ++n)\n    {\n        auto columns = output_n.select(0, n);\n        AT_DISPATCH_FLOATING_TYPES(value.type(), \"ms_deform_attn_forward_cuda\", ([&] {\n            ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(),\n                value.data<scalar_t>() + n * im2col_step_ * per_value_size,\n                spatial_shapes.data<int64_t>(),\n                level_start_index.data<int64_t>(),\n                sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,\n                attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,\n                batch_n, spatial_size, num_heads, channels, 
num_levels, num_query, num_point,\n                columns.data<scalar_t>());\n\n        }));\n    }\n\n    output = output.view({batch, num_query, num_heads*channels});\n\n    return output;\n}\n\n\nstd::vector<at::Tensor> ms_deform_attn_cuda_backward(\n    const at::Tensor &value, \n    const at::Tensor &spatial_shapes,\n    const at::Tensor &level_start_index,\n    const at::Tensor &sampling_loc,\n    const at::Tensor &attn_weight,\n    const at::Tensor &grad_output,\n    const int im2col_step)\n{\n\n    AT_ASSERTM(value.is_contiguous(), \"value tensor has to be contiguous\");\n    AT_ASSERTM(spatial_shapes.is_contiguous(), \"spatial_shapes tensor has to be contiguous\");\n    AT_ASSERTM(level_start_index.is_contiguous(), \"level_start_index tensor has to be contiguous\");\n    AT_ASSERTM(sampling_loc.is_contiguous(), \"sampling_loc tensor has to be contiguous\");\n    AT_ASSERTM(attn_weight.is_contiguous(), \"attn_weight tensor has to be contiguous\");\n    AT_ASSERTM(grad_output.is_contiguous(), \"grad_output tensor has to be contiguous\");\n\n    AT_ASSERTM(value.type().is_cuda(), \"value must be a CUDA tensor\");\n    AT_ASSERTM(spatial_shapes.type().is_cuda(), \"spatial_shapes must be a CUDA tensor\");\n    AT_ASSERTM(level_start_index.type().is_cuda(), \"level_start_index must be a CUDA tensor\");\n    AT_ASSERTM(sampling_loc.type().is_cuda(), \"sampling_loc must be a CUDA tensor\");\n    AT_ASSERTM(attn_weight.type().is_cuda(), \"attn_weight must be a CUDA tensor\");\n    AT_ASSERTM(grad_output.type().is_cuda(), \"grad_output must be a CUDA tensor\");\n\n    const int batch = value.size(0);\n    const int spatial_size = value.size(1);\n    const int num_heads = value.size(2);\n    const int channels = value.size(3);\n\n    const int num_levels = spatial_shapes.size(0);\n\n    const int num_query = sampling_loc.size(1);\n    const int num_point = sampling_loc.size(4);\n\n    const int im2col_step_ = std::min(batch, im2col_step);\n\n    AT_ASSERTM(batch % im2col_step_ == 0, \"batch(%d) must be divisible by im2col_step(%d)\", batch, im2col_step_);\n\n    auto grad_value = at::zeros_like(value);\n    auto grad_sampling_loc = at::zeros_like(sampling_loc);\n    auto grad_attn_weight = at::zeros_like(attn_weight);\n\n    const int batch_n = im2col_step_;\n    auto per_value_size = spatial_size * num_heads * channels;\n    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;\n    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;\n    auto grad_output_n = grad_output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});\n    \n    for (int n = 0; n < batch/im2col_step_; ++n)\n    {\n        auto grad_output_g = grad_output_n.select(0, n);\n        AT_DISPATCH_FLOATING_TYPES(value.type(), \"ms_deform_attn_backward_cuda\", ([&] {\n            ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(),\n                                    grad_output_g.data<scalar_t>(),\n                                    value.data<scalar_t>() + n * im2col_step_ * per_value_size,\n                                    spatial_shapes.data<int64_t>(),\n                                    level_start_index.data<int64_t>(),\n                                    sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,\n                                    attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size,\n                                    batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,\n    
                                grad_value.data<scalar_t>() +  n * im2col_step_ * per_value_size,\n                                    grad_sampling_loc.data<scalar_t>() + n * im2col_step_ * per_sample_loc_size,\n                                    grad_attn_weight.data<scalar_t>() + n * im2col_step_ * per_attn_weight_size);\n\n        }));\n    }\n\n    return {\n        grad_value, grad_sampling_loc, grad_attn_weight\n    };\n}"
  },
  {
    "path": "mmdet3d/models/utils/ops/src/cuda/ms_deform_attn_cuda.h",
    "content": "/*!\n**************************************************************************************************\n* Deformable DETR\n* Copyright (c) 2020 SenseTime. All Rights Reserved.\n* Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n**************************************************************************************************\n* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#pragma once\n#include <torch/extension.h>\n\nat::Tensor ms_deform_attn_cuda_forward(\n    const at::Tensor &value, \n    const at::Tensor &spatial_shapes,\n    const at::Tensor &level_start_index,\n    const at::Tensor &sampling_loc,\n    const at::Tensor &attn_weight,\n    const int im2col_step);\n\nstd::vector<at::Tensor> ms_deform_attn_cuda_backward(\n    const at::Tensor &value, \n    const at::Tensor &spatial_shapes,\n    const at::Tensor &level_start_index,\n    const at::Tensor &sampling_loc,\n    const at::Tensor &attn_weight,\n    const at::Tensor &grad_output,\n    const int im2col_step);\n\n"
  },
  {
    "path": "mmdet3d/models/utils/ops/src/cuda/ms_deform_im2col_cuda.cuh",
    "content": "/*!\n**************************************************************************\n* Deformable DETR\n* Copyright (c) 2020 SenseTime. All Rights Reserved.\n* Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n**************************************************************************\n* Modified from DCN (https://github.com/msracver/Deformable-ConvNets)\n* Copyright (c) 2018 Microsoft\n**************************************************************************\n*/\n\n#include <cstdio>\n#include <algorithm>\n#include <cstring>\n\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n\n#include <THC/THCAtomics.cuh>\n\n#define CUDA_KERNEL_LOOP(i, n)                          \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x;   \\\n      i < (n);                                          \\\n      i += blockDim.x * gridDim.x)\n\nconst int CUDA_NUM_THREADS = 1024;\ninline int GET_BLOCKS(const int N, const int num_threads)\n{\n  return (N + num_threads - 1) / num_threads;\n}\n\n\ntemplate <typename scalar_t>\n__device__ scalar_t ms_deform_attn_im2col_bilinear(const scalar_t* &bottom_data, \n                                                   const int &height, const int &width, const int &nheads, const int &channels,\n                                                   const scalar_t &h, const scalar_t &w, const int &m, const int &c)\n{\n  const int h_low = floor(h);\n  const int w_low = floor(w);\n  const int h_high = h_low + 1;\n  const int w_high = w_low + 1;\n\n  const scalar_t lh = h - h_low;\n  const scalar_t lw = w - w_low;\n  const scalar_t hh = 1 - lh, hw = 1 - lw;\n\n  const int w_stride = nheads * channels;\n  const int h_stride = width * w_stride;\n  const int h_low_ptr_offset = h_low * h_stride;\n  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;\n  const int w_low_ptr_offset = w_low * w_stride;\n  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;\n  const int base_ptr = m * channels + c;\n\n  scalar_t v1 = 0;\n  if (h_low >= 0 && w_low >= 0)\n  {\n    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;\n    v1 = bottom_data[ptr1];\n  }\n  scalar_t v2 = 0;\n  if (h_low >= 0 && w_high <= width - 1)\n  {\n    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;\n    v2 = bottom_data[ptr2];\n  }\n  scalar_t v3 = 0;\n  if (h_high <= height - 1 && w_low >= 0)\n  {\n    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;\n    v3 = bottom_data[ptr3];\n  }\n  scalar_t v4 = 0;\n  if (h_high <= height - 1 && w_high <= width - 1)\n  {\n    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;\n    v4 = bottom_data[ptr4];\n  }\n\n  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;\n\n  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);\n  return val;\n}\n\n\ntemplate <typename scalar_t>\n__device__ void ms_deform_attn_col2im_bilinear(const scalar_t* &bottom_data, \n                                                   const int &height, const int &width, const int &nheads, const int &channels,\n                                                   const scalar_t &h, const scalar_t &w, const int &m, const int &c,\n                                                   const scalar_t &top_grad,\n                                                   const scalar_t &attn_weight,\n                                                   scalar_t* &grad_value, \n                                                   scalar_t* grad_sampling_loc,\n                                 
                  scalar_t* grad_attn_weight)\n{\n  const int h_low = floor(h);\n  const int w_low = floor(w);\n  const int h_high = h_low + 1;\n  const int w_high = w_low + 1;\n\n  const scalar_t lh = h - h_low;\n  const scalar_t lw = w - w_low;\n  const scalar_t hh = 1 - lh, hw = 1 - lw;\n\n  const int w_stride = nheads * channels;\n  const int h_stride = width * w_stride;\n  const int h_low_ptr_offset = h_low * h_stride;\n  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;\n  const int w_low_ptr_offset = w_low * w_stride;\n  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;\n  const int base_ptr = m * channels + c;\n\n  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;\n  const scalar_t top_grad_value = top_grad * attn_weight;\n  scalar_t grad_h_weight = 0, grad_w_weight = 0;\n\n  scalar_t v1 = 0;\n  if (h_low >= 0 && w_low >= 0)\n  {\n    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;\n    v1 = bottom_data[ptr1];\n    grad_h_weight -= hw * v1;\n    grad_w_weight -= hh * v1;\n    atomicAdd(grad_value+ptr1, w1*top_grad_value);\n  }\n  scalar_t v2 = 0;\n  if (h_low >= 0 && w_high <= width - 1)\n  {\n    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;\n    v2 = bottom_data[ptr2];\n    grad_h_weight -= lw * v2;\n    grad_w_weight += hh * v2;\n    atomicAdd(grad_value+ptr2, w2*top_grad_value);\n  }\n  scalar_t v3 = 0;\n  if (h_high <= height - 1 && w_low >= 0)\n  {\n    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;\n    v3 = bottom_data[ptr3];\n    grad_h_weight += hw * v3;\n    grad_w_weight -= lh * v3;\n    atomicAdd(grad_value+ptr3, w3*top_grad_value); \n  }\n  scalar_t v4 = 0;\n  if (h_high <= height - 1 && w_high <= width - 1)\n  {\n    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;\n    v4 = bottom_data[ptr4];\n    grad_h_weight += lw * v4;\n    grad_w_weight += lh * v4;\n    atomicAdd(grad_value+ptr4, w4*top_grad_value);\n  }\n\n  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);\n  *grad_attn_weight = top_grad * val;\n  *grad_sampling_loc = width * grad_w_weight * top_grad_value;\n  *(grad_sampling_loc + 1) = height * grad_h_weight * top_grad_value;\n}\n\n\ntemplate <typename scalar_t>\n__device__ void ms_deform_attn_col2im_bilinear_gm(const scalar_t* &bottom_data, \n                                                   const int &height, const int &width, const int &nheads, const int &channels,\n                                                   const scalar_t &h, const scalar_t &w, const int &m, const int &c,\n                                                   const scalar_t &top_grad,\n                                                   const scalar_t &attn_weight,\n                                                   scalar_t* &grad_value, \n                                                   scalar_t* grad_sampling_loc,\n                                                   scalar_t* grad_attn_weight)\n{\n  const int h_low = floor(h);\n  const int w_low = floor(w);\n  const int h_high = h_low + 1;\n  const int w_high = w_low + 1;\n\n  const scalar_t lh = h - h_low;\n  const scalar_t lw = w - w_low;\n  const scalar_t hh = 1 - lh, hw = 1 - lw;\n\n  const int w_stride = nheads * channels;\n  const int h_stride = width * w_stride;\n  const int h_low_ptr_offset = h_low * h_stride;\n  const int h_high_ptr_offset = h_low_ptr_offset + h_stride;\n  const int w_low_ptr_offset = w_low * w_stride;\n  const int w_high_ptr_offset = w_low_ptr_offset + w_stride;\n  const int 
base_ptr = m * channels + c;\n\n  const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;\n  const scalar_t top_grad_value = top_grad * attn_weight;\n  scalar_t grad_h_weight = 0, grad_w_weight = 0;\n\n  scalar_t v1 = 0;\n  if (h_low >= 0 && w_low >= 0)\n  {\n    const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;\n    v1 = bottom_data[ptr1];\n    grad_h_weight -= hw * v1;\n    grad_w_weight -= hh * v1;\n    atomicAdd(grad_value+ptr1, w1*top_grad_value);\n  }\n  scalar_t v2 = 0;\n  if (h_low >= 0 && w_high <= width - 1)\n  {\n    const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;\n    v2 = bottom_data[ptr2];\n    grad_h_weight -= lw * v2;\n    grad_w_weight += hh * v2;\n    atomicAdd(grad_value+ptr2, w2*top_grad_value);\n  }\n  scalar_t v3 = 0;\n  if (h_high <= height - 1 && w_low >= 0)\n  {\n    const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;\n    v3 = bottom_data[ptr3];\n    grad_h_weight += hw * v3;\n    grad_w_weight -= lh * v3;\n    atomicAdd(grad_value+ptr3, w3*top_grad_value); \n  }\n  scalar_t v4 = 0;\n  if (h_high <= height - 1 && w_high <= width - 1)\n  {\n    const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;\n    v4 = bottom_data[ptr4];\n    grad_h_weight += lw * v4;\n    grad_w_weight += lh * v4;\n    atomicAdd(grad_value+ptr4, w4*top_grad_value);\n  }\n\n  const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);\n  atomicAdd(grad_attn_weight, top_grad * val); \n  atomicAdd(grad_sampling_loc, width * grad_w_weight * top_grad_value);\n  atomicAdd(grad_sampling_loc + 1, height * grad_h_weight * top_grad_value);\n}\n\n\ntemplate <typename scalar_t>\n__global__ void ms_deformable_im2col_gpu_kernel(const int n,\n                                                const scalar_t *data_value, \n                                                const int64_t *data_spatial_shapes,\n                                                const int64_t *data_level_start_index, \n                                                const scalar_t *data_sampling_loc,\n                                                const scalar_t *data_attn_weight,\n                                                const int batch_size, \n                                                const int spatial_size, \n                                                const int num_heads,\n                                                const int channels, \n                                                const int num_levels,\n                                                const int num_query,\n                                                const int num_point,\n                                                scalar_t *data_col)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    int _temp = index;\n    const int c_col = _temp % channels;\n    _temp /= channels;\n    const int sampling_index = _temp; \n    const int m_col = _temp % num_heads;\n    _temp /= num_heads;\n    const int q_col = _temp % num_query;\n    _temp /= num_query;\n    const int b_col = _temp;\n\n    scalar_t *data_col_ptr = data_col + index;\n    int data_weight_ptr = sampling_index * num_levels * num_point;\n    int data_loc_w_ptr = data_weight_ptr << 1;\n    const int qid_stride = num_heads * channels;\n    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;\n    scalar_t col = 0;\n    \n    for (int l_col=0; l_col < num_levels; ++l_col)\n    {\n      const int level_start_id = data_level_start_index[l_col];\n      const int spatial_h_ptr = l_col << 1;\n      const int 
spatial_h = data_spatial_shapes[spatial_h_ptr];\n      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];\n      const scalar_t *data_value_ptr = data_value + (data_value_ptr_init_offset + level_start_id * qid_stride);\n      for (int p_col=0; p_col < num_point; ++p_col)\n      {\n        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];\n        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];\n        const scalar_t weight = data_attn_weight[data_weight_ptr];\n\n        const scalar_t h_im = loc_h * spatial_h - 0.5;\n        const scalar_t w_im = loc_w * spatial_w - 0.5;\n\n        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)\n        {\n          col += ms_deform_attn_im2col_bilinear(data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col) * weight;\n        }\n\n        data_weight_ptr += 1;\n        data_loc_w_ptr += 2;\n      }\n    }\n    *data_col_ptr = col;\n  }\n}\n\ntemplate <typename scalar_t, unsigned int blockSize>\n__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1(const int n,\n                                                const scalar_t *grad_col,\n                                                const scalar_t *data_value,\n                                                const int64_t *data_spatial_shapes,\n                                                const int64_t *data_level_start_index, \n                                                const scalar_t *data_sampling_loc,\n                                                const scalar_t *data_attn_weight,\n                                                const int batch_size, \n                                                const int spatial_size, \n                                                const int num_heads,\n                                                const int channels, \n                                                const int num_levels,\n                                                const int num_query,\n                                                const int num_point,\n                                                scalar_t *grad_value,\n                                                scalar_t *grad_sampling_loc,\n                                                scalar_t *grad_attn_weight)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];\n    __shared__ scalar_t cache_grad_attn_weight[blockSize];\n    unsigned int tid = threadIdx.x;\n    int _temp = index;\n    const int c_col = _temp % channels;\n    _temp /= channels;\n    const int sampling_index = _temp; \n    const int m_col = _temp % num_heads;\n    _temp /= num_heads;\n    const int q_col = _temp % num_query;\n    _temp /= num_query;\n    const int b_col = _temp;\n\n    const scalar_t top_grad = grad_col[index];\n\n    int data_weight_ptr = sampling_index * num_levels * num_point;\n    int data_loc_w_ptr = data_weight_ptr << 1;\n    const int grad_sampling_ptr = data_weight_ptr;\n    grad_sampling_loc += grad_sampling_ptr << 1;\n    grad_attn_weight += grad_sampling_ptr;\n    const int grad_weight_stride = 1;\n    const int grad_loc_stride = 2;\n    const int qid_stride = num_heads * channels;\n    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;\n\n    for (int l_col=0; l_col < num_levels; ++l_col)\n    {\n      const int level_start_id = data_level_start_index[l_col];\n      const int spatial_h_ptr = l_col << 1;\n      const int spatial_h = 
data_spatial_shapes[spatial_h_ptr];\n      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];\n      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;\n      const scalar_t *data_value_ptr = data_value + value_ptr_offset;\n      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;\n\n      for (int p_col=0; p_col < num_point; ++p_col)\n      {\n        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];\n        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];\n        const scalar_t weight = data_attn_weight[data_weight_ptr];\n\n        const scalar_t h_im = loc_h * spatial_h - 0.5;\n        const scalar_t w_im = loc_w * spatial_w - 0.5;\n        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;\n        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;\n        *(cache_grad_attn_weight+threadIdx.x)=0;\n        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)\n        {\n          ms_deform_attn_col2im_bilinear(\n            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,\n            top_grad, weight, grad_value_ptr, \n            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);\n        }\n        \n        __syncthreads();\n        if (tid == 0)\n        {\n          scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0];\n          int sid=2;\n          for (unsigned int tid = 1; tid < blockSize; ++tid)\n          {\n            _grad_w += cache_grad_sampling_loc[sid];\n            _grad_h += cache_grad_sampling_loc[sid + 1];\n            _grad_a += cache_grad_attn_weight[tid];\n            sid += 2;\n          }\n          \n          \n          *grad_sampling_loc = _grad_w;\n          *(grad_sampling_loc + 1) = _grad_h;\n          *grad_attn_weight = _grad_a;\n        }\n        __syncthreads();\n\n        data_weight_ptr += 1;\n        data_loc_w_ptr += 2;\n        grad_attn_weight += grad_weight_stride;\n        grad_sampling_loc += grad_loc_stride;\n      }\n    }\n  }\n}\n\n\ntemplate <typename scalar_t, unsigned int blockSize>\n__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2(const int n,\n                                                const scalar_t *grad_col,\n                                                const scalar_t *data_value,\n                                                const int64_t *data_spatial_shapes,\n                                                const int64_t *data_level_start_index, \n                                                const scalar_t *data_sampling_loc,\n                                                const scalar_t *data_attn_weight,\n                                                const int batch_size, \n                                                const int spatial_size, \n                                                const int num_heads,\n                                                const int channels, \n                                                const int num_levels,\n                                                const int num_query,\n                                                const int num_point,\n                                                scalar_t *grad_value,\n                                                scalar_t *grad_sampling_loc,\n                                                scalar_t *grad_attn_weight)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    
__shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];\n    __shared__ scalar_t cache_grad_attn_weight[blockSize];\n    unsigned int tid = threadIdx.x;\n    int _temp = index;\n    const int c_col = _temp % channels;\n    _temp /= channels;\n    const int sampling_index = _temp; \n    const int m_col = _temp % num_heads;\n    _temp /= num_heads;\n    const int q_col = _temp % num_query;\n    _temp /= num_query;\n    const int b_col = _temp;\n\n    const scalar_t top_grad = grad_col[index];\n\n    int data_weight_ptr = sampling_index * num_levels * num_point;\n    int data_loc_w_ptr = data_weight_ptr << 1;\n    const int grad_sampling_ptr = data_weight_ptr;\n    grad_sampling_loc += grad_sampling_ptr << 1;\n    grad_attn_weight += grad_sampling_ptr;\n    const int grad_weight_stride = 1;\n    const int grad_loc_stride = 2;\n    const int qid_stride = num_heads * channels;\n    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;\n\n    for (int l_col=0; l_col < num_levels; ++l_col)\n    {\n      const int level_start_id = data_level_start_index[l_col];\n      const int spatial_h_ptr = l_col << 1;\n      const int spatial_h = data_spatial_shapes[spatial_h_ptr];\n      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];\n      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;\n      const scalar_t *data_value_ptr = data_value + value_ptr_offset;\n      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;\n\n      for (int p_col=0; p_col < num_point; ++p_col)\n      {\n        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];\n        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];\n        const scalar_t weight = data_attn_weight[data_weight_ptr];\n\n        const scalar_t h_im = loc_h * spatial_h - 0.5;\n        const scalar_t w_im = loc_w * spatial_w - 0.5;\n        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;\n        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;\n        *(cache_grad_attn_weight+threadIdx.x)=0;\n        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)\n        {\n          ms_deform_attn_col2im_bilinear(\n            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,\n            top_grad, weight, grad_value_ptr, \n            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);\n        }\n        \n        __syncthreads();\n\n        for (unsigned int s=blockSize/2; s>0; s>>=1)\n        {\n          if (tid < s) {\n            const unsigned int xid1 = tid << 1;\n            const unsigned int xid2 = (tid + s) << 1;\n            cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];\n            cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];\n            cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1];\n          }\n          __syncthreads();\n        }\n\n        if (tid == 0)\n        { \n          *grad_sampling_loc = cache_grad_sampling_loc[0];\n          *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1];\n          *grad_attn_weight = cache_grad_attn_weight[0];\n        }\n        __syncthreads();\n\n        data_weight_ptr += 1;\n        data_loc_w_ptr += 2;\n        grad_attn_weight += grad_weight_stride;\n        grad_sampling_loc += grad_loc_stride;\n      }\n    }\n  }\n}\n\n\ntemplate <typename scalar_t>\n__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1(const int n,\n                                  
              const scalar_t *grad_col,\n                                                const scalar_t *data_value,\n                                                const int64_t *data_spatial_shapes,\n                                                const int64_t *data_level_start_index, \n                                                const scalar_t *data_sampling_loc,\n                                                const scalar_t *data_attn_weight,\n                                                const int batch_size, \n                                                const int spatial_size, \n                                                const int num_heads,\n                                                const int channels, \n                                                const int num_levels,\n                                                const int num_query,\n                                                const int num_point,\n                                                scalar_t *grad_value,\n                                                scalar_t *grad_sampling_loc,\n                                                scalar_t *grad_attn_weight)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    extern __shared__ int _s[];\n    scalar_t* cache_grad_sampling_loc = (scalar_t*)_s;\n    scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;\n    unsigned int tid = threadIdx.x;\n    int _temp = index;\n    const int c_col = _temp % channels;\n    _temp /= channels;\n    const int sampling_index = _temp; \n    const int m_col = _temp % num_heads;\n    _temp /= num_heads;\n    const int q_col = _temp % num_query;\n    _temp /= num_query;\n    const int b_col = _temp;\n\n    const scalar_t top_grad = grad_col[index];\n\n    int data_weight_ptr = sampling_index * num_levels * num_point;\n    int data_loc_w_ptr = data_weight_ptr << 1;\n    const int grad_sampling_ptr = data_weight_ptr;\n    grad_sampling_loc += grad_sampling_ptr << 1;\n    grad_attn_weight += grad_sampling_ptr;\n    const int grad_weight_stride = 1;\n    const int grad_loc_stride = 2;\n    const int qid_stride = num_heads * channels;\n    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;\n\n    for (int l_col=0; l_col < num_levels; ++l_col)\n    {\n      const int level_start_id = data_level_start_index[l_col];\n      const int spatial_h_ptr = l_col << 1;\n      const int spatial_h = data_spatial_shapes[spatial_h_ptr];\n      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];\n      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;\n      const scalar_t *data_value_ptr = data_value + value_ptr_offset;\n      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;\n\n      for (int p_col=0; p_col < num_point; ++p_col)\n      {\n        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];\n        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];\n        const scalar_t weight = data_attn_weight[data_weight_ptr];\n\n        const scalar_t h_im = loc_h * spatial_h - 0.5;\n        const scalar_t w_im = loc_w * spatial_w - 0.5;\n        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;\n        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;\n        *(cache_grad_attn_weight+threadIdx.x)=0;\n        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)\n        {\n          ms_deform_attn_col2im_bilinear(\n            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, 
c_col,\n            top_grad, weight, grad_value_ptr, \n            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);\n        }\n        \n        __syncthreads();\n        if (tid == 0)\n        {\n          scalar_t _grad_w=cache_grad_sampling_loc[0], _grad_h=cache_grad_sampling_loc[1], _grad_a=cache_grad_attn_weight[0];\n          int sid=2;\n          for (unsigned int tid = 1; tid < blockDim.x; ++tid)\n          {\n            _grad_w += cache_grad_sampling_loc[sid];\n            _grad_h += cache_grad_sampling_loc[sid + 1];\n            _grad_a += cache_grad_attn_weight[tid];\n            sid += 2;\n          }\n          \n          \n          *grad_sampling_loc = _grad_w;\n          *(grad_sampling_loc + 1) = _grad_h;\n          *grad_attn_weight = _grad_a;\n        }\n        __syncthreads();\n\n        data_weight_ptr += 1;\n        data_loc_w_ptr += 2;\n        grad_attn_weight += grad_weight_stride;\n        grad_sampling_loc += grad_loc_stride;\n      }\n    }\n  }\n}\n\ntemplate <typename scalar_t>\n__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2(const int n,\n                                                const scalar_t *grad_col,\n                                                const scalar_t *data_value,\n                                                const int64_t *data_spatial_shapes,\n                                                const int64_t *data_level_start_index, \n                                                const scalar_t *data_sampling_loc,\n                                                const scalar_t *data_attn_weight,\n                                                const int batch_size, \n                                                const int spatial_size, \n                                                const int num_heads,\n                                                const int channels, \n                                                const int num_levels,\n                                                const int num_query,\n                                                const int num_point,\n                                                scalar_t *grad_value,\n                                                scalar_t *grad_sampling_loc,\n                                                scalar_t *grad_attn_weight)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    extern __shared__ int _s[];\n    scalar_t* cache_grad_sampling_loc = (scalar_t*)_s;\n    scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;\n    unsigned int tid = threadIdx.x;\n    int _temp = index;\n    const int c_col = _temp % channels;\n    _temp /= channels;\n    const int sampling_index = _temp; \n    const int m_col = _temp % num_heads;\n    _temp /= num_heads;\n    const int q_col = _temp % num_query;\n    _temp /= num_query;\n    const int b_col = _temp;\n\n    const scalar_t top_grad = grad_col[index];\n\n    int data_weight_ptr = sampling_index * num_levels * num_point;\n    int data_loc_w_ptr = data_weight_ptr << 1;\n    const int grad_sampling_ptr = data_weight_ptr;\n    grad_sampling_loc += grad_sampling_ptr << 1;\n    grad_attn_weight += grad_sampling_ptr;\n    const int grad_weight_stride = 1;\n    const int grad_loc_stride = 2;\n    const int qid_stride = num_heads * channels;\n    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;\n\n    for (int l_col=0; l_col < num_levels; ++l_col)\n    {\n      const int level_start_id = data_level_start_index[l_col];\n      const int 
spatial_h_ptr = l_col << 1;\n      const int spatial_h = data_spatial_shapes[spatial_h_ptr];\n      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];\n      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;\n      const scalar_t *data_value_ptr = data_value + value_ptr_offset;\n      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;\n\n      for (int p_col=0; p_col < num_point; ++p_col)\n      {\n        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];\n        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];\n        const scalar_t weight = data_attn_weight[data_weight_ptr];\n\n        const scalar_t h_im = loc_h * spatial_h - 0.5;\n        const scalar_t w_im = loc_w * spatial_w - 0.5;\n        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;\n        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;\n        *(cache_grad_attn_weight+threadIdx.x)=0;\n        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)\n        {\n          ms_deform_attn_col2im_bilinear(\n            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,\n            top_grad, weight, grad_value_ptr, \n            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);\n        }\n        \n        __syncthreads();\n\n        for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1)\n        {\n          if (tid < s) {\n            const unsigned int xid1 = tid << 1;\n            const unsigned int xid2 = (tid + s) << 1;\n            cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];\n            cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];\n            cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1];\n            if (tid + (s << 1) < spre)\n            {\n              cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)];\n              cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)];\n              cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)];\n            } \n          }\n          __syncthreads();\n        }\n\n        if (tid == 0)\n        {\n          *grad_sampling_loc = cache_grad_sampling_loc[0];\n          *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1];\n          *grad_attn_weight = cache_grad_attn_weight[0];\n        }\n        __syncthreads();\n\n        data_weight_ptr += 1;\n        data_loc_w_ptr += 2;\n        grad_attn_weight += grad_weight_stride;\n        grad_sampling_loc += grad_loc_stride;\n      }\n    }\n  }\n}\n\ntemplate <typename scalar_t>\n__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks(const int n,\n                                                const scalar_t *grad_col,\n                                                const scalar_t *data_value,\n                                                const int64_t *data_spatial_shapes,\n                                                const int64_t *data_level_start_index, \n                                                const scalar_t *data_sampling_loc,\n                                                const scalar_t *data_attn_weight,\n                                                const int batch_size, \n                                                const int spatial_size, \n                                                const int num_heads,\n                                                
const int channels, \n                                                const int num_levels,\n                                                const int num_query,\n                                                const int num_point,\n                                                scalar_t *grad_value,\n                                                scalar_t *grad_sampling_loc,\n                                                scalar_t *grad_attn_weight)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    extern __shared__ int _s[];\n    scalar_t* cache_grad_sampling_loc = (scalar_t*)_s;\n    scalar_t* cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;\n    unsigned int tid = threadIdx.x;\n    int _temp = index;\n    const int c_col = _temp % channels;\n    _temp /= channels;\n    const int sampling_index = _temp; \n    const int m_col = _temp % num_heads;\n    _temp /= num_heads;\n    const int q_col = _temp % num_query;\n    _temp /= num_query;\n    const int b_col = _temp;\n\n    const scalar_t top_grad = grad_col[index];\n\n    int data_weight_ptr = sampling_index * num_levels * num_point;\n    int data_loc_w_ptr = data_weight_ptr << 1;\n    const int grad_sampling_ptr = data_weight_ptr;\n    grad_sampling_loc += grad_sampling_ptr << 1;\n    grad_attn_weight += grad_sampling_ptr;\n    const int grad_weight_stride = 1;\n    const int grad_loc_stride = 2;\n    const int qid_stride = num_heads * channels;\n    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;\n\n    for (int l_col=0; l_col < num_levels; ++l_col)\n    {\n      const int level_start_id = data_level_start_index[l_col];\n      const int spatial_h_ptr = l_col << 1;\n      const int spatial_h = data_spatial_shapes[spatial_h_ptr];\n      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];\n      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;\n      const scalar_t *data_value_ptr = data_value + value_ptr_offset;\n      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;\n\n      for (int p_col=0; p_col < num_point; ++p_col)\n      {\n        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];\n        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];\n        const scalar_t weight = data_attn_weight[data_weight_ptr];\n\n        const scalar_t h_im = loc_h * spatial_h - 0.5;\n        const scalar_t w_im = loc_w * spatial_w - 0.5;\n        *(cache_grad_sampling_loc+(threadIdx.x << 1)) = 0;\n        *(cache_grad_sampling_loc+((threadIdx.x << 1) + 1)) = 0;\n        *(cache_grad_attn_weight+threadIdx.x)=0;\n        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)\n        {\n          ms_deform_attn_col2im_bilinear(\n            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,\n            top_grad, weight, grad_value_ptr, \n            cache_grad_sampling_loc+(threadIdx.x << 1), cache_grad_attn_weight+threadIdx.x);\n        }\n        \n        __syncthreads();\n\n        for (unsigned int s=blockDim.x/2, spre=blockDim.x; s>0; s>>=1, spre>>=1)\n        {\n          if (tid < s) {\n            const unsigned int xid1 = tid << 1;\n            const unsigned int xid2 = (tid + s) << 1;\n            cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];\n            cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];\n            cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1];\n            if (tid + (s << 1) < spre)\n            {\n       
       cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + (s << 1)];\n              cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2 + (s << 1)];\n              cache_grad_sampling_loc[xid1 + 1] += cache_grad_sampling_loc[xid2 + 1 + (s << 1)];\n            }\n          }\n          __syncthreads();\n        }\n\n        if (tid == 0)\n        {\n          atomicAdd(grad_sampling_loc, cache_grad_sampling_loc[0]);\n          atomicAdd(grad_sampling_loc + 1, cache_grad_sampling_loc[1]);\n          atomicAdd(grad_attn_weight, cache_grad_attn_weight[0]);\n        }\n        __syncthreads();\n\n        data_weight_ptr += 1;\n        data_loc_w_ptr += 2;\n        grad_attn_weight += grad_weight_stride;\n        grad_sampling_loc += grad_loc_stride;\n      }\n    }\n  }\n}\n\n\ntemplate <typename scalar_t>\n__global__ void ms_deformable_col2im_gpu_kernel_gm(const int n,\n                                                const scalar_t *grad_col,\n                                                const scalar_t *data_value,\n                                                const int64_t *data_spatial_shapes,\n                                                const int64_t *data_level_start_index, \n                                                const scalar_t *data_sampling_loc,\n                                                const scalar_t *data_attn_weight,\n                                                const int batch_size, \n                                                const int spatial_size, \n                                                const int num_heads,\n                                                const int channels, \n                                                const int num_levels,\n                                                const int num_query,\n                                                const int num_point,\n                                                scalar_t *grad_value,\n                                                scalar_t *grad_sampling_loc,\n                                                scalar_t *grad_attn_weight)\n{\n  CUDA_KERNEL_LOOP(index, n)\n  {\n    int _temp = index;\n    const int c_col = _temp % channels;\n    _temp /= channels;\n    const int sampling_index = _temp; \n    const int m_col = _temp % num_heads;\n    _temp /= num_heads;\n    const int q_col = _temp % num_query;\n    _temp /= num_query;\n    const int b_col = _temp;\n\n    const scalar_t top_grad = grad_col[index];\n\n    int data_weight_ptr = sampling_index * num_levels * num_point;\n    int data_loc_w_ptr = data_weight_ptr << 1;\n    const int grad_sampling_ptr = data_weight_ptr;\n    grad_sampling_loc += grad_sampling_ptr << 1;\n    grad_attn_weight += grad_sampling_ptr;\n    const int grad_weight_stride = 1;\n    const int grad_loc_stride = 2;\n    const int qid_stride = num_heads * channels;\n    const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;\n\n    for (int l_col=0; l_col < num_levels; ++l_col)\n    {\n      const int level_start_id = data_level_start_index[l_col];\n      const int spatial_h_ptr = l_col << 1;\n      const int spatial_h = data_spatial_shapes[spatial_h_ptr];\n      const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];\n      const int value_ptr_offset = data_value_ptr_init_offset + level_start_id * qid_stride;\n      const scalar_t *data_value_ptr = data_value + value_ptr_offset;\n      scalar_t *grad_value_ptr = grad_value + value_ptr_offset;\n\n      for (int p_col=0; p_col < num_point; ++p_col)\n  
    {\n        const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];\n        const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];\n        const scalar_t weight = data_attn_weight[data_weight_ptr];\n\n        const scalar_t h_im = loc_h * spatial_h - 0.5;\n        const scalar_t w_im = loc_w * spatial_w - 0.5;\n        if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w)\n        {\n          ms_deform_attn_col2im_bilinear_gm(\n            data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col,\n            top_grad, weight, grad_value_ptr, \n            grad_sampling_loc, grad_attn_weight);\n        }\n        data_weight_ptr += 1;\n        data_loc_w_ptr += 2;\n        grad_attn_weight += grad_weight_stride;\n        grad_sampling_loc += grad_loc_stride;\n      }\n    }\n  }\n}\n\n\ntemplate <typename scalar_t>\nvoid ms_deformable_im2col_cuda(cudaStream_t stream,\n                              const scalar_t* data_value,\n                              const int64_t* data_spatial_shapes, \n                              const int64_t* data_level_start_index, \n                              const scalar_t* data_sampling_loc,\n                              const scalar_t* data_attn_weight,\n                              const int batch_size,\n                              const int spatial_size, \n                              const int num_heads, \n                              const int channels, \n                              const int num_levels, \n                              const int num_query,\n                              const int num_point,\n                              scalar_t* data_col)\n{\n  const int num_kernels = batch_size * num_query * num_heads * channels;\n  const int num_actual_kernels = batch_size * num_query * num_heads * channels;\n  const int num_threads = CUDA_NUM_THREADS;\n  ms_deformable_im2col_gpu_kernel<scalar_t>\n      <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n          0, stream>>>(\n      num_kernels, data_value, data_spatial_shapes, data_level_start_index, data_sampling_loc, data_attn_weight, \n      batch_size, spatial_size, num_heads, channels, num_levels, num_query, num_point, data_col);\n  \n  cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in ms_deformable_im2col_cuda: %s\\n\", cudaGetErrorString(err));\n  }\n\n}\n\ntemplate <typename scalar_t>\nvoid ms_deformable_col2im_cuda(cudaStream_t stream,\n                              const scalar_t* grad_col,\n                              const scalar_t* data_value,\n                              const int64_t * data_spatial_shapes,\n                              const int64_t * data_level_start_index,\n                              const scalar_t * data_sampling_loc,\n                              const scalar_t * data_attn_weight,\n                              const int batch_size, \n                              const int spatial_size, \n                              const int num_heads,\n                              const int channels, \n                              const int num_levels,\n                              const int num_query,\n                              const int num_point, \n                              scalar_t* grad_value,\n                              scalar_t* grad_sampling_loc,\n                              scalar_t* grad_attn_weight)\n{\n  const int num_threads = (channels > CUDA_NUM_THREADS)?CUDA_NUM_THREADS:channels;\n  const int num_kernels = 
batch_size * num_query * num_heads * channels;\n  const int num_actual_kernels = batch_size * num_query * num_heads * channels;\n  if (channels > 1024)\n  {\n    if ((channels & 1023) == 0)\n    {\n      ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks<scalar_t>\n          <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n              num_threads*3*sizeof(scalar_t), stream>>>(\n                        num_kernels, \n                        grad_col,\n                        data_value,\n                        data_spatial_shapes,\n                        data_level_start_index, \n                        data_sampling_loc,\n                        data_attn_weight,\n                        batch_size, \n                        spatial_size, \n                        num_heads,\n                        channels, \n                        num_levels,\n                        num_query,\n                        num_point,\n                        grad_value,\n                        grad_sampling_loc,\n                        grad_attn_weight);\n    }\n    else\n    {\n      ms_deformable_col2im_gpu_kernel_gm<scalar_t>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n    }\n  }\n  else{\n    switch(channels)\n    {\n      case 1:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 1>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      case 2:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 2>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      
grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      case 4:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 4>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      case 8:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 8>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      case 16:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 16>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      case 32:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1<scalar_t, 32>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      case 
64:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 64>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      case 128:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 128>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      case 256:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 256>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      case 512:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 512>\n        <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      case 1024:\n        ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2<scalar_t, 1024>\n        
<<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n            0, stream>>>(\n                      num_kernels, \n                      grad_col,\n                      data_value,\n                      data_spatial_shapes,\n                      data_level_start_index, \n                      data_sampling_loc,\n                      data_attn_weight,\n                      batch_size, \n                      spatial_size, \n                      num_heads,\n                      channels, \n                      num_levels,\n                      num_query,\n                      num_point,\n                      grad_value,\n                      grad_sampling_loc,\n                      grad_attn_weight);\n        break;\n      default:\n        if (channels < 64)\n        {\n          ms_deformable_col2im_gpu_kernel_shm_reduce_v1<scalar_t>\n          <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n              num_threads*3*sizeof(scalar_t), stream>>>(\n                        num_kernels, \n                        grad_col,\n                        data_value,\n                        data_spatial_shapes,\n                        data_level_start_index, \n                        data_sampling_loc,\n                        data_attn_weight,\n                        batch_size, \n                        spatial_size, \n                        num_heads,\n                        channels, \n                        num_levels,\n                        num_query,\n                        num_point,\n                        grad_value,\n                        grad_sampling_loc,\n                        grad_attn_weight);\n        }\n        else\n        {\n          ms_deformable_col2im_gpu_kernel_shm_reduce_v2<scalar_t>\n          <<<GET_BLOCKS(num_actual_kernels, num_threads), num_threads,\n              num_threads*3*sizeof(scalar_t), stream>>>(\n                        num_kernels, \n                        grad_col,\n                        data_value,\n                        data_spatial_shapes,\n                        data_level_start_index, \n                        data_sampling_loc,\n                        data_attn_weight,\n                        batch_size, \n                        spatial_size, \n                        num_heads,\n                        channels, \n                        num_levels,\n                        num_query,\n                        num_point,\n                        grad_value,\n                        grad_sampling_loc,\n                        grad_attn_weight);\n        }\n    }\n  }\n  cudaError_t err = cudaGetLastError();\n  if (err != cudaSuccess)\n  {\n    printf(\"error in ms_deformable_col2im_cuda: %s\\n\", cudaGetErrorString(err));\n  }\n\n}"
  },
  {
    "path": "mmdet3d/models/utils/ops/src/ms_deform_attn.h",
    "content": "/*!\n**************************************************************************************************\n* Deformable DETR\n* Copyright (c) 2020 SenseTime. All Rights Reserved.\n* Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n**************************************************************************************************\n* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#pragma once\n\n#include \"cpu/ms_deform_attn_cpu.h\"\n\n#ifdef WITH_CUDA\n#include \"cuda/ms_deform_attn_cuda.h\"\n#endif\n\n\nat::Tensor\nms_deform_attn_forward(\n    const at::Tensor &value, \n    const at::Tensor &spatial_shapes,\n    const at::Tensor &level_start_index,\n    const at::Tensor &sampling_loc,\n    const at::Tensor &attn_weight,\n    const int im2col_step)\n{\n    if (value.type().is_cuda())\n    {\n#ifdef WITH_CUDA\n        return ms_deform_attn_cuda_forward(\n            value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step);\n#else\n        AT_ERROR(\"Not compiled with GPU support\");\n#endif\n    }\n    AT_ERROR(\"Not implemented on the CPU\");\n}\n\nstd::vector<at::Tensor>\nms_deform_attn_backward(\n    const at::Tensor &value, \n    const at::Tensor &spatial_shapes,\n    const at::Tensor &level_start_index,\n    const at::Tensor &sampling_loc,\n    const at::Tensor &attn_weight,\n    const at::Tensor &grad_output,\n    const int im2col_step)\n{\n    if (value.type().is_cuda())\n    {\n#ifdef WITH_CUDA\n        return ms_deform_attn_cuda_backward(\n            value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step);\n#else\n        AT_ERROR(\"Not compiled with GPU support\");\n#endif\n    }\n    AT_ERROR(\"Not implemented on the CPU\");\n}\n\n"
  },
  {
    "path": "mmdet3d/models/utils/ops/src/vision.cpp",
    "content": "/*!\n**************************************************************************************************\n* Deformable DETR\n* Copyright (c) 2020 SenseTime. All Rights Reserved.\n* Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n**************************************************************************************************\n* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n**************************************************************************************************\n*/\n\n#include \"ms_deform_attn.h\"\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"ms_deform_attn_forward\", &ms_deform_attn_forward, \"ms_deform_attn_forward\");\n  m.def(\"ms_deform_attn_backward\", &ms_deform_attn_backward, \"ms_deform_attn_backward\");\n}\n"
  },
  {
    "path": "mmdet3d/models/utils/ops/test.py",
    "content": "# ------------------------------------------------------------------------------------------------\n# Deformable DETR\n# Copyright (c) 2020 SenseTime. All Rights Reserved.\n# Licensed under the Apache License, Version 2.0 [see LICENSE for details]\n# ------------------------------------------------------------------------------------------------\n# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0\n# ------------------------------------------------------------------------------------------------\n\nfrom __future__ import absolute_import\nfrom __future__ import print_function\nfrom __future__ import division\n\nimport time\nimport torch\nimport torch.nn as nn\nfrom torch.autograd import gradcheck\n\nfrom functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch\n\n\nN, M, D = 1, 2, 2\nLq, L, P = 2, 2, 2\nshapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda()\nlevel_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1]))\nS = sum([(H*W).item() for H, W in shapes])\n\n\ntorch.manual_seed(3)\n\n\n@torch.no_grad()\ndef check_forward_equal_with_pytorch_double():\n    value = torch.rand(N, S, M, D).cuda() * 0.01\n    sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()\n    attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5\n    attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)\n    im2col_step = 2\n    output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu()\n    output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu()\n    fwdok = torch.allclose(output_cuda, output_pytorch)\n    max_abs_err = (output_cuda - output_pytorch).abs().max()\n    max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max()\n\n    print(f'* {fwdok} check_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')\n\n\n@torch.no_grad()\ndef check_forward_equal_with_pytorch_float():\n    value = torch.rand(N, S, M, D).cuda() * 0.01\n    sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()\n    attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5\n    attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)\n    im2col_step = 2\n    output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu()\n    output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step).detach().cpu()\n    fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3)\n    max_abs_err = (output_cuda - output_pytorch).abs().max()\n    max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max()\n\n    print(f'* {fwdok} check_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')\n\n\ndef check_gradient_numerical(channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True):\n\n    value = torch.rand(N, S, M, channels).cuda() * 0.01\n    sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()\n    attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5\n    attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)\n    im2col_step = 2\n    func = 
MSDeformAttnFunction.apply\n\n    value.requires_grad = grad_value\n    sampling_locations.requires_grad = grad_sampling_loc\n    attention_weights.requires_grad = grad_attn_weight\n\n    gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step))\n\n    print(f'* {gradok} check_gradient_numerical(D={channels})')\n\n\nif __name__ == '__main__':\n    check_forward_equal_with_pytorch_double()\n    check_forward_equal_with_pytorch_float()\n\n    for channels in [30, 32, 64, 71, 1025, 2048, 3096]:\n        check_gradient_numerical(channels, True, True, True)\n\n\n\n"
  },
  {
    "path": "mmdet3d/models/utils/projection.py",
    "content": "import torch\r\nimport torch.nn as nn\r\nfrom mmdet3d.models.utils import PositionEmbeddingLearned\r\n\r\nclass PointProjection(nn.Module):\r\n    def __init__(self, pos_channel, hidden_channel):\r\n        super(PointProjection, self).__init__()\r\n        self.feat_proj = nn.Conv1d(hidden_channel, hidden_channel, kernel_size=1)\r\n        self.pos_embed = nn.Sequential(\r\n            nn.Conv1d(pos_channel, hidden_channel*4, kernel_size=1),\r\n            nn.ReLU(inplace=True),\r\n            nn.Conv1d(hidden_channel*4, hidden_channel, kernel_size=1)\r\n        )\r\n        self.fuse_proj = nn.Sequential(\r\n            nn.Conv1d(hidden_channel, hidden_channel, kernel_size=1),\r\n            nn.ReLU(inplace=True),\r\n            nn.Conv1d(hidden_channel, hidden_channel, kernel_size=1)\r\n        )\r\n\r\n    def forward(self, query_feat, query_pos):\r\n        pos_embed = self.pos_embed(query_pos.permute(0, 2, 1))\r\n        feat_embed = self.feat_proj(query_feat)\r\n        proj_embed = self.fuse_proj(feat_embed + pos_embed)\r\n        return proj_embed\r\n\r\nclass ImageProjection(nn.Module):\r\n    def __init__(self, pos_channel, hidden_channel):\r\n        super(ImageProjection, self).__init__()\r\n        self.feat_proj = nn.Conv1d(hidden_channel, hidden_channel, kernel_size=1)\r\n        self.pos_proj = nn.Sequential(\r\n            nn.Conv1d(pos_channel, hidden_channel*4, kernel_size=1),\r\n            nn.ReLU(inplace=True),\r\n            nn.Conv1d(hidden_channel*4, hidden_channel, kernel_size=1),\r\n        )\r\n        self.fuse_proj = nn.Sequential(\r\n            nn.Conv1d(hidden_channel, hidden_channel, kernel_size=1),\r\n            nn.ReLU(inplace=True),\r\n            nn.Conv1d(hidden_channel, hidden_channel, kernel_size=1)\r\n        )\r\n\r\n    def forward(self, query_feat, query_pos):\r\n        feat_embed = self.feat_proj(query_feat)\r\n        pos_embed = self.pos_proj(query_pos.permute(0, 2, 1))\r\n        proj_embed = self.fuse_proj(feat_embed + pos_embed)\r\n        return proj_embed\r\n\r\n\r\nclass ProjectionL2Norm(nn.Module):\r\n    def __init__(self, hidden_channel):\r\n        super(ProjectionL2Norm, self).__init__()\r\n        self.hidden_channel = hidden_channel\r\n        self.feat_proj = nn.Conv1d(hidden_channel, hidden_channel, kernel_size=1)\r\n\r\n    def forward(self, query_feat):\r\n        query_feat = self.feat_proj(query_feat)\r\n        assert query_feat.shape[1] == self.hidden_channel\r\n        query_feat = query_feat / torch.norm(query_feat, p=2, keepdim=True, dim=1)\r\n        return query_feat\r\n\r\nclass ProjectionLayerNorm(nn.Module):\r\n    def __init__(self, hidden_channel, norm=True, input_channel=None):\r\n        super(ProjectionLayerNorm, self).__init__()\r\n        if input_channel is None:\r\n            input_channel = hidden_channel\r\n        self.hidden_channel = hidden_channel\r\n        self.feat_proj = nn.Linear(input_channel, hidden_channel)\r\n        self.norm = norm\r\n        if norm:\r\n            self.norm = nn.LayerNorm(hidden_channel)\r\n\r\n    def forward(self, query_feat):\r\n        query_feat = query_feat.transpose(2, 1)\r\n        query_feat = self.feat_proj(query_feat)\r\n        if self.norm:\r\n            query_feat = self.norm(query_feat)\r\n        query_feat = query_feat.transpose(2, 1)\r\n        return query_feat\r\n\r\nclass Projection_wPos(nn.Module):\r\n    def __init__(self, hidden_channel, pos_embed):\r\n        super(Projection_wPos, self).__init__()\r\n        
self.hidden_channel = hidden_channel\r\n        self.pos_proj = pos_embed\r\n        self.feat_proj = ProjectionLayerNorm(hidden_channel)\r\n\r\n    def forward(self, query_feat, query_pos):\r\n        feat_embed = self.feat_proj(query_feat)\r\n        pos_embed = self.pos_proj(query_pos)\r\n        return feat_embed + pos_embed\r\n"
  },
  {
    "path": "mmdet3d/models/utils/sparsefusion_models.py",
    "content": "import copy\r\nimport numpy as np\r\nimport torch\r\nfrom torch import nn\r\nimport torch.nn.functional as F\r\n\r\nfrom mmdet3d.models.fusion_layers import apply_3d_transformation\r\n\r\nfrom mmdet3d.models.utils import TransformerDecoderLayer, inverse_sigmoid\r\nfrom mmdet3d.models.utils.deformable_decoder import DeformableTransformerDecoderLayer\r\nfrom mmdet3d.models.utils.network_modules import LayerNorm, denormalize_pos, normalize_pos\r\n\r\n\r\nclass PointTransformer2D_3D(nn.Module):\r\n    def __init__(self, hidden_channel, num_heads, num_decoder_layers, prediction_heads, ffn_channel, dropout, activation, test_cfg, query_pos, key_pos):\r\n        super(PointTransformer2D_3D, self).__init__()\r\n        self.hidden_channel = hidden_channel\r\n        self.num_heads = num_heads\r\n        self.num_decoder_layers = num_decoder_layers\r\n        self.prediction_heads = prediction_heads\r\n        self.test_cfg = test_cfg\r\n\r\n        self.decoder = nn.ModuleList()\r\n        for i in range(self.num_decoder_layers):\r\n            self.decoder.append(\r\n                TransformerDecoderLayer(\r\n                    hidden_channel, num_heads, ffn_channel, dropout, activation,\r\n                    self_posembed=query_pos[i],\r\n                    cross_posembed=key_pos[i],\r\n                )\r\n            )\r\n\r\n    def forward(self, pts_query_feat, pts_query_pos, lidar_feat_flatten, bev_pos):\r\n        ret_dicts = []\r\n        res_layer = self.prediction_heads(pts_query_feat)\r\n        res_layer['center'] = pts_query_pos.permute(0, 2, 1)  # [BS, 2, num_proposals]\r\n\r\n        for i in range(self.num_decoder_layers):\r\n            # Transformer Decoder Layer\r\n            # :param query: B C Pq    :param query_pos: B Pq 3/6\r\n            pts_query_feat = self.decoder[i](pts_query_feat, lidar_feat_flatten, pts_query_pos, bev_pos)\r\n\r\n            # Prediction\r\n            res_layer = self.prediction_heads(pts_query_feat)\r\n            res_layer['center'] = res_layer['center'] + pts_query_pos.permute(0, 2, 1)\r\n\r\n            ret_dicts.append(res_layer)\r\n            # for next level positional embedding\r\n            pts_query_pos = res_layer['center'].detach().clone().permute(0, 2, 1)\r\n\r\n        return pts_query_feat, pts_query_pos, ret_dicts\r\n\r\n\r\nclass CameraSE(nn.Module):\r\n    def __init__(self, cam_dim, hidden_channel):\r\n        super(CameraSE, self).__init__()\r\n        self.bn = nn.BatchNorm1d(cam_dim)\r\n\r\n        self.hidden_channel = hidden_channel\r\n        self.mlp_depth = nn.Sequential(\r\n            nn.Conv1d(cam_dim, hidden_channel, kernel_size=1),\r\n            nn.ReLU(inplace=True),\r\n            nn.Conv1d(hidden_channel, hidden_channel, kernel_size=1),\r\n            nn.ReLU(inplace=True),\r\n            nn.Conv1d(hidden_channel, hidden_channel, kernel_size=1),\r\n        )\r\n\r\n    def forward(self, feat, cam_info):\r\n        cam_info_bn = self.bn(cam_info)\r\n        pred = feat * self.mlp_depth(cam_info_bn).sigmoid()\r\n        return pred\r\n\r\n\r\nclass ImageTransformer_Cam_3D_MS(nn.Module):\r\n    def __init__(self, num_views, hidden_channel, num_heads, num_decoder_layers, prediction_heads, out_size_factor_img,\r\n                 ffn_channel, dropout, activation, test_cfg, query_pos, key_pos):\r\n        super(ImageTransformer_Cam_3D_MS, self).__init__()\r\n        self.hidden_channel = hidden_channel\r\n        self.num_heads = num_heads\r\n        self.num_decoder_layers = num_decoder_layers\r\n 
       self.prediction_heads = prediction_heads\r\n        self.num_views = num_views\r\n        self.out_size_factor_img = out_size_factor_img\r\n        self.test_cfg = test_cfg\r\n        # self.use_camera = use_camera\r\n\r\n        self.decoder = nn.ModuleList()\r\n        for i in range(self.num_decoder_layers):\r\n            self.decoder.append(\r\n                DeformableTransformerDecoderLayer(\r\n                    hidden_channel, num_heads, dim_feedforward=ffn_channel, dropout=dropout, activation=activation,\r\n                    self_posembed=query_pos[i], cross_posembed=key_pos[i],\r\n                )\r\n            )\r\n\r\n        camera_dim = 16\r\n\r\n        # if use_camera == 'se':\r\n        #     self.camera_net = CameraSE(camera_dim, hidden_channel)\r\n\r\n    def forward(self, img_query_feat, normal_img_query_pos, img_query_view, img_feats, normal_img_feats_pos_stack, lidar2cam_rt, cam_intrinsic, img_metas, input_padding_mask=None):\r\n        num_img_proposals = img_query_feat.shape[-1]\r\n        level_num = len(img_feats)\r\n        batch_size = img_query_feat.shape[0]\r\n        img_feats_flatten = []\r\n        level_start_index = [0]\r\n        spatial_shapes = []\r\n        for lvl in range(level_num):\r\n            img_feat = img_feats[lvl]\r\n            h, w = img_feat.shape[-2], img_feat.shape[-1]\r\n            img_feat_flatten = img_feat.view(batch_size, self.num_views, self.hidden_channel, h*w)  # [bs, num_view, C, h*w]\r\n            img_feats_flatten.append(img_feat_flatten)\r\n            level_start_index.append(level_start_index[-1] + h*w)\r\n            spatial_shapes.append([h, w])\r\n        level_start_index = level_start_index[:-1]\r\n        level_start_index = torch.LongTensor(level_start_index).to(img_query_feat.device)\r\n        spatial_shapes = torch.LongTensor(spatial_shapes).to(img_query_feat.device)\r\n\r\n        img_feats_stack = torch.cat(img_feats_flatten, dim=3)  # [bs, num_view, C, h*w (sum)]\r\n        reference_points = normal_img_query_pos.sigmoid()  # [bs, num_img_proposal, 2]\r\n        reference_points = reference_points[:, :, None].repeat(1, 1, level_num, 1)\r\n\r\n        camera_info = torch.zeros([batch_size, 16, num_img_proposals]).to(img_query_feat.device)\r\n\r\n        camera_info[:, :9] = lidar2cam_rt[:, :, :3, :3].permute(0, 2, 3, 1).reshape(batch_size, 9, num_img_proposals)\r\n        camera_info[:, 9:12] = lidar2cam_rt[:, :, :3, 3].permute(0, 2, 1)\r\n        camera_info[:, 12] = cam_intrinsic[:, :, 0, 0]\r\n        camera_info[:, 13] = cam_intrinsic[:, :, 1, 1]\r\n        camera_info[:, 14:16] = cam_intrinsic[:, :, :2, 2].permute(0, 2, 1)\r\n\r\n        ret_dicts = []\r\n\r\n        for i in range(self.num_decoder_layers):\r\n            img_prev_query_feat = img_query_feat.clone()  # [BS, C, num_proposals]\r\n            img_query_feat = torch.zeros_like(img_query_feat)  # create new container for img query feature\r\n\r\n            for sample_idx in range(batch_size):\r\n                bincount = torch.bincount(img_query_view[sample_idx], minlength=self.num_views)\r\n                view_mask = bincount > 1\r\n                max_len = torch.max(bincount)\r\n                sample_query_feats = torch.zeros([self.num_views, self.hidden_channel, max_len]).type_as(camera_info)\r\n                samples_normal_query_pos = torch.zeros([self.num_views, max_len, 2]).type_as(camera_info)\r\n                sample_reference_points = torch.zeros([self.num_views, max_len, level_num, 
2]).type_as(camera_info)\r\n                sample_padding_mask = torch.zeros([self.num_views, max_len], dtype=torch.bool, device=camera_info.device)\r\n                for view_idx in range(self.num_views):\r\n                    on_the_image = img_query_view[sample_idx] == view_idx  # [num_on_the_image, ]\r\n                    view_count = bincount[view_idx]\r\n                    if torch.sum(on_the_image) <= 1:\r\n                        continue\r\n\r\n                    sample_query_feats[view_idx, :, :view_count] = img_prev_query_feat[sample_idx, :, on_the_image]\r\n                    samples_normal_query_pos[view_idx, :view_count] = normal_img_query_pos[sample_idx, on_the_image]\r\n                    sample_reference_points[view_idx, :view_count] = reference_points[sample_idx, on_the_image]\r\n                    sample_padding_mask[view_idx, view_count:] = True\r\n\r\n                if input_padding_mask is None:\r\n                    sample_query_feats[view_mask] = self.decoder[i](\r\n                        sample_query_feats[view_mask], img_feats_stack[sample_idx, view_mask], samples_normal_query_pos[view_mask],\r\n                        normal_img_feats_pos_stack.repeat(view_mask.sum(), 1, 1), reference_points=sample_reference_points[view_mask],\r\n                        level_start_index=level_start_index, spatial_shapes=spatial_shapes,\r\n                        query_padding_mask=sample_padding_mask[view_mask]\r\n                    )\r\n                else:\r\n                    sample_query_feats[view_mask] = self.decoder[i](\r\n                        sample_query_feats[view_mask], img_feats_stack[sample_idx, view_mask], samples_normal_query_pos[view_mask],\r\n                        normal_img_feats_pos_stack.repeat(view_mask.sum(), 1, 1), reference_points=sample_reference_points[view_mask],\r\n                        level_start_index=level_start_index, spatial_shapes=spatial_shapes,\r\n                        query_padding_mask=sample_padding_mask[view_mask], input_padding_mask=input_padding_mask[sample_idx,view_mask]\r\n                    )\r\n\r\n                for view_idx in range(self.num_views):\r\n                    on_the_image = img_query_view[sample_idx] == view_idx  # [num_on_the_image, ]\r\n                    if torch.sum(on_the_image) <= 1:\r\n                        continue\r\n                    view_count = bincount[view_idx]\r\n                    img_query_feat[sample_idx, :, on_the_image] = sample_query_feats[view_idx, :, :view_count]\r\n\r\n            res_layer = self.prediction_heads(img_query_feat)\r\n\r\n            if 'center_img' in res_layer:\r\n                res_layer['center_img'] = res_layer['center_img'] + normal_img_query_pos.permute(0, 2, 1)\r\n                res_layer['center_img'] = res_layer['center_img'].sigmoid()\r\n                res_layer['dim_img'] = res_layer['dim_img'].sigmoid()\r\n\r\n            res_layer['center_2d'] = res_layer['center_2d'] + normal_img_query_pos.permute(0, 2, 1)\r\n            normal_img_query_pos = res_layer['center_2d'].detach().clone().permute(0, 2, 1)\r\n\r\n            res_layer['center_2d'] = res_layer['center_2d'].sigmoid()\r\n\r\n            if batch_size > 1 or i == self.num_decoder_layers-1: # only when training\r\n                center_2d = res_layer['center_2d'].clone().permute(0, 2, 1)  # [bs, num_proposals, 2]\r\n                depth = res_layer['depth_2d'].clone().permute(0, 2, 1)[..., :1]  # [bs, num_proposals, 1]\r\n                h, w = 
img_metas[0]['input_shape'][:2]\r\n                center_pos = denormalize_pos(center_2d, w, h, sigmoid=False)  # [bs, num_proposals, 2]\r\n                center_pos = center_pos * depth\r\n                camera_coords = torch.cat([center_pos, depth], dim=2)  # [bs, num_proposals, 3]\r\n                loc_cam_3d = torch.matmul(torch.inverse(cam_intrinsic[:, :, :3, :3]), camera_coords.unsqueeze(-1)).squeeze(-1)  # [bs, num_proposals, 3]\r\n\r\n                res_layer['loc_cam_3d'] = loc_cam_3d.permute(0, 2, 1)\r\n\r\n            ret_dicts.append(res_layer)\r\n\r\n        # img_query_feat = self.camera_net(img_query_feat, camera_info.clone())\r\n\r\n        loc_cam_3d = copy.deepcopy(ret_dicts[-1]['loc_cam_3d'].detach()).permute(0, 2, 1)[..., None]\r\n\r\n        lidar2cam_r = camera_info[:, :9, :].permute(0, 2, 1)\r\n        lidar2cam_r = lidar2cam_r.reshape(batch_size, num_img_proposals, 3, 3)\r\n\r\n        lidar2cam_t = camera_info[:, 9:12, :].permute(0, 2, 1)[..., None]\r\n        bev_coords = torch.matmul(torch.inverse(lidar2cam_r), loc_cam_3d - lidar2cam_t)\r\n        bev_coords = bev_coords.squeeze(-1)\r\n\r\n        bev_coords[..., 0:1] = (bev_coords[..., 0:1] - self.test_cfg['pc_range'][0]) / (\r\n                    self.test_cfg['pc_range'][3] - self.test_cfg['pc_range'][0])\r\n        bev_coords[..., 1:2] = (bev_coords[..., 1:2] - self.test_cfg['pc_range'][1]) / (\r\n                    self.test_cfg['pc_range'][4] - self.test_cfg['pc_range'][1])\r\n\r\n        bev_coords[..., 0:1] = bev_coords[..., 0:1] * (self.test_cfg['grid_size'][0] // self.test_cfg['out_size_factor'])\r\n        bev_coords[..., 1:2] = bev_coords[..., 1:2] * (self.test_cfg['grid_size'][1] // self.test_cfg['out_size_factor'])\r\n\r\n        dims, rots, vels = self.transform_bbox(ret_dicts[-1], camera_info, w, img_metas)\r\n        bev_coords = torch.cat([bev_coords, rots, vels, dims], dim=2)\r\n\r\n        return img_query_feat, normal_img_query_pos, bev_coords, camera_info, ret_dicts\r\n\r\n    def transform_bbox(self, ret_dict, camera_info, width, img_metas):\r\n        bs = camera_info.shape[0]\r\n        num_proposal = camera_info.shape[2]\r\n\r\n        lidar2cam_rs = camera_info[:, :9]\r\n        lidar2cam_rs = lidar2cam_rs.reshape(bs, 3, 3, num_proposal)\r\n        lidar2cam_rs = lidar2cam_rs.permute(0, 3, 1, 2)  # [bs, num_proposals, 3, 3]\r\n        cam2lidar_rs = torch.inverse(lidar2cam_rs)\r\n\r\n        cam_dims = ret_dict['dim_2d'].detach().clone()  # [bs, 3, num_proposals]\r\n        cam_rots = ret_dict['rot_2d'].detach().clone()  # [bs, 2, num_proposals]\r\n        cam_vels = ret_dict['vel_2d'].detach().clone()  # [bs, 2, num_proposals]\r\n\r\n        dims = cam_dims[:, [2, 0, 1]]\r\n        dims = dims.permute(0, 2, 1)\r\n\r\n        sin_rots = -cam_rots[:, 0:1]\r\n        cos_rots = cam_rots[:, 1:2]\r\n        rot_dirs = torch.cat([cos_rots, torch.zeros_like(sin_rots), sin_rots], dim=1)  # [bs, 3, num_proposals]\r\n        rot_dirs = rot_dirs.permute(0, 2, 1).unsqueeze(-1)  # [bs, num_proposals, 3, 1]\r\n        rot_dirs = torch.matmul(cam2lidar_rs, rot_dirs)  # [bs, num_proposals, 3, 1]\r\n        lidar_rots = -rot_dirs[:, :, [0, 1], 0]  # [bs, num_proposals, 2]\r\n\r\n        cam_vels_x = cam_vels[:, 0:1, :]\r\n        cam_vels_z = cam_vels[:, 1:2, :]\r\n        vels = torch.cat([cam_vels_x, torch.zeros_like(cam_vels_x), cam_vels_z], dim=1)  # [bs, 3, num_proposals]\r\n        vels = vels.permute(0, 2, 1).unsqueeze(-1)  # [bs, num_proposals, 3, 1]\r\n        vels = 
torch.matmul(cam2lidar_rs, vels)  # [bs, num_proposals, 3, 1]\r\n        lidar_vels = vels[:, :, [0, 1], 0]\r\n\r\n        return dims, lidar_rots, lidar_vels\r\n\r\n\r\nclass ViewTransformer(nn.Module):\r\n    def __init__(self, hidden_channel, num_heads, prediction_heads, ffn_channel, dropout, activation, test_cfg,\r\n                 query_pos, key_pos, view_projection, use_camera):\r\n        super(ViewTransformer, self).__init__()\r\n        self.hidden_channel = hidden_channel\r\n        self.num_heads = num_heads\r\n        self.prediction_heads = prediction_heads\r\n        self.test_cfg = test_cfg\r\n        self.grid_x_size = test_cfg['grid_size'][0] // test_cfg['out_size_factor']\r\n        self.grid_y_size = test_cfg['grid_size'][1] // test_cfg['out_size_factor']\r\n        self.view_projection = view_projection\r\n        self.use_camera = use_camera\r\n\r\n        if use_camera is not None:\r\n            assert use_camera == \"se\"\r\n            self.camera_net = CameraSE(16, hidden_channel)\r\n\r\n        self.decoder = TransformerDecoderLayer(\r\n            hidden_channel, num_heads, ffn_channel, activation=activation, dropout=dropout,\r\n            self_posembed=query_pos, cross_posembed=key_pos,\r\n            cross_only=True\r\n        )\r\n\r\n\r\n    def forward(self, img_query_feat, img_query_pos_bev, normal_img_query_pos, img_ret_dicts, camera_info):\r\n        bs = img_query_feat.shape[0]\r\n        num_proposals = img_query_feat.shape[-1]\r\n\r\n        center_3d = img_ret_dicts[-1]['loc_cam_3d'].detach().clone().permute(0, 2, 1)  # [bs, num_proposal, 3]\r\n        center_3d = center_3d[:, -num_proposals:]\r\n\r\n        if self.use_camera is not None:\r\n            img_query_feat = self.camera_net(img_query_feat, camera_info)\r\n\r\n        camera_info = camera_info.permute(0, 2, 1)  # [bs, num_proposal, 16]\r\n\r\n        img_query_feat = self.view_projection(img_query_feat)\r\n\r\n        camera_R = camera_info[:, :, :9].reshape(bs, num_proposals, 3, 3)\r\n        camera_t = camera_info[:, :, 9:12].reshape(bs, num_proposals, 3, 1)\r\n\r\n        camera_t = -torch.matmul(camera_R.permute(0, 1, 3, 2), camera_t).squeeze(-1)\r\n\r\n        camera_t[..., 0:1] = (camera_t[..., 0:1] - self.test_cfg['pc_range'][0]) / (\r\n                        self.test_cfg['pc_range'][3] - self.test_cfg['pc_range'][0])\r\n        camera_t[..., 1:2] = (camera_t[..., 1:2] - self.test_cfg['pc_range'][1]) / (\r\n                        self.test_cfg['pc_range'][4] - self.test_cfg['pc_range'][1])\r\n        camera_t[..., 0:1] = camera_t[..., 0:1] * (self.test_cfg['grid_size'][0] // self.test_cfg['out_size_factor'])\r\n        camera_t[..., 1:2] = camera_t[..., 1:2] * (self.test_cfg['grid_size'][0] // self.test_cfg['out_size_factor'])\r\n\r\n        img_query_pos = copy.deepcopy(img_query_pos_bev[..., :7])\r\n        img_query_pos[..., :2] = inverse_sigmoid((img_query_pos[..., :2] + 12) / 204)\r\n        img_query_pos[..., 2] = inverse_sigmoid((img_query_pos[..., 2] + 10) / 20)\r\n        img_query_pos[..., 3:5] = inverse_sigmoid((img_query_pos[..., 3:5] + 1) / 2)\r\n\r\n        img_query_pos = torch.cat([img_query_pos, normal_img_query_pos], dim=2)\r\n\r\n        img_query_feat = self.decoder(img_query_feat, img_query_feat, img_query_pos, img_query_pos)\r\n\r\n        # Prediction\r\n        res_layer = self.prediction_heads(img_query_feat)\r\n\r\n        res_layer['center_mono'] = img_query_pos_bev[..., 0:2].permute(0, 2, 1)\r\n        res_layer['height_mono'] = 
img_query_pos_bev[..., 2:3].permute(0, 2, 1)\r\n        res_layer['rot_mono'] = img_query_pos_bev[..., 3:5].permute(0, 2, 1)\r\n        res_layer['vel_mono'] = img_query_pos_bev[..., 5:7].permute(0, 2, 1)\r\n        res_layer['dim_mono'] = img_query_pos_bev[..., 7:10].permute(0, 2, 1)\r\n\r\n        res_layer['center_view'] = res_layer['center_view'] + img_query_pos_bev[..., 0:2].permute(0, 2, 1)\r\n\r\n        img_query_pos_bev = res_layer['center_view'].detach().clone().permute(0, 2, 1)\r\n\r\n        return img_query_feat, img_query_pos_bev, [res_layer]\r\n\r\n\r\nclass FusionTransformer2D_3D_Self(nn.Module):\r\n    def __init__(self, hidden_channel, num_heads, num_decoder_layers, prediction_heads, ffn_channel, dropout, activation, test_cfg,\r\n                 query_pos, key_pos, pts_projection, img_projection, num_proposals):\r\n        super(FusionTransformer2D_3D_Self, self).__init__()\r\n        self.hidden_channel = hidden_channel\r\n        self.num_heads = num_heads\r\n        self.num_decoder_layers = num_decoder_layers\r\n        self.prediction_heads = prediction_heads\r\n        self.test_cfg = test_cfg\r\n        self.grid_x_size = test_cfg['grid_size'][0] // test_cfg['out_size_factor']\r\n        self.grid_y_size = test_cfg['grid_size'][1] // test_cfg['out_size_factor']\r\n        self.pts_projection = pts_projection\r\n        self.img_projection = img_projection\r\n        self.num_proposals = num_proposals\r\n\r\n        self.decoder = nn.ModuleList()\r\n        for i in range(self.num_decoder_layers):\r\n            self.decoder.append(\r\n                TransformerDecoderLayer(\r\n                    hidden_channel, num_heads, ffn_channel, dropout, activation,\r\n                    self_posembed=query_pos[i], cross_posembed=key_pos[i], cross_only=True\r\n                )\r\n            )\r\n\r\n    def forward(self, pts_query_feat, pts_query_pos, img_query_feat, img_query_pos, need_weights=False):\r\n        ret_dicts = []\r\n        pts_query_feat = self.pts_projection(pts_query_feat)\r\n        img_query_feat = self.img_projection(img_query_feat)\r\n\r\n        all_query_feat = torch.cat([pts_query_feat, img_query_feat], dim=2)\r\n        all_query_pos = torch.cat([pts_query_pos, img_query_pos], dim=1)\r\n\r\n        for i in range(self.num_decoder_layers):\r\n            # Transformer Decoder Layer\r\n            # :param query: B C Pq    :param query_pos: B Pq 3/6\r\n            all_query_feat_raw = all_query_feat.clone()\r\n\r\n            if need_weights:\r\n                all_query_feat, attn_weights = self.decoder[i](all_query_feat, all_query_feat, all_query_pos, all_query_pos, need_weights=True)\r\n            else:\r\n                all_query_feat = self.decoder[i](all_query_feat, all_query_feat, all_query_pos, all_query_pos)\r\n\r\n            all_query_feat_pred = all_query_feat\r\n\r\n            # Prediction\r\n            res_layer = self.prediction_heads(all_query_feat_pred)\r\n            res_layer['center'] = res_layer['center'] + all_query_pos.permute(0, 2, 1)\r\n\r\n            ret_dicts.append(res_layer)\r\n\r\n            all_query_pos = res_layer['center'].detach().clone().permute(0, 2, 1)\r\n\r\n        # return all_query_feat, all_query_pos, ret_dicts\r\n        if need_weights:\r\n            return all_query_feat, all_query_pos, ret_dicts, attn_weights\r\n        else:\r\n            return all_query_feat, all_query_pos, ret_dicts\r\n\r\n\r\nclass ImageTransformer2D_3D_MS(nn.Module):\r\n    def __init__(self, num_views, hidden_channel, 
num_heads, num_decoder_layers, prediction_heads, out_size_factor_img,\r\n                 ffn_channel, dropout, activation, test_cfg, query_pos, key_pos, supervision2d):\r\n        super(ImageTransformer2D_3D_MS, self).__init__()\r\n        self.hidden_channel = hidden_channel\r\n        self.num_heads = num_heads\r\n        self.num_decoder_layers = num_decoder_layers\r\n        self.prediction_heads = prediction_heads\r\n        self.num_views = num_views\r\n        self.out_size_factor_img = out_size_factor_img\r\n        self.test_cfg = test_cfg\r\n        self.supervision2d = supervision2d\r\n\r\n        self.decoder = nn.ModuleList()\r\n        for i in range(self.num_decoder_layers):\r\n            self.decoder.append(\r\n                DeformableTransformerDecoderLayer(\r\n                    hidden_channel, num_heads, dim_feedforward=ffn_channel, dropout=dropout, activation=activation,\r\n                    self_posembed=query_pos[i], cross_posembed=key_pos[i],\r\n                )\r\n            )\r\n\r\n    def forward(self, img_query_feat, normal_img_query_pos, img_query_view, img_feats, normal_img_feats_pos_stack, img_metas):\r\n        level_num = len(img_feats)\r\n        batch_size = img_query_feat.shape[0]\r\n        img_feats_flatten = []\r\n        level_start_index = [0]\r\n        spatial_shapes = []\r\n        for lvl in range(level_num):\r\n            img_feat = img_feats[lvl]\r\n            h, w = img_feat.shape[-2], img_feat.shape[-1]\r\n            img_feat_flatten = img_feat.view(batch_size, self.num_views, self.hidden_channel, h*w)  # [bs, num_view, C, h*w]\r\n            img_feats_flatten.append(img_feat_flatten)\r\n            level_start_index.append(level_start_index[-1] + h*w)\r\n            spatial_shapes.append([h, w])\r\n        level_start_index = level_start_index[:-1]\r\n        level_start_index = torch.LongTensor(level_start_index).to(img_query_feat.device)\r\n        spatial_shapes = torch.LongTensor(spatial_shapes).to(img_query_feat.device)\r\n\r\n        img_feats_stack = torch.cat(img_feats_flatten, dim=3)  # [bs, num_view, C, h*w (sum)]\r\n        reference_points = normal_img_query_pos.sigmoid()  # [bs, num_img_proposal, 2]\r\n        reference_points = reference_points[:, :, None].repeat(1, 1, level_num, 1)\r\n        ret_dicts = []\r\n\r\n        for i in range(self.num_decoder_layers):\r\n            img_prev_query_feat = img_query_feat.clone()  # [BS, C, num_proposals]\r\n            img_query_feat = torch.zeros_like(img_query_feat)  # create new container for img query feature\r\n            for sample_idx in range(batch_size):\r\n                for view_idx in range(self.num_views):\r\n                    on_the_image = img_query_view[sample_idx] == view_idx  # [num_on_the_image, ]\r\n                    if torch.sum(on_the_image) <= 1:\r\n                        continue\r\n                    img_query_feat_view = img_prev_query_feat[sample_idx, :, on_the_image]  # [C, num_on_the_image]\r\n\r\n                    img_query_feat_view = self.decoder[i](\r\n                        img_query_feat_view[None], img_feats_stack[sample_idx:sample_idx + 1, view_idx],\r\n                        normal_img_query_pos[sample_idx:sample_idx + 1, on_the_image], normal_img_feats_pos_stack,\r\n                        reference_points=reference_points[sample_idx:sample_idx+1, on_the_image],\r\n                        level_start_index=level_start_index, spatial_shapes=spatial_shapes\r\n                    )\r\n                    
img_query_feat[sample_idx, :, on_the_image] = img_query_feat_view.clone()\r\n\r\n            res_layer = self.prediction_heads(img_query_feat)\r\n            if 'center_offset' in res_layer:\r\n                assert 'center_2d' not in res_layer and 'offset' not in res_layer\r\n                res_layer['center_2d'] = res_layer['center_offset'][:, :2]\r\n                res_layer['offset'] = res_layer['center_offset'][:, 2:]\r\n\r\n            res_layer['center_2d'] = res_layer['center_2d'] + normal_img_query_pos.permute(0, 2, 1)\r\n\r\n            if self.supervision2d:\r\n                normal_img_query_pos = res_layer['center_2d'].detach().clone().permute(0, 2, 1)\r\n\r\n            res_layer['center_2d'] = res_layer['center_2d'].sigmoid()\r\n            res_layer['offset'] = res_layer['offset'].sigmoid()\r\n\r\n            bbox_width = res_layer['offset'][:, 0] + res_layer['offset'][:, 2]\r\n            bbox_height = res_layer['offset'][:, 1] + res_layer['offset'][:, 3]\r\n\r\n            bbox_cx = (res_layer['center_2d'][:, 0] - res_layer['offset'][:, 0] + res_layer['center_2d'][:, 0] + res_layer['offset'][:, 2]) / 2\r\n            bbox_cy = (res_layer['center_2d'][:, 1] - res_layer['offset'][:, 1] + res_layer['center_2d'][:, 1] + res_layer['offset'][:, 3]) / 2\r\n\r\n            res_layer['bbox_2d'] = torch.stack([bbox_cx, bbox_cy, bbox_width, bbox_height], dim=1).detach().clone()\r\n\r\n            ret_dicts.append(res_layer)\r\n\r\n        return img_query_feat, normal_img_query_pos, ret_dicts\r\n\r\n    def camera2lidar(self, camera_coords, lidar2img, img_meta, batch_size):\r\n        # img_pos: [W*H, 2]\r\n\r\n        coords = torch.cat([camera_coords, torch.ones_like(camera_coords[..., :1])], dim=1)  # [N, 4]\r\n\r\n        img2lidars = torch.inverse(lidar2img)\r\n        coords3d = torch.matmul(img2lidars, coords.unsqueeze(-1)).squeeze(-1)[..., :3]  # [N, 3]\r\n\r\n        if batch_size > 1:\r\n            coords3d = apply_3d_transformation(coords3d, 'LIDAR', img_meta, reverse=False).detach()\r\n        coords3d[..., 0:1] = (coords3d[..., 0:1] - self.test_cfg['pc_range'][0]) / (\r\n                    self.test_cfg['pc_range'][3] - self.test_cfg['pc_range'][0])\r\n        coords3d[..., 1:2] = (coords3d[..., 1:2] - self.test_cfg['pc_range'][1]) / (\r\n                    self.test_cfg['pc_range'][4] - self.test_cfg['pc_range'][1])\r\n\r\n        coords3d[..., 0:1] = coords3d[..., 0:1] * (self.test_cfg['grid_size'][0] // self.test_cfg['out_size_factor'])\r\n        coords3d[..., 1:2] = coords3d[..., 1:2] * (self.test_cfg['grid_size'][1] // self.test_cfg['out_size_factor'])\r\n\r\n        if not self.pos_3d:\r\n            coords3d = coords3d[..., :2]  # [N, 3]\r\n\r\n        if self.pos_3d:\r\n            coords3d = coords3d.contiguous().view(coords3d.size(0), 3)\r\n        else:\r\n            coords3d = coords3d.contiguous().view(coords3d.size(0), 2)\r\n\r\n        return coords3d"
  },
  {
    "path": "mmdet3d/models/utils/transformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\r\nimport copy\r\nimport math\r\nimport warnings\r\nimport collections\r\nfrom typing import Sequence, Iterable, Optional\r\nfrom itertools import repeat\r\n\r\nimport torch\r\nimport torch.nn as nn\r\nimport torch.nn.functional as F\r\n\r\nfrom mmcv.cnn import (Linear, build_activation_layer, build_conv_layer, build_norm_layer)\r\nfrom mmcv.runner.base_module import BaseModule\r\nfrom mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning, TORCH_VERSION, digit_version)\r\nfrom .drop import build_dropout\r\nfrom mmdet3d.models.registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING,\r\n                       TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE)\r\nfrom mmcv.cnn.bricks.registry import ACTIVATION_LAYERS\r\n\r\n\r\n# From PyTorch internals\r\ndef _ntuple(n):\r\n\r\n    def parse(x):\r\n        if isinstance(x, collections.abc.Iterable):\r\n            return x\r\n        return tuple(repeat(x, n))\r\n\r\n    return parse\r\nto_2tuple = _ntuple(2)\r\n\r\nclass GELU(nn.Module):\r\n    r\"\"\"Applies the Gaussian Error Linear Units function:\r\n    .. math::\r\n        \\text{GELU}(x) = x * \\Phi(x)\r\n    where :math:`\\Phi(x)` is the Cumulative Distribution Function for\r\n    Gaussian Distribution.\r\n    Shape:\r\n        - Input: :math:`(N, *)` where `*` means, any number of additional\r\n          dimensions\r\n        - Output: :math:`(N, *)`, same shape as the input\r\n    .. image:: scripts/activation_images/GELU.png\r\n    Examples::\r\n        >>> m = nn.GELU()\r\n        >>> input = torch.randn(2)\r\n        >>> output = m(input)\r\n    \"\"\"\r\n\r\n    def forward(self, input: torch.Tensor) -> torch.Tensor:\r\n        return F.gelu(input)\r\n\r\nif (TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.4')):\r\n    ACTIVATION_LAYERS.register_module(module=GELU)\r\nelse:\r\n    ACTIVATION_LAYERS.register_module(module=nn.GELU)\r\n\r\n\r\nclass ModuleList(BaseModule, nn.ModuleList):\r\n    \"\"\"ModuleList in openmmlab.\r\n    Args:\r\n        modules (iterable, optional): an iterable of modules to add.\r\n        init_cfg (dict, optional): Initialization config dict.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 modules: Optional[Iterable] = None,\r\n                 init_cfg: Optional[dict] = None):\r\n        BaseModule.__init__(self, init_cfg)\r\n        nn.ModuleList.__init__(self, modules)\r\n\r\nclass Sequential(BaseModule, nn.Sequential):\r\n    \"\"\"Sequential module in openmmlab.\r\n    Args:\r\n        init_cfg (dict, optional): Initialization config dict.\r\n    \"\"\"\r\n\r\n    def __init__(self, *args, init_cfg: Optional[dict] = None):\r\n        BaseModule.__init__(self, init_cfg)\r\n        nn.Sequential.__init__(self, *args)\r\n\r\ndef build_positional_encoding(cfg, default_args=None):\r\n    \"\"\"Builder for Position Encoding.\"\"\"\r\n    return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args)\r\n\r\n\r\ndef build_attention(cfg, default_args=None):\r\n    \"\"\"Builder for attention.\"\"\"\r\n    return build_from_cfg(cfg, ATTENTION, default_args)\r\n\r\n\r\ndef build_feedforward_network(cfg, default_args=None):\r\n    \"\"\"Builder for feed-forward network (FFN).\"\"\"\r\n    return build_from_cfg(cfg, FEEDFORWARD_NETWORK, default_args)\r\n\r\n\r\ndef build_transformer_layer(cfg, default_args=None):\r\n    \"\"\"Builder for transformer layer.\"\"\"\r\n    return build_from_cfg(cfg, TRANSFORMER_LAYER, 
default_args)\r\n\r\n\r\ndef build_transformer_layer_sequence(cfg, default_args=None):\r\n    \"\"\"Builder for transformer encoder and transformer decoder.\"\"\"\r\n    return build_from_cfg(cfg, TRANSFORMER_LAYER_SEQUENCE, default_args)\r\n\r\n\r\nclass AdaptivePadding(nn.Module):\r\n    \"\"\"Applies padding adaptively to the input.\r\n\r\n    This module can make input get fully covered by filter\r\n    you specified. It support two modes \"same\" and \"corner\". The\r\n    \"same\" mode is same with \"SAME\" padding mode in TensorFlow, pad\r\n    zero around input. The \"corner\"  mode would pad zero\r\n    to bottom right.\r\n\r\n    Args:\r\n        kernel_size (int | tuple): Size of the kernel. Default: 1.\r\n        stride (int | tuple): Stride of the filter. Default: 1.\r\n        dilation (int | tuple): Spacing between kernel elements.\r\n            Default: 1.\r\n        padding (str): Support \"same\" and \"corner\", \"corner\" mode\r\n            would pad zero to bottom right, and \"same\" mode would\r\n            pad zero around input. Default: \"corner\".\r\n\r\n    Example:\r\n        >>> kernel_size = 16\r\n        >>> stride = 16\r\n        >>> dilation = 1\r\n        >>> input = torch.rand(1, 1, 15, 17)\r\n        >>> adap_pad = AdaptivePadding(\r\n        >>>     kernel_size=kernel_size,\r\n        >>>     stride=stride,\r\n        >>>     dilation=dilation,\r\n        >>>     padding=\"corner\")\r\n        >>> out = adap_pad(input)\r\n        >>> assert (out.shape[2], out.shape[3]) == (16, 32)\r\n        >>> input = torch.rand(1, 1, 16, 17)\r\n        >>> out = adap_pad(input)\r\n        >>> assert (out.shape[2], out.shape[3]) == (16, 32)\r\n    \"\"\"\r\n\r\n    def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'):\r\n        super().__init__()\r\n        assert padding in ('same', 'corner')\r\n\r\n        kernel_size = to_2tuple(kernel_size)\r\n        stride = to_2tuple(stride)\r\n        dilation = to_2tuple(dilation)\r\n\r\n        self.padding = padding\r\n        self.kernel_size = kernel_size\r\n        self.stride = stride\r\n        self.dilation = dilation\r\n\r\n    def get_pad_shape(self, input_shape):\r\n        \"\"\"Calculate the padding size of input.\r\n\r\n        Args:\r\n            input_shape (:obj:`torch.Size`): arrange as (H, W).\r\n\r\n        Returns:\r\n            Tuple[int]: The padding size along the\r\n            original H and W directions\r\n        \"\"\"\r\n        input_h, input_w = input_shape\r\n        kernel_h, kernel_w = self.kernel_size\r\n        stride_h, stride_w = self.stride\r\n        output_h = math.ceil(input_h / stride_h)\r\n        output_w = math.ceil(input_w / stride_w)\r\n        pad_h = max((output_h - 1) * stride_h +\r\n                    (kernel_h - 1) * self.dilation[0] + 1 - input_h, 0)\r\n        pad_w = max((output_w - 1) * stride_w +\r\n                    (kernel_w - 1) * self.dilation[1] + 1 - input_w, 0)\r\n        return pad_h, pad_w\r\n\r\n    def forward(self, x):\r\n        \"\"\"Add padding to `x`\r\n\r\n        Args:\r\n            x (Tensor): Input tensor has shape (B, C, H, W).\r\n\r\n        Returns:\r\n            Tensor: The tensor with adaptive padding\r\n        \"\"\"\r\n        pad_h, pad_w = self.get_pad_shape(x.size()[-2:])\r\n        if pad_h > 0 or pad_w > 0:\r\n            if self.padding == 'corner':\r\n                x = F.pad(x, [0, pad_w, 0, pad_h])\r\n            elif self.padding == 'same':\r\n                x = F.pad(x, [\r\n                    
pad_w // 2, pad_w - pad_w // 2, pad_h // 2,\r\n                    pad_h - pad_h // 2\r\n                ])\r\n        return x\r\n\r\n\r\nclass PatchEmbed(BaseModule):\r\n    \"\"\"Image to Patch Embedding.\r\n\r\n    We use a conv layer to implement PatchEmbed.\r\n\r\n    Args:\r\n        in_channels (int): The num of input channels. Default: 3\r\n        embed_dims (int): The dimensions of embedding. Default: 768\r\n        conv_type (str): The type of convolution\r\n            to generate patch embedding. Default: \"Conv2d\".\r\n        kernel_size (int): The kernel_size of embedding conv. Default: 16.\r\n        stride (int): The slide stride of embedding conv.\r\n            Default: 16.\r\n        padding (int | tuple | string): The padding length of\r\n            embedding conv. When it is a string, it means the mode\r\n            of adaptive padding, support \"same\" and \"corner\" now.\r\n            Default: \"corner\".\r\n        dilation (int): The dilation rate of embedding conv. Default: 1.\r\n        bias (bool): Bias of embed conv. Default: True.\r\n        norm_cfg (dict, optional): Config dict for normalization layer.\r\n            Default: None.\r\n        input_size (int | tuple | None): The size of input, which will be\r\n            used to calculate the out size. Only works when `dynamic_size`\r\n            is False. Default: None.\r\n        init_cfg (`mmcv.ConfigDict`, optional): The Config for initialization.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 in_channels=3,\r\n                 embed_dims=768,\r\n                 conv_type='Conv2d',\r\n                 kernel_size=16,\r\n                 stride=16,\r\n                 padding='corner',\r\n                 dilation=1,\r\n                 bias=True,\r\n                 norm_cfg=None,\r\n                 input_size=None,\r\n                 init_cfg=None):\r\n        super().__init__(init_cfg=init_cfg)\r\n\r\n        self.embed_dims = embed_dims\r\n        if stride is None:\r\n            stride = kernel_size\r\n\r\n        kernel_size = to_2tuple(kernel_size)\r\n        stride = to_2tuple(stride)\r\n        dilation = to_2tuple(dilation)\r\n\r\n        if isinstance(padding, str):\r\n            self.adaptive_padding = AdaptivePadding(\r\n                kernel_size=kernel_size,\r\n                stride=stride,\r\n                dilation=dilation,\r\n                padding=padding)\r\n            # disable the padding of conv\r\n            padding = 0\r\n        else:\r\n            self.adaptive_padding = None\r\n        padding = to_2tuple(padding)\r\n\r\n        self.projection = build_conv_layer(\r\n            dict(type=conv_type),\r\n            in_channels=in_channels,\r\n            out_channels=embed_dims,\r\n            kernel_size=kernel_size,\r\n            stride=stride,\r\n            padding=padding,\r\n            dilation=dilation,\r\n            bias=bias)\r\n\r\n        if norm_cfg is not None:\r\n            self.norm = build_norm_layer(norm_cfg, embed_dims)[1]\r\n        else:\r\n            self.norm = None\r\n\r\n        if input_size:\r\n            input_size = to_2tuple(input_size)\r\n            # `init_out_size` would be used outside to\r\n            # calculate the num_patches\r\n            # e.g. 
when `use_abs_pos_embed` outside\r\n            self.init_input_size = input_size\r\n            if self.adaptive_padding:\r\n                pad_h, pad_w = self.adaptive_padding.get_pad_shape(input_size)\r\n                input_h, input_w = input_size\r\n                input_h = input_h + pad_h\r\n                input_w = input_w + pad_w\r\n                input_size = (input_h, input_w)\r\n\r\n            # https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html\r\n            h_out = (input_size[0] + 2 * padding[0] - dilation[0] *\r\n                     (kernel_size[0] - 1) - 1) // stride[0] + 1\r\n            w_out = (input_size[1] + 2 * padding[1] - dilation[1] *\r\n                     (kernel_size[1] - 1) - 1) // stride[1] + 1\r\n            self.init_out_size = (h_out, w_out)\r\n        else:\r\n            self.init_input_size = None\r\n            self.init_out_size = None\r\n\r\n    def forward(self, x):\r\n        \"\"\"\r\n        Args:\r\n            x (Tensor): Has shape (B, C, H, W). In most case, C is 3.\r\n\r\n        Returns:\r\n            tuple: Contains merged results and its spatial shape.\r\n\r\n            - x (Tensor): Has shape (B, out_h * out_w, embed_dims)\r\n            - out_size (tuple[int]): Spatial shape of x, arrange as\r\n              (out_h, out_w).\r\n        \"\"\"\r\n\r\n        if self.adaptive_padding:\r\n            x = self.adaptive_padding(x)\r\n\r\n        x = self.projection(x)\r\n        out_size = (x.shape[2], x.shape[3])\r\n        x = x.flatten(2).transpose(1, 2)\r\n        if self.norm is not None:\r\n            x = self.norm(x)\r\n        return x, out_size\r\n\r\n\r\nclass PatchMerging(BaseModule):\r\n    \"\"\"Merge patch feature map.\r\n\r\n    This layer groups feature map by kernel_size, and applies norm and linear\r\n    layers to the grouped feature map ((used in Swin Transformer)).\r\n    Our implementation uses `nn.Unfold` to\r\n    merge patches, which is about 25% faster than the original\r\n    implementation. However, we need to modify pretrained\r\n    models for compatibility.\r\n\r\n    Args:\r\n        in_channels (int): The num of input channels.\r\n            to gets fully covered by filter and stride you specified.\r\n        out_channels (int): The num of output channels.\r\n        kernel_size (int | tuple, optional): the kernel size in the unfold\r\n            layer. Defaults to 2.\r\n        stride (int | tuple, optional): the stride of the sliding blocks in the\r\n            unfold layer. Default: None. (Would be set as `kernel_size`)\r\n        padding (int | tuple | string ): The padding length of\r\n            embedding conv. When it is a string, it means the mode\r\n            of adaptive padding, support \"same\" and \"corner\" now.\r\n            Default: \"corner\".\r\n        dilation (int | tuple, optional): dilation parameter in the unfold\r\n            layer. 
Default: 1.\r\n        bias (bool, optional): Whether to add bias in linear layer or not.\r\n            Defaults: False.\r\n        norm_cfg (dict, optional): Config dict for normalization layer.\r\n            Default: dict(type='LN').\r\n        init_cfg (dict, optional): The extra config for initialization.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 in_channels,\r\n                 out_channels,\r\n                 kernel_size=2,\r\n                 stride=None,\r\n                 padding='corner',\r\n                 dilation=1,\r\n                 bias=False,\r\n                 norm_cfg=dict(type='LN'),\r\n                 init_cfg=None):\r\n        super().__init__(init_cfg=init_cfg)\r\n        self.in_channels = in_channels\r\n        self.out_channels = out_channels\r\n        if stride:\r\n            stride = stride\r\n        else:\r\n            stride = kernel_size\r\n\r\n        kernel_size = to_2tuple(kernel_size)\r\n        stride = to_2tuple(stride)\r\n        dilation = to_2tuple(dilation)\r\n\r\n        if isinstance(padding, str):\r\n            self.adaptive_padding = AdaptivePadding(\r\n                kernel_size=kernel_size,\r\n                stride=stride,\r\n                dilation=dilation,\r\n                padding=padding)\r\n            # disable the padding of unfold\r\n            padding = 0\r\n        else:\r\n            self.adaptive_padding = None\r\n\r\n        padding = to_2tuple(padding)\r\n        self.sampler = nn.Unfold(\r\n            kernel_size=kernel_size,\r\n            dilation=dilation,\r\n            padding=padding,\r\n            stride=stride)\r\n\r\n        sample_dim = kernel_size[0] * kernel_size[1] * in_channels\r\n\r\n        if norm_cfg is not None:\r\n            self.norm = build_norm_layer(norm_cfg, sample_dim)[1]\r\n        else:\r\n            self.norm = None\r\n\r\n        self.reduction = nn.Linear(sample_dim, out_channels, bias=bias)\r\n\r\n    def forward(self, x, input_size):\r\n        \"\"\"\r\n        Args:\r\n            x (Tensor): Has shape (B, H*W, C_in).\r\n            input_size (tuple[int]): The spatial shape of x, arrange as (H, W).\r\n                Default: None.\r\n\r\n        Returns:\r\n            tuple: Contains merged results and its spatial shape.\r\n\r\n            - x (Tensor): Has shape (B, Merged_H * Merged_W, C_out)\r\n            - out_size (tuple[int]): Spatial shape of x, arrange as\r\n              (Merged_H, Merged_W).\r\n        \"\"\"\r\n        B, L, C = x.shape\r\n        assert isinstance(input_size, Sequence), f'Expect ' \\\r\n                                                 f'input_size is ' \\\r\n                                                 f'`Sequence` ' \\\r\n                                                 f'but get {input_size}'\r\n\r\n        H, W = input_size\r\n        assert L == H * W, 'input feature has wrong size'\r\n\r\n        x = x.view(B, H, W, C).permute([0, 3, 1, 2])  # B, C, H, W\r\n\r\n        if self.adaptive_padding:\r\n            x = self.adaptive_padding(x)\r\n            H, W = x.shape[-2:]\r\n\r\n        # Use nn.Unfold to merge patch. 
About 25% faster than original method,\r\n        # but need to modify pretrained model for compatibility\r\n        # if kernel_size=2 and stride=2, x should has shape (B, 4*C, H/2*W/2)\r\n        x = self.sampler(x)\r\n\r\n        out_h = (H + 2 * self.sampler.padding[0] - self.sampler.dilation[0] *\r\n                 (self.sampler.kernel_size[0] - 1) -\r\n                 1) // self.sampler.stride[0] + 1\r\n        out_w = (W + 2 * self.sampler.padding[1] - self.sampler.dilation[1] *\r\n                 (self.sampler.kernel_size[1] - 1) -\r\n                 1) // self.sampler.stride[1] + 1\r\n\r\n        output_size = (out_h, out_w)\r\n        x = x.transpose(1, 2)  # B, H/2*W/2, 4*C\r\n        x = self.norm(x) if self.norm else x\r\n        x = self.reduction(x)\r\n        return x, output_size\r\n\r\n\r\n@ATTENTION.register_module()\r\nclass MultiheadAttention(BaseModule):\r\n    \"\"\"A wrapper for ``torch.nn.MultiheadAttention``.\r\n\r\n    This module implements MultiheadAttention with identity connection,\r\n    and positional encoding  is also passed as input.\r\n\r\n    Args:\r\n        embed_dims (int): The embedding dimension.\r\n        num_heads (int): Parallel attention heads.\r\n        attn_drop (float): A Dropout layer on attn_output_weights.\r\n            Default: 0.0.\r\n        proj_drop (float): A Dropout layer after `nn.MultiheadAttention`.\r\n            Default: 0.0.\r\n        dropout_layer (obj:`ConfigDict`): The dropout_layer used\r\n            when adding the shortcut.\r\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\r\n            Default: None.\r\n        batch_first (bool): When it is True,  Key, Query and Value are shape of\r\n            (batch, n, embed_dim), otherwise (n, batch, embed_dim).\r\n             Default to False.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 embed_dims,\r\n                 num_heads,\r\n                 attn_drop=0.,\r\n                 proj_drop=0.,\r\n                 dropout_layer=dict(type='Dropout', drop_prob=0.),\r\n                 init_cfg=None,\r\n                 batch_first=False,\r\n                 **kwargs):\r\n        super().__init__(init_cfg)\r\n        if 'dropout' in kwargs:\r\n            warnings.warn(\r\n                'The arguments `dropout` in MultiheadAttention '\r\n                'has been deprecated, now you can separately '\r\n                'set `attn_drop`(float), proj_drop(float), '\r\n                'and `dropout_layer`(dict) ', DeprecationWarning)\r\n            attn_drop = kwargs['dropout']\r\n            dropout_layer['drop_prob'] = kwargs.pop('dropout')\r\n\r\n        self.embed_dims = embed_dims\r\n        self.num_heads = num_heads\r\n        self.batch_first = batch_first\r\n\r\n        self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop,\r\n                                          **kwargs)\r\n\r\n        self.proj_drop = nn.Dropout(proj_drop)\r\n        self.dropout_layer = build_dropout(\r\n            dropout_layer) if dropout_layer else nn.Identity()\r\n\r\n    @deprecated_api_warning({'residual': 'identity'},\r\n                            cls_name='MultiheadAttention')\r\n    def forward(self,\r\n                query,\r\n                key=None,\r\n                value=None,\r\n                identity=None,\r\n                query_pos=None,\r\n                key_pos=None,\r\n                attn_mask=None,\r\n                key_padding_mask=None,\r\n                **kwargs):\r\n        \"\"\"Forward 
function for `MultiheadAttention`.\r\n\r\n        **kwargs allow passing a more general data flow when combining\r\n        with other operations in `transformerlayer`.\r\n\r\n        Args:\r\n            query (Tensor): The input query with shape [num_queries, bs,\r\n                embed_dims] if self.batch_first is False, else\r\n                [bs, num_queries embed_dims].\r\n            key (Tensor): The key tensor with shape [num_keys, bs,\r\n                embed_dims] if self.batch_first is False, else\r\n                [bs, num_keys, embed_dims] .\r\n                If None, the ``query`` will be used. Defaults to None.\r\n            value (Tensor): The value tensor with same shape as `key`.\r\n                Same in `nn.MultiheadAttention.forward`. Defaults to None.\r\n                If None, the `key` will be used.\r\n            identity (Tensor): This tensor, with the same shape as x,\r\n                will be used for the identity link.\r\n                If None, `x` will be used. Defaults to None.\r\n            query_pos (Tensor): The positional encoding for query, with\r\n                the same shape as `x`. If not None, it will\r\n                be added to `x` before forward function. Defaults to None.\r\n            key_pos (Tensor): The positional encoding for `key`, with the\r\n                same shape as `key`. Defaults to None. If not None, it will\r\n                be added to `key` before forward function. If None, and\r\n                `query_pos` has the same shape as `key`, then `query_pos`\r\n                will be used for `key_pos`. Defaults to None.\r\n            attn_mask (Tensor): ByteTensor mask with shape [num_queries,\r\n                num_keys]. Same in `nn.MultiheadAttention.forward`.\r\n                Defaults to None.\r\n            key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys].\r\n                Defaults to None.\r\n\r\n        Returns:\r\n            Tensor: forwarded results with shape\r\n            [num_queries, bs, embed_dims]\r\n            if self.batch_first is False, else\r\n            [bs, num_queries embed_dims].\r\n        \"\"\"\r\n\r\n        if key is None:\r\n            key = query\r\n        if value is None:\r\n            value = key\r\n        if identity is None:\r\n            identity = query\r\n        if key_pos is None:\r\n            if query_pos is not None:\r\n                # use query_pos if key_pos is not available\r\n                if query_pos.shape == key.shape:\r\n                    key_pos = query_pos\r\n                else:\r\n                    warnings.warn(f'position encoding of key is'\r\n                                  f'missing in {self.__class__.__name__}.')\r\n        if query_pos is not None:\r\n            query = query + query_pos\r\n        if key_pos is not None:\r\n            key = key + key_pos\r\n\r\n        # Because the dataflow('key', 'query', 'value') of\r\n        # ``torch.nn.MultiheadAttention`` is (num_query, batch,\r\n        # embed_dims), We should adjust the shape of dataflow from\r\n        # batch_first (batch, num_query, embed_dims) to num_query_first\r\n        # (num_query ,batch, embed_dims), and recover ``attn_output``\r\n        # from num_query_first to batch_first.\r\n        if self.batch_first:\r\n            query = query.transpose(0, 1)\r\n            key = key.transpose(0, 1)\r\n            value = value.transpose(0, 1)\r\n\r\n        out = self.attn(\r\n            query=query,\r\n            key=key,\r\n            
value=value,\r\n            attn_mask=attn_mask,\r\n            key_padding_mask=key_padding_mask)[0]\r\n\r\n        if self.batch_first:\r\n            out = out.transpose(0, 1)\r\n\r\n        return identity + self.dropout_layer(self.proj_drop(out))\r\n\r\n\r\n@FEEDFORWARD_NETWORK.register_module()\r\nclass FFN(BaseModule):\r\n    \"\"\"Implements feed-forward networks (FFNs) with identity connection.\r\n\r\n    Args:\r\n        embed_dims (int): The feature dimension. Same as\r\n            `MultiheadAttention`. Defaults: 256.\r\n        feedforward_channels (int): The hidden dimension of FFNs.\r\n            Defaults: 1024.\r\n        num_fcs (int, optional): The number of fully-connected layers in\r\n            FFNs. Default: 2.\r\n        act_cfg (dict, optional): The activation config for FFNs.\r\n            Default: dict(type='ReLU')\r\n        ffn_drop (float, optional): Probability of an element to be\r\n            zeroed in FFN. Default 0.0.\r\n        add_identity (bool, optional): Whether to add the\r\n            identity connection. Default: `True`.\r\n        dropout_layer (obj:`ConfigDict`): The dropout_layer used\r\n            when adding the shortcut.\r\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    @deprecated_api_warning(\r\n        {\r\n            'dropout': 'ffn_drop',\r\n            'add_residual': 'add_identity'\r\n        },\r\n        cls_name='FFN')\r\n    def __init__(self,\r\n                 embed_dims=256,\r\n                 feedforward_channels=1024,\r\n                 num_fcs=2,\r\n                 act_cfg=dict(type='ReLU', inplace=True),\r\n                 ffn_drop=0.,\r\n                 dropout_layer=None,\r\n                 add_identity=True,\r\n                 init_cfg=None,\r\n                 **kwargs):\r\n        super().__init__(init_cfg)\r\n        assert num_fcs >= 2, 'num_fcs should be no less ' \\\r\n            f'than 2. 
got {num_fcs}.'\r\n        self.embed_dims = embed_dims\r\n        self.feedforward_channels = feedforward_channels\r\n        self.num_fcs = num_fcs\r\n        self.act_cfg = act_cfg\r\n        self.activate = build_activation_layer(act_cfg)\r\n\r\n        layers = []\r\n        in_channels = embed_dims\r\n        for _ in range(num_fcs - 1):\r\n            layers.append(\r\n                Sequential(\r\n                    Linear(in_channels, feedforward_channels), self.activate,\r\n                    nn.Dropout(ffn_drop)))\r\n            in_channels = feedforward_channels\r\n        layers.append(Linear(feedforward_channels, embed_dims))\r\n        layers.append(nn.Dropout(ffn_drop))\r\n        self.layers = Sequential(*layers)\r\n        self.dropout_layer = build_dropout(\r\n            dropout_layer) if dropout_layer else torch.nn.Identity()\r\n        self.add_identity = add_identity\r\n\r\n    @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN')\r\n    def forward(self, x, identity=None):\r\n        \"\"\"Forward function for `FFN`.\r\n\r\n        The function adds `x` to the output tensor if `identity` is None.\r\n        \"\"\"\r\n        out = self.layers(x)\r\n        if not self.add_identity:\r\n            return self.dropout_layer(out)\r\n        if identity is None:\r\n            identity = x\r\n        return identity + self.dropout_layer(out)\r\n\r\n\r\n@TRANSFORMER_LAYER.register_module()\r\nclass BaseTransformerLayer(BaseModule):\r\n    \"\"\"Base `TransformerLayer` for vision transformer.\r\n\r\n    It can be built from `mmcv.ConfigDict` and supports more flexible\r\n    customization, for example, using any number of `FFN` or `LN` layers and\r\n    using different kinds of `attention` by specifying a list of `ConfigDict`\r\n    named `attn_cfgs`. It is worth mentioning that it supports `prenorm`\r\n    when you specify `norm` as the first element of `operation_order`.\r\n    More details about the `prenorm`: `On Layer Normalization in the\r\n    Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .\r\n\r\n    Args:\r\n        attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None):\r\n            Configs for `self_attention` or `cross_attention` modules.\r\n            The order of the configs in the list should be consistent with\r\n            the corresponding attentions in operation_order.\r\n            If it is a dict, all of the attention modules in operation_order\r\n            will be built with this config. Default: None.\r\n        ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None):\r\n            Configs for FFN. The order of the configs in the list should be\r\n            consistent with the corresponding FFNs in operation_order.\r\n            If it is a dict, all of the FFN modules in operation_order\r\n            will be built with this config.\r\n        operation_order (tuple[str]): The execution order of operations\r\n            in the transformer, such as ('self_attn', 'norm', 'ffn', 'norm').\r\n            Supports `prenorm` when the first element is specified as `norm`.\r\n            Default: None.\r\n        norm_cfg (dict): Config dict for normalization layer.\r\n            Default: dict(type='LN').\r\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\r\n            Default: None.\r\n        batch_first (bool): Key, Query and Value are shape\r\n            of (batch, n, embed_dim)\r\n            or (n, batch, embed_dim). 
Default to False.\r\n    \"\"\"\r\n\r\n    def __init__(self,\r\n                 attn_cfgs=None,\r\n                 ffn_cfgs=dict(\r\n                     type='FFN',\r\n                     embed_dims=256,\r\n                     feedforward_channels=1024,\r\n                     num_fcs=2,\r\n                     ffn_drop=0.,\r\n                     act_cfg=dict(type='ReLU', inplace=True),\r\n                 ),\r\n                 operation_order=None,\r\n                 norm_cfg=dict(type='LN'),\r\n                 init_cfg=None,\r\n                 batch_first=False,\r\n                 **kwargs):\r\n\r\n        deprecated_args = dict(\r\n            feedforward_channels='feedforward_channels',\r\n            ffn_dropout='ffn_drop',\r\n            ffn_num_fcs='num_fcs')\r\n        for ori_name, new_name in deprecated_args.items():\r\n            if ori_name in kwargs:\r\n                warnings.warn(\r\n                    f'The arguments `{ori_name}` in BaseTransformerLayer '\r\n                    f'has been deprecated, now you should set `{new_name}` '\r\n                    f'and other FFN related arguments '\r\n                    f'to a dict named `ffn_cfgs`. ', DeprecationWarning)\r\n                ffn_cfgs[new_name] = kwargs[ori_name]\r\n\r\n        super().__init__(init_cfg)\r\n\r\n        self.batch_first = batch_first\r\n\r\n        assert set(operation_order) & {\r\n            'self_attn', 'norm', 'ffn', 'cross_attn'} == \\\r\n            set(operation_order), f'The operation_order of' \\\r\n            f' {self.__class__.__name__} should ' \\\r\n            f'contains all four operation type ' \\\r\n            f\"{['self_attn', 'norm', 'ffn', 'cross_attn']}\"\r\n\r\n        num_attn = operation_order.count('self_attn') + operation_order.count(\r\n            'cross_attn')\r\n        if isinstance(attn_cfgs, dict):\r\n            attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]\r\n        else:\r\n            assert num_attn == len(attn_cfgs), f'The length ' \\\r\n                f'of attn_cfg {num_attn} is ' \\\r\n                f'not consistent with the number of attention' \\\r\n                f'in operation_order {operation_order}.'\r\n\r\n        self.num_attn = num_attn\r\n        self.operation_order = operation_order\r\n        self.norm_cfg = norm_cfg\r\n        self.pre_norm = operation_order[0] == 'norm'\r\n        self.attentions = ModuleList()\r\n\r\n        index = 0\r\n        for operation_name in operation_order:\r\n            if operation_name in ['self_attn', 'cross_attn']:\r\n                if 'batch_first' in attn_cfgs[index]:\r\n                    assert self.batch_first == attn_cfgs[index]['batch_first']\r\n                else:\r\n                    attn_cfgs[index]['batch_first'] = self.batch_first\r\n                attention = build_attention(attn_cfgs[index])\r\n                # Some custom attentions used as `self_attn`\r\n                # or `cross_attn` can have different behavior.\r\n                attention.operation_name = operation_name\r\n                self.attentions.append(attention)\r\n                index += 1\r\n\r\n        self.embed_dims = self.attentions[0].embed_dims\r\n\r\n        self.ffns = ModuleList()\r\n        num_ffns = operation_order.count('ffn')\r\n        if isinstance(ffn_cfgs, dict):\r\n            ffn_cfgs = ConfigDict(ffn_cfgs)\r\n        if isinstance(ffn_cfgs, dict):\r\n            ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]\r\n        assert len(ffn_cfgs) == 
num_ffns\r\n        for ffn_index in range(num_ffns):\r\n            if 'embed_dims' not in ffn_cfgs[ffn_index]:\r\n                ffn_cfgs[ffn_index]['embed_dims'] = self.embed_dims\r\n            else:\r\n                assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims\r\n            self.ffns.append(\r\n                build_feedforward_network(ffn_cfgs[ffn_index],\r\n                                          dict(type='FFN')))\r\n\r\n        self.norms = ModuleList()\r\n        num_norms = operation_order.count('norm')\r\n        for _ in range(num_norms):\r\n            self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])\r\n\r\n    def forward(self,\r\n                query,\r\n                key=None,\r\n                value=None,\r\n                query_pos=None,\r\n                key_pos=None,\r\n                attn_masks=None,\r\n                query_key_padding_mask=None,\r\n                key_padding_mask=None,\r\n                **kwargs):\r\n        \"\"\"Forward function for `TransformerDecoderLayer`.\r\n\r\n        **kwargs contains some specific arguments of attentions.\r\n\r\n        Args:\r\n            query (Tensor): The input query with shape\r\n                [num_queries, bs, embed_dims] if\r\n                self.batch_first is False, else\r\n                [bs, num_queries embed_dims].\r\n            key (Tensor): The key tensor with shape [num_keys, bs,\r\n                embed_dims] if self.batch_first is False, else\r\n                [bs, num_keys, embed_dims] .\r\n            value (Tensor): The value tensor with same shape as `key`.\r\n            query_pos (Tensor): The positional encoding for `query`.\r\n                Default: None.\r\n            key_pos (Tensor): The positional encoding for `key`.\r\n                Default: None.\r\n            attn_masks (List[Tensor] | None): 2D Tensor used in\r\n                calculation of corresponding attention. The length of\r\n                it should equal to the number of `attention` in\r\n                `operation_order`. Default: None.\r\n            query_key_padding_mask (Tensor): ByteTensor for `query`, with\r\n                shape [bs, num_queries]. Only used in `self_attn` layer.\r\n                Defaults to None.\r\n            key_padding_mask (Tensor): ByteTensor for `query`, with\r\n                shape [bs, num_keys]. 
Default: None.\r\n\r\n        Returns:\r\n            Tensor: forwarded results with shape [num_queries, bs, embed_dims].\r\n        \"\"\"\r\n\r\n        norm_index = 0\r\n        attn_index = 0\r\n        ffn_index = 0\r\n        identity = query\r\n        if attn_masks is None:\r\n            attn_masks = [None for _ in range(self.num_attn)]\r\n        elif isinstance(attn_masks, torch.Tensor):\r\n            attn_masks = [\r\n                copy.deepcopy(attn_masks) for _ in range(self.num_attn)\r\n            ]\r\n            warnings.warn(f'Use same attn_mask in all attentions in '\r\n                          f'{self.__class__.__name__} ')\r\n        else:\r\n            assert len(attn_masks) == self.num_attn, f'The length of ' \\\r\n                        f'attn_masks {len(attn_masks)} must be equal ' \\\r\n                        f'to the number of attention in ' \\\r\n                        f'operation_order {self.num_attn}'\r\n\r\n        for layer in self.operation_order:\r\n            if layer == 'self_attn':\r\n                temp_key = temp_value = query\r\n                query = self.attentions[attn_index](\r\n                    query,\r\n                    temp_key,\r\n                    temp_value,\r\n                    identity if self.pre_norm else None,\r\n                    query_pos=query_pos,\r\n                    key_pos=query_pos,\r\n                    attn_mask=attn_masks[attn_index],\r\n                    key_padding_mask=query_key_padding_mask,\r\n                    **kwargs)\r\n                attn_index += 1\r\n                identity = query\r\n\r\n            elif layer == 'norm':\r\n                query = self.norms[norm_index](query)\r\n                norm_index += 1\r\n\r\n            elif layer == 'cross_attn':\r\n                query = self.attentions[attn_index](\r\n                    query,\r\n                    key,\r\n                    value,\r\n                    identity if self.pre_norm else None,\r\n                    query_pos=query_pos,\r\n                    key_pos=key_pos,\r\n                    attn_mask=attn_masks[attn_index],\r\n                    key_padding_mask=key_padding_mask,\r\n                    **kwargs)\r\n                attn_index += 1\r\n                identity = query\r\n\r\n            elif layer == 'ffn':\r\n                query = self.ffns[ffn_index](\r\n                    query, identity if self.pre_norm else None)\r\n                ffn_index += 1\r\n\r\n        return query\r\n\r\n\r\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\r\nclass TransformerLayerSequence(BaseModule):\r\n    \"\"\"Base class for TransformerEncoder and TransformerDecoder in vision\r\n    transformer.\r\n\r\n    As base-class of Encoder and Decoder in vision transformer.\r\n    Support customization such as specifying different kind\r\n    of `transformer_layer` in `transformer_coder`.\r\n\r\n    Args:\r\n        transformerlayer (list[obj:`mmcv.ConfigDict`] |\r\n            obj:`mmcv.ConfigDict`): Config of transformerlayer\r\n            in TransformerCoder. If it is obj:`mmcv.ConfigDict`,\r\n             it would be repeated `num_layer` times to a\r\n             list[`mmcv.ConfigDict`]. Default: None.\r\n        num_layers (int): The number of `TransformerLayer`. 
Default: None.\r\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\r\n            Default: None.\r\n    \"\"\"\r\n\r\n    def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None):\r\n        super().__init__(init_cfg)\r\n        if isinstance(transformerlayers, dict):\r\n            transformerlayers = [\r\n                copy.deepcopy(transformerlayers) for _ in range(num_layers)\r\n            ]\r\n        else:\r\n            assert isinstance(transformerlayers, list) and \\\r\n                   len(transformerlayers) == num_layers\r\n        self.num_layers = num_layers\r\n        self.layers = ModuleList()\r\n        for i in range(num_layers):\r\n            self.layers.append(build_transformer_layer(transformerlayers[i]))\r\n        self.embed_dims = self.layers[0].embed_dims\r\n        self.pre_norm = self.layers[0].pre_norm\r\n\r\n    def forward(self,\r\n                query,\r\n                key,\r\n                value,\r\n                query_pos=None,\r\n                key_pos=None,\r\n                attn_masks=None,\r\n                query_key_padding_mask=None,\r\n                key_padding_mask=None,\r\n                **kwargs):\r\n        \"\"\"Forward function for `TransformerCoder`.\r\n\r\n        Args:\r\n            query (Tensor): Input query with shape\r\n                `(num_queries, bs, embed_dims)`.\r\n            key (Tensor): The key tensor with shape\r\n                `(num_keys, bs, embed_dims)`.\r\n            value (Tensor): The value tensor with shape\r\n                `(num_keys, bs, embed_dims)`.\r\n            query_pos (Tensor): The positional encoding for `query`.\r\n                Default: None.\r\n            key_pos (Tensor): The positional encoding for `key`.\r\n                Default: None.\r\n            attn_masks (List[Tensor], optional): Each element is 2D Tensor\r\n                which is used in calculation of corresponding attention in\r\n                operation_order. Default: None.\r\n            query_key_padding_mask (Tensor): ByteTensor for `query`, with\r\n                shape [bs, num_queries]. Only used in self-attention\r\n                Default: None.\r\n            key_padding_mask (Tensor): ByteTensor for `query`, with\r\n                shape [bs, num_keys]. Default: None.\r\n\r\n        Returns:\r\n            Tensor:  results with shape [num_queries, bs, embed_dims].\r\n        \"\"\"\r\n        for layer in self.layers:\r\n            query = layer(\r\n                query,\r\n                key,\r\n                value,\r\n                query_pos=query_pos,\r\n                key_pos=key_pos,\r\n                attn_masks=attn_masks,\r\n                query_key_padding_mask=query_key_padding_mask,\r\n                key_padding_mask=key_padding_mask,\r\n                **kwargs)\r\n        return query"
  },
  {
    "path": "mmdet3d/models/utils/transformerdecoder.py",
    "content": "import copy\r\nimport numpy as np\r\nimport torch\r\nfrom mmcv.cnn import ConvModule, build_conv_layer, kaiming_init\r\nfrom mmcv.runner import force_fp32\r\nfrom torch import nn\r\nimport torch.nn.functional as F\r\nfrom torch.nn.parameter import Parameter\r\nfrom torch.nn import Linear\r\nfrom torch.nn.init import xavier_uniform_, constant_\r\n\r\nfrom mmdet3d.core import (circle_nms, draw_heatmap_gaussian, gaussian_radius,\r\n                          xywhr2xyxyr, limit_period, PseudoSampler)\r\nfrom mmdet3d.core.bbox.structures import rotation_3d_in_axis\r\nfrom mmdet3d.core import Box3DMode, LiDARInstance3DBoxes\r\nfrom mmdet3d.models import builder\r\nfrom mmdet3d.models.builder import HEADS, build_loss\r\nfrom mmdet3d.models.utils import clip_sigmoid\r\nfrom mmdet3d.models.fusion_layers import apply_3d_transformation\r\nfrom mmdet3d.ops.iou3d.iou3d_utils import nms_gpu\r\nfrom mmdet.core import build_bbox_coder, multi_apply, build_assigner, build_sampler, AssignResult\r\nfrom mmdet3d.ops.roiaware_pool3d import points_in_boxes_batch\r\n\r\n\r\nclass PositionEmbeddingLearnedLN(nn.Module):\r\n    \"\"\"\r\n    Absolute pos embedding, learned.\r\n    \"\"\"\r\n\r\n    def __init__(self, input_channel, num_pos_feats=288):\r\n        super().__init__()\r\n        self.position_embedding_head = nn.Sequential(\r\n            nn.Linear(input_channel, num_pos_feats),\r\n            nn.ReLU(inplace=True),\r\n            nn.Linear(num_pos_feats, num_pos_feats),\r\n            nn.LayerNorm(num_pos_feats),\r\n        )\r\n\r\n    def forward(self, xyz):\r\n        position_embedding = self.position_embedding_head(xyz)\r\n        position_embedding = position_embedding.transpose(1, 2).contiguous()\r\n        return position_embedding\r\n\r\nclass PositionEmbeddingLearned(nn.Module):\r\n    \"\"\"\r\n    Absolute pos embedding, learned.\r\n    \"\"\"\r\n\r\n    def __init__(self, input_channel, num_pos_feats=288):\r\n        super().__init__()\r\n        self.position_embedding_head = nn.Sequential(\r\n            nn.Conv1d(input_channel, num_pos_feats, kernel_size=1),\r\n            nn.BatchNorm1d(num_pos_feats),\r\n            nn.ReLU(inplace=True),\r\n            nn.Conv1d(num_pos_feats, num_pos_feats, kernel_size=1))\r\n\r\n    def forward(self, xyz):\r\n        xyz = xyz.transpose(1, 2).contiguous()\r\n        position_embedding = self.position_embedding_head(xyz)\r\n        return position_embedding\r\n\r\nclass PositionEmbeddingLearnedwoNorm(nn.Module):\r\n    \"\"\"\r\n    Absolute pos embedding, learned.\r\n    \"\"\"\r\n\r\n    def __init__(self, input_channel, num_pos_feats=288):\r\n        super().__init__()\r\n        self.position_embedding_head = nn.Sequential(\r\n            nn.Conv1d(input_channel, num_pos_feats, kernel_size=1),\r\n            # nn.BatchNorm1d(num_pos_feats),\r\n            nn.ReLU(inplace=True),\r\n            nn.Conv1d(num_pos_feats, num_pos_feats, kernel_size=1))\r\n\r\n    def forward(self, xyz):\r\n        xyz = xyz.transpose(1, 2).contiguous()\r\n        position_embedding = self.position_embedding_head(xyz)\r\n        return position_embedding\r\n\r\nclass PositionEmbeddingLearnedMulti(nn.Module):\r\n    \"\"\"\r\n    Absolute pos embedding, learned.\r\n    \"\"\"\r\n\r\n    def __init__(self, input_channel, num_pos_feats=288, pos_num=2):\r\n        super().__init__()\r\n        self.position_embedding_heads = nn.ModuleList()\r\n        self.pos_num = pos_num\r\n        for i in range(pos_num):\r\n            
self.position_embedding_heads.append(nn.Sequential(\r\n                nn.Conv1d(input_channel, num_pos_feats, kernel_size=1),\r\n                nn.BatchNorm1d(num_pos_feats),\r\n                nn.ReLU(inplace=True),\r\n                nn.Conv1d(num_pos_feats, num_pos_feats, kernel_size=1)\r\n            ))\r\n\r\n    def forward(self, xyzs):\r\n        output = None\r\n        for i in range(self.pos_num):\r\n            xyz = xyzs[i].transpose(1, 2).contiguous()\r\n            position_embedding = self.position_embedding_heads[i](xyz)\r\n            if output is None:\r\n                output = position_embedding\r\n            else:\r\n                output = output + position_embedding\r\n\r\n        return output\r\n\r\n\r\nclass PositionEmbeddingLearnedMultiInput(nn.Module):\r\n    def __init__(self, input_channels, num_pos_feats=288):\r\n        super().__init__()\r\n        self.position_embedding_heads = nn.ModuleList()\r\n        self.pos_num = len(input_channels)\r\n        for i in range(self.pos_num):\r\n            pos_embed = PositionEmbeddingLearned(input_channels[i], num_pos_feats)\r\n            self.position_embedding_heads.append(pos_embed)\r\n\r\n    def forward(self, xyzs):\r\n        output = None\r\n        assert len(xyzs) == self.pos_num\r\n        for i in range(self.pos_num):\r\n            if output is None:\r\n                output = self.position_embedding_heads[i](xyzs[i])\r\n            else:\r\n                output = output + self.position_embedding_heads[i](xyzs[i])\r\n        return output\r\n\r\n\r\nclass TransformerDecoderLayer(nn.Module):\r\n    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=\"relu\",\r\n                 self_posembed=None, cross_posembed=None, cross_only=False):\r\n        super().__init__()\r\n        self.cross_only = cross_only\r\n        if not self.cross_only:\r\n            self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)\r\n        self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout)\r\n\r\n        # Implementation of Feedforward model\r\n        self.linear1 = nn.Linear(d_model, dim_feedforward)\r\n        self.dropout = nn.Dropout(dropout)\r\n        self.linear2 = nn.Linear(dim_feedforward, d_model)\r\n\r\n        self.norm1 = nn.LayerNorm(d_model)\r\n        self.norm2 = nn.LayerNorm(d_model)\r\n        self.norm3 = nn.LayerNorm(d_model)\r\n        self.dropout1 = nn.Dropout(dropout)\r\n        self.dropout2 = nn.Dropout(dropout)\r\n        self.dropout3 = nn.Dropout(dropout)\r\n\r\n        def _get_activation_fn(activation):\r\n            \"\"\"Return an activation function given a string\"\"\"\r\n            if activation == \"relu\":\r\n                return F.relu\r\n            if activation == \"gelu\":\r\n                return F.gelu\r\n            if activation == \"glu\":\r\n                return F.glu\r\n            raise RuntimeError(F\"activation should be relu/gelu, not {activation}.\")\r\n\r\n        self.activation = _get_activation_fn(activation)\r\n\r\n        self.self_posembed = self_posembed\r\n        self.cross_posembed = cross_posembed\r\n\r\n    def with_pos_embed(self, tensor, pos_embed):\r\n        return tensor if pos_embed is None else tensor + pos_embed\r\n\r\n    def forward(self, query, key, query_pos, key_pos, attn_mask=None, need_weights=False):\r\n        \"\"\"\r\n        :param query: B C Pq\r\n        :param key: B C Pk\r\n        :param query_pos: B Pq 3/6\r\n        :param key_pos: B Pk 3/6\r\n       
 :param value_pos: [B Pq 3/6]\r\n        :return:\r\n        \"\"\"\r\n        # NxCxP to PxNxC\r\n        if self.self_posembed is not None:\r\n            query_pos_embed = self.self_posembed(query_pos).permute(2, 0, 1)\r\n        else:\r\n            query_pos_embed = None\r\n        if self.cross_posembed is not None:\r\n            key_pos_embed = self.cross_posembed(key_pos).permute(2, 0, 1)\r\n        else:\r\n            key_pos_embed = None\r\n\r\n        query = query.permute(2, 0, 1)\r\n        key = key.permute(2, 0, 1)\r\n\r\n        if not self.cross_only:\r\n            q = k = v = self.with_pos_embed(query, query_pos_embed)\r\n            query2 = self.self_attn(q, k, value=v)[0]\r\n            query = query + self.dropout1(query2)\r\n            query = self.norm1(query)\r\n\r\n        query2, weights = self.multihead_attn(query=self.with_pos_embed(query, query_pos_embed),\r\n                key=self.with_pos_embed(key, key_pos_embed), value=self.with_pos_embed(key, key_pos_embed),\r\n                attn_mask=attn_mask)\r\n        query = query + self.dropout2(query2)\r\n        query = self.norm2(query)\r\n\r\n        query2 = self.linear2(self.dropout(self.activation(self.linear1(query))))\r\n        query = query + self.dropout3(query2)\r\n        query = self.norm3(query)\r\n\r\n        # PxNxC back to NxCxP\r\n        query = query.permute(1, 2, 0)\r\n        if need_weights:\r\n            return query, weights\r\n        else:\r\n            return query\r\n\r\n\r\nclass MultiheadAttention(nn.Module):\r\n    r\"\"\"Allows the model to jointly attend to information\r\n    from different representation subspaces.\r\n    See reference: Attention Is All You Need\r\n    .. math::\r\n        \\text{MultiHead}(Q, K, V) = \\text{Concat}(head_1,\\dots,head_h)W^O\r\n        \\text{where} head_i = \\text{Attention}(QW_i^Q, KW_i^K, VW_i^V)\r\n    Args:\r\n        embed_dim: total dimension of the model.\r\n        num_heads: parallel attention heads.\r\n        dropout: a Dropout layer on attn_output_weights. Default: 0.0.\r\n        bias: add bias as module parameter. Default: True.\r\n        add_bias_kv: add bias to the key and value sequences at dim=0.\r\n        add_zero_attn: add a new batch of zeros to the key and\r\n                       value sequences at dim=1.\r\n        kdim: total number of features in key. Default: None.\r\n        vdim: total number of features in value. 
Default: None.\r\n        Note: if kdim and vdim are None, they will be set to embed_dim such that\r\n        query, key, and value have the same number of features.\r\n    Examples::\r\n        >>> multihead_attn = nn.MultiheadAttention(embed_dim, num_heads)\r\n        >>> attn_output, attn_output_weights = multihead_attn(query, key, value)\r\n    \"\"\"\r\n\r\n    def __init__(self, embed_dim, num_heads, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None,\r\n                 vdim=None):\r\n        super(MultiheadAttention, self).__init__()\r\n        self.embed_dim = embed_dim\r\n        self.kdim = kdim if kdim is not None else embed_dim\r\n        self.vdim = vdim if vdim is not None else embed_dim\r\n        self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim\r\n\r\n        self.num_heads = num_heads\r\n        self.dropout = dropout\r\n        self.head_dim = embed_dim // num_heads\r\n        assert self.head_dim * num_heads == self.embed_dim, \"embed_dim must be divisible by num_heads\"\r\n\r\n        self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim))\r\n\r\n        if self._qkv_same_embed_dim is False:\r\n            self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))\r\n            self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))\r\n            self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))\r\n\r\n        if bias:\r\n            self.in_proj_bias = Parameter(torch.empty(3 * embed_dim))\r\n        else:\r\n            self.register_parameter('in_proj_bias', None)\r\n        self.out_proj = Linear(embed_dim, embed_dim, bias=bias)\r\n\r\n        if add_bias_kv:\r\n            self.bias_k = Parameter(torch.empty(1, 1, embed_dim))\r\n            self.bias_v = Parameter(torch.empty(1, 1, embed_dim))\r\n        else:\r\n            self.bias_k = self.bias_v = None\r\n\r\n        self.add_zero_attn = add_zero_attn\r\n\r\n        self._reset_parameters()\r\n\r\n    def _reset_parameters(self):\r\n        if self._qkv_same_embed_dim:\r\n            xavier_uniform_(self.in_proj_weight)\r\n        else:\r\n            xavier_uniform_(self.q_proj_weight)\r\n            xavier_uniform_(self.k_proj_weight)\r\n            xavier_uniform_(self.v_proj_weight)\r\n\r\n        if self.in_proj_bias is not None:\r\n            constant_(self.in_proj_bias, 0.)\r\n            constant_(self.out_proj.bias, 0.)\r\n        # use nn.init here because xavier_normal_ is not imported at module level\r\n        if self.bias_k is not None:\r\n            nn.init.xavier_normal_(self.bias_k)\r\n        if self.bias_v is not None:\r\n            nn.init.xavier_normal_(self.bias_v)\r\n\r\n    def forward(self, query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None):\r\n        r\"\"\"\r\n    Args:\r\n        query, key, value: map a query and a set of key-value pairs to an output.\r\n            See \"Attention Is All You Need\" for more details.\r\n        key_padding_mask: if provided, specified padding elements in the key will\r\n            be ignored by the attention. This is a binary mask. When the value is True,\r\n            the corresponding value on the attention layer will be filled with -inf.\r\n        need_weights: output attn_output_weights.\r\n        attn_mask: mask that prevents attention to certain positions. This is an additive mask\r\n            (i.e. 
the values will be added to the attention layer).\r\n    Shape:\r\n        - Inputs:\r\n        - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is\r\n          the embedding dimension.\r\n        - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is\r\n          the embedding dimension.\r\n        - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is\r\n          the embedding dimension.\r\n        - key_padding_mask: :math:`(N, S)`, ByteTensor, where N is the batch size, S is the source sequence length.\r\n        - attn_mask: :math:`(L, S)` where L is the target sequence length, S is the source sequence length.\r\n        - Outputs:\r\n        - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size,\r\n          E is the embedding dimension.\r\n        - attn_output_weights: :math:`(N, L, S)` where N is the batch size,\r\n          L is the target sequence length, S is the source sequence length.\r\n        \"\"\"\r\n        if hasattr(self, '_qkv_same_embed_dim') and self._qkv_same_embed_dim is False:\r\n            return multi_head_attention_forward(\r\n                query, key, value, self.embed_dim, self.num_heads,\r\n                self.in_proj_weight, self.in_proj_bias,\r\n                self.bias_k, self.bias_v, self.add_zero_attn,\r\n                self.dropout, self.out_proj.weight, self.out_proj.bias,\r\n                training=self.training,\r\n                key_padding_mask=key_padding_mask, need_weights=need_weights,\r\n                attn_mask=attn_mask, use_separate_proj_weight=True,\r\n                q_proj_weight=self.q_proj_weight, k_proj_weight=self.k_proj_weight,\r\n                v_proj_weight=self.v_proj_weight)\r\n        else:\r\n            if not hasattr(self, '_qkv_same_embed_dim'):\r\n                warnings.warn('A new version of MultiheadAttention module has been implemented. 
\\\r\n                    Please re-train your model with the new module',\r\n                              UserWarning)\r\n\r\n            return multi_head_attention_forward(\r\n                query, key, value, self.embed_dim, self.num_heads,\r\n                self.in_proj_weight, self.in_proj_bias,\r\n                self.bias_k, self.bias_v, self.add_zero_attn,\r\n                self.dropout, self.out_proj.weight, self.out_proj.bias,\r\n                training=self.training,\r\n                key_padding_mask=key_padding_mask, need_weights=need_weights,\r\n                attn_mask=attn_mask)\r\n\r\n\r\ndef multi_head_attention_forward(query,  # type: Tensor\r\n                                 key,  # type: Tensor\r\n                                 value,  # type: Tensor\r\n                                 embed_dim_to_check,  # type: int\r\n                                 num_heads,  # type: int\r\n                                 in_proj_weight,  # type: Tensor\r\n                                 in_proj_bias,  # type: Tensor\r\n                                 bias_k,  # type: Optional[Tensor]\r\n                                 bias_v,  # type: Optional[Tensor]\r\n                                 add_zero_attn,  # type: bool\r\n                                 dropout_p,  # type: float\r\n                                 out_proj_weight,  # type: Tensor\r\n                                 out_proj_bias,  # type: Tensor\r\n                                 training=True,  # type: bool\r\n                                 key_padding_mask=None,  # type: Optional[Tensor]\r\n                                 need_weights=True,  # type: bool\r\n                                 attn_mask=None,  # type: Optional[Tensor]\r\n                                 use_separate_proj_weight=False,  # type: bool\r\n                                 q_proj_weight=None,  # type: Optional[Tensor]\r\n                                 k_proj_weight=None,  # type: Optional[Tensor]\r\n                                 v_proj_weight=None,  # type: Optional[Tensor]\r\n                                 static_k=None,  # type: Optional[Tensor]\r\n                                 static_v=None,  # type: Optional[Tensor]\r\n                                 ):\r\n    # type: (...) -> Tuple[Tensor, Optional[Tensor]]\r\n    r\"\"\"\r\n    Args:\r\n        query, key, value: map a query and a set of key-value pairs to an output.\r\n            See \"Attention Is All You Need\" for more details.\r\n        embed_dim_to_check: total dimension of the model.\r\n        num_heads: parallel attention heads.\r\n        in_proj_weight, in_proj_bias: input projection weight and bias.\r\n        bias_k, bias_v: bias of the key and value sequences to be added at dim=0.\r\n        add_zero_attn: add a new batch of zeros to the key and\r\n                       value sequences at dim=1.\r\n        dropout_p: probability of an element to be zeroed.\r\n        out_proj_weight, out_proj_bias: the output projection weight and bias.\r\n        training: apply dropout if is ``True``.\r\n        key_padding_mask: if provided, specified padding elements in the key will\r\n            be ignored by the attention. This is an binary mask. When the value is True,\r\n            the corresponding value on the attention layer will be filled with -inf.\r\n        need_weights: output attn_output_weights.\r\n        attn_mask: mask that prevents attention to certain positions. This is an additive mask\r\n            (i.e. 
the values will be added to the attention layer).\r\n        use_separate_proj_weight: the function accept the proj. weights for query, key,\r\n            and value in differnt forms. If false, in_proj_weight will be used, which is\r\n            a combination of q_proj_weight, k_proj_weight, v_proj_weight.\r\n        q_proj_weight, k_proj_weight, v_proj_weight, in_proj_bias: input projection weight and bias.\r\n        static_k, static_v: static key and value used for attention operators.\r\n    Shape:\r\n        Inputs:\r\n        - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is\r\n          the embedding dimension.\r\n        - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is\r\n          the embedding dimension.\r\n        - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is\r\n          the embedding dimension.\r\n        - key_padding_mask: :math:`(N, S)`, ByteTensor, where N is the batch size, S is the source sequence length.\r\n        - attn_mask: :math:`(L, S)` where L is the target sequence length, S is the source sequence length.\r\n        - static_k: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length,\r\n          N is the batch size, E is the embedding dimension. E/num_heads is the head dimension.\r\n        - static_v: :math:`(N*num_heads, S, E/num_heads)`, where S is the source sequence length,\r\n          N is the batch size, E is the embedding dimension. E/num_heads is the head dimension.\r\n        Outputs:\r\n        - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size,\r\n          E is the embedding dimension.\r\n        - attn_output_weights: :math:`(N, L, S)` where N is the batch size,\r\n          L is the target sequence length, S is the source sequence length.\r\n    \"\"\"\r\n\r\n    qkv_same = torch.equal(query, key) and torch.equal(key, value)\r\n    kv_same = torch.equal(key, value)\r\n\r\n    tgt_len, bsz, embed_dim = query.size()\r\n    assert embed_dim == embed_dim_to_check\r\n    assert list(query.size()) == [tgt_len, bsz, embed_dim]\r\n    assert key.size() == value.size()\r\n\r\n    head_dim = embed_dim // num_heads\r\n    assert head_dim * num_heads == embed_dim, \"embed_dim must be divisible by num_heads\"\r\n    scaling = float(head_dim) ** -0.5\r\n\r\n    if use_separate_proj_weight is not True:\r\n        if qkv_same:\r\n            # self-attention\r\n            q, k, v = F.linear(query, in_proj_weight, in_proj_bias).chunk(3, dim=-1)\r\n\r\n        elif kv_same:\r\n            # encoder-decoder attention\r\n            # This is inline in_proj function with in_proj_weight and in_proj_bias\r\n            _b = in_proj_bias\r\n            _start = 0\r\n            _end = embed_dim\r\n            _w = in_proj_weight[_start:_end, :]\r\n            if _b is not None:\r\n                _b = _b[_start:_end]\r\n            q = F.linear(query, _w, _b)\r\n\r\n            if key is None:\r\n                assert value is None\r\n                k = None\r\n                v = None\r\n            else:\r\n\r\n                # This is inline in_proj function with in_proj_weight and in_proj_bias\r\n                _b = in_proj_bias\r\n                _start = embed_dim\r\n                _end = None\r\n                _w = in_proj_weight[_start:, :]\r\n                if _b is not None:\r\n                    _b = _b[_start:]\r\n                k, v = F.linear(key, _w, 
_b).chunk(2, dim=-1)\r\n\r\n        else:\r\n            # This is inline in_proj function with in_proj_weight and in_proj_bias\r\n            _b = in_proj_bias\r\n            _start = 0\r\n            _end = embed_dim\r\n            _w = in_proj_weight[_start:_end, :]\r\n            if _b is not None:\r\n                _b = _b[_start:_end]\r\n            q = F.linear(query, _w, _b)\r\n\r\n            # This is inline in_proj function with in_proj_weight and in_proj_bias\r\n            _b = in_proj_bias\r\n            _start = embed_dim\r\n            _end = embed_dim * 2\r\n            _w = in_proj_weight[_start:_end, :]\r\n            if _b is not None:\r\n                _b = _b[_start:_end]\r\n            k = F.linear(key, _w, _b)\r\n\r\n            # This is inline in_proj function with in_proj_weight and in_proj_bias\r\n            _b = in_proj_bias\r\n            _start = embed_dim * 2\r\n            _end = None\r\n            _w = in_proj_weight[_start:, :]\r\n            if _b is not None:\r\n                _b = _b[_start:]\r\n            v = F.linear(value, _w, _b)\r\n    else:\r\n        q_proj_weight_non_opt = torch.jit._unwrap_optional(q_proj_weight)\r\n        len1, len2 = q_proj_weight_non_opt.size()\r\n        assert len1 == embed_dim and len2 == query.size(-1)\r\n\r\n        k_proj_weight_non_opt = torch.jit._unwrap_optional(k_proj_weight)\r\n        len1, len2 = k_proj_weight_non_opt.size()\r\n        assert len1 == embed_dim and len2 == key.size(-1)\r\n\r\n        v_proj_weight_non_opt = torch.jit._unwrap_optional(v_proj_weight)\r\n        len1, len2 = v_proj_weight_non_opt.size()\r\n        assert len1 == embed_dim and len2 == value.size(-1)\r\n\r\n        if in_proj_bias is not None:\r\n            q = F.linear(query, q_proj_weight_non_opt, in_proj_bias[0:embed_dim])\r\n            k = F.linear(key, k_proj_weight_non_opt, in_proj_bias[embed_dim:(embed_dim * 2)])\r\n            v = F.linear(value, v_proj_weight_non_opt, in_proj_bias[(embed_dim * 2):])\r\n        else:\r\n            q = F.linear(query, q_proj_weight_non_opt, in_proj_bias)\r\n            k = F.linear(key, k_proj_weight_non_opt, in_proj_bias)\r\n            v = F.linear(value, v_proj_weight_non_opt, in_proj_bias)\r\n    q = q * scaling\r\n\r\n    if bias_k is not None and bias_v is not None:\r\n        if static_k is None and static_v is None:\r\n            k = torch.cat([k, bias_k.repeat(1, bsz, 1)])\r\n            v = torch.cat([v, bias_v.repeat(1, bsz, 1)])\r\n            if attn_mask is not None:\r\n                attn_mask = torch.cat([attn_mask,\r\n                                       torch.zeros((attn_mask.size(0), 1),\r\n                                                   dtype=attn_mask.dtype,\r\n                                                   device=attn_mask.device)], dim=1)\r\n            if key_padding_mask is not None:\r\n                key_padding_mask = torch.cat(\r\n                    [key_padding_mask, torch.zeros((key_padding_mask.size(0), 1),\r\n                                                   dtype=key_padding_mask.dtype,\r\n                                                   device=key_padding_mask.device)], dim=1)\r\n        else:\r\n            assert static_k is None, \"bias cannot be added to static key.\"\r\n            assert static_v is None, \"bias cannot be added to static value.\"\r\n    else:\r\n        assert bias_k is None\r\n        assert bias_v is None\r\n\r\n    q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1)\r\n    if k is not 
None:\r\n        k = k.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1)\r\n    if v is not None:\r\n        v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1)\r\n\r\n    if static_k is not None:\r\n        assert static_k.size(0) == bsz * num_heads\r\n        assert static_k.size(2) == head_dim\r\n        k = static_k\r\n\r\n    if static_v is not None:\r\n        assert static_v.size(0) == bsz * num_heads\r\n        assert static_v.size(2) == head_dim\r\n        v = static_v\r\n\r\n    src_len = k.size(1)\r\n\r\n    if key_padding_mask is not None:\r\n        assert key_padding_mask.size(0) == bsz\r\n        assert key_padding_mask.size(1) == src_len\r\n\r\n    if add_zero_attn:\r\n        src_len += 1\r\n        k = torch.cat([k, torch.zeros((k.size(0), 1) + k.size()[2:], dtype=k.dtype, device=k.device)], dim=1)\r\n        v = torch.cat([v, torch.zeros((v.size(0), 1) + v.size()[2:], dtype=v.dtype, device=v.device)], dim=1)\r\n        if attn_mask is not None:\r\n            if len(attn_mask.shape) == 2:\r\n                attn_mask = torch.cat([attn_mask, torch.zeros((attn_mask.size(0), 1),\r\n                                                          dtype=attn_mask.dtype,\r\n                                                          device=attn_mask.device)], dim=1)\r\n            else:\r\n                attn_mask = torch.cat([attn_mask, torch.zeros((attn_mask.size(0), attn_mask.size(1), 1),\r\n                                                          dtype=attn_mask.dtype,\r\n                                                          device=attn_mask.device)], dim=2)\r\n        if key_padding_mask is not None:\r\n            key_padding_mask = torch.cat(\r\n                [key_padding_mask, torch.zeros((key_padding_mask.size(0), 1),\r\n                                               dtype=key_padding_mask.dtype,\r\n                                               device=key_padding_mask.device)], dim=1)\r\n\r\n    attn_output_weights = torch.bmm(q, k.transpose(1, 2))\r\n    assert list(attn_output_weights.size()) == [bsz * num_heads, tgt_len, src_len]\r\n\r\n    if attn_mask is not None:\r\n        if len(attn_mask.shape) == 2:\r\n            attn_mask = attn_mask.unsqueeze(0)\r\n        else:\r\n            attn_mask = attn_mask.unsqueeze(1).repeat(1, num_heads, 1, 1)\r\n            attn_mask = attn_mask.reshape(attn_mask.size(0)*num_heads, attn_mask.size(2), attn_mask.size(3))\r\n        attn_output_weights += attn_mask\r\n\r\n    if key_padding_mask is not None:\r\n        attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)\r\n        attn_output_weights = attn_output_weights.masked_fill(\r\n            key_padding_mask.unsqueeze(1).unsqueeze(2),\r\n            float('-inf'),\r\n        )\r\n        attn_output_weights = attn_output_weights.view(bsz * num_heads, tgt_len, src_len)\r\n\r\n    attn_output_weights = F.softmax(\r\n        attn_output_weights, dim=-1)\r\n    attn_output_weights = F.dropout(attn_output_weights, p=dropout_p, training=training)\r\n\r\n    attn_output = torch.bmm(attn_output_weights, v)\r\n    assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim]\r\n    attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)\r\n    attn_output = F.linear(attn_output, out_proj_weight, out_proj_bias)\r\n\r\n    if need_weights:\r\n        # average attention weights over heads\r\n        attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len)\r\n 
       return attn_output, attn_output_weights.sum(dim=1) / num_heads\r\n    else:\r\n        return attn_output, None\r\n"
  },
  {
    "path": "mmdet3d/models/voxel_encoders/__init__.py",
    "content": "from .pillar_encoder import PillarFeatureNet\nfrom .voxel_encoder import DynamicSimpleVFE, DynamicVFE, HardSimpleVFE, HardVFE\n\n__all__ = [\n    'PillarFeatureNet', 'HardVFE', 'DynamicVFE', 'HardSimpleVFE',\n    'DynamicSimpleVFE'\n]\n"
  },
  {
    "path": "mmdet3d/models/voxel_encoders/pillar_encoder.py",
    "content": "import torch\nfrom mmcv.cnn import build_norm_layer\nfrom mmcv.runner import force_fp32\nfrom torch import nn\n\nfrom mmdet3d.ops import DynamicScatter\nfrom ..registry import VOXEL_ENCODERS\nfrom .utils import PFNLayer, get_paddings_indicator\n\n\n@VOXEL_ENCODERS.register_module()\nclass PillarFeatureNet(nn.Module):\n    \"\"\"Pillar Feature Net.\n\n    The network prepares the pillar features and performs forward pass\n    through PFNLayers.\n\n    Args:\n        in_channels (int, optional): Number of input features,\n            either x, y, z or x, y, z, r. Defaults to 4.\n        feat_channels (tuple, optional): Number of features in each of the\n            N PFNLayers. Defaults to (64, ).\n        with_distance (bool, optional): Whether to include Euclidean distance\n            to points. Defaults to False.\n        with_cluster_center (bool, optional): [description]. Defaults to True.\n        with_voxel_center (bool, optional): [description]. Defaults to True.\n        voxel_size (tuple[float], optional): Size of voxels, only utilize x\n            and y size. Defaults to (0.2, 0.2, 4).\n        point_cloud_range (tuple[float], optional): Point cloud range, only\n            utilizes x and y min. Defaults to (0, -40, -3, 70.4, 40, 1).\n        norm_cfg ([type], optional): [description].\n            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).\n        mode (str, optional): The mode to gather point features. Options are\n            'max' or 'avg'. Defaults to 'max'.\n        legacy (bool): Whether to use the new behavior or\n            the original behavior. Defaults to True.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=4,\n                 feat_channels=(64, ),\n                 with_distance=False,\n                 with_cluster_center=True,\n                 with_voxel_center=True,\n                 voxel_size=(0.2, 0.2, 4),\n                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 mode='max',\n                 legacy=True):\n        super(PillarFeatureNet, self).__init__()\n        assert len(feat_channels) > 0\n        self.legacy = legacy\n        if with_cluster_center:\n            in_channels += 3\n        if with_voxel_center:\n            in_channels += 2\n        if with_distance:\n            in_channels += 1\n        self._with_distance = with_distance\n        self._with_cluster_center = with_cluster_center\n        self._with_voxel_center = with_voxel_center\n        self.fp16_enabled = False\n        # Create PillarFeatureNet layers\n        self.in_channels = in_channels\n        feat_channels = [in_channels] + list(feat_channels)\n        pfn_layers = []\n        for i in range(len(feat_channels) - 1):\n            in_filters = feat_channels[i]\n            out_filters = feat_channels[i + 1]\n            if i < len(feat_channels) - 2:\n                last_layer = False\n            else:\n                last_layer = True\n            pfn_layers.append(\n                PFNLayer(\n                    in_filters,\n                    out_filters,\n                    norm_cfg=norm_cfg,\n                    last_layer=last_layer,\n                    mode=mode))\n        self.pfn_layers = nn.ModuleList(pfn_layers)\n\n        # Need pillar (voxel) size and x/y offset in order to calculate offset\n        self.vx = voxel_size[0]\n        self.vy = voxel_size[1]\n        self.x_offset = self.vx / 2 + 
point_cloud_range[0]\n        self.y_offset = self.vy / 2 + point_cloud_range[1]\n        self.point_cloud_range = point_cloud_range\n\n    @force_fp32(out_fp16=True)\n    def forward(self, features, num_points, coors):\n        \"\"\"Forward function.\n\n        Args:\n            features (torch.Tensor): Point features or raw points in shape\n                (N, M, C).\n            num_points (torch.Tensor): Number of points in each pillar.\n            coors (torch.Tensor): Coordinates of each voxel.\n\n        Returns:\n            torch.Tensor: Features of pillars.\n        \"\"\"\n        features_ls = [features]\n        # Find distance of x, y, and z from cluster center\n        if self._with_cluster_center:\n            points_mean = features[:, :, :3].sum(\n                dim=1, keepdim=True) / num_points.type_as(features).view(\n                    -1, 1, 1)\n            f_cluster = features[:, :, :3] - points_mean\n            features_ls.append(f_cluster)\n\n        # Find distance of x, y, and z from pillar center\n        dtype = features.dtype\n        if self._with_voxel_center:\n            if not self.legacy:\n                f_center = torch.zeros_like(features[:, :, :2])\n                f_center[:, :, 0] = features[:, :, 0] - (\n                    coors[:, 3].to(dtype).unsqueeze(1) * self.vx +\n                    self.x_offset)\n                f_center[:, :, 1] = features[:, :, 1] - (\n                    coors[:, 2].to(dtype).unsqueeze(1) * self.vy +\n                    self.y_offset)\n            else:\n                f_center = features[:, :, :2]\n                f_center[:, :, 0] = f_center[:, :, 0] - (\n                    coors[:, 3].type_as(features).unsqueeze(1) * self.vx +\n                    self.x_offset)\n                f_center[:, :, 1] = f_center[:, :, 1] - (\n                    coors[:, 2].type_as(features).unsqueeze(1) * self.vy +\n                    self.y_offset)\n            features_ls.append(f_center)\n\n        if self._with_distance:\n            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)\n            features_ls.append(points_dist)\n\n        # Combine together feature decorations\n        features = torch.cat(features_ls, dim=-1)\n        # The feature decorations were calculated without regard to whether\n        # pillar was empty. Need to ensure that\n        # empty pillars remain set to zeros.\n        voxel_count = features.shape[1]\n        mask = get_paddings_indicator(num_points, voxel_count, axis=0)\n        mask = torch.unsqueeze(mask, -1).type_as(features)\n        features *= mask\n\n        for pfn in self.pfn_layers:\n            features = pfn(features, num_points)\n\n        return features.squeeze()\n\n\n@VOXEL_ENCODERS.register_module()\nclass DynamicPillarFeatureNet(PillarFeatureNet):\n    \"\"\"Pillar Feature Net using dynamic voxelization.\n\n    The network prepares the pillar features and performs forward pass\n    through PFNLayers. The main difference is that it is used for\n    dynamic voxels, which contains different number of points inside a voxel\n    without limits.\n\n    Args:\n        in_channels (int, optional): Number of input features,\n            either x, y, z or x, y, z, r. Defaults to 4.\n        feat_channels (tuple, optional): Number of features in each of the\n            N PFNLayers. Defaults to (64, ).\n        with_distance (bool, optional): Whether to include Euclidean distance\n            to points. 
Defaults to False.\n        with_cluster_center (bool, optional): [description]. Defaults to True.\n        with_voxel_center (bool, optional): [description]. Defaults to True.\n        voxel_size (tuple[float], optional): Size of voxels, only utilize x\n            and y size. Defaults to (0.2, 0.2, 4).\n        point_cloud_range (tuple[float], optional): Point cloud range, only\n            utilizes x and y min. Defaults to (0, -40, -3, 70.4, 40, 1).\n        norm_cfg ([type], optional): [description].\n            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).\n        mode (str, optional): The mode to gather point features. Options are\n            'max' or 'avg'. Defaults to 'max'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=4,\n                 feat_channels=(64, ),\n                 with_distance=False,\n                 with_cluster_center=True,\n                 with_voxel_center=True,\n                 voxel_size=(0.2, 0.2, 4),\n                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 mode='max'):\n        super(DynamicPillarFeatureNet, self).__init__(\n            in_channels,\n            feat_channels,\n            with_distance,\n            with_cluster_center=with_cluster_center,\n            with_voxel_center=with_voxel_center,\n            voxel_size=voxel_size,\n            point_cloud_range=point_cloud_range,\n            norm_cfg=norm_cfg,\n            mode=mode)\n        self.fp16_enabled = False\n        feat_channels = [self.in_channels] + list(feat_channels)\n        pfn_layers = []\n        # TODO: currently only support one PFNLayer\n\n        for i in range(len(feat_channels) - 1):\n            in_filters = feat_channels[i]\n            out_filters = feat_channels[i + 1]\n            if i > 0:\n                in_filters *= 2\n            norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)\n            pfn_layers.append(\n                nn.Sequential(\n                    nn.Linear(in_filters, out_filters, bias=False), norm_layer,\n                    nn.ReLU(inplace=True)))\n        self.num_pfn = len(pfn_layers)\n        self.pfn_layers = nn.ModuleList(pfn_layers)\n        self.pfn_scatter = DynamicScatter(voxel_size, point_cloud_range,\n                                          (mode != 'max'))\n        self.cluster_scatter = DynamicScatter(\n            voxel_size, point_cloud_range, average_points=True)\n\n    def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):\n        \"\"\"Map the centers of voxels to its corresponding points.\n\n        Args:\n            pts_coors (torch.Tensor): The coordinates of each points, shape\n                (M, 3), where M is the number of points.\n            voxel_mean (torch.Tensor): The mean or aggreagated features of a\n                voxel, shape (N, C), where N is the number of voxels.\n            voxel_coors (torch.Tensor): The coordinates of each voxel.\n\n        Returns:\n            torch.Tensor: Corresponding voxel centers of each points, shape\n                (M, C), where M is the numver of points.\n        \"\"\"\n        # Step 1: scatter voxel into canvas\n        # Calculate necessary things for canvas creation\n        canvas_y = int(\n            (self.point_cloud_range[4] - self.point_cloud_range[1]) / self.vy)\n        canvas_x = int(\n            (self.point_cloud_range[3] - self.point_cloud_range[0]) / self.vx)\n        canvas_channel = 
voxel_mean.size(1)\n        batch_size = pts_coors[-1, 0] + 1\n        canvas_len = canvas_y * canvas_x * batch_size\n        # Create the canvas for this sample\n        canvas = voxel_mean.new_zeros(canvas_channel, canvas_len)\n        # Only include non-empty pillars\n        indices = (\n            voxel_coors[:, 0] * canvas_y * canvas_x +\n            voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])\n        # Scatter the blob back to the canvas\n        canvas[:, indices.long()] = voxel_mean.t()\n\n        # Step 2: get voxel mean for each point\n        voxel_index = (\n            pts_coors[:, 0] * canvas_y * canvas_x +\n            pts_coors[:, 2] * canvas_x + pts_coors[:, 3])\n        center_per_point = canvas[:, voxel_index.long()].t()\n        return center_per_point\n\n    @force_fp32(out_fp16=True)\n    def forward(self, features, coors):\n        \"\"\"Forward function.\n\n        Args:\n            features (torch.Tensor): Point features or raw points in shape\n                (N, M, C).\n            coors (torch.Tensor): Coordinates of each voxel\n\n        Returns:\n            torch.Tensor: Features of pillars.\n        \"\"\"\n        features_ls = [features]\n        # Find distance of x, y, and z from cluster center\n        if self._with_cluster_center:\n            voxel_mean, mean_coors = self.cluster_scatter(features, coors)\n            points_mean = self.map_voxel_center_to_point(\n                coors, voxel_mean, mean_coors)\n            # TODO: maybe also do cluster for reflectivity\n            f_cluster = features[:, :3] - points_mean[:, :3]\n            features_ls.append(f_cluster)\n\n        # Find distance of x, y, and z from pillar center\n        if self._with_voxel_center:\n            f_center = features.new_zeros(size=(features.size(0), 2))\n            f_center[:, 0] = features[:, 0] - (\n                coors[:, 3].type_as(features) * self.vx + self.x_offset)\n            f_center[:, 1] = features[:, 1] - (\n                coors[:, 2].type_as(features) * self.vy + self.y_offset)\n            features_ls.append(f_center)\n\n        if self._with_distance:\n            points_dist = torch.norm(features[:, :3], 2, 1, keepdim=True)\n            features_ls.append(points_dist)\n\n        # Combine together feature decorations\n        features = torch.cat(features_ls, dim=-1)\n        for i, pfn in enumerate(self.pfn_layers):\n            point_feats = pfn(features)\n            voxel_feats, voxel_coors = self.pfn_scatter(point_feats, coors)\n            if i != len(self.pfn_layers) - 1:\n                # need to concat voxel feats if it is not the last pfn\n                feat_per_point = self.map_voxel_center_to_point(\n                    coors, voxel_feats, voxel_coors)\n                features = torch.cat([point_feats, feat_per_point], dim=1)\n\n        return voxel_feats, voxel_coors\n"
  },
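A minimal usage sketch for `PillarFeatureNet` as defined above. This is not taken from the repository's configs: the pillar counts, grid sizes and random values are invented for illustration, and it assumes mmdet3d is installed with its compiled ops available (importing the module pulls in `DynamicScatter`).

```python
# Hypothetical smoke test for PillarFeatureNet; shapes and values are made up.
import torch

from mmdet3d.models.voxel_encoders.pillar_encoder import PillarFeatureNet

pfn = PillarFeatureNet(
    in_channels=4,                      # x, y, z, reflectance
    feat_channels=(64, ),
    voxel_size=(0.2, 0.2, 4),
    point_cloud_range=(0, -40, -3, 70.4, 40, 1))

num_voxels, max_points = 97, 20
features = torch.rand(num_voxels, max_points, 4)
# at least one valid point per pillar to avoid dividing by zero
num_points = torch.randint(1, max_points + 1, (num_voxels, ))
# coors layout is (batch_idx, z, y, x); forward() reads x from coors[:, 3]
# and y from coors[:, 2]
coors = torch.zeros(num_voxels, 4, dtype=torch.int32)
coors[:, 3] = torch.randint(0, 352, (num_voxels, ))
coors[:, 2] = torch.randint(0, 400, (num_voxels, ))

out = pfn(features, num_points, coors)
print(out.shape)  # expected: (num_voxels, 64) after the final squeeze
```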
  {
    "path": "mmdet3d/models/voxel_encoders/utils.py",
    "content": "import torch\nfrom mmcv.cnn import build_norm_layer\nfrom mmcv.runner import auto_fp16\nfrom torch import nn\nfrom torch.nn import functional as F\n\n\ndef get_paddings_indicator(actual_num, max_num, axis=0):\n    \"\"\"Create boolean mask by actually number of a padded tensor.\n\n    Args:\n        actual_num (torch.Tensor): Actual number of points in each voxel.\n        max_num (int): Max number of points in each voxel\n\n    Returns:\n        torch.Tensor: Mask indicates which points are valid inside a voxel.\n    \"\"\"\n    actual_num = torch.unsqueeze(actual_num, axis + 1)\n    # tiled_actual_num: [N, M, 1]\n    max_num_shape = [1] * len(actual_num.shape)\n    max_num_shape[axis + 1] = -1\n    max_num = torch.arange(\n        max_num, dtype=torch.int, device=actual_num.device).view(max_num_shape)\n    # tiled_actual_num: [[3,3,3,3,3], [4,4,4,4,4], [2,2,2,2,2]]\n    # tiled_max_num: [[0,1,2,3,4], [0,1,2,3,4], [0,1,2,3,4]]\n    paddings_indicator = actual_num.int() > max_num\n    # paddings_indicator shape: [batch_size, max_num]\n    return paddings_indicator\n\n\nclass VFELayer(nn.Module):\n    \"\"\"Voxel Feature Encoder layer.\n\n    The voxel encoder is composed of a series of these layers.\n    This module do not support average pooling and only support to use\n    max pooling to gather features inside a VFE.\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of output channels.\n        norm_cfg (dict): Config dict of normalization layers\n        max_out (bool): Whether aggregate the features of points inside\n            each voxel and only return voxel features.\n        cat_max (bool): Whether concatenate the aggregated features\n            and pointwise features.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 max_out=True,\n                 cat_max=True):\n        super(VFELayer, self).__init__()\n        self.fp16_enabled = False\n        self.cat_max = cat_max\n        self.max_out = max_out\n        # self.units = int(out_channels / 2)\n\n        self.norm = build_norm_layer(norm_cfg, out_channels)[1]\n        self.linear = nn.Linear(in_channels, out_channels, bias=False)\n\n    @auto_fp16(apply_to=('inputs'), out_fp32=True)\n    def forward(self, inputs):\n        \"\"\"Forward function.\n\n        Args:\n            inputs (torch.Tensor): Voxels features of shape (N, M, C).\n                N is the number of voxels, M is the number of points in\n                voxels, C is the number of channels of point features.\n\n        Returns:\n            torch.Tensor: Voxel features. 
There are three mode under which the\n                features have different meaning.\n                - `max_out=False`: Return point-wise features in\n                    shape (N, M, C).\n                - `max_out=True` and `cat_max=False`: Return aggregated\n                    voxel features in shape (N, C)\n                - `max_out=True` and `cat_max=True`: Return concatenated\n                    point-wise features in shape (N, M, C).\n        \"\"\"\n        # [K, T, 7] tensordot [7, units] = [K, T, units]\n        voxel_count = inputs.shape[1]\n\n        x = self.linear(inputs)\n        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,\n                                                               1).contiguous()\n        pointwise = F.relu(x)\n        # [K, T, units]\n        if self.max_out:\n            aggregated = torch.max(pointwise, dim=1, keepdim=True)[0]\n        else:\n            # this is for fusion layer\n            return pointwise\n\n        if not self.cat_max:\n            return aggregated.squeeze(1)\n        else:\n            # [K, 1, units]\n            repeated = aggregated.repeat(1, voxel_count, 1)\n            concatenated = torch.cat([pointwise, repeated], dim=2)\n            # [K, T, 2 * units]\n            return concatenated\n\n\nclass PFNLayer(nn.Module):\n    \"\"\"Pillar Feature Net Layer.\n\n    The Pillar Feature Net is composed of a series of these layers, but the\n    PointPillars paper results only used a single PFNLayer.\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of output channels.\n        norm_cfg (dict): Config dict of normalization layers\n        last_layer (bool): If last_layer, there is no concatenation of\n            features.\n        mode (str): Pooling model to gather features inside voxels.\n            Default to 'max'.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 last_layer=False,\n                 mode='max'):\n\n        super().__init__()\n        self.fp16_enabled = False\n        self.name = 'PFNLayer'\n        self.last_vfe = last_layer\n        if not self.last_vfe:\n            out_channels = out_channels // 2\n        self.units = out_channels\n\n        self.norm = build_norm_layer(norm_cfg, self.units)[1]\n        self.linear = nn.Linear(in_channels, self.units, bias=False)\n\n        assert mode in ['max', 'avg']\n        self.mode = mode\n\n    @auto_fp16(apply_to=('inputs'), out_fp32=True)\n    def forward(self, inputs, num_voxels=None, aligned_distance=None):\n        \"\"\"Forward function.\n\n        Args:\n            inputs (torch.Tensor): Pillar/Voxel inputs with shape (N, M, C).\n                N is the number of voxels, M is the number of points in\n                voxels, C is the number of channels of point features.\n            num_voxels (torch.Tensor, optional): Number of points in each\n                voxel. Defaults to None.\n            aligned_distance (torch.Tensor, optional): The distance of\n                each points to the voxel center. 
Defaults to None.\n\n        Returns:\n            torch.Tensor: Features of Pillars.\n        \"\"\"\n        x = self.linear(inputs)\n        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,\n                                                               1).contiguous()\n        x = F.relu(x)\n\n        if self.mode == 'max':\n            if aligned_distance is not None:\n                x = x.mul(aligned_distance.unsqueeze(-1))\n            x_max = torch.max(x, dim=1, keepdim=True)[0]\n        elif self.mode == 'avg':\n            if aligned_distance is not None:\n                x = x.mul(aligned_distance.unsqueeze(-1))\n            x_max = x.sum(\n                dim=1, keepdim=True) / num_voxels.type_as(inputs).view(\n                    -1, 1, 1)\n\n        if self.last_vfe:\n            return x_max\n        else:\n            x_repeat = x_max.repeat(1, inputs.shape[1], 1)\n            x_concatenated = torch.cat([x, x_repeat], dim=2)\n            return x_concatenated\n"
  },
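A tiny check of `get_paddings_indicator`, showing the boolean mask it builds from per-voxel point counts. The numbers are invented; only this helper is exercised, but importing through the mmdet3d package still assumes the package (and its compiled ops) is installed.

```python
# Invented example: 3 voxels with 3, 4 and 2 valid points, padded to 5 slots.
import torch

from mmdet3d.models.voxel_encoders.utils import get_paddings_indicator

actual_num = torch.tensor([3, 4, 2])
mask = get_paddings_indicator(actual_num, max_num=5, axis=0)
print(mask)
# tensor([[ True,  True,  True, False, False],
#         [ True,  True,  True,  True, False],
#         [ True,  True, False, False, False]])
```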
  {
    "path": "mmdet3d/models/voxel_encoders/voxel_encoder.py",
    "content": "import torch\nfrom mmcv.cnn import build_norm_layer\nfrom mmcv.runner import force_fp32\nfrom torch import nn\n\nfrom mmdet3d.ops import DynamicScatter\nfrom .. import builder\nfrom ..registry import VOXEL_ENCODERS\nfrom .utils import VFELayer, get_paddings_indicator\n\n\n@VOXEL_ENCODERS.register_module()\nclass HardSimpleVFE(nn.Module):\n    \"\"\"Simple voxel feature encoder used in SECOND.\n\n    It simply averages the values of points in a voxel.\n\n    Args:\n        num_features (int): Number of features to use. Default: 4.\n    \"\"\"\n\n    def __init__(self, num_features=4):\n        super(HardSimpleVFE, self).__init__()\n        self.num_features = num_features\n        self.fp16_enabled = False\n\n    @force_fp32(out_fp16=True)\n    def forward(self, features, num_points, coors):\n        \"\"\"Forward function.\n\n        Args:\n            features (torch.Tensor): Point features in shape\n                (N, M, 3(4)). N is the number of voxels and M is the maximum\n                number of points inside a single voxel.\n            num_points (torch.Tensor): Number of points in each voxel,\n                 shape (N, ).\n            coors (torch.Tensor): Coordinates of voxels.\n\n        Returns:\n            torch.Tensor: Mean of points inside each voxel in shape (N, 3(4))\n        \"\"\"\n        points_mean = features[:, :, :self.num_features].sum(\n            dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)\n        return points_mean.contiguous()\n\n\n@VOXEL_ENCODERS.register_module()\nclass DynamicSimpleVFE(nn.Module):\n    \"\"\"Simple dynamic voxel feature encoder used in DV-SECOND.\n\n    It simply averages the values of points in a voxel.\n    But the number of points in a voxel is dynamic and varies.\n\n    Args:\n        voxel_size (tupe[float]): Size of a single voxel\n        point_cloud_range (tuple[float]): Range of the point cloud and voxels\n    \"\"\"\n\n    def __init__(self,\n                 voxel_size=(0.2, 0.2, 4),\n                 point_cloud_range=(0, -40, -3, 70.4, 40, 1)):\n        super(DynamicSimpleVFE, self).__init__()\n        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)\n        self.fp16_enabled = False\n\n    @torch.no_grad()\n    @force_fp32(out_fp16=True)\n    def forward(self, features, coors):\n        \"\"\"Forward function.\n\n        Args:\n            features (torch.Tensor): Point features in shape\n                (N, 3(4)). N is the number of points.\n            coors (torch.Tensor): Coordinates of voxels.\n\n        Returns:\n            torch.Tensor: Mean of points inside each voxel in shape (M, 3(4)).\n                M is the number of voxels.\n        \"\"\"\n        # This function is used from the start of the voxelnet\n        # num_points: [concated_num_points]\n        features, features_coors = self.scatter(features, coors)\n        return features, features_coors\n\n\n@VOXEL_ENCODERS.register_module()\nclass DynamicVFE(nn.Module):\n    \"\"\"Dynamic Voxel feature encoder used in DV-SECOND.\n\n    It encodes features of voxels and their points. It could also fuse\n    image feature into voxel features in a point-wise manner.\n    The number of points inside the voxel varies.\n\n    Args:\n        in_channels (int): Input channels of VFE. Defaults to 4.\n        feat_channels (list(int)): Channels of features in VFE.\n        with_distance (bool): Whether to use the L2 distance of points to the\n            origin point. 
Default False.\n        with_cluster_center (bool): Whether to use the distance to cluster\n            center of points inside a voxel. Default to False.\n        with_voxel_center (bool): Whether to use the distance to center of\n            voxel for each points inside a voxel. Default to False.\n        voxel_size (tuple[float]): Size of a single voxel. Default to\n            (0.2, 0.2, 4).\n        point_cloud_range (tuple[float]): The range of points or voxels.\n            Default to (0, -40, -3, 70.4, 40, 1).\n        norm_cfg (dict): Config dict of normalization layers.\n        mode (str): The mode when pooling features of points inside a voxel.\n            Available options include 'max' and 'avg'. Default to 'max'.\n        fusion_layer (dict | None): The config dict of fusion layer used in\n            multi-modal detectors. Default to None.\n        return_point_feats (bool): Whether to return the features of each\n            points. Default to False.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=4,\n                 feat_channels=[],\n                 with_distance=False,\n                 with_cluster_center=False,\n                 with_voxel_center=False,\n                 voxel_size=(0.2, 0.2, 4),\n                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 mode='max',\n                 fusion_layer=None,\n                 return_point_feats=False):\n        super(DynamicVFE, self).__init__()\n        assert mode in ['avg', 'max']\n        assert len(feat_channels) > 0\n        if with_cluster_center:\n            in_channels += 3\n        if with_voxel_center:\n            in_channels += 3\n        if with_distance:\n            in_channels += 3\n        self.in_channels = in_channels\n        self._with_distance = with_distance\n        self._with_cluster_center = with_cluster_center\n        self._with_voxel_center = with_voxel_center\n        self.return_point_feats = return_point_feats\n        self.fp16_enabled = False\n\n        # Need pillar (voxel) size and x/y offset in order to calculate offset\n        self.vx = voxel_size[0]\n        self.vy = voxel_size[1]\n        self.vz = voxel_size[2]\n        self.x_offset = self.vx / 2 + point_cloud_range[0]\n        self.y_offset = self.vy / 2 + point_cloud_range[1]\n        self.z_offset = self.vz / 2 + point_cloud_range[2]\n        self.point_cloud_range = point_cloud_range\n        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)\n\n        feat_channels = [self.in_channels] + list(feat_channels)\n        vfe_layers = []\n        for i in range(len(feat_channels) - 1):\n            in_filters = feat_channels[i]\n            out_filters = feat_channels[i + 1]\n            if i > 0:\n                in_filters *= 2\n            norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)\n            vfe_layers.append(\n                nn.Sequential(\n                    nn.Linear(in_filters, out_filters, bias=False), norm_layer,\n                    nn.ReLU(inplace=True)))\n        self.vfe_layers = nn.ModuleList(vfe_layers)\n        self.num_vfe = len(vfe_layers)\n        self.vfe_scatter = DynamicScatter(voxel_size, point_cloud_range,\n                                          (mode != 'max'))\n        self.cluster_scatter = DynamicScatter(\n            voxel_size, point_cloud_range, average_points=True)\n        self.fusion_layer = None\n        if fusion_layer is not None:\n   
         self.fusion_layer = builder.build_fusion_layer(fusion_layer)\n\n    def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):\n        \"\"\"Map voxel features to its corresponding points.\n\n        Args:\n            pts_coors (torch.Tensor): Voxel coordinate of each point.\n            voxel_mean (torch.Tensor): Voxel features to be mapped.\n            voxel_coors (torch.Tensor): Coordinates of valid voxels\n\n        Returns:\n            torch.Tensor: Features or centers of each point.\n        \"\"\"\n        # Step 1: scatter voxel into canvas\n        # Calculate necessary things for canvas creation\n        canvas_z = int(\n            (self.point_cloud_range[5] - self.point_cloud_range[2]) / self.vz)\n        canvas_y = int(\n            (self.point_cloud_range[4] - self.point_cloud_range[1]) / self.vy)\n        canvas_x = int(\n            (self.point_cloud_range[3] - self.point_cloud_range[0]) / self.vx)\n        # canvas_channel = voxel_mean.size(1)\n        batch_size = pts_coors[-1, 0] + 1\n        canvas_len = canvas_z * canvas_y * canvas_x * batch_size\n        # Create the canvas for this sample\n        canvas = voxel_mean.new_zeros(canvas_len, dtype=torch.long)\n        # Only include non-empty pillars\n        indices = (\n            voxel_coors[:, 0] * canvas_z * canvas_y * canvas_x +\n            voxel_coors[:, 1] * canvas_y * canvas_x +\n            voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])\n        # Scatter the blob back to the canvas\n        canvas[indices.long()] = torch.arange(\n            start=0, end=voxel_mean.size(0), device=voxel_mean.device)\n\n        # Step 2: get voxel mean for each point\n        voxel_index = (\n            pts_coors[:, 0] * canvas_z * canvas_y * canvas_x +\n            pts_coors[:, 1] * canvas_y * canvas_x +\n            pts_coors[:, 2] * canvas_x + pts_coors[:, 3])\n        voxel_inds = canvas[voxel_index.long()]\n        center_per_point = voxel_mean[voxel_inds, ...]\n        return center_per_point\n\n    @force_fp32(out_fp16=True)\n    def forward(self,\n                features,\n                coors,\n                points=None,\n                img_feats=None,\n                img_metas=None):\n        \"\"\"Forward functions.\n\n        Args:\n            features (torch.Tensor): Features of voxels, shape is NxC.\n            coors (torch.Tensor): Coordinates of voxels, shape is  Nx(1+NDim).\n            points (list[torch.Tensor], optional): Raw points used to guide the\n                multi-modality fusion. Defaults to None.\n            img_feats (list[torch.Tensor], optional): Image fetures used for\n                multi-modality fusion. Defaults to None.\n            img_metas (dict, optional): [description]. Defaults to None.\n\n        Returns:\n            tuple: If `return_point_feats` is False, returns voxel features and\n                its coordinates. 
If `return_point_feats` is True, returns\n                feature of each points inside voxels.\n        \"\"\"\n        features_ls = [features]\n        # Find distance of x, y, and z from cluster center\n        if self._with_cluster_center:\n            voxel_mean, mean_coors = self.cluster_scatter(features, coors)\n            points_mean = self.map_voxel_center_to_point(\n                coors, voxel_mean, mean_coors)\n            # TODO: maybe also do cluster for reflectivity\n            f_cluster = features[:, :3] - points_mean[:, :3]\n            features_ls.append(f_cluster)\n\n        # Find distance of x, y, and z from pillar center\n        if self._with_voxel_center:\n            f_center = features.new_zeros(size=(features.size(0), 3))\n            f_center[:, 0] = features[:, 0] - (\n                coors[:, 3].type_as(features) * self.vx + self.x_offset)\n            f_center[:, 1] = features[:, 1] - (\n                coors[:, 2].type_as(features) * self.vy + self.y_offset)\n            f_center[:, 2] = features[:, 2] - (\n                coors[:, 1].type_as(features) * self.vz + self.z_offset)\n            features_ls.append(f_center)\n\n        if self._with_distance:\n            points_dist = torch.norm(features[:, :3], 2, 1, keepdim=True)\n            features_ls.append(points_dist)\n\n        # Combine together feature decorations\n        features = torch.cat(features_ls, dim=-1)\n        for i, vfe in enumerate(self.vfe_layers):\n            point_feats = vfe(features)\n            if (i == len(self.vfe_layers) - 1 and self.fusion_layer is not None\n                    and img_feats is not None):\n                point_feats = self.fusion_layer(img_feats, points, point_feats,\n                                                img_metas)\n            voxel_feats, voxel_coors = self.vfe_scatter(point_feats, coors)\n            if i != len(self.vfe_layers) - 1:\n                # need to concat voxel feats if it is not the last vfe\n                feat_per_point = self.map_voxel_center_to_point(\n                    coors, voxel_feats, voxel_coors)\n                features = torch.cat([point_feats, feat_per_point], dim=1)\n\n        if self.return_point_feats:\n            return point_feats\n        return voxel_feats, voxel_coors\n\n\n@VOXEL_ENCODERS.register_module()\nclass HardVFE(nn.Module):\n    \"\"\"Voxel feature encoder used in DV-SECOND.\n\n    It encodes features of voxels and their points. It could also fuse\n    image feature into voxel features in a point-wise manner.\n\n    Args:\n        in_channels (int): Input channels of VFE. Defaults to 4.\n        feat_channels (list(int)): Channels of features in VFE.\n        with_distance (bool): Whether to use the L2 distance of points to the\n            origin point. Default False.\n        with_cluster_center (bool): Whether to use the distance to cluster\n            center of points inside a voxel. Default to False.\n        with_voxel_center (bool): Whether to use the distance to center of\n            voxel for each points inside a voxel. Default to False.\n        voxel_size (tuple[float]): Size of a single voxel. Default to\n            (0.2, 0.2, 4).\n        point_cloud_range (tuple[float]): The range of points or voxels.\n            Default to (0, -40, -3, 70.4, 40, 1).\n        norm_cfg (dict): Config dict of normalization layers.\n        mode (str): The mode when pooling features of points inside a voxel.\n            Available options include 'max' and 'avg'. 
Default to 'max'.\n        fusion_layer (dict | None): The config dict of fusion layer used in\n            multi-modal detectors. Default to None.\n        return_point_feats (bool): Whether to return the features of each\n            points. Default to False.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=4,\n                 feat_channels=[],\n                 with_distance=False,\n                 with_cluster_center=False,\n                 with_voxel_center=False,\n                 voxel_size=(0.2, 0.2, 4),\n                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),\n                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n                 mode='max',\n                 fusion_layer=None,\n                 return_point_feats=False):\n        super(HardVFE, self).__init__()\n        assert len(feat_channels) > 0\n        if with_cluster_center:\n            in_channels += 3\n        if with_voxel_center:\n            in_channels += 3\n        if with_distance:\n            in_channels += 3\n        self.in_channels = in_channels\n        self._with_distance = with_distance\n        self._with_cluster_center = with_cluster_center\n        self._with_voxel_center = with_voxel_center\n        self.return_point_feats = return_point_feats\n        self.fp16_enabled = False\n\n        # Need pillar (voxel) size and x/y offset to calculate pillar offset\n        self.vx = voxel_size[0]\n        self.vy = voxel_size[1]\n        self.vz = voxel_size[2]\n        self.x_offset = self.vx / 2 + point_cloud_range[0]\n        self.y_offset = self.vy / 2 + point_cloud_range[1]\n        self.z_offset = self.vz / 2 + point_cloud_range[2]\n        self.point_cloud_range = point_cloud_range\n        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)\n\n        feat_channels = [self.in_channels] + list(feat_channels)\n        vfe_layers = []\n        for i in range(len(feat_channels) - 1):\n            in_filters = feat_channels[i]\n            out_filters = feat_channels[i + 1]\n            if i > 0:\n                in_filters *= 2\n            # TODO: pass norm_cfg to VFE\n            # norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)\n            if i == (len(feat_channels) - 2):\n                cat_max = False\n                max_out = True\n                if fusion_layer:\n                    max_out = False\n            else:\n                max_out = True\n                cat_max = True\n            vfe_layers.append(\n                VFELayer(\n                    in_filters,\n                    out_filters,\n                    norm_cfg=norm_cfg,\n                    max_out=max_out,\n                    cat_max=cat_max))\n            self.vfe_layers = nn.ModuleList(vfe_layers)\n        self.num_vfe = len(vfe_layers)\n\n        self.fusion_layer = None\n        if fusion_layer is not None:\n            self.fusion_layer = builder.build_fusion_layer(fusion_layer)\n\n    @force_fp32(out_fp16=True)\n    def forward(self,\n                features,\n                num_points,\n                coors,\n                img_feats=None,\n                img_metas=None):\n        \"\"\"Forward functions.\n\n        Args:\n            features (torch.Tensor): Features of voxels, shape is MxNxC.\n            num_points (torch.Tensor): Number of points in each voxel.\n            coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).\n            img_feats (list[torch.Tensor], optional): Image fetures used for\n              
  multi-modality fusion. Defaults to None.\n            img_metas (dict, optional): [description]. Defaults to None.\n\n        Returns:\n            tuple: If `return_point_feats` is False, returns voxel features and\n                its coordinates. If `return_point_feats` is True, returns\n                feature of each points inside voxels.\n        \"\"\"\n        features_ls = [features]\n        # Find distance of x, y, and z from cluster center\n        if self._with_cluster_center:\n            points_mean = (\n                features[:, :, :3].sum(dim=1, keepdim=True) /\n                num_points.type_as(features).view(-1, 1, 1))\n            # TODO: maybe also do cluster for reflectivity\n            f_cluster = features[:, :, :3] - points_mean\n            features_ls.append(f_cluster)\n\n        # Find distance of x, y, and z from pillar center\n        if self._with_voxel_center:\n            f_center = features.new_zeros(\n                size=(features.size(0), features.size(1), 3))\n            f_center[:, :, 0] = features[:, :, 0] - (\n                coors[:, 3].type_as(features).unsqueeze(1) * self.vx +\n                self.x_offset)\n            f_center[:, :, 1] = features[:, :, 1] - (\n                coors[:, 2].type_as(features).unsqueeze(1) * self.vy +\n                self.y_offset)\n            f_center[:, :, 2] = features[:, :, 2] - (\n                coors[:, 1].type_as(features).unsqueeze(1) * self.vz +\n                self.z_offset)\n            features_ls.append(f_center)\n\n        if self._with_distance:\n            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)\n            features_ls.append(points_dist)\n\n        # Combine together feature decorations\n        voxel_feats = torch.cat(features_ls, dim=-1)\n        # The feature decorations were calculated without regard to whether\n        # pillar was empty.\n        # Need to ensure that empty voxels remain set to zeros.\n        voxel_count = voxel_feats.shape[1]\n        mask = get_paddings_indicator(num_points, voxel_count, axis=0)\n        voxel_feats *= mask.unsqueeze(-1).type_as(voxel_feats)\n\n        for i, vfe in enumerate(self.vfe_layers):\n            voxel_feats = vfe(voxel_feats)\n\n        if (self.fusion_layer is not None and img_feats is not None):\n            voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,\n                                                coors, img_feats, img_metas)\n\n        return voxel_feats\n\n    def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,\n                         img_metas):\n        \"\"\"Fuse image and point features with mask.\n\n        Args:\n            features (torch.Tensor): Features of voxel, usually it is the\n                values of points in voxels.\n            mask (torch.Tensor): Mask indicates valid features in each voxel.\n            voxel_feats (torch.Tensor): Features of voxels.\n            coors (torch.Tensor): Coordinates of each single voxel.\n            img_feats (list[torch.Tensor]): Multi-scale feature maps of image.\n            img_metas (list(dict)): Meta information of image and points.\n\n        Returns:\n            torch.Tensor: Fused features of each voxel.\n        \"\"\"\n        # the features is consist of a batch of points\n        batch_size = coors[-1, 0] + 1\n        points = []\n        for i in range(batch_size):\n            single_mask = (coors[:, 0] == i)\n            points.append(features[single_mask][mask[single_mask]])\n\n        
point_feats = voxel_feats[mask]\n        point_feats = self.fusion_layer(img_feats, points, point_feats,\n                                        img_metas)\n\n        voxel_canvas = voxel_feats.new_zeros(\n            size=(voxel_feats.size(0), voxel_feats.size(1),\n                  point_feats.size(-1)))\n        voxel_canvas[mask] = point_feats\n        out = torch.max(voxel_canvas, dim=1)[0]\n\n        return out\n"
  },
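A short sketch of the simplest encoder above, `HardSimpleVFE`, which only averages the points of each voxel. The tensors are synthetic; in real use the voxelization step zero-fills the padded point slots, which is what makes the sum divided by `num_points` the mean of the valid points.

```python
# Synthetic input for HardSimpleVFE; real voxelized data would already have
# zero-padded rows beyond num_points[i] in each voxel.
import torch

from mmdet3d.models.voxel_encoders.voxel_encoder import HardSimpleVFE

vfe = HardSimpleVFE(num_features=4)
features = torch.zeros(10, 32, 4)            # (N voxels, M points, C)
num_points = torch.randint(1, 33, (10, ))    # valid points per voxel
for i, n in enumerate(num_points):
    features[i, :n] = torch.rand(int(n), 4)  # only the first n rows are real
coors = torch.zeros(10, 4, dtype=torch.int32)  # unused by this encoder

mean = vfe(features, num_points, coors)
print(mean.shape)  # (10, 4): per-voxel mean of the valid points
```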
  {
    "path": "mmdet3d/ops/__init__.py",
    "content": "from mmcv.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,\n                      get_compiling_cuda_version, nms, roi_align,\n                      sigmoid_focal_loss)\n\nfrom .ball_query import ball_query\nfrom .furthest_point_sample import (Points_Sampler, furthest_point_sample,\n                                    furthest_point_sample_with_dist)\nfrom .gather_points import gather_points\nfrom .group_points import (GroupAll, QueryAndGroup, group_points,\n                           grouping_operation)\nfrom .interpolate import three_interpolate, three_nn\nfrom .knn import knn\nfrom .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d\nfrom .pointnet_modules import (PointFPModule, PointSAModule, PointSAModuleMSG,\n                               build_sa_module)\nfrom .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch,\n                              points_in_boxes_cpu, points_in_boxes_gpu)\nfrom .sparse_block import (SparseBasicBlock, SparseBottleneck,\n                           make_sparse_convmodule)\nfrom .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization\n\n__all__ = [\n    'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',\n    'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d',\n    'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',\n    'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',\n    'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck',\n    'RoIAwarePool3d', 'points_in_boxes_gpu', 'points_in_boxes_cpu',\n    'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample',\n    'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn',\n    'gather_points', 'grouping_operation', 'group_points', 'GroupAll',\n    'QueryAndGroup', 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule',\n    'points_in_boxes_batch', 'get_compiler_version',\n    'get_compiling_cuda_version', 'Points_Sampler', 'build_sa_module'\n]\n"
  },
  {
    "path": "mmdet3d/ops/ball_query/__init__.py",
    "content": "from .ball_query import ball_query\n\n__all__ = ['ball_query']\n"
  },
  {
    "path": "mmdet3d/ops/ball_query/ball_query.py",
    "content": "import torch\nfrom torch.autograd import Function\n\nfrom . import ball_query_ext\n\n\nclass BallQuery(Function):\n    \"\"\"Ball Query.\n\n    Find nearby points in spherical space.\n    \"\"\"\n\n    @staticmethod\n    def forward(ctx, min_radius: float, max_radius: float, sample_num: int,\n                xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor:\n        \"\"\"forward.\n\n        Args:\n            min_radius (float): minimum radius of the balls.\n            max_radius (float): maximum radius of the balls.\n            sample_num (int): maximum number of features in the balls.\n            xyz (Tensor): (B, N, 3) xyz coordinates of the features.\n            center_xyz (Tensor): (B, npoint, 3) centers of the ball query.\n\n        Returns:\n            Tensor: (B, npoint, nsample) tensor with the indicies of\n                the features that form the query balls.\n        \"\"\"\n        assert center_xyz.is_contiguous()\n        assert xyz.is_contiguous()\n        assert min_radius < max_radius\n\n        B, N, _ = xyz.size()\n        npoint = center_xyz.size(1)\n        idx = torch.cuda.IntTensor(B, npoint, sample_num).zero_()\n\n        ball_query_ext.ball_query_wrapper(B, N, npoint, min_radius, max_radius,\n                                          sample_num, center_xyz, xyz, idx)\n        ctx.mark_non_differentiable(idx)\n        return idx\n\n    @staticmethod\n    def backward(ctx, a=None):\n        return None, None, None, None\n\n\nball_query = BallQuery.apply\n"
  },
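A usage sketch for `ball_query`. It needs a CUDA device and the compiled `ball_query_ext` extension; the radii and tensor sizes below are arbitrary.

```python
# Arbitrary sizes; requires a GPU and mmdet3d's compiled extensions.
import torch

from mmdet3d.ops import ball_query

B, N, npoint, nsample = 2, 1024, 128, 16
xyz = torch.rand(B, N, 3).cuda()                # all candidate points
center_xyz = xyz[:, :npoint, :].contiguous()    # query ball centers
# For each center, up to `nsample` neighbour indices whose squared distance
# lies in [min_radius^2, max_radius^2); the center itself always qualifies,
# and unused slots repeat the first neighbour found.
idx = ball_query(0.0, 0.4, nsample, xyz, center_xyz)
print(idx.shape)  # torch.Size([2, 128, 16])
```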
  {
    "path": "mmdet3d/ops/ball_query/src/ball_query.cpp",
    "content": "// Modified from\n// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp\n\n#include <THC/THC.h>\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n#include <torch/extension.h>\n#include <torch/serialize/tensor.h>\n\n#include <vector>\n\nextern THCState *state;\n\n#define CHECK_CUDA(x) \\\n  TORCH_CHECK(x.type().is_cuda(), #x, \" must be a CUDAtensor \")\n#define CHECK_CONTIGUOUS(x) \\\n  TORCH_CHECK(x.is_contiguous(), #x, \" must be contiguous \")\n#define CHECK_INPUT(x) \\\n  CHECK_CUDA(x);       \\\n  CHECK_CONTIGUOUS(x)\n\nint ball_query_wrapper(int b, int n, int m, float min_radius, float max_radius, int nsample,\n                       at::Tensor new_xyz_tensor, at::Tensor xyz_tensor,\n                       at::Tensor idx_tensor);\n\nvoid ball_query_kernel_launcher(int b, int n, int m, float min_radius, float max_radius,\n                                int nsample, const float *xyz, const float *new_xyz,\n                                int *idx, cudaStream_t stream);\n\nint ball_query_wrapper(int b, int n, int m, float min_radius, float max_radius, int nsample,\n                       at::Tensor new_xyz_tensor, at::Tensor xyz_tensor,\n                       at::Tensor idx_tensor) {\n  CHECK_INPUT(new_xyz_tensor);\n  CHECK_INPUT(xyz_tensor);\n  const float *new_xyz = new_xyz_tensor.data_ptr<float>();\n  const float *xyz = xyz_tensor.data_ptr<float>();\n  int *idx = idx_tensor.data_ptr<int>();\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();\n  ball_query_kernel_launcher(b, n, m, min_radius, max_radius,\n                             nsample, new_xyz, xyz, idx, stream);\n  return 1;\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"ball_query_wrapper\", &ball_query_wrapper, \"ball_query_wrapper\");\n}\n"
  },
  {
    "path": "mmdet3d/ops/ball_query/src/ball_query_cuda.cu",
    "content": "// Modified from\n// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query_gpu.cu\n\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#define THREADS_PER_BLOCK 256\n#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))\n\n__global__ void ball_query_kernel(int b, int n, int m,\n                                  float min_radius,\n                                  float max_radius,\n                                  int nsample,\n                                  const float *__restrict__ new_xyz,\n                                  const float *__restrict__ xyz,\n                                  int *__restrict__ idx) {\n  // new_xyz: (B, M, 3)\n  // xyz: (B, N, 3)\n  // output:\n  //      idx: (B, M, nsample)\n  int bs_idx = blockIdx.y;\n  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n  if (bs_idx >= b || pt_idx >= m) return;\n\n  new_xyz += bs_idx * m * 3 + pt_idx * 3;\n  xyz += bs_idx * n * 3;\n  idx += bs_idx * m * nsample + pt_idx * nsample;\n\n  float max_radius2 = max_radius * max_radius;\n  float min_radius2 = min_radius * min_radius;\n  float new_x = new_xyz[0];\n  float new_y = new_xyz[1];\n  float new_z = new_xyz[2];\n\n  int cnt = 0;\n  for (int k = 0; k < n; ++k) {\n    float x = xyz[k * 3 + 0];\n    float y = xyz[k * 3 + 1];\n    float z = xyz[k * 3 + 2];\n    float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +\n               (new_z - z) * (new_z - z);\n    if (d2 == 0 || (d2 >= min_radius2 && d2 < max_radius2)) {\n      if (cnt == 0) {\n        for (int l = 0; l < nsample; ++l) {\n          idx[l] = k;\n        }\n      }\n      idx[cnt] = k;\n      ++cnt;\n      if (cnt >= nsample) break;\n    }\n  }\n}\n\nvoid ball_query_kernel_launcher(int b, int n, int m, float min_radius, float max_radius,\n                                int nsample, const float *new_xyz, const float *xyz,\n                                int *idx, cudaStream_t stream) {\n  // new_xyz: (B, M, 3)\n  // xyz: (B, N, 3)\n  // output:\n  //      idx: (B, M, nsample)\n\n  cudaError_t err;\n\n  dim3 blocks(DIVUP(m, THREADS_PER_BLOCK),\n              b);  // blockIdx.x(col), blockIdx.y(row)\n  dim3 threads(THREADS_PER_BLOCK);\n\n  ball_query_kernel<<<blocks, threads, 0, stream>>>(b, n, m, min_radius, max_radius,\n                                                    nsample, new_xyz, xyz, idx);\n  // cudaDeviceSynchronize();  // for using printf in kernel function\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n}\n"
  },
  {
    "path": "mmdet3d/ops/furthest_point_sample/__init__.py",
    "content": "from .furthest_point_sample import (furthest_point_sample,\n                                    furthest_point_sample_with_dist)\nfrom .points_sampler import Points_Sampler\n\n__all__ = [\n    'furthest_point_sample', 'furthest_point_sample_with_dist',\n    'Points_Sampler'\n]\n"
  },
  {
    "path": "mmdet3d/ops/furthest_point_sample/furthest_point_sample.py",
    "content": "import torch\nfrom torch.autograd import Function\n\nfrom . import furthest_point_sample_ext\n\n\nclass FurthestPointSampling(Function):\n    \"\"\"Furthest Point Sampling.\n\n    Uses iterative furthest point sampling to select a set of features whose\n    corresponding points have the furthest distance.\n    \"\"\"\n\n    @staticmethod\n    def forward(ctx, points_xyz: torch.Tensor,\n                num_points: int) -> torch.Tensor:\n        \"\"\"forward.\n\n        Args:\n            points_xyz (Tensor): (B, N, 3) where N > num_points.\n            num_points (int): Number of points in the sampled set.\n\n        Returns:\n             Tensor: (B, num_points) indices of the sampled points.\n        \"\"\"\n        assert points_xyz.is_contiguous()\n\n        B, N = points_xyz.size()[:2]\n        output = torch.cuda.IntTensor(B, num_points)\n        temp = torch.cuda.FloatTensor(B, N).fill_(1e10)\n\n        furthest_point_sample_ext.furthest_point_sampling_wrapper(\n            B, N, num_points, points_xyz, temp, output)\n        ctx.mark_non_differentiable(output)\n        return output\n\n    @staticmethod\n    def backward(xyz, a=None):\n        return None, None\n\n\nclass FurthestPointSamplingWithDist(Function):\n    \"\"\"Furthest Point Sampling With Distance.\n\n    Uses iterative furthest point sampling to select a set of features whose\n    corresponding points have the furthest distance.\n    \"\"\"\n\n    @staticmethod\n    def forward(ctx, points_dist: torch.Tensor,\n                num_points: int) -> torch.Tensor:\n        \"\"\"forward.\n\n        Args:\n            points_dist (Tensor): (B, N, N) Distance between each point pair.\n            num_points (int): Number of points in the sampled set.\n\n        Returns:\n             Tensor: (B, num_points) indices of the sampled points.\n        \"\"\"\n        assert points_dist.is_contiguous()\n\n        B, N, _ = points_dist.size()\n        output = points_dist.new_zeros([B, num_points], dtype=torch.int32)\n        temp = points_dist.new_zeros([B, N]).fill_(1e10)\n\n        furthest_point_sample_ext.furthest_point_sampling_with_dist_wrapper(\n            B, N, num_points, points_dist, temp, output)\n        ctx.mark_non_differentiable(output)\n        return output\n\n    @staticmethod\n    def backward(xyz, a=None):\n        return None, None\n\n\nfurthest_point_sample = FurthestPointSampling.apply\nfurthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply\n"
  },
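A sketch of D-FPS via `furthest_point_sample`, paired with the `gather_points` op exported from `mmdet3d.ops` (see the `__init__.py` above) to pull out the sampled coordinates. A CUDA device and the compiled extensions are assumed, and the point cloud is random; the channel-first gather convention follows the standard PointNet++ ops.

```python
# Random point cloud; requires a GPU and the compiled sampling ops.
import torch

from mmdet3d.ops import furthest_point_sample, gather_points

B, N, S = 2, 2048, 256
points_xyz = torch.rand(B, N, 3).cuda()
idx = furthest_point_sample(points_xyz.contiguous(), S)  # (B, S) int32 indices
# gather_points works on channel-first features, hence the transpose.
sampled_xyz = gather_points(points_xyz.transpose(1, 2).contiguous(), idx)
print(idx.shape, sampled_xyz.shape)  # (2, 256) and (2, 3, 256)
```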
  {
    "path": "mmdet3d/ops/furthest_point_sample/points_sampler.py",
    "content": "import torch\nfrom mmcv.runner import force_fp32\nfrom torch import nn as nn\nfrom typing import List\n\nfrom .furthest_point_sample import (furthest_point_sample,\n                                    furthest_point_sample_with_dist)\nfrom .utils import calc_square_dist\n\n\ndef get_sampler_type(sampler_type):\n    \"\"\"Get the type and mode of points sampler.\n\n    Args:\n        sampler_type (str): The type of points sampler.\n            The valid value are \"D-FPS\", \"F-FPS\", or \"FS\".\n\n    Returns:\n        class: Points sampler type.\n    \"\"\"\n    if sampler_type == 'D-FPS':\n        sampler = DFPS_Sampler\n    elif sampler_type == 'F-FPS':\n        sampler = FFPS_Sampler\n    elif sampler_type == 'FS':\n        sampler = FS_Sampler\n    else:\n        raise ValueError('Only \"sampler_type\" of \"D-FPS\", \"F-FPS\", or \"FS\"'\n                         f' are supported, got {sampler_type}')\n\n    return sampler\n\n\nclass Points_Sampler(nn.Module):\n    \"\"\"Points sampling.\n\n    Args:\n        num_point (list[int]): Number of sample points.\n        fps_mod_list (list[str]: Type of FPS method, valid mod\n            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].\n            F-FPS: using feature distances for FPS.\n            D-FPS: using Euclidean distances of points for FPS.\n            FS: using F-FPS and D-FPS simultaneously.\n        fps_sample_range_list (list[int]): Range of points to apply FPS.\n            Default: [-1].\n    \"\"\"\n\n    def __init__(self,\n                 num_point: List[int],\n                 fps_mod_list: List[str] = ['D-FPS'],\n                 fps_sample_range_list: List[int] = [-1]):\n        super(Points_Sampler, self).__init__()\n        # FPS would be applied to different fps_mod in the list,\n        # so the length of the num_point should be equal to\n        # fps_mod_list and fps_sample_range_list.\n        assert len(num_point) == len(fps_mod_list) == len(\n            fps_sample_range_list)\n        self.num_point = num_point\n        self.fps_sample_range_list = fps_sample_range_list\n        self.samplers = nn.ModuleList()\n        for fps_mod in fps_mod_list:\n            self.samplers.append(get_sampler_type(fps_mod)())\n        self.fp16_enabled = False\n\n    @force_fp32()\n    def forward(self, points_xyz, features):\n        \"\"\"forward.\n\n        Args:\n            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.\n            features (Tensor): (B, C, N) Descriptors of the features.\n\n        Return：\n            Tensor: (B, npoint, sample_num) Indices of sampled points.\n        \"\"\"\n        indices = []\n        last_fps_end_index = 0\n\n        for fps_sample_range, sampler, npoint in zip(\n                self.fps_sample_range_list, self.samplers, self.num_point):\n            assert fps_sample_range < points_xyz.shape[1]\n\n            if fps_sample_range == -1:\n                sample_points_xyz = points_xyz[:, last_fps_end_index:]\n                sample_features = features[:, :, last_fps_end_index:]\n            else:\n                sample_points_xyz = \\\n                    points_xyz[:, last_fps_end_index:fps_sample_range]\n                sample_features = \\\n                    features[:, :, last_fps_end_index:fps_sample_range]\n\n            fps_idx = sampler(sample_points_xyz.contiguous(), sample_features,\n                              npoint)\n\n            indices.append(fps_idx + last_fps_end_index)\n            last_fps_end_index += fps_sample_range\n    
    indices = torch.cat(indices, dim=1)\n\n        return indices\n\n\nclass DFPS_Sampler(nn.Module):\n    \"\"\"DFPS_Sampling.\n\n    Using Euclidean distances of points for FPS.\n    \"\"\"\n\n    def __init__(self):\n        super(DFPS_Sampler, self).__init__()\n\n    def forward(self, points, features, npoint):\n        \"\"\"Sampling points with D-FPS.\"\"\"\n        fps_idx = furthest_point_sample(points.contiguous(), npoint)\n        return fps_idx\n\n\nclass FFPS_Sampler(nn.Module):\n    \"\"\"FFPS_Sampler.\n\n    Using feature distances for FPS.\n    \"\"\"\n\n    def __init__(self):\n        super(FFPS_Sampler, self).__init__()\n\n    def forward(self, points, features, npoint):\n        \"\"\"Sampling points with F-FPS.\"\"\"\n        features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2)\n        features_dist = calc_square_dist(\n            features_for_fps, features_for_fps, norm=False)\n        fps_idx = furthest_point_sample_with_dist(features_dist, npoint)\n        return fps_idx\n\n\nclass FS_Sampler(nn.Module):\n    \"\"\"FS_Sampling.\n\n    Using F-FPS and D-FPS simultaneously.\n    \"\"\"\n\n    def __init__(self):\n        super(FS_Sampler, self).__init__()\n\n    def forward(self, points, features, npoint):\n        \"\"\"Sampling points with FS_Sampling.\"\"\"\n        features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2)\n        features_dist = calc_square_dist(\n            features_for_fps, features_for_fps, norm=False)\n        fps_idx_ffps = furthest_point_sample_with_dist(features_dist, npoint)\n        fps_idx_dfps = furthest_point_sample(points, npoint)\n        fps_idx = torch.cat([fps_idx_ffps, fps_idx_dfps], dim=1)\n        return fps_idx\n"
  },
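A sketch of `Points_Sampler` configured for plain D-FPS over the whole point range. The sizes are invented and a CUDA device with the compiled ops is assumed; swapping `fps_mod_list` to `['F-FPS']` or `['FS']` would exercise the feature-distance samplers defined above instead.

```python
# Invented sizes; requires a GPU and mmdet3d's compiled extensions.
import torch

from mmdet3d.ops import Points_Sampler

sampler = Points_Sampler(
    num_point=[64],
    fps_mod_list=['D-FPS'],          # Euclidean-distance FPS
    fps_sample_range_list=[-1])      # sample over all N points

B, N, C = 2, 1024, 16
points_xyz = torch.rand(B, N, 3).cuda()
features = torch.rand(B, C, N).cuda()   # channel-first point descriptors

idx = sampler(points_xyz, features)
print(idx.shape)  # (2, 64): indices of the sampled points
```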
  {
    "path": "mmdet3d/ops/furthest_point_sample/src/furthest_point_sample.cpp",
    "content": "// Modified from\n// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling.cpp\n\n#include <ATen/cuda/CUDAContext.h>\n#include <THC/THC.h>\n#include <torch/extension.h>\n#include <torch/serialize/tensor.h>\n\n#include <vector>\n\nextern THCState *state;\n\nint furthest_point_sampling_wrapper(int b, int n, int m,\n                                    at::Tensor points_tensor,\n                                    at::Tensor temp_tensor,\n                                    at::Tensor idx_tensor);\n\nvoid furthest_point_sampling_kernel_launcher(int b, int n, int m,\n                                             const float *dataset, float *temp,\n                                             int *idxs, cudaStream_t stream);\n\nint furthest_point_sampling_with_dist_wrapper(int b, int n, int m,\n                                              at::Tensor points_tensor,\n                                              at::Tensor temp_tensor,\n                                              at::Tensor idx_tensor);\n\nvoid furthest_point_sampling_with_dist_kernel_launcher(int b, int n, int m,\n                                                       const float *dataset,\n                                                       float *temp, int *idxs,\n                                                       cudaStream_t stream);\n\nint furthest_point_sampling_wrapper(int b, int n, int m,\n                                    at::Tensor points_tensor,\n                                    at::Tensor temp_tensor,\n                                    at::Tensor idx_tensor) {\n  const float *points = points_tensor.data_ptr<float>();\n  float *temp = temp_tensor.data_ptr<float>();\n  int *idx = idx_tensor.data_ptr<int>();\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();\n  furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream);\n  return 1;\n}\n\nint furthest_point_sampling_with_dist_wrapper(int b, int n, int m,\n                                              at::Tensor points_tensor,\n                                              at::Tensor temp_tensor,\n                                              at::Tensor idx_tensor) {\n\n  const float *points = points_tensor.data<float>();\n  float *temp = temp_tensor.data<float>();\n  int *idx = idx_tensor.data<int>();\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();\n  furthest_point_sampling_with_dist_kernel_launcher(b, n, m, points, temp, idx, stream);\n  return 1;\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"furthest_point_sampling_wrapper\", &furthest_point_sampling_wrapper,\n        \"furthest_point_sampling_wrapper\");\n  m.def(\"furthest_point_sampling_with_dist_wrapper\",\n        &furthest_point_sampling_with_dist_wrapper,\n        \"furthest_point_sampling_with_dist_wrapper\");\n}\n"
  },
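The wrapper above only converts the tensors to raw pointers and forwards them, together with the current CUDA stream, to the kernel launcher. For orientation, a minimal (hypothetical, not the project's actual setup.py) sketch of how such a .cpp/.cu pair is typically compiled into an extension module with torch.utils.cpp_extension; the module name furthest_point_sample_ext is assumed:

```python
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

# Hypothetical build sketch: compiles the pybind11 wrapper and the CUDA kernel
# into a single importable extension module.
setup(
    name='furthest_point_sample_ext',
    ext_modules=[
        CUDAExtension(
            name='furthest_point_sample_ext',
            sources=[
                'src/furthest_point_sample.cpp',
                'src/furthest_point_sample_cuda.cu',
            ]),
    ],
    cmdclass={'build_ext': BuildExtension})
```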
  {
    "path": "mmdet3d/ops/furthest_point_sample/src/furthest_point_sample_cuda.cu",
    "content": "// Modified from\n// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling_gpu.cu\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#define TOTAL_THREADS 1024\n#define THREADS_PER_BLOCK 256\n#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))\n\ninline int opt_n_threads(int work_size) {\n  const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);\n\n  return max(min(1 << pow_2, TOTAL_THREADS), 1);\n}\n\n__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,\n                         int idx1, int idx2) {\n  const float v1 = dists[idx1], v2 = dists[idx2];\n  const int i1 = dists_i[idx1], i2 = dists_i[idx2];\n  dists[idx1] = max(v1, v2);\n  dists_i[idx1] = v2 > v1 ? i2 : i1;\n}\n\ntemplate <unsigned int block_size>\n__global__ void furthest_point_sampling_kernel(\n    int b, int n, int m, const float *__restrict__ dataset,\n    float *__restrict__ temp, int *__restrict__ idxs) {\n  // dataset: (B, N, 3)\n  // tmp: (B, N)\n  // output:\n  //      idx: (B, M)\n\n  if (m <= 0) return;\n  __shared__ float dists[block_size];\n  __shared__ int dists_i[block_size];\n\n  int batch_index = blockIdx.x;\n  dataset += batch_index * n * 3;\n  temp += batch_index * n;\n  idxs += batch_index * m;\n\n  int tid = threadIdx.x;\n  const int stride = block_size;\n\n  int old = 0;\n  if (threadIdx.x == 0) idxs[0] = old;\n\n  __syncthreads();\n  for (int j = 1; j < m; j++) {\n    int besti = 0;\n    float best = -1;\n    float x1 = dataset[old * 3 + 0];\n    float y1 = dataset[old * 3 + 1];\n    float z1 = dataset[old * 3 + 2];\n    for (int k = tid; k < n; k += stride) {\n      float x2, y2, z2;\n      x2 = dataset[k * 3 + 0];\n      y2 = dataset[k * 3 + 1];\n      z2 = dataset[k * 3 + 2];\n      // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);\n      // if (mag <= 1e-3)\n      // continue;\n\n      float d =\n          (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);\n      float d2 = min(d, temp[k]);\n      temp[k] = d2;\n      besti = d2 > best ? k : besti;\n      best = d2 > best ? 
d2 : best;\n    }\n    dists[tid] = best;\n    dists_i[tid] = besti;\n    __syncthreads();\n\n    if (block_size >= 1024) {\n      if (tid < 512) {\n        __update(dists, dists_i, tid, tid + 512);\n      }\n      __syncthreads();\n    }\n\n    if (block_size >= 512) {\n      if (tid < 256) {\n        __update(dists, dists_i, tid, tid + 256);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 256) {\n      if (tid < 128) {\n        __update(dists, dists_i, tid, tid + 128);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 128) {\n      if (tid < 64) {\n        __update(dists, dists_i, tid, tid + 64);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 64) {\n      if (tid < 32) {\n        __update(dists, dists_i, tid, tid + 32);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 32) {\n      if (tid < 16) {\n        __update(dists, dists_i, tid, tid + 16);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 16) {\n      if (tid < 8) {\n        __update(dists, dists_i, tid, tid + 8);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 8) {\n      if (tid < 4) {\n        __update(dists, dists_i, tid, tid + 4);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 4) {\n      if (tid < 2) {\n        __update(dists, dists_i, tid, tid + 2);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 2) {\n      if (tid < 1) {\n        __update(dists, dists_i, tid, tid + 1);\n      }\n      __syncthreads();\n    }\n\n    old = dists_i[0];\n    if (tid == 0) idxs[j] = old;\n  }\n}\n\nvoid furthest_point_sampling_kernel_launcher(int b, int n, int m,\n                                             const float *dataset, float *temp,\n                                             int *idxs, cudaStream_t stream) {\n  // dataset: (B, N, 3)\n  // tmp: (B, N)\n  // output:\n  //      idx: (B, M)\n\n  cudaError_t err;\n  unsigned int n_threads = opt_n_threads(n);\n\n  switch (n_threads) {\n    case 1024:\n      furthest_point_sampling_kernel<1024>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 512:\n      furthest_point_sampling_kernel<512>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 256:\n      furthest_point_sampling_kernel<256>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 128:\n      furthest_point_sampling_kernel<128>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 64:\n      furthest_point_sampling_kernel<64>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 32:\n      furthest_point_sampling_kernel<32>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 16:\n      furthest_point_sampling_kernel<16>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 8:\n      furthest_point_sampling_kernel<8>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 4:\n      furthest_point_sampling_kernel<4>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 2:\n      furthest_point_sampling_kernel<2>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    case 1:\n      furthest_point_sampling_kernel<1>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n      break;\n    
default:\n      furthest_point_sampling_kernel<512>\n          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);\n  }\n\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n}\n\n// Modified from\n// https://github.com/qiqihaer/3DSSD-pytorch/blob/master/lib/pointnet2/src/sampling_gpu.cu\ntemplate <unsigned int block_size>\n__global__ void furthest_point_sampling_with_dist_kernel(\n    int b, int n, int m, const float *__restrict__ dataset,\n    float *__restrict__ temp, int *__restrict__ idxs) {\n  // dataset: (B, N, N)\n  // tmp: (B, N)\n  // output:\n  //      idx: (B, M)\n\n  if (m <= 0)\n    return;\n  __shared__ float dists[block_size];\n  __shared__ int dists_i[block_size];\n\n  int batch_index = blockIdx.x;\n  dataset += batch_index * n * n;\n  temp += batch_index * n;\n  idxs += batch_index * m;\n\n  int tid = threadIdx.x;\n  const int stride = block_size;\n\n  int old = 0;\n  if (threadIdx.x == 0)\n    idxs[0] = old;\n\n  __syncthreads();\n  for (int j = 1; j < m; j++) {\n    int besti = 0;\n    float best = -1;\n    // float x1 = dataset[old * 3 + 0];\n    // float y1 = dataset[old * 3 + 1];\n    // float z1 = dataset[old * 3 + 2];\n    for (int k = tid; k < n; k += stride) {\n      // float x2, y2, z2;\n      // x2 = dataset[k * 3 + 0];\n      // y2 = dataset[k * 3 + 1];\n      // z2 = dataset[k * 3 + 2];\n\n      // float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) *\n      // (z2 - z1);\n      float d = dataset[old * n + k];\n\n      float d2 = min(d, temp[k]);\n      temp[k] = d2;\n      besti = d2 > best ? k : besti;\n      best = d2 > best ? d2 : best;\n    }\n    dists[tid] = best;\n    dists_i[tid] = besti;\n    __syncthreads();\n\n    if (block_size >= 1024) {\n      if (tid < 512) {\n        __update(dists, dists_i, tid, tid + 512);\n      }\n      __syncthreads();\n    }\n\n    if (block_size >= 512) {\n      if (tid < 256) {\n        __update(dists, dists_i, tid, tid + 256);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 256) {\n      if (tid < 128) {\n        __update(dists, dists_i, tid, tid + 128);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 128) {\n      if (tid < 64) {\n        __update(dists, dists_i, tid, tid + 64);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 64) {\n      if (tid < 32) {\n        __update(dists, dists_i, tid, tid + 32);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 32) {\n      if (tid < 16) {\n        __update(dists, dists_i, tid, tid + 16);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 16) {\n      if (tid < 8) {\n        __update(dists, dists_i, tid, tid + 8);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 8) {\n      if (tid < 4) {\n        __update(dists, dists_i, tid, tid + 4);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 4) {\n      if (tid < 2) {\n        __update(dists, dists_i, tid, tid + 2);\n      }\n      __syncthreads();\n    }\n    if (block_size >= 2) {\n      if (tid < 1) {\n        __update(dists, dists_i, tid, tid + 1);\n      }\n      __syncthreads();\n    }\n\n    old = dists_i[0];\n    if (tid == 0)\n      idxs[j] = old;\n  }\n}\n\nvoid furthest_point_sampling_with_dist_kernel_launcher(int b, int n, int m,\n                                                       const float *dataset,\n                                                       float *temp, int *idxs,\n           
                                            cudaStream_t stream) {\n  // dataset: (B, N, N)\n  // temp: (B, N)\n  // output:\n  //      idx: (B, M)\n\n  cudaError_t err;\n  unsigned int n_threads = opt_n_threads(n);\n\n  switch (n_threads) {\n  case 1024:\n    furthest_point_sampling_with_dist_kernel<1024><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  case 512:\n    furthest_point_sampling_with_dist_kernel<512><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  case 256:\n    furthest_point_sampling_with_dist_kernel<256><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  case 128:\n    furthest_point_sampling_with_dist_kernel<128><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  case 64:\n    furthest_point_sampling_with_dist_kernel<64><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  case 32:\n    furthest_point_sampling_with_dist_kernel<32><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  case 16:\n    furthest_point_sampling_with_dist_kernel<16><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  case 8:\n    furthest_point_sampling_with_dist_kernel<8><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  case 4:\n    furthest_point_sampling_with_dist_kernel<4><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  case 2:\n    furthest_point_sampling_with_dist_kernel<2><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  case 1:\n    furthest_point_sampling_with_dist_kernel<1><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n    break;\n  default:\n    furthest_point_sampling_with_dist_kernel<512><<<b, n_threads, 0, stream>>>(\n        b, n, m, dataset, temp, idxs);\n  }\n\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n}\n"
  },
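The kernel above keeps, per point, the squared distance to the closest sample chosen so far (temp), takes an element-wise minimum against the distances to the newest sample, and then reduces to the argmax in shared memory. A slow pure-PyTorch sketch of the same algorithm, useful only for checking the kernel's output on small inputs (the 1e10 initialisation of temp mirrors the usual caller-side setup and is an assumption here):

```python
import torch


def fps_reference(xyz: torch.Tensor, npoint: int) -> torch.Tensor:
    """Plain-PyTorch sketch of the kernel's algorithm (illustration only).

    `temp` caches each point's squared distance to the closest point selected
    so far; every iteration the point with the largest cached distance becomes
    the next sample, exactly as the CUDA kernel does (starting from index 0).
    """
    B, N, _ = xyz.shape
    idx = xyz.new_zeros((B, npoint), dtype=torch.long)
    temp = xyz.new_full((B, N), 1e10)                  # running min squared distances
    farthest = xyz.new_zeros((B, ), dtype=torch.long)  # the kernel always starts at point 0
    batch = torch.arange(B, device=xyz.device)
    for j in range(npoint):
        idx[:, j] = farthest
        centroid = xyz[batch, farthest].unsqueeze(1)   # (B, 1, 3)
        dist = ((xyz - centroid)**2).sum(dim=-1)       # (B, N)
        temp = torch.min(temp, dist)
        farthest = temp.argmax(dim=1)
    return idx
```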
  {
    "path": "mmdet3d/ops/furthest_point_sample/utils.py",
    "content": "import torch\n\n\ndef calc_square_dist(point_feat_a, point_feat_b, norm=True):\n    \"\"\"Calculating square distance between a and b.\n\n    Args:\n        point_feat_a (Tensor): (B, N, C) Feature vector of each point.\n        point_feat_b (Tensor): (B, M, C) Feature vector of each point.\n        norm (Bool): Whether to normalize the distance.\n            Default: True.\n\n    Returns:\n        Tensor: (B, N, M) Distance between each pair points.\n    \"\"\"\n    length_a = point_feat_a.shape[1]\n    length_b = point_feat_b.shape[1]\n    num_channel = point_feat_a.shape[-1]\n    # [bs, n, 1]\n    a_square = torch.sum(point_feat_a.unsqueeze(dim=2).pow(2), dim=-1)\n    # [bs, 1, m]\n    b_square = torch.sum(point_feat_b.unsqueeze(dim=1).pow(2), dim=-1)\n    a_square = a_square.repeat((1, 1, length_b))  # [bs, n, m]\n    b_square = b_square.repeat((1, length_a, 1))  # [bs, n, m]\n\n    coor = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2))\n\n    dist = a_square + b_square - 2 * coor\n    if norm:\n        dist = torch.sqrt(dist) / num_channel\n    return dist\n"
  },
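calc_square_dist expands the squared Euclidean distance as ||a||^2 + ||b||^2 - 2 a·b. A quick sanity check of the un-normalised branch against torch.cdist:

```python
import torch

from mmdet3d.ops.furthest_point_sample.utils import calc_square_dist

a = torch.rand(2, 32, 16)
b = torch.rand(2, 48, 16)
ours = calc_square_dist(a, b, norm=False)    # (2, 32, 48) squared distances
ref = torch.cdist(a, b)**2                   # same quantity via torch.cdist
print(torch.allclose(ours, ref, atol=1e-4))  # expected: True
```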
  {
    "path": "mmdet3d/ops/gather_points/__init__.py",
    "content": "from .gather_points import gather_points\n\n__all__ = ['gather_points']\n"
  },
  {
    "path": "mmdet3d/ops/gather_points/gather_points.py",
    "content": "import torch\nfrom torch.autograd import Function\n\nfrom . import gather_points_ext\n\n\nclass GatherPoints(Function):\n    \"\"\"Gather Points.\n\n    Gather points with given index.\n    \"\"\"\n\n    @staticmethod\n    def forward(ctx, features: torch.Tensor,\n                indices: torch.Tensor) -> torch.Tensor:\n        \"\"\"forward.\n\n        Args:\n            features (Tensor): (B, C, N) features to gather.\n            indices (Tensor): (B, M) where M is the number of points.\n\n        Returns:\n            Tensor: (B, C, M) where M is the number of points.\n        \"\"\"\n        assert features.is_contiguous()\n        assert indices.is_contiguous()\n\n        B, npoint = indices.size()\n        _, C, N = features.size()\n        output = torch.cuda.FloatTensor(B, C, npoint)\n\n        gather_points_ext.gather_points_wrapper(B, C, N, npoint, features,\n                                                indices, output)\n\n        ctx.for_backwards = (indices, C, N)\n        ctx.mark_non_differentiable(indices)\n        return output\n\n    @staticmethod\n    def backward(ctx, grad_out):\n        idx, C, N = ctx.for_backwards\n        B, npoint = idx.size()\n\n        grad_features = torch.cuda.FloatTensor(B, C, N).zero_()\n        grad_out_data = grad_out.data.contiguous()\n        gather_points_ext.gather_points_grad_wrapper(B, C, N, npoint,\n                                                     grad_out_data, idx,\n                                                     grad_features.data)\n        return grad_features, None\n\n\ngather_points = GatherPoints.apply\n"
  },
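gather_points is functionally a batched torch.gather along the point dimension; the custom kernel mainly avoids materialising the expanded index. A small equivalence check (assumes the ops are compiled and a CUDA device is available):

```python
import torch

from mmdet3d.ops.gather_points import gather_points

B, C, N, M = 2, 8, 64, 16
features = torch.rand(B, C, N).cuda()
indices = torch.randint(0, N, (B, M), dtype=torch.int32).cuda()

out = gather_points(features, indices)  # (B, C, M) via the CUDA op
# Pure-PyTorch equivalent: gather along the N axis with a broadcasted index.
ref = torch.gather(features, 2, indices.long().unsqueeze(1).expand(-1, C, -1))
print(torch.allclose(out, ref))  # expected: True
```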
  {
    "path": "mmdet3d/ops/gather_points/src/gather_points.cpp",
    "content": "#include <ATen/cuda/CUDAContext.h>\n#include <THC/THC.h>\n#include <torch/extension.h>\n#include <torch/serialize/tensor.h>\n\n#include <vector>\n\nextern THCState *state;\n\nint gather_points_wrapper(int b, int c, int n, int npoints,\n                          at::Tensor points_tensor, at::Tensor idx_tensor,\n                          at::Tensor out_tensor);\n\nvoid gather_points_kernel_launcher(int b, int c, int n, int npoints,\n                                   const float *points, const int *idx,\n                                   float *out, cudaStream_t stream);\n\nint gather_points_grad_wrapper(int b, int c, int n, int npoints,\n                               at::Tensor grad_out_tensor,\n                               at::Tensor idx_tensor,\n                               at::Tensor grad_points_tensor);\n\nvoid gather_points_grad_kernel_launcher(int b, int c, int n, int npoints,\n                                        const float *grad_out, const int *idx,\n                                        float *grad_points,\n                                        cudaStream_t stream);\n\nint gather_points_wrapper(int b, int c, int n, int npoints,\n                          at::Tensor points_tensor, at::Tensor idx_tensor,\n                          at::Tensor out_tensor) {\n  const float *points = points_tensor.data_ptr<float>();\n  const int *idx = idx_tensor.data_ptr<int>();\n  float *out = out_tensor.data_ptr<float>();\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();\n  gather_points_kernel_launcher(b, c, n, npoints, points, idx, out, stream);\n  return 1;\n}\n\nint gather_points_grad_wrapper(int b, int c, int n, int npoints,\n                               at::Tensor grad_out_tensor,\n                               at::Tensor idx_tensor,\n                               at::Tensor grad_points_tensor) {\n  const float *grad_out = grad_out_tensor.data_ptr<float>();\n  const int *idx = idx_tensor.data_ptr<int>();\n  float *grad_points = grad_points_tensor.data_ptr<float>();\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();\n  gather_points_grad_kernel_launcher(b, c, n, npoints, grad_out, idx,\n                                     grad_points, stream);\n  return 1;\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"gather_points_wrapper\", &gather_points_wrapper,\n        \"gather_points_wrapper\");\n  m.def(\"gather_points_grad_wrapper\", &gather_points_grad_wrapper,\n        \"gather_points_grad_wrapper\");\n}\n"
  },
  {
    "path": "mmdet3d/ops/gather_points/src/gather_points_cuda.cu",
    "content": "#include <stdio.h>\n#include <stdlib.h>\n\n#define TOTAL_THREADS 1024\n#define THREADS_PER_BLOCK 256\n#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))\n\n__global__ void gather_points_kernel(int b, int c, int n, int m,\n                                     const float *__restrict__ points,\n                                     const int *__restrict__ idx,\n                                     float *__restrict__ out) {\n  // points: (B, C, N)\n  // idx: (B, M)\n  // output:\n  //      out: (B, C, M)\n\n  int bs_idx = blockIdx.z;\n  int c_idx = blockIdx.y;\n  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n  if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;\n\n  out += bs_idx * c * m + c_idx * m + pt_idx;\n  idx += bs_idx * m + pt_idx;\n  points += bs_idx * c * n + c_idx * n;\n  out[0] = points[idx[0]];\n}\n\nvoid gather_points_kernel_launcher(int b, int c, int n, int npoints,\n                                   const float *points, const int *idx,\n                                   float *out, cudaStream_t stream) {\n  // points: (B, C, N)\n  // idx: (B, npoints)\n  // output:\n  //      out: (B, C, npoints)\n\n  cudaError_t err;\n  dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c,\n              b);  // blockIdx.x(col), blockIdx.y(row)\n  dim3 threads(THREADS_PER_BLOCK);\n\n  gather_points_kernel<<<blocks, threads, 0, stream>>>(b, c, n, npoints, points,\n                                                       idx, out);\n\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n}\n\n__global__ void gather_points_grad_kernel(int b, int c, int n, int m,\n                                          const float *__restrict__ grad_out,\n                                          const int *__restrict__ idx,\n                                          float *__restrict__ grad_points) {\n  // grad_out: (B, C, M)\n  // idx: (B, M)\n  // output:\n  //      grad_points: (B, C, N)\n\n  int bs_idx = blockIdx.z;\n  int c_idx = blockIdx.y;\n  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n  if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;\n\n  grad_out += bs_idx * c * m + c_idx * m + pt_idx;\n  idx += bs_idx * m + pt_idx;\n  grad_points += bs_idx * c * n + c_idx * n;\n\n  atomicAdd(grad_points + idx[0], grad_out[0]);\n}\n\nvoid gather_points_grad_kernel_launcher(int b, int c, int n, int npoints,\n                                        const float *grad_out, const int *idx,\n                                        float *grad_points,\n                                        cudaStream_t stream) {\n  // grad_out: (B, C, npoints)\n  // idx: (B, npoints)\n  // output:\n  //      grad_points: (B, C, N)\n\n  cudaError_t err;\n  dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c,\n              b);  // blockIdx.x(col), blockIdx.y(row)\n  dim3 threads(THREADS_PER_BLOCK);\n\n  gather_points_grad_kernel<<<blocks, threads, 0, stream>>>(\n      b, c, n, npoints, grad_out, idx, grad_points);\n\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n}\n"
  },
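As the grad kernel above shows, the backward pass of gathering is a scatter-add: every output gradient is accumulated back at the index it was read from. A pure-PyTorch sketch of the same accumulation, for illustration only:

```python
import torch


def gather_points_grad_reference(grad_out: torch.Tensor, idx: torch.Tensor,
                                 N: int) -> torch.Tensor:
    """Sketch of the backward kernel: a scatter-add along the N axis."""
    B, C, M = grad_out.shape
    grad_features = grad_out.new_zeros(B, C, N)
    index = idx.long().unsqueeze(1).expand(-1, C, -1)  # (B, C, M)
    grad_features.scatter_add_(2, index, grad_out)
    return grad_features
```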
  {
    "path": "mmdet3d/ops/group_points/__init__.py",
    "content": "from .group_points import GroupAll, QueryAndGroup, grouping_operation\n\n__all__ = ['QueryAndGroup', 'GroupAll', 'grouping_operation']\n"
  },
  {
    "path": "mmdet3d/ops/group_points/group_points.py",
    "content": "import torch\nfrom torch import nn as nn\nfrom torch.autograd import Function\nfrom typing import Tuple\n\nfrom ..ball_query import ball_query\nfrom . import group_points_ext\n\n\nclass QueryAndGroup(nn.Module):\n    \"\"\"Query and Group.\n\n    Groups with a ball query of radius\n\n    Args:\n        max_radius (float): The maximum radius of the balls.\n        sample_num (int): Maximum number of features to gather in the ball.\n        min_radius (float): The minimum radius of the balls.\n        use_xyz (bool): Whether to use xyz.\n            Default: True.\n        return_grouped_xyz (bool): Whether to return grouped xyz.\n            Default: False.\n        normalize_xyz (bool): Whether to normalize xyz.\n            Default: False.\n        uniform_sample (bool): Whether to sample uniformly.\n            Default: False\n        return_unique_cnt (bool): Whether to return the count of\n            unique samples.\n            Default: False.\n    \"\"\"\n\n    def __init__(self,\n                 max_radius,\n                 sample_num,\n                 min_radius=0,\n                 use_xyz=True,\n                 return_grouped_xyz=False,\n                 normalize_xyz=False,\n                 uniform_sample=False,\n                 return_unique_cnt=False):\n        super(QueryAndGroup, self).__init__()\n        self.max_radius = max_radius\n        self.min_radius = min_radius\n        self.sample_num = sample_num\n        self.use_xyz = use_xyz\n        self.return_grouped_xyz = return_grouped_xyz\n        self.normalize_xyz = normalize_xyz\n        self.uniform_sample = uniform_sample\n        self.return_unique_cnt = return_unique_cnt\n        if self.return_unique_cnt:\n            assert self.uniform_sample\n\n    def forward(self, points_xyz, center_xyz, features=None):\n        \"\"\"forward.\n\n        Args:\n            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.\n            center_xyz (Tensor): (B, npoint, 3) Centriods.\n            features (Tensor): (B, C, N) Descriptors of the features.\n\n        Return：\n            Tensor: (B, 3 + C, npoint, sample_num) Grouped feature.\n        \"\"\"\n        idx = ball_query(self.min_radius, self.max_radius, self.sample_num,\n                         points_xyz, center_xyz)\n\n        if self.uniform_sample:\n            unique_cnt = torch.zeros((idx.shape[0], idx.shape[1]))\n            for i_batch in range(idx.shape[0]):\n                for i_region in range(idx.shape[1]):\n                    unique_ind = torch.unique(idx[i_batch, i_region, :])\n                    num_unique = unique_ind.shape[0]\n                    unique_cnt[i_batch, i_region] = num_unique\n                    sample_ind = torch.randint(\n                        0,\n                        num_unique, (self.sample_num - num_unique, ),\n                        dtype=torch.long)\n                    all_ind = torch.cat((unique_ind, unique_ind[sample_ind]))\n                    idx[i_batch, i_region, :] = all_ind\n\n        xyz_trans = points_xyz.transpose(1, 2).contiguous()\n        # (B, 3, npoint, sample_num)\n        grouped_xyz = grouping_operation(xyz_trans, idx)\n        grouped_xyz -= center_xyz.transpose(1, 2).unsqueeze(-1)\n        if self.normalize_xyz:\n            grouped_xyz /= self.max_radius\n\n        if features is not None:\n            grouped_features = grouping_operation(features, idx)\n            if self.use_xyz:\n                # (B, C + 3, npoint, sample_num)\n                
new_features = torch.cat([grouped_xyz, grouped_features],\n                                         dim=1)\n            else:\n                new_features = grouped_features\n        else:\n            assert (self.use_xyz\n                    ), 'Cannot have not features and not use xyz as a feature!'\n            new_features = grouped_xyz\n\n        ret = [new_features]\n        if self.return_grouped_xyz:\n            ret.append(grouped_xyz)\n        if self.return_unique_cnt:\n            ret.append(unique_cnt)\n        if len(ret) == 1:\n            return ret[0]\n        else:\n            return tuple(ret)\n\n\nclass GroupAll(nn.Module):\n    \"\"\"Group All.\n\n    Group xyz with feature.\n\n    Args:\n        use_xyz (bool): Whether to use xyz.\n    \"\"\"\n\n    def __init__(self, use_xyz: bool = True):\n        super().__init__()\n        self.use_xyz = use_xyz\n\n    def forward(self,\n                xyz: torch.Tensor,\n                new_xyz: torch.Tensor,\n                features: torch.Tensor = None):\n        \"\"\"forward.\n\n        Args:\n            xyz (Tensor): (B, N, 3) xyz coordinates of the features.\n            new_xyz (Tensor): Ignored.\n            features (Tensor): (B, C, N) features to group.\n\n        Return:\n            Tensor: (B, C + 3, 1, N) Grouped feature.\n        \"\"\"\n        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)\n        if features is not None:\n            grouped_features = features.unsqueeze(2)\n            if self.use_xyz:\n                new_features = torch.cat([grouped_xyz, grouped_features],\n                                         dim=1)  # (B, 3 + C, 1, N)\n            else:\n                new_features = grouped_features\n        else:\n            new_features = grouped_xyz\n\n        return new_features\n\n\nclass GroupingOperation(Function):\n    \"\"\"Grouping Operation.\n\n    Group feature with given index.\n    \"\"\"\n\n    @staticmethod\n    def forward(ctx, features: torch.Tensor,\n                indices: torch.Tensor) -> torch.Tensor:\n        \"\"\"forward.\n\n        Args:\n            features (Tensor): (B, C, N) tensor of features to group.\n            indices (Tensor): (B, npoint, nsample) the indicies of\n                features to group with.\n\n        Returns:\n            Tensor: (B, C, npoint, nsample) Grouped features.\n        \"\"\"\n        assert features.is_contiguous()\n        assert indices.is_contiguous()\n\n        B, nfeatures, nsample = indices.size()\n        _, C, N = features.size()\n        output = torch.cuda.FloatTensor(B, C, nfeatures, nsample)\n\n        group_points_ext.forward(B, C, N, nfeatures, nsample, features,\n                                 indices, output)\n\n        ctx.for_backwards = (indices, N)\n        return output\n\n    @staticmethod\n    def backward(ctx,\n                 grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:\n        \"\"\"backward.\n\n        Args:\n            grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients\n                of the output from forward.\n\n        Returns:\n            Tensor: (B, C, N) gradient of the features.\n        \"\"\"\n        idx, N = ctx.for_backwards\n\n        B, C, npoint, nsample = grad_out.size()\n        grad_features = torch.cuda.FloatTensor(B, C, N).zero_()\n\n        grad_out_data = grad_out.data.contiguous()\n        group_points_ext.backward(B, C, N, npoint, nsample, grad_out_data, idx,\n                                  grad_features.data)\n        return 
grad_features, None\n\n\ngrouping_operation = GroupingOperation.apply\n"
  },
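A short usage sketch of the two grouping modules (assumes compiled ops and a CUDA device): QueryAndGroup gathers up to sample_num neighbours inside a ball around each centroid, while GroupAll treats the whole point set as a single group.

```python
import torch

from mmdet3d.ops.group_points import GroupAll, QueryAndGroup

B, N, npoint, C, nsample = 2, 1024, 128, 16, 32
points_xyz = torch.rand(B, N, 3).cuda()
center_xyz = points_xyz[:, :npoint, :].contiguous()  # stand-in for FPS centroids
features = torch.rand(B, C, N).cuda()

grouper = QueryAndGroup(max_radius=0.4, sample_num=nsample, use_xyz=True)
grouped = grouper(points_xyz, center_xyz, features)
print(grouped.shape)  # (B, 3 + C, npoint, nsample)

group_all = GroupAll(use_xyz=True)
all_grouped = group_all(points_xyz, None, features)
print(all_grouped.shape)  # (B, 3 + C, 1, N)
```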
  {
    "path": "mmdet3d/ops/group_points/src/group_points.cpp",
    "content": "// Modified from\n// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points.cpp\n\n#include <THC/THC.h>\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n#include <torch/extension.h>\n#include <torch/serialize/tensor.h>\n\n#include <vector>\n\nextern THCState *state;\n\nint group_points_wrapper(int b, int c, int n, int npoints, int nsample,\n                         at::Tensor points_tensor, at::Tensor idx_tensor,\n                         at::Tensor out_tensor);\n\nvoid group_points_kernel_launcher(int b, int c, int n, int npoints, int nsample,\n                                  const float *points, const int *idx,\n                                  float *out, cudaStream_t stream);\n\nint group_points_grad_wrapper(int b, int c, int n, int npoints, int nsample,\n                              at::Tensor grad_out_tensor, at::Tensor idx_tensor,\n                              at::Tensor grad_points_tensor);\n\nvoid group_points_grad_kernel_launcher(int b, int c, int n, int npoints,\n                                       int nsample, const float *grad_out,\n                                       const int *idx, float *grad_points,\n                                       cudaStream_t stream);\n\nint group_points_grad_wrapper(int b, int c, int n, int npoints, int nsample,\n                              at::Tensor grad_out_tensor, at::Tensor idx_tensor,\n                              at::Tensor grad_points_tensor) {\n  float *grad_points = grad_points_tensor.data_ptr<float>();\n  const int *idx = idx_tensor.data_ptr<int>();\n  const float *grad_out = grad_out_tensor.data_ptr<float>();\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();\n\n  group_points_grad_kernel_launcher(b, c, n, npoints, nsample, grad_out, idx,\n                                    grad_points, stream);\n  return 1;\n}\n\nint group_points_wrapper(int b, int c, int n, int npoints, int nsample,\n                         at::Tensor points_tensor, at::Tensor idx_tensor,\n                         at::Tensor out_tensor) {\n  const float *points = points_tensor.data_ptr<float>();\n  const int *idx = idx_tensor.data_ptr<int>();\n  float *out = out_tensor.data_ptr<float>();\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();\n\n  group_points_kernel_launcher(b, c, n, npoints, nsample, points, idx, out,\n                               stream);\n  return 1;\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"forward\", &group_points_wrapper, \"group_points_wrapper\");\n  m.def(\"backward\", &group_points_grad_wrapper, \"group_points_grad_wrapper\");\n}\n"
  },
  {
    "path": "mmdet3d/ops/group_points/src/group_points_cuda.cu",
    "content": "// Modified from\n// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points_gpu.cu\n\n#include <stdio.h>\n#include <stdlib.h>\n\n#define THREADS_PER_BLOCK 256\n#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))\n\n__global__ void group_points_grad_kernel(int b, int c, int n, int npoints,\n                                         int nsample,\n                                         const float *__restrict__ grad_out,\n                                         const int *__restrict__ idx,\n                                         float *__restrict__ grad_points) {\n  // grad_out: (B, C, npoints, nsample)\n  // idx: (B, npoints, nsample)\n  // output:\n  //      grad_points: (B, C, N)\n  int bs_idx = blockIdx.z;\n  int c_idx = blockIdx.y;\n  int index = blockIdx.x * blockDim.x + threadIdx.x;\n  int pt_idx = index / nsample;\n  if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;\n\n  int sample_idx = index % nsample;\n  grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample +\n              pt_idx * nsample + sample_idx;\n  idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;\n\n  atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0], grad_out[0]);\n}\n\nvoid group_points_grad_kernel_launcher(int b, int c, int n, int npoints,\n                                       int nsample, const float *grad_out,\n                                       const int *idx, float *grad_points,\n                                       cudaStream_t stream) {\n  // grad_out: (B, C, npoints, nsample)\n  // idx: (B, npoints, nsample)\n  // output:\n  //      grad_points: (B, C, N)\n  cudaError_t err;\n  dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c,\n              b);  // blockIdx.x(col), blockIdx.y(row)\n  dim3 threads(THREADS_PER_BLOCK);\n\n  group_points_grad_kernel<<<blocks, threads, 0, stream>>>(\n      b, c, n, npoints, nsample, grad_out, idx, grad_points);\n\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n}\n\n__global__ void group_points_kernel(int b, int c, int n, int npoints,\n                                    int nsample,\n                                    const float *__restrict__ points,\n                                    const int *__restrict__ idx,\n                                    float *__restrict__ out) {\n  // points: (B, C, N)\n  // idx: (B, npoints, nsample)\n  // output:\n  //      out: (B, C, npoints, nsample)\n  int bs_idx = blockIdx.z;\n  int c_idx = blockIdx.y;\n  int index = blockIdx.x * blockDim.x + threadIdx.x;\n  int pt_idx = index / nsample;\n  if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;\n\n  int sample_idx = index % nsample;\n\n  idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;\n  int in_idx = bs_idx * c * n + c_idx * n + idx[0];\n  int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample +\n                pt_idx * nsample + sample_idx;\n\n  out[out_idx] = points[in_idx];\n}\n\nvoid group_points_kernel_launcher(int b, int c, int n, int npoints, int nsample,\n                                  const float *points, const int *idx,\n                                  float *out, cudaStream_t stream) {\n  // points: (B, C, N)\n  // idx: (B, npoints, nsample)\n  // output:\n  //      out: (B, C, npoints, nsample)\n  cudaError_t err;\n  dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c,\n              b);  // 
blockIdx.x(col), blockIdx.y(row)\n  dim3 threads(THREADS_PER_BLOCK);\n\n  group_points_kernel<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample,\n                                                      points, idx, out);\n  // cudaDeviceSynchronize();  // for using printf in kernel function\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n}\n"
  },
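The forward kernel is again an indexed read, so grouping_operation can be cross-checked against torch.gather with a flattened (npoint * nsample) index:

```python
import torch

from mmdet3d.ops.group_points import grouping_operation

B, C, N, npoint, nsample = 2, 8, 64, 16, 4
features = torch.rand(B, C, N).cuda()
idx = torch.randint(0, N, (B, npoint, nsample), dtype=torch.int32).cuda()

out = grouping_operation(features, idx)  # (B, C, npoint, nsample) via the kernel
# Pure-PyTorch equivalent: gather along N with a flattened index, then reshape.
flat_idx = idx.long().view(B, 1, npoint * nsample).expand(-1, C, -1)
ref = torch.gather(features, 2, flat_idx).view(B, C, npoint, nsample)
print(torch.allclose(out, ref))  # expected: True
```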
  {
    "path": "mmdet3d/ops/interpolate/__init__.py",
    "content": "from .three_interpolate import three_interpolate\nfrom .three_nn import three_nn\n\n__all__ = ['three_nn', 'three_interpolate']\n"
  },
  {
    "path": "mmdet3d/ops/interpolate/src/interpolate.cpp",
    "content": "// Modified from\n// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp\n\n#include <THC/THC.h>\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <torch/extension.h>\n#include <torch/serialize/tensor.h>\n\n#include <vector>\n\nextern THCState *state;\n\nvoid three_nn_wrapper(int b, int n, int m, at::Tensor unknown_tensor,\n                      at::Tensor known_tensor, at::Tensor dist2_tensor,\n                      at::Tensor idx_tensor);\n\nvoid three_nn_kernel_launcher(int b, int n, int m, const float *unknown,\n                              const float *known, float *dist2, int *idx,\n                              cudaStream_t stream);\n\nvoid three_interpolate_wrapper(int b, int c, int m, int n,\n                               at::Tensor points_tensor, at::Tensor idx_tensor,\n                               at::Tensor weight_tensor, at::Tensor out_tensor);\n\nvoid three_interpolate_kernel_launcher(int b, int c, int m, int n,\n                                       const float *points, const int *idx,\n                                       const float *weight, float *out,\n                                       cudaStream_t stream);\n\nvoid three_interpolate_grad_wrapper(int b, int c, int n, int m,\n                                    at::Tensor grad_out_tensor,\n                                    at::Tensor idx_tensor,\n                                    at::Tensor weight_tensor,\n                                    at::Tensor grad_points_tensor);\n\nvoid three_interpolate_grad_kernel_launcher(int b, int c, int n, int m,\n                                            const float *grad_out,\n                                            const int *idx, const float *weight,\n                                            float *grad_points,\n                                            cudaStream_t stream);\n\nvoid three_nn_wrapper(int b, int n, int m, at::Tensor unknown_tensor,\n                      at::Tensor known_tensor, at::Tensor dist2_tensor,\n                      at::Tensor idx_tensor) {\n  const float *unknown = unknown_tensor.data_ptr<float>();\n  const float *known = known_tensor.data_ptr<float>();\n  float *dist2 = dist2_tensor.data_ptr<float>();\n  int *idx = idx_tensor.data_ptr<int>();\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();\n  three_nn_kernel_launcher(b, n, m, unknown, known, dist2, idx, stream);\n}\n\nvoid three_interpolate_wrapper(int b, int c, int m, int n,\n                               at::Tensor points_tensor, at::Tensor idx_tensor,\n                               at::Tensor weight_tensor,\n                               at::Tensor out_tensor) {\n  const float *points = points_tensor.data_ptr<float>();\n  const float *weight = weight_tensor.data_ptr<float>();\n  float *out = out_tensor.data_ptr<float>();\n  const int *idx = idx_tensor.data_ptr<int>();\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();\n  three_interpolate_kernel_launcher(b, c, m, n, points, idx, weight, out,\n                                    stream);\n}\n\nvoid three_interpolate_grad_wrapper(int b, int c, int n, int m,\n                                    at::Tensor grad_out_tensor,\n                                    at::Tensor idx_tensor,\n                                    at::Tensor weight_tensor,\n                                    at::Tensor grad_points_tensor) {\n  const float *grad_out = 
grad_out_tensor.data_ptr<float>();\n  const float *weight = weight_tensor.data_ptr<float>();\n  float *grad_points = grad_points_tensor.data_ptr<float>();\n  const int *idx = idx_tensor.data_ptr<int>();\n\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();\n  three_interpolate_grad_kernel_launcher(b, c, n, m, grad_out, idx, weight,\n                                         grad_points, stream);\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"three_nn_wrapper\", &three_nn_wrapper, \"three_nn_wrapper\");\n  m.def(\"three_interpolate_wrapper\", &three_interpolate_wrapper,\n        \"three_interpolate_wrapper\");\n  m.def(\"three_interpolate_grad_wrapper\", &three_interpolate_grad_wrapper,\n        \"three_interpolate_grad_wrapper\");\n}\n"
  },
  {
    "path": "mmdet3d/ops/interpolate/src/three_interpolate_cuda.cu",
    "content": "// Modified from\n// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate_gpu.cu\n\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#define THREADS_PER_BLOCK 256\n#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))\n\n__global__ void three_interpolate_kernel(int b, int c, int m, int n,\n                                         const float *__restrict__ points,\n                                         const int *__restrict__ idx,\n                                         const float *__restrict__ weight,\n                                         float *__restrict__ out) {\n  // points: (B, C, M)\n  // idx: (B, N, 3)\n  // weight: (B, N, 3)\n  // output:\n  //      out: (B, C, N)\n\n  int bs_idx = blockIdx.z;\n  int c_idx = blockIdx.y;\n  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n\n  if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;\n\n  weight += bs_idx * n * 3 + pt_idx * 3;\n  points += bs_idx * c * m + c_idx * m;\n  idx += bs_idx * n * 3 + pt_idx * 3;\n  out += bs_idx * c * n + c_idx * n;\n\n  out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] +\n                weight[2] * points[idx[2]];\n}\n\nvoid three_interpolate_kernel_launcher(int b, int c, int m, int n,\n                                       const float *points, const int *idx,\n                                       const float *weight, float *out,\n                                       cudaStream_t stream) {\n  // points: (B, C, M)\n  // idx: (B, N, 3)\n  // weight: (B, N, 3)\n  // output:\n  //      out: (B, C, N)\n\n  cudaError_t err;\n  dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c,\n              b);  // blockIdx.x(col), blockIdx.y(row)\n  dim3 threads(THREADS_PER_BLOCK);\n  three_interpolate_kernel<<<blocks, threads, 0, stream>>>(b, c, m, n, points,\n                                                           idx, weight, out);\n\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n}\n\n__global__ void three_interpolate_grad_kernel(\n    int b, int c, int n, int m, const float *__restrict__ grad_out,\n    const int *__restrict__ idx, const float *__restrict__ weight,\n    float *__restrict__ grad_points) {\n  // grad_out: (B, C, N)\n  // weight: (B, N, 3)\n  // output:\n  //      grad_points: (B, C, M)\n\n  int bs_idx = blockIdx.z;\n  int c_idx = blockIdx.y;\n  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n\n  if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;\n\n  grad_out += bs_idx * c * n + c_idx * n + pt_idx;\n  weight += bs_idx * n * 3 + pt_idx * 3;\n  grad_points += bs_idx * c * m + c_idx * m;\n  idx += bs_idx * n * 3 + pt_idx * 3;\n\n  atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]);\n  atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]);\n  atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]);\n}\n\nvoid three_interpolate_grad_kernel_launcher(int b, int c, int n, int m,\n                                            const float *grad_out,\n                                            const int *idx, const float *weight,\n                                            float *grad_points,\n                                            cudaStream_t stream) {\n  // grad_out: (B, C, N)\n  // weight: (B, N, 3)\n  // output:\n  //      grad_points: (B, C, M)\n\n  cudaError_t err;\n  dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c,\n              b);  // blockIdx.x(col), blockIdx.y(row)\n  dim3 
threads(THREADS_PER_BLOCK);\n  three_interpolate_grad_kernel<<<blocks, threads, 0, stream>>>(\n      b, c, n, m, grad_out, idx, weight, grad_points);\n\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n}\n"
  },
  {
    "path": "mmdet3d/ops/interpolate/src/three_nn_cuda.cu",
    "content": "// Modified from\n// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate_gpu.cu\n\n#include <math.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#define THREADS_PER_BLOCK 256\n#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))\n\n__global__ void three_nn_kernel(int b, int n, int m,\n                                const float *__restrict__ unknown,\n                                const float *__restrict__ known,\n                                float *__restrict__ dist2,\n                                int *__restrict__ idx) {\n  // unknown: (B, N, 3)\n  // known: (B, M, 3)\n  // output:\n  //      dist2: (B, N, 3)\n  //      idx: (B, N, 3)\n\n  int bs_idx = blockIdx.y;\n  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n  if (bs_idx >= b || pt_idx >= n) return;\n\n  unknown += bs_idx * n * 3 + pt_idx * 3;\n  known += bs_idx * m * 3;\n  dist2 += bs_idx * n * 3 + pt_idx * 3;\n  idx += bs_idx * n * 3 + pt_idx * 3;\n\n  float ux = unknown[0];\n  float uy = unknown[1];\n  float uz = unknown[2];\n\n  double best1 = 1e40, best2 = 1e40, best3 = 1e40;\n  int besti1 = 0, besti2 = 0, besti3 = 0;\n  for (int k = 0; k < m; ++k) {\n    float x = known[k * 3 + 0];\n    float y = known[k * 3 + 1];\n    float z = known[k * 3 + 2];\n    float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);\n    if (d < best1) {\n      best3 = best2;\n      besti3 = besti2;\n      best2 = best1;\n      besti2 = besti1;\n      best1 = d;\n      besti1 = k;\n    } else if (d < best2) {\n      best3 = best2;\n      besti3 = besti2;\n      best2 = d;\n      besti2 = k;\n    } else if (d < best3) {\n      best3 = d;\n      besti3 = k;\n    }\n  }\n  dist2[0] = best1;\n  dist2[1] = best2;\n  dist2[2] = best3;\n  idx[0] = besti1;\n  idx[1] = besti2;\n  idx[2] = besti3;\n}\n\nvoid three_nn_kernel_launcher(int b, int n, int m, const float *unknown,\n                              const float *known, float *dist2, int *idx,\n                              cudaStream_t stream) {\n  // unknown: (B, N, 3)\n  // known: (B, M, 3)\n  // output:\n  //      dist2: (B, N, 3)\n  //      idx: (B, N, 3)\n\n  cudaError_t err;\n  dim3 blocks(DIVUP(n, THREADS_PER_BLOCK),\n              b);  // blockIdx.x(col), blockIdx.y(row)\n  dim3 threads(THREADS_PER_BLOCK);\n\n  three_nn_kernel<<<blocks, threads, 0, stream>>>(b, n, m, unknown, known,\n                                                  dist2, idx);\n\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n}\n"
  },
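The kernel above is a brute-force 3-nearest-neighbour search per target point. On small inputs it can be cross-checked against torch.cdist plus topk (index agreement additionally assumes no exact distance ties, which is safe for random floats):

```python
import torch

from mmdet3d.ops.interpolate import three_nn

B, N, M = 2, 128, 64
target = torch.rand(B, N, 3).cuda()
source = torch.rand(B, M, 3).cuda()

dist, idx = three_nn(target, source)  # (B, N, 3) each, distances ascending
ref_dist, ref_idx = torch.cdist(target, source).topk(3, dim=2, largest=False)
print(torch.allclose(dist, ref_dist, atol=1e-4))  # expected: True
# ref_idx should match idx as well, except possibly where distances tie.
```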
  {
    "path": "mmdet3d/ops/interpolate/three_interpolate.py",
    "content": "import torch\nfrom torch.autograd import Function\nfrom typing import Tuple\n\nfrom . import interpolate_ext\n\n\nclass ThreeInterpolate(Function):\n\n    @staticmethod\n    def forward(ctx, features: torch.Tensor, indices: torch.Tensor,\n                weight: torch.Tensor) -> torch.Tensor:\n        \"\"\"Performs weighted linear interpolation on 3 features.\n\n        Args:\n            features (Tensor): (B, C, M) Features descriptors to be\n                interpolated from\n            indices (Tensor): (B, n, 3) index three nearest neighbors\n                of the target features in features\n            weight (Tensor): (B, n, 3) weights of interpolation\n\n        Returns:\n            Tensor: (B, C, N) tensor of the interpolated features\n        \"\"\"\n        assert features.is_contiguous()\n        assert indices.is_contiguous()\n        assert weight.is_contiguous()\n\n        B, c, m = features.size()\n        n = indices.size(1)\n        ctx.three_interpolate_for_backward = (indices, weight, m)\n        output = torch.cuda.FloatTensor(B, c, n)\n\n        interpolate_ext.three_interpolate_wrapper(B, c, m, n, features,\n                                                  indices, weight, output)\n        return output\n\n    @staticmethod\n    def backward(\n        ctx, grad_out: torch.Tensor\n    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n        \"\"\"Backward of three interpolate.\n\n        Args:\n            grad_out (Tensor): (B, C, N) tensor with gradients of outputs\n\n        Returns:\n            Tensor: (B, C, M) tensor with gradients of features\n        \"\"\"\n        idx, weight, m = ctx.three_interpolate_for_backward\n        B, c, n = grad_out.size()\n\n        grad_features = torch.cuda.FloatTensor(B, c, m).zero_()\n        grad_out_data = grad_out.data.contiguous()\n\n        interpolate_ext.three_interpolate_grad_wrapper(B, c, n, m,\n                                                       grad_out_data, idx,\n                                                       weight,\n                                                       grad_features.data)\n        return grad_features, None, None\n\n\nthree_interpolate = ThreeInterpolate.apply\n"
  },
  {
    "path": "mmdet3d/ops/interpolate/three_nn.py",
    "content": "import torch\nfrom torch.autograd import Function\nfrom typing import Tuple\n\nfrom . import interpolate_ext\n\n\nclass ThreeNN(Function):\n\n    @staticmethod\n    def forward(ctx, target: torch.Tensor,\n                source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:\n        \"\"\"Find the top-3 nearest neighbors of the target set from the source\n        set.\n\n        Args:\n            target (Tensor): shape (B, N, 3), points set that needs to\n                find the nearest neighbors.\n            source (Tensor): shape (B, M, 3), points set that is used\n                to find the nearest neighbors of points in target set.\n\n        Returns:\n            Tensor: shape (B, N, 3), L2 distance of each point in target\n                set to their corresponding nearest neighbors.\n        \"\"\"\n        assert target.is_contiguous()\n        assert source.is_contiguous()\n\n        B, N, _ = target.size()\n        m = source.size(1)\n        dist2 = torch.cuda.FloatTensor(B, N, 3)\n        idx = torch.cuda.IntTensor(B, N, 3)\n\n        interpolate_ext.three_nn_wrapper(B, N, m, target, source, dist2, idx)\n\n        ctx.mark_non_differentiable(idx)\n\n        return torch.sqrt(dist2), idx\n\n    @staticmethod\n    def backward(ctx, a=None, b=None):\n        return None, None\n\n\nthree_nn = ThreeNN.apply\n"
  },
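three_nn and three_interpolate are normally used together for PointNet++-style feature propagation: find the three nearest source points of every target point, turn the returned distances into normalised inverse-distance weights, and interpolate the source features onto the target set. A usage sketch (assumes compiled ops and a CUDA device; the 1e-8 epsilon and inverse-distance weighting follow common practice rather than anything mandated by the ops):

```python
import torch

from mmdet3d.ops.interpolate import three_interpolate, three_nn

B, N, M, C = 2, 1024, 256, 32
target_xyz = torch.rand(B, N, 3).cuda()   # dense points to propagate onto
source_xyz = torch.rand(B, M, 3).cuda()   # sparse points carrying features
source_feats = torch.rand(B, C, M).cuda()

dist, idx = three_nn(target_xyz, source_xyz)       # (B, N, 3) each
weight = 1.0 / (dist + 1e-8)                       # inverse-distance weights
weight = weight / weight.sum(dim=2, keepdim=True)  # normalise to sum to 1
interpolated = three_interpolate(source_feats, idx, weight)
print(interpolated.shape)  # (B, C, N)
```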
  {
    "path": "mmdet3d/ops/iou3d/__init__.py",
    "content": "from .iou3d_utils import boxes_iou_bev, nms_gpu, nms_normal_gpu\n\n__all__ = ['boxes_iou_bev', 'nms_gpu', 'nms_normal_gpu']\n"
  },
  {
    "path": "mmdet3d/ops/iou3d/iou3d_utils.py",
    "content": "import torch\n\nfrom . import iou3d_cuda\n\n\ndef boxes_iou_bev(boxes_a, boxes_b):\n    \"\"\"Calculate boxes IoU in the bird view.\n\n    Args:\n        boxes_a (torch.Tensor): Input boxes a with shape (M, 5).\n        boxes_b (torch.Tensor): Input boxes b with shape (N, 5).\n\n    Returns:\n        ans_iou (torch.Tensor): IoU result with shape (M, N).\n    \"\"\"\n    ans_iou = boxes_a.new_zeros(\n        torch.Size((boxes_a.shape[0], boxes_b.shape[0])))\n\n    iou3d_cuda.boxes_iou_bev_gpu(boxes_a.contiguous(), boxes_b.contiguous(),\n                                 ans_iou)\n\n    return ans_iou\n\n\ndef nms_gpu(boxes, scores, thresh, pre_maxsize=None, post_max_size=None):\n    \"\"\"Nms function with gpu implementation.\n\n    Args:\n        boxes (torch.Tensor): Input boxes with the shape of [N, 5]\n            ([x1, y1, x2, y2, ry]).\n        scores (torch.Tensor): Scores of boxes with the shape of [N].\n        thresh (int): Threshold.\n        pre_maxsize (int): Max size of boxes before nms. Default: None.\n        post_maxsize (int): Max size of boxes after nms. Default: None.\n\n    Returns:\n        torch.Tensor: Indexes after nms.\n    \"\"\"\n    order = scores.sort(0, descending=True)[1]\n\n    if pre_maxsize is not None:\n        order = order[:pre_maxsize]\n    boxes = boxes[order].contiguous()\n\n    keep = torch.zeros(boxes.size(0), dtype=torch.long)\n    num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh, boxes.device.index)\n    keep = order[keep[:num_out].cuda(boxes.device)].contiguous()\n    if post_max_size is not None:\n        keep = keep[:post_max_size]\n    return keep\n\n\ndef nms_normal_gpu(boxes, scores, thresh):\n    \"\"\"Normal non maximum suppression on GPU.\n\n    Args:\n        boxes (torch.Tensor): Input boxes with shape (N, 5).\n        scores (torch.Tensor): Scores of predicted boxes with shape (N).\n        thresh (torch.Tensor): Threshold of non maximum suppression.\n\n    Returns:\n        torch.Tensor: Remaining indices with scores in descending order.\n    \"\"\"\n    order = scores.sort(0, descending=True)[1]\n\n    boxes = boxes[order].contiguous()\n\n    keep = torch.zeros(boxes.size(0), dtype=torch.long)\n    num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh,\n                                        boxes.device.index)\n    return order[keep[:num_out].cuda(boxes.device)].contiguous()\n"
  },
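A minimal usage sketch of the BEV IoU and rotated NMS helpers above (assumes compiled ops and a CUDA device). Boxes follow the (x1, y1, x2, y2, ry) layout the kernels expect:

```python
import torch

from mmdet3d.ops.iou3d import boxes_iou_bev, nms_gpu

boxes = torch.tensor([[0.0, 0.0, 2.0, 2.0, 0.0],
                      [0.1, 0.1, 2.1, 2.1, 0.0],
                      [5.0, 5.0, 7.0, 7.0, 0.3]]).cuda()
scores = torch.tensor([0.9, 0.8, 0.7]).cuda()

iou = boxes_iou_bev(boxes, boxes)          # (3, 3) pairwise BEV IoU
keep = nms_gpu(boxes, scores, thresh=0.5)  # box 1 overlaps box 0 and is suppressed
print(iou, keep)
```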
  {
    "path": "mmdet3d/ops/iou3d/src/iou3d.cpp",
    "content": "// Modified from\n// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp\n\n/*\n3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)\nWritten by Shaoshuai Shi\nAll Rights Reserved 2019-2020.\n*/\n\n#include <cuda.h>\n#include <cuda_runtime_api.h>\n#include <torch/extension.h>\n#include <torch/serialize/tensor.h>\n\n#include <vector>\n\n#define CHECK_CUDA(x) \\\n  TORCH_CHECK(x.device().is_cuda(), #x, \" must be a CUDAtensor \")\n#define CHECK_CONTIGUOUS(x) \\\n  TORCH_CHECK(x.is_contiguous(), #x, \" must be contiguous \")\n#define CHECK_INPUT(x) \\\n  CHECK_CUDA(x);       \\\n  CHECK_CONTIGUOUS(x)\n\n#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))\n\n#define CHECK_ERROR(ans) \\\n  { gpuAssert((ans), __FILE__, __LINE__); }\ninline void gpuAssert(cudaError_t code, const char *file, int line,\n                      bool abort = true) {\n  if (code != cudaSuccess) {\n    fprintf(stderr, \"GPUassert: %s %s %d\\n\", cudaGetErrorString(code), file,\n            line);\n    if (abort) exit(code);\n  }\n}\n\nconst int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;\n\nvoid boxesoverlapLauncher(const int num_a, const float *boxes_a,\n                          const int num_b, const float *boxes_b,\n                          float *ans_overlap);\nvoid boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b,\n                         const float *boxes_b, float *ans_iou);\nvoid nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num,\n                 float nms_overlap_thresh);\nvoid nmsNormalLauncher(const float *boxes, unsigned long long *mask,\n                       int boxes_num, float nms_overlap_thresh);\n\nint boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b,\n                          at::Tensor ans_overlap) {\n  // params boxes_a: (N, 5) [x1, y1, x2, y2, ry]\n  // params boxes_b: (M, 5)\n  // params ans_overlap: (N, M)\n\n  CHECK_INPUT(boxes_a);\n  CHECK_INPUT(boxes_b);\n  CHECK_INPUT(ans_overlap);\n\n  int num_a = boxes_a.size(0);\n  int num_b = boxes_b.size(0);\n\n  const float *boxes_a_data = boxes_a.data_ptr<float>();\n  const float *boxes_b_data = boxes_b.data_ptr<float>();\n  float *ans_overlap_data = ans_overlap.data_ptr<float>();\n\n  boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data,\n                       ans_overlap_data);\n\n  return 1;\n}\n\nint boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b,\n                      at::Tensor ans_iou) {\n  // params boxes_a: (N, 5) [x1, y1, x2, y2, ry]\n  // params boxes_b: (M, 5)\n  // params ans_overlap: (N, M)\n\n  CHECK_INPUT(boxes_a);\n  CHECK_INPUT(boxes_b);\n  CHECK_INPUT(ans_iou);\n\n  int num_a = boxes_a.size(0);\n  int num_b = boxes_b.size(0);\n\n  const float *boxes_a_data = boxes_a.data_ptr<float>();\n  const float *boxes_b_data = boxes_b.data_ptr<float>();\n  float *ans_iou_data = ans_iou.data_ptr<float>();\n\n  boxesioubevLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_iou_data);\n\n  return 1;\n}\n\nint nms_gpu(at::Tensor boxes, at::Tensor keep,\n\t    float nms_overlap_thresh, int device_id) {\n  // params boxes: (N, 5) [x1, y1, x2, y2, ry]\n  // params keep: (N)\n\n  CHECK_INPUT(boxes);\n  CHECK_CONTIGUOUS(keep);\n  cudaSetDevice(device_id);\n\n  int boxes_num = boxes.size(0);\n  const float *boxes_data = boxes.data_ptr<float>();\n  long *keep_data = keep.data_ptr<long>();\n\n  const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);\n\n  unsigned long long 
*mask_data = NULL;\n  CHECK_ERROR(cudaMalloc((void **)&mask_data,\n                         boxes_num * col_blocks * sizeof(unsigned long long)));\n  nmsLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);\n\n  // unsigned long long mask_cpu[boxes_num * col_blocks];\n  // unsigned long long *mask_cpu = new unsigned long long [boxes_num *\n  // col_blocks];\n  std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);\n\n  //    printf(\"boxes_num=%d, col_blocks=%d\\n\", boxes_num, col_blocks);\n  CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data,\n                         boxes_num * col_blocks * sizeof(unsigned long long),\n                         cudaMemcpyDeviceToHost));\n\n  cudaFree(mask_data);\n\n  unsigned long long remv_cpu[col_blocks];\n  memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long));\n\n  int num_to_keep = 0;\n\n  for (int i = 0; i < boxes_num; i++) {\n    int nblock = i / THREADS_PER_BLOCK_NMS;\n    int inblock = i % THREADS_PER_BLOCK_NMS;\n\n    if (!(remv_cpu[nblock] & (1ULL << inblock))) {\n      keep_data[num_to_keep++] = i;\n      unsigned long long *p = &mask_cpu[0] + i * col_blocks;\n      for (int j = nblock; j < col_blocks; j++) {\n        remv_cpu[j] |= p[j];\n      }\n    }\n  }\n  if (cudaSuccess != cudaGetLastError()) printf(\"Error!\\n\");\n\n  return num_to_keep;\n}\n\nint nms_normal_gpu(at::Tensor boxes, at::Tensor keep,\n                   float nms_overlap_thresh, int device_id) {\n  // params boxes: (N, 5) [x1, y1, x2, y2, ry]\n  // params keep: (N)\n\n  CHECK_INPUT(boxes);\n  CHECK_CONTIGUOUS(keep);\n  cudaSetDevice(device_id);\n\n  int boxes_num = boxes.size(0);\n  const float *boxes_data = boxes.data_ptr<float>();\n  long *keep_data = keep.data_ptr<long>();\n\n  const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);\n\n  unsigned long long *mask_data = NULL;\n  CHECK_ERROR(cudaMalloc((void **)&mask_data,\n                         boxes_num * col_blocks * sizeof(unsigned long long)));\n  nmsNormalLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);\n\n  // unsigned long long mask_cpu[boxes_num * col_blocks];\n  // unsigned long long *mask_cpu = new unsigned long long [boxes_num *\n  // col_blocks];\n  std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);\n\n  //    printf(\"boxes_num=%d, col_blocks=%d\\n\", boxes_num, col_blocks);\n  CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data,\n                         boxes_num * col_blocks * sizeof(unsigned long long),\n                         cudaMemcpyDeviceToHost));\n\n  cudaFree(mask_data);\n\n  unsigned long long remv_cpu[col_blocks];\n  memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long));\n\n  int num_to_keep = 0;\n\n  for (int i = 0; i < boxes_num; i++) {\n    int nblock = i / THREADS_PER_BLOCK_NMS;\n    int inblock = i % THREADS_PER_BLOCK_NMS;\n\n    if (!(remv_cpu[nblock] & (1ULL << inblock))) {\n      keep_data[num_to_keep++] = i;\n      unsigned long long *p = &mask_cpu[0] + i * col_blocks;\n      for (int j = nblock; j < col_blocks; j++) {\n        remv_cpu[j] |= p[j];\n      }\n    }\n  }\n  if (cudaSuccess != cudaGetLastError()) printf(\"Error!\\n\");\n\n  return num_to_keep;\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"boxes_overlap_bev_gpu\", &boxes_overlap_bev_gpu,\n        \"oriented boxes overlap\");\n  m.def(\"boxes_iou_bev_gpu\", &boxes_iou_bev_gpu, \"oriented boxes iou\");\n  m.def(\"nms_gpu\", &nms_gpu, \"oriented nms gpu\");\n  m.def(\"nms_normal_gpu\", &nms_normal_gpu, \"nms gpu\");\n}\n"
  },
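The wrapper that normally drives these bindings lives elsewhere in the package and is not part of this file, so here is a minimal, hedged sketch of calling the `nms_gpu` binding directly. The extension module name `iou3d_ext` is an assumption (the real name is set by the build script, which is not shown here); the call follows the `PYBIND11_MODULE` definition above: boxes are a contiguous `(N, 5)` `[x1, y1, x2, y2, ry]` float tensor on the GPU, `keep` is a CPU int64 buffer that the op fills, and the return value says how many entries of `keep` are valid.

```python
# Minimal sketch (not the repository's own wrapper).  `iou3d_ext` is a
# placeholder for whatever name the compiled extension is built under.
import torch

import iou3d_ext  # hypothetical module name for the extension above


def rotated_nms(boxes, scores, thresh):
    # boxes: (N, 5) [x1, y1, x2, y2, ry] on GPU, scores: (N,) on GPU
    order = scores.sort(descending=True)[1]
    boxes = boxes[order].contiguous()  # greedy NMS keeps earlier (higher-score) boxes
    keep = torch.zeros(boxes.size(0), dtype=torch.long)  # CPU buffer written by the op
    num_out = iou3d_ext.nms_gpu(boxes, keep, thresh, boxes.device.index)
    return order[keep[:num_out].to(boxes.device)]
```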
  {
    "path": "mmdet3d/ops/iou3d/src/iou3d_kernel.cu",
    "content": "// Modified from\n// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu\n\n/*\n3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)\nWritten by Shaoshuai Shi\nAll Rights Reserved 2019-2020.\n*/\n\n#include <stdio.h>\n#define THREADS_PER_BLOCK 16\n#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))\n\n//#define DEBUG\nconst int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;\nconst float EPS = 1e-8;\nstruct Point {\n  float x, y;\n  __device__ Point() {}\n  __device__ Point(double _x, double _y) { x = _x, y = _y; }\n\n  __device__ void set(float _x, float _y) {\n    x = _x;\n    y = _y;\n  }\n\n  __device__ Point operator+(const Point &b) const {\n    return Point(x + b.x, y + b.y);\n  }\n\n  __device__ Point operator-(const Point &b) const {\n    return Point(x - b.x, y - b.y);\n  }\n};\n\n__device__ inline float cross(const Point &a, const Point &b) {\n  return a.x * b.y - a.y * b.x;\n}\n\n__device__ inline float cross(const Point &p1, const Point &p2,\n                              const Point &p0) {\n  return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y);\n}\n\n__device__ int check_rect_cross(const Point &p1, const Point &p2,\n                                const Point &q1, const Point &q2) {\n  int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) &&\n            min(q1.x, q2.x) <= max(p1.x, p2.x) &&\n            min(p1.y, p2.y) <= max(q1.y, q2.y) &&\n            min(q1.y, q2.y) <= max(p1.y, p2.y);\n  return ret;\n}\n\n__device__ inline int check_in_box2d(const float *box, const Point &p) {\n  // params: box (5) [x1, y1, x2, y2, angle]\n  const float MARGIN = 1e-5;\n\n  float center_x = (box[0] + box[2]) / 2;\n  float center_y = (box[1] + box[3]) / 2;\n  float angle_cos = cos(-box[4]),\n        angle_sin =\n            sin(-box[4]);  // rotate the point in the opposite direction of box\n  float rot_x =\n      (p.x - center_x) * angle_cos + (p.y - center_y) * angle_sin + center_x;\n  float rot_y =\n      -(p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + center_y;\n#ifdef DEBUG\n  printf(\"box: (%.3f, %.3f, %.3f, %.3f, %.3f)\\n\", box[0], box[1], box[2],\n         box[3], box[4]);\n  printf(\n      \"center: (%.3f, %.3f), cossin(%.3f, %.3f), src(%.3f, %.3f), rot(%.3f, \"\n      \"%.3f)\\n\",\n      center_x, center_y, angle_cos, angle_sin, p.x, p.y, rot_x, rot_y);\n#endif\n  return (rot_x > box[0] - MARGIN && rot_x < box[2] + MARGIN &&\n          rot_y > box[1] - MARGIN && rot_y < box[3] + MARGIN);\n}\n\n__device__ inline int intersection(const Point &p1, const Point &p0,\n                                   const Point &q1, const Point &q0,\n                                   Point &ans) {\n  // fast exclusion\n  if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;\n\n  // check cross standing\n  float s1 = cross(q0, p1, p0);\n  float s2 = cross(p1, q1, p0);\n  float s3 = cross(p0, q1, q0);\n  float s4 = cross(q1, p1, q0);\n\n  if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0;\n\n  // calculate intersection of two lines\n  float s5 = cross(q1, p1, p0);\n  if (fabs(s5 - s1) > EPS) {\n    ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);\n    ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);\n\n  } else {\n    float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y;\n    float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y;\n    float D = a0 * b1 - a1 * b0;\n\n    ans.x = (b0 * c1 - b1 * c0) / D;\n    ans.y = (a1 * c0 - a0 * c1) / D;\n  }\n\n  return 
1;\n}\n\n__device__ inline void rotate_around_center(const Point &center,\n                                            const float angle_cos,\n                                            const float angle_sin, Point &p) {\n  float new_x =\n      (p.x - center.x) * angle_cos + (p.y - center.y) * angle_sin + center.x;\n  float new_y =\n      -(p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;\n  p.set(new_x, new_y);\n}\n\n__device__ inline int point_cmp(const Point &a, const Point &b,\n                                const Point &center) {\n  return atan2(a.y - center.y, a.x - center.x) >\n         atan2(b.y - center.y, b.x - center.x);\n}\n\n__device__ inline float box_overlap(const float *box_a, const float *box_b) {\n  // params: box_a (5) [x1, y1, x2, y2, angle]\n  // params: box_b (5) [x1, y1, x2, y2, angle]\n\n  float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = box_a[3],\n        a_angle = box_a[4];\n  float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = box_b[3],\n        b_angle = box_b[4];\n\n  Point center_a((a_x1 + a_x2) / 2, (a_y1 + a_y2) / 2);\n  Point center_b((b_x1 + b_x2) / 2, (b_y1 + b_y2) / 2);\n#ifdef DEBUG\n  printf(\n      \"a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\\n\",\n      a_x1, a_y1, a_x2, a_y2, a_angle, b_x1, b_y1, b_x2, b_y2, b_angle);\n  printf(\"center a: (%.3f, %.3f), b: (%.3f, %.3f)\\n\", center_a.x, center_a.y,\n         center_b.x, center_b.y);\n#endif\n\n  Point box_a_corners[5];\n  box_a_corners[0].set(a_x1, a_y1);\n  box_a_corners[1].set(a_x2, a_y1);\n  box_a_corners[2].set(a_x2, a_y2);\n  box_a_corners[3].set(a_x1, a_y2);\n\n  Point box_b_corners[5];\n  box_b_corners[0].set(b_x1, b_y1);\n  box_b_corners[1].set(b_x2, b_y1);\n  box_b_corners[2].set(b_x2, b_y2);\n  box_b_corners[3].set(b_x1, b_y2);\n\n  // get oriented corners\n  float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle);\n  float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);\n\n  for (int k = 0; k < 4; k++) {\n#ifdef DEBUG\n    printf(\"before corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \\n\", k,\n           box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x,\n           box_b_corners[k].y);\n#endif\n    rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);\n    rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);\n#ifdef DEBUG\n    printf(\"corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \\n\", k, box_a_corners[k].x,\n           box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);\n#endif\n  }\n\n  box_a_corners[4] = box_a_corners[0];\n  box_b_corners[4] = box_b_corners[0];\n\n  // get intersection of lines\n  Point cross_points[16];\n  Point poly_center;\n  int cnt = 0, flag = 0;\n\n  poly_center.set(0, 0);\n  for (int i = 0; i < 4; i++) {\n    for (int j = 0; j < 4; j++) {\n      flag = intersection(box_a_corners[i + 1], box_a_corners[i],\n                          box_b_corners[j + 1], box_b_corners[j],\n                          cross_points[cnt]);\n      if (flag) {\n        poly_center = poly_center + cross_points[cnt];\n        cnt++;\n      }\n    }\n  }\n\n  // check corners\n  for (int k = 0; k < 4; k++) {\n    if (check_in_box2d(box_a, box_b_corners[k])) {\n      poly_center = poly_center + box_b_corners[k];\n      cross_points[cnt] = box_b_corners[k];\n      cnt++;\n    }\n    if (check_in_box2d(box_b, box_a_corners[k])) {\n      poly_center = poly_center + box_a_corners[k];\n      cross_points[cnt] = box_a_corners[k];\n      cnt++;\n    }\n  
}\n\n  poly_center.x /= cnt;\n  poly_center.y /= cnt;\n\n  // sort the points of polygon\n  Point temp;\n  for (int j = 0; j < cnt - 1; j++) {\n    for (int i = 0; i < cnt - j - 1; i++) {\n      if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) {\n        temp = cross_points[i];\n        cross_points[i] = cross_points[i + 1];\n        cross_points[i + 1] = temp;\n      }\n    }\n  }\n\n#ifdef DEBUG\n  printf(\"cnt=%d\\n\", cnt);\n  for (int i = 0; i < cnt; i++) {\n    printf(\"All cross point %d: (%.3f, %.3f)\\n\", i, cross_points[i].x,\n           cross_points[i].y);\n  }\n#endif\n\n  // get the overlap areas\n  float area = 0;\n  for (int k = 0; k < cnt - 1; k++) {\n    area += cross(cross_points[k] - cross_points[0],\n                  cross_points[k + 1] - cross_points[0]);\n  }\n\n  return fabs(area) / 2.0;\n}\n\n__device__ inline float iou_bev(const float *box_a, const float *box_b) {\n  // params: box_a (5) [x1, y1, x2, y2, angle]\n  // params: box_b (5) [x1, y1, x2, y2, angle]\n  float sa = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1]);\n  float sb = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1]);\n  float s_overlap = box_overlap(box_a, box_b);\n  return s_overlap / fmaxf(sa + sb - s_overlap, EPS);\n}\n\n__global__ void boxes_overlap_kernel(const int num_a, const float *boxes_a,\n                                     const int num_b, const float *boxes_b,\n                                     float *ans_overlap) {\n  const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;\n  const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;\n\n  if (a_idx >= num_a || b_idx >= num_b) {\n    return;\n  }\n  const float *cur_box_a = boxes_a + a_idx * 5;\n  const float *cur_box_b = boxes_b + b_idx * 5;\n  float s_overlap = box_overlap(cur_box_a, cur_box_b);\n  ans_overlap[a_idx * num_b + b_idx] = s_overlap;\n}\n\n__global__ void boxes_iou_bev_kernel(const int num_a, const float *boxes_a,\n                                     const int num_b, const float *boxes_b,\n                                     float *ans_iou) {\n  const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;\n  const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;\n\n  if (a_idx >= num_a || b_idx >= num_b) {\n    return;\n  }\n\n  const float *cur_box_a = boxes_a + a_idx * 5;\n  const float *cur_box_b = boxes_b + b_idx * 5;\n  float cur_iou_bev = iou_bev(cur_box_a, cur_box_b);\n  ans_iou[a_idx * num_b + b_idx] = cur_iou_bev;\n}\n\n__global__ void nms_kernel(const int boxes_num, const float nms_overlap_thresh,\n                           const float *boxes, unsigned long long *mask) {\n  // params: boxes (N, 5) [x1, y1, x2, y2, ry]\n  // params: mask (N, N/THREADS_PER_BLOCK_NMS)\n\n  const int row_start = blockIdx.y;\n  const int col_start = blockIdx.x;\n\n  // if (row_start > col_start) return;\n\n  const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS,\n                             THREADS_PER_BLOCK_NMS);\n  const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS,\n                             THREADS_PER_BLOCK_NMS);\n\n  __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5];\n\n  if (threadIdx.x < col_size) {\n    block_boxes[threadIdx.x * 5 + 0] =\n        boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0];\n    block_boxes[threadIdx.x * 5 + 1] =\n        boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1];\n    block_boxes[threadIdx.x * 5 + 2] =\n        boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 
+ 2];\n    block_boxes[threadIdx.x * 5 + 3] =\n        boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3];\n    block_boxes[threadIdx.x * 5 + 4] =\n        boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4];\n  }\n  __syncthreads();\n\n  if (threadIdx.x < row_size) {\n    const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;\n    const float *cur_box = boxes + cur_box_idx * 5;\n\n    int i = 0;\n    unsigned long long t = 0;\n    int start = 0;\n    if (row_start == col_start) {\n      start = threadIdx.x + 1;\n    }\n    for (i = start; i < col_size; i++) {\n      if (iou_bev(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {\n        t |= 1ULL << i;\n      }\n    }\n    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);\n    mask[cur_box_idx * col_blocks + col_start] = t;\n  }\n}\n\n__device__ inline float iou_normal(float const *const a, float const *const b) {\n  float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);\n  float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);\n  float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);\n  float interS = width * height;\n  float Sa = (a[2] - a[0]) * (a[3] - a[1]);\n  float Sb = (b[2] - b[0]) * (b[3] - b[1]);\n  return interS / fmaxf(Sa + Sb - interS, EPS);\n}\n\n__global__ void nms_normal_kernel(const int boxes_num,\n                                  const float nms_overlap_thresh,\n                                  const float *boxes,\n                                  unsigned long long *mask) {\n  // params: boxes (N, 5) [x1, y1, x2, y2, ry]\n  // params: mask (N, N/THREADS_PER_BLOCK_NMS)\n\n  const int row_start = blockIdx.y;\n  const int col_start = blockIdx.x;\n\n  // if (row_start > col_start) return;\n\n  const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS,\n                             THREADS_PER_BLOCK_NMS);\n  const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS,\n                             THREADS_PER_BLOCK_NMS);\n\n  __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5];\n\n  if (threadIdx.x < col_size) {\n    block_boxes[threadIdx.x * 5 + 0] =\n        boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0];\n    block_boxes[threadIdx.x * 5 + 1] =\n        boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1];\n    block_boxes[threadIdx.x * 5 + 2] =\n        boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2];\n    block_boxes[threadIdx.x * 5 + 3] =\n        boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3];\n    block_boxes[threadIdx.x * 5 + 4] =\n        boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4];\n  }\n  __syncthreads();\n\n  if (threadIdx.x < row_size) {\n    const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;\n    const float *cur_box = boxes + cur_box_idx * 5;\n\n    int i = 0;\n    unsigned long long t = 0;\n    int start = 0;\n    if (row_start == col_start) {\n      start = threadIdx.x + 1;\n    }\n    for (i = start; i < col_size; i++) {\n      if (iou_normal(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {\n        t |= 1ULL << i;\n      }\n    }\n    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);\n    mask[cur_box_idx * col_blocks + col_start] = t;\n  }\n}\n\nvoid boxesoverlapLauncher(const int num_a, const float *boxes_a,\n                          const int num_b, const float *boxes_b,\n                          float *ans_overlap) {\n  dim3 blocks(\n      DIVUP(num_b, 
THREADS_PER_BLOCK),\n      DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)\n  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);\n\n  boxes_overlap_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b,\n                                            ans_overlap);\n#ifdef DEBUG\n  cudaDeviceSynchronize();  // for using printf in kernel function\n#endif\n}\n\nvoid boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b,\n                         const float *boxes_b, float *ans_iou) {\n  dim3 blocks(\n      DIVUP(num_b, THREADS_PER_BLOCK),\n      DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)\n  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);\n\n  boxes_iou_bev_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b,\n                                            ans_iou);\n}\n\nvoid nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num,\n                 float nms_overlap_thresh) {\n  dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),\n              DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));\n  dim3 threads(THREADS_PER_BLOCK_NMS);\n  nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);\n}\n\nvoid nmsNormalLauncher(const float *boxes, unsigned long long *mask,\n                       int boxes_num, float nms_overlap_thresh) {\n  dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),\n              DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));\n  dim3 threads(THREADS_PER_BLOCK_NMS);\n  nms_normal_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes,\n                                         mask);\n}\n"
  },
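The kernels above only fill a bit mask: for box `i`, bit `j` of 64-bit word `(i, b)` says that box `64*b + j` overlaps box `i` above the threshold; the greedy suppression itself happens on the host (see the decoding loop in `iou3d.cpp`). To make that layout concrete, here is a small pure-Python sketch of the same decoding, using plain integers as the 64-bit words; it assumes boxes were sorted by score before the kernel ran.

```python
# Pure-Python sketch of the host-side decoding of the NMS bit mask.
THREADS_PER_BLOCK_NMS = 64  # bits per mask word, as in the CUDA code


def decode_nms_mask(mask, boxes_num):
    # mask[i][b]: bit j set <=> box (64*b + j) overlaps box i above the threshold
    col_blocks = (boxes_num + THREADS_PER_BLOCK_NMS - 1) // THREADS_PER_BLOCK_NMS
    remv = [0] * col_blocks  # bitset of already-suppressed boxes
    keep = []
    for i in range(boxes_num):
        nblock, inblock = divmod(i, THREADS_PER_BLOCK_NMS)
        if not (remv[nblock] >> inblock) & 1:  # box i has not been suppressed
            keep.append(i)
            for j in range(nblock, col_blocks):  # suppress everything it overlaps
                remv[j] |= mask[i][j]
    return keep


# e.g. three boxes where box 0 overlaps only box 1:
assert decode_nms_mask([[0b010], [0b000], [0b000]], 3) == [0, 2]
```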
  {
    "path": "mmdet3d/ops/knn/__init__.py",
    "content": "from .knn import knn\n\n__all__ = ['knn']\n"
  },
  {
    "path": "mmdet3d/ops/knn/knn.py",
    "content": "import torch\nfrom torch.autograd import Function\n\nfrom . import knn_ext\n\n\nclass KNN(Function):\n    \"\"\"KNN (CUDA).\n\n    Find k-nearest points.\n    \"\"\"\n\n    @staticmethod\n    def forward(ctx,\n                k: int,\n                xyz: torch.Tensor,\n                center_xyz: torch.Tensor,\n                transposed: bool = False) -> torch.Tensor:\n        \"\"\"forward.\n\n        Args:\n            k (int): number of nearest neighbors.\n            xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N).\n                xyz coordinates of the features.\n            center_xyz (Tensor): (B, npoint, 3) if transposed == False,\n                else (B, 3, npoint). centers of the knn query.\n            transposed (bool): whether the input tensors are transposed.\n                defaults to False. Should not expicitly use this keyword\n                when calling knn (=KNN.apply), just add the fourth param.\n\n        Returns:\n            Tensor: (B, k, npoint) tensor with the indicies of\n                the features that form k-nearest neighbours.\n        \"\"\"\n        assert k > 0\n\n        if not transposed:\n            xyz = xyz.transpose(2, 1).contiguous()\n            center_xyz = center_xyz.transpose(2, 1).contiguous()\n\n        B, _, npoint = center_xyz.shape\n        N = xyz.shape[2]\n\n        assert center_xyz.is_contiguous()\n        assert xyz.is_contiguous()\n\n        center_xyz_device = center_xyz.get_device()\n        assert center_xyz_device == xyz.get_device(), \\\n            'center_xyz and xyz should be put on the same device'\n        if torch.cuda.current_device() != center_xyz_device:\n            torch.cuda.set_device(center_xyz_device)\n\n        idx = center_xyz.new_zeros((B, k, npoint)).long()\n\n        for bi in range(B):\n            knn_ext.knn_wrapper(xyz[bi], N, center_xyz[bi], npoint, idx[bi], k)\n\n        ctx.mark_non_differentiable(idx)\n\n        idx -= 1\n\n        return idx\n\n    @staticmethod\n    def backward(ctx, a=None):\n        return None, None\n\n\nknn = KNN.apply\n"
  },
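A short usage sketch for the `knn` op defined above. It needs a CUDA build of the `knn_ext` extension and a GPU; shapes follow the docstring, and the returned tensor holds 0-based int64 indices into the `N` candidate points.

```python
# Usage sketch for the knn op above (requires a CUDA device and the built extension).
import torch

from mmdet3d.ops.knn import knn

if torch.cuda.is_available():
    xyz = torch.rand(2, 256, 3).cuda()        # (B, N, 3) candidate points
    center_xyz = torch.rand(2, 64, 3).cuda()  # (B, npoint, 3) query centers
    idx = knn(5, xyz, center_xyz)             # fourth positional arg `transposed` defaults to False
    assert idx.shape == (2, 5, 64)            # (B, k, npoint)
```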
  {
    "path": "mmdet3d/ops/knn/src/knn.cpp",
    "content": "// Modified from https://github.com/unlimblue/KNN_CUDA\n\n#include <vector>\n#include <torch/extension.h>\n#include <ATen/cuda/CUDAContext.h>\n\n#define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x \" must be contiguous\")\n#define CHECK_TYPE(x, t) AT_ASSERTM(x.dtype() == t, #x \" must be \" #t)\n#define CHECK_CUDA(x) AT_ASSERTM(x.device().type() == at::Device::Type::CUDA, #x \" must be on CUDA\")\n#define CHECK_INPUT(x, t) CHECK_CONTIGUOUS(x); CHECK_TYPE(x, t); CHECK_CUDA(x)\n\n\nvoid knn_kernels_launcher(\n    const float* ref_dev,\n    int ref_nb,\n    const float* query_dev,\n    int query_nb,\n    int dim,\n    int k,\n    float* dist_dev,\n    long* ind_dev,\n    cudaStream_t stream\n    );\n\n// std::vector<at::Tensor> knn_wrapper(\nvoid knn_wrapper(\n    at::Tensor & ref,\n    int ref_nb,\n    at::Tensor & query,\n    int query_nb,\n    at::Tensor & ind,\n    const int k\n    ) {\n\n    CHECK_INPUT(ref, at::kFloat);\n    CHECK_INPUT(query, at::kFloat);\n    const float * ref_dev = ref.data_ptr<float>();\n    const float * query_dev = query.data_ptr<float>();\n    int dim = query.size(0);\n    auto dist = at::empty({ref_nb, query_nb}, query.options().dtype(at::kFloat));\n    float * dist_dev = dist.data_ptr<float>();\n    long * ind_dev = ind.data_ptr<long>();\n\n    cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n\n    knn_kernels_launcher(\n        ref_dev,\n        ref_nb,\n        query_dev,\n        query_nb,\n        dim,\n        k,\n        dist_dev,\n        ind_dev,\n        stream\n    );\n}\n\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n    m.def(\"knn_wrapper\", &knn_wrapper, \"knn_wrapper\");\n}\n"
  },
  {
    "path": "mmdet3d/ops/knn/src/knn_cuda.cu",
    "content": "/** Modified from https://github.com/unlimblue/KNN_CUDA\n * which is the modified version of knn-CUDA\n * from https://github.com/vincentfpgarcia/kNN-CUDA\n * Last modified by Christopher B. Choy <chrischoy@ai.stanford.edu> 12/23/2016\n * vincentfpgarcia wrote the original cuda code, Christopher modified it and\n * set it up for pytorch 0.4, and unlimblue updated it to pytorch >= 1.0\n */\n\n// Includes\n#include <cstdio>\n#include \"cuda.h\"\n\n// Constants used by the program\n#define BLOCK_DIM                      16\n#define DEBUG                          0\n\n/**\n  * Computes the distance between two matrix A (reference points) and\n  * B (query points) containing respectively wA and wB points.\n  *\n  * @param A     pointer on the matrix A\n  * @param wA    width of the matrix A = number of points in A\n  * @param B     pointer on the matrix B\n  * @param wB    width of the matrix B = number of points in B\n  * @param dim   dimension of points = height of matrices A and B\n  * @param AB    pointer on the matrix containing the wA*wB distances computed\n  */\n__global__ void cuComputeDistanceGlobal(const float* A, int wA,\n    const float* B, int wB, int dim, float* AB){\n\n  // Declaration of the shared memory arrays As and Bs used to store the sub-matrix of A and B\n  __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM];\n  __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM];\n\n  // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step)\n  __shared__ int begin_A;\n  __shared__ int begin_B;\n  __shared__ int step_A;\n  __shared__ int step_B;\n  __shared__ int end_A;\n\n  // Thread index\n  int tx = threadIdx.x;\n  int ty = threadIdx.y;\n\n  // Other variables\n  float tmp;\n  float ssd = 0;\n\n  // Loop parameters\n  begin_A = BLOCK_DIM * blockIdx.y;\n  begin_B = BLOCK_DIM * blockIdx.x;\n  step_A  = BLOCK_DIM * wA;\n  step_B  = BLOCK_DIM * wB;\n  end_A   = begin_A + (dim-1) * wA;\n\n  // Conditions\n  int cond0 = (begin_A + tx < wA); // used to write in shared memory\n  int cond1 = (begin_B + tx < wB); // used to write in shared memory & to computations and to write in output matrix\n  int cond2 = (begin_A + ty < wA); // used to computations and to write in output matrix\n\n  // Loop over all the sub-matrices of A and B required to compute the block sub-matrix\n  for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) {\n    // Load the matrices from device memory to shared memory; each thread loads one element of each matrix\n    if (a/wA + ty < dim){\n      shared_A[ty][tx] = (cond0)? A[a + wA * ty + tx] : 0;\n      shared_B[ty][tx] = (cond1)? 
B[b + wB * ty + tx] : 0;\n    }\n    else{\n      shared_A[ty][tx] = 0;\n      shared_B[ty][tx] = 0;\n    }\n\n    // Synchronize to make sure the matrices are loaded\n    __syncthreads();\n\n    // Compute the difference between the two matrixes; each thread computes one element of the block sub-matrix\n    if (cond2 && cond1){\n      for (int k = 0; k < BLOCK_DIM; ++k){\n        tmp = shared_A[k][ty] - shared_B[k][tx];\n        ssd += tmp*tmp;\n      }\n    }\n\n    // Synchronize to make sure that the preceding computation is done before loading two new sub-matrices of A and B in the next iteration\n    __syncthreads();\n  }\n\n  // Write the block sub-matrix to device memory; each thread writes one element\n  if (cond2 && cond1)\n    AB[(begin_A + ty) * wB + begin_B + tx] = ssd;\n}\n\n\n/**\n  * Gathers k-th smallest distances for each column of the distance matrix in the top.\n  *\n  * @param dist        distance matrix\n  * @param ind         index matrix\n  * @param width       width of the distance matrix and of the index matrix\n  * @param height      height of the distance matrix and of the index matrix\n  * @param k           number of neighbors to consider\n  */\n__global__ void cuInsertionSort(float *dist, long *ind, int width, int height, int k){\n\n  // Variables\n  int l, i, j;\n  float *p_dist;\n  long  *p_ind;\n  float curr_dist, max_dist;\n  long  curr_row,  max_row;\n  unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;\n  if (xIndex<width){\n    // Pointer shift, initialization, and max value\n    p_dist   = dist + xIndex;\n    p_ind    = ind  + xIndex;\n    max_dist = p_dist[0];\n    p_ind[0] = 1;\n\n    // Part 1 : sort kth firt elementZ\n    for (l=1; l<k; l++){\n      curr_row  = l * width;\n      curr_dist = p_dist[curr_row];\n      if (curr_dist<max_dist){\n        i=l-1;\n        for (int a=0; a<l-1; a++){\n          if (p_dist[a*width]>curr_dist){\n            i=a;\n            break;\n          }\n        }\n        for (j=l; j>i; j--){\n          p_dist[j*width] = p_dist[(j-1)*width];\n          p_ind[j*width]   = p_ind[(j-1)*width];\n        }\n        p_dist[i*width] = curr_dist;\n        p_ind[i*width]  = l + 1;\n      } else {\n        p_ind[l*width] = l + 1;\n      }\n      max_dist = p_dist[curr_row];\n    }\n\n    // Part 2 : insert element in the k-th first lines\n    max_row = (k-1)*width;\n    for (l=k; l<height; l++){\n      curr_dist = p_dist[l*width];\n      if (curr_dist<max_dist){\n        i=k-1;\n        for (int a=0; a<k-1; a++){\n          if (p_dist[a*width]>curr_dist){\n            i=a;\n            break;\n          }\n        }\n        for (j=k-1; j>i; j--){\n          p_dist[j*width] = p_dist[(j-1)*width];\n          p_ind[j*width]   = p_ind[(j-1)*width];\n        }\n        p_dist[i*width] = curr_dist;\n        p_ind[i*width]   = l + 1;\n        max_dist             = p_dist[max_row];\n      }\n    }\n  }\n}\n\n\n/**\n  * Computes the square root of the first line (width-th first element)\n  * of the distance matrix.\n  *\n  * @param dist    distance matrix\n  * @param width   width of the distance matrix\n  * @param k       number of neighbors to consider\n  */\n__global__ void cuParallelSqrt(float *dist, int width, int k){\n    unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;\n    unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y;\n  if (xIndex<width && yIndex<k)\n    dist[yIndex*width + xIndex] = sqrt(dist[yIndex*width + xIndex]);\n}\n\n\nvoid debug(float * dist_dev, long * ind_dev, const int 
query_nb, const int k){\n  float* dist_host = new float[query_nb * k];\n  long*  idx_host  = new long[query_nb * k];\n\n  // Memory copy of output from device to host\n  cudaMemcpy(dist_host, dist_dev,\n      query_nb * k * sizeof(float), cudaMemcpyDeviceToHost);\n\n  cudaMemcpy(idx_host, ind_dev,\n      query_nb * k * sizeof(long), cudaMemcpyDeviceToHost);\n\n  int i, j;\n  for(i = 0; i < k; i++){\n    for (j = 0; j < query_nb; j++) {\n      if (j % 8 == 0)\n        printf(\"/\\n\");\n      printf(\"%f \", sqrt(dist_host[i*query_nb + j]));\n    }\n    printf(\"\\n\");\n  }\n}\n\n\n\n//-----------------------------------------------------------------------------------------------//\n//                                   K-th NEAREST NEIGHBORS                                      //\n//-----------------------------------------------------------------------------------------------//\n\n/**\n  * K nearest neighbor algorithm\n  * - Initialize CUDA\n  * - Allocate device memory\n  * - Copy point sets (reference and query points) from host to device memory\n  * - Compute the distances + indexes to the k nearest neighbors for each query point\n  * - Copy distances from device to host memory\n  *\n  * @param ref_host      reference points ; pointer to linear matrix\n  * @param ref_nb        number of reference points ; width of the matrix\n  * @param query_host    query points ; pointer to linear matrix\n  * @param query_nb      number of query points ; width of the matrix\n  * @param dim           dimension of points ; height of the matrices\n  * @param k             number of neighbor to consider\n  * @param dist_host     distances to k nearest neighbors ; pointer to linear matrix\n  * @param dist_host     indexes of the k nearest neighbors ; pointer to linear matrix\n  *\n  */\nvoid knn_kernels_launcher(const float* ref_dev, int ref_nb, const float* query_dev, int query_nb,\n    int dim, int k, float* dist_dev, long* ind_dev, cudaStream_t stream){\n\n  // Grids ans threads\n  dim3 g_16x16(query_nb / BLOCK_DIM, ref_nb / BLOCK_DIM, 1);\n  dim3 t_16x16(BLOCK_DIM, BLOCK_DIM, 1);\n  if (query_nb % BLOCK_DIM != 0) g_16x16.x += 1;\n  if (ref_nb   % BLOCK_DIM != 0) g_16x16.y += 1;\n  //\n  dim3 g_256x1(query_nb / 256, 1, 1);\n  dim3 t_256x1(256, 1, 1);\n  if (query_nb%256 != 0) g_256x1.x += 1;\n\n  dim3 g_k_16x16(query_nb / BLOCK_DIM, k / BLOCK_DIM, 1);\n  dim3 t_k_16x16(BLOCK_DIM, BLOCK_DIM, 1);\n  if (query_nb % BLOCK_DIM != 0) g_k_16x16.x += 1;\n  if (k  % BLOCK_DIM != 0) g_k_16x16.y += 1;\n\n  // Kernel 1: Compute all the distances\n  cuComputeDistanceGlobal<<<g_16x16, t_16x16, 0, stream>>>(ref_dev, ref_nb,\n      query_dev, query_nb, dim, dist_dev);\n\n#if DEBUG\n  printf(\"Pre insertionSort\\n\");\n  debug(dist_dev, ind_dev, query_nb, k);\n#endif\n\n  // Kernel 2: Sort each column\n  cuInsertionSort<<<g_256x1, t_256x1, 0, stream>>>(dist_dev, ind_dev, query_nb, ref_nb, k);\n\n#if DEBUG\n  printf(\"Post insertionSort\\n\");\n  debug(dist_dev, ind_dev, query_nb, k);\n#endif\n\n  // Kernel 3: Compute square root of k first elements\n  cuParallelSqrt<<<g_k_16x16,t_k_16x16, 0, stream>>>(dist_dev, query_nb, k);\n}\n"
  },
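End to end, the three kernels compute the full squared-distance matrix between reference and query points, keep the `k` smallest distances per query column with an in-place insertion sort (storing 1-based indices, which `knn.py` later shifts to 0-based), and finally take the square root of the kept rows. Below is a NumPy reference for the same result, handy for checking the CUDA path on small inputs; the column-major `(dim, n)` point layout matches the kernels.

```python
# NumPy reference for the result produced by the three kernels above.
import numpy as np


def knn_reference(ref, query, k):
    # ref: (dim, ref_nb), query: (dim, query_nb)
    # kernel 1: all pairwise squared distances, shape (ref_nb, query_nb)
    d2 = ((ref[:, :, None] - query[:, None, :]) ** 2).sum(axis=0)
    # kernel 2: k smallest per query column (0-based here; the CUDA kernel
    # writes 1-based indices and the Python wrapper subtracts 1)
    idx = np.argsort(d2, axis=0)[:k, :]
    dist = np.take_along_axis(d2, idx, axis=0)
    # kernel 3: square root of the kept rows
    return np.sqrt(dist), idx
```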
  {
    "path": "mmdet3d/ops/norm.py",
    "content": "import torch\nfrom mmcv.cnn import NORM_LAYERS\nfrom mmcv.runner import force_fp32\nfrom torch import distributed as dist\nfrom torch import nn as nn\nfrom torch.autograd.function import Function\n\n\nclass AllReduce(Function):\n\n    @staticmethod\n    def forward(ctx, input):\n        input_list = [\n            torch.zeros_like(input) for k in range(dist.get_world_size())\n        ]\n        # Use allgather instead of allreduce in-place operations is unreliable\n        dist.all_gather(input_list, input, async_op=False)\n        inputs = torch.stack(input_list, dim=0)\n        return torch.sum(inputs, dim=0)\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        dist.all_reduce(grad_output, async_op=False)\n        return grad_output\n\n\n@NORM_LAYERS.register_module('naiveSyncBN1d')\nclass NaiveSyncBatchNorm1d(nn.BatchNorm1d):\n    \"\"\"Syncronized Batch Normalization for 3D Tensors.\n\n    Note:\n        This implementation is modified from\n        https://github.com/facebookresearch/detectron2/\n\n        `torch.nn.SyncBatchNorm` has known unknown bugs.\n        It produces significantly worse AP (and sometimes goes NaN)\n        when the batch size on each worker is quite different\n        (e.g., when scale augmentation is used).\n        In 3D detection, different workers has points of different shapes,\n        whish also cause instability.\n\n        Use this implementation before `nn.SyncBatchNorm` is fixed.\n        It is slower than `nn.SyncBatchNorm`.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.fp16_enabled = False\n\n    # customized normalization layer still needs this decorator\n    # to force the input to be fp32 and the output to be fp16\n    # TODO: make mmcv fp16 utils handle customized norm layers\n    @force_fp32(out_fp16=True)\n    def forward(self, input):\n        assert input.dtype == torch.float32, \\\n            f'input should be in float32 type, got {input.dtype}'\n        if dist.get_world_size() == 1 or not self.training:\n            return super().forward(input)\n        assert input.shape[0] > 0, 'SyncBN does not support empty inputs'\n        C = input.shape[1]\n        mean = torch.mean(input, dim=[0, 2])\n        meansqr = torch.mean(input * input, dim=[0, 2])\n\n        vec = torch.cat([mean, meansqr], dim=0)\n        vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size())\n\n        mean, meansqr = torch.split(vec, C)\n        var = meansqr - mean * mean\n        self.running_mean += self.momentum * (\n            mean.detach() - self.running_mean)\n        self.running_var += self.momentum * (var.detach() - self.running_var)\n\n        invstd = torch.rsqrt(var + self.eps)\n        scale = self.weight * invstd\n        bias = self.bias - mean * scale\n        scale = scale.reshape(1, -1, 1)\n        bias = bias.reshape(1, -1, 1)\n        return input * scale + bias\n\n\n@NORM_LAYERS.register_module('naiveSyncBN2d')\nclass NaiveSyncBatchNorm2d(nn.BatchNorm2d):\n    \"\"\"Syncronized Batch Normalization for 4D Tensors.\n\n    Note:\n        This implementation is modified from\n        https://github.com/facebookresearch/detectron2/\n\n        `torch.nn.SyncBatchNorm` has known unknown bugs.\n        It produces significantly worse AP (and sometimes goes NaN)\n        when the batch size on each worker is quite different\n        (e.g., when scale augmentation is used).\n        This phenomenon also occurs when the multi-modality feature fusion\n     
   modules of multi-modality detectors use SyncBN.\n\n        Use this implementation before `nn.SyncBatchNorm` is fixed.\n        It is slower than `nn.SyncBatchNorm`.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.fp16_enabled = False\n\n    # customized normalization layer still needs this decorator\n    # to force the input to be fp32 and the output to be fp16\n    # TODO: make mmcv fp16 utils handle customized norm layers\n    @force_fp32(out_fp16=True)\n    def forward(self, input):\n        assert input.dtype == torch.float32, \\\n            f'input should be in float32 type, got {input.dtype}'\n        if dist.get_world_size() == 1 or not self.training:\n            return super().forward(input)\n\n        assert input.shape[0] > 0, 'SyncBN does not support empty inputs'\n        C = input.shape[1]\n        mean = torch.mean(input, dim=[0, 2, 3])\n        meansqr = torch.mean(input * input, dim=[0, 2, 3])\n\n        vec = torch.cat([mean, meansqr], dim=0)\n        vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size())\n\n        mean, meansqr = torch.split(vec, C)\n        var = meansqr - mean * mean\n        self.running_mean += self.momentum * (\n            mean.detach() - self.running_mean)\n        self.running_var += self.momentum * (var.detach() - self.running_var)\n\n        invstd = torch.rsqrt(var + self.eps)\n        scale = self.weight * invstd\n        bias = self.bias - mean * scale\n        scale = scale.reshape(1, -1, 1, 1)\n        bias = bias.reshape(1, -1, 1, 1)\n        return input * scale + bias\n"
  },
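Because both layers are registered in mmcv's `NORM_LAYERS` registry, they can be selected through an ordinary `norm_cfg`. Their forward pass queries `torch.distributed`, so even a single-process smoke test needs an initialized (size-1) process group, in which case the layer simply falls back to plain BatchNorm behaviour. A minimal sketch, assuming the gloo backend and a free local port:

```python
# Single-process smoke test for NaiveSyncBatchNorm1d (assumed setup: gloo backend,
# local rendezvous on port 29500; with world_size == 1 it behaves like plain BN).
import os

import torch
import torch.distributed as dist
from mmcv.cnn import build_norm_layer

os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
os.environ.setdefault('MASTER_PORT', '29500')
if not dist.is_initialized():
    dist.init_process_group('gloo', rank=0, world_size=1)

name, bn = build_norm_layer(dict(type='naiveSyncBN1d'), 16)
x = torch.rand(4, 16, 32)  # (N, C, L), must be float32
assert bn(x).shape == x.shape
```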
  {
    "path": "mmdet3d/ops/pointnet_modules/__init__.py",
    "content": "from .builder import build_sa_module\nfrom .point_fp_module import PointFPModule\nfrom .point_sa_module import PointSAModule, PointSAModuleMSG\n\n__all__ = [\n    'build_sa_module', 'PointSAModuleMSG', 'PointSAModule', 'PointFPModule'\n]\n"
  },
  {
    "path": "mmdet3d/ops/pointnet_modules/builder.py",
    "content": "from .registry import SA_MODULES\n\n\ndef build_sa_module(cfg, *args, **kwargs):\n    \"\"\"Build PointNet2 set abstraction (SA) module.\n\n    Args:\n        cfg (None or dict): The SA module config, which should contain:\n            - type (str): Module type.\n            - module args: Args needed to instantiate an SA module.\n        args (argument list): Arguments passed to the `__init__`\n            method of the corresponding module.\n        kwargs (keyword arguments): Keyword arguments passed to the `__init__`\n            method of the corresponding SA module .\n\n    Returns:\n        nn.Module: Created SA module.\n    \"\"\"\n    if cfg is None:\n        cfg_ = dict(type='PointSAModule')\n    else:\n        if not isinstance(cfg, dict):\n            raise TypeError('cfg must be a dict')\n        if 'type' not in cfg:\n            raise KeyError('the cfg dict must contain the key \"type\"')\n        cfg_ = cfg.copy()\n\n    module_type = cfg_.pop('type')\n    if module_type not in SA_MODULES:\n        raise KeyError(f'Unrecognized module type {module_type}')\n    else:\n        sa_module = SA_MODULES.get(module_type)\n\n    module = sa_module(*args, **kwargs, **cfg_)\n\n    return module\n"
  },
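`build_sa_module` resolves the `type` key against the `SA_MODULES` registry and passes the remaining keys to that module's constructor (with `cfg=None` the type defaults to `PointSAModule`). For example, using the `PointSAModule` arguments defined later in this package, with illustrative channel numbers:

```python
# Building an SA module from a config dict; the channel sizes are example values.
from mmdet3d.ops.pointnet_modules import build_sa_module

sa_cfg = dict(
    type='PointSAModule',
    num_point=256,
    radius=0.2,
    num_sample=32,
    mlp_channels=[4, 64, 128],  # 4 input feature channels per point
    use_xyz=True)               # +3 xyz channels are appended internally
sa_module = build_sa_module(sa_cfg)
```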
  {
    "path": "mmdet3d/ops/pointnet_modules/point_fp_module.py",
    "content": "import torch\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import force_fp32\nfrom torch import nn as nn\nfrom typing import List\n\nfrom mmdet3d.ops import three_interpolate, three_nn\n\n\nclass PointFPModule(nn.Module):\n    \"\"\"Point feature propagation module used in PointNets.\n\n    Propagate the features from one set to another.\n\n    Args:\n        mlp_channels (list[int]): List of mlp channels.\n        norm_cfg (dict): Type of normalization method.\n            Default: dict(type='BN2d').\n    \"\"\"\n\n    def __init__(self,\n                 mlp_channels: List[int],\n                 norm_cfg: dict = dict(type='BN2d')):\n        super().__init__()\n        self.fp16_enabled = False\n        self.mlps = nn.Sequential()\n        for i in range(len(mlp_channels) - 1):\n            self.mlps.add_module(\n                f'layer{i}',\n                ConvModule(\n                    mlp_channels[i],\n                    mlp_channels[i + 1],\n                    kernel_size=(1, 1),\n                    stride=(1, 1),\n                    conv_cfg=dict(type='Conv2d'),\n                    norm_cfg=norm_cfg))\n\n    @force_fp32()\n    def forward(self, target: torch.Tensor, source: torch.Tensor,\n                target_feats: torch.Tensor,\n                source_feats: torch.Tensor) -> torch.Tensor:\n        \"\"\"forward.\n\n        Args:\n            target (Tensor): (B, n, 3) tensor of the xyz positions of\n                the target features.\n            source (Tensor): (B, m, 3) tensor of the xyz positions of\n                the source features.\n            target_feats (Tensor): (B, C1, n) tensor of the features to be\n                propagated to.\n            source_feats (Tensor): (B, C2, m) tensor of features\n                to be propagated.\n\n        Return:\n            Tensor: (B, M, N) M = mlp[-1], tensor of the target features.\n        \"\"\"\n        if source is not None:\n            dist, idx = three_nn(target, source)\n            dist_reciprocal = 1.0 / (dist + 1e-8)\n            norm = torch.sum(dist_reciprocal, dim=2, keepdim=True)\n            weight = dist_reciprocal / norm\n\n            interpolated_feats = three_interpolate(source_feats, idx, weight)\n        else:\n            interpolated_feats = source_feats.expand(*source_feats.size()[0:2],\n                                                     target.size(1))\n\n        if target_feats is not None:\n            new_features = torch.cat([interpolated_feats, target_feats],\n                                     dim=1)  # (B, C2 + C1, n)\n        else:\n            new_features = interpolated_feats\n\n        new_features = new_features.unsqueeze(-1)\n        new_features = self.mlps(new_features)\n\n        return new_features.squeeze(-1)\n"
  },
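The FP module interpolates source features onto the target points with inverse-distance weights over the three nearest source points, optionally concatenates the target's own features, and pushes the result through the 1x1 conv MLP. A shape-level sketch with example sizes (the forward pass needs a CUDA device because `three_nn`/`three_interpolate` are GPU ops):

```python
# Shape-level usage sketch for PointFPModule; sizes are illustrative.
import torch

from mmdet3d.ops.pointnet_modules import PointFPModule

# target carries C1=16 features on n=2048 points, source carries C2=32 features
# on m=512 points, so the MLP input is C1 + C2 = 48 channels.
fp = PointFPModule(mlp_channels=[48, 64, 64])

if torch.cuda.is_available():
    fp = fp.cuda()
    target = torch.rand(2, 2048, 3).cuda()
    source = torch.rand(2, 512, 3).cuda()
    target_feats = torch.rand(2, 16, 2048).cuda()
    source_feats = torch.rand(2, 32, 512).cuda()
    out = fp(target, source, target_feats, source_feats)
    assert out.shape == (2, 64, 2048)  # (B, mlp_channels[-1], n)
```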
  {
    "path": "mmdet3d/ops/pointnet_modules/point_sa_module.py",
    "content": "import torch\nfrom mmcv.cnn import ConvModule\nfrom torch import nn as nn\nfrom torch.nn import functional as F\nfrom typing import List\n\nfrom mmdet3d.ops import GroupAll, Points_Sampler, QueryAndGroup, gather_points\nfrom .registry import SA_MODULES\n\n\n@SA_MODULES.register_module()\nclass PointSAModuleMSG(nn.Module):\n    \"\"\"Point set abstraction module with multi-scale grouping used in\n    Pointnets.\n\n    Args:\n        num_point (int): Number of points.\n        radii (list[float]): List of radius in each ball query.\n        sample_nums (list[int]): Number of samples in each ball query.\n        mlp_channels (list[int]): Specify of the pointnet before\n            the global pooling for each scale.\n        fps_mod (list[str]: Type of FPS method, valid mod\n            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].\n            F-FPS: using feature distances for FPS.\n            D-FPS: using Euclidean distances of points for FPS.\n            FS: using F-FPS and D-FPS simultaneously.\n        fps_sample_range_list (list[int]): Range of points to apply FPS.\n            Default: [-1].\n        dilated_group (bool): Whether to use dilated ball query.\n            Default: False.\n        norm_cfg (dict): Type of normalization method.\n            Default: dict(type='BN2d').\n        use_xyz (bool): Whether to use xyz.\n            Default: True.\n        pool_mod (str): Type of pooling method.\n            Default: 'max_pool'.\n        normalize_xyz (bool): Whether to normalize local XYZ with radius.\n            Default: False.\n        bias (bool | str): If specified as `auto`, it will be decided by the\n            norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise\n            False. Default: \"auto\".\n    \"\"\"\n\n    def __init__(self,\n                 num_point: int,\n                 radii: List[float],\n                 sample_nums: List[int],\n                 mlp_channels: List[List[int]],\n                 fps_mod: List[str] = ['D-FPS'],\n                 fps_sample_range_list: List[int] = [-1],\n                 dilated_group: bool = False,\n                 norm_cfg: dict = dict(type='BN2d'),\n                 use_xyz: bool = True,\n                 pool_mod='max',\n                 normalize_xyz: bool = False,\n                 bias='auto'):\n        super().__init__()\n\n        assert len(radii) == len(sample_nums) == len(mlp_channels)\n        assert pool_mod in ['max', 'avg']\n        assert isinstance(fps_mod, list) or isinstance(fps_mod, tuple)\n        assert isinstance(fps_sample_range_list, list) or isinstance(\n            fps_sample_range_list, tuple)\n        assert len(fps_mod) == len(fps_sample_range_list)\n\n        if isinstance(mlp_channels, tuple):\n            mlp_channels = list(map(list, mlp_channels))\n\n        if isinstance(num_point, int):\n            self.num_point = [num_point]\n        elif isinstance(num_point, list) or isinstance(num_point, tuple):\n            self.num_point = num_point\n        else:\n            raise NotImplementedError('Error type of num_point!')\n\n        self.pool_mod = pool_mod\n        self.groupers = nn.ModuleList()\n        self.mlps = nn.ModuleList()\n        self.fps_mod_list = fps_mod\n        self.fps_sample_range_list = fps_sample_range_list\n\n        self.points_sampler = Points_Sampler(self.num_point, self.fps_mod_list,\n                                             self.fps_sample_range_list)\n\n        for i in range(len(radii)):\n            radius = 
radii[i]\n            sample_num = sample_nums[i]\n            if num_point is not None:\n                if dilated_group and i != 0:\n                    min_radius = radii[i - 1]\n                else:\n                    min_radius = 0\n                grouper = QueryAndGroup(\n                    radius,\n                    sample_num,\n                    min_radius=min_radius,\n                    use_xyz=use_xyz,\n                    normalize_xyz=normalize_xyz)\n            else:\n                grouper = GroupAll(use_xyz)\n            self.groupers.append(grouper)\n\n            mlp_spec = mlp_channels[i]\n            if use_xyz:\n                mlp_spec[0] += 3\n\n            mlp = nn.Sequential()\n            for i in range(len(mlp_spec) - 1):\n                mlp.add_module(\n                    f'layer{i}',\n                    ConvModule(\n                        mlp_spec[i],\n                        mlp_spec[i + 1],\n                        kernel_size=(1, 1),\n                        stride=(1, 1),\n                        conv_cfg=dict(type='Conv2d'),\n                        norm_cfg=norm_cfg,\n                        bias=bias))\n            self.mlps.append(mlp)\n\n    def forward(\n        self,\n        points_xyz: torch.Tensor,\n        features: torch.Tensor = None,\n        indices: torch.Tensor = None,\n        target_xyz: torch.Tensor = None,\n    ) -> (torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor):\n        \"\"\"forward.\n\n        Args:\n            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.\n            features (Tensor): (B, C, N) features of each point.\n                Default: None.\n            indices (Tensor): (B, num_point) Index of the features.\n                Default: None.\n            target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs.\n\n        Returns:\n            Tensor: (B, M, 3) where M is the number of points.\n                New features xyz.\n            Tensor: (B, M, sum_k(mlps[k][-1])) where M is the number\n                of points. 
New feature descriptors.\n            Tensor: (B, M) where M is the number of points.\n                Index of the features.\n        \"\"\"\n        new_features_list = []\n        xyz_flipped = points_xyz.transpose(1, 2).contiguous()\n        if indices is not None:\n            assert (indices.shape[1] == self.num_point[0])\n            new_xyz = gather_points(xyz_flipped, indices).transpose(\n                1, 2).contiguous() if self.num_point is not None else None\n        elif target_xyz is not None:\n            new_xyz = target_xyz.contiguous()\n        else:\n            indices = self.points_sampler(points_xyz, features)\n            new_xyz = gather_points(xyz_flipped, indices).transpose(\n                1, 2).contiguous() if self.num_point is not None else None\n\n        for i in range(len(self.groupers)):\n            # (B, C, num_point, nsample)\n            new_features = self.groupers[i](points_xyz, new_xyz, features)\n\n            # (B, mlp[-1], num_point, nsample)\n            new_features = self.mlps[i](new_features)\n            if self.pool_mod == 'max':\n                # (B, mlp[-1], num_point, 1)\n                new_features = F.max_pool2d(\n                    new_features, kernel_size=[1, new_features.size(3)])\n            elif self.pool_mod == 'avg':\n                # (B, mlp[-1], num_point, 1)\n                new_features = F.avg_pool2d(\n                    new_features, kernel_size=[1, new_features.size(3)])\n            else:\n                raise NotImplementedError\n\n            new_features = new_features.squeeze(-1)  # (B, mlp[-1], num_point)\n            new_features_list.append(new_features)\n\n        return new_xyz, torch.cat(new_features_list, dim=1), indices\n\n\n@SA_MODULES.register_module()\nclass PointSAModule(PointSAModuleMSG):\n    \"\"\"Point set abstraction module used in Pointnets.\n\n    Args:\n        mlp_channels (list[int]): Specify of the pointnet before\n            the global pooling for each scale.\n        num_point (int): Number of points.\n            Default: None.\n        radius (float): Radius to group with.\n            Default: None.\n        num_sample (int): Number of samples in each ball query.\n            Default: None.\n        norm_cfg (dict): Type of normalization method.\n            Default: dict(type='BN2d').\n        use_xyz (bool): Whether to use xyz.\n            Default: True.\n        pool_mod (str): Type of pooling method.\n            Default: 'max_pool'.\n        fps_mod (list[str]: Type of FPS method, valid mod\n            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].\n        fps_sample_range_list (list[int]): Range of points to apply FPS.\n            Default: [-1].\n        normalize_xyz (bool): Whether to normalize local XYZ with radius.\n            Default: False.\n    \"\"\"\n\n    def __init__(self,\n                 mlp_channels: List[int],\n                 num_point: int = None,\n                 radius: float = None,\n                 num_sample: int = None,\n                 norm_cfg: dict = dict(type='BN2d'),\n                 use_xyz: bool = True,\n                 pool_mod: str = 'max',\n                 fps_mod: List[str] = ['D-FPS'],\n                 fps_sample_range_list: List[int] = [-1],\n                 normalize_xyz: bool = False):\n        super().__init__(\n            mlp_channels=[mlp_channels],\n            num_point=num_point,\n            radii=[radius],\n            sample_nums=[num_sample],\n            norm_cfg=norm_cfg,\n            use_xyz=use_xyz,\n        
    pool_mod=pool_mod,\n            fps_mod=fps_mod,\n            fps_sample_range_list=fps_sample_range_list,\n            normalize_xyz=normalize_xyz)\n"
  },
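`PointSAModuleMSG` samples `num_point` centres with the configured FPS scheme, groups neighbours inside each radius, runs one MLP per scale, pools over each neighbourhood, and concatenates the per-scale features. A usage sketch with two grouping radii and example channel sizes (the forward pass relies on the CUDA ops):

```python
# Usage sketch for multi-scale grouping; sizes are illustrative.
import torch

from mmdet3d.ops.pointnet_modules import PointSAModuleMSG

sa = PointSAModuleMSG(
    num_point=128,
    radii=[0.2, 0.4],
    sample_nums=[16, 32],
    mlp_channels=[[4, 32, 64], [4, 32, 64]],  # 4 feature channels in, per scale
    use_xyz=True)                             # +3 xyz channels added internally

if torch.cuda.is_available():
    sa = sa.cuda()
    points_xyz = torch.rand(2, 1024, 3).cuda()  # (B, N, 3)
    features = torch.rand(2, 4, 1024).cuda()    # (B, C, N)
    new_xyz, new_features, indices = sa(points_xyz, features)
    # new_xyz: (2, 128, 3), new_features: (2, 64 + 64, 128), indices: (2, 128)
```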
  {
    "path": "mmdet3d/ops/pointnet_modules/registry.py",
    "content": "from mmcv.utils import Registry\n\nSA_MODULES = Registry('point_sa_module')\n"
  },
  {
    "path": "mmdet3d/ops/roiaware_pool3d/__init__.py",
    "content": "from .points_in_boxes import (points_in_boxes_batch, points_in_boxes_cpu,\n                              points_in_boxes_gpu)\nfrom .roiaware_pool3d import RoIAwarePool3d\n\n__all__ = [\n    'RoIAwarePool3d', 'points_in_boxes_gpu', 'points_in_boxes_cpu',\n    'points_in_boxes_batch'\n]\n"
  },
  {
    "path": "mmdet3d/ops/roiaware_pool3d/points_in_boxes.py",
    "content": "import torch\n\nfrom . import roiaware_pool3d_ext\n\n\ndef points_in_boxes_gpu(points, boxes):\n    \"\"\"Find points that are in boxes (CUDA)\n\n    Args:\n        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR coordinate\n        boxes (torch.Tensor): [B, T, 7],\n            num_valid_boxes <= T, [x, y, z, w, l, h, ry] in LiDAR coordinate,\n            (x, y, z) is the bottom center\n\n    Returns:\n        box_idxs_of_pts (torch.Tensor): (B, M), default background = -1\n    \"\"\"\n    assert boxes.shape[0] == points.shape[0], \\\n        f'Points and boxes should have the same batch size, ' \\\n        f'got {boxes.shape[0]} and {boxes.shape[0]}'\n    assert boxes.shape[2] == 7, \\\n        f'boxes dimension should be 7, ' \\\n        f'got unexpected shape {boxes.shape[2]}'\n    assert points.shape[2] == 3, \\\n        f'points dimension should be 3, ' \\\n        f'got unexpected shape {points.shape[2]}'\n    batch_size, num_points, _ = points.shape\n\n    box_idxs_of_pts = points.new_zeros((batch_size, num_points),\n                                       dtype=torch.int).fill_(-1)\n\n    # If manually put the tensor 'points' or 'boxes' on a device\n    # which is not the current device, some temporary variables\n    # will be created on the current device in the cuda op,\n    # and the output will be incorrect.\n    # Therefore, we force the current device to be the same\n    # as the device of the tensors if it was not.\n    # Please refer to https://github.com/open-mmlab/mmdetection3d/issues/305\n    # for the incorrect output before the fix.\n    points_device = points.get_device()\n    assert points_device == boxes.get_device(), \\\n        'Points and boxes should be put on the same device'\n    if torch.cuda.current_device() != points_device:\n        torch.cuda.set_device(points_device)\n\n    roiaware_pool3d_ext.points_in_boxes_gpu(boxes.contiguous(),\n                                            points.contiguous(),\n                                            box_idxs_of_pts)\n\n    return box_idxs_of_pts\n\n\ndef points_in_boxes_cpu(points, boxes):\n    \"\"\"Find points that are in boxes (CPU)\n\n    Note:\n        Currently, the output of this function is different from that of\n        points_in_boxes_gpu.\n\n    Args:\n        points (torch.Tensor): [npoints, 3]\n        boxes (torch.Tensor): [N, 7], in LiDAR coordinate,\n            (x, y, z) is the bottom center\n\n    Returns:\n        point_indices (torch.Tensor): (N, npoints)\n    \"\"\"\n    # TODO: Refactor this function as a CPU version of points_in_boxes_gpu\n    assert boxes.shape[1] == 7, \\\n        f'boxes dimension should be 7, ' \\\n        f'got unexpected shape {boxes.shape[2]}'\n    assert points.shape[1] == 3, \\\n        f'points dimension should be 3, ' \\\n        f'got unexpected shape {points.shape[2]}'\n\n    point_indices = points.new_zeros((boxes.shape[0], points.shape[0]),\n                                     dtype=torch.int)\n    roiaware_pool3d_ext.points_in_boxes_cpu(boxes.float().contiguous(),\n                                            points.float().contiguous(),\n                                            point_indices)\n\n    return point_indices\n\n\ndef points_in_boxes_batch(points, boxes):\n    \"\"\"Find points that are in boxes (CUDA)\n\n    Args:\n        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR coordinate\n        boxes (torch.Tensor): [B, T, 7],\n            num_valid_boxes <= T, [x, y, z, w, l, h, ry] in LiDAR coordinate,\n            (x, 
y, z) is the bottom center.\n\n    Returns:\n        box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0\n    \"\"\"\n    assert boxes.shape[0] == points.shape[0], \\\n        f'Points and boxes should have the same batch size, ' \\\n        f'got {boxes.shape[0]} and {boxes.shape[0]}'\n    assert boxes.shape[2] == 7, \\\n        f'boxes dimension should be 7, ' \\\n        f'got unexpected shape {boxes.shape[2]}'\n    assert points.shape[2] == 3, \\\n        f'points dimension should be 3, ' \\\n        f'got unexpected shape {points.shape[2]}'\n    batch_size, num_points, _ = points.shape\n    num_boxes = boxes.shape[1]\n\n    box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes),\n                                       dtype=torch.int).fill_(0)\n\n    # Same reason as line 25-32\n    points_device = points.get_device()\n    assert points_device == boxes.get_device(), \\\n        'Points and boxes should be put on the same device'\n    if torch.cuda.current_device() != points_device:\n        torch.cuda.set_device(points_device)\n\n    roiaware_pool3d_ext.points_in_boxes_batch(boxes.contiguous(),\n                                              points.contiguous(),\n                                              box_idxs_of_pts)\n\n    return box_idxs_of_pts\n"
  },
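The three helpers differ mainly in their outputs: the GPU variant returns, per point, the index of the first box containing it (-1 for background), the batch variant returns a `(B, M, T)` 0/1 indicator, and the CPU variant works on a single un-batched cloud and returns an `(N, npoints)` indicator. A small sketch (the CPU path only needs the compiled extension; the other two need a GPU):

```python
# Sketch of the three entry points; box/point values are arbitrary examples.
import torch

from mmdet3d.ops.roiaware_pool3d import (points_in_boxes_batch,
                                         points_in_boxes_cpu,
                                         points_in_boxes_gpu)

points_cpu = torch.rand(100, 3)                                  # (npoints, 3)
boxes_cpu = torch.tensor([[0.5, 0.5, 0.0, 1.0, 1.0, 1.0, 0.0]])  # (N, 7), bottom-center boxes
idx_cpu = points_in_boxes_cpu(points_cpu, boxes_cpu)             # (N, npoints) 0/1

if torch.cuda.is_available():
    points = torch.rand(2, 100, 3).cuda()       # (B, M, 3)
    boxes = torch.rand(2, 5, 7).cuda()          # (B, T, 7)
    idx = points_in_boxes_gpu(points, boxes)    # (B, M), -1 where no box matches
    ind = points_in_boxes_batch(points, boxes)  # (B, M, T) 0/1 indicator
```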
  {
    "path": "mmdet3d/ops/roiaware_pool3d/roiaware_pool3d.py",
    "content": "import mmcv\nimport torch\nfrom torch import nn as nn\nfrom torch.autograd import Function\n\nfrom . import roiaware_pool3d_ext\n\n\nclass RoIAwarePool3d(nn.Module):\n\n    def __init__(self, out_size, max_pts_per_voxel=128, mode='max'):\n        super().__init__()\n        \"\"\"RoIAwarePool3d module\n\n        Args:\n            out_size (int or tuple): n or [n1, n2, n3]\n            max_pts_per_voxel (int): m\n            mode (str): 'max' or 'avg'\n        \"\"\"\n        self.out_size = out_size\n        self.max_pts_per_voxel = max_pts_per_voxel\n        assert mode in ['max', 'avg']\n        pool_method_map = {'max': 0, 'avg': 1}\n        self.mode = pool_method_map[mode]\n\n    def forward(self, rois, pts, pts_feature):\n        \"\"\"RoIAwarePool3d module forward.\n\n        Args:\n            rois (torch.Tensor): [N, 7],in LiDAR coordinate,\n                (x, y, z) is the bottom center of rois\n            pts (torch.Tensor): [npoints, 3]\n            pts_feature (torch.Tensor): [npoints, C]\n\n        Returns:\n            pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]\n        \"\"\"\n\n        return RoIAwarePool3dFunction.apply(rois, pts, pts_feature,\n                                            self.out_size,\n                                            self.max_pts_per_voxel, self.mode)\n\n\nclass RoIAwarePool3dFunction(Function):\n\n    @staticmethod\n    def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel,\n                mode):\n        \"\"\"RoIAwarePool3d function forward.\n\n        Args:\n            rois (torch.Tensor): [N, 7], in LiDAR coordinate,\n                (x, y, z) is the bottom center of rois\n            pts (torch.Tensor): [npoints, 3]\n            pts_feature (torch.Tensor): [npoints, C]\n            out_size (int or tuple): n or [n1, n2, n3]\n            max_pts_per_voxel (int): m\n            mode (int): 0 (max pool) or 1 (average pool)\n\n        Returns:\n            pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]\n        \"\"\"\n\n        if isinstance(out_size, int):\n            out_x = out_y = out_z = out_size\n        else:\n            assert len(out_size) == 3\n            assert mmcv.is_tuple_of(out_size, int)\n            out_x, out_y, out_z = out_size\n\n        num_rois = rois.shape[0]\n        num_channels = pts_feature.shape[-1]\n        num_pts = pts.shape[0]\n\n        pooled_features = pts_feature.new_zeros(\n            (num_rois, out_x, out_y, out_z, num_channels))\n        argmax = pts_feature.new_zeros(\n            (num_rois, out_x, out_y, out_z, num_channels), dtype=torch.int)\n        pts_idx_of_voxels = pts_feature.new_zeros(\n            (num_rois, out_x, out_y, out_z, max_pts_per_voxel),\n            dtype=torch.int)\n\n        roiaware_pool3d_ext.forward(rois, pts, pts_feature, argmax,\n                                    pts_idx_of_voxels, pooled_features, mode)\n\n        ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode,\n                                            num_pts, num_channels)\n        return pooled_features\n\n    @staticmethod\n    def backward(ctx, grad_out):\n        \"\"\"RoIAwarePool3d function forward.\n\n        Args:\n            grad_out (torch.Tensor): [N, out_x, out_y, out_z, C]\n        Returns:\n            grad_in (torch.Tensor): [npoints, C]\n        \"\"\"\n        ret = ctx.roiaware_pool3d_for_backward\n        pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret\n\n        grad_in = 
grad_out.new_zeros((num_pts, num_channels))\n        roiaware_pool3d_ext.backward(pts_idx_of_voxels, argmax,\n                                     grad_out.contiguous(), grad_in, mode)\n\n        return None, None, grad_in, None, None, None\n\n\nif __name__ == '__main__':\n    pass\n"
  },
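  {
    "path": "examples/roiaware_pool3d_usage_sketch.py",
    "content": "# NOTE: hypothetical usage sketch, not part of the original repository.\n# It assumes mmdet3d was built with the roiaware_pool3d CUDA extension and\n# that a CUDA device is available; the file path and tensor values below are\n# illustrative only.\nimport torch\n\nfrom mmdet3d.ops.roiaware_pool3d.roiaware_pool3d import RoIAwarePool3d\n\n\ndef main():\n    # Two RoIs in LiDAR coordinates: (x, y, z, w, l, h, rz), z is the bottom center.\n    rois = torch.tensor(\n        [[0.0, 0.0, 0.0, 2.0, 4.0, 1.5, 0.0],\n         [5.0, 5.0, 0.0, 2.0, 4.0, 1.5, 0.3]],\n        dtype=torch.float32).cuda()\n    # 1000 random points, each with a 16-dim feature.\n    pts = torch.rand(1000, 3).cuda() * 10\n    pts_feature = torch.rand(1000, 16).cuda()\n\n    # Pool each RoI into a 4x4x4 voxel grid, keeping at most 128 points per voxel.\n    pool = RoIAwarePool3d(out_size=4, max_pts_per_voxel=128, mode='max')\n    pooled = pool(rois, pts, pts_feature)\n    print(pooled.shape)  # torch.Size([2, 4, 4, 4, 16])\n\n\nif __name__ == '__main__':\n    main()\n"
  },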
  {
    "path": "mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp",
    "content": "// Modified from\n// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu\n// Written by Shaoshuai Shi\n// All Rights Reserved 2019.\n\n#include <assert.h>\n#include <math.h>\n#include <stdio.h>\n#include <torch/extension.h>\n#include <torch/serialize/tensor.h>\n\n#define CHECK_CONTIGUOUS(x) \\\n  TORCH_CHECK(x.is_contiguous(), #x, \" must be contiguous \")\n// #define DEBUG\n\ninline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz,\n                                      float &local_x, float &local_y) {\n  // should rotate pi/2 + alpha to translate LiDAR to local\n  float rot_angle = rz + M_PI / 2;\n  float cosa = cos(rot_angle), sina = sin(rot_angle);\n  local_x = shift_x * cosa + shift_y * (-sina);\n  local_y = shift_x * sina + shift_y * cosa;\n}\n\ninline int check_pt_in_box3d_cpu(const float *pt, const float *box3d,\n                                 float &local_x, float &local_y) {\n  // param pt: (x, y, z)\n  // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the\n  // bottom center\n  float x = pt[0], y = pt[1], z = pt[2];\n  float cx = box3d[0], cy = box3d[1], cz = box3d[2];\n  float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];\n  cz += h / 2.0;  // shift to the center since cz in box3d is the bottom center\n\n  if (fabsf(z - cz) > h / 2.0) return 0;\n  lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y);\n  float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &\n                  (local_y > -w / 2.0) & (local_y < w / 2.0);\n  return in_flag;\n}\n\nint points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,\n                        at::Tensor pts_indices_tensor) {\n  // params boxes: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the\n  // bottom center, each box DO NOT overlaps params pts: (npoints, 3) [x, y, z]\n  // in LiDAR coordinate params pts_indices: (N, npoints)\n\n  CHECK_CONTIGUOUS(boxes_tensor);\n  CHECK_CONTIGUOUS(pts_tensor);\n  CHECK_CONTIGUOUS(pts_indices_tensor);\n\n  int boxes_num = boxes_tensor.size(0);\n  int pts_num = pts_tensor.size(0);\n\n  const float *boxes = boxes_tensor.data_ptr<float>();\n  const float *pts = pts_tensor.data_ptr<float>();\n  int *pts_indices = pts_indices_tensor.data_ptr<int>();\n\n  float local_x = 0, local_y = 0;\n  for (int i = 0; i < boxes_num; i++) {\n    for (int j = 0; j < pts_num; j++) {\n      int cur_in_flag =\n          check_pt_in_box3d_cpu(pts + j * 3, boxes + i * 7, local_x, local_y);\n      pts_indices[i * pts_num + j] = cur_in_flag;\n    }\n  }\n\n  return 1;\n}\n"
  },
  {
    "path": "mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu",
    "content": "// Modified from\n// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu\n// Written by Shaoshuai Shi\n// All Rights Reserved 2019.\n\n#include <assert.h>\n#include <math.h>\n#include <stdio.h>\n#include <torch/serialize/tensor.h>\n#include <torch/types.h>\n\n#define THREADS_PER_BLOCK 256\n#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))\n\n#define CHECK_CUDA(x) \\\n  TORCH_CHECK(x.device().is_cuda(), #x, \" must be a CUDAtensor \")\n#define CHECK_CONTIGUOUS(x) \\\n  TORCH_CHECK(x.is_contiguous(), #x, \" must be contiguous \")\n#define CHECK_INPUT(x) \\\n  CHECK_CUDA(x);       \\\n  CHECK_CONTIGUOUS(x)\n// #define DEBUG\n\n__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,\n                                             float rz, float &local_x,\n                                             float &local_y) {\n  // should rotate pi/2 + alpha to translate LiDAR to local\n  float rot_angle = rz + M_PI / 2;\n  float cosa = cos(rot_angle), sina = sin(rot_angle);\n  local_x = shift_x * cosa + shift_y * (-sina);\n  local_y = shift_x * sina + shift_y * cosa;\n}\n\n__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,\n                                        float &local_x, float &local_y) {\n  // param pt: (x, y, z)\n  // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the\n  // bottom center\n  float x = pt[0], y = pt[1], z = pt[2];\n  float cx = box3d[0], cy = box3d[1], cz = box3d[2];\n  float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];\n  cz += h / 2.0;  // shift to the center since cz in box3d is the bottom center\n\n  if (fabsf(z - cz) > h / 2.0) return 0;\n  lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);\n  float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &\n                  (local_y > -w / 2.0) & (local_y < w / 2.0);\n  return in_flag;\n}\n\n__global__ void points_in_boxes_kernel(int batch_size, int boxes_num,\n                                       int pts_num, const float *boxes,\n                                       const float *pts,\n                                       int *box_idx_of_points) {\n  // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is\n  // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,\n  // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default\n  // -1\n\n  int bs_idx = blockIdx.y;\n  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n  if (bs_idx >= batch_size || pt_idx >= pts_num) return;\n\n  boxes += bs_idx * boxes_num * 7;\n  pts += bs_idx * pts_num * 3 + pt_idx * 3;\n  box_idx_of_points += bs_idx * pts_num + pt_idx;\n\n  float local_x = 0, local_y = 0;\n  int cur_in_flag = 0;\n  for (int k = 0; k < boxes_num; k++) {\n    cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y);\n    if (cur_in_flag) {\n      box_idx_of_points[0] = k;\n      break;\n    }\n  }\n}\n\n__global__ void points_in_boxes_batch_kernel(int batch_size, int boxes_num,\n                                             int pts_num, const float *boxes,\n                                             const float *pts,\n                                             int *box_idx_of_points) {\n  // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is\n  // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,\n  // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default\n  // -1\n\n  
int bs_idx = blockIdx.y;\n  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n  if (bs_idx >= batch_size || pt_idx >= pts_num) return;\n\n  boxes += bs_idx * boxes_num * 7;\n  pts += bs_idx * pts_num * 3 + pt_idx * 3;\n  box_idx_of_points += bs_idx * pts_num * boxes_num + pt_idx * boxes_num;\n\n  float local_x = 0, local_y = 0;\n  int cur_in_flag = 0;\n  for (int k = 0; k < boxes_num; k++) {\n    cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y);\n    if (cur_in_flag) {\n      box_idx_of_points[k] = 1;\n    }\n    cur_in_flag = 0;\n  }\n}\n\nvoid points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num,\n                              const float *boxes, const float *pts,\n                              int *box_idx_of_points) {\n  // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is\n  // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,\n  // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default\n  // -1\n  cudaError_t err;\n\n  dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size);\n  dim3 threads(THREADS_PER_BLOCK);\n  points_in_boxes_kernel<<<blocks, threads>>>(batch_size, boxes_num, pts_num,\n                                              boxes, pts, box_idx_of_points);\n\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n\n#ifdef DEBUG\n  cudaDeviceSynchronize();  // for using printf in kernel function\n#endif\n}\n\nvoid points_in_boxes_batch_launcher(int batch_size, int boxes_num, int pts_num,\n                                    const float *boxes, const float *pts,\n                                    int *box_idx_of_points) {\n  // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is\n  // the bottom center, each box params pts: (B, npoints, 3) [x, y, z] in\n  // LiDAR coordinate params boxes_idx_of_points: (B, npoints), default -1\n  cudaError_t err;\n\n  dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size);\n  dim3 threads(THREADS_PER_BLOCK);\n  points_in_boxes_batch_kernel<<<blocks, threads>>>(\n      batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points);\n\n  err = cudaGetLastError();\n  if (cudaSuccess != err) {\n    fprintf(stderr, \"CUDA kernel failed : %s\\n\", cudaGetErrorString(err));\n    exit(-1);\n  }\n\n#ifdef DEBUG\n  cudaDeviceSynchronize();  // for using printf in kernel function\n#endif\n}\n\nint points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,\n                        at::Tensor box_idx_of_points_tensor) {\n  // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is\n  // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,\n  // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default\n  // -1\n\n  CHECK_INPUT(boxes_tensor);\n  CHECK_INPUT(pts_tensor);\n  CHECK_INPUT(box_idx_of_points_tensor);\n\n  int batch_size = boxes_tensor.size(0);\n  int boxes_num = boxes_tensor.size(1);\n  int pts_num = pts_tensor.size(1);\n\n  const float *boxes = boxes_tensor.data_ptr<float>();\n  const float *pts = pts_tensor.data_ptr<float>();\n  int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>();\n\n  points_in_boxes_launcher(batch_size, boxes_num, pts_num, boxes, pts,\n                           box_idx_of_points);\n\n  return 1;\n}\n\nint points_in_boxes_batch(at::Tensor boxes_tensor, at::Tensor pts_tensor,\n                          
at::Tensor box_idx_of_points_tensor) {\n  // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is\n  // the bottom center. params pts: (B, npoints, 3) [x, y, z] in LiDAR\n  // coordinate params boxes_idx_of_points: (B, npoints), default -1\n\n  CHECK_INPUT(boxes_tensor);\n  CHECK_INPUT(pts_tensor);\n  CHECK_INPUT(box_idx_of_points_tensor);\n\n  int batch_size = boxes_tensor.size(0);\n  int boxes_num = boxes_tensor.size(1);\n  int pts_num = pts_tensor.size(1);\n\n  const float *boxes = boxes_tensor.data_ptr<float>();\n  const float *pts = pts_tensor.data_ptr<float>();\n  int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>();\n\n  points_in_boxes_batch_launcher(batch_size, boxes_num, pts_num, boxes, pts,\n                                 box_idx_of_points);\n\n  return 1;\n}\n"
  },
  {
    "path": "mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d.cpp",
    "content": "// Modified from\n// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu\n// Written by Shaoshuai Shi\n// All Rights Reserved 2019.\n\n#include <assert.h>\n#include <torch/extension.h>\n#include <torch/serialize/tensor.h>\n\n#define CHECK_CUDA(x) \\\n  TORCH_CHECK(x.device().is_cuda(), #x, \" must be a CUDAtensor \")\n#define CHECK_CONTIGUOUS(x) \\\n  TORCH_CHECK(x.is_contiguous(), #x, \" must be contiguous \")\n#define CHECK_INPUT(x) \\\n  CHECK_CUDA(x);       \\\n  CHECK_CONTIGUOUS(x)\n\nvoid roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,\n                              int max_pts_each_voxel, int out_x, int out_y,\n                              int out_z, const float *rois, const float *pts,\n                              const float *pts_feature, int *argmax,\n                              int *pts_idx_of_voxels, float *pooled_features,\n                              int pool_method);\n\nvoid roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,\n                                       int out_z, int channels,\n                                       int max_pts_each_voxel,\n                                       const int *pts_idx_of_voxels,\n                                       const int *argmax, const float *grad_out,\n                                       float *grad_in, int pool_method);\n\nint roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,\n                        at::Tensor argmax, at::Tensor pts_idx_of_voxels,\n                        at::Tensor pooled_features, int pool_method);\n\nint roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels,\n                                 at::Tensor argmax, at::Tensor grad_out,\n                                 at::Tensor grad_in, int pool_method);\n\nint points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,\n                        at::Tensor pts_indices_tensor);\n\nint points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,\n                        at::Tensor box_idx_of_points_tensor);\n\nint points_in_boxes_batch(at::Tensor boxes_tensor, at::Tensor pts_tensor,\n                          at::Tensor box_idx_of_points_tensor);\n\nint roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature,\n                        at::Tensor argmax, at::Tensor pts_idx_of_voxels,\n                        at::Tensor pooled_features, int pool_method) {\n  // params rois: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coordinate\n  // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate\n  // params pts_feature: (npoints, C)\n  // params argmax: (N, out_x, out_y, out_z, C)\n  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)\n  // params pooled_features: (N, out_x, out_y, out_z, C)\n  // params pool_method: 0: max_pool 1: avg_pool\n\n  CHECK_INPUT(rois);\n  CHECK_INPUT(pts);\n  CHECK_INPUT(pts_feature);\n  CHECK_INPUT(argmax);\n  CHECK_INPUT(pts_idx_of_voxels);\n  CHECK_INPUT(pooled_features);\n\n  int boxes_num = rois.size(0);\n  int pts_num = pts.size(0);\n  int channels = pts_feature.size(1);\n  int max_pts_each_voxel = pts_idx_of_voxels.size(4);  // index 0 is the counter\n  int out_x = pts_idx_of_voxels.size(1);\n  int out_y = pts_idx_of_voxels.size(2);\n  int out_z = pts_idx_of_voxels.size(3);\n  assert((out_x < 256) && (out_y < 256) &&\n         (out_z < 256));  // we encode index with 8bit\n\n  const float *rois_data = rois.data_ptr<float>();\n  const 
float *pts_data = pts.data_ptr<float>();\n  const float *pts_feature_data = pts_feature.data_ptr<float>();\n  int *argmax_data = argmax.data_ptr<int>();\n  int *pts_idx_of_voxels_data = pts_idx_of_voxels.data_ptr<int>();\n  float *pooled_features_data = pooled_features.data_ptr<float>();\n\n  roiaware_pool3d_launcher(\n      boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,\n      rois_data, pts_data, pts_feature_data, argmax_data,\n      pts_idx_of_voxels_data, pooled_features_data, pool_method);\n\n  return 1;\n}\n\nint roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels,\n                                 at::Tensor argmax, at::Tensor grad_out,\n                                 at::Tensor grad_in, int pool_method) {\n  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)\n  // params argmax: (N, out_x, out_y, out_z, C)\n  // params grad_out: (N, out_x, out_y, out_z, C)\n  // params grad_in: (npoints, C), return value\n  // params pool_method: 0: max_pool 1: avg_pool\n\n  CHECK_INPUT(pts_idx_of_voxels);\n  CHECK_INPUT(argmax);\n  CHECK_INPUT(grad_out);\n  CHECK_INPUT(grad_in);\n\n  int boxes_num = pts_idx_of_voxels.size(0);\n  int out_x = pts_idx_of_voxels.size(1);\n  int out_y = pts_idx_of_voxels.size(2);\n  int out_z = pts_idx_of_voxels.size(3);\n  int max_pts_each_voxel = pts_idx_of_voxels.size(4);  // index 0 is the counter\n  int channels = grad_out.size(4);\n\n  const int *pts_idx_of_voxels_data = pts_idx_of_voxels.data_ptr<int>();\n  const int *argmax_data = argmax.data_ptr<int>();\n  const float *grad_out_data = grad_out.data_ptr<float>();\n  float *grad_in_data = grad_in.data_ptr<float>();\n\n  roiaware_pool3d_backward_launcher(boxes_num, out_x, out_y, out_z, channels,\n                                    max_pts_each_voxel, pts_idx_of_voxels_data,\n                                    argmax_data, grad_out_data, grad_in_data,\n                                    pool_method);\n\n  return 1;\n}\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"forward\", &roiaware_pool3d_gpu, \"roiaware pool3d forward (CUDA)\");\n  m.def(\"backward\", &roiaware_pool3d_gpu_backward,\n        \"roiaware pool3d backward (CUDA)\");\n  m.def(\"points_in_boxes_gpu\", &points_in_boxes_gpu,\n        \"points_in_boxes_gpu forward (CUDA)\");\n  m.def(\"points_in_boxes_batch\", &points_in_boxes_batch,\n        \"points_in_boxes_batch forward (CUDA)\");\n  m.def(\"points_in_boxes_cpu\", &points_in_boxes_cpu,\n        \"points_in_boxes_cpu forward (CPU)\");\n}\n"
  },
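  {
    "path": "examples/points_in_boxes_usage_sketch.py",
    "content": "# NOTE: hypothetical usage sketch, not part of the original repository.\n# It calls the raw roiaware_pool3d_ext binding registered in roiaware_pool3d.cpp\n# and assumes the CUDA extension has been compiled; boxes are assumed not to\n# overlap, as required by points_in_boxes_gpu.\nimport torch\n\nfrom mmdet3d.ops.roiaware_pool3d import roiaware_pool3d_ext\n\n\ndef main():\n    batch_size, num_pts = 1, 500\n    # boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinates, z is the\n    # bottom center of each box.\n    boxes = torch.tensor(\n        [[[0.0, 0.0, 0.0, 2.0, 4.0, 1.5, 0.0],\n          [6.0, 6.0, 0.0, 2.0, 4.0, 1.5, 0.3]]],\n        dtype=torch.float32).cuda()\n    pts = torch.rand(batch_size, num_pts, 3).cuda() * 10\n    # box_idx_of_points: (B, npoints), initialised to -1 (point lies in no box).\n    box_idx = pts.new_full((batch_size, num_pts), -1, dtype=torch.int32)\n\n    roiaware_pool3d_ext.points_in_boxes_gpu(boxes, pts, box_idx)\n    print((box_idx >= 0).sum().item(), 'points fall inside a box')\n\n\nif __name__ == '__main__':\n    main()\n"
  },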
  {
    "path": "mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu",
    "content": "// Modified from\n// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu\n// Written by Shaoshuai Shi\n// All Rights Reserved 2019.\n\n#include <assert.h>\n#include <math.h>\n#include <stdio.h>\n#include <torch/serialize/tensor.h>\n#include <torch/types.h>\n\n#define THREADS_PER_BLOCK 256\n#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))\n\n// #define DEBUG\n\n__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,\n                                             float rz, float &local_x,\n                                             float &local_y) {\n  // should rotate pi/2 + alpha to translate LiDAR to local\n  float rot_angle = rz + M_PI / 2;\n  float cosa = cos(rot_angle), sina = sin(rot_angle);\n  local_x = shift_x * cosa + shift_y * (-sina);\n  local_y = shift_x * sina + shift_y * cosa;\n}\n\n__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,\n                                        float &local_x, float &local_y) {\n  // param pt: (x, y, z)\n  // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the\n  // bottom center\n  float x = pt[0], y = pt[1], z = pt[2];\n  float cx = box3d[0], cy = box3d[1], cz = box3d[2];\n  float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];\n  cz += h / 2.0;  // shift to the center since cz in box3d is the bottom center\n\n  if (fabsf(z - cz) > h / 2.0) return 0;\n  lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);\n  float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &\n                  (local_y > -w / 2.0) & (local_y < w / 2.0);\n  return in_flag;\n}\n\n__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num,\n                                            int out_x, int out_y, int out_z,\n                                            const float *rois, const float *pts,\n                                            int *pts_mask) {\n  // params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate\n  // params pts: (npoints, 3) [x, y, z]\n  // params pts_mask: (N, npoints): -1 means point doesnot in this box,\n  // otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit\n  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;\n  int box_idx = blockIdx.y;\n  if (pt_idx >= pts_num || box_idx >= boxes_num) return;\n\n  pts += pt_idx * 3;\n  rois += box_idx * 7;\n  pts_mask += box_idx * pts_num + pt_idx;\n\n  float local_x = 0, local_y = 0;\n  int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y);\n\n  pts_mask[0] = -1;\n  if (cur_in_flag > 0) {\n    float local_z = pts[2] - rois[2];\n    float w = rois[3], l = rois[4], h = rois[5];\n\n    float x_res = l / out_x;\n    float y_res = w / out_y;\n    float z_res = h / out_z;\n\n    unsigned int x_idx = int((local_x + l / 2) / x_res);\n    unsigned int y_idx = int((local_y + w / 2) / y_res);\n    unsigned int z_idx = int(local_z / z_res);\n\n    x_idx = min(max(x_idx, 0), out_x - 1);\n    y_idx = min(max(y_idx, 0), out_y - 1);\n    z_idx = min(max(z_idx, 0), out_z - 1);\n\n    unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx;\n#ifdef DEBUG\n    printf(\n        \"mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, \"\n        \"%d), res(%.3f, %.3f, %.3f), idx_encoding=%x\\n\",\n        pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx,\n        z_idx, x_res, y_res, z_res, idx_encoding);\n#endif\n\n    pts_mask[0] = idx_encoding;\n  }\n}\n\n__global__ void 
collect_inside_pts_for_box3d(int boxes_num, int pts_num,\n                                             int max_pts_each_voxel, int out_x,\n                                             int out_y, int out_z,\n                                             const int *pts_mask,\n                                             int *pts_idx_of_voxels) {\n  // params pts_mask: (N, npoints)  0 or 1\n  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)\n\n  int box_idx = blockIdx.x * blockDim.x + threadIdx.x;\n  if (box_idx >= boxes_num) return;\n\n  int max_num_pts = max_pts_each_voxel - 1;  // index 0 is the counter\n  pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel;\n\n  for (int k = 0; k < pts_num; k++) {\n    if (pts_mask[box_idx * pts_num + k] != -1) {\n      unsigned int idx_encoding = pts_mask[box_idx * pts_num + k];\n      unsigned int x_idx = (idx_encoding >> 16) & 0xFF;\n      unsigned int y_idx = (idx_encoding >> 8) & 0xFF;\n      unsigned int z_idx = idx_encoding & 0xFF;\n      unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel +\n                                 y_idx * out_z * max_pts_each_voxel +\n                                 z_idx * max_pts_each_voxel;\n      unsigned int cnt = pts_idx_of_voxels[base_offset];\n      if (cnt < max_num_pts) {\n        pts_idx_of_voxels[base_offset + cnt + 1] = k;\n        pts_idx_of_voxels[base_offset]++;\n      }\n#ifdef DEBUG\n      printf(\"collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\\n\", k, x_idx,\n             y_idx, z_idx, idx_encoding);\n#endif\n    }\n  }\n}\n\n__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels,\n                                   int max_pts_each_voxel, int out_x, int out_y,\n                                   int out_z, const float *pts_feature,\n                                   const int *pts_idx_of_voxels,\n                                   float *pooled_features, int *argmax) {\n  // params pts_feature: (npoints, C)\n  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),\n  // index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)\n  // params argmax: (N, out_x, out_y, out_z, C)\n\n  int box_idx = blockIdx.z;\n  int channel_idx = blockIdx.y;\n  int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;\n\n  int x_idx = voxel_idx_flat / (out_y * out_z);\n  int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;\n  int z_idx = voxel_idx_flat % out_z;\n  if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||\n      y_idx >= out_y || z_idx >= out_z)\n    return;\n\n#ifdef DEBUG\n  printf(\"src pts_idx_of_voxels: (%p, ), argmax: %p\\n\", pts_idx_of_voxels,\n         argmax);\n#endif\n\n  int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;\n  pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +\n                       offset_base * max_pts_each_voxel;\n  pooled_features += box_idx * out_x * out_y * out_z * channels +\n                     offset_base * channels + channel_idx;\n  argmax += box_idx * out_x * out_y * out_z * channels +\n            offset_base * channels + channel_idx;\n\n  int argmax_idx = -1;\n  float max_val = -1e50;\n\n  int total_pts = pts_idx_of_voxels[0];\n\n  for (int k = 1; k <= total_pts; k++) {\n    if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val) {\n      max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];\n      argmax_idx = pts_idx_of_voxels[k];\n    }\n 
 }\n\n  if (argmax_idx != -1) {\n    pooled_features[0] = max_val;\n  }\n  argmax[0] = argmax_idx;\n\n#ifdef DEBUG\n  printf(\n      \"channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after \"\n      \"pts_idx: %p, argmax: (%p, %d)\\n\",\n      channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts,\n      pts_idx_of_voxels, argmax, argmax_idx);\n#endif\n}\n\n__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels,\n                                   int max_pts_each_voxel, int out_x, int out_y,\n                                   int out_z, const float *pts_feature,\n                                   const int *pts_idx_of_voxels,\n                                   float *pooled_features) {\n  // params pts_feature: (npoints, C)\n  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),\n  // index 0 is the counter params pooled_features: (N, out_x, out_y, out_z, C)\n  // params argmax: (N, out_x, out_y, out_z, C)\n\n  int box_idx = blockIdx.z;\n  int channel_idx = blockIdx.y;\n  int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;\n\n  int x_idx = voxel_idx_flat / (out_y * out_z);\n  int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;\n  int z_idx = voxel_idx_flat % out_z;\n  if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||\n      y_idx >= out_y || z_idx >= out_z)\n    return;\n\n  int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;\n  pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +\n                       offset_base * max_pts_each_voxel;\n  pooled_features += box_idx * out_x * out_y * out_z * channels +\n                     offset_base * channels + channel_idx;\n\n  float sum_val = 0;\n  int total_pts = pts_idx_of_voxels[0];\n\n  for (int k = 1; k <= total_pts; k++) {\n    sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];\n  }\n\n  if (total_pts > 0) {\n    pooled_features[0] = sum_val / total_pts;\n  }\n}\n\nvoid roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,\n                              int max_pts_each_voxel, int out_x, int out_y,\n                              int out_z, const float *rois, const float *pts,\n                              const float *pts_feature, int *argmax,\n                              int *pts_idx_of_voxels, float *pooled_features,\n                              int pool_method) {\n  // params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate\n  // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate\n  // params pts_feature: (npoints, C)\n  // params argmax: (N, out_x, out_y, out_z, C)\n  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)\n  // params pooled_features: (N, out_x, out_y, out_z, C)\n  // params pool_method: 0: max_pool 1: avg_pool\n\n  int *pts_mask = NULL;\n  cudaMalloc(&pts_mask, boxes_num * pts_num * sizeof(int));  // (N, M)\n  cudaMemset(pts_mask, -1, boxes_num * pts_num * sizeof(int));\n\n  dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num);\n  dim3 threads(THREADS_PER_BLOCK);\n  generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(\n      boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask);\n\n  // TODO: Merge the collect and pool functions, SS\n\n  dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK));\n  collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(\n      boxes_num, pts_num, max_pts_each_voxel, out_x, out_y, out_z, pts_mask,\n      pts_idx_of_voxels);\n\n  dim3 
blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,\n                   boxes_num);\n  if (pool_method == 0) {\n    roiaware_maxpool3d<<<blocks_pool, threads>>>(\n        boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,\n        pts_feature, pts_idx_of_voxels, pooled_features, argmax);\n  } else if (pool_method == 1) {\n    roiaware_avgpool3d<<<blocks_pool, threads>>>(\n        boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,\n        pts_feature, pts_idx_of_voxels, pooled_features);\n  }\n\n  cudaFree(pts_mask);\n\n#ifdef DEBUG\n  cudaDeviceSynchronize();  // for using printf in kernel function\n#endif\n}\n\n__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels,\n                                            int out_x, int out_y, int out_z,\n                                            const int *argmax,\n                                            const float *grad_out,\n                                            float *grad_in) {\n  // params argmax: (N, out_x, out_y, out_z, C)\n  // params grad_out: (N, out_x, out_y, out_z, C)\n  // params grad_in: (npoints, C), return value\n\n  int box_idx = blockIdx.z;\n  int channel_idx = blockIdx.y;\n  int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;\n\n  int x_idx = voxel_idx_flat / (out_y * out_z);\n  int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;\n  int z_idx = voxel_idx_flat % out_z;\n  if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||\n      y_idx >= out_y || z_idx >= out_z)\n    return;\n\n  int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;\n  argmax += box_idx * out_x * out_y * out_z * channels +\n            offset_base * channels + channel_idx;\n  grad_out += box_idx * out_x * out_y * out_z * channels +\n              offset_base * channels + channel_idx;\n\n  if (argmax[0] == -1) return;\n\n  atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1);\n}\n\n__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels,\n                                            int out_x, int out_y, int out_z,\n                                            int max_pts_each_voxel,\n                                            const int *pts_idx_of_voxels,\n                                            const float *grad_out,\n                                            float *grad_in) {\n  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)\n  // params grad_out: (N, out_x, out_y, out_z, C)\n  // params grad_in: (npoints, C), return value\n\n  int box_idx = blockIdx.z;\n  int channel_idx = blockIdx.y;\n  int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;\n\n  int x_idx = voxel_idx_flat / (out_y * out_z);\n  int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;\n  int z_idx = voxel_idx_flat % out_z;\n  if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||\n      y_idx >= out_y || z_idx >= out_z)\n    return;\n\n  int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;\n  pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +\n                       offset_base * max_pts_each_voxel;\n  grad_out += box_idx * out_x * out_y * out_z * channels +\n              offset_base * channels + channel_idx;\n\n  int total_pts = pts_idx_of_voxels[0];\n  float cur_grad = 1 / fmaxf(float(total_pts), 1.0);\n  for (int k = 1; k <= total_pts; k++) {\n    atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx,\n      
        grad_out[0] * cur_grad);\n  }\n}\n\nvoid roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,\n                                       int out_z, int channels,\n                                       int max_pts_each_voxel,\n                                       const int *pts_idx_of_voxels,\n                                       const int *argmax, const float *grad_out,\n                                       float *grad_in, int pool_method) {\n  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)\n  // params argmax: (N, out_x, out_y, out_z, C)\n  // params grad_out: (N, out_x, out_y, out_z, C)\n  // params grad_in: (npoints, C), return value\n  // params pool_method: 0: max_pool, 1: avg_pool\n\n  dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,\n              boxes_num);\n  dim3 threads(THREADS_PER_BLOCK);\n  if (pool_method == 0) {\n    roiaware_maxpool3d_backward<<<blocks, threads>>>(\n        boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in);\n  } else if (pool_method == 1) {\n    roiaware_avgpool3d_backward<<<blocks, threads>>>(\n        boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel,\n        pts_idx_of_voxels, grad_out, grad_in);\n  }\n}\n"
  },
  {
    "path": "mmdet3d/ops/sparse_block.py",
    "content": "from mmcv.cnn import build_conv_layer, build_norm_layer\nfrom torch import nn\n\nfrom mmdet3d.ops import spconv\nfrom mmdet.models.backbones.resnet import BasicBlock, Bottleneck\n\ndef replace_feature(out, new_features):\n    if 'replace_feature' in out.__dir__():\n        # spconv 2.x behaviour\n        return out.replace_feature(new_features)\n    else:\n        out.features = new_features\n        return out\n\nclass SparseBottleneck(Bottleneck, spconv.SparseModule):\n    \"\"\"Sparse bottleneck block for PartA^2.\n\n    Bottleneck block implemented with submanifold sparse convolution.\n\n    Args:\n        inplanes (int): inplanes of block.\n        planes (int): planes of block.\n        stride (int): stride of the first block. Default: 1\n        downsample (None | Module): down sample module for block.\n        conv_cfg (dict): dictionary to construct and config conv layer.\n            Default: None\n        norm_cfg (dict): dictionary to construct and config norm layer.\n            Default: dict(type='BN')\n    \"\"\"\n\n    expansion = 4\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 stride=1,\n                 downsample=None,\n                 conv_cfg=None,\n                 norm_cfg=None):\n\n        spconv.SparseModule.__init__(self)\n        Bottleneck.__init__(\n            self,\n            inplanes,\n            planes,\n            stride=stride,\n            downsample=downsample,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg)\n\n    def forward(self, x):\n        identity = x.features\n\n        out = self.conv1(x)\n        # out.features = self.bn1(out.features)\n        # out.features = self.relu(out.features)\n        out = replace_feature(out, self.bn1(out.features))\n        out = replace_feature(out, self.relu(out.features))\n\n        out = self.conv2(out)\n        # out.features = self.bn2(out.features)\n        # out.features = self.relu(out.features)\n        out = replace_feature(out, self.bn2(out.features))\n        out = replace_feature(out, self.relu(out.features))\n\n        out = self.conv3(out)\n        # out.features = self.bn3(out.features)\n        out = replace_feature(out, self.bn3(out.features))\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        # out.features += identity\n        # out.features = self.relu(out.features)\n        out = replace_feature(out, out.features + identity)\n        out = replace_feature(out, self.relu(out.features))\n\n        return out\n\n\nclass SparseBasicBlock(BasicBlock, spconv.SparseModule):\n    \"\"\"Sparse basic block for PartA^2.\n\n    Sparse basic block implemented with submanifold sparse convolution.\n\n    Args:\n        inplanes (int): inplanes of block.\n        planes (int): planes of block.\n        stride (int): stride of the first block. 
Default: 1\n        downsample (None | Module): down sample module for block.\n        conv_cfg (dict): dictionary to construct and config conv layer.\n            Default: None\n        norm_cfg (dict): dictionary to construct and config norm layer.\n            Default: dict(type='BN')\n    \"\"\"\n\n    expansion = 1\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 stride=1,\n                 downsample=None,\n                 conv_cfg=None,\n                 norm_cfg=None):\n        spconv.SparseModule.__init__(self)\n        BasicBlock.__init__(\n            self,\n            inplanes,\n            planes,\n            stride=stride,\n            downsample=downsample,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg)\n\n    def forward(self, x):\n        identity = x.features\n\n        assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'\n\n        out = self.conv1(x)\n        # out.features = self.norm1(out.features)\n        # out.features = self.relu(out.features)\n        out = replace_feature(out, self.norm1(out.features))\n        out = replace_feature(out, self.relu(out.features))\n\n        out = self.conv2(out)\n        # out.features = self.norm2(out.features)\n        out = replace_feature(out, self.norm2(out.features))\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        # out.features += identity\n        # out.features = self.relu(out.features)\n        out = replace_feature(out, out.features + identity)\n        out = replace_feature(out, self.relu(out.features))\n\n        return out\n\n\ndef make_sparse_convmodule(in_channels,\n                           out_channels,\n                           kernel_size,\n                           indice_key,\n                           stride=1,\n                           padding=0,\n                           conv_type='SubMConv3d',\n                           norm_cfg=None,\n                           order=('conv', 'norm', 'act')):\n    \"\"\"Make sparse convolution module.\n\n    Args:\n        in_channels (int): the number of input channels\n        out_channels (int): the number of out channels\n        kernel_size (int|tuple(int)): kernel size of convolution\n        indice_key (str): the indice key used for sparse tensor\n        stride (int|tuple(int)): the stride of convolution\n        padding (int or list[int]): the padding number of input\n        conv_type (str): sparse conv type in spconv\n        norm_cfg (dict[str]): config of normalization layer\n        order (tuple[str]): The order of conv/norm/activation layers. It is a\n            sequence of \"conv\", \"norm\" and \"act\". 
Common examples are\n            (\"conv\", \"norm\", \"act\") and (\"act\", \"conv\", \"norm\").\n\n    Returns:\n        spconv.SparseSequential: sparse convolution module.\n    \"\"\"\n    assert isinstance(order, tuple) and len(order) <= 3\n    assert set(order) | {'conv', 'norm', 'act'} == {'conv', 'norm', 'act'}\n\n    conv_cfg = dict(type=conv_type, indice_key=indice_key)\n\n    layers = list()\n    for layer in order:\n        if layer == 'conv':\n            if conv_type not in [\n                    'SparseInverseConv3d', 'SparseInverseConv2d',\n                    'SparseInverseConv1d'\n            ]:\n                layers.append(\n                    build_conv_layer(\n                        conv_cfg,\n                        in_channels,\n                        out_channels,\n                        kernel_size,\n                        stride=stride,\n                        padding=padding,\n                        bias=False))\n            else:\n                layers.append(\n                    build_conv_layer(\n                        conv_cfg,\n                        in_channels,\n                        out_channels,\n                        kernel_size,\n                        bias=False))\n        elif layer == 'norm':\n            layers.append(build_norm_layer(norm_cfg, out_channels)[1])\n        elif layer == 'act':\n            layers.append(nn.ReLU(inplace=True))\n\n    layers = spconv.SparseSequential(*layers)\n    return layers\n"
  },
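  {
    "path": "examples/sparse_block_usage_sketch.py",
    "content": "# NOTE: hypothetical usage sketch, not part of the original repository.\n# It assumes spconv (re-exported as mmdet3d.ops.spconv) and a CUDA device are\n# available; the spatial shape, channel sizes and norm config are illustrative.\nimport torch\n\nfrom mmdet3d.ops import spconv\nfrom mmdet3d.ops.sparse_block import SparseBasicBlock, make_sparse_convmodule\n\n\ndef main():\n    # 100 active voxels in a [41, 400, 352] grid, each with a 16-dim feature.\n    features = torch.rand(100, 16).cuda()\n    indices = torch.zeros(100, 4, dtype=torch.int32)  # (batch_idx, z, y, x)\n    indices[:, 1] = torch.randint(0, 41, (100,))\n    indices[:, 2] = torch.randint(0, 400, (100,))\n    indices[:, 3] = torch.randint(0, 352, (100,))\n    x = spconv.SparseConvTensor(features, indices.cuda(), [41, 400, 352], 1)\n\n    # Submanifold conv + BN1d + ReLU, then a residual SparseBasicBlock that\n    # reuses the same indice_key (and thus the cached indice pairs).\n    norm_cfg = dict(type='BN1d', eps=1e-3, momentum=0.01)\n    conv = make_sparse_convmodule(\n        16, 32, kernel_size=3, indice_key='subm1', padding=1,\n        conv_type='SubMConv3d', norm_cfg=norm_cfg).cuda()\n    block = SparseBasicBlock(\n        32, 32,\n        conv_cfg=dict(type='SubMConv3d', indice_key='subm1'),\n        norm_cfg=norm_cfg).cuda()\n\n    out = block(conv(x))\n    print(out.features.shape)  # torch.Size([100, 32])\n\n\nif __name__ == '__main__':\n    main()\n"
  },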
  {
    "path": "mmdet3d/ops/spconv/__init__.py",
    "content": "# Copyright 2019 Yan Yan\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n# from .conv import (SparseConv2d, SparseConv3d, SparseConvTranspose2d,\n#                    SparseConvTranspose3d, SparseInverseConv2d,\n#                    SparseInverseConv3d, SubMConv2d, SubMConv3d)\n# from .modules import SparseModule, SparseSequential\n# from .pool import SparseMaxPool2d, SparseMaxPool3d\n# from .structure import SparseConvTensor, scatter_nd\nfrom spconv.pytorch.conv import (SparseConv2d, SparseConv3d, SparseConvTranspose2d,\n                   SparseConvTranspose3d, SparseInverseConv2d,\n                   SparseInverseConv3d, SubMConv2d, SubMConv3d)\nfrom spconv.pytorch.modules import SparseModule, SparseSequential\nfrom spconv.pytorch.pool import SparseMaxPool2d, SparseMaxPool3d\nfrom spconv.pytorch.core import SparseConvTensor, scatter_nd\nfrom .overwrite_spconv.write_spconv2 import register_spconv2\n\nregister_spconv2()\n\n__all__ = [\n    'SparseConv2d',\n    'SparseConv3d',\n    'SubMConv2d',\n    'SubMConv3d',\n    'SparseConvTranspose2d',\n    'SparseConvTranspose3d',\n    'SparseInverseConv2d',\n    'SparseInverseConv3d',\n    'SparseModule',\n    'SparseSequential',\n    'SparseMaxPool2d',\n    'SparseMaxPool3d',\n    'SparseConvTensor',\n    'scatter_nd',\n]\n"
  },
  {
    "path": "mmdet3d/ops/spconv/conv.py",
    "content": "# Copyright 2019 Yan Yan\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport math\nimport numpy as np\nimport torch\nfrom mmcv.cnn import CONV_LAYERS\nfrom torch.nn import init\nfrom torch.nn.parameter import Parameter\n\nfrom . import functional as Fsp\nfrom . import ops\nfrom .modules import SparseModule\nfrom .structure import SparseConvTensor\n\n\ndef _calculate_fan_in_and_fan_out_hwio(tensor):\n    dimensions = tensor.ndimension()\n    if dimensions < 2:\n        raise ValueError('fan in and fan out can not be computed for tensor'\n                         'with fewer than 2 dimensions')\n\n    if dimensions == 2:  # Linear\n        fan_in = tensor.size(-2)\n        fan_out = tensor.size(-1)\n    else:\n        num_input_fmaps = tensor.size(-2)\n        num_output_fmaps = tensor.size(-1)\n        receptive_field_size = 1\n        if tensor.dim() > 2:\n            receptive_field_size = tensor[..., 0, 0].numel()\n        fan_in = num_input_fmaps * receptive_field_size\n        fan_out = num_output_fmaps * receptive_field_size\n\n    return fan_in, fan_out\n\n\nclass SparseConvolution(SparseModule):\n\n    def __init__(self,\n                 ndim,\n                 in_channels,\n                 out_channels,\n                 kernel_size=3,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias=True,\n                 subm=False,\n                 output_padding=0,\n                 transposed=False,\n                 inverse=False,\n                 indice_key=None,\n                 fused_bn=False):\n        super(SparseConvolution, self).__init__()\n        assert groups == 1\n        if not isinstance(kernel_size, (list, tuple)):\n            kernel_size = [kernel_size] * ndim\n        if not isinstance(stride, (list, tuple)):\n            stride = [stride] * ndim\n        if not isinstance(padding, (list, tuple)):\n            padding = [padding] * ndim\n        if not isinstance(dilation, (list, tuple)):\n            dilation = [dilation] * ndim\n        if not isinstance(output_padding, (list, tuple)):\n            output_padding = [output_padding] * ndim\n\n        for d, s in zip(dilation, stride):\n            assert any([s == 1, d == 1]), \"don't support this.\"\n\n        self.ndim = ndim\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.kernel_size = kernel_size\n        self.conv1x1 = np.prod(kernel_size) == 1\n        self.stride = stride\n        self.padding = padding\n        self.dilation = dilation\n        self.transposed = transposed\n        self.inverse = inverse\n        self.output_padding = output_padding\n        self.groups = groups\n        self.subm = subm\n        self.indice_key = indice_key\n        self.fused_bn = fused_bn\n\n        self.weight = Parameter(\n            torch.Tensor(*kernel_size, in_channels, out_channels))\n        if bias:\n            self.bias = 
Parameter(torch.Tensor(out_channels))\n        else:\n            self.register_parameter('bias', None)\n        self.reset_parameters()\n\n    def reset_parameters(self):\n        init.kaiming_uniform_(self.weight, a=math.sqrt(5))\n        if self.bias is not None:\n            fan_in, _ = _calculate_fan_in_and_fan_out_hwio(self.weight)\n            bound = 1 / math.sqrt(fan_in)\n            init.uniform_(self.bias, -bound, bound)\n\n    def forward(self, input):\n        assert isinstance(input, SparseConvTensor)\n        features = input.features\n        device = features.device\n        indices = input.indices\n        spatial_shape = input.spatial_shape\n        batch_size = input.batch_size\n        if not self.subm:\n            if self.transposed:\n                out_spatial_shape = ops.get_deconv_output_size(\n                    spatial_shape, self.kernel_size, self.stride, self.padding,\n                    self.dilation, self.output_padding)\n            else:\n                out_spatial_shape = ops.get_conv_output_size(\n                    spatial_shape, self.kernel_size, self.stride, self.padding,\n                    self.dilation)\n\n        else:\n            out_spatial_shape = spatial_shape\n        # input.update_grid(out_spatial_shape)\n        # t = time.time()\n        if self.conv1x1:\n            features = torch.mm(\n                input.features,\n                self.weight.view(self.in_channels, self.out_channels))\n            if self.bias is not None:\n                features += self.bias\n            out_tensor = SparseConvTensor(features, input.indices,\n                                          input.spatial_shape,\n                                          input.batch_size)\n            out_tensor.indice_dict = input.indice_dict\n            out_tensor.grid = input.grid\n            return out_tensor\n        datas = input.find_indice_pair(self.indice_key)\n        if self.inverse:\n            assert datas is not None and self.indice_key is not None\n            _, outids, indice_pairs, indice_pair_num, out_spatial_shape = datas\n            assert indice_pairs.shape[0] == np.prod(\n                self.kernel_size\n            ), 'inverse conv must have same kernel size as its couple conv'\n        else:\n            if self.indice_key is not None and datas is not None:\n                outids, _, indice_pairs, indice_pair_num, _ = datas\n            else:\n                outids, indice_pairs, indice_pair_num = ops.get_indice_pairs(\n                    indices,\n                    batch_size,\n                    spatial_shape,\n                    self.kernel_size,\n                    self.stride,\n                    self.padding,\n                    self.dilation,\n                    self.output_padding,\n                    self.subm,\n                    self.transposed,\n                    grid=input.grid)\n                input.indice_dict[self.indice_key] = (outids, indices,\n                                                      indice_pairs,\n                                                      indice_pair_num,\n                                                      spatial_shape)\n        if self.fused_bn:\n            assert self.bias is not None\n            out_features = ops.fused_indice_conv(features, self.weight,\n                                                 self.bias,\n                                                 indice_pairs.to(device),\n                                                 indice_pair_num,\n            
                                     outids.shape[0], self.inverse,\n                                                 self.subm)\n        else:\n            if self.subm:\n                out_features = Fsp.indice_subm_conv(features, self.weight,\n                                                    indice_pairs.to(device),\n                                                    indice_pair_num,\n                                                    outids.shape[0])\n            else:\n                if self.inverse:\n                    out_features = Fsp.indice_inverse_conv(\n                        features, self.weight, indice_pairs.to(device),\n                        indice_pair_num, outids.shape[0])\n                else:\n                    out_features = Fsp.indice_conv(features, self.weight,\n                                                   indice_pairs.to(device),\n                                                   indice_pair_num,\n                                                   outids.shape[0])\n\n            if self.bias is not None:\n                out_features += self.bias\n        out_tensor = SparseConvTensor(out_features, outids, out_spatial_shape,\n                                      batch_size)\n        out_tensor.indice_dict = input.indice_dict\n        out_tensor.grid = input.grid\n        return out_tensor\n\n\n@CONV_LAYERS.register_module()\nclass SparseConv2d(SparseConvolution):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias=True,\n                 indice_key=None):\n        super(SparseConv2d, self).__init__(\n            2,\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride,\n            padding,\n            dilation,\n            groups,\n            bias,\n            indice_key=indice_key)\n\n\n@CONV_LAYERS.register_module()\nclass SparseConv3d(SparseConvolution):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias=True,\n                 indice_key=None):\n        super(SparseConv3d, self).__init__(\n            3,\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride,\n            padding,\n            dilation,\n            groups,\n            bias,\n            indice_key=indice_key)\n\n\n@CONV_LAYERS.register_module()\nclass SparseConv4d(SparseConvolution):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias=True,\n                 indice_key=None):\n        super(SparseConv4d, self).__init__(\n            4,\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride,\n            padding,\n            dilation,\n            groups,\n            bias,\n            indice_key=indice_key)\n\n\n@CONV_LAYERS.register_module()\nclass SparseConvTranspose2d(SparseConvolution):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 
stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias=True,\n                 indice_key=None):\n        super(SparseConvTranspose2d, self).__init__(\n            2,\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride,\n            padding,\n            dilation,\n            groups,\n            bias,\n            transposed=True,\n            indice_key=indice_key)\n\n\n@CONV_LAYERS.register_module()\nclass SparseConvTranspose3d(SparseConvolution):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias=True,\n                 indice_key=None):\n        super(SparseConvTranspose3d, self).__init__(\n            3,\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride,\n            padding,\n            dilation,\n            groups,\n            bias,\n            transposed=True,\n            indice_key=indice_key)\n\n\n@CONV_LAYERS.register_module()\nclass SparseInverseConv2d(SparseConvolution):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 indice_key,\n                 bias=True):\n        super(SparseInverseConv2d, self).__init__(\n            2,\n            in_channels,\n            out_channels,\n            kernel_size,\n            bias=bias,\n            inverse=True,\n            indice_key=indice_key)\n\n\n@CONV_LAYERS.register_module()\nclass SparseInverseConv3d(SparseConvolution):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 indice_key,\n                 bias=True):\n        super(SparseInverseConv3d, self).__init__(\n            3,\n            in_channels,\n            out_channels,\n            kernel_size,\n            bias=bias,\n            inverse=True,\n            indice_key=indice_key)\n\n\n@CONV_LAYERS.register_module()\nclass SubMConv2d(SparseConvolution):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias=True,\n                 indice_key=None):\n        super(SubMConv2d, self).__init__(\n            2,\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride,\n            padding,\n            dilation,\n            groups,\n            bias,\n            True,\n            indice_key=indice_key)\n\n\n@CONV_LAYERS.register_module()\nclass SubMConv3d(SparseConvolution):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias=True,\n                 indice_key=None):\n        super(SubMConv3d, self).__init__(\n            3,\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride,\n            padding,\n            dilation,\n            groups,\n            bias,\n            True,\n            
indice_key=indice_key)\n\n\n@CONV_LAYERS.register_module()\nclass SubMConv4d(SparseConvolution):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 bias=True,\n                 indice_key=None):\n        super(SubMConv4d, self).__init__(\n            4,\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride,\n            padding,\n            dilation,\n            groups,\n            bias,\n            True,\n            indice_key=indice_key)\n"
  },
  {
    "path": "mmdet3d/ops/spconv/functional.py",
    "content": "# Copyright 2019 Yan Yan\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nfrom torch.autograd import Function\n\nfrom . import ops as ops\n\n\nclass SparseConvFunction(Function):\n\n    @staticmethod\n    def forward(ctx, features, filters, indice_pairs, indice_pair_num,\n                num_activate_out):\n        ctx.save_for_backward(indice_pairs, indice_pair_num, features, filters)\n        return ops.indice_conv(features, filters, indice_pairs,\n                               indice_pair_num, num_activate_out, False)\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors\n        input_bp, filters_bp = ops.indice_conv_backward(\n            features, filters, grad_output, indice_pairs, indice_pair_num,\n            False)\n\n        return input_bp, filters_bp, None, None, None\n\n\nclass SparseInverseConvFunction(Function):\n\n    @staticmethod\n    def forward(ctx, features, filters, indice_pairs, indice_pair_num,\n                num_activate_out):\n        ctx.save_for_backward(indice_pairs, indice_pair_num, features, filters)\n        return ops.indice_conv(features, filters, indice_pairs,\n                               indice_pair_num, num_activate_out, True, False)\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors\n        input_bp, filters_bp = ops.indice_conv_backward(\n            features, filters, grad_output, indice_pairs, indice_pair_num,\n            True, False)\n\n        return input_bp, filters_bp, None, None, None\n\n\nclass SubMConvFunction(Function):\n\n    @staticmethod\n    def forward(ctx, features, filters, indice_pairs, indice_pair_num,\n                num_activate_out):\n        ctx.save_for_backward(indice_pairs, indice_pair_num, features, filters)\n        return ops.indice_conv(features, filters, indice_pairs,\n                               indice_pair_num, num_activate_out, False, True)\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors\n        input_bp, filters_bp = ops.indice_conv_backward(\n            features, filters, grad_output, indice_pairs, indice_pair_num,\n            False, True)\n\n        return input_bp, filters_bp, None, None, None\n\n\nclass SparseMaxPoolFunction(Function):\n\n    @staticmethod\n    def forward(ctx, features, indice_pairs, indice_pair_num,\n                num_activate_out):\n        out = ops.indice_maxpool(features, indice_pairs, indice_pair_num,\n                                 num_activate_out)\n        ctx.save_for_backward(indice_pairs, indice_pair_num, features, out)\n        return out\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        indice_pairs, indice_pair_num, features, out = ctx.saved_tensors\n        input_bp = ops.indice_maxpool_backward(features, out, grad_output,\n                                               
indice_pairs, indice_pair_num)\n        return input_bp, None, None, None\n\n\nindice_conv = SparseConvFunction.apply\nindice_inverse_conv = SparseInverseConvFunction.apply\nindice_subm_conv = SubMConvFunction.apply\nindice_maxpool = SparseMaxPoolFunction.apply\n"
  },
  {
    "path": "mmdet3d/ops/spconv/include/paramsgrid.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#ifndef PARAMS_GRID_H_\n#define PARAMS_GRID_H_\n#include <tuple>\n#include <vector>\n\nnamespace detail {\ntemplate <class T>\nint getTotalSize(std::vector<T> arg) {\n  return arg.size();\n}\n\ntemplate <class T, class... TArgs>\nint getTotalSize(std::vector<T> arg, std::vector<TArgs>... args) {\n  return arg.size() * getTotalSize(args...);\n}\ntemplate <typename T>\nint getSize(std::vector<T> arg) {\n  return arg.size();\n}\n\ntemplate <int Idx, class TT, class T>\nvoid assigner(TT &src, std::vector<int> counter, std::vector<T> &arg) {\n  std::get<Idx>(src) = arg[counter[Idx]];\n}\n\ntemplate <int Idx, class TT, class T, class... TArgs>\nvoid assigner(TT &src, std::vector<int> counter, std::vector<T> &arg,\n              std::vector<TArgs> &... args) {\n  std::get<Idx>(src) = arg[counter[Idx]];\n  assigner<Idx + 1>(src, counter, args...);\n}\n}  // namespace detail\ntemplate <class... TArgs>\nstd::vector<std::tuple<TArgs...>> paramsGrid(std::vector<TArgs>... args) {\n  int length = detail::getTotalSize(args...);\n  std::vector<int> sizes = {detail::getSize(args)...};\n  int size = sizes.size();\n\n  std::vector<std::tuple<TArgs...>> params(length);\n  std::vector<int> counter(size);\n  for (int i = 0; i < length; ++i) {\n    detail::assigner<0>(params[i], counter, args...);\n    counter[size - 1] += 1;\n    for (int c = size - 1; c >= 0; --c) {\n      if (counter[c] == sizes[c] && c > 0) {\n        counter[c - 1] += 1;\n        counter[c] = 0;\n      }\n    }\n  }\n  return params;\n}\n\n#endif\n"
  },
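  {
    "path": "docs/examples/paramsgrid_usage_sketch.cc",
    "content": "// Hypothetical usage sketch for include/paramsgrid.h (added for illustration;\n// this file is not part of the original spconv sources). It shows that\n// paramsGrid enumerates the Cartesian product of the given parameter vectors\n// as std::tuple entries, which is convenient for sweeping test configurations.\n// The parameter values below are made up.\n#include <iostream>\n#include <string>\n#include <vector>\n\n#include <paramsgrid.h>\n\nint main() {\n  std::vector<int> kernel_sizes{2, 3};\n  std::vector<int> strides{1, 2};\n  std::vector<std::string> devices{\"cpu\", \"cuda\"};\n\n  // 2 * 2 * 2 = 8 combinations; the last argument varies fastest.\n  auto grid = paramsGrid(kernel_sizes, strides, devices);\n  for (auto &params : grid) {\n    std::cout << std::get<0>(params) << \" \" << std::get<1>(params) << \" \"\n              << std::get<2>(params) << std::endl;\n  }\n  return 0;\n}\n"
  },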
  {
    "path": "mmdet3d/ops/spconv/include/prettyprint.h",
    "content": "//          Copyright Louis Delacroix 2010 - 2014.\n// Distributed under the Boost Software License, Version 1.0.\n//    (See accompanying file LICENSE_1_0.txt or copy at\n//          http://www.boost.org/LICENSE_1_0.txt)\n//\n// A pretty printing library for C++\n//\n// Usage:\n// Include this header, and operator<< will \"just work\".\n\n#ifndef H_PRETTY_PRINT\n#define H_PRETTY_PRINT\n\n#include <cstddef>\n#include <iterator>\n#include <memory>\n#include <ostream>\n#include <set>\n#include <tuple>\n#include <type_traits>\n#include <unordered_set>\n#include <utility>\n#include <valarray>\n\nnamespace pretty_print {\nnamespace detail {\n// SFINAE type trait to detect whether T::const_iterator exists.\n\nstruct sfinae_base {\n  using yes = char;\n  using no = yes[2];\n};\n\ntemplate <typename T>\nstruct has_const_iterator : private sfinae_base {\n private:\n  template <typename C>\n  static yes &test(typename C::const_iterator *);\n  template <typename C>\n  static no &test(...);\n\n public:\n  static const bool value = sizeof(test<T>(nullptr)) == sizeof(yes);\n  using type = T;\n};\n\ntemplate <typename T>\nstruct has_begin_end : private sfinae_base {\n private:\n  template <typename C>\n  static yes &\n  f(typename std::enable_if<\n      std::is_same<decltype(static_cast<typename C::const_iterator (C::*)()\n                                            const>(&C::begin)),\n                   typename C::const_iterator (C::*)() const>::value>::type *);\n\n  template <typename C>\n  static no &f(...);\n\n  template <typename C>\n  static yes &g(typename std::enable_if<\n                std::is_same<decltype(static_cast<typename C::const_iterator (\n                                          C::*)() const>(&C::end)),\n                             typename C::const_iterator (C::*)() const>::value,\n                void>::type *);\n\n  template <typename C>\n  static no &g(...);\n\n public:\n  static bool const beg_value = sizeof(f<T>(nullptr)) == sizeof(yes);\n  static bool const end_value = sizeof(g<T>(nullptr)) == sizeof(yes);\n};\n\n}  // namespace detail\n\n// Holds the delimiter values for a specific character type\n\ntemplate <typename TChar>\nstruct delimiters_values {\n  using char_type = TChar;\n  const char_type *prefix;\n  const char_type *delimiter;\n  const char_type *postfix;\n};\n\n// Defines the delimiter values for a specific container and character type\n\ntemplate <typename T, typename TChar>\nstruct delimiters {\n  using type = delimiters_values<TChar>;\n  static const type values;\n};\n\n// Functor to print containers. You can use this directly if you want\n// to specificy a non-default delimiters type. 
The printing logic can\n// be customized by specializing the nested template.\n\ntemplate <typename T, typename TChar = char,\n          typename TCharTraits = ::std::char_traits<TChar>,\n          typename TDelimiters = delimiters<T, TChar>>\nstruct print_container_helper {\n  using delimiters_type = TDelimiters;\n  using ostream_type = std::basic_ostream<TChar, TCharTraits>;\n\n  template <typename U>\n  struct printer {\n    static void print_body(const U &c, ostream_type &stream) {\n      using std::begin;\n      using std::end;\n\n      auto it = begin(c);\n      const auto the_end = end(c);\n\n      if (it != the_end) {\n        for (;;) {\n          stream << *it;\n\n          if (++it == the_end) break;\n\n          if (delimiters_type::values.delimiter != NULL)\n            stream << delimiters_type::values.delimiter;\n        }\n      }\n    }\n  };\n\n  print_container_helper(const T &container) : container_(container) {}\n\n  inline void operator()(ostream_type &stream) const {\n    if (delimiters_type::values.prefix != NULL)\n      stream << delimiters_type::values.prefix;\n\n    printer<T>::print_body(container_, stream);\n\n    if (delimiters_type::values.postfix != NULL)\n      stream << delimiters_type::values.postfix;\n  }\n\n private:\n  const T &container_;\n};\n\n// Specialization for pairs\n\ntemplate <typename T, typename TChar, typename TCharTraits,\n          typename TDelimiters>\ntemplate <typename T1, typename T2>\nstruct print_container_helper<T, TChar, TCharTraits,\n                              TDelimiters>::printer<std::pair<T1, T2>> {\n  using ostream_type =\n      typename print_container_helper<T, TChar, TCharTraits,\n                                      TDelimiters>::ostream_type;\n\n  static void print_body(const std::pair<T1, T2> &c, ostream_type &stream) {\n    stream << c.first;\n    if (print_container_helper<T, TChar, TCharTraits,\n                               TDelimiters>::delimiters_type::values\n            .delimiter != NULL)\n      stream << print_container_helper<T, TChar, TCharTraits,\n                                       TDelimiters>::delimiters_type::values\n                    .delimiter;\n    stream << c.second;\n  }\n};\n\n// Specialization for tuples\n\ntemplate <typename T, typename TChar, typename TCharTraits,\n          typename TDelimiters>\ntemplate <typename... 
Args>\nstruct print_container_helper<T, TChar, TCharTraits,\n                              TDelimiters>::printer<std::tuple<Args...>> {\n  using ostream_type =\n      typename print_container_helper<T, TChar, TCharTraits,\n                                      TDelimiters>::ostream_type;\n  using element_type = std::tuple<Args...>;\n\n  template <std::size_t I>\n  struct Int {};\n\n  static void print_body(const element_type &c, ostream_type &stream) {\n    tuple_print(c, stream, Int<0>());\n  }\n\n  static void tuple_print(const element_type &, ostream_type &,\n                          Int<sizeof...(Args)>) {}\n\n  static void tuple_print(\n      const element_type &c, ostream_type &stream,\n      typename std::conditional<sizeof...(Args) != 0, Int<0>,\n                                std::nullptr_t>::type) {\n    stream << std::get<0>(c);\n    tuple_print(c, stream, Int<1>());\n  }\n\n  template <std::size_t N>\n  static void tuple_print(const element_type &c, ostream_type &stream, Int<N>) {\n    if (print_container_helper<T, TChar, TCharTraits,\n                               TDelimiters>::delimiters_type::values\n            .delimiter != NULL)\n      stream << print_container_helper<T, TChar, TCharTraits,\n                                       TDelimiters>::delimiters_type::values\n                    .delimiter;\n\n    stream << std::get<N>(c);\n\n    tuple_print(c, stream, Int<N + 1>());\n  }\n};\n\n// Prints a print_container_helper to the specified stream.\n\ntemplate <typename T, typename TChar, typename TCharTraits,\n          typename TDelimiters>\ninline std::basic_ostream<TChar, TCharTraits> &operator<<(\n    std::basic_ostream<TChar, TCharTraits> &stream,\n    const print_container_helper<T, TChar, TCharTraits, TDelimiters> &helper) {\n  helper(stream);\n  return stream;\n}\n\n// Basic is_container template; specialize to derive from std::true_type for all\n// desired container types\n\ntemplate <typename T>\nstruct is_container\n    : public std::integral_constant<bool,\n                                    detail::has_const_iterator<T>::value &&\n                                        detail::has_begin_end<T>::beg_value &&\n                                        detail::has_begin_end<T>::end_value> {};\n\ntemplate <typename T, std::size_t N>\nstruct is_container<T[N]> : std::true_type {};\n\ntemplate <std::size_t N>\nstruct is_container<char[N]> : std::false_type {};\n\ntemplate <typename T>\nstruct is_container<std::valarray<T>> : std::true_type {};\n\ntemplate <typename T1, typename T2>\nstruct is_container<std::pair<T1, T2>> : std::true_type {};\n\ntemplate <typename... 
Args>\nstruct is_container<std::tuple<Args...>> : std::true_type {};\n\n// Default delimiters\n\ntemplate <typename T>\nstruct delimiters<T, char> {\n  static const delimiters_values<char> values;\n};\ntemplate <typename T>\nconst delimiters_values<char> delimiters<T, char>::values = {\"[\", \", \", \"]\"};\ntemplate <typename T>\nstruct delimiters<T, wchar_t> {\n  static const delimiters_values<wchar_t> values;\n};\ntemplate <typename T>\nconst delimiters_values<wchar_t> delimiters<T, wchar_t>::values = {L\"[\", L\", \",\n                                                                   L\"]\"};\n\n// Delimiters for (multi)set and unordered_(multi)set\n\ntemplate <typename T, typename TComp, typename TAllocator>\nstruct delimiters<::std::set<T, TComp, TAllocator>, char> {\n  static const delimiters_values<char> values;\n};\n\ntemplate <typename T, typename TComp, typename TAllocator>\nconst delimiters_values<char>\n    delimiters<::std::set<T, TComp, TAllocator>, char>::values = {\"{\", \", \",\n                                                                  \"}\"};\n\ntemplate <typename T, typename TComp, typename TAllocator>\nstruct delimiters<::std::set<T, TComp, TAllocator>, wchar_t> {\n  static const delimiters_values<wchar_t> values;\n};\n\ntemplate <typename T, typename TComp, typename TAllocator>\nconst delimiters_values<wchar_t>\n    delimiters<::std::set<T, TComp, TAllocator>, wchar_t>::values = {\n        L\"{\", L\", \", L\"}\"};\n\ntemplate <typename T, typename TComp, typename TAllocator>\nstruct delimiters<::std::multiset<T, TComp, TAllocator>, char> {\n  static const delimiters_values<char> values;\n};\n\ntemplate <typename T, typename TComp, typename TAllocator>\nconst delimiters_values<char>\n    delimiters<::std::multiset<T, TComp, TAllocator>, char>::values = {\n        \"{\", \", \", \"}\"};\n\ntemplate <typename T, typename TComp, typename TAllocator>\nstruct delimiters<::std::multiset<T, TComp, TAllocator>, wchar_t> {\n  static const delimiters_values<wchar_t> values;\n};\n\ntemplate <typename T, typename TComp, typename TAllocator>\nconst delimiters_values<wchar_t>\n    delimiters<::std::multiset<T, TComp, TAllocator>, wchar_t>::values = {\n        L\"{\", L\", \", L\"}\"};\n\ntemplate <typename T, typename THash, typename TEqual, typename TAllocator>\nstruct delimiters<::std::unordered_set<T, THash, TEqual, TAllocator>, char> {\n  static const delimiters_values<char> values;\n};\n\ntemplate <typename T, typename THash, typename TEqual, typename TAllocator>\nconst delimiters_values<char> delimiters<\n    ::std::unordered_set<T, THash, TEqual, TAllocator>, char>::values = {\n    \"{\", \", \", \"}\"};\n\ntemplate <typename T, typename THash, typename TEqual, typename TAllocator>\nstruct delimiters<::std::unordered_set<T, THash, TEqual, TAllocator>, wchar_t> {\n  static const delimiters_values<wchar_t> values;\n};\n\ntemplate <typename T, typename THash, typename TEqual, typename TAllocator>\nconst delimiters_values<wchar_t> delimiters<\n    ::std::unordered_set<T, THash, TEqual, TAllocator>, wchar_t>::values = {\n    L\"{\", L\", \", L\"}\"};\n\ntemplate <typename T, typename THash, typename TEqual, typename TAllocator>\nstruct delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>,\n                  char> {\n  static const delimiters_values<char> values;\n};\n\ntemplate <typename T, typename THash, typename TEqual, typename TAllocator>\nconst delimiters_values<char> delimiters<\n    ::std::unordered_multiset<T, THash, TEqual, TAllocator>, 
char>::values = {\n    \"{\", \", \", \"}\"};\n\ntemplate <typename T, typename THash, typename TEqual, typename TAllocator>\nstruct delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>,\n                  wchar_t> {\n  static const delimiters_values<wchar_t> values;\n};\n\ntemplate <typename T, typename THash, typename TEqual, typename TAllocator>\nconst delimiters_values<wchar_t>\n    delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>,\n               wchar_t>::values = {L\"{\", L\", \", L\"}\"};\n\n// Delimiters for pair and tuple\n\ntemplate <typename T1, typename T2>\nstruct delimiters<std::pair<T1, T2>, char> {\n  static const delimiters_values<char> values;\n};\ntemplate <typename T1, typename T2>\nconst delimiters_values<char> delimiters<std::pair<T1, T2>, char>::values = {\n    \"(\", \", \", \")\"};\ntemplate <typename T1, typename T2>\nstruct delimiters<::std::pair<T1, T2>, wchar_t> {\n  static const delimiters_values<wchar_t> values;\n};\ntemplate <typename T1, typename T2>\nconst delimiters_values<wchar_t>\n    delimiters<::std::pair<T1, T2>, wchar_t>::values = {L\"(\", L\", \", L\")\"};\n\ntemplate <typename... Args>\nstruct delimiters<std::tuple<Args...>, char> {\n  static const delimiters_values<char> values;\n};\ntemplate <typename... Args>\nconst delimiters_values<char> delimiters<std::tuple<Args...>, char>::values = {\n    \"(\", \", \", \")\"};\ntemplate <typename... Args>\nstruct delimiters<::std::tuple<Args...>, wchar_t> {\n  static const delimiters_values<wchar_t> values;\n};\ntemplate <typename... Args>\nconst delimiters_values<wchar_t>\n    delimiters<::std::tuple<Args...>, wchar_t>::values = {L\"(\", L\", \", L\")\"};\n\n// Type-erasing helper class for easy use of custom delimiters.\n// Requires TCharTraits = std::char_traits<TChar> and TChar = char or wchar_t,\n// and MyDelims needs to be defined for TChar. 
Usage: \"cout <<\n// pretty_print::custom_delims<MyDelims>(x)\".\n\nstruct custom_delims_base {\n  virtual ~custom_delims_base() {}\n  virtual std::ostream &stream(::std::ostream &) = 0;\n  virtual std::wostream &stream(::std::wostream &) = 0;\n};\n\ntemplate <typename T, typename Delims>\nstruct custom_delims_wrapper : custom_delims_base {\n  custom_delims_wrapper(const T &t_) : t(t_) {}\n\n  std::ostream &stream(std::ostream &s) {\n    return s << print_container_helper<T, char, std::char_traits<char>, Delims>(\n               t);\n  }\n\n  std::wostream &stream(std::wostream &s) {\n    return s << print_container_helper<T, wchar_t, std::char_traits<wchar_t>,\n                                       Delims>(t);\n  }\n\n private:\n  const T &t;\n};\n\ntemplate <typename Delims>\nstruct custom_delims {\n  template <typename Container>\n  custom_delims(const Container &c)\n      : base(new custom_delims_wrapper<Container, Delims>(c)) {}\n\n  std::unique_ptr<custom_delims_base> base;\n};\n\ntemplate <typename TChar, typename TCharTraits, typename Delims>\ninline std::basic_ostream<TChar, TCharTraits> &operator<<(\n    std::basic_ostream<TChar, TCharTraits> &s, const custom_delims<Delims> &p) {\n  return p.base->stream(s);\n}\n\n// A wrapper for a C-style array given as pointer-plus-size.\n// Usage: std::cout << pretty_print_array(arr, n) << std::endl;\n\ntemplate <typename T>\nstruct array_wrapper_n {\n  typedef const T *const_iterator;\n  typedef T value_type;\n\n  array_wrapper_n(const T *const a, size_t n) : _array(a), _n(n) {}\n  inline const_iterator begin() const { return _array; }\n  inline const_iterator end() const { return _array + _n; }\n\n private:\n  const T *const _array;\n  size_t _n;\n};\n\n// A wrapper for hash-table based containers that offer local iterators to each\n// bucket. Usage: std::cout << bucket_print(m, 4) << std::endl;  (Prints bucket\n// 5 of container m.)\n\ntemplate <typename T>\nstruct bucket_print_wrapper {\n  typedef typename T::const_local_iterator const_iterator;\n  typedef typename T::size_type size_type;\n\n  const_iterator begin() const { return m_map.cbegin(n); }\n\n  const_iterator end() const { return m_map.cend(n); }\n\n  bucket_print_wrapper(const T &m, size_type bucket) : m_map(m), n(bucket) {}\n\n private:\n  const T &m_map;\n  const size_type n;\n};\n\n}  // namespace pretty_print\n\n// Global accessor functions for the convenience wrappers\n\ntemplate <typename T>\ninline pretty_print::array_wrapper_n<T> pretty_print_array(const T *const a,\n                                                           size_t n) {\n  return pretty_print::array_wrapper_n<T>(a, n);\n}\n\ntemplate <typename T>\npretty_print::bucket_print_wrapper<T> bucket_print(const T &m,\n                                                   typename T::size_type n) {\n  return pretty_print::bucket_print_wrapper<T>(m, n);\n}\n\n// Main magic entry point: An overload snuck into namespace std.\n// Can we do better?\n\nnamespace std {\n// Prints a container to the stream using default delimiters\n\ntemplate <typename T, typename TChar, typename TCharTraits>\ninline typename enable_if<::pretty_print::is_container<T>::value,\n                          basic_ostream<TChar, TCharTraits> &>::type\noperator<<(basic_ostream<TChar, TCharTraits> &stream, const T &container) {\n  return stream\n         << ::pretty_print::print_container_helper<T, TChar, TCharTraits>(\n                container);\n}\n}  // namespace std\n\n#endif  // H_PRETTY_PRINT\n"
  },
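  {
    "path": "docs/examples/prettyprint_usage_sketch.cc",
    "content": "// Hypothetical usage sketch for include/prettyprint.h (added for illustration;\n// this file is not part of the original spconv sources). As the header's own\n// comment says, including it makes operator<< work for standard containers,\n// pairs and tuples, using the default delimiters defined above.\n#include <iostream>\n#include <tuple>\n#include <utility>\n#include <vector>\n\n#include <prettyprint.h>\n\nint main() {\n  std::vector<int> v{1, 2, 3};\n  std::pair<int, double> p{1, 2.5};\n  std::tuple<int, char, double> t{1, 'a', 2.5};\n\n  std::cout << v << std::endl;  // prints [1, 2, 3]\n  std::cout << p << std::endl;  // prints (1, 2.5)\n  std::cout << t << std::endl;  // prints (1, a, 2.5)\n  return 0;\n}\n"
  },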
  {
    "path": "mmdet3d/ops/spconv/include/pybind11_utils.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n#include <algorithm>\n#include <iostream>\n#include <pybind11/embed.h> // everything needed for embedding\n#include <pybind11/functional.h>\n#include <pybind11/numpy.h>\n#include <pybind11/pybind11.h>\n#include <pybind11/stl.h>\n\n#include <tensorview/tensorview.h>\n\nnamespace py = pybind11;\n\ntemplate <typename T, typename TPyObject>\nstd::vector<T> array2Vector(TPyObject arr){\n    py::array arr_np = arr;\n    size_t size = arr.attr(\"size\").template cast<size_t>();\n    py::array_t<T> arr_cc = arr_np;\n    std::vector<T> data(arr_cc.data(), arr_cc.data() + size);\n    return data;\n}\n\ntemplate <typename T>\nstd::vector<T> arrayT2Vector(py::array_t<T> arr)\n{\n  std::vector<T> data(arr.data(), arr.data() + arr.size());\n  return data;\n}\n\ntemplate <typename T, typename TPyObject>\ntv::TensorView<T> array2TensorView(TPyObject arr){\n    py::array arr_np = arr;\n    py::array_t<T> arr_cc = arr_np;\n    tv::Shape shape;\n    for (int i = 0; i < arr_cc.ndim(); ++i){\n        shape.push_back(arr_cc.shape(i));\n    }\n    return tv::TensorView<T>(arr_cc.mutable_data(), shape);\n}\ntemplate <typename T>\ntv::TensorView<T> arrayT2TensorView(py::array_t<T> arr){\n    tv::Shape shape;\n    for (int i = 0; i < arr.ndim(); ++i){\n        shape.push_back(arr.shape(i));\n    }\n    return tv::TensorView<T>(arr.mutable_data(), shape);\n}\n"
  },
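  {
    "path": "docs/examples/pybind11_utils_usage_sketch.cc",
    "content": "// Hypothetical usage sketch for include/pybind11_utils.h (added for\n// illustration; this file is not part of the original spconv sources). It\n// shows arrayT2Vector copying a numpy array into a std::vector inside a small\n// pybind11 binding. The module name example_ext is made up, and building it\n// assumes the spconv include directory (for pybind11_utils.h and tensorview)\n// is on the include path.\n#include <pybind11/numpy.h>\n#include <pybind11/pybind11.h>\n\n#include <vector>\n\n#include <pybind11_utils.h>  // already defines the py = pybind11 alias\n\n// Sum a float32 numpy array by first copying it into a std::vector<float>.\nfloat sum_array(py::array_t<float> arr) {\n  std::vector<float> data = arrayT2Vector<float>(arr);\n  float total = 0.f;\n  for (float v : data) total += v;\n  return total;\n}\n\nPYBIND11_MODULE(example_ext, m) {\n  m.def(\"sum_array\", &sum_array, \"Sum a float32 array via arrayT2Vector\");\n}\n"
  },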
  {
    "path": "mmdet3d/ops/spconv/include/spconv/fused_spconv_ops.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#ifndef FUSED_SPARSE_CONV_OP_H_\n#define FUSED_SPARSE_CONV_OP_H_\n\n#include <cuda_runtime_api.h>\n#include <spconv/indice.h>\n#include <spconv/reordering.h>\n#include <torch/script.h>\n#include <torch_utils.h>\n#include <utility/timer.h>\n\nnamespace spconv {\n// torch.jit's doc says only support int64, so we need to convert to int32.\n\ntemplate <typename T>\ntorch::Tensor fusedIndiceConvBatchNorm(\n    torch::Tensor features, torch::Tensor filters, torch::Tensor bias,\n    torch::Tensor indicePairs, torch::Tensor indiceNum, int64_t numActOut,\n    int64_t _inverse, int64_t _subM) {\n  bool subM = _subM != 0;\n  bool inverse = _inverse != 0;\n  auto device = features.device().type();\n  auto ndim = filters.dim() - 2;\n  auto kernelVolume = indicePairs.size(0);\n  auto numInPlanes = features.size(1);\n  auto numOutPlanes = filters.size(ndim + 1);\n  auto indicePairNumCpu = indiceNum.to({torch::kCPU});\n  auto indicePairMaxSizeIter =\n      std::max_element(indicePairNumCpu.data_ptr<int>(),\n                       indicePairNumCpu.data_ptr<int>() + kernelVolume);\n  int indicePairMaxOffset =\n      indicePairMaxSizeIter - indicePairNumCpu.data_ptr<int>();\n  int indicePairMaxSize = *indicePairMaxSizeIter;\n\n  /*if (_subM){\n    std::vector<int> indicePairNumVec(indicePairNumCpu.data_ptr<int>(),\n  indicePairNumCpu.data_ptr<int>() + kernelVolume);\n    indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);\n\n    auto indicePairVecMaxSizeIter = std::max_element(\n        indicePairNumVec.begin(), indicePairNumVec.end());\n    indicePairMaxSize = *indicePairVecMaxSizeIter;\n  }*/\n\n  auto options =\n      torch::TensorOptions().dtype(features.dtype()).device(features.device());\n  // auto indicePairOptions =\n  //     torch::TensorOptions().dtype(torch::kInt64).device(indicePairs.device());\n\n  torch::Tensor output =\n      torch::zeros({numActOut, numOutPlanes}, options).copy_(bias);\n  torch::Tensor inputBuffer =\n      torch::zeros({indicePairMaxSize, numInPlanes}, options);\n  torch::Tensor outputBuffer =\n      torch::zeros({indicePairMaxSize, numOutPlanes}, options);\n  filters = filters.view({-1, numInPlanes, numOutPlanes});\n  if (subM) {  // the center index of subm conv don't need gather and scatter\n               // add.\n    torch::mm_out(output, features, filters[indicePairMaxOffset]);\n  }\n  double totalGatherTime = 0;\n  double totalGEMMTime = 0;\n  double totalSAddTime = 0;\n  for (int i = 0; i < kernelVolume; ++i) {\n    auto nHot = indicePairNumCpu.data_ptr<int>()[i];\n    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {\n      continue;\n    }\n    // auto timer = spconv::CudaContextTimer<>();\n    auto outputBufferBlob = torch::from_blob(outputBuffer.data_ptr<T>(),\n                                             {nHot, numOutPlanes}, options);\n    auto inputBufferBlob = torch::from_blob(inputBuffer.data_ptr<T>(),\n                         
                   {nHot, numInPlanes}, options);\n\n    if (device == torch::kCPU) {\n      functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtor;\n      gatherFtor(tv::CPU(), tv::torch2tv<T>(inputBuffer),\n                 tv::torch2tv<const T>(features),\n                 tv::torch2tv<const int>(indicePairs).subview(i, inverse),\n                 nHot);\n    } else {\n      functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtor;\n      gatherFtor(tv::TorchGPU(), tv::torch2tv<T>(inputBuffer),\n                 tv::torch2tv<const T>(features),\n                 tv::torch2tv<const int>(indicePairs).subview(i, inverse),\n                 nHot);\n      TV_CHECK_CUDA_ERR();\n      /* slower than SparseGatherFunctor, may due to int->long conversion\n      auto indicePairLong = indicePairs[i][inverse].to(torch::kInt64);\n      auto indicePairBlob = torch::from_blob(indicePairLong.data_ptr<long>(),\n      {nHot}, indicePairOptions); torch::index_select_out(inputBufferBlob,\n      features, 0, indicePairBlob);*/\n    }\n    // totalGatherTime += timer.report() / 1000.0;\n    torch::mm_out(outputBufferBlob, inputBufferBlob, filters[i]);\n    // totalGEMMTime += timer.report() / 1000.0;\n\n    if (device == torch::kCPU) {\n      functor::SparseScatterAddFunctor<tv::CPU, T, int> scatterFtor;\n      scatterFtor(tv::CPU(), tv::torch2tv<T>(output),\n                  tv::torch2tv<const T>(outputBuffer),\n                  tv::torch2tv<const int>(indicePairs).subview(i, !inverse),\n                  nHot, true);\n    } else {\n      functor::SparseScatterAddFunctor<tv::GPU, T, int> scatterFtor;\n      scatterFtor(tv::TorchGPU(), tv::torch2tv<T>(output),\n                  tv::torch2tv<const T>(outputBuffer),\n                  tv::torch2tv<const int>(indicePairs).subview(i, !inverse),\n                  nHot, true);\n      TV_CHECK_CUDA_ERR();\n    }\n    // totalSAddTime += timer.report() / 1000.0;\n  }\n  // std::cout << \"gather time \" << totalGatherTime << std::endl;\n  // std::cout << \"gemm time \" << totalGEMMTime << std::endl;\n  // std::cout << \"scatteradd time \" << totalSAddTime << std::endl;\n  return output;\n}\n}  // namespace spconv\n\n#endif\n"
  },
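  {
    "path": "docs/examples/gather_gemm_scatter_sketch.cc",
    "content": "// Conceptual sketch (added for illustration; this file is not part of the\n// original spconv sources) of the gather -> GEMM -> scatter-add loop that\n// fusedIndiceConvBatchNorm above performs with torch tensors. Plain row-major\n// std::vector matrices stand in for torch::Tensor, a single kernel offset is\n// shown, and all sizes and index pairs are made-up toy data.\n#include <cstdio>\n#include <utility>\n#include <vector>\n\nint main() {\n  const int numOut = 3, inC = 2, outC = 2;\n\n  // Four active input rows of inC features each; the output [numOut x outC]\n  // starts as the broadcast bias (the fused bias/batch-norm part of the op).\n  std::vector<float> features = {1, 2, 3, 4, 5, 6, 7, 8};\n  std::vector<float> bias = {0.1f, 0.2f};\n  std::vector<float> output(numOut * outC);\n  for (int r = 0; r < numOut; ++r)\n    for (int c = 0; c < outC; ++c) output[r * outC + c] = bias[c];\n\n  // One kernel offset: an [inC x outC] filter and its (input, output) pairs.\n  std::vector<float> filter = {1, 0, 0, 1};\n  std::vector<std::pair<int, int>> pairs = {{0, 1}, {2, 1}};\n\n  for (const auto &pr : pairs) {\n    const float *in_row = &features[pr.first * inC];  // gather an input row\n    float *out_row = &output[pr.second * outC];       // scatter-add target\n    for (int c = 0; c < outC; ++c) {                   // tiny per-row GEMM\n      float acc = 0.f;\n      for (int k = 0; k < inC; ++k) acc += in_row[k] * filter[k * outC + c];\n      out_row[c] += acc;\n    }\n  }\n\n  for (int r = 0; r < numOut; ++r)\n    std::printf(\"out[%d] = (%.2f, %.2f)\\n\", r, output[r * outC + 0],\n                output[r * outC + 1]);\n  return 0;\n}\n"
  },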
  {
    "path": "mmdet3d/ops/spconv/include/spconv/geometry.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#ifndef SPCONV_GEOMETRY_H_\n#define SPCONV_GEOMETRY_H_\n\n#include <tensorview/tensorview.h>\n\n#include <iostream>\n#include <limits>\n\nnamespace spconv {\ntemplate <typename Index, unsigned NDim>\nTV_HOST_DEVICE Index getValidOutPos(const Index *input_pos,\n                                    const Index *kernelSize,\n                                    const Index *stride, const Index *padding,\n                                    const Index *dilation,\n                                    const Index *outSpatialShape, Index *out) {\n  Index lowers[NDim];\n  Index uppers[NDim];\n  Index counter[NDim];\n  Index counterSize[NDim];\n  Index pointCounter = 0;\n  Index val;\n  Index numPoints = 1;\n  Index m, offset;\n  bool valid = false;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 +\n                 stride[i] + padding[i]) /\n                stride[i];\n    uppers[i] = (input_pos[i] + padding[i]) / stride[i];\n  }\n\n#pragma unroll\n  for (unsigned i = 0; i < NDim; ++i) {\n    counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);\n    numPoints *= counterSize[i];\n  }\n\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    counter[i] = 0;\n  }\n  for (int i = 0; i < numPoints; ++i) {\n    valid = true;\n    m = 1;\n    offset = 0;\n#pragma unroll\n    for (int j = NDim - 1; j >= 0; --j) {\n      val = uppers[j] - counter[j] * dilation[j];\n      out[pointCounter * (NDim + 1) + j] = val;\n      if (val < 0 || (val > outSpatialShape[j] - 1)) {\n        valid = false;\n        // break;\n      }\n      offset += m * (input_pos[j] - val * stride[j] + padding[j]) / dilation[j];\n      m *= kernelSize[j];\n    }\n\n    out[pointCounter * (NDim + 1) + NDim] = offset;\n    if (valid) ++pointCounter;\n    counter[NDim - 1] += 1;\n#pragma unroll\n    for (int c = NDim - 1; c >= 0; --c) {\n      if (counter[c] == counterSize[c] && c > 0) {\n        counter[c - 1] += 1;\n        counter[c] = 0;\n      }\n    }\n  }\n  return pointCounter;\n}\n\ntemplate <typename Index, unsigned NDim>\nTV_HOST_DEVICE Index getValidOutPosTranspose(\n    const Index *input_pos, const Index *kernelSize, const Index *stride,\n    const Index *padding, const Index *dilation, const Index *outSpatialShape,\n    Index *out) {\n  Index lowers[NDim];\n  Index uppers[NDim];\n  Index counter[NDim];\n  Index counterSize[NDim];\n  Index pointCounter = 0;\n  Index val;\n  Index numPoints = 1;\n  Index m, offset;\n  bool valid = false;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    lowers[i] = input_pos[i] * stride[i] - padding[i];\n    uppers[i] = lowers[i] + (kernelSize[i] - 1) * dilation[i];\n  }\n#pragma unroll\n  for (unsigned i = 0; i < NDim; ++i) {\n    counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);\n    numPoints *= counterSize[i];\n  }\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    counter[i] 
= 0;\n  }\n  for (int i = 0; i < numPoints; ++i) {\n    valid = true;\n    m = 1;\n    offset = 0;\n#pragma unroll\n    for (int j = NDim - 1; j >= 0; --j) {\n      val = uppers[j] - counter[j] * dilation[j];\n      out[pointCounter * (NDim + 1) + j] = val;\n      if (val < 0 || (val > outSpatialShape[j] - 1)) {\n        valid = false;\n        // break;\n      }\n      offset += m * (val - lowers[j]) / dilation[j];\n      m *= kernelSize[j];\n    }\n    out[pointCounter * (NDim + 1) + NDim] = offset;\n    if (valid) ++pointCounter;\n    counter[NDim - 1] += 1;\n#pragma unroll\n    for (int c = NDim - 1; c >= 0; --c) {\n      if (counter[c] == counterSize[c] && c > 0) {\n        counter[c - 1] += 1;\n        counter[c] = 0;\n      }\n    }\n  }\n  return pointCounter;\n}\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\nIndex getIndicePairsConv(tv::TensorView<const Index> indicesIn,\n                         tv::TensorView<Index> indicesOut,\n                         tv::TensorView<IndexGrid> gridsOut,\n                         tv::TensorView<Index> indicePairs,\n                         tv::TensorView<Index> indiceNum,\n                         const Index *kernelSize, const Index *stride,\n                         const Index *padding, const Index *dilation,\n                         const Index *outSpatialShape) {\n  // indicesOut: num_active * kernelVolume * (NDim + 1)\n  Index numAct = 0;\n  auto numActIn = indicesIn.dim(0);\n  Index batchIdx = 0;\n  Index spatialVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    spatialVolume *= outSpatialShape[i];\n  }\n  Index kernelVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    kernelVolume *= kernelSize[i];\n  }\n  Index numValidPoints = 0;\n  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));\n  Index *validPoints = validPoints_.data();\n  Index *pointPtr = nullptr;\n  for (int j = 0; j < numActIn; ++j) {\n    batchIdx = indicesIn(j, 0);\n    numValidPoints = getValidOutPos<Index, NDim>(\n        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,\n        dilation, outSpatialShape, validPoints);\n    for (Index i = 0; i < numValidPoints; ++i) {\n      pointPtr = validPoints + i * (NDim + 1);\n      auto offset = pointPtr[NDim];\n      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +\n                   spatialVolume * batchIdx;\n      if (gridsOut[index] == -1) {\n        for (unsigned k = 1; k < NDim + 1; ++k) {\n          indicesOut(numAct, k) = pointPtr[k - 1];\n        }\n        indicesOut(numAct, 0) = batchIdx;\n        gridsOut[index] = numAct++;\n      }\n      // indicePairs: [K, 2, L]\n      indicePairs(offset, 0, indiceNum[offset]) = j;\n      indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];\n    }\n  }\n  return numAct;\n}\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\nIndex getIndicePairsDeConv(tv::TensorView<const Index> indicesIn,\n                           tv::TensorView<Index> indicesOut,\n                           tv::TensorView<IndexGrid> gridsOut,\n                           tv::TensorView<Index> indicePairs,\n                           tv::TensorView<Index> indiceNum,\n                           const Index *kernelSize, const Index *stride,\n                           const Index *padding, const Index *dilation,\n                           const Index *outSpatialShape) {\n  Index numAct = 0;\n  auto numActIn = indicesIn.dim(0);\n  Index batchIdx = 0;\n  Index spatialVolume = 1;\n#pragma 
unroll\n  for (int i = 0; i < NDim; ++i) {\n    spatialVolume *= outSpatialShape[i];\n  }\n  Index kernelVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    kernelVolume *= kernelSize[i];\n  }\n  Index numValidPoints = 0;\n  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));\n  Index *validPoints = validPoints_.data();\n  Index *pointPtr = nullptr;\n  for (int j = 0; j < numActIn; ++j) {\n    batchIdx = indicesIn(j, 0);\n    numValidPoints = getValidOutPosTranspose<Index, NDim>(\n        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,\n        dilation, outSpatialShape, validPoints);\n    for (Index i = 0; i < numValidPoints; ++i) {\n      pointPtr = validPoints + i * (NDim + 1);\n      auto offset = pointPtr[NDim];\n      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +\n                   spatialVolume * batchIdx;\n      if (gridsOut[index] == -1) {\n        for (unsigned k = 1; k < NDim + 1; ++k) {\n          indicesOut(numAct, k) = pointPtr[k - 1];\n        }\n        indicesOut(numAct, 0) = batchIdx;\n        gridsOut[index] = numAct++;\n      }\n      // indicePairs: [K, 2, L]\n      indicePairs(offset, 0, indiceNum[offset]) = j;\n      indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];\n    }\n  }\n  return numAct;\n}\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\nIndex getIndicePairsSubM(tv::TensorView<const Index> indicesIn,\n                         tv::TensorView<IndexGrid> gridsOut,\n                         tv::TensorView<Index> indicePairs,\n                         tv::TensorView<Index> indiceNum,\n                         const Index *const kernelSize,\n                         const Index *const stride, const Index *const padding,\n                         const Index *dilation,\n                         const Index *const outSpatialShape) {\n  Index numAct = 0;\n  auto numActIn = indicesIn.dim(0);\n  Index batchIdx = 0;\n  Index spatialVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    spatialVolume *= outSpatialShape[i];\n  }\n  Index kernelVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    kernelVolume *= kernelSize[i];\n  }\n  Index numValidPoints = 0;\n  // Index validPoints[kernelVolume * (NDim + 1)];\n  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));\n  Index *validPoints = validPoints_.data();\n  Index *pointPtr = nullptr;\n  Index index = 0;\n  for (int j = 0; j < numActIn; ++j) {\n    index = tv::rowArrayIdx<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1,\n                                         outSpatialShape) +\n            spatialVolume * indicesIn(j, 0);\n    gridsOut[index] = j;\n  }\n  for (int j = 0; j < numActIn; ++j) {\n    numValidPoints = getValidOutPos<Index, NDim>(\n        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,\n        dilation, outSpatialShape, validPoints);\n    for (Index i = 0; i < numValidPoints; ++i) {\n      pointPtr = validPoints + i * (NDim + 1);\n      auto offset = pointPtr[NDim];\n      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +\n              spatialVolume * indicesIn(j, 0);\n      if (gridsOut[index] > -1) {\n        indicePairs(offset, 0, indiceNum[offset]) = j;\n        indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];\n      }\n    }\n  }\n  return numActIn;\n}\n\n}  // namespace spconv\n\n#endif\n"
  },
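  {
    "path": "docs/examples/valid_out_pos_sketch.cc",
    "content": "// Conceptual 1D sketch (added for illustration; this file is not part of the\n// original spconv sources) of the relationship that getValidOutPos above\n// enumerates per dimension: an input coordinate p feeds output coordinate o\n// through kernel tap k exactly when o * stride - padding + k * dilation == p\n// and o lies inside the output spatial shape. The numbers below are made-up\n// toy parameters.\n#include <cstdio>\n\nint main() {\n  const int p = 5;  // input coordinate\n  const int kernelSize = 3, stride = 2, padding = 1, dilation = 1;\n  const int outSpatialShape = 4;\n\n  for (int k = 0; k < kernelSize; ++k) {\n    // Solve o * stride == p + padding - k * dilation for an integer o.\n    const int numer = p + padding - k * dilation;\n    if (numer < 0 || numer % stride != 0) continue;\n    const int o = numer / stride;\n    if (o >= outSpatialShape) continue;\n    std::printf(\"input %d -> output %d via kernel tap %d\\n\", p, o, k);\n  }\n  return 0;\n}\n"
  },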
  {
    "path": "mmdet3d/ops/spconv/include/spconv/indice.cu.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#ifndef INDICE_CU_H_\n#define INDICE_CU_H_\n#include <spconv/geometry.h>\n#include <tensorview/helper_kernel.cu.h>\n#include <tensorview/tensorview.h>\n\nnamespace spconv {\ntemplate <typename Index, typename IndexGrid, unsigned NDim,\n          int KernelMaxVolume = 256>\n__global__ void prepareIndicePairsKernel(\n    tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicesOut,\n    tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,\n    tv::TensorView<Index> indiceNum, tv::TensorView<Index> indicePairUnique,\n    const tv::SimpleVector<Index, NDim> kernelSize,\n    const tv::SimpleVector<Index, NDim> stride,\n    const tv::SimpleVector<Index, NDim> padding,\n    const tv::SimpleVector<Index, NDim> dilation,\n    const tv::SimpleVector<Index, NDim> outSpatialShape) {\n  auto numActIn = indicesIn.dim(0);\n  Index spatialVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    spatialVolume *= outSpatialShape[i];\n  }\n  Index kernelVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    kernelVolume *= kernelSize[i];\n  }\n  Index numValidPoints = 0;\n  Index validPoints[KernelMaxVolume * (NDim + 1)];\n  Index *pointPtr = nullptr;\n  auto indicePairsDim2 = indicePairs.dim(2);\n  Index index;\n  for (int ix : tv::KernelLoopX<int>(numActIn)) {\n    numValidPoints = getValidOutPos<Index, NDim>(\n        indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(),\n        stride.data(), padding.data(), dilation.data(), outSpatialShape.data(),\n        validPoints);\n    for (Index i = 0; i < numValidPoints; ++i) {\n      pointPtr = validPoints + i * (NDim + 1);\n      auto offset = pointPtr[NDim];\n      auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1));\n      indicePairs(offset, 0, oldNum) = ix;\n      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) +\n              spatialVolume * indicesIn(ix, 0);\n      indicePairs(offset, 1, oldNum) = index;\n      indicePairUnique[offset * indicePairsDim2 + oldNum] = index;\n    }\n  }\n}\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim,\n          int KernelMaxVolume = 256>\n__global__ void prepareDeConvIndicePairsKernel(\n    tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicesOut,\n    tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,\n    tv::TensorView<Index> indiceNum, tv::TensorView<Index> indicePairUnique,\n    const tv::SimpleVector<Index, NDim> kernelSize,\n    const tv::SimpleVector<Index, NDim> stride,\n    const tv::SimpleVector<Index, NDim> padding,\n    const tv::SimpleVector<Index, NDim> dilation,\n    const tv::SimpleVector<Index, NDim> outSpatialShape) {\n  auto numActIn = indicesIn.dim(0);\n  Index spatialVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    spatialVolume *= outSpatialShape[i];\n  }\n  Index kernelVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) 
{\n    kernelVolume *= kernelSize[i];\n  }\n  Index numValidPoints = 0;\n  Index validPoints[KernelMaxVolume * (NDim + 1)];\n  Index *pointPtr = nullptr;\n  auto indicePairsDim2 = indicePairs.dim(2);\n  Index index;\n  for (int ix : tv::KernelLoopX<int>(numActIn)) {\n    numValidPoints = getValidOutPosTranspose<Index, NDim>(\n        indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(),\n        stride.data(), padding.data(), dilation.data(), outSpatialShape.data(),\n        validPoints);\n    for (Index i = 0; i < numValidPoints; ++i) {\n      pointPtr = validPoints + i * (NDim + 1);\n      auto offset = pointPtr[NDim];\n      auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1));\n      indicePairs(offset, 0, oldNum) = ix;\n      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) +\n              spatialVolume * indicesIn(ix, 0);\n      indicePairs(offset, 1, oldNum) = index;\n      indicePairUnique[offset * indicePairsDim2 + oldNum] = index;\n    }\n  }\n}\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\n__global__ void assignGridAndIndiceOutKernel(\n    tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut,\n    int numAct, tv::TensorView<Index> indicePairs,\n    tv::TensorView<Index> indicePairUnique,\n    const tv::SimpleVector<Index, NDim> outSpatialShape, int batchSize) {\n  Index index;\n  auto indicesOutPtr = indicesOut.data();\n  for (int ix : tv::KernelLoopX<int>(numAct)) {\n    index = indicePairUnique[ix];\n    gridsOut[index] = ix;\n    index = tv::rowArrayIdxInv<Index, NDim>(\n        index, indicesOutPtr + ix * (NDim + 1) + 1, outSpatialShape.data());\n    indicesOut[ix * (NDim + 1)] = index % batchSize;\n  }\n}\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\n__global__ void assignIndicePairsKernel(\n    tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut,\n    int numActIn, tv::TensorView<Index> indicePairs,\n    tv::TensorView<Index> indicePairUnique,\n    const tv::SimpleVector<Index, NDim> outSpatialShape) {\n  Index index;\n  int kernelVolume = indicePairs.dim(0);\n  for (int ix : tv::KernelLoopX<int>(numActIn)) {\n    for (int i = 0; i < kernelVolume; ++i) {\n      index = indicePairs(i, 1, ix);\n      if (index > -1) {\n        indicePairs(i, 1, ix) = gridsOut[index];\n      }\n    }\n  }\n}\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\n__global__ void prepareSubMGridKernel(\n    tv::TensorView<const Index> indicesIn, tv::TensorView<IndexGrid> gridsOut,\n    const tv::SimpleVector<Index, NDim> outSpatialShape) {\n  auto numActIn = indicesIn.dim(0);\n  Index spatialVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    spatialVolume *= outSpatialShape[i];\n  }\n  Index index = 0;\n  for (int ix : tv::KernelLoopX<int>(numActIn)) {\n    index = tv::rowArrayIdx<Index, NDim>(indicesIn.data() + ix * (NDim + 1) + 1,\n                                         outSpatialShape.data()) +\n            spatialVolume * indicesIn(ix, 0);\n    gridsOut[index] = ix;\n  }\n}\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim,\n          int KernelMaxVolume = 256>\n__global__ void getSubMIndicePairsKernel(\n    tv::TensorView<const Index> indicesIn, tv::TensorView<IndexGrid> gridsOut,\n    tv::TensorView<Index> indicePairs, tv::TensorView<Index> indiceNum,\n    const tv::SimpleVector<Index, NDim> kernelSize,\n    const tv::SimpleVector<Index, NDim> stride,\n    const tv::SimpleVector<Index, NDim> padding,\n    const tv::SimpleVector<Index, NDim> 
dilation,\n    const tv::SimpleVector<Index, NDim> outSpatialShape) {\n  auto numActIn = indicesIn.dim(0);\n  Index spatialVolume = 1;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    spatialVolume *= outSpatialShape[i];\n  }\n  Index numValidPoints = 0;\n  Index validPoints[KernelMaxVolume * (NDim + 1)];\n  Index *pointPtr = nullptr;\n  Index index = 0;\n  for (int ix : tv::KernelLoopX<int>(numActIn)) {\n    numValidPoints = getValidOutPos<Index, NDim>(\n        indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(),\n        stride.data(), padding.data(), dilation.data(), outSpatialShape.data(),\n        validPoints);\n    for (int i = 0; i < numValidPoints; ++i) {\n      pointPtr = validPoints + i * (NDim + 1);\n      auto offset = pointPtr[NDim];\n      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) +\n              spatialVolume * indicesIn(ix, 0);\n      if (gridsOut[index] > -1) {\n        auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1));\n        indicePairs(offset, 1, oldNum) = gridsOut[index];\n        indicePairs(offset, 0, oldNum) = ix;\n      }\n    }\n  }\n}\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\n__global__ void resetGridKernel(const Index *indicePairUnique,\n                                tv::TensorView<IndexGrid> gridsOut,\n                                int numAct) {\n  for (int ix : tv::KernelLoopX<int>(numAct)) {\n    gridsOut[indicePairUnique[ix]] = -1;\n  }\n}\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\n__global__ void resetGridSubMKernel(\n    const Index *indices, tv::TensorView<IndexGrid> gridsOut,\n    const tv::SimpleVector<Index, NDim> outSpatialShape, int numAct) {\n  int outSpatialShapeReg[NDim];\n  for (int i = 0; i < NDim; ++i) {\n    outSpatialShapeReg[i] = outSpatialShape[i];\n  }\n  Index spatialVolume = 1;\n  auto indsPtr = indices;\n#pragma unroll\n  for (int i = 0; i < NDim; ++i) {\n    spatialVolume *= outSpatialShape[i];\n  }\n  Index index;\n  for (int ix : tv::KernelLoopX<int>(numAct)) {\n    indsPtr = indices + ix * (NDim + 1);\n    index = tv::rowArrayIdx<Index, NDim>(indsPtr + 1, outSpatialShapeReg);\n    gridsOut[index + spatialVolume * indsPtr[0]] = -1;\n  }\n}\n\n}  // namespace spconv\n\n#endif\n"
  },
  {
    "path": "mmdet3d/ops/spconv/include/spconv/indice.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#ifndef SPARSE_CONV_INDICE_FUNCTOR_H_\n#define SPARSE_CONV_INDICE_FUNCTOR_H_\n#include <tensorview/tensorview.h>\n\nnamespace spconv {\nnamespace functor {\ntemplate <typename Device, typename Index, typename IndexGrid, unsigned NDim>\nstruct CreateConvIndicePairFunctorP1 {\n  Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,\n                   tv::TensorView<Index> indicesOut,\n                   tv::TensorView<IndexGrid> gridsOut,\n                   tv::TensorView<Index> indicePairs,\n                   tv::TensorView<Index> indiceNum,\n                   tv::TensorView<Index> indicePairUnique,\n                   const tv::SimpleVector<Index, NDim> kernelSize,\n                   const tv::SimpleVector<Index, NDim> stride,\n                   const tv::SimpleVector<Index, NDim> padding,\n                   const tv::SimpleVector<Index, NDim> dilation,\n                   const tv::SimpleVector<Index, NDim> outSpatialShape,\n                   bool transpose);\n};\n\ntemplate <typename Device, typename Index, typename IndexGrid, unsigned NDim>\nstruct CreateConvIndicePairFunctorP2 {\n  Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,\n                   tv::TensorView<Index> indicesOut,\n                   tv::TensorView<IndexGrid> gridsOut,\n                   tv::TensorView<Index> indicePairs,\n                   tv::TensorView<Index> indiceNum,\n                   tv::TensorView<Index> indicePairUnique,\n                   const tv::SimpleVector<Index, NDim> outSpatialShape,\n                   bool transpose, bool resetGrid = false);\n};\n\ntemplate <typename Device, typename Index, typename IndexGrid, unsigned NDim>\nstruct CreateConvIndicePairFunctor {\n  Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,\n                   tv::TensorView<Index> indicesOut,\n                   tv::TensorView<IndexGrid> gridsOut,\n                   tv::TensorView<Index> indicePairs,\n                   tv::TensorView<Index> indiceNum,\n                   const tv::SimpleVector<Index, NDim> kernelSize,\n                   const tv::SimpleVector<Index, NDim> stride,\n                   const tv::SimpleVector<Index, NDim> padding,\n                   const tv::SimpleVector<Index, NDim> dilation,\n                   const tv::SimpleVector<Index, NDim> outSpatialShape,\n                   bool transpose, bool resetGrid = false);\n};\n\ntemplate <typename Device, typename Index, typename IndexGrid, unsigned NDim>\nstruct CreateSubMIndicePairFunctor {\n  Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,\n                   tv::TensorView<IndexGrid> gridsOut,\n                   tv::TensorView<Index> indicePairs,\n                   tv::TensorView<Index> indiceNum,\n                   const tv::SimpleVector<Index, NDim> kernelSize,\n                   const tv::SimpleVector<Index, NDim> stride,\n           
        const tv::SimpleVector<Index, NDim> padding,\n                   const tv::SimpleVector<Index, NDim> dilation,\n                   const tv::SimpleVector<Index, NDim> outSpatialShape,\n                   bool transpose, bool resetGrid = false);\n};\n}  // namespace functor\n}  // namespace spconv\n\n#endif\n"
  },
  {
    "path": "mmdet3d/ops/spconv/include/spconv/maxpool.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#ifndef SPARSE_MAXPOOL_FUNCTOR_H_\n#define SPARSE_MAXPOOL_FUNCTOR_H_\n#include <tensorview/tensorview.h>\n\nnamespace spconv {\nnamespace functor {\ntemplate <typename Device, typename T, typename Index>\nstruct SparseMaxPoolForwardFunctor {\n  void operator()(const Device& d, tv::TensorView<T> outFeatures,\n                  tv::TensorView<const T> inFeatures,\n                  tv::TensorView<const Index> indices, int size);\n};\n\ntemplate <typename Device, typename T, typename Index>\nstruct SparseMaxPoolBackwardFunctor {\n  void operator()(const Device& d, tv::TensorView<const T> outFeatures,\n                  tv::TensorView<const T> inFeatures,\n                  tv::TensorView<const T> dout, tv::TensorView<T> din,\n                  tv::TensorView<const Index> indices, int size);\n};\n\n}  // namespace functor\n}  // namespace spconv\n\n#endif\n"
  },
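  {
    "path": "docs/examples/sparse_maxpool_sketch.cc",
    "content": "// Conceptual sketch (added for illustration; this file is not part of the\n// original spconv sources) of what the SparseMaxPoolForwardFunctor declared\n// above computes: every (input index, output index) pair contributes its\n// input feature row to the output row by an element-wise maximum. The real\n// functor works on tv::TensorView data and handles initialization itself;\n// the toy data below is made up.\n#include <algorithm>\n#include <cstdio>\n#include <limits>\n#include <utility>\n#include <vector>\n\nint main() {\n  const int numOut = 2, channels = 2;\n  // Three active input rows, two channels each (row-major).\n  std::vector<float> in = {1, 8, 5, 2, 3, 4};\n  std::vector<float> out(numOut * channels,\n                         std::numeric_limits<float>::lowest());\n\n  // Each pair maps an active input row onto an active output row.\n  std::vector<std::pair<int, int>> pairs = {{0, 0}, {1, 0}, {2, 1}};\n  for (const auto &pr : pairs)\n    for (int c = 0; c < channels; ++c)\n      out[pr.second * channels + c] = std::max(\n          out[pr.second * channels + c], in[pr.first * channels + c]);\n\n  for (int r = 0; r < numOut; ++r)\n    std::printf(\"out[%d] = (%.1f, %.1f)\\n\", r, out[r * channels + 0],\n                out[r * channels + 1]);\n  return 0;\n}\n"
  },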
  {
    "path": "mmdet3d/ops/spconv/include/spconv/mp_helper.h",
    "content": "#ifndef MP_HELPER_H_\n#define MP_HELPER_H_\n#include <type_traits>\n#include <utility>\n\nnamespace spconv {\ntemplate <class... T>\nstruct mp_list {};\n\ntemplate <class T, T... I>\nusing mp_list_c = mp_list<std::integral_constant<T, I>...>;\n\nnamespace detail {\n\ntemplate <class... T, class F>\nconstexpr F mp_for_each_impl(mp_list<T...>, F &&f) {\n  return std::initializer_list<int>{(f(T()), 0)...}, std::forward<F>(f);\n}\n\ntemplate <class F>\nconstexpr F mp_for_each_impl(mp_list<>, F &&f) {\n  return std::forward<F>(f);\n}\n\n}  // namespace detail\n\nnamespace detail {\n\ntemplate <class A, template <class...> class B>\nstruct mp_rename_impl {\n  // An error \"no type named 'type'\" here means that the first argument to\n  // mp_rename is not a list\n};\n\ntemplate <template <class...> class A, class... T, template <class...> class B>\nstruct mp_rename_impl<A<T...>, B> {\n  using type = B<T...>;\n};\n\n}  // namespace detail\n\ntemplate <class A, template <class...> class B>\nusing mp_rename = typename detail::mp_rename_impl<A, B>::type;\n\ntemplate <class L, class F>\nconstexpr F mp_for_each(F &&f) {\n  return detail::mp_for_each_impl(mp_rename<L, mp_list>(), std::forward<F>(f));\n}\n}  // namespace spconv\n\n#endif\n"
  },
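  {
    "path": "docs/examples/mp_helper_usage_sketch.cc",
    "content": "// Hypothetical usage sketch for include/spconv/mp_helper.h (added for\n// illustration; this file is not part of the original spconv sources).\n// mp_for_each instantiates the functor once per type in a compile-time type\n// list; here a generic lambda just reports the size of each element type.\n#include <cstdio>\n\n#include <spconv/mp_helper.h>\n\nint main() {\n  using list = spconv::mp_list<int, double, char>;\n  spconv::mp_for_each<list>([](auto t) {\n    // t is a default-constructed value of each list element in turn.\n    std::printf(\"element size: %zu bytes\\n\", sizeof(t));\n  });\n  return 0;\n}\n"
  },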
  {
    "path": "mmdet3d/ops/spconv/include/spconv/point2voxel.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n#include <pybind11/pybind11.h>\n// must include pybind11/eigen.h if using eigen matrix as arguments.\n// must include pybind11/stl.h if using containers in STL in arguments.\n#include <pybind11/numpy.h>\n#include <pybind11/stl.h>\n\n#include <algorithm>\n// #include <vector>\n#include <math.h>\n\n#include <iostream>\n\nnamespace spconv {\nnamespace py = pybind11;\nusing namespace pybind11::literals;\n\ntemplate <typename DType, int NDim>\nint points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,\n                          py::array_t<int> coors,\n                          py::array_t<int> num_points_per_voxel,\n                          py::array_t<int> coor_to_voxelidx,\n                          std::vector<DType> voxel_size,\n                          std::vector<DType> coors_range, int max_points,\n                          int max_voxels) {\n  auto points_rw = points.template mutable_unchecked<2>();\n  auto voxels_rw = voxels.template mutable_unchecked<3>();\n  auto coors_rw = coors.mutable_unchecked<2>();\n  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();\n  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();\n  auto N = points_rw.shape(0);\n  auto num_features = points_rw.shape(1);\n  // auto ndim = points_rw.shape(1) - 1;\n  constexpr int ndim_minus_1 = NDim - 1;\n  int voxel_num = 0;\n  bool failed = false;\n  int coor[NDim];\n  int c;\n  int grid_size[NDim];\n  for (int i = 0; i < NDim; ++i) {\n    grid_size[i] =\n        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);\n  }\n  int voxelidx, num;\n  for (int i = 0; i < N; ++i) {\n    failed = false;\n    for (int j = 0; j < NDim; ++j) {\n      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);\n      if ((c < 0 || c >= grid_size[j])) {\n        failed = true;\n        break;\n      }\n      coor[ndim_minus_1 - j] = c;\n    }\n    if (failed) continue;\n    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);\n    if (voxelidx == -1) {\n      voxelidx = voxel_num;\n      if (voxel_num >= max_voxels) break;\n      voxel_num += 1;\n      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;\n      for (int k = 0; k < NDim; ++k) {\n        coors_rw(voxelidx, k) = coor[k];\n      }\n    }\n    num = num_points_per_voxel_rw(voxelidx);\n    if (num < max_points) {\n      for (int k = 0; k < num_features; ++k) {\n        voxels_rw(voxelidx, num, k) = points_rw(i, k);\n      }\n      num_points_per_voxel_rw(voxelidx) += 1;\n    }\n  }\n  for (int i = 0; i < voxel_num; ++i) {\n    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;\n  }\n  return voxel_num;\n}\n\ntemplate <typename DType, int NDim>\nint points_to_voxel_3d_np_mean(py::array_t<DType> points,\n                               py::array_t<DType> voxels,\n                               py::array_t<DType> means, py::array_t<int> 
coors,\n                               py::array_t<int> num_points_per_voxel,\n                               py::array_t<int> coor_to_voxelidx,\n                               std::vector<DType> voxel_size,\n                               std::vector<DType> coors_range, int max_points,\n                               int max_voxels) {\n  auto points_rw = points.template mutable_unchecked<2>();\n  auto means_rw = means.template mutable_unchecked<2>();\n  auto voxels_rw = voxels.template mutable_unchecked<3>();\n  auto coors_rw = coors.mutable_unchecked<2>();\n  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();\n  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();\n  auto N = points_rw.shape(0);\n  auto num_features = points_rw.shape(1);\n  // auto ndim = points_rw.shape(1) - 1;\n  constexpr int ndim_minus_1 = NDim - 1;\n  int voxel_num = 0;\n  bool failed = false;\n  int coor[NDim];\n  int c;\n  int grid_size[NDim];\n  for (int i = 0; i < NDim; ++i) {\n    grid_size[i] =\n        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);\n  }\n  int voxelidx, num;\n  for (int i = 0; i < N; ++i) {\n    failed = false;\n    for (int j = 0; j < NDim; ++j) {\n      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);\n      if ((c < 0 || c >= grid_size[j])) {\n        failed = true;\n        break;\n      }\n      coor[ndim_minus_1 - j] = c;\n    }\n    if (failed) continue;\n    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);\n    if (voxelidx == -1) {\n      voxelidx = voxel_num;\n      if (voxel_num >= max_voxels) break;\n      voxel_num += 1;\n      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;\n      for (int k = 0; k < NDim; ++k) {\n        coors_rw(voxelidx, k) = coor[k];\n      }\n    }\n    num = num_points_per_voxel_rw(voxelidx);\n    if (num < max_points) {\n      for (int k = 0; k < num_features; ++k) {\n        voxels_rw(voxelidx, num, k) = points_rw(i, k);\n      }\n      num_points_per_voxel_rw(voxelidx) += 1;\n      for (int k = 0; k < num_features; ++k) {\n        means_rw(voxelidx, k) +=\n            (points_rw(i, k) - means_rw(voxelidx, k)) / DType(num + 1);\n      }\n    }\n  }\n  for (int i = 0; i < voxel_num; ++i) {\n    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;\n    num = num_points_per_voxel_rw(i);\n    for (int j = num; j < max_points; ++j) {\n      for (int k = 0; k < num_features; ++k) {\n        voxels_rw(i, j, k) = means_rw(i, k);\n      }\n    }\n  }\n  return voxel_num;\n}\n\ntemplate <typename DType, int NDim>\nint points_to_voxel_3d_np_height(\n    py::array_t<DType> points, py::array_t<DType> voxels,\n    py::array_t<DType> height, py::array_t<DType> maxs, py::array_t<int> coors,\n    py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,\n    std::vector<DType> voxel_size, std::vector<DType> coors_range,\n    int max_points, int max_voxels) {\n  auto points_rw = points.template mutable_unchecked<2>();\n  auto height_rw = height.template mutable_unchecked<2>();\n  auto maxs_rw = maxs.template mutable_unchecked<2>();\n  auto voxels_rw = voxels.template mutable_unchecked<3>();\n  auto coors_rw = coors.mutable_unchecked<2>();\n  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();\n  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();\n  auto N = points_rw.shape(0);\n  auto num_features = points_rw.shape(1);\n  // auto ndim = points_rw.shape(1) - 1;\n  constexpr int ndim_minus_1 = NDim - 
1;\n  int voxel_num = 0;\n  bool failed = false;\n  int coor[NDim];\n  int c;\n  int grid_size[NDim];\n  for (int i = 0; i < NDim; ++i) {\n    grid_size[i] =\n        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);\n  }\n  int voxelidx, num;\n  for (int i = 0; i < N; ++i) {\n    failed = false;\n    for (int j = 0; j < NDim; ++j) {\n      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);\n      if ((c < 0 || c >= grid_size[j])) {\n        failed = true;\n        break;\n      }\n      coor[ndim_minus_1 - j] = c;\n    }\n    if (failed) continue;\n    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);\n    if (voxelidx == -1) {\n      voxelidx = voxel_num;\n      if (voxel_num >= max_voxels) break;\n      voxel_num += 1;\n      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;\n      for (int k = 0; k < NDim; ++k) {\n        coors_rw(voxelidx, k) = coor[k];\n      }\n    }\n    num = num_points_per_voxel_rw(voxelidx);\n    if (num < max_points) {\n      for (int k = 0; k < num_features; ++k) {\n        voxels_rw(voxelidx, num, k) = points_rw(i, k);\n        height_rw(voxelidx, k) =\n            std::min(points_rw(i, k), height_rw(voxelidx, k));\n        maxs_rw(voxelidx, k) = std::max(points_rw(i, k), maxs_rw(voxelidx, k));\n      }\n      num_points_per_voxel_rw(voxelidx) += 1;\n    }\n  }\n  for (int i = 0; i < voxel_num; ++i) {\n    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;\n    for (int k = 0; k < num_features; ++k) {\n      height_rw(i, k) = maxs_rw(i, k) - height_rw(i, k);\n    }\n  }\n  return voxel_num;\n}\n\ntemplate <typename DType, int NDim>\nint block_filtering(py::array_t<DType> points, py::array_t<int> mask,\n                    py::array_t<DType> height, py::array_t<DType> maxs,\n                    py::array_t<int> coor_to_voxelidx,\n                    std::vector<DType> voxel_size,\n                    std::vector<DType> coors_range, int max_voxels, DType eps) {\n  auto points_rw = points.template mutable_unchecked<2>();\n  auto height_rw = height.template mutable_unchecked<1>();\n  auto maxs_rw = maxs.template mutable_unchecked<1>();\n  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();\n  auto N = points_rw.shape(0);\n  auto num_features = points_rw.shape(1);\n  // auto ndim = points_rw.shape(1) - 1;\n  constexpr int ndim_minus_1 = NDim - 1;\n  int voxel_num = 0;\n  bool failed = false;\n  int coor[NDim];\n  int c;\n  int grid_size[NDim];\n  for (int i = 0; i < NDim; ++i) {\n    grid_size[i] =\n        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);\n  }\n  int voxelidx, num;\n  for (int i = 0; i < N; ++i) {\n    failed = false;\n    for (int j = 0; j < NDim; ++j) {\n      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);\n      if ((c < 0 || c >= grid_size[j])) {\n        failed = true;\n        break;\n      }\n      coor[ndim_minus_1 - j] = c;\n    }\n    if (failed) continue;\n    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);\n    if (voxelidx == -1) {\n      voxelidx = voxel_num;\n      voxel_num += 1;\n      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;\n    }\n    height_rw(voxelidx) = std::min(points_rw(i, 2), height_rw(voxelidx));\n    maxs_rw(voxelidx) = std::max(points_rw(i, 2), maxs_rw(voxelidx));\n  }\n  for (int i = 0; i < N; ++i) {\n    failed = false;\n    for (int j = 0; j < NDim; ++j) {\n      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);\n      if ((c < 0 || c >= grid_size[j])) {\n   
     failed = true;\n        break;\n      }\n      coor[ndim_minus_1 - j] = c;\n    }\n    if (failed) continue;\n    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);\n    if ((maxs_rw(voxelidx) - height_rw(voxelidx, 2)) < eps) {\n      mask(i) = 0;\n    }\n  }\n}\n\ntemplate <typename DType, int NDim>\nint points_to_voxel_3d_with_filtering(\n    py::array_t<DType> points, py::array_t<DType> voxels,\n    py::array_t<int> voxel_mask, py::array_t<DType> mins,\n    py::array_t<DType> maxs, py::array_t<int> coors,\n    py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,\n    std::vector<DType> voxel_size, std::vector<DType> coors_range,\n    int max_points, int max_voxels, int block_factor, int block_size,\n    DType height_threshold) {\n  auto points_rw = points.template mutable_unchecked<2>();\n  auto mins_rw = mins.template mutable_unchecked<2>();\n  auto maxs_rw = maxs.template mutable_unchecked<2>();\n  auto voxels_rw = voxels.template mutable_unchecked<3>();\n  auto voxel_mask_rw = voxel_mask.template mutable_unchecked<1>();\n  auto coors_rw = coors.mutable_unchecked<2>();\n  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();\n  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();\n  auto N = points_rw.shape(0);\n  auto num_features = points_rw.shape(1);\n  // auto ndim = points_rw.shape(1) - 1;\n  constexpr int ndim_minus_1 = NDim - 1;\n  int voxel_num = 0;\n  bool failed = false;\n  int coor[NDim];\n  int c;\n  int grid_size[NDim];\n\n  DType max_value, min_value;\n  for (int i = 0; i < NDim; ++i) {\n    grid_size[i] =\n        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);\n  }\n  int block_shape_H = grid_size[1] / block_factor;\n  int block_shape_W = grid_size[0] / block_factor;\n  int voxelidx, num;\n  int block_coor[2];\n  int startx, stopx, starty, stopy;\n  for (int i = 0; i < N; ++i) {\n    failed = false;\n    for (int j = 0; j < NDim; ++j) {\n      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);\n      if ((c < 0 || c >= grid_size[j])) {\n        failed = true;\n        break;\n      }\n      coor[ndim_minus_1 - j] = c;\n    }\n    if (failed) continue;\n    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);\n    if (voxelidx == -1) {\n      voxelidx = voxel_num;\n      if (voxel_num >= max_voxels) break;\n      voxel_num += 1;\n      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;\n      for (int k = 0; k < NDim; ++k) {\n        coors_rw(voxelidx, k) = coor[k];\n      }\n    }\n    num = num_points_per_voxel_rw(voxelidx);\n    if (num < max_points) {\n      for (int k = 0; k < num_features; ++k) {\n        voxels_rw(voxelidx, num, k) = points_rw(i, k);\n      }\n      block_coor[0] = coor[1] / block_factor;\n      block_coor[1] = coor[2] / block_factor;\n      mins_rw(block_coor[0], block_coor[1]) =\n          std::min(points_rw(i, 2), mins_rw(block_coor[0], block_coor[1]));\n      maxs_rw(block_coor[0], block_coor[1]) =\n          std::max(points_rw(i, 2), maxs_rw(block_coor[0], block_coor[1]));\n      num_points_per_voxel_rw(voxelidx) += 1;\n    }\n  }\n  for (int i = 0; i < voxel_num; ++i) {\n    coor[1] = coors_rw(i, 1);\n    coor[2] = coors_rw(i, 2);\n    coor_to_voxelidx_rw(coors_rw(i, 0), coor[1], coor[2]) = -1;\n    block_coor[0] = coor[1] / block_factor;\n    block_coor[1] = coor[2] / block_factor;\n    min_value = mins_rw(block_coor[0], block_coor[1]);\n    max_value = maxs_rw(block_coor[0], block_coor[1]);\n    startx = std::max(0, 
block_coor[0] - block_size / 2);\n    stopx =\n        std::min(block_shape_H, block_coor[0] + block_size - block_size / 2);\n    starty = std::max(0, block_coor[1] - block_size / 2);\n    stopy =\n        std::min(block_shape_W, block_coor[1] + block_size - block_size / 2);\n\n    for (int j = startx; j < stopx; ++j) {\n      for (int k = starty; k < stopy; ++k) {\n        min_value = std::min(min_value, mins_rw(j, k));\n        max_value = std::max(max_value, maxs_rw(j, k));\n      }\n    }\n    voxel_mask_rw(i) = (max_value - min_value) > height_threshold;\n  }\n  return voxel_num;\n}\n\n}  // namespace spconv\n"
  },
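  {
    "path": "mmdet3d/ops/spconv/examples/point2voxel_sketch.cc",
    "content": "// Illustrative sketch, not part of upstream spconv: a minimal CPU\n// re-implementation of the first-come-first-served voxel assignment that\n// points_to_voxel_3d_np in include/spconv/point2voxel.h performs on\n// py::array_t buffers. The file name and all helper names here are\n// hypothetical; the real op keeps a dense coor_to_voxelidx grid instead of\n// the hash map used below.\n#include <array>\n#include <cmath>\n#include <iostream>\n#include <unordered_map>\n#include <vector>\n\nint main() {\n  // Toy input: three points in (x, y, z).\n  std::vector<std::array<float, 3>> points = {\n      {0.1f, 0.2f, 0.3f}, {0.15f, 0.22f, 0.31f}, {2.0f, 2.0f, 2.0f}};\n  const float voxel_size[3] = {0.5f, 0.5f, 0.5f};\n  const float coors_range[6] = {0.f, 0.f, 0.f, 4.f, 4.f, 4.f};  // min xyz, max xyz\n  const int max_points = 2;\n  const int max_voxels = 100;\n\n  int grid_size[3];\n  for (int i = 0; i < 3; ++i) {\n    grid_size[i] = static_cast<int>(\n        std::round((coors_range[3 + i] - coors_range[i]) / voxel_size[i]));\n  }\n\n  std::unordered_map<long, int> coor_to_voxelidx;  // zyx coordinate -> voxel id\n  std::vector<std::array<int, 3>> coors;           // zyx coordinate per voxel\n  std::vector<int> num_points_per_voxel;\n  std::vector<std::vector<std::array<float, 3>>> voxels;\n\n  for (const auto &p : points) {\n    int coor[3];\n    bool failed = false;\n    for (int j = 0; j < 3; ++j) {\n      int c = static_cast<int>(\n          std::floor((p[j] - coors_range[j]) / voxel_size[j]));\n      if (c < 0 || c >= grid_size[j]) {\n        failed = true;\n        break;\n      }\n      coor[2 - j] = c;  // store as (z, y, x), matching the header\n    }\n    if (failed) continue;\n    long key = (static_cast<long>(coor[0]) * grid_size[1] + coor[1]) *\n                   grid_size[0] +\n               coor[2];\n    auto it = coor_to_voxelidx.find(key);\n    int voxelidx;\n    if (it == coor_to_voxelidx.end()) {\n      if (static_cast<int>(voxels.size()) >= max_voxels) break;\n      voxelidx = static_cast<int>(voxels.size());\n      coor_to_voxelidx.emplace(key, voxelidx);\n      coors.push_back({coor[0], coor[1], coor[2]});\n      voxels.emplace_back();\n      num_points_per_voxel.push_back(0);\n    } else {\n      voxelidx = it->second;\n    }\n    // Voxels that are already full silently drop further points, as in the op.\n    if (num_points_per_voxel[voxelidx] < max_points) {\n      voxels[voxelidx].push_back(p);\n      num_points_per_voxel[voxelidx] += 1;\n    }\n  }\n  std::cout << voxels.size() << std::endl;  // expected: 2 occupied voxels\n  return 0;\n}\n"
  },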
  {
    "path": "mmdet3d/ops/spconv/include/spconv/pool_ops.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#ifndef SPARSE_POOL_OP_H_\n#define SPARSE_POOL_OP_H_\n\n#include <cuda_runtime_api.h>\n#include <spconv/maxpool.h>\n#include <torch/script.h>\n#include <torch_utils.h>\n#include <utility/timer.h>\n\nnamespace spconv {\ntemplate <typename T>\ntorch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,\n                            torch::Tensor indiceNum, int64_t numAct) {\n  auto device = features.device().type();\n  auto kernelVolume = indicePairs.size(0);\n  auto numInPlanes = features.size(1);\n  auto indicePairNumCpu = indiceNum.to({torch::kCPU});\n  auto options =\n      torch::TensorOptions().dtype(features.dtype()).device(features.device());\n  torch::Tensor output = torch::zeros({numAct, numInPlanes}, options);\n  double totalTime = 0;\n  for (int i = 0; i < kernelVolume; ++i) {\n    auto nHot = indicePairNumCpu.data_ptr<int>()[i];\n    if (nHot <= 0) {\n      continue;\n    }\n    // auto timer = spconv::CudaContextTimer<>();\n    if (device == torch::kCPU) {\n      functor::SparseMaxPoolForwardFunctor<tv::CPU, T, int> forwardFtor;\n      forwardFtor(tv::CPU(), tv::torch2tv<T>(output),\n                  tv::torch2tv<const T>(features),\n                  tv::torch2tv<const int>(indicePairs).subview(i), nHot);\n    } else {\n      functor::SparseMaxPoolForwardFunctor<tv::GPU, T, int> forwardFtor;\n      forwardFtor(tv::TorchGPU(), tv::torch2tv<T>(output),\n                  tv::torch2tv<const T>(features),\n                  tv::torch2tv<const int>(indicePairs).subview(i), nHot);\n      TV_CHECK_CUDA_ERR();\n    }\n    // totalTime += timer.report() / 1000.0;\n  }\n  // std::cout << \"maxpool forward time \" << totalTime << std::endl;\n  return output;\n}\n\ntemplate <typename T>\ntorch::Tensor indiceMaxPoolBackward(torch::Tensor features,\n                                    torch::Tensor outFeatures,\n                                    torch::Tensor outGrad,\n                                    torch::Tensor indicePairs,\n                                    torch::Tensor indiceNum) {\n  auto device = features.device().type();\n  auto numInPlanes = features.size(1);\n  auto indicePairNumCpu = indiceNum.to({torch::kCPU});\n  auto options =\n      torch::TensorOptions().dtype(features.dtype()).device(features.device());\n  torch::Tensor inputGrad = torch::zeros(features.sizes(), options);\n  auto kernelVolume = indicePairs.size(0);\n  for (int i = 0; i < kernelVolume; ++i) {\n    auto nHot = indicePairNumCpu.data_ptr<int>()[i];\n    if (nHot <= 0) {\n      continue;\n    }\n    if (device == torch::kCPU) {\n      functor::SparseMaxPoolBackwardFunctor<tv::CPU, T, int> backwardFtor;\n      backwardFtor(tv::CPU(), tv::torch2tv<const T>(outFeatures),\n                   tv::torch2tv<const T>(features),\n                   tv::torch2tv<const T>(outGrad), tv::torch2tv<T>(inputGrad),\n                   tv::torch2tv<const int>(indicePairs).subview(i), nHot);\n    
} else {\n      functor::SparseMaxPoolBackwardFunctor<tv::GPU, T, int> backwardFtor;\n      backwardFtor(tv::TorchGPU(), tv::torch2tv<const T>(outFeatures),\n                   tv::torch2tv<const T>(features),\n                   tv::torch2tv<const T>(outGrad), tv::torch2tv<T>(inputGrad),\n                   tv::torch2tv<const int>(indicePairs).subview(i), nHot);\n      TV_CHECK_CUDA_ERR();\n    }\n  }\n  return inputGrad;\n}\n\n}  // namespace spconv\n\n#endif\n"
  },
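  {
    "path": "mmdet3d/ops/spconv/examples/sparse_maxpool_sketch.cc",
    "content": "// Illustrative sketch, not the spconv API: the per-kernel-offset work that\n// indiceMaxPool in include/spconv/pool_ops.h delegates to\n// SparseMaxPoolForwardFunctor, written against plain row-major std::vector\n// buffers. Names below are hypothetical. The real op allocates its output with\n// torch::zeros; a -infinity initialisation is used here only to make the\n// element-wise max explicit.\n#include <algorithm>\n#include <cstddef>\n#include <limits>\n#include <utility>\n#include <vector>\n\n// For each (input row, output row) pair, take an element-wise maximum of the\n// gathered input row into the output row.\nvoid sparse_max_pool_forward(std::vector<float> &output,\n                             const std::vector<float> &features,\n                             const std::vector<std::pair<int, int>> &pairs,\n                             int numPlanes) {\n  for (const auto &p : pairs) {\n    const float *in = features.data() + static_cast<std::size_t>(p.first) * numPlanes;\n    float *out = output.data() + static_cast<std::size_t>(p.second) * numPlanes;\n    for (int k = 0; k < numPlanes; ++k) {\n      out[k] = std::max(out[k], in[k]);\n    }\n  }\n}\n\nint main() {\n  const int numPlanes = 2;\n  std::vector<float> features = {1.f, 5.f, 3.f, 2.f};  // two active input sites\n  std::vector<float> output(numPlanes, -std::numeric_limits<float>::infinity());\n  // Both input sites pool into output site 0, as one row of indicePairs would.\n  std::vector<std::pair<int, int>> pairs = {{0, 0}, {1, 0}};\n  sparse_max_pool_forward(output, features, pairs, numPlanes);\n  // output is now {3, 5}.\n  return 0;\n}\n"
  },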
  {
    "path": "mmdet3d/ops/spconv/include/spconv/reordering.cu.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#ifndef REORDERING_CU_H_\n#define REORDERING_CU_H_\n#include <tensorview/helper_kernel.cu.h>\n\n// see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.\nnamespace spconv {\ntemplate <typename T, typename Index, int NumTLP, int NumILP>\n__global__ void gatherGenericKernel(T *buffer, const T *features,\n                                    const Index *indices, int size,\n                                    int numPlanes) {\n  int ILPStrideX[NumILP];\n  Index inds[NumILP];\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x;\n\n  for (int ix : tv::KernelLoopX<int, NumILP>(size)) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ilp++) {\n      if (ix + ILPStrideX[ilp] < size)\n        inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes;\n    }\n    for (int iy : tv::KernelLoopY<int>(numPlanes)) {\n#pragma unroll\n      for (int ilp = 0; ilp < NumILP; ++ilp) {\n        if (ix + ILPStrideX[ilp] < size)\n          buffer[(ix + ILPStrideX[ilp]) * numPlanes + iy] =\n              features[inds[ilp] + iy];\n      }\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP, typename VecType>\n__global__ void gatherVecKernel(T *buffer, const T *features,\n                                const Index *indices, int size, int numPlanes) {\n  int ILPStrideX[NumILP];\n  Index inds[NumILP];\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x;\n\n  for (int ix : tv::KernelLoopX<int, NumILP>(size)) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ilp++) {\n      if (ix + ILPStrideX[ilp] < size)\n        inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes;\n    }\n    for (int iy : tv::KernelLoopY<int>(numPlanes)) {\n#pragma unroll\n      for (int ilp = 0; ilp < NumILP; ++ilp) {\n        if (ix + ILPStrideX[ilp] < size)\n          reinterpret_cast<VecType *>(\n              buffer)[(ix + ILPStrideX[ilp]) * numPlanes + iy] =\n              reinterpret_cast<const VecType *>(features)[inds[ilp] + iy];\n      }\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP,\n          typename VecType = int4>\n__global__ void gatherVecBlockKernel(T *buffer, const T *features,\n                                     const Index *indices, int size,\n                                     int numPlanes) {\n  int ILPStrideY[NumILP];\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideY[ilp] = ilp * gridDim.y * blockDim.y;\n  features += blockIdx.x * NumTLP;\n  buffer += blockIdx.x * NumTLP;\n\n  for (int iy : tv::KernelLoopY<int, NumILP>(size)) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ++ilp) {\n      reinterpret_cast<VecType *>(\n          buffer)[(iy + ILPStrideY[ilp]) * numPlanes + threadIdx.x] =\n          reinterpret_cast<const VecType *>(\n              features)[indices[iy + ILPStrideY[ilp]] * 
numPlanes +\n                        threadIdx.x];\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP>\n__global__ void scatterAddGenericKernel(T *outFeatures, const T *buffer,\n                                        const Index *indices, int size,\n                                        int numPlanes) {\n  int ILPStrideX[NumILP];\n  Index inds[NumILP];\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x;\n  for (int ix : tv::KernelLoopX<int, NumILP>(size)) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ilp++) {\n      if (ix + ILPStrideX[ilp] < size)\n        inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes;\n    }\n    for (int iy : tv::KernelLoopY<int>(numPlanes)) {\n#pragma unroll\n      for (int ilp = 0; ilp < NumILP; ++ilp) {\n        if (ix + ILPStrideX[ilp] < size) {\n          outFeatures[inds[ilp] + iy] +=\n              buffer[(ix + ILPStrideX[ilp]) * numPlanes + iy];\n        }\n      }\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP,\n          typename VecType = int4>\n__global__ void scatterAddVecBlockKernel(T *outFeatures, const T *buffer,\n                                         const Index *indices, int size,\n                                         int numPlanes) {\n  int ILPStrideY[NumILP];\n  constexpr int vecloadFactor = sizeof(VecType) / sizeof(T);\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideY[ilp] = ilp * gridDim.y * blockDim.y;\n  outFeatures += blockIdx.x * NumTLP;\n  buffer += blockIdx.x * NumTLP;\n  T buf[vecloadFactor];\n  T buf2[vecloadFactor];\n  Index idx;\n  for (int iy : tv::KernelLoopY<int, NumILP>(size)) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ++ilp) {\n      idx = indices[iy + ILPStrideY[ilp]] * numPlanes + threadIdx.x;\n      reinterpret_cast<VecType *>(buf)[0] =\n          reinterpret_cast<VecType *>(outFeatures)[idx];\n      reinterpret_cast<VecType *>(buf2)[0] = reinterpret_cast<const VecType *>(\n          buffer)[(iy + ILPStrideY[ilp]) * numPlanes + threadIdx.x];\n#pragma unroll\n      for (int i = 0; i < vecloadFactor; i++) {\n        buf[i] += buf2[i];\n      }\n      reinterpret_cast<VecType *>(outFeatures)[idx] =\n          reinterpret_cast<VecType *>(buf)[0];\n    }\n  }\n}\n\n}  // namespace spconv\n\n#endif\n"
  },
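  {
    "path": "mmdet3d/ops/spconv/examples/gather_sketch.cu",
    "content": "// Illustrative sketch, not part of upstream spconv: a plain gather kernel\n// written with the same tv::KernelLoopX/KernelLoopY helpers that\n// gatherGenericKernel in include/spconv/reordering.cu.h uses, but without the\n// NumILP unrolling or the vectorized variants, to expose the basic access\n// pattern. The kernel name and this file are hypothetical.\n#include <tensorview/helper_kernel.cu.h>\n\nnamespace spconv_sketch {\n\n// Copies the row features[indices[ix]] (numPlanes values) into buffer[ix]\n// for ix in [0, size). Rows are strided over the x grid dimension and\n// feature planes over the y dimension, as in the header kernels.\ntemplate <typename T, typename Index>\n__global__ void gatherRowsSimple(T *buffer, const T *features,\n                                 const Index *indices, int size,\n                                 int numPlanes) {\n  for (int ix : tv::KernelLoopX<int>(size)) {\n    const Index row = indices[ix];\n    for (int iy : tv::KernelLoopY<int>(numPlanes)) {\n      buffer[ix * numPlanes + iy] = features[row * numPlanes + iy];\n    }\n  }\n}\n\n}  // namespace spconv_sketch\n"
  },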
  {
    "path": "mmdet3d/ops/spconv/include/spconv/reordering.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#ifndef SPARSE_REORDERING_FUNCTOR_H_\n#define SPARSE_REORDERING_FUNCTOR_H_\n#include <tensorview/tensorview.h>\n\nnamespace spconv {\nnamespace functor {\ntemplate <typename Device, typename T, typename Index>\nstruct SparseGatherFunctor {\n  void operator()(const Device& d, tv::TensorView<T> buffer,\n                  tv::TensorView<const T> features,\n                  tv::TensorView<const Index> indices, int size);\n};\n\ntemplate <typename Device, typename T, typename Index>\nstruct SparseScatterAddFunctor {\n  void operator()(const Device& d, tv::TensorView<T> out_features,\n                  tv::TensorView<const T> buffer,\n                  tv::TensorView<const Index> indices, int size,\n                  bool stable = false);\n};\n}  // namespace functor\n}  // namespace spconv\n\n#endif\n"
  },
  {
    "path": "mmdet3d/ops/spconv/include/spconv/spconv_ops.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#ifndef SPARSE_CONV_OP_H_\n#define SPARSE_CONV_OP_H_\n\n#include <cuda_runtime_api.h>\n#include <spconv/indice.h>\n#include <spconv/reordering.h>\n#include <torch/script.h>\n#include <torch_utils.h>\n#include <utility/timer.h>\n\nnamespace spconv {\n// torch.jit's doc says only support int64, so we need to convert to int32.\ntemplate <unsigned NDim>\nstd::vector<torch::Tensor> getIndicePair(\n    torch::Tensor indices, int64_t batchSize,\n    std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape,\n    std::vector<int64_t> kernelSize, std::vector<int64_t> stride,\n    std::vector<int64_t> padding, std::vector<int64_t> dilation,\n    std::vector<int64_t> outPadding, int64_t _subM, int64_t _transpose) {\n  // auto timer = spconv::CudaContextTimer<>();\n  bool subM = _subM != 0;\n  bool transpose = _transpose != 0;\n  auto numAct = indices.size(0);\n  auto coorDim = indices.size(1) - 1;  // batchIdx + xyz\n  TV_ASSERT_RT_ERR(NDim == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(kernelSize.size() == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(outSpatialShape.size() == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(stride.size() == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(padding.size() == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(outPadding.size() == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(dilation.size() == coorDim, \"error\");\n  auto kernelVolume = kernelSize[0];\n  for (int i = 1; i < kernelSize.size(); ++i) {\n    kernelVolume *= kernelSize[i];\n  }\n  TV_ASSERT_RT_ERR(kernelVolume <= 4096, \"error\");\n  auto outputVolume = outSpatialShape[0];\n  for (int i = 1; i < outSpatialShape.size(); ++i) {\n    outputVolume *= outSpatialShape[i];\n  }\n  torch::Tensor indicePairs =\n      torch::full({kernelVolume, 2, numAct}, -1,\n                  torch::dtype(torch::kInt32).device(indices.device()));\n  torch::Tensor indiceNum = torch::zeros(\n      {kernelVolume}, torch::dtype(torch::kInt32).device(indices.device()));\n  torch::Tensor gridOut =\n      torch::full({batchSize * outputVolume}, -1,\n                  torch::dtype(torch::kInt32).device(indices.device()));\n  // std::cout << \"full time \" << timer.report() / 1000.0 << std::endl;\n  int64_t numActOut = -1;\n  tv::SimpleVector<int, NDim> outSpatialShape32;\n  tv::SimpleVector<int, NDim> kernelSize32;\n  tv::SimpleVector<int, NDim> stride32;\n  tv::SimpleVector<int, NDim> padding32;\n  tv::SimpleVector<int, NDim> dilation32;\n  auto indicePairUnique = torch::full(\n      {indicePairs.numel() / 2 + 1}, std::numeric_limits<int>::max(),\n      torch::dtype(torch::kInt32).device(indices.device()));\n  for (int i = 0; i < NDim; ++i) {\n    outSpatialShape32.push_back(outSpatialShape[i]);\n    kernelSize32.push_back(kernelSize[i]);\n    if (subM) {\n      stride32.push_back(1);\n      padding32.push_back(kernelSize[i] / 2);\n      dilation32.push_back(dilation[i]);\n    } else {\n      stride32.push_back(stride[i]);\n      
padding32.push_back(padding[i]);\n      dilation32.push_back(dilation[i]);\n    }\n  }\n  if (subM) {\n    if (indices.device().type() == torch::kCPU) {\n      auto getIndicePairFtor =\n          functor::CreateSubMIndicePairFunctor<tv::CPU, int, int, NDim>();\n      numActOut = getIndicePairFtor(\n          tv::CPU(), tv::torch2tv<const int>(indices),\n          tv::torch2tv<int>(gridOut), tv::torch2tv<int>(indicePairs),\n          tv::torch2tv<int>(indiceNum), kernelSize32, stride32, padding32,\n          dilation32, outSpatialShape32, transpose);\n    } else {\n      auto getIndicePairFtor =\n          functor::CreateSubMIndicePairFunctor<tv::GPU, int, int, NDim>();\n      numActOut = getIndicePairFtor(\n          tv::TorchGPU(), tv::torch2tv<const int>(indices),\n          tv::torch2tv<int>(gridOut), tv::torch2tv<int>(indicePairs),\n          tv::torch2tv<int>(indiceNum), kernelSize32, stride32, padding32,\n          dilation32, outSpatialShape32, transpose);\n    }\n    return {indices, indicePairs, indiceNum};\n  } else {\n    torch::Tensor outInds =\n        torch::zeros({numAct * kernelVolume, coorDim + 1},\n                     torch::dtype(torch::kInt32).device(indices.device()));\n    if (indices.device().type() == torch::kCPU) {\n      auto getIndicePairFtor =\n          functor::CreateConvIndicePairFunctor<tv::CPU, int, int, NDim>();\n      numActOut = getIndicePairFtor(\n          tv::CPU(), tv::torch2tv<const int>(indices),\n          tv::torch2tv<int>(outInds), tv::torch2tv<int>(gridOut),\n          tv::torch2tv<int>(indicePairs), tv::torch2tv<int>(indiceNum),\n          kernelSize32, stride32, padding32, dilation32, outSpatialShape32,\n          transpose);\n    } else {\n      auto getIndicePairFtorP1 =\n          functor::CreateConvIndicePairFunctorP1<tv::GPU, int, int, NDim>();\n      auto getIndicePairFtorP2 =\n          functor::CreateConvIndicePairFunctorP2<tv::GPU, int, int, NDim>();\n      numActOut = getIndicePairFtorP1(\n          tv::TorchGPU(), tv::torch2tv<const int>(indices),\n          tv::torch2tv<int>(outInds), tv::torch2tv<int>(gridOut),\n          tv::torch2tv<int>(indicePairs), tv::torch2tv<int>(indiceNum),\n          tv::torch2tv<int>(indicePairUnique), kernelSize32, stride32,\n          padding32, dilation32, outSpatialShape32, transpose);\n      if (numActOut > 0) {\n        auto res = torch::_unique(indicePairUnique);\n        indicePairUnique = std::get<0>(res);\n        numActOut = getIndicePairFtorP2(\n            tv::TorchGPU(), tv::torch2tv<const int>(indices),\n            tv::torch2tv<int>(outInds), tv::torch2tv<int>(gridOut),\n            tv::torch2tv<int>(indicePairs), tv::torch2tv<int>(indiceNum),\n            tv::torch2tv<int>(indicePairUnique), outSpatialShape32, transpose);\n      }\n    }\n    return {outInds.slice(0, 0, numActOut), indicePairs, indiceNum};\n  }\n}\n\ntemplate <unsigned NDim>\nstd::vector<torch::Tensor> getIndicePairPreGrid(\n    torch::Tensor indices, torch::Tensor gridOut, int64_t batchSize,\n    std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape,\n    std::vector<int64_t> kernelSize, std::vector<int64_t> stride,\n    std::vector<int64_t> padding, std::vector<int64_t> dilation,\n    std::vector<int64_t> outPadding, int64_t _subM, int64_t _transpose) {\n  // auto timer = spconv::CudaContextTimer<>();\n  bool subM = _subM != 0;\n  bool transpose = _transpose != 0;\n  auto numAct = indices.size(0);\n  auto coorDim = indices.size(1) - 1;  // batchIdx + xyz\n  TV_ASSERT_RT_ERR(NDim == coorDim, 
\"error\");\n  TV_ASSERT_RT_ERR(kernelSize.size() == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(outSpatialShape.size() == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(stride.size() == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(padding.size() == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(outPadding.size() == coorDim, \"error\");\n  TV_ASSERT_RT_ERR(dilation.size() == coorDim, \"error\");\n  auto kernelVolume = kernelSize[0];\n  for (int i = 1; i < kernelSize.size(); ++i) {\n    kernelVolume *= kernelSize[i];\n  }\n  TV_ASSERT_RT_ERR(kernelVolume <= 4096, \"error\");\n  auto outputVolume = outSpatialShape[0];\n  for (int i = 1; i < outSpatialShape.size(); ++i) {\n    outputVolume *= outSpatialShape[i];\n  }\n  TV_ASSERT_INVALID_ARG(gridOut.numel() >= outputVolume * batchSize, \"error\");\n  torch::Tensor indicePairs =\n      torch::full({kernelVolume, 2, numAct}, -1,\n                  torch::dtype(torch::kInt32).device(indices.device()));\n  torch::Tensor indiceNum = torch::zeros(\n      {kernelVolume}, torch::dtype(torch::kInt32).device(indices.device()));\n  // std::cout << \"full time \" << timer.report() / 1000.0 << std::endl;\n  int64_t numActOut = -1;\n  tv::SimpleVector<int, NDim> outSpatialShape32;\n  tv::SimpleVector<int, NDim> kernelSize32;\n  tv::SimpleVector<int, NDim> stride32;\n  tv::SimpleVector<int, NDim> padding32;\n  tv::SimpleVector<int, NDim> dilation32;\n  auto indicePairUnique = torch::full(\n      {indicePairs.numel() / 2 + 1}, std::numeric_limits<int>::max(),\n      torch::dtype(torch::kInt32).device(indices.device()));\n  for (int i = 0; i < NDim; ++i) {\n    outSpatialShape32.push_back(outSpatialShape[i]);\n    kernelSize32.push_back(kernelSize[i]);\n    if (subM) {\n      stride32.push_back(1);\n      padding32.push_back(kernelSize[i] / 2);\n      dilation32.push_back(dilation[i]);\n    } else {\n      stride32.push_back(stride[i]);\n      padding32.push_back(padding[i]);\n      dilation32.push_back(dilation[i]);\n    }\n  }\n  if (subM) {\n    if (indices.device().type() == torch::kCPU) {\n      auto getIndicePairFtor =\n          functor::CreateSubMIndicePairFunctor<tv::CPU, int, int, NDim>();\n      numActOut = getIndicePairFtor(\n          tv::CPU(), tv::torch2tv<const int>(indices),\n          tv::torch2tv<int>(gridOut), tv::torch2tv<int>(indicePairs),\n          tv::torch2tv<int>(indiceNum), kernelSize32, stride32, padding32,\n          dilation32, outSpatialShape32, transpose);\n      gridOut.fill_(-1);\n    } else {\n      auto getIndicePairFtor =\n          functor::CreateSubMIndicePairFunctor<tv::GPU, int, int, NDim>();\n      numActOut = getIndicePairFtor(\n          tv::TorchGPU(), tv::torch2tv<const int>(indices),\n          tv::torch2tv<int>(gridOut), tv::torch2tv<int>(indicePairs),\n          tv::torch2tv<int>(indiceNum), kernelSize32, stride32, padding32,\n          dilation32, outSpatialShape32, transpose, true);\n    }\n    return {indices, indicePairs, indiceNum};\n  } else {\n    torch::Tensor outInds =\n        torch::zeros({numAct * kernelVolume, coorDim + 1},\n                     torch::dtype(torch::kInt32).device(indices.device()));\n    if (indices.device().type() == torch::kCPU) {\n      auto getIndicePairFtor =\n          functor::CreateConvIndicePairFunctor<tv::CPU, int, int, NDim>();\n      numActOut = getIndicePairFtor(\n          tv::CPU(), tv::torch2tv<const int>(indices),\n          tv::torch2tv<int>(outInds), tv::torch2tv<int>(gridOut),\n          tv::torch2tv<int>(indicePairs), tv::torch2tv<int>(indiceNum),\n          kernelSize32, 
stride32, padding32, dilation32, outSpatialShape32,\n          transpose, true);\n      gridOut.fill_(-1);\n    } else {\n      auto getIndicePairFtorP1 =\n          functor::CreateConvIndicePairFunctorP1<tv::GPU, int, int, NDim>();\n      auto getIndicePairFtorP2 =\n          functor::CreateConvIndicePairFunctorP2<tv::GPU, int, int, NDim>();\n      numActOut = getIndicePairFtorP1(\n          tv::TorchGPU(), tv::torch2tv<const int>(indices),\n          tv::torch2tv<int>(outInds), tv::torch2tv<int>(gridOut),\n          tv::torch2tv<int>(indicePairs), tv::torch2tv<int>(indiceNum),\n          tv::torch2tv<int>(indicePairUnique), kernelSize32, stride32,\n          padding32, dilation32, outSpatialShape32, transpose);\n      if (numActOut > 0) {\n        auto res = torch::_unique(indicePairUnique);\n        indicePairUnique = std::get<0>(res);\n        numActOut = getIndicePairFtorP2(\n            tv::TorchGPU(), tv::torch2tv<const int>(indices),\n            tv::torch2tv<int>(outInds), tv::torch2tv<int>(gridOut),\n            tv::torch2tv<int>(indicePairs), tv::torch2tv<int>(indiceNum),\n            tv::torch2tv<int>(indicePairUnique), outSpatialShape32, transpose,\n            true);\n      }\n    }\n    return {outInds.slice(0, 0, numActOut), indicePairs, indiceNum};\n  }\n}\n\ntemplate <typename T>\ntorch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,\n                         torch::Tensor indicePairs, torch::Tensor indiceNum,\n                         int64_t numActOut, int64_t _inverse, int64_t _subM) {\n  bool subM = _subM != 0;\n  bool inverse = _inverse != 0;\n  auto device = features.device().type();\n  auto ndim = filters.dim() - 2;\n  auto kernelVolume = indicePairs.size(0);\n  auto numInPlanes = features.size(1);\n  auto numOutPlanes = filters.size(ndim + 1);\n  auto indicePairNumCpu = indiceNum.to({torch::kCPU});\n  auto indicePairMaxSizeIter =\n      std::max_element(indicePairNumCpu.data_ptr<int>(),\n                       indicePairNumCpu.data_ptr<int>() + kernelVolume);\n  int indicePairMaxOffset =\n      indicePairMaxSizeIter - indicePairNumCpu.data_ptr<int>();\n  int indicePairMaxSize = *indicePairMaxSizeIter;\n\n  /*if (_subM){\n    std::vector<int> indicePairNumVec(indicePairNumCpu.data_ptr<int>(),\n  indicePairNumCpu.data_ptr<int>() + kernelVolume);\n    indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);\n\n    auto indicePairVecMaxSizeIter = std::max_element(\n        indicePairNumVec.begin(), indicePairNumVec.end());\n    indicePairMaxSize = *indicePairVecMaxSizeIter;\n  }*/\n\n  auto options =\n      torch::TensorOptions().dtype(features.dtype()).device(features.device());\n  // auto indicePairOptions =\n  //     torch::TensorOptions().dtype(torch::kInt64).device(indicePairs.device());\n\n  torch::Tensor output = torch::zeros({numActOut, numOutPlanes}, options);\n  torch::Tensor inputBuffer =\n      torch::zeros({indicePairMaxSize, numInPlanes}, options);\n  torch::Tensor outputBuffer =\n      torch::zeros({indicePairMaxSize, numOutPlanes}, options);\n  filters = filters.view({-1, numInPlanes, numOutPlanes});\n  if (subM) {  // the center index of subm conv don't need gather and scatter\n               // add.\n    torch::mm_out(output, features, filters[indicePairMaxOffset]);\n  }\n  double totalGatherTime = 0;\n  double totalGEMMTime = 0;\n  double totalSAddTime = 0;\n  for (int i = 0; i < kernelVolume; ++i) {\n    auto nHot = indicePairNumCpu.data_ptr<int>()[i];\n    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {\n    
  continue;\n    }\n    // auto timer = spconv::CudaContextTimer<>();\n    auto outputBufferBlob = torch::from_blob(outputBuffer.data_ptr<T>(),\n                                             {nHot, numOutPlanes}, options);\n    auto inputBufferBlob = torch::from_blob(inputBuffer.data_ptr<T>(),\n                                            {nHot, numInPlanes}, options);\n\n    if (device == torch::kCPU) {\n      functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtor;\n      gatherFtor(tv::CPU(), tv::torch2tv<T>(inputBuffer),\n                 tv::torch2tv<const T>(features),\n                 tv::torch2tv<const int>(indicePairs).subview(i, inverse),\n                 nHot);\n    } else {\n      functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtor;\n      gatherFtor(tv::TorchGPU(), tv::torch2tv<T>(inputBuffer),\n                 tv::torch2tv<const T>(features),\n                 tv::torch2tv<const int>(indicePairs).subview(i, inverse),\n                 nHot);\n      TV_CHECK_CUDA_ERR();\n      /* slower than SparseGatherFunctor, may due to int->long conversion\n      auto indicePairLong = indicePairs[i][inverse].to(torch::kInt64);\n      auto indicePairBlob = torch::from_blob(indicePairLong.data_ptr<long>(),\n      {nHot}, indicePairOptions); torch::index_select_out(inputBufferBlob,\n      features, 0, indicePairBlob);*/\n    }\n    // totalGatherTime += timer.report() / 1000.0;\n    torch::mm_out(outputBufferBlob, inputBufferBlob, filters[i]);\n    // totalGEMMTime += timer.report() / 1000.0;\n\n    if (device == torch::kCPU) {\n      functor::SparseScatterAddFunctor<tv::CPU, T, int> scatterFtor;\n      scatterFtor(tv::CPU(), tv::torch2tv<T>(output),\n                  tv::torch2tv<const T>(outputBuffer),\n                  tv::torch2tv<const int>(indicePairs).subview(i, !inverse),\n                  nHot, true);\n    } else {\n      functor::SparseScatterAddFunctor<tv::GPU, T, int> scatterFtor;\n      scatterFtor(tv::TorchGPU(), tv::torch2tv<T>(output),\n                  tv::torch2tv<const T>(outputBuffer),\n                  tv::torch2tv<const int>(indicePairs).subview(i, !inverse),\n                  nHot, true);\n      TV_CHECK_CUDA_ERR();\n    }\n    // totalSAddTime += timer.report() / 1000.0;\n  }\n  // std::cout << \"gather time \" << totalGatherTime << std::endl;\n  // std::cout << \"gemm time \" << totalGEMMTime << std::endl;\n  // std::cout << \"scatteradd time \" << totalSAddTime << std::endl;\n  return output;\n}\n\ntemplate <typename T>\nstd::vector<torch::Tensor> indiceConvBackward(torch::Tensor features,\n                                              torch::Tensor filters,\n                                              torch::Tensor outGrad,\n                                              torch::Tensor indicePairs,\n                                              torch::Tensor indiceNum,\n                                              int64_t _inverse, int64_t _subM) {\n  bool subM = _subM != 0;\n  bool inverse = _inverse != 0;\n\n  auto device = features.device().type();\n  auto ndim = filters.dim() - 2;\n  auto kernelVolume = indicePairs.size(0);\n  auto numInPlanes = features.size(1);\n  auto numOutPlanes = filters.size(ndim + 1);\n  auto indicePairNumCpu = indiceNum.to({torch::kCPU});\n  auto indicePairMaxSizeIter =\n      std::max_element(indicePairNumCpu.data_ptr<int>(),\n                       indicePairNumCpu.data_ptr<int>() + kernelVolume);\n  int indicePairMaxOffset =\n      indicePairMaxSizeIter - indicePairNumCpu.data_ptr<int>();\n  int indicePairMaxSize = 
*indicePairMaxSizeIter;\n  auto options =\n      torch::TensorOptions().dtype(features.dtype()).device(features.device());\n  auto filterShape = filters.sizes();\n  torch::Tensor inputGrad = torch::zeros(features.sizes(), options);\n  torch::Tensor filtersGrad = torch::zeros(filterShape, options);\n  torch::Tensor inputBuffer =\n      torch::zeros({indicePairMaxSize, numInPlanes}, options);\n  torch::Tensor outputBuffer =\n      torch::zeros({indicePairMaxSize, numOutPlanes}, options);\n\n  filters = filters.view({-1, numInPlanes, numOutPlanes});\n  filtersGrad = filtersGrad.view({-1, numInPlanes, numOutPlanes});\n  if (subM) {\n    auto filterGradSub = filtersGrad[indicePairMaxOffset];\n    torch::mm_out(filterGradSub, features.t(), outGrad);\n    torch::mm_out(inputGrad, outGrad, filters[indicePairMaxOffset].t());\n  }\n  for (int i = 0; i < kernelVolume; ++i) {\n    auto nHot = indicePairNumCpu.data_ptr<int>()[i];\n    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {\n      continue;\n    }\n    if (device == torch::kCPU) {\n      functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtor;\n      functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtorOut;\n      gatherFtor(tv::CPU(), tv::torch2tv<T>(inputBuffer),\n                 tv::torch2tv<const T>(features),\n                 tv::torch2tv<const int>(indicePairs).subview(i, inverse),\n                 nHot);\n      gatherFtorOut(tv::CPU(), tv::torch2tv<T>(outputBuffer),\n                    tv::torch2tv<const T>(outGrad),\n                    tv::torch2tv<const int>(indicePairs).subview(i, !inverse),\n                    nHot);\n    } else {\n      functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtor;\n      functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtorOut;\n      gatherFtor(tv::TorchGPU(), tv::torch2tv<T>(inputBuffer),\n                 tv::torch2tv<const T>(features),\n                 tv::torch2tv<const int>(indicePairs).subview(i, inverse),\n                 nHot);\n      TV_CHECK_CUDA_ERR();\n      gatherFtorOut(tv::TorchGPU(), tv::torch2tv<T>(outputBuffer),\n                    tv::torch2tv<const T>(outGrad),\n                    tv::torch2tv<const int>(indicePairs).subview(i, !inverse),\n                    nHot);\n      TV_CHECK_CUDA_ERR();\n    }\n    auto filterGradSub = filtersGrad[i];\n    auto outputBufferBlob = torch::from_blob(outputBuffer.data_ptr<T>(),\n                                             {nHot, numOutPlanes}, options);\n    auto inputBufferBlob = torch::from_blob(inputBuffer.data_ptr<T>(),\n                                            {nHot, numInPlanes}, options);\n\n    torch::mm_out(filterGradSub, inputBufferBlob.t(), outputBufferBlob);\n    torch::mm_out(inputBufferBlob, outputBufferBlob, filters[i].t());\n    if (device == torch::kCPU) {\n      functor::SparseScatterAddFunctor<tv::CPU, T, int> scatterFtor;\n      scatterFtor(tv::CPU(), tv::torch2tv<T>(inputGrad),\n                  tv::torch2tv<const T>(inputBuffer),\n                  tv::torch2tv<const int>(indicePairs).subview(i, inverse),\n                  nHot);\n    } else {\n      functor::SparseScatterAddFunctor<tv::GPU, T, int> scatterFtor;\n      scatterFtor(tv::TorchGPU(), tv::torch2tv<T>(inputGrad),\n                  tv::torch2tv<const T>(inputBuffer),\n                  tv::torch2tv<const int>(indicePairs).subview(i, inverse),\n                  nHot);\n      TV_CHECK_CUDA_ERR();\n    }\n  }\n  return {inputGrad, filtersGrad.view(filterShape)};\n}\n\ntemplate <typename T>\ntorch::Tensor 
indiceConvDevelopDontUse(torch::Tensor features,\n                                       torch::Tensor filters,\n                                       torch::Tensor indicePairs,\n                                       torch::Tensor indiceNum,\n                                       int64_t numActOut, int64_t _inverse,\n                                       int64_t _subM) {\n  bool subM = _subM != 0;\n  bool inverse = _inverse != 0;\n\n  auto device = features.device().type();\n  auto ndim = filters.dim() - 2;\n  auto kernelVolume = indicePairs.size(0);\n  auto numInPlanes = features.size(1);\n  auto numOutPlanes = filters.size(ndim + 1);\n  auto indicePairNumCpu = indiceNum.to({torch::kCPU});\n  auto totalActsTen = indicePairNumCpu.sum();\n  auto totalActs = indicePairNumCpu.data_ptr<int>()[0];\n  auto indicePairMaxSizeIter =\n      std::max_element(indicePairNumCpu.data_ptr<int>(),\n                       indicePairNumCpu.data_ptr<int>() + kernelVolume);\n  int indicePairMaxOffset =\n      indicePairMaxSizeIter - indicePairNumCpu.data_ptr<int>();\n  int indicePairMaxSize = *indicePairMaxSizeIter;\n  std::vector<int> indicePairNumVec(\n      indicePairNumCpu.data_ptr<int>(),\n      indicePairNumCpu.data_ptr<int>() + kernelVolume);\n  indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);\n  int subRuleMaxSize =\n      *std::max_element(indicePairNumVec.begin(), indicePairNumVec.end());\n  if (subM) {\n    indicePairMaxSize = subRuleMaxSize;\n  }\n  auto timer = spconv::CudaContextTimer<>();\n  auto options =\n      torch::TensorOptions().dtype(features.dtype()).device(features.device());\n  // auto indicePairOptions =\n  //     torch::TensorOptions().dtype(torch::kInt64).device(indicePairs.device());\n\n  torch::Tensor output = torch::zeros({numActOut, numOutPlanes}, options);\n  torch::Tensor inputBuffer =\n      torch::zeros({kernelVolume, indicePairMaxSize, numInPlanes}, options);\n  torch::Tensor outputBuffer =\n      torch::zeros({kernelVolume, indicePairMaxSize, numOutPlanes}, options);\n  filters = filters.view({-1, numInPlanes, numOutPlanes});\n  std::cout << \"create time \" << timer.report() / 1000.0 << std::endl;\n  if (subM) {  // the center index of subm conv don't need gather and scatter\n               // add.\n    torch::mm_out(output, features, filters[indicePairMaxOffset]);\n  }\n  double totalGatherTime = 0;\n  double totalGEMMTime = 0;\n  double totalSAddTime = 0;\n  // auto timer = spconv::CudaContextTimer<>();\n  for (int i = 0; i < kernelVolume; ++i) {\n    auto nHot = indicePairNumCpu.data_ptr<int>()[i];\n    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {\n      continue;\n    }\n    //\n    auto outputBufferBlob = torch::from_blob(outputBuffer[i].data_ptr<T>(),\n                                             {nHot, numOutPlanes}, options);\n    auto inputBufferBlob = torch::from_blob(inputBuffer[i].data_ptr<T>(),\n                                            {nHot, numInPlanes}, options);\n    if (device == torch::kCPU) {\n      functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtor;\n      gatherFtor(tv::CPU(), tv::torch2tv<T>(inputBufferBlob),\n                 tv::torch2tv<const T>(features),\n                 tv::torch2tv<const int>(indicePairs).subview(i, inverse),\n                 nHot);\n    } else {\n      functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtor;\n      gatherFtor(tv::TorchGPU(), tv::torch2tv<T>(inputBufferBlob),\n                 tv::torch2tv<const T>(features),\n                 tv::torch2tv<const 
int>(indicePairs).subview(i, inverse),\n                 nHot);\n      TV_CHECK_CUDA_ERR();\n    }\n    // }\n    // for (int i = 0; i < kernelVolume; ++i) {\n    // totalGatherTime += timer.report() / 1000.0;\n    // auto outputBufferBlob = torch::from_blob(outputBuffer[i].data_ptr<T>(),\n    // {nHot, numOutPlanes}, options);\n  }\n  // totalGatherTime += timer.report() / 1000.0;\n  for (int i = 0; i < kernelVolume; ++i) {\n    auto nHot = indicePairNumCpu.data_ptr<int>()[i];\n    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {\n      continue;\n    }\n    auto outputBufferBlob = torch::from_blob(outputBuffer[i].data_ptr<T>(),\n                                             {nHot, numOutPlanes}, options);\n    auto inputBufferBlob = torch::from_blob(inputBuffer[i].data_ptr<T>(),\n                                            {nHot, numInPlanes}, options);\n\n    torch::mm_out(outputBufferBlob, inputBufferBlob, filters[i]);\n  }\n  // totalGEMMTime += timer.report() / 1000.0;\n  // totalGEMMTime += timer.report() / 1000.0;\n  for (int i = 0; i < kernelVolume; ++i) {\n    auto nHot = indicePairNumCpu.data_ptr<int>()[i];\n    if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {\n      continue;\n    }\n    auto outputBufferBlob = torch::from_blob(outputBuffer[i].data_ptr<T>(),\n                                             {nHot, numOutPlanes}, options);\n    auto inputBufferBlob = torch::from_blob(inputBuffer[i].data_ptr<T>(),\n                                            {nHot, numInPlanes}, options);\n\n    if (device == torch::kCPU) {\n      functor::SparseScatterAddFunctor<tv::CPU, T, int> scatterFtor;\n      scatterFtor(tv::CPU(), tv::torch2tv<T>(output),\n                  tv::torch2tv<const T>(outputBufferBlob),\n                  tv::torch2tv<const int>(indicePairs).subview(i, !inverse),\n                  nHot, true);\n    } else {\n      functor::SparseScatterAddFunctor<tv::GPU, T, int> scatterFtor;\n      scatterFtor(tv::TorchGPU(), tv::torch2tv<T>(output),\n                  tv::torch2tv<const T>(outputBufferBlob),\n                  tv::torch2tv<const int>(indicePairs).subview(i, !inverse),\n                  nHot, true);\n      TV_CHECK_CUDA_ERR();\n    }\n    // totalSAddTime += timer.report() / 1000.0;\n  }\n  // totalSAddTime += timer.report() / 1000.0;\n  // std::cout << \"gather time \" << totalGatherTime << std::endl;\n  // std::cout << \"gemm time \" << totalGEMMTime << std::endl;\n  // std::cout << \"scatteradd time \" << totalSAddTime << std::endl;\n  return output;\n}\n\n}  // namespace spconv\n\n#endif\n"
  },
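  {
    "path": "mmdet3d/ops/spconv/examples/indice_conv_sketch.cc",
    "content": "// Illustrative sketch, not the spconv API: the gather -> GEMM -> scatter-add\n// step that indiceConv in include/spconv/spconv_ops.h performs once per kernel\n// offset, written against plain row-major std::vector buffers instead of\n// torch::Tensor. Function and parameter names are hypothetical; the real op\n// reads the pair lists from indicePairs and the valid count from indiceNum,\n// and for subM convolutions handles the center offset with a single dense\n// matmul of features against the center filter.\n#include <cstddef>\n#include <vector>\n\n// Naive row-major GEMM: C[m x n] = A[m x k] * B[k x n].\nstatic void matmul(const std::vector<float> &A, const std::vector<float> &B,\n                   std::vector<float> &C, int m, int k, int n) {\n  for (int i = 0; i < m; ++i) {\n    for (int j = 0; j < n; ++j) {\n      float acc = 0.f;\n      for (int t = 0; t < k; ++t) acc += A[i * k + t] * B[t * n + j];\n      C[i * n + j] = acc;\n    }\n  }\n}\n\n// One kernel offset: inPairs/outPairs play the role of the two rows of\n// indicePairs[i], already restricted to the first nHot valid entries.\nvoid indice_conv_offset(std::vector<float> &output,          // [numActOut, numOut]\n                        const std::vector<float> &features,  // [numActIn, numIn]\n                        const std::vector<float> &filter,    // [numIn, numOut]\n                        const std::vector<int> &inPairs,\n                        const std::vector<int> &outPairs, int nHot, int numIn,\n                        int numOut) {\n  std::vector<float> inputBuffer(static_cast<std::size_t>(nHot) * numIn);\n  std::vector<float> outputBuffer(static_cast<std::size_t>(nHot) * numOut);\n  // Gather the paired input rows into a dense buffer.\n  for (int r = 0; r < nHot; ++r) {\n    for (int c = 0; c < numIn; ++c) {\n      inputBuffer[r * numIn + c] = features[inPairs[r] * numIn + c];\n    }\n  }\n  // Dense GEMM against the filter slice for this offset.\n  matmul(inputBuffer, filter, outputBuffer, nHot, numIn, numOut);\n  // Scatter-add the result rows into the paired output rows.\n  for (int r = 0; r < nHot; ++r) {\n    for (int c = 0; c < numOut; ++c) {\n      output[outPairs[r] * numOut + c] += outputBuffer[r * numOut + c];\n    }\n  }\n}\n"
  },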
  {
    "path": "mmdet3d/ops/spconv/include/tensorview/helper_kernel.cu.h",
    "content": "#pragma once\n// from tensorflow\nnamespace tv {\nnamespace detail {\n\ntemplate <typename T>\nclass KernelLoop {\n  struct Iterator {\n    __forceinline__ __device__ Iterator(T index, T delta)\n        : index_(index), delta_(delta) {}\n    __forceinline__ __device__ T operator*() const { return index_; }\n    __forceinline__ __device__ Iterator &operator++() {\n      index_ += delta_;\n      return *this;\n    }\n    __forceinline__ __device__ bool operator!=(const Iterator &other) const {\n      bool greater = index_ > other.index_;\n      bool less = index_ < other.index_;\n      // Anything past an end iterator (delta_ == 0) is equal.\n      // In range-based for loops, this optimizes to 'return less'.\n      if (!other.delta_) {\n        return less;\n      }\n      if (!delta_) {\n        return greater;\n      }\n      return less || greater;\n    }\n\n   private:\n    T index_;\n    const T delta_;\n  };\n\n public:\n  __forceinline__ __device__ KernelLoop(T begin, T delta, T end)\n      : begin_(begin), delta_(delta), end_(end) {}\n\n  __forceinline__ __device__ Iterator begin() const {\n    return Iterator{begin_, delta_};\n  }\n  __forceinline__ __device__ Iterator end() const { return Iterator{end_, 0}; }\n\n private:\n  T begin_;\n  T delta_;\n  T end_;\n};\n\n}  // namespace detail\ntemplate <typename T, int NumILP = 1>\n__forceinline__ __device__ detail::KernelLoop<T> KernelLoopX(T count) {\n  return detail::KernelLoop<T>(blockIdx.x * blockDim.x + threadIdx.x,\n                               gridDim.x * blockDim.x * NumILP, count);\n}\n\n// Helper to visit indices in the range 0 <= i < count using the y-coordinate.\n// Usage: for(int i : KernelLoopY(count)) { visit(i); }\ntemplate <typename T, int NumILP = 1>\n__forceinline__ __device__ detail::KernelLoop<T> KernelLoopY(T count) {\n  return detail::KernelLoop<T>(blockIdx.y * blockDim.y + threadIdx.y,\n                               gridDim.y * blockDim.y * NumILP, count);\n}\n\n// Helper to visit indices in the range 0 <= i < count using the z-coordinate.\n// Usage: for(int i : KernelLoopZ(count)) { visit(i); }\ntemplate <typename T, int NumILP = 1>\n__forceinline__ __device__ detail::KernelLoop<T> KernelLoopZ(T count) {\n  return detail::KernelLoop<T>(blockIdx.z * blockDim.z + threadIdx.z,\n                               gridDim.z * blockDim.z * NumILP, count);\n}\n\n}  // namespace tv\n"
  },
  {
    "path": "mmdet3d/ops/spconv/include/tensorview/helper_launch.h",
    "content": "#pragma once\n// from pytorch.aten\n#include \"tensorview.h\"\nnamespace tv\n{\nnamespace launch\n{\n\ntemplate <typename T1, typename T2>\ninline int DivUp(const T1 a, const T2 b) { return (a + b - 1) / b; }\n\n// Use 1024 threads per block, which requires cuda sm_2x or above\nconstexpr int CUDA_NUM_THREADS = 1024;\n// CUDA: number of blocks for threads.\ninline int getBlocks(const int N)\n{\n    TV_ASSERT_RT_ERR(N > 0, \"CUDA kernel launch blocks must be positive, but got N=\", N);\n    return DivUp(N, CUDA_NUM_THREADS);\n}\n} // namespace launch\n} // namespace tv\n"
  },
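  {
    "path": "mmdet3d/ops/spconv/examples/launch_sketch.cu",
    "content": "// Illustrative sketch, hypothetical file: how the helpers in\n// include/tensorview/helper_launch.h are combined into a one-dimensional\n// launch configuration. fill_kernel is a made-up kernel for demonstration.\n#include <cuda_runtime_api.h>\n#include <tensorview/helper_launch.h>\n\n__global__ void fill_kernel(float *out, float value, int n) {\n  int i = blockIdx.x * blockDim.x + threadIdx.x;\n  if (i < n) out[i] = value;\n}\n\nvoid fill(float *out, float value, int n, cudaStream_t stream) {\n  // getBlocks rounds n up to whole blocks of CUDA_NUM_THREADS (1024) threads\n  // and throws via TV_ASSERT_RT_ERR if n is not positive.\n  fill_kernel<<<tv::launch::getBlocks(n), tv::launch::CUDA_NUM_THREADS, 0,\n                stream>>>(out, value, n);\n}\n"
  },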
  {
    "path": "mmdet3d/ops/spconv/include/tensorview/tensorview.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n#include <cuda_runtime_api.h>\n\n#include <algorithm>\n#include <cassert>\n#include <cstdlib>\n#include <iostream>\n#include <memory>\n// #include <prettyprint.h>\n#include <sstream>\n#include <type_traits>\n#include <vector>\n\nnamespace tv {\n\n#ifdef __NVCC__\n#define TV_HOST_DEVICE_INLINE __forceinline__ __device__ __host__\n#define TV_DEVICE_INLINE __forceinline__ __device__\n#define TV_HOST_DEVICE __device__ __host__\n#define TV_ASSERT(expr) assert(expr)\n#elif defined(__CUDACC_RTC__)\n#define TV_ASSERT(expr) assert(expr)\n#define TV_HOST_DEVICE_INLINE __forceinline__ __device__\n#define TV_DEVICE_INLINE __forceinline__ __device__\n#define TV_HOST_DEVICE __device__ __host__\n#else\n#define TV_ASSERT(x) assert(x)\n#define TV_HOST_DEVICE_INLINE inline\n#define TV_HOST_DEVICE\n#endif\n\n#define TV_REQUIRE(expr, ...) \\\n  {                           \\\n    if (!(expr)) {            \\\n      printf(__VA_ARGS__);    \\\n      assert(expr);           \\\n    }                         \\\n  }\n\n#define TV_DEVICE_REQUIRE(expr, ...)                      \\\n  {                                                       \\\n    if (!(expr) && threadIdx.x == 0) printf(__VA_ARGS__); \\\n    assert(expr);                                         \\\n  }\n\ntemplate <class SStream, class T>\nvoid sstream_print(SStream &ss, T val) {\n  ss << val;\n}\n\ntemplate <class SStream, class T, class... TArgs>\nvoid sstream_print(SStream &ss, T val, TArgs... args) {\n  ss << val << \" \";\n  sstream_print(ss, args...);\n}\n\n#define TV_ASSERT_RT_ERR(expr, ...)                     \\\n  {                                                     \\\n    if (!(expr)) {                                      \\\n      std::stringstream __macro_s;                      \\\n      __macro_s << __FILE__ << \" \" << __LINE__ << \"\\n\"; \\\n      __macro_s << #expr << \" assert faild. \";          \\\n      tv::sstream_print(__macro_s, __VA_ARGS__);        \\\n      throw std::runtime_error(__macro_s.str());        \\\n    }                                                   \\\n  }\n\n#define TV_ASSERT_INVALID_ARG(expr, ...)                \\\n  {                                                     \\\n    if (!(expr)) {                                      \\\n      std::stringstream __macro_s;                      \\\n      __macro_s << __FILE__ << \" \" << __LINE__ << \"\\n\"; \\\n      __macro_s << #expr << \" assert faild. 
\";          \\\n      tv::sstream_print(__macro_s, __VA_ARGS__);        \\\n      throw std::invalid_argument(__macro_s.str());     \\\n    }                                                   \\\n  }\n\n#define TV_CHECK_CUDA_ERR()                                    \\\n  {                                                            \\\n    auto err = cudaGetLastError();                             \\\n    if (err != cudaSuccess) {                                  \\\n      std::stringstream __macro_s;                             \\\n      __macro_s << __FILE__ << \" \" << __LINE__ << \"\\n\";        \\\n      __macro_s << \"cuda execution failed with error \" << err; \\\n      throw std::runtime_error(__macro_s.str());               \\\n    }                                                          \\\n  }\n\nstruct GPU {\n  GPU(cudaStream_t s = 0) : mStream(s) {}\n  virtual cudaStream_t getStream() const { return mStream; }\n  cudaStream_t mStream = 0;\n};\nstruct CPU {};\n\n#define TV_MAX_DIM 6\n/*\ntemplate <typename T>\nconstexpr size_t calc_align(size_t ndim)\n{\n  if (ndim * sizeof(T) == 1)\n    return 1;\n  else if (ndim * sizeof(T) == 2)\n    return 2;\n  else if (ndim * sizeof(T) <= 4 && ndim * sizeof(T) > 2)\n    return 4;\n  else if (ndim * sizeof(T) <= 8 && ndim * sizeof(T) > 4)\n    return 8;\n  else if (ndim * sizeof(T) <= 16 && ndim * sizeof(T) > 8)\n    return 16;\n  else if (ndim * sizeof(T) <= 32 && ndim * sizeof(T) > 16)\n    return 32;\n  else\n    return 64;\n}\n*/\ntemplate <typename T, size_t MaxDim = TV_MAX_DIM>\nstruct /*alignas(calc_align<T>(MaxDim))*/ SimpleVector {\n public:\n  TV_HOST_DEVICE_INLINE SimpleVector(){};\n  TV_HOST_DEVICE_INLINE SimpleVector(std::initializer_list<T> q) {\n    TV_ASSERT(q.size() <= MaxDim);\n    mSize = 0;\n    for (T s : q) {\n      mArray[mSize++] = s;\n    }\n    mSize = q.size();\n  }\n  SimpleVector(const std::vector<T> &arr) {\n    TV_ASSERT(arr.size() <= MaxDim);\n    for (size_t i = 0; i < arr.size(); ++i) {\n      mArray[i] = arr[i];\n    }\n    mSize = arr.size();\n  }\n  TV_HOST_DEVICE_INLINE SimpleVector(const SimpleVector<T, MaxDim> &arr) {\n    TV_ASSERT(arr.size() <= MaxDim);\n    for (size_t i = 0; i < arr.size(); ++i) {\n      mArray[i] = arr[i];\n    }\n    mSize = arr.size();\n  }\n  TV_HOST_DEVICE_INLINE T &operator[](int idx) {\n#ifdef TV_DEBUG\n    TV_ASSERT(idx >= 0 && idx < mSize);\n#endif\n    return mArray[idx];\n  }\n  TV_HOST_DEVICE_INLINE const T &operator[](int idx) const {\n#ifdef TV_DEBUG\n    TV_ASSERT(idx >= 0 && idx < mSize);\n#endif\n    return mArray[idx];\n  }\n  TV_HOST_DEVICE_INLINE void push_back(T s) {\n#ifdef TV_DEBUG\n    TV_ASSERT(mSize < MaxDim);\n#endif\n    mArray[mSize] = s;\n    mSize++;\n  }\n  TV_HOST_DEVICE_INLINE void pop_back() {\n#ifdef TV_DEBUG\n    TV_ASSERT(mSize > 0);\n#endif\n    mSize--;\n  }\n\n  TV_HOST_DEVICE_INLINE size_t size() const { return mSize; }\n  TV_HOST_DEVICE_INLINE const T *data() const { return mArray; }\n  TV_HOST_DEVICE_INLINE size_t empty() const { return mSize == 0; }\n\n  typedef size_t size_type;\n\n  class iterator {\n   public:\n    typedef iterator self_type;\n    typedef T value_type;\n    typedef T &reference;\n    typedef T *pointer;\n    typedef std::forward_iterator_tag iterator_category;\n    typedef std::ptrdiff_t difference_type;\n    TV_HOST_DEVICE_INLINE iterator(pointer ptr) : ptr_(ptr) {}\n    TV_HOST_DEVICE_INLINE self_type operator++(int junk) {\n      self_type i = *this;\n      ptr_++;\n      return i;\n    }\n    
TV_HOST_DEVICE_INLINE self_type operator++() {\n      ptr_++;\n      return *this;\n    }\n    TV_HOST_DEVICE_INLINE reference operator*() { return *ptr_; }\n    TV_HOST_DEVICE_INLINE pointer operator->() { return ptr_; }\n    TV_HOST_DEVICE_INLINE bool operator==(const self_type &rhs) {\n      return ptr_ == rhs.ptr_;\n    }\n    TV_HOST_DEVICE_INLINE bool operator!=(const self_type &rhs) {\n      return ptr_ != rhs.ptr_;\n    }\n\n   private:\n    pointer ptr_;\n  };\n\n  class const_iterator {\n   public:\n    typedef const_iterator self_type;\n    typedef T value_type;\n    typedef const T &reference;\n    typedef const T *pointer;\n    typedef std::ptrdiff_t difference_type;\n    typedef std::forward_iterator_tag iterator_category;\n    TV_HOST_DEVICE_INLINE const_iterator(pointer ptr) : ptr_(ptr) {}\n    TV_HOST_DEVICE_INLINE self_type operator++(int junk) {\n      self_type i = *this;\n      ptr_++;\n      return i;\n    }\n    TV_HOST_DEVICE_INLINE self_type operator++() {\n      ptr_++;\n      return *this;\n    }\n    TV_HOST_DEVICE_INLINE reference operator*() { return *ptr_; }\n    TV_HOST_DEVICE_INLINE pointer operator->() { return ptr_; }\n    TV_HOST_DEVICE_INLINE bool operator==(const self_type &rhs) {\n      return ptr_ == rhs.ptr_;\n    }\n    TV_HOST_DEVICE_INLINE bool operator!=(const self_type &rhs) {\n      return ptr_ != rhs.ptr_;\n    }\n\n   private:\n    pointer ptr_;\n  };\n\n  TV_HOST_DEVICE_INLINE iterator begin() { return iterator(mArray); }\n\n  TV_HOST_DEVICE_INLINE iterator end() { return iterator(mArray + mSize); }\n\n  TV_HOST_DEVICE_INLINE const_iterator begin() const {\n    return const_iterator(mArray);\n  }\n\n  TV_HOST_DEVICE_INLINE const_iterator end() const {\n    return const_iterator(mArray + mSize);\n  }\n  TV_HOST_DEVICE_INLINE const_iterator cbegin() const {\n    return const_iterator(mArray);\n  }\n\n  TV_HOST_DEVICE_INLINE const_iterator cend() const {\n    return const_iterator(mArray + mSize);\n  }\n\n protected:\n  T mArray[MaxDim];\n  size_t mSize = 0;\n};\n\ntemplate <typename T, size_t MaxDim>\nbool operator==(const SimpleVector<T, MaxDim> &lfs,\n                const SimpleVector<T, MaxDim> &rfs) {\n  if (lfs.size() != rfs.size()) return false;\n  for (size_t i = 0; i < lfs.size(); ++i) {\n    if (lfs[i] != rfs[i]) return false;\n  }\n  return true;\n}\n\ntemplate <typename T, size_t MaxDim>\nbool operator!=(const SimpleVector<T, MaxDim> &lfs,\n                const SimpleVector<T, MaxDim> &rfs) {\n  return !(lfs == rfs);\n}\n\nstruct Slice {\n  template <class... Integers>\n  TV_HOST_DEVICE_INLINE Slice(Integers... 
ints) {\n    static_assert(sizeof...(ints) <= 3, \"slice init must smaller than 3\");\n    SimpleVector<int, 3> slices{int(ints)...};\n    mSlices[0] = -1;\n    mSlices[1] = -1;\n    mSlices[2] = -1;\n    for (size_t i = 0; i < slices.size(); ++i) {\n      mSlices[i] = slices[i];\n    }\n  }\n\n  TV_HOST_DEVICE_INLINE Slice() {\n    mSlices[0] = -1;\n    mSlices[1] = -1;\n    mSlices[2] = -1;\n  }\n  template <typename T>\n  TV_HOST_DEVICE_INLINE Slice(std::initializer_list<T> slice) {\n    mSlices[0] = -1;\n    mSlices[1] = -1;\n    mSlices[2] = -1;\n    TV_ASSERT(slice.size() <= 3);\n    int idx = 0;\n    for (T s : slice) {\n      mSlices[idx] = int(s);\n      ++idx;\n    }\n  }\n  TV_HOST_DEVICE_INLINE int &operator[](int idx) {\n#ifdef TV_DEBUG\n    TV_ASSERT(idx >= 0 && idx < 3);\n#endif\n    return mSlices[idx];\n  }\n  TV_HOST_DEVICE_INLINE const int &operator[](int idx) const {\n#ifdef TV_DEBUG\n    TV_ASSERT(idx >= 0 && idx < 3);\n#endif\n    return mSlices[idx];\n  }\n\n protected:\n  int mSlices[3];\n};\n\ntemplate <size_t MaxDim = TV_MAX_DIM>\nstruct ShapeBase : public SimpleVector<int, MaxDim> {\n  TV_HOST_DEVICE_INLINE ShapeBase() : SimpleVector<int, MaxDim>(){};\n  TV_HOST_DEVICE_INLINE ShapeBase(std::initializer_list<int> shape)\n      : SimpleVector<int, MaxDim>(shape) {}\n\n  template <typename T, template <class...> class Container>\n  ShapeBase(Container<T> shape) : SimpleVector<int, MaxDim>(shape) {}\n  TV_HOST_DEVICE_INLINE ShapeBase(const ShapeBase<MaxDim> &shape)\n      : SimpleVector<int, MaxDim>(shape) {}\n  ShapeBase(const std::vector<int> &arr) : SimpleVector<int, MaxDim>(arr) {}\n\n  ShapeBase<MaxDim> &operator=(const ShapeBase<MaxDim> &shape) = default;\n  TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> subshape(int start, int end) const {\n#ifdef TV_DEBUG\n    TV_ASSERT(start >= 0 && end < this->mSize && end > start);\n#endif\n    ShapeBase<MaxDim> shape;\n    for (int i = start; i < end; ++i) {\n      shape.push_back(this->mArray[i]);\n    }\n    return shape;\n  }\n  TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> subshape(int start) const {\n#ifdef TV_DEBUG\n    TV_ASSERT(start >= 0 && start <= this->mSize);\n#endif\n    ShapeBase<MaxDim> shape;\n    for (int i = start; i < this->mSize; ++i) {\n      shape.push_back(this->mArray[i]);\n    }\n    return shape;\n  }\n\n  TV_HOST_DEVICE_INLINE size_t size() const {\n    if (this->mSize == 0) return 0;\n    size_t s = 1;\n    for (int i = 0; i < int(this->mSize); ++i) {\n      s *= this->mArray[i];\n    }\n    return s;\n  }\n  TV_HOST_DEVICE_INLINE size_t ndim() const { return this->mSize; }\n  TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze() const {\n    ShapeBase<MaxDim> shape;\n    for (int i = 0; i < this->mSize; ++i) {\n      if (this->mArray[i] != 1) shape.push_back(this->mArray[i]);\n    }\n    return shape;\n  }\n  TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze(int dim) const {\n    ShapeBase<MaxDim> shape;\n    for (int i = 0; i < this->mSize; ++i) {\n      if (i != dim || this->mArray[i] != 1) shape.push_back(this->mArray[i]);\n    }\n    return shape;\n  }\n};\n\nusing Shape = ShapeBase<TV_MAX_DIM>;\n\ntemplate <class... Inds>\nTV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int> &shape,\n                                           Inds... 
indexes) {\n  unsigned offset = 0;\n  unsigned m = 1;\n  int indexes_vec[sizeof...(indexes)] = {indexes...};\n#ifdef TV_DEBUG\n  TV_ASSERT(sizeof...(indexes) == shape.size());\n#endif\n#pragma unroll\n  for (int i = sizeof...(indexes) - 1; i >= 0; --i) {\n    offset += m * indexes_vec[i];\n    m *= shape[i];\n  }\n  return offset;\n}\n\nTV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int> &shape,\n                                           std::vector<int> &indexes_vec) {\n  unsigned offset = 0;\n  unsigned m = 1;\n  for (int i = shape.size() - 1; i >= 0; --i) {\n    offset += m * indexes_vec[i];\n    m *= shape[i];\n  }\n  return offset;\n}\n\ntemplate <class... Inds>\nTV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape &shape,\n                                           Inds... indexes) {\n  unsigned offset = 0;\n  unsigned m = 1;\n  int indexes_vec[sizeof...(indexes)] = {indexes...};\n#pragma unroll\n  for (int i = sizeof...(indexes) - 1; i >= 0; --i) {\n    offset += m * indexes_vec[i];\n    m *= shape[i];\n  }\n  return offset;\n}\n\nTV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape &shape,\n                                           const Shape &indexes_vec) {\n  unsigned offset = 0;\n  unsigned m = 1;\n  for (int i = indexes_vec.ndim() - 1; i >= 0; --i) {\n    offset += m * indexes_vec[i];\n    m *= shape[i];\n  }\n  return offset;\n}\n\ntemplate <typename Index, unsigned NDim>\nTV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Index *indexes,\n                                           const Index *shape) {\n  unsigned offset = 0;\n  unsigned m = 1;\n#pragma unroll\n  for (int i = NDim - 1; i >= 0; --i) {\n    offset += m * indexes[i];\n    m *= shape[i];\n  }\n  return offset;\n}\n\ntemplate <typename Index, unsigned NDim>\nTV_HOST_DEVICE_INLINE Index rowArrayIdxInv(Index index, Index *output,\n                                           const Index *shape) {\n#pragma unroll\n  for (int i = NDim - 1; i >= 0; --i) {\n    output[i] = index % shape[i];\n    index -= output[i];\n    index /= shape[i];\n  }\n  return index;\n}\n\ntemplate <int N>\nstruct ArrayIndexRowMajor {\n  // mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];\n  TV_HOST_DEVICE_INLINE static unsigned run(const Shape &shape,\n                                            const Shape &indexes) {\n    return indexes[N - 1] +\n           shape[N - 1] * ArrayIndexRowMajor<N - 1>::run(shape, indexes);\n  }\n};\n\ntemplate <>\nstruct ArrayIndexRowMajor<0> {\n  TV_HOST_DEVICE_INLINE static unsigned run(const Shape &shape,\n                                            const Shape &indexes) {\n    return 0;\n  }\n};\n\nnamespace detail {\ntemplate <typename T>\nconstexpr const char *simpleTypeName(T val = T());\ntemplate <>\nconstexpr const char *simpleTypeName(float val) {\n  return \"float32\";\n}\ntemplate <>\nconstexpr const char *simpleTypeName(double val) {\n  return \"float64\";\n}\ntemplate <>\nconstexpr const char *simpleTypeName(int val) {\n  return \"int32\";\n}\ntemplate <>\nconstexpr const char *simpleTypeName(unsigned val) {\n  return \"uint32\";\n}\ntemplate <>\nconstexpr const char *simpleTypeName(long val) {\n  return \"int64\";\n}\ntemplate <>\nconstexpr const char *simpleTypeName(unsigned long val) {\n  return \"uint64\";\n}\n};  // namespace detail\n\ntemplate <typename T, int Rank = -1>\nstruct TensorView {\n  TV_HOST_DEVICE_INLINE TensorView() {}\n  explicit TV_HOST_DEVICE_INLINE TensorView(T *ptr, Shape shape)\n      : mPtr(ptr), mShape(shape) {}\n  // explicit 
TV_HOST_DEVICE_INLINE TensorView(const\n  // TensorView<std::remove_const_t<T>> &tview) : mPtr(tview.data()),\n  // mShape(tview.shape()) {}\n  template <class... Integers>\n  explicit TV_HOST_DEVICE_INLINE TensorView(T *ptr, Integers... shapes)\n      : mPtr(ptr) {\n    mShape = {int(shapes)...};\n  }\n\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> &assign(\n      const TensorView<T, Rank> &tensor) {\n    TV_REQUIRE(tensor.shape() == shape(), \"you must provide same input size%s\",\n               \"\\n\");\n    T *ptr = mPtr;\n    const T *other_ptr = tensor.data();\n    for (size_t i = 0; i < size(); ++i) *(ptr++) = *(other_ptr++);\n    return *this;\n  }\n\n  template <typename T1>\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> &assign(\n      std::initializer_list<T1> seq) {\n    TV_REQUIRE(seq.size() == size(), \"you must provide same input size%s\",\n               \"\\n\");\n    T *ptr = mPtr;\n    for (const T1 &s : seq) *(ptr++) = T(s);\n    return *this;\n  }\n\n  template <class... Inds>\n  TV_HOST_DEVICE_INLINE T &operator()(Inds... inds) {\n#ifdef TV_DEBUG\n    int idxes[sizeof...(Inds)]{int(inds)...};\n    TV_REQUIRE(sizeof...(inds) == mShape.ndim(),\n               \"you provide %d indexes, but dim is %d\\n\", sizeof...(inds),\n               mShape.ndim());\n    for (int i = 0; i < sizeof...(inds); ++i) {\n      TV_REQUIRE(idxes[i] >= 0 && idxes[i] < mShape[i],\n                 \"index-%d(%d) out-of-range: [0, %d)\\n\", i, idxes[i],\n                 mShape[i]);\n    }\n#endif\n    return mPtr[rowArrayIdx(mShape, int(inds)...)];\n  }\n  template <class... Inds>\n  TV_HOST_DEVICE_INLINE const T &operator()(Inds... inds) const {\n#ifdef TV_DEBUG\n    int idxes[sizeof...(Inds)]{int(inds)...};\n    TV_REQUIRE(sizeof...(inds) == mShape.ndim(),\n               \"you provide %d indexes, but dim is %d\\n\", sizeof...(inds),\n               mShape.ndim());\n    for (int i = 0; i < sizeof...(inds); ++i) {\n      TV_REQUIRE(idxes[i] >= 0 && idxes[i] < mShape[i],\n                 \"index-%d(%d) out-of-range: [0, %d)\\n\", i, idxes[i],\n                 mShape[i]);\n    }\n#endif\n    return mPtr[rowArrayIdx(mShape, int(inds)...)];\n  }\n  TV_HOST_DEVICE_INLINE T &operator()() {\n#if defined TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    TV_DEVICE_REQUIRE(mPtr != nullptr,\n                      \"you want get value but the view is empty.%s\", \"\\n\");\n    TV_DEVICE_REQUIRE(mShape.ndim() == 0,\n                      \"you provide 0 indexes, but dim is %ld\\n\", mShape.ndim());\n#else\n    TV_REQUIRE(mPtr != nullptr, \"you want get value but the view is empty.%s\",\n               \"\\n\");\n    TV_REQUIRE(mShape.ndim() == 0, \"you provide 0 indexes, but dim is %ld\\n\",\n               mShape.ndim());\n#endif\n#endif\n    return mPtr[0];\n  }\n  TV_HOST_DEVICE_INLINE const T &operator()() const {\n#if defined TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    TV_DEVICE_REQUIRE(mPtr != nullptr,\n                      \"you want get value but the view is empty.%s\", \"\\n\");\n    TV_DEVICE_REQUIRE(mShape.ndim() == 0,\n                      \"you provide 0 indexes, but dim is %ld\\n\", mShape.ndim());\n#else\n    TV_REQUIRE(mPtr != nullptr, \"you want get value but the view is empty.%s\",\n               \"\\n\");\n    TV_REQUIRE(mShape.ndim() == 0, \"you provide 0 indexes, but dim is %ld\\n\",\n               mShape.ndim());\n#endif\n#endif\n    return mPtr[0];\n  }\n\n  template <class T1>\n  TV_HOST_DEVICE_INLINE T &operator()(T1 i1) {\n#if defined TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    
TV_DEVICE_REQUIRE(mShape.ndim() == 1,\n                      \"you provide 1 indexes, but dim is %ld\\n\", mShape.ndim());\n    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, i1, mShape[0]);\n#else\n    TV_REQUIRE(mShape.ndim() == 1, \"you provide 1 indexes, but dim is %ld\\n\",\n               mShape.ndim());\n    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, i1, mShape[0]);\n#endif\n#endif\n    return mPtr[i1];\n  }\n  template <class T1, class T2>\n  TV_HOST_DEVICE_INLINE T &operator()(T1 i1, T2 i2) {\n#ifdef TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    TV_DEVICE_REQUIRE(mShape.ndim() == 2,\n                      \"you provide 2 indexes, but dim is %ld\\n\", mShape.ndim());\n    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1),\n                      mShape[0]);\n    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2),\n                      mShape[1]);\n#else\n    TV_REQUIRE(mShape.ndim() == 2, \"you provide 2 indexes, but dim is %ld\\n\",\n               mShape.ndim());\n    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1), mShape[0]);\n    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2), mShape[1]);\n#endif\n#endif\n    return mPtr[i1 * mShape[1] + i2];\n  }\n  template <class T1, class T2, class T3>\n  TV_HOST_DEVICE_INLINE T &operator()(T1 i1, T2 i2, T3 i3) {\n#ifdef TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    TV_DEVICE_REQUIRE(mShape.ndim() == 3,\n                      \"you provide 3 indexes, but dim is %ld\\n\", mShape.ndim());\n    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1),\n                      mShape[0]);\n    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2),\n                      mShape[1]);\n    TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 2, int(i3),\n                      mShape[2]);\n#else\n    TV_REQUIRE(mShape.ndim() == 3, \"you provide 3 indexes, but dim is %ld\\n\",\n               mShape.ndim());\n    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1), mShape[0]);\n    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2), mShape[1]);\n    TV_REQUIRE(i3 >= 0 && i3 < mShape[2],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 2, int(i3), mShape[2]);\n#endif\n#endif\n    return mPtr[(i1 * mShape[1] + i2) * mShape[2] + i3];\n  }\n  template <class T1, class T2, class T3, class T4>\n  TV_HOST_DEVICE_INLINE T &operator()(T1 i1, T2 i2, T3 i3, T4 i4) {\n#ifdef TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    TV_DEVICE_REQUIRE(mShape.ndim() == 4,\n                      \"you provide 4 indexes, but dim is %ld\\n\", mShape.ndim());\n    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1),\n                      mShape[0]);\n    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2),\n                      mShape[1]);\n    TV_DEVICE_REQUIRE(i3 
>= 0 && i3 < mShape[2],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 2, int(i3),\n                      mShape[2]);\n    TV_DEVICE_REQUIRE(i4 >= 0 && i4 < mShape[3],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 3, int(i4),\n                      mShape[3]);\n#else\n    TV_REQUIRE(mShape.ndim() == 4, \"you provide 4 indexes, but dim is %ld\\n\",\n               mShape.ndim());\n    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1), mShape[0]);\n    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2), mShape[1]);\n    TV_REQUIRE(i3 >= 0 && i3 < mShape[2],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 2, int(i3), mShape[2]);\n    TV_REQUIRE(i4 >= 0 && i4 < mShape[3],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 3, int(i4), mShape[3]);\n#endif\n#endif\n    return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];\n  }\n\n  template <class T1>\n  TV_HOST_DEVICE_INLINE const T &operator()(T1 i1) const {\n#ifdef TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    TV_DEVICE_REQUIRE(mShape.ndim() == 1,\n                      \"you provide 1 indexes, but dim is %ld\\n\", mShape.ndim());\n    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1),\n                      mShape[0]);\n#else\n    TV_REQUIRE(mShape.ndim() == 1, \"you provide 1 indexes, but dim is %ld\\n\",\n               mShape.ndim());\n    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1), mShape[0]);\n#endif\n#endif\n    return mPtr[i1];\n  }\n  template <class T1, class T2>\n  TV_HOST_DEVICE_INLINE const T &operator()(T1 i1, T2 i2) const {\n#ifdef TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    TV_DEVICE_REQUIRE(mShape.ndim() == 2,\n                      \"you provide 2 indexes, but dim is %ld\\n\", mShape.ndim());\n    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1),\n                      mShape[0]);\n    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2),\n                      mShape[1]);\n#else\n    TV_REQUIRE(mShape.ndim() == 2, \"you provide 2 indexes, but dim is %ld\\n\",\n               mShape.ndim());\n    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1), mShape[0]);\n    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2), mShape[1]);\n\n#endif\n#endif\n    return mPtr[i1 * mShape[1] + i2];\n  }\n  template <class T1, class T2, class T3>\n  TV_HOST_DEVICE_INLINE const T &operator()(T1 i1, T2 i2, T3 i3) const {\n#ifdef TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    TV_DEVICE_REQUIRE(mShape.ndim() == 3,\n                      \"you provide 3 indexes, but dim is %ld\\n\", mShape.ndim());\n    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1),\n                      mShape[0]);\n    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2),\n                      mShape[1]);\n    TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 2, int(i3),\n                      
mShape[2]);\n#else\n    TV_REQUIRE(mShape.ndim() == 3, \"you provide 3 indexes, but dim is %ld\\n\",\n               mShape.ndim());\n    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1), mShape[0]);\n    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2), mShape[1]);\n    TV_REQUIRE(i3 >= 0 && i3 < mShape[2],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 2, int(i3), mShape[2]);\n#endif\n#endif\n    return mPtr[(i1 * mShape[1] + i2) * mShape[2] + i3];\n  }\n  template <class T1, class T2, class T3, class T4>\n  TV_HOST_DEVICE_INLINE const T &operator()(T1 i1, T2 i2, T3 i3, T4 i4) const {\n#ifdef TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    TV_DEVICE_REQUIRE(mShape.ndim() == 4,\n                      \"you provide 4 indexes, but dim is %ld\\n\", mShape.ndim());\n    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1),\n                      mShape[0]);\n    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2),\n                      mShape[1]);\n    TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 2, int(i3),\n                      mShape[2]);\n    TV_DEVICE_REQUIRE(i4 >= 0 && i4 < mShape[3],\n                      \"index-%d(%d) out-of-range: [0, %d)\\n\", 3, int(i4),\n                      mShape[3]);\n#else\n    TV_REQUIRE(mShape.ndim() == 4, \"you provide 4 indexes, but dim is %ld\\n\",\n               mShape.ndim());\n    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 0, int(i1), mShape[0]);\n    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 1, int(i2), mShape[1]);\n    TV_REQUIRE(i3 >= 0 && i3 < mShape[2],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 2, int(i3), mShape[2]);\n    TV_REQUIRE(i4 >= 0 && i4 < mShape[3],\n               \"index-%d(%d) out-of-range: [0, %d)\\n\", 3, int(i4), mShape[3]);\n#endif\n#endif\n    return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];\n  }\n\n  TV_HOST_DEVICE_INLINE T &operator[](int idx) {\n#ifdef TV_DEBUG\n#if defined(__CUDA_ARCH__)\n    TV_DEVICE_REQUIRE(idx >= 0 && idx < size(),\n                      \"index(%d) out-of-range: [0, %ld)\\n\", int(idx), size());\n#else\n    TV_REQUIRE(idx >= 0 && idx < size(), \"index(%d) out-of-range: [0, %ld)\\n\",\n               int(idx), size());\n#endif\n#endif\n    return mPtr[idx];\n  }\n  // TODO: this is conflcit with operator[](SimpleVector<Slice> slice_vec).\n  /*TV_HOST_DEVICE_INLINE T &operator[](const Shape index) {\n    int idx = rowArrayIdx(mShape, index);\n#ifdef TV_DEBUG\n    TV_REQUIRE(idx >= 0 && idx < size(), \"index(%d) out-of-range: [0, %ld)\\n\",\n                int(idx), size());\n#endif\n    return mPtr[idx];\n  }\n  TV_HOST_DEVICE_INLINE const T &operator[](const Shape index) const {\n    int idx = rowArrayIdx(mShape, index);\n#ifdef TV_DEBUG\n    TV_REQUIRE(idx >= 0 && idx < size(), \"index(%d) out-of-range: [0, %ld)\\n\",\n                int(idx), size());\n#endif\n    return mPtr[idx];\n  }*/\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> operator[](\n      SimpleVector<Slice> slice_vec) {\n    return _subview(slice_vec);\n  }\n  TV_HOST_DEVICE_INLINE const TensorView<T, Rank> operator[](\n      SimpleVector<Slice> slice_vec) const 
{\n    return _subview(slice_vec);\n  }\n  TV_HOST_DEVICE_INLINE bool empty() const { return mPtr == nullptr; }\n  TV_HOST_DEVICE_INLINE T *data() { return mPtr; }\n  TV_HOST_DEVICE_INLINE const T *data() const { return mPtr; }\n  TV_HOST_DEVICE_INLINE const Shape &shape() const { return mShape; }\n  TV_HOST_DEVICE_INLINE int dim(int idx) const { return mShape[idx]; }\n  TV_HOST_DEVICE_INLINE int ndim() const { return mShape.ndim(); }\n  template <class... Inds>\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> &reshape(Inds... newShapes) {\n    Shape shapes{int(newShapes)...};\n    TV_ASSERT(shapes.size() == size());\n    mShape = shapes;\n    return *this;\n  }\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> &reshape(Shape shapes) {\n    TV_ASSERT(shapes.size() == size());\n    mShape = shapes;\n    return *this;\n  }\n  template <class... Inds>\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> view(Inds... newShapes) const {\n    Shape shapes{int(newShapes)...};\n    for (size_t i = 0; i < shapes.ndim(); ++i) {\n      if (shapes[i] == -1) {\n        shapes[i] = 1;\n        shapes[i] = size() / shapes.size();\n        break;\n      }\n    }\n    TV_ASSERT(shapes.size() == size());\n    return TensorView<T, Rank>(mPtr, shapes);\n  }\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> view(Shape shapes) const {\n    TV_ASSERT(shapes.size() == size());\n    return TensorView<T, Rank>(mPtr, shapes);\n  }\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> squeeze() const {\n    return TensorView<T, Rank>(mPtr, mShape.squeeze());\n  }\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> squeeze(int dim) const {\n    return TensorView<T, Rank>(mPtr, mShape.squeeze(dim));\n  }\n  TV_HOST_DEVICE_INLINE size_t size() const { return mShape.size(); }\n\n  template <class... Slices>\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> subview(Slice slice,\n                                                    Slices... slices) const {\n    return subview<float, Slice, Slices...>(slice, slices...);\n  }\n  template <class T2 = float, class... Slices>\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> subview(Slices... slices) const {\n    Slice slice_vec[sizeof...(Slices)] = {to_slice(slices)...};\n    Shape new_shape{to_slice(slices)[0]...};\n    Shape start{to_slice(slices)[0]...};\n    TV_ASSERT(new_shape.ndim() <= mShape.ndim());\n    TV_ASSERT(new_shape.ndim() != 0);\n    size_t idxsize = new_shape.ndim();\n    for (size_t i = idxsize; i < mShape.ndim(); ++i) {\n      new_shape.push_back(0);\n      start.push_back(0);\n    }\n#pragma unroll\n    for (size_t i = 0; i < sizeof...(Slices); ++i) {\n      if (slice_vec[i][1] != -1) {\n        new_shape[i] = slice_vec[i][1] - slice_vec[i][0];\n        TV_ASSERT(new_shape[i] >= 0);\n      } else {\n        new_shape[i] = 1;  // reduce dim\n      }\n    }\n    auto offset = rowArrayIdx(mShape, start);\n#pragma unroll\n    for (size_t i = sizeof...(Slices); i < mShape.ndim(); ++i) {\n      new_shape[i] = mShape[i];\n      TV_ASSERT(new_shape[i] >= 0);\n    }\n    Shape reduced_shape;\n#pragma unroll\n    for (size_t i = 0; i < sizeof...(Slices); ++i) {\n      if (slice_vec[i][1] != -1) {\n        reduced_shape.push_back(new_shape[i]);\n      }\n    }\n#pragma unroll\n    for (size_t i = sizeof...(Slices); i < mShape.ndim(); ++i) {\n      reduced_shape.push_back(new_shape[i]);\n    }\n    return TensorView<T, Rank>(mPtr + offset, reduced_shape);\n  }\n\n  template <class... Integers>\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> subview(int id, Integers... 
ints) {\n    Shape start = {id, ints...};\n    for (int i = 1 + sizeof...(ints); i < ndim(); ++i) {\n      start.push_back(0);\n    }\n    return TensorView<T, Rank>(mPtr + rowArrayIdx(mShape, start),\n                               mShape.subshape(sizeof...(ints) + 1));\n  }\n\n  std::string repr() const {\n    std::ostringstream ss;\n    if (empty()) return \"\";\n    if (mShape.ndim() == 0) {\n      ss << *mPtr;\n      // ss << fmt::format(\"\\nTensor: shape={}, dtype={}\", mShape,\n      // detail::simpleTypeName<T>());\n      ss << \"Tensor: dtype=\" << detail::simpleTypeName<T>();\n      return ss.str();\n    }\n    Shape counter = mShape;\n    auto tensor_flat = this->view(-1);\n    for (int i = 0; i < counter.ndim(); ++i) {\n      counter[i] = 0;\n      ss << \"[\";\n    }\n    for (size_t i = 0; i < this->size(); ++i) {\n      ss << tensor_flat(rowArrayIdx(mShape, counter));\n      counter[counter.ndim() - 1] += 1;\n      int inc_count = 0;\n      bool print_comma = true;\n      for (int c = counter.ndim() - 1; c >= 0; --c) {\n        if (counter[c] == this->dim(c) && c > 0) {\n          ++inc_count;\n          counter[c - 1] += 1;\n          counter[c] = 0;\n          print_comma = false;\n        }\n      }\n      if (print_comma && i != this->size() - 1) ss << \", \";\n      for (int j = 0; j < inc_count; ++j) {\n        ss << \"]\";\n      }\n      if (i != this->size() - 1) {\n        if (inc_count != 0) ss << \"\\n\";\n        for (int j = 0; j < inc_count; ++j) {\n          ss << \"[\";\n        }\n      }\n    }\n    ss << \"]\";\n    // ss << fmt::format(\"\\nTensor: shape={}, dtype={}\", mShape,\n    // detail::simpleTypeName<T>());\n    ss << \"Tensor: dtype=\" << detail::simpleTypeName<T>();\n    return ss.str();\n  }\n\n protected:\n  // TODO: make this function public.\n  // currently this function is called unexpectedly when using subview({0, 0}).\n  TV_HOST_DEVICE_INLINE TensorView<T, Rank> _subview(\n      SimpleVector<Slice> slice_vec) {\n    Shape new_shape;\n    for (int i = 0; i < slice_vec.size(); ++i) {\n      new_shape.push_back(slice_vec[i][0]);\n    }\n    Shape start = new_shape;\n    TV_ASSERT(new_shape.ndim() <= mShape.ndim());\n    TV_ASSERT(new_shape.ndim() != 0);\n    size_t idxsize = new_shape.ndim();\n    for (size_t i = idxsize; i < mShape.ndim(); ++i) {\n      new_shape.push_back(0);\n      start.push_back(0);\n    }\n    for (size_t i = 0; i < slice_vec.size(); ++i) {\n      if (slice_vec[i][1] != -1) {\n        new_shape[i] = slice_vec[i][1] - slice_vec[i][0];\n        TV_ASSERT(new_shape[i] >= 0);\n      } else {\n        new_shape[i] = 1;  // reduce dim\n      }\n    }\n    auto offset = rowArrayIdx(mShape, start);\n    for (size_t i = slice_vec.size(); i < mShape.ndim(); ++i) {\n      new_shape[i] = mShape[i];\n      TV_ASSERT(new_shape[i] >= 0);\n    }\n    Shape reduced_shape;\n    for (size_t i = 0; i < slice_vec.size(); ++i) {\n      if (slice_vec[i][1] != -1) {\n        reduced_shape.push_back(new_shape[i]);\n      }\n    }\n    for (size_t i = slice_vec.size(); i < mShape.ndim(); ++i) {\n      reduced_shape.push_back(new_shape[i]);\n    }\n    return TensorView<T, Rank>(mPtr + offset, reduced_shape);\n  }\n  template <typename T1>\n  TV_HOST_DEVICE_INLINE Slice to_slice(T1 s) const {\n    return Slice{int(s), -1, -1};\n  }\n\n  TV_HOST_DEVICE_INLINE Slice to_slice(Slice s) const { return Slice(s); }\n\n  T *mPtr = nullptr;\n  Shape mShape;\n};\n\ntemplate <typename Os, typename T, int Rank>\nOs &operator<<(Os &os, const TensorView<T, 
Rank> &dt) {\n  os << dt.repr();\n  return os;\n}\n\ntemplate <typename Os, typename T, int Rank>\nOs &operator<<(Os &os, const TensorView<const T, Rank> &dt) {\n  os << dt.repr();\n  return os;\n}\n\nnamespace detail {\ntemplate <typename T>\nconstexpr const char *printfTypeFormat(T val = T());\ntemplate <>\nconstexpr const char *printfTypeFormat(float val) {\n  return \"%.2f\";\n}\ntemplate <>\nconstexpr const char *printfTypeFormat(double val) {\n  return \"%.2f\";\n}\ntemplate <>\nconstexpr const char *printfTypeFormat(int val) {\n  return \"%d\";\n}\ntemplate <>\nconstexpr const char *printfTypeFormat(unsigned val) {\n  return \"%u\";\n}\ntemplate <>\nconstexpr const char *printfTypeFormat(long val) {\n  return \"%ld\";\n}\ntemplate <>\nconstexpr const char *printfTypeFormat(unsigned long val) {\n  return \"%lu\";\n}\n};  // namespace detail\n\ntemplate <typename T>\nTV_HOST_DEVICE void printTensorView(const TensorView<T> tensor,\n                                    const char *format) {\n  if (tensor.empty()) return;\n  if (tensor.ndim() == 0) {\n    printf(format, tensor());\n    printf(\"\\n\");\n    return;\n  }\n  Shape counter = tensor.shape();\n  auto tensor_flat = tensor.view(-1);\n  for (int i = 0; i < counter.ndim(); ++i) {\n    counter[i] = 0;\n    printf(\"[\");\n  }\n  for (size_t i = 0; i < tensor.size(); ++i) {\n    printf(format, tensor_flat(rowArrayIdx(tensor.shape(), counter)));\n    counter[counter.ndim() - 1] += 1;\n    int inc_count = 0;\n    bool print_comma = true;\n    for (int c = counter.ndim() - 1; c >= 0; --c) {\n      if (counter[c] == tensor.dim(c) && c > 0) {\n        ++inc_count;\n        counter[c - 1] += 1;\n        counter[c] = 0;\n        print_comma = false;\n      }\n    }\n    if (print_comma && i != tensor.size() - 1) printf(\", \");\n    for (int j = 0; j < inc_count; ++j) {\n      printf(\"]\");\n    }\n    if (i != tensor.size() - 1) {\n      if (inc_count != 0) printf(\"\\n\");\n      for (int j = 0; j < inc_count; ++j) {\n        printf(\"[\");\n      }\n    }\n  }\n  printf(\"]\\n\");\n}\n\ntemplate <typename T>\nTV_HOST_DEVICE void printTensorView(TensorView<T> tensor) {\n  using Traw = typename std::remove_const<T>::type;\n  return printTensorView(tensor, detail::printfTypeFormat<Traw>());\n}\ntemplate <typename T>\nTV_HOST_DEVICE void printTensorView(const T *ptr, Shape shape) {\n  using Traw = typename std::remove_const<T>::type;\n  return printTensorView(TensorView<const T>(ptr, shape),\n                         detail::printfTypeFormat<Traw>());\n}\ntemplate <typename T>\nTV_HOST_DEVICE void printTensorView(const T *ptr, Shape shape,\n                                    const char *format) {\n  return printTensorView(TensorView<const T>(ptr, shape), format);\n}\n\n}  // namespace tv\n"
  },
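The rowArrayIdx helpers and ArrayIndexRowMajor in tensorview.h all compute the same row-major flat offset by nested multiplication (the comment in ArrayIndexRowMajor spells it out as ((i1 * shape[1] + i2) * shape[2] + i3) * shape[3] + i4). A small stand-alone Python sketch of that arithmetic, not part of the library:

```python
def row_major_offset(shape, indexes):
    """Flat row-major offset, mirroring tv::rowArrayIdx: walk the dims from the
    innermost one, accumulating offset += stride * index."""
    offset, stride = 0, 1
    for i in range(len(indexes) - 1, -1, -1):
        offset += stride * indexes[i]
        stride *= shape[i]
    return offset


# shape (2, 3, 4), index (1, 2, 3): ((1 * 3 + 2) * 4 + 3) == 23
assert row_major_offset([2, 3, 4], [1, 2, 3]) == 23
```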
  {
    "path": "mmdet3d/ops/spconv/include/torch_utils.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n#include <tensorview/tensorview.h>\n#include <torch/script.h>\n\nnamespace tv {\n\nstruct TorchGPU : public tv::GPU {\n  virtual cudaStream_t getStream() const override {\n    return at::cuda::getCurrentCUDAStream();\n  }\n};\n\ntemplate <typename T>\nvoid check_torch_dtype(const torch::Tensor &tensor) {\n  switch (tensor.type().scalarType()) {\n    case at::ScalarType::Double: {\n      auto val = std::is_same<std::remove_const_t<T>, double>::value;\n      TV_ASSERT_RT_ERR(val, \"error\");\n      break;\n    }\n    case at::ScalarType::Float: {\n      auto val = std::is_same<std::remove_const_t<T>, float>::value;\n      TV_ASSERT_RT_ERR(val, \"error\");\n      break;\n    }\n    case at::ScalarType::Int: {\n      auto val = std::is_same<std::remove_const_t<T>, int>::value;\n      TV_ASSERT_RT_ERR(val, \"error\");\n      break;\n    }\n    case at::ScalarType::Half: {\n      auto val = std::is_same<std::remove_const_t<T>, at::Half>::value;\n      TV_ASSERT_RT_ERR(val, \"error\");\n      break;\n    }\n    case at::ScalarType::Long: {\n      auto val = std::is_same<std::remove_const_t<T>, long>::value;\n      TV_ASSERT_RT_ERR(val, \"error\");\n      break;\n    }\n    default:\n      TV_ASSERT_RT_ERR(false, \"error\");\n  }\n}\n\ntemplate <typename T>\ntv::TensorView<T> torch2tv(const torch::Tensor &tensor) {\n  check_torch_dtype<T>(tensor);\n  tv::Shape shape;\n  for (auto i : tensor.sizes()) {\n    shape.push_back(i);\n  }\n  return tv::TensorView<T>(tensor.data_ptr<std::remove_const_t<T>>(), shape);\n}\n}  // namespace tv\n"
  },
  {
    "path": "mmdet3d/ops/spconv/include/utility/timer.h",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n#include <chrono>\n#include <cuda_runtime_api.h>\n#include <iostream>\n\nnamespace spconv {\n\ntemplate <typename TimeT = std::chrono::microseconds> struct CudaContextTimer {\n  CudaContextTimer() {\n    cudaDeviceSynchronize();\n    mCurTime = std::chrono::steady_clock::now();\n  }\n  typename TimeT::rep report() {\n    cudaDeviceSynchronize();\n    auto duration = std::chrono::duration_cast<TimeT>(\n        std::chrono::steady_clock::now() - mCurTime);\n    auto res = duration.count();\n    mCurTime = std::chrono::steady_clock::now();\n    return res;\n  }\n\nprivate:\n  std::chrono::time_point<std::chrono::steady_clock> mCurTime;\n};\n\ntemplate <typename TimeT = std::chrono::microseconds> struct CPUTimer {\n  CPUTimer() { mCurTime = std::chrono::steady_clock::now(); }\n  typename TimeT::rep report() {\n    auto duration = std::chrono::duration_cast<TimeT>(\n        std::chrono::steady_clock::now() - mCurTime);\n    auto res = duration.count();\n    mCurTime = std::chrono::steady_clock::now();\n    return res;\n  }\n\nprivate:\n  std::chrono::time_point<std::chrono::steady_clock> mCurTime;\n};\n\n} // namespace spconv\n"
  },
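CudaContextTimer calls cudaDeviceSynchronize() before reading the clock so that asynchronously launched kernels are actually included in the measured interval. A rough Python counterpart of the same pattern (illustrative only; assumes PyTorch and a CUDA device):

```python
import time

import torch


class CudaTimer:
    """Rough Python counterpart of spconv::CudaContextTimer (reports microseconds)."""

    def __init__(self):
        torch.cuda.synchronize()   # drain kernels that were already launched
        self._t = time.perf_counter()

    def report(self):
        torch.cuda.synchronize()   # include everything launched since the last call
        now = time.perf_counter()
        elapsed_us = (now - self._t) * 1e6
        self._t = now              # reset the reference point, like the C++ version
        return elapsed_us
```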
  {
    "path": "mmdet3d/ops/spconv/modules.py",
    "content": "# Copyright 2019 Yan Yan\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport sys\nimport torch\nfrom collections import OrderedDict\nfrom torch import nn\n\nfrom .structure import SparseConvTensor\n\n\ndef is_spconv_module(module):\n    spconv_modules = (SparseModule, )\n    return isinstance(module, spconv_modules)\n\n\ndef is_sparse_conv(module):\n    from .conv import SparseConvolution\n    return isinstance(module, SparseConvolution)\n\n\ndef _mean_update(vals, m_vals, t):\n    outputs = []\n    if not isinstance(vals, list):\n        vals = [vals]\n    if not isinstance(m_vals, list):\n        m_vals = [m_vals]\n    for val, m_val in zip(vals, m_vals):\n        output = t / float(t + 1) * m_val + 1 / float(t + 1) * val\n        outputs.append(output)\n    if len(outputs) == 1:\n        outputs = outputs[0]\n    return outputs\n\n\nclass SparseModule(nn.Module):\n    \"\"\"place holder, All module subclass from this will take sptensor in\n    SparseSequential.\"\"\"\n    pass\n\n\nclass SparseSequential(SparseModule):\n    r\"\"\"A sequential container.\n    Modules will be added to it in the order they are passed in the\n    constructor.\n    Alternatively, an ordered dict of modules can also be passed in.\n\n    To make it easier to understand, given is a small example::\n\n        # Example of using Sequential\n        model = SparseSequential(\n                  SparseConv2d(1,20,5),\n                  nn.ReLU(),\n                  SparseConv2d(20,64,5),\n                  nn.ReLU()\n                )\n\n        # Example of using Sequential with OrderedDict\n        model = SparseSequential(OrderedDict([\n                  ('conv1', SparseConv2d(1,20,5)),\n                  ('relu1', nn.ReLU()),\n                  ('conv2', SparseConv2d(20,64,5)),\n                  ('relu2', nn.ReLU())\n                ]))\n\n        # Example of using Sequential with kwargs(python 3.6+)\n        model = SparseSequential(\n                  conv1=SparseConv2d(1,20,5),\n                  relu1=nn.ReLU(),\n                  conv2=SparseConv2d(20,64,5),\n                  relu2=nn.ReLU()\n                )\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super(SparseSequential, self).__init__()\n        if len(args) == 1 and isinstance(args[0], OrderedDict):\n            for key, module in args[0].items():\n                self.add_module(key, module)\n        else:\n            for idx, module in enumerate(args):\n                self.add_module(str(idx), module)\n        for name, module in kwargs.items():\n            if sys.version_info < (3, 6):\n                raise ValueError('kwargs only supported in py36+')\n            if name in self._modules:\n                raise ValueError('name exists.')\n            self.add_module(name, module)\n        self._sparity_dict = {}\n\n    def __getitem__(self, idx):\n        if not (-len(self) <= idx < len(self)):\n            raise IndexError('index {} is out of range'.format(idx))\n        if idx < 0:\n         
   idx += len(self)\n        it = iter(self._modules.values())\n        for i in range(idx):\n            next(it)\n        return next(it)\n\n    def __len__(self):\n        return len(self._modules)\n\n    @property\n    def sparity_dict(self):\n        return self._sparity_dict\n\n    def add(self, module, name=None):\n        if name is None:\n            name = str(len(self._modules))\n            if name in self._modules:\n                raise KeyError('name exists')\n        self.add_module(name, module)\n\n    def forward(self, input):\n        for k, module in self._modules.items():\n            if is_spconv_module(module):  # use SpConvTensor as input\n                assert isinstance(input, SparseConvTensor)\n                self._sparity_dict[k] = input.sparity\n                input = module(input)\n            else:\n                if isinstance(input, SparseConvTensor):\n                    if input.indices.shape[0] != 0:\n                        input.features = module(input.features)\n                else:\n                    input = module(input)\n        return input\n\n    def fused(self):\n        \"\"\"don't use this.\n\n        no effect.\n        \"\"\"\n        from .conv import SparseConvolution\n        mods = [v for k, v in self._modules.items()]\n        fused_mods = []\n        idx = 0\n        while idx < len(mods):\n            if is_sparse_conv(mods[idx]):\n                if idx < len(mods) - 1 and isinstance(mods[idx + 1],\n                                                      nn.BatchNorm1d):\n                    new_module = SparseConvolution(\n                        ndim=mods[idx].ndim,\n                        in_channels=mods[idx].in_channels,\n                        out_channels=mods[idx].out_channels,\n                        kernel_size=mods[idx].kernel_size,\n                        stride=mods[idx].stride,\n                        padding=mods[idx].padding,\n                        dilation=mods[idx].dilation,\n                        groups=mods[idx].groups,\n                        bias=True,\n                        subm=mods[idx].subm,\n                        output_padding=mods[idx].output_padding,\n                        transposed=mods[idx].transposed,\n                        inverse=mods[idx].inverse,\n                        indice_key=mods[idx].indice_key,\n                        fused_bn=True,\n                    )\n                    new_module.load_state_dict(mods[idx].state_dict(), False)\n                    new_module.to(mods[idx].weight.device)\n                    conv = new_module\n                    bn = mods[idx + 1]\n                    conv.bias.data.zero_()\n                    conv.weight.data[:] = conv.weight.data * bn.weight.data / (\n                        torch.sqrt(bn.running_var) + bn.eps)\n                    conv.bias.data[:] = (\n                        conv.bias.data - bn.running_mean) * bn.weight.data / (\n                            torch.sqrt(bn.running_var) + bn.eps) + bn.bias.data\n                    fused_mods.append(conv)\n                    idx += 2\n                else:\n                    fused_mods.append(mods[idx])\n                    idx += 1\n            else:\n                fused_mods.append(mods[idx])\n                idx += 1\n        return SparseSequential(*fused_mods)\n\n\nclass ToDense(SparseModule):\n    \"\"\"convert SparseConvTensor to NCHW dense tensor.\"\"\"\n\n    def forward(self, x: SparseConvTensor):\n        return x.dense()\n\n\nclass 
RemoveGrid(SparseModule):\n    \"\"\"remove pre-allocated grid buffer.\"\"\"\n\n    def forward(self, x: SparseConvTensor):\n        x.grid = None\n        return x\n"
  },
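SparseSequential.fused() folds a trailing nn.BatchNorm1d into the preceding SparseConvolution by rescaling the kernel and rebuilding the bias. The tiny check below works through that algebra for a single channel, using the same denominator (sqrt(running_var) + eps) that fused() uses; the numbers are made up:

```python
import math

# Hypothetical single-channel values
w, b = 0.5, 0.1                    # conv weight / bias
gamma, beta = 1.2, -0.3            # bn.weight, bn.bias
mean, var, eps = 0.05, 0.8, 1e-3   # bn.running_mean, bn.running_var, bn.eps
x = 2.0                            # an input activation

scale = gamma / (math.sqrt(var) + eps)   # denominator as written in fused()
w_fused = w * scale
b_fused = (b - mean) * scale + beta

y_conv_bn = ((x * w + b) - mean) * scale + beta   # conv, then BN in inference form
y_fused = x * w_fused + b_fused                   # single fused convolution
assert abs(y_conv_bn - y_fused) < 1e-9
```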
  {
    "path": "mmdet3d/ops/spconv/ops.py",
    "content": "# Copyright 2019 Yan Yan\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nimport torch\n\nfrom . import sparse_conv_ext\n\n\ndef get_conv_output_size(input_size, kernel_size, stride, padding, dilation):\n    ndim = len(input_size)\n    output_size = []\n    for i in range(ndim):\n        size = (input_size[i] + 2 * padding[i] - dilation[i] *\n                (kernel_size[i] - 1) - 1) // stride[i] + 1\n        if kernel_size[i] == -1:\n            output_size.append(1)\n        else:\n            output_size.append(size)\n    return output_size\n\n\ndef get_deconv_output_size(input_size, kernel_size, stride, padding, dilation,\n                           output_padding):\n    ndim = len(input_size)\n    output_size = []\n    for i in range(ndim):\n        if kernel_size[i] == -1:\n            raise ValueError(\"deconv don't support kernel_size < 0\")\n        size = (input_size[i] - 1) * stride[i] - 2 * padding[i] + kernel_size[\n            i] + output_padding[i]\n        output_size.append(size)\n    return output_size\n\n\ndef get_indice_pairs(indices,\n                     batch_size,\n                     spatial_shape,\n                     ksize=3,\n                     stride=1,\n                     padding=0,\n                     dilation=1,\n                     out_padding=0,\n                     subm=False,\n                     transpose=False,\n                     grid=None):\n    ndim = indices.shape[1] - 1\n    if not isinstance(ksize, (list, tuple)):\n        ksize = [ksize] * ndim\n    if not isinstance(stride, (list, tuple)):\n        stride = [stride] * ndim\n    if not isinstance(padding, (list, tuple)):\n        padding = [padding] * ndim\n    if not isinstance(dilation, (list, tuple)):\n        dilation = [dilation] * ndim\n    if not isinstance(out_padding, (list, tuple)):\n        out_padding = [out_padding] * ndim\n\n    for d, s in zip(dilation, stride):\n        assert any([s == 1, d == 1]), \"don't support this.\"\n\n    if not subm:\n        if transpose:\n            out_shape = get_deconv_output_size(spatial_shape, ksize, stride,\n                                               padding, dilation, out_padding)\n        else:\n            out_shape = get_conv_output_size(spatial_shape, ksize, stride,\n                                             padding, dilation)\n\n    else:\n        out_shape = spatial_shape\n    if grid is None:\n        if ndim == 2:\n            get_indice_pairs_func = sparse_conv_ext.get_indice_pairs_2d\n        elif ndim == 3:\n            get_indice_pairs_func = sparse_conv_ext.get_indice_pairs_3d\n        elif ndim == 4:\n            get_indice_pairs_func = sparse_conv_ext.get_indice_pairs_4d\n        else:\n            raise NotImplementedError\n        return get_indice_pairs_func(indices, batch_size, out_shape,\n                                     spatial_shape, ksize, stride, padding,\n                                     dilation, out_padding, int(subm),\n                                     
int(transpose))\n    else:\n        if ndim == 2:\n            get_indice_pairs_func = sparse_conv_ext.get_indice_pairs_grid_2d\n        elif ndim == 3:\n            get_indice_pairs_func = sparse_conv_ext.get_indice_pairs_grid_3d\n        else:\n            raise NotImplementedError\n        return get_indice_pairs_func(indices, grid, batch_size, out_shape,\n                                     spatial_shape, ksize, stride, padding,\n                                     dilation, out_padding, int(subm),\n                                     int(transpose))\n\n\ndef indice_conv(features,\n                filters,\n                indice_pairs,\n                indice_pair_num,\n                num_activate_out,\n                inverse=False,\n                subm=False):\n    if filters.dtype == torch.float32:\n        return sparse_conv_ext.indice_conv_fp32(features, filters,\n                                                indice_pairs, indice_pair_num,\n                                                num_activate_out, int(inverse),\n                                                int(subm))\n    elif filters.dtype == torch.half:\n        return sparse_conv_ext.indice_conv_half(features, filters,\n                                                indice_pairs, indice_pair_num,\n                                                num_activate_out, int(inverse),\n                                                int(subm))\n    else:\n        raise NotImplementedError\n\n\ndef fused_indice_conv(features, filters, bias, indice_pairs, indice_pair_num,\n                      num_activate_out, inverse, subm):\n    if features.dtype == torch.half:\n        func = sparse_conv_ext.fused_indice_conv_half\n    elif filters.dtype == torch.float32:\n        func = sparse_conv_ext.fused_indice_conv_fp32\n    else:\n        raise NotImplementedError\n\n    return func(features, filters, bias, indice_pairs, indice_pair_num,\n                num_activate_out, int(inverse), int(subm))\n\n\ndef indice_conv_backward(features,\n                         filters,\n                         out_bp,\n                         indice_pairs,\n                         indice_pair_num,\n                         inverse=False,\n                         subm=False):\n    if filters.dtype == torch.float32:\n        return sparse_conv_ext.indice_conv_backward_fp32(\n            features, filters, out_bp, indice_pairs, indice_pair_num,\n            int(inverse), int(subm))\n    elif filters.dtype == torch.half:\n        return sparse_conv_ext.indice_conv_backward_half(\n            features, filters, out_bp, indice_pairs, indice_pair_num,\n            int(inverse), int(subm))\n    else:\n        raise NotImplementedError\n\n\ndef indice_maxpool(features, indice_pairs, indice_pair_num, num_activate_out):\n    if features.dtype == torch.float32:\n        return sparse_conv_ext.indice_maxpool_fp32(features, indice_pairs,\n                                                   indice_pair_num,\n                                                   num_activate_out)\n    elif features.dtype == torch.half:\n        return sparse_conv_ext.indice_maxpool_half(features, indice_pairs,\n                                                   indice_pair_num,\n                                                   num_activate_out)\n    else:\n        raise NotImplementedError\n\n\ndef indice_maxpool_backward(features, out_features, out_bp, indice_pairs,\n                            indice_pair_num):\n    if features.dtype == torch.float32:\n        return 
sparse_conv_ext.indice_maxpool_backward_fp32(\n            features, out_features, out_bp, indice_pairs, indice_pair_num)\n    elif features.dtype == torch.half:\n        return sparse_conv_ext.indice_maxpool_backward_half(\n            features, out_features, out_bp, indice_pairs, indice_pair_num)\n    else:\n        raise NotImplementedError\n"
  },
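get_conv_output_size and get_deconv_output_size apply the usual per-dimension size formulas for (dilated) convolution and transposed convolution. A one-dimension sketch with made-up sizes shows the two formulas and that the deconv case undoes the conv case:

```python
def conv_out_size(n, k, s, p, d):
    # per-dimension formula used in get_conv_output_size
    return (n + 2 * p - d * (k - 1) - 1) // s + 1


def deconv_out_size(n, k, s, p, out_p):
    # per-dimension formula used in get_deconv_output_size (dilation is unused there)
    return (n - 1) * s - 2 * p + k + out_p


assert conv_out_size(41, k=3, s=2, p=1, d=1) == 21
assert deconv_out_size(21, k=3, s=2, p=1, out_p=0) == 41
```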
  {
    "path": "mmdet3d/ops/spconv/overwrite_spconv/write_spconv2.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\r\nimport itertools\r\n\r\nfrom mmcv.cnn.bricks.registry import CONV_LAYERS\r\nfrom torch.nn.parameter import Parameter\r\n\r\n\r\ndef register_spconv2():\r\n    \"\"\"This func registers spconv2.0 spconv ops to overwrite the default mmcv\r\n    spconv ops.\"\"\"\r\n    try:\r\n        from spconv.pytorch import (SparseConv2d, SparseConv3d, SparseConv4d,\r\n                                    SparseConvTranspose2d,\r\n                                    SparseConvTranspose3d, SparseInverseConv2d,\r\n                                    SparseInverseConv3d, SparseModule,\r\n                                    SubMConv2d, SubMConv3d, SubMConv4d)\r\n    except ImportError:\r\n        return False\r\n    else:\r\n        CONV_LAYERS._register_module(SparseConv2d, 'SparseConv2d', force=True)\r\n        CONV_LAYERS._register_module(SparseConv3d, 'SparseConv3d', force=True)\r\n        CONV_LAYERS._register_module(SparseConv4d, 'SparseConv4d', force=True)\r\n\r\n        CONV_LAYERS._register_module(\r\n            SparseConvTranspose2d, 'SparseConvTranspose2d', force=True)\r\n        CONV_LAYERS._register_module(\r\n            SparseConvTranspose3d, 'SparseConvTranspose3d', force=True)\r\n\r\n        CONV_LAYERS._register_module(\r\n            SparseInverseConv2d, 'SparseInverseConv2d', force=True)\r\n        CONV_LAYERS._register_module(\r\n            SparseInverseConv3d, 'SparseInverseConv3d', force=True)\r\n\r\n        CONV_LAYERS._register_module(SubMConv2d, 'SubMConv2d', force=True)\r\n        CONV_LAYERS._register_module(SubMConv3d, 'SubMConv3d', force=True)\r\n        CONV_LAYERS._register_module(SubMConv4d, 'SubMConv4d', force=True)\r\n        # SparseModule._load_from_state_dict = _load_from_state_dict\r\n        # SparseModule._save_to_state_dict = _save_to_state_dict\r\n        return True\r\n\r\n\r\ndef _save_to_state_dict(self, destination, prefix, keep_vars):\r\n    \"\"\"Rewrite this func to compat the convolutional kernel weights between\r\n    spconv 1.x in MMCV and 2.x in spconv2.x.\r\n\r\n    Kernel weights in MMCV spconv has shape in (D,H,W,in_channel,out_channel) ,\r\n    while those in spcon2.x is in (out_channel,D,H,W,in_channel).\r\n    \"\"\"\r\n    for name, param in self._parameters.items():\r\n        if param is not None:\r\n            param = param if keep_vars else param.detach()\r\n            if name == 'weight':\r\n                dims = list(range(1, len(param.shape))) + [0]\r\n                param = param.permute(*dims)\r\n            destination[prefix + name] = param\r\n    for name, buf in self._buffers.items():\r\n        if buf is not None and name not in self._non_persistent_buffers_set:\r\n            destination[prefix + name] = buf if keep_vars else buf.detach()\r\n\r\n\r\ndef _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,\r\n                          missing_keys, unexpected_keys, error_msgs):\r\n    \"\"\"Rewrite this func to compat the convolutional kernel weights between\r\n    spconv 1.x in MMCV and 2.x in spconv2.x.\r\n\r\n    Kernel weights in MMCV spconv has shape in (D,H,W,in_channel,out_channel) ,\r\n    while those in spcon2.x is in (out_channel,D,H,W,in_channel).\r\n    \"\"\"\r\n    for hook in self._load_state_dict_pre_hooks.values():\r\n        hook(state_dict, prefix, local_metadata, strict, missing_keys,\r\n             unexpected_keys, error_msgs)\r\n\r\n    local_name_params = itertools.chain(self._parameters.items(),\r\n                       
                 self._buffers.items())\r\n    local_state = {k: v.data for k, v in local_name_params if v is not None}\r\n\r\n    for name, param in local_state.items():\r\n        key = prefix + name\r\n        if key in state_dict:\r\n            input_param = state_dict[key]\r\n\r\n            # Backward compatibility: loading 1-dim tensor from\r\n            # 0.3.* to version 0.4+\r\n            if len(param.shape) == 0 and len(input_param.shape) == 1:\r\n                input_param = input_param[0]\r\n            dims = [len(input_param.shape) - 1] + list(\r\n                range(len(input_param.shape) - 1))\r\n            input_param = input_param.permute(*dims)\r\n            if input_param.shape != param.shape:\r\n                # local shape should match the one in checkpoint\r\n                error_msgs.append(\r\n                    f'size mismatch for {key}: copying a param with '\r\n                    f'shape {key, input_param.shape} from checkpoint,'\r\n                    f'the shape in current model is {param.shape}.')\r\n                continue\r\n\r\n            if isinstance(input_param, Parameter):\r\n                # backwards compatibility for serialized parameters\r\n                input_param = input_param.data\r\n            try:\r\n                param.copy_(input_param)\r\n            except Exception:\r\n                error_msgs.append(\r\n                    f'While copying the parameter named \"{key}\", whose '\r\n                    f'dimensions in the model are {param.size()} and whose '\r\n                    f'dimensions in the checkpoint are {input_param.size()}.')\r\n        elif strict:\r\n            missing_keys.append(key)\r\n\r\n    if strict:\r\n        for key, input_param in state_dict.items():\r\n            if key.startswith(prefix):\r\n                input_name = key[len(prefix):]\r\n                input_name = input_name.split(\r\n                    '.', 1)[0]  # get the name of param/buffer/child\r\n                if input_name not in self._modules \\\r\n                        and input_name not in local_state:\r\n                    unexpected_keys.append(key)"
  },
  {
    "path": "mmdet3d/ops/spconv/pool.py",
    "content": "# Copyright 2019 Yan Yan\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nfrom . import functional as Fsp\nfrom . import ops\nfrom .modules import SparseModule\nfrom .structure import SparseConvTensor\n\n\nclass SparseMaxPool(SparseModule):\n\n    def __init__(self,\n                 ndim,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 subm=False):\n        super(SparseMaxPool, self).__init__()\n        if not isinstance(kernel_size, (list, tuple)):\n            kernel_size = [kernel_size] * ndim\n        if not isinstance(stride, (list, tuple)):\n            stride = [stride] * ndim\n        if not isinstance(padding, (list, tuple)):\n            padding = [padding] * ndim\n        if not isinstance(dilation, (list, tuple)):\n            dilation = [dilation] * ndim\n\n        self.ndim = ndim\n        self.kernel_size = kernel_size\n        self.stride = stride\n        self.padding = padding\n        self.subm = subm\n        self.dilation = dilation\n\n    def forward(self, input):\n        assert isinstance(input, SparseConvTensor)\n        features = input.features\n        device = features.device\n        indices = input.indices\n        spatial_shape = input.spatial_shape\n        batch_size = input.batch_size\n        if not self.subm:\n            out_spatial_shape = ops.get_conv_output_size(\n                spatial_shape, self.kernel_size, self.stride, self.padding,\n                self.dilation)\n        else:\n            out_spatial_shape = spatial_shape\n        outids, indice_pairs, indice_pairs_num = ops.get_indice_pairs(\n            indices, batch_size, spatial_shape, self.kernel_size, self.stride,\n            self.padding, self.dilation, 0, self.subm)\n\n        out_features = Fsp.indice_maxpool(features, indice_pairs.to(device),\n                                          indice_pairs_num.to(device),\n                                          outids.shape[0])\n        out_tensor = SparseConvTensor(out_features, outids, out_spatial_shape,\n                                      batch_size)\n        out_tensor.indice_dict = input.indice_dict\n        out_tensor.grid = input.grid\n        return out_tensor\n\n\nclass SparseMaxPool2d(SparseMaxPool):\n\n    def __init__(self, kernel_size, stride=1, padding=0, dilation=1):\n        super(SparseMaxPool2d, self).__init__(2, kernel_size, stride, padding,\n                                              dilation)\n\n\nclass SparseMaxPool3d(SparseMaxPool):\n\n    def __init__(self, kernel_size, stride=1, padding=0, dilation=1):\n        super(SparseMaxPool3d, self).__init__(3, kernel_size, stride, padding,\n                                              dilation)\n"
  },
  {
    "path": "mmdet3d/ops/spconv/src/all.cc",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cuda_runtime_api.h>\n#include <spconv/fused_spconv_ops.h>\n#include <spconv/pool_ops.h>\n#include <spconv/spconv_ops.h>\n#include <torch/extension.h>\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"get_indice_pairs_2d\", &spconv::getIndicePair<2>,\n        \"get_indice_pairs_2d\");\n  m.def(\"get_indice_pairs_3d\", &spconv::getIndicePair<3>,\n        \"get_indice_pairs_3d\");\n  m.def(\"get_indice_pairs_4d\", &spconv::getIndicePair<4>,\n        \"get_indice_pairs_4d\");\n  m.def(\"get_indice_pairs_grid_2d\", &spconv::getIndicePairPreGrid<2>,\n        \"get_indice_pairs_grid_2d\");\n  m.def(\"get_indice_pairs_grid_3d\", &spconv::getIndicePairPreGrid<3>,\n        \"get_indice_pairs_grid_3d\");\n  m.def(\"indice_conv_fp32\", &spconv::indiceConv<float>, \"indice_conv_fp32\");\n  m.def(\"indice_conv_backward_fp32\", &spconv::indiceConvBackward<float>,\n        \"indice_conv_backward_fp32\");\n  m.def(\"indice_conv_half\", &spconv::indiceConv<at::Half>, \"indice_conv_half\");\n  m.def(\"indice_conv_backward_half\", &spconv::indiceConvBackward<at::Half>,\n        \"indice_conv_backward_half\");\n  m.def(\"fused_indice_conv_fp32\", &spconv::fusedIndiceConvBatchNorm<float>,\n        \"fused_indice_conv_fp32\");\n  m.def(\"fused_indice_conv_half\", &spconv::fusedIndiceConvBatchNorm<at::Half>,\n        \"fused_indice_conv_half\");\n  m.def(\"indice_maxpool_fp32\", &spconv::indiceMaxPool<float>,\n        \"indice_maxpool_fp32\");\n  m.def(\"indice_maxpool_backward_fp32\", &spconv::indiceMaxPoolBackward<float>,\n        \"indice_maxpool_backward_fp32\");\n  m.def(\"indice_maxpool_half\", &spconv::indiceMaxPool<at::Half>,\n        \"indice_maxpool_half\");\n  m.def(\"indice_maxpool_backward_half\",\n        &spconv::indiceMaxPoolBackward<at::Half>,\n        \"indice_maxpool_backward_half\");\n}\n"
  },
  {
    "path": "mmdet3d/ops/spconv/src/indice.cc",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <spconv/geometry.h>\n#include <spconv/indice.h>\n#include <spconv/spconv_ops.h>\n#include <torch/script.h>\n\nnamespace spconv {\n\nnamespace functor {\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\nstruct CreateConvIndicePairFunctor<tv::CPU, Index, IndexGrid, NDim> {\n  Index operator()(const tv::CPU& d, tv::TensorView<const Index> indicesIn,\n                   tv::TensorView<Index> indicesOut,\n                   tv::TensorView<IndexGrid> gridsOut,\n                   tv::TensorView<Index> indicePairs,\n                   tv::TensorView<Index> indiceNum,\n                   const tv::SimpleVector<Index, NDim> kernelSize,\n                   const tv::SimpleVector<Index, NDim> stride,\n                   const tv::SimpleVector<Index, NDim> padding,\n                   const tv::SimpleVector<Index, NDim> dilation,\n                   const tv::SimpleVector<Index, NDim> outSpatialShape,\n                   bool transpose, bool resetGrid) {\n    if (transpose)\n      return getIndicePairsDeConv<Index, IndexGrid, NDim>(\n          indicesIn, indicesOut, gridsOut, indicePairs, indiceNum,\n          kernelSize.data(), stride.data(), padding.data(), dilation.data(),\n          outSpatialShape.data());\n    else\n      return getIndicePairsConv<Index, IndexGrid, NDim>(\n          indicesIn, indicesOut, gridsOut, indicePairs, indiceNum,\n          kernelSize.data(), stride.data(), padding.data(), dilation.data(),\n          outSpatialShape.data());\n  }\n};\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\nstruct CreateSubMIndicePairFunctor<tv::CPU, Index, IndexGrid, NDim> {\n  Index operator()(const tv::CPU& d, tv::TensorView<const Index> indicesIn,\n                   tv::TensorView<IndexGrid> gridsOut,\n                   tv::TensorView<Index> indicePairs,\n                   tv::TensorView<Index> indiceNum,\n                   const tv::SimpleVector<Index, NDim> kernelSize,\n                   const tv::SimpleVector<Index, NDim> stride,\n                   const tv::SimpleVector<Index, NDim> padding,\n                   const tv::SimpleVector<Index, NDim> dilation,\n                   const tv::SimpleVector<Index, NDim> outSpatialShape,\n                   bool transpose, bool resetGrid) {\n    return getIndicePairsSubM<Index, IndexGrid, NDim>(\n        indicesIn, gridsOut, indicePairs, indiceNum, kernelSize.data(),\n        stride.data(), padding.data(), dilation.data(), outSpatialShape.data());\n  }\n};\n}  // namespace functor\n\n#define DECLARE_CPU_SPECS_INDEX_NDIM(Index, NDIM)                           \\\n  template struct functor::CreateConvIndicePairFunctor<tv::CPU, Index, int, \\\n                                                       NDIM>;               \\\n  template struct functor::CreateSubMIndicePairFunctor<tv::CPU, Index, int, \\\n                                                       NDIM>;\n\n#define DECLARE_CPU_INDEX(Index)       
   \\\n  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 1); \\\n  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 2); \\\n  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 3); \\\n  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 4);\n\nDECLARE_CPU_INDEX(int);\nDECLARE_CPU_INDEX(long);\n\n#undef DECLARE_CPU_INDEX\n#undef DECLARE_CPU_SPECS_INDEX_NDIM\n\n}  // namespace spconv\n"
  },
  {
    "path": "mmdet3d/ops/spconv/src/indice_cuda.cu",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ATen/ATen.h>\n#include <spconv/indice.cu.h>\n#include <spconv/indice.h>\n#include <spconv/mp_helper.h>\n#include <tensorview/helper_launch.h>\n#include <tensorview/tensorview.h>\n#include <utility/timer.h>\n\n#include <chrono>\n#include <limits>\n#include <type_traits>\n\nnamespace spconv {\nnamespace functor {\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\nstruct CreateConvIndicePairFunctorP1<tv::GPU, Index, IndexGrid, NDim> {\n  Index operator()(const tv::GPU &d, tv::TensorView<const Index> indicesIn,\n                   tv::TensorView<Index> indicesOut,\n                   tv::TensorView<IndexGrid> gridsOut,\n                   tv::TensorView<Index> indicePairs,\n                   tv::TensorView<Index> indiceNum,\n                   tv::TensorView<Index> indicePairUnique,\n                   const tv::SimpleVector<Index, NDim> kernelSize,\n                   const tv::SimpleVector<Index, NDim> stride,\n                   const tv::SimpleVector<Index, NDim> padding,\n                   const tv::SimpleVector<Index, NDim> dilation,\n                   const tv::SimpleVector<Index, NDim> outSpatialShape,\n                   bool transpose) {\n    Index batchSize = gridsOut.dim(0);\n    auto numActIn = indicesIn.dim(0);\n    if (numActIn == 0) return 0;\n    // auto timer = spconv::CudaContextTimer<>();\n    if (transpose)\n      prepareDeConvIndicePairsKernel<Index, IndexGrid, NDim, 4096>\n          <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,\n             d.getStream()>>>(indicesIn, indicesOut, gridsOut, indicePairs,\n                              indiceNum, indicePairUnique, kernelSize, stride,\n                              padding, dilation, outSpatialShape);\n    else\n      prepareIndicePairsKernel<Index, IndexGrid, NDim, 4096>\n          <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,\n             d.getStream()>>>(indicesIn, indicesOut, gridsOut, indicePairs,\n                              indiceNum, indicePairUnique, kernelSize, stride,\n                              padding, dilation, outSpatialShape);\n    TV_CHECK_CUDA_ERR();\n    // std::cout << \"p1 gene time \" << timer.report() / 1000.0 << std::endl;\n    return 1;\n  }\n};\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\nstruct CreateConvIndicePairFunctorP2<tv::GPU, Index, IndexGrid, NDim> {\n  Index operator()(const tv::GPU &d, tv::TensorView<const Index> indicesIn,\n                   tv::TensorView<Index> indicesOut,\n                   tv::TensorView<IndexGrid> gridsOut,\n                   tv::TensorView<Index> indicePairs,\n                   tv::TensorView<Index> indiceNum,\n                   tv::TensorView<Index> indicePairUnique,\n                   const tv::SimpleVector<Index, NDim> outSpatialShape,\n                   bool transpose, bool resetGrid) {\n    Index batchSize = gridsOut.dim(0);\n    auto kernelVolume = 
indicePairs.dim(0);\n    auto numActIn = indicesIn.dim(0);\n    if (numActIn == 0) return 0;\n    Index numAct = indicePairUnique.dim(0) - 1;\n    assignGridAndIndiceOutKernel<Index, IndexGrid, NDim>\n        <<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,\n           d.getStream()>>>(indicesOut, gridsOut, numAct, indicePairs,\n                            indicePairUnique, outSpatialShape, batchSize);\n    TV_CHECK_CUDA_ERR();\n    assignIndicePairsKernel<Index, IndexGrid, NDim>\n        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,\n           d.getStream()>>>(indicesOut, gridsOut, numActIn, indicePairs,\n                            indicePairUnique, outSpatialShape);\n    TV_CHECK_CUDA_ERR();\n    if (resetGrid) {\n      resetGridKernel<Index, IndexGrid, NDim>\n          <<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,\n             d.getStream()>>>(indicePairUnique.data(), gridsOut, numAct);\n      TV_CHECK_CUDA_ERR();\n    }\n    return numAct;\n  }\n};\n\ntemplate <typename Index, typename IndexGrid, unsigned NDim>\nstruct CreateSubMIndicePairFunctor<tv::GPU, Index, IndexGrid, NDim> {\n  Index operator()(const tv::GPU &d, tv::TensorView<const Index> indicesIn,\n                   tv::TensorView<IndexGrid> gridsOut,\n                   tv::TensorView<Index> indicePairs,\n                   tv::TensorView<Index> indiceNum,\n                   const tv::SimpleVector<Index, NDim> kernelSize,\n                   const tv::SimpleVector<Index, NDim> stride,\n                   const tv::SimpleVector<Index, NDim> padding,\n                   const tv::SimpleVector<Index, NDim> dilation,\n                   const tv::SimpleVector<Index, NDim> outSpatialShape,\n                   bool transpose, bool resetGrid) {\n    auto numActIn = indicesIn.dim(0);\n    if (numActIn == 0) return 0;\n    // auto timer = spconv::CudaContextTimer<>();\n    prepareSubMGridKernel<Index, IndexGrid, NDim>\n        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,\n           d.getStream()>>>(indicesIn, gridsOut, outSpatialShape);\n    TV_CHECK_CUDA_ERR();\n    getSubMIndicePairsKernel<Index, IndexGrid, NDim, 4096>\n        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,\n           d.getStream()>>>(indicesIn, gridsOut, indicePairs, indiceNum,\n                            kernelSize, stride, padding, dilation,\n                            outSpatialShape);\n    TV_CHECK_CUDA_ERR();\n    // std::cout << \"subm gene time \" << timer.report() / 1000.0 << std::endl;\n    if (resetGrid) {\n      resetGridSubMKernel<Index, IndexGrid, NDim>\n          <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,\n             d.getStream()>>>(indicesIn.data(), gridsOut, outSpatialShape,\n                              numActIn);\n      TV_CHECK_CUDA_ERR();\n    }\n    return numActIn;\n  }\n};\n}  // namespace functor\n\n#define DECLARE_GPU_SPECS_INDEX_NDIM(Index, NDIM)                             \\\n  template struct functor::CreateConvIndicePairFunctor<tv::GPU, Index, int,   \\\n                                                       NDIM>;                 \\\n  template struct functor::CreateConvIndicePairFunctorP1<tv::GPU, Index, int, \\\n                                                         NDIM>;               \\\n  template struct functor::CreateConvIndicePairFunctorP2<tv::GPU, Index, int, \\\n                                                         NDIM>;               \\\n  template struct 
functor::CreateSubMIndicePairFunctor<tv::GPU, Index, int,   \\\n                                                       NDIM>;\n\n#define DECLARE_GPU_INDEX(Index)          \\\n  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 1); \\\n  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 2); \\\n  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 3); \\\n  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 4);\n\nDECLARE_GPU_INDEX(int);\n\n#undef DECLARE_GPU_INDEX\n#undef DECLARE_GPU_SPECS_INDEX_NDIM\n}  // namespace spconv\n"
  },
  {
    "path": "mmdet3d/ops/spconv/src/maxpool.cc",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <spconv/maxpool.h>\n#include <torch/script.h>\n\nnamespace spconv {\n\nnamespace functor {\ntemplate <typename T, typename Index>\nstruct SparseMaxPoolForwardFunctor<tv::CPU, T, Index> {\n  void operator()(const tv::CPU &d, tv::TensorView<T> outFeatures,\n                  tv::TensorView<const T> inFeatures,\n                  tv::TensorView<const Index> indices, int size) {\n    int stride = outFeatures.dim(1);\n    auto outFeaturesData = outFeatures.data();\n    auto inFeaturesData = inFeatures.data();\n    auto indicesIn = indices.subview(0).data();\n    auto indicesOut = indices.subview(1).data();\n    Index idxi, idxo;\n    for (int row = 0; row < size; row++) {\n      idxi = indicesIn[row] * stride;\n      idxo = indicesOut[row] * stride;\n      for (int plane = 0; plane < stride; ++plane)\n        if (outFeaturesData[idxo + plane] < inFeaturesData[idxi + plane])\n          outFeaturesData[idxo + plane] = inFeaturesData[idxi + plane];\n    }\n  }\n};\n\ntemplate <typename T, typename Index>\nstruct SparseMaxPoolBackwardFunctor<tv::CPU, T, Index> {\n  void operator()(const tv::CPU &d, tv::TensorView<const T> outFeatures,\n                  tv::TensorView<const T> inFeatures,\n                  tv::TensorView<const T> dout, tv::TensorView<T> din,\n                  tv::TensorView<const Index> indices, int size) {\n    int stride = outFeatures.dim(1);\n    auto outFeaturesData = outFeatures.data();\n    auto inFeaturesData = inFeatures.data();\n    auto doutData = dout.data();\n    auto dinData = din.data();\n    auto indicesIn = indices.subview(0).data();\n    auto indicesOut = indices.subview(1).data();\n    Index idxi, idxo;\n    for (int row = 0; row < size; row++) {\n      idxi = indicesIn[row] * stride;\n      idxo = indicesOut[row] * stride;\n      for (int plane = 0; plane < stride; ++plane)\n        if (outFeaturesData[idxo + plane] == inFeaturesData[idxi + plane])\n          dinData[idxi + plane] += doutData[idxo + plane];\n    }\n  }\n};\n}  // namespace functor\n\n#define DECLARE_CPU_SPECS_T_INDEX(T, Index)                                \\\n  template struct functor::SparseMaxPoolForwardFunctor<tv::CPU, T, Index>; \\\n  template struct functor::SparseMaxPoolBackwardFunctor<tv::CPU, T, Index>;\n\n#define DECLARE_CPU_SPECS(T)         \\\n  DECLARE_CPU_SPECS_T_INDEX(T, int); \\\n  DECLARE_CPU_SPECS_T_INDEX(T, long);\n\nDECLARE_CPU_SPECS(float);\nDECLARE_CPU_SPECS(double);\nDECLARE_CPU_SPECS(at::Half);\n\n#undef DECLARE_CPU_SPECS\n#undef DECLARE_CPU_SPECS_T_INDEX\n\n}  // namespace spconv\n"
  },
  {
    "path": "mmdet3d/ops/spconv/src/maxpool_cuda.cu",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ATen/ATen.h>\n#include <spconv/maxpool.h>\n#include <spconv/mp_helper.h>\n#include <tensorview/helper_kernel.cu.h>\n#include <tensorview/helper_launch.h>\n#include <tensorview/tensorview.h>\n\n#include <chrono>\n#include <limits>\n#include <type_traits>\n\nnamespace spconv {\ntemplate <typename T, typename Index, int NumTLP, int NumILP>\n__global__ void maxPoolFwdBlockKernel(T *outFeatures, const T *inFeatures,\n                                      const Index *indicesIn,\n                                      const Index *indicesOut, int numHot,\n                                      int numPlanes) {\n  T in, out;\n  int ILPStrideY[NumILP];\n  Index idxo, idxi;\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideY[ilp] = threadIdx.y + ilp * blockDim.y;\n  outFeatures += blockIdx.y * NumTLP;\n  inFeatures += blockIdx.y * NumTLP;\n  for (int ix = blockIdx.x * blockDim.x; ix < numHot;\n       ix += blockDim.x * gridDim.x) {\n    {\n#pragma unroll\n      for (int ilp = 0; ilp < NumILP; ++ilp) {\n        idxi = indicesIn[ix + ILPStrideY[ilp]] * numPlanes + threadIdx.x;\n        idxo = indicesOut[ix + ILPStrideY[ilp]] * numPlanes + threadIdx.x;\n        in = inFeatures[idxi];\n        out = outFeatures[idxo];\n        if (in > out) {\n          outFeatures[idxo] = in;\n        }\n      }\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP>\n__global__ void maxPoolFwdGenericBlockKernel(T *outFeatures,\n                                             const T *inFeatures,\n                                             const Index *indicesIn,\n                                             const Index *indicesOut,\n                                             int numHot, int numPlanes) {\n  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.\n  int ILPStrideX[NumILP];\n  Index RI[NumILP];\n  Index RO[NumILP];\n  T in, out;\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x;\n  for (int ix : tv::KernelLoopX<int, NumILP>(numHot)) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ilp++) {\n      RI[ilp] = indicesIn[ix + ILPStrideX[ilp]] * numPlanes;\n      RO[ilp] = indicesOut[ix + ILPStrideX[ilp]] * numPlanes;\n    }\n    for (int iy : tv::KernelLoopY<int>(numPlanes)) {\n#pragma unroll\n      for (int ilp = 0; ilp < NumILP; ++ilp) {\n        in = inFeatures[RI[ilp] + iy];\n        out = outFeatures[RO[ilp] + iy];\n        if (in > out) {\n          outFeatures[RO[ilp] + iy] = in;\n        }\n      }\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP, typename VecType>\n__global__ void maxPoolFwdVecBlockKernel(T *outFeatures, const T *inFeatures,\n                                         const Index *indicesIn,\n                                         const Index *indicesOut, int numHot,\n                                         int 
numPlanes) {\n  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.\n  int ILPStrideY[NumILP];\n  constexpr int vecloadFactor = sizeof(VecType) / sizeof(T);\n  T bufi[vecloadFactor];\n  T bufo[vecloadFactor];\n  Index idxi, idxo;\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideY[ilp] = threadIdx.y + ilp * blockDim.y;\n  outFeatures += blockIdx.y * NumTLP;\n  inFeatures += blockIdx.y * NumTLP;\n  for (int ix = blockIdx.x * blockDim.x * vecloadFactor; ix < numHot;\n       ix += blockDim.x * gridDim.x * vecloadFactor) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ++ilp) {\n      idxi = indicesIn[ix + ILPStrideY[ilp]] * numPlanes + threadIdx.x;\n      idxo = indicesOut[ix + ILPStrideY[ilp]] * numPlanes + threadIdx.x;\n      reinterpret_cast<VecType *>(bufo)[0] =\n          reinterpret_cast<VecType *>(outFeatures)[idxo];\n      reinterpret_cast<VecType *>(bufi)[0] =\n          reinterpret_cast<const VecType *>(inFeatures)[idxi];\n#pragma unroll\n      for (int i = 0; i < vecloadFactor; i++) {\n        if (bufi[i] > bufo[i]) {\n          bufo[i] = bufi[i];\n        }\n      }\n      reinterpret_cast<VecType *>(outFeatures)[idxo] =\n          reinterpret_cast<VecType *>(bufo)[0];\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP>\n__global__ void maxPoolFwdGenericKernel(T *outFeatures, const T *inFeatures,\n                                        const Index *indicesIn,\n                                        const Index *indicesOut, int numHot,\n                                        int numPlanes) {\n  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.\n  int ILPStrideX[NumILP];\n  Index RI[NumILP];\n  Index RO[NumILP];\n  T in, out;\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x;\n  for (int ix : tv::KernelLoopX<int, NumILP>(numHot)) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ilp++) {\n      if (ix + ILPStrideX[ilp] < numHot) {\n        RI[ilp] = indicesIn[ix + ILPStrideX[ilp]] * numPlanes;\n        RO[ilp] = indicesOut[ix + ILPStrideX[ilp]] * numPlanes;\n      }\n    }\n    for (int iy : tv::KernelLoopY<int>(numPlanes)) {\n#pragma unroll\n      for (int ilp = 0; ilp < NumILP; ++ilp) {\n        if (ix + ILPStrideX[ilp] < numHot) {\n          in = inFeatures[RI[ilp] + iy];\n          out = outFeatures[RO[ilp] + iy];\n          if (in > out) {\n            outFeatures[RO[ilp] + iy] = in;\n          }\n        }\n      }\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP>\n__global__ void maxPoolBwdBlockKernel(const T *outFeatures, const T *inFeatures,\n                                      const T *dout, T *din,\n                                      const Index *indicesIn,\n                                      const Index *indicesOut, int numHot,\n                                      int numPlanes) {\n  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.\n  T in, out;\n  Index idxo, idxi;\n  int ILPStrideY[NumILP];\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideY[ilp] = threadIdx.y + ilp * blockDim.y;\n  outFeatures += blockIdx.y * NumTLP;\n  inFeatures += blockIdx.y * NumTLP;\n  dout += blockIdx.y * NumTLP;\n  din += blockIdx.y * NumTLP;\n  for (int ix = blockIdx.x * blockDim.x; ix < numHot;\n       ix += blockDim.x * gridDim.x) {\n    {\n#pragma unroll\n      for (int ilp = 0; ilp < NumILP; ++ilp) {\n        idxi = indicesIn[ix + ILPStrideY[ilp]] * 
numPlanes + threadIdx.x;\n        idxo = indicesOut[ix + ILPStrideY[ilp]] * numPlanes + threadIdx.x;\n        in = inFeatures[idxi];\n        out = outFeatures[idxo];\n        if (in == out) {\n          din[idxi] += dout[idxo];\n        }\n      }\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP>\n__global__ void maxPoolBwdGenericBlockKernel(const T *outFeatures,\n                                             const T *inFeatures, const T *dout,\n                                             T *din, const Index *indicesIn,\n                                             const Index *indicesOut,\n                                             int numHot, int numPlanes) {\n  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.\n  int ILPStrideX[NumILP];\n  Index RI[NumILP];\n  Index RO[NumILP];\n  T in, out;\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x;\n  for (int ix : tv::KernelLoopX<int, NumILP>(numHot)) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ilp++) {\n      RI[ilp] = indicesIn[ix + ILPStrideX[ilp]] * numPlanes;\n      RO[ilp] = indicesOut[ix + ILPStrideX[ilp]] * numPlanes;\n    }\n    for (int iy : tv::KernelLoopY<int>(numPlanes)) {\n#pragma unroll\n      for (int ilp = 0; ilp < NumILP; ++ilp) {\n        in = inFeatures[RI[ilp] + iy];\n        out = outFeatures[RO[ilp] + iy];\n        if (in == out) {\n          din[RI[ilp] + iy] += dout[RO[ilp] + iy];\n        }\n      }\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP, typename VecType>\n__global__ void maxPoolBwdVecBlockKernel(const T *outFeatures,\n                                         const T *inFeatures, const T *dout,\n                                         T *din, const Index *indicesIn,\n                                         const Index *indicesOut, int numHot,\n                                         int numPlanes) {\n  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.\n  int ILPStrideY[NumILP];\n  constexpr int vecloadFactor = sizeof(VecType) / sizeof(T);\n  T bufi[vecloadFactor];\n  T bufo[vecloadFactor];\n  T bufdi[vecloadFactor];\n  T bufdo[vecloadFactor];\n  Index idxi, idxo;\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideY[ilp] = threadIdx.y + ilp * blockDim.y;\n  outFeatures += blockIdx.y * NumTLP;\n  inFeatures += blockIdx.y * NumTLP;\n  for (int ix = blockIdx.x * blockDim.x * vecloadFactor; ix < numHot;\n       ix += blockDim.x * gridDim.x * vecloadFactor) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ++ilp) {\n      idxi = indicesIn[ix + ILPStrideY[ilp]] * numPlanes + threadIdx.x;\n      idxo = indicesOut[ix + ILPStrideY[ilp]] * numPlanes + threadIdx.x;\n      reinterpret_cast<VecType *>(bufo)[0] =\n          reinterpret_cast<const VecType *>(outFeatures)[idxo];\n      reinterpret_cast<VecType *>(bufi)[0] =\n          reinterpret_cast<const VecType *>(inFeatures)[idxi];\n      reinterpret_cast<VecType *>(bufdo)[0] =\n          reinterpret_cast<const VecType *>(dout)[idxo];\n      reinterpret_cast<VecType *>(bufdi)[0] =\n          reinterpret_cast<VecType *>(din)[idxi];\n\n#pragma unroll\n      for (int i = 0; i < vecloadFactor; i++) {\n        if (bufi[i] == bufo[i]) {\n          bufdi[i] += bufdo[i];\n        }\n      }\n      reinterpret_cast<VecType *>(din)[idxi] =\n          reinterpret_cast<VecType *>(bufdi)[0];\n    }\n  }\n}\n\ntemplate <typename T, typename Index, int NumTLP, int NumILP>\n__global__ 
void maxPoolBwdGenericKernel(const T *outFeatures,\n                                        const T *inFeatures, const T *dout,\n                                        T *din, const Index *indicesIn,\n                                        const Index *indicesOut, int numHot,\n                                        int numPlanes) {\n  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.\n  int ILPStrideX[NumILP];\n  Index RI[NumILP];\n  Index RO[NumILP];\n  T in, out;\n#pragma unroll\n  for (int ilp = 0; ilp < NumILP; ilp++)\n    ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x;\n  for (int ix : tv::KernelLoopX<int, NumILP>(numHot)) {\n#pragma unroll\n    for (int ilp = 0; ilp < NumILP; ilp++) {\n      if (ix + ILPStrideX[ilp] < numHot) {\n        RI[ilp] = indicesIn[ix + ILPStrideX[ilp]] * numPlanes;\n        RO[ilp] = indicesOut[ix + ILPStrideX[ilp]] * numPlanes;\n      }\n    }\n    for (int iy : tv::KernelLoopY<int>(numPlanes)) {\n#pragma unroll\n      for (int ilp = 0; ilp < NumILP; ++ilp) {\n        if (ix + ILPStrideX[ilp] < numHot) {\n          in = inFeatures[RI[ilp] + iy];\n          out = outFeatures[RO[ilp] + iy];\n          if (in == out) {\n            din[RI[ilp] + iy] += dout[RO[ilp] + iy];\n          }\n        }\n      }\n    }\n  }\n}\n\nnamespace functor {\ntemplate <typename T, typename Index>\nstruct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {\n  using vecload_type_t =\n      std::conditional_t<std::is_same<T, at::Half>::value, int2, int4>;\n  using kernel_block_t = mp_list_c<int, 64, 32, 16>;\n  void operator()(const tv::GPU &d, tv::TensorView<T> outFeatures,\n                  tv::TensorView<const T> inFeatures,\n                  tv::TensorView<const Index> indices, int size) {\n    if (size <= 0) return;\n    int numPlanes = inFeatures.dim(1);\n    bool notFound = true;\n    constexpr int vecloadFactor = sizeof(vecload_type_t) / sizeof(T);\n    mp_for_each<kernel_block_t>([=, &outFeatures, &inFeatures, &indices,\n                                 &notFound](auto NumTLP) {\n      constexpr int NumILP = NumTLP / 4;\n\n      int numHotBlock = (size / NumTLP) * NumTLP;\n      if (notFound) {\n        if (numPlanes % NumTLP == 0) {\n          if (numHotBlock >= NumTLP) {\n            maxPoolFwdVecBlockKernel<T, Index, int(NumTLP), NumILP,\n                                     vecload_type_t>\n                <<<dim3(std::min(size / NumTLP, 512), numPlanes / NumTLP),\n                   dim3(NumTLP / vecloadFactor, NumTLP / NumILP), 0,\n                   d.getStream()>>>(outFeatures.data(), inFeatures.data(),\n                                    indices.subview(0).data(),\n                                    indices.subview(1).data(), numHotBlock,\n                                    numPlanes / vecloadFactor);\n            TV_CHECK_CUDA_ERR();\n          }\n\n          if (size > numHotBlock) {\n            maxPoolFwdGenericKernel<T, Index, int(NumTLP), NumILP>\n                <<<dim3(1, numPlanes / NumTLP), dim3(NumTLP / NumILP, NumTLP),\n                   0, d.getStream()>>>(outFeatures.data(), inFeatures.data(),\n                                       indices.subview(0).data() + numHotBlock,\n                                       indices.subview(1).data() + numHotBlock,\n                                       size - numHotBlock, numPlanes);\n            TV_CHECK_CUDA_ERR();\n          }\n          notFound = false;\n        }\n      }\n    });\n\n    if (notFound) {\n      constexpr int NumTLP = 64;\n      constexpr int NumILP = NumTLP / 
4;\n      int numHotBlock = (size / NumTLP) * NumTLP;\n      if (numHotBlock >= NumTLP) {\n        maxPoolFwdGenericBlockKernel<T, Index, NumTLP, NumILP>\n            <<<dim3(size / NumTLP, tv::launch::DivUp(numPlanes, NumTLP)),\n               dim3(NumTLP / NumILP, NumTLP), 0, d.getStream()>>>(\n                outFeatures.data(), inFeatures.data(),\n                indices.subview(0).data(), indices.subview(1).data(),\n                numHotBlock, numPlanes);\n        TV_CHECK_CUDA_ERR();\n      }\n\n      if (size > numHotBlock) {\n        maxPoolFwdGenericKernel<T, Index, NumTLP, NumILP>\n            <<<dim3(1, tv::launch::DivUp(numPlanes, NumTLP)),\n               dim3(NumTLP / NumILP, NumTLP), 0, d.getStream()>>>(\n                outFeatures.data(), inFeatures.data(),\n                indices.subview(0).data() + numHotBlock,\n                indices.subview(1).data() + numHotBlock, size - numHotBlock,\n                numPlanes);\n        TV_CHECK_CUDA_ERR();\n      }\n    }\n  }\n};\n\ntemplate <typename T, typename Index>\nstruct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {\n  using vecload_type_t =\n      std::conditional_t<std::is_same<T, at::Half>::value, int2, int4>;\n  using kernel_block_t = mp_list_c<int, 64, 32, 16>;\n  void operator()(const tv::GPU &d, tv::TensorView<const T> outFeatures,\n                  tv::TensorView<const T> inFeatures,\n                  tv::TensorView<const T> dout, tv::TensorView<T> din,\n                  tv::TensorView<const Index> indices, int size) {\n    if (size <= 0) return;\n    int numPlanes = inFeatures.dim(1);\n    bool notFound = true;\n    constexpr int vecloadFactor = sizeof(vecload_type_t) / sizeof(T);\n    mp_for_each<kernel_block_t>([=, &outFeatures, &inFeatures, &dout, &din,\n                                 &indices, &notFound](auto NumTLP) {\n      constexpr int NumILP = NumTLP / 4;\n\n      int numHotBlock = (size / NumTLP) * NumTLP;\n      if (notFound) {\n        if (numPlanes % NumTLP == 0) {\n          if (numHotBlock >= NumTLP) {\n            maxPoolBwdVecBlockKernel<T, Index, int(NumTLP), NumILP,\n                                     vecload_type_t>\n                <<<dim3(std::min(size / NumTLP, 512), numPlanes / NumTLP),\n                   dim3(NumTLP / vecloadFactor, NumTLP / NumILP), 0,\n                   d.getStream()>>>(outFeatures.data(), inFeatures.data(),\n                                    dout.data(), din.data(),\n                                    indices.subview(0).data(),\n                                    indices.subview(1).data(), numHotBlock,\n                                    numPlanes / vecloadFactor);\n            TV_CHECK_CUDA_ERR();\n          }\n\n          if (size > numHotBlock) {\n            maxPoolBwdGenericKernel<T, Index, int(NumTLP), NumILP>\n                <<<dim3(1, numPlanes / NumTLP), dim3(NumTLP / NumILP, NumTLP),\n                   0, d.getStream()>>>(outFeatures.data(), inFeatures.data(),\n                                       dout.data(), din.data(),\n                                       indices.subview(0).data() + numHotBlock,\n                                       indices.subview(1).data() + numHotBlock,\n                                       size - numHotBlock, numPlanes);\n            TV_CHECK_CUDA_ERR();\n          }\n          notFound = false;\n        }\n      }\n    });\n\n    if (notFound) {\n      constexpr int NumTLP = 64;\n      constexpr int NumILP = NumTLP / 4;\n      int numHotBlock = (size / NumTLP) * NumTLP;\n      if (numHotBlock >= NumTLP) 
{\n        maxPoolBwdGenericBlockKernel<T, Index, NumTLP, NumILP>\n            <<<dim3(size / NumTLP, tv::launch::DivUp(numPlanes, NumTLP)),\n               dim3(NumTLP / NumILP, NumTLP), 0, d.getStream()>>>(\n                outFeatures.data(), inFeatures.data(), dout.data(), din.data(),\n                indices.subview(0).data(), indices.subview(1).data(),\n                numHotBlock, numPlanes);\n        TV_CHECK_CUDA_ERR();\n      }\n\n      if (size > numHotBlock) {\n        maxPoolBwdGenericKernel<T, Index, NumTLP, NumILP>\n            <<<dim3(1, tv::launch::DivUp(numPlanes, NumTLP)),\n               dim3(NumTLP / NumILP, NumTLP), 0, d.getStream()>>>(\n                outFeatures.data(), inFeatures.data(), dout.data(), din.data(),\n                indices.subview(0).data() + numHotBlock,\n                indices.subview(1).data() + numHotBlock, size - numHotBlock,\n                numPlanes);\n        TV_CHECK_CUDA_ERR();\n      }\n    }\n  }\n};\n\n}  // namespace functor\n\n#define DECLARE_GPU_SPECS_T_INDEX(T, Index)                                \\\n  template struct functor::SparseMaxPoolForwardFunctor<tv::GPU, T, Index>; \\\n  template struct functor::SparseMaxPoolBackwardFunctor<tv::GPU, T, Index>;\n\n#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPECS_T_INDEX(T, int);\n\nDECLARE_GPU_SPECS(float);\nDECLARE_GPU_SPECS(double);\nDECLARE_GPU_SPECS(at::Half);\n\n#undef DECLARE_GPU_SPECS\n#undef DECLARE_GPU_SPECS_T_INDEX\n}  // namespace spconv\n"
  },
  {
    "path": "mmdet3d/ops/spconv/src/reordering.cc",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <spconv/reordering.h>\n#include <torch/script.h>\n\nnamespace spconv {\nnamespace functor {\ntemplate <typename T, typename Index>\nstruct SparseGatherFunctor<tv::CPU, T, Index> {\n  void operator()(const tv::CPU& d, tv::TensorView<T> buffer,\n                  tv::TensorView<const T> features,\n                  tv::TensorView<const Index> indices, int size) {\n    int numPlanes = features.dim(1);\n    for (int i = 0; i < size; ++i) {\n      std::memcpy(buffer.data() + i * numPlanes,\n                  features.data() + indices[i] * numPlanes,\n                  sizeof(T) * numPlanes);\n    }\n  }\n};\n\ntemplate <typename T, typename Index>\nstruct SparseScatterAddFunctor<tv::CPU, T, Index> {\n  void operator()(const tv::CPU& d, tv::TensorView<T> outFeatures,\n                  tv::TensorView<const T> buffer,\n                  tv::TensorView<const Index> indices, int size, bool stable) {\n    int numPlanes = outFeatures.dim(1);\n    const T* buf = buffer.data();\n    T* out = outFeatures.data();\n    for (int i = 0; i < size; ++i) {\n      buf = buffer.data() + i * numPlanes;\n      out = outFeatures.data() + indices[i] * numPlanes;\n      for (int j = 0; j < numPlanes; ++j) {\n        out[j] += buf[j];\n      }\n    }\n  }\n};\n\n}  // namespace functor\n\n#define DECLARE_CPU_SPECS_T_INDEX(T, Index)                        \\\n  template struct functor::SparseGatherFunctor<tv::CPU, T, Index>; \\\n  template struct functor::SparseScatterAddFunctor<tv::CPU, T, Index>;\n\n#define DECLARE_CPU_SPECS(T)         \\\n  DECLARE_CPU_SPECS_T_INDEX(T, int); \\\n  DECLARE_CPU_SPECS_T_INDEX(T, long);\n\nDECLARE_CPU_SPECS(float);\nDECLARE_CPU_SPECS(double);\nDECLARE_CPU_SPECS(at::Half);\n\n#undef DECLARE_CPU_SPECS\n#undef DECLARE_CPU_SPECS_T_INDEX\n\n}  // namespace spconv\n"
  },
  {
    "path": "mmdet3d/ops/spconv/src/reordering_cuda.cu",
    "content": "// Copyright 2019 Yan Yan\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <ATen/ATen.h>\n#include <spconv/mp_helper.h>\n#include <spconv/reordering.cu.h>\n#include <spconv/reordering.h>\n#include <tensorview/helper_kernel.cu.h>\n#include <tensorview/helper_launch.h>\n#include <tensorview/tensorview.h>\n#include <utility/timer.h>\n\n#include <chrono>\n#include <limits>\n#include <type_traits>\n\nnamespace spconv {\nnamespace functor {\ntemplate <typename T, typename Index>\nstruct SparseGatherFunctor<tv::GPU, T, Index> {\n  using vecload_type_t =\n      std::conditional_t<std::is_same<T, at::Half>::value, int2, int4>;\n  using kernel_block_t = mp_list_c<int, 64, 32, 16>;\n  void operator()(const tv::GPU &d, tv::TensorView<T> buffer,\n                  tv::TensorView<const T> features,\n                  tv::TensorView<const Index> indices, int size) {\n    if (size <= 0) return;\n    int numPlanes = features.dim(1);\n    bool notFound = true;\n    constexpr int vecloadFactor = sizeof(vecload_type_t) / sizeof(T);\n    mp_for_each<kernel_block_t>([=, &buffer, &features, &indices,\n                                 &notFound](auto NumTLP) {\n      constexpr int NumILP = NumTLP / 4;\n      // constexpr int NumILP = NumTLP / (64 / (NumTLP / vecloadFactor));\n      int nHotBlock = (size / NumTLP) * NumTLP;\n      if (notFound) {\n        if (numPlanes % NumTLP == 0) {\n          if (nHotBlock >= NumTLP) {\n            gatherVecBlockKernel<T, Index, int(NumTLP), NumILP, vecload_type_t>\n                <<<dim3(numPlanes / NumTLP, size / NumTLP),\n                   dim3(NumTLP / vecloadFactor, NumTLP / NumILP), 0,\n                   d.getStream()>>>(buffer.data(), features.data(),\n                                    indices.data(), nHotBlock,\n                                    numPlanes / vecloadFactor);\n\n            TV_CHECK_CUDA_ERR();\n          }\n          if (size - nHotBlock > 0) {\n            gatherVecKernel<T, Index, int(NumTLP), NumILP, vecload_type_t>\n                <<<dim3(1, numPlanes / NumTLP),\n                   dim3(NumTLP / NumILP, NumTLP / vecloadFactor), 0,\n                   d.getStream()>>>(buffer.data() + nHotBlock * numPlanes,\n                                    features.data(), indices.data() + nHotBlock,\n                                    size - nHotBlock,\n                                    numPlanes / vecloadFactor);\n            TV_CHECK_CUDA_ERR();\n          }\n          notFound = false;\n        }\n      }\n    });\n\n    if (notFound) {\n      constexpr int NumTLP = 64;\n      constexpr int NumILP = NumTLP / 4;\n      gatherGenericKernel<T, Index, NumTLP, NumILP>\n          <<<dim3(tv::launch::DivUp(size, NumTLP),\n                  tv::launch::DivUp(numPlanes, NumTLP)),\n             dim3(NumTLP / NumILP, NumTLP), 0, d.getStream()>>>(\n              buffer.data(), features.data(), indices.data(), size, numPlanes);\n      TV_CHECK_CUDA_ERR();\n    }\n  }\n};\ntemplate <typename T, typename Index>\nstruct 
SparseScatterAddFunctor<tv::GPU, T, Index> {\n  using vecload_type_t =\n      std::conditional_t<std::is_same<T, at::Half>::value, int2, int4>;\n  using kernel_block_t = mp_list_c<int, 64, 32, 16>;\n  void operator()(const tv::GPU &d, tv::TensorView<T> outFeatures,\n                  tv::TensorView<const T> buffer,\n                  tv::TensorView<const Index> indices, int size, bool stable) {\n    if (size <= 0) return;\n    int numPlanes = outFeatures.dim(1);\n    bool notFound = true;\n    constexpr int vecloadFactor =\n        sizeof(vecload_type_t) / sizeof(T);  // important for half.\n    mp_for_each<kernel_block_t>([=, &d, &outFeatures, &buffer, &indices,\n                                 &notFound](auto NumTLP) {\n      // constexpr int NumILP = NumTLP / (64 / (NumTLP / vecloadFactor));\n      constexpr int NumILP = NumTLP / 4;\n      int nHotBlock = (size / NumTLP) * NumTLP;\n      if (notFound) {\n        if (numPlanes % NumTLP == 0) {\n          if (nHotBlock >= NumTLP) {\n            scatterAddVecBlockKernel<T, Index, int(NumTLP), NumILP,\n                                     vecload_type_t>\n                <<<dim3(numPlanes / NumTLP, size / NumTLP),\n                   dim3(NumTLP / vecloadFactor, NumTLP / NumILP), 0,\n                   d.getStream()>>>(outFeatures.data(), buffer.data(),\n                                    indices.data(), nHotBlock,\n                                    numPlanes / vecloadFactor);\n            TV_CHECK_CUDA_ERR();\n          }\n          if (size - nHotBlock > 0) {\n            scatterAddGenericKernel<T, Index, int(NumTLP), NumILP>\n                <<<dim3(1, numPlanes / NumTLP), dim3(NumTLP / NumILP, NumTLP),\n                   0, d.getStream()>>>(\n                    outFeatures.data(), buffer.data() + nHotBlock * numPlanes,\n                    indices.data() + nHotBlock, size - nHotBlock, numPlanes);\n            TV_CHECK_CUDA_ERR();\n          }\n          notFound = false;\n        }\n      }\n    });\n    if (notFound) {\n      constexpr int NumTLP = 64;\n      constexpr int NumILP = NumTLP / 4;\n      scatterAddGenericKernel<T, Index, NumTLP, NumILP>\n          <<<dim3(tv::launch::DivUp(size, NumTLP),\n                  tv::launch::DivUp(numPlanes, NumTLP)),\n             dim3(NumTLP / NumILP, NumTLP), 0, d.getStream()>>>(\n              outFeatures.data(), buffer.data(), indices.data(), size,\n              numPlanes);\n      TV_CHECK_CUDA_ERR();\n    }\n  }\n};\n}  // namespace functor\n\n#define DECLARE_GPU_SPECS_T_INDEX(T, Index)                        \\\n  template struct functor::SparseGatherFunctor<tv::GPU, T, Index>; \\\n  template struct functor::SparseScatterAddFunctor<tv::GPU, T, Index>;\n\n#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPECS_T_INDEX(T, int);\n\nDECLARE_GPU_SPECS(float);\nDECLARE_GPU_SPECS(double);\nDECLARE_GPU_SPECS(at::Half);\n\n#undef DECLARE_GPU_SPECS\n#undef DECLARE_GPU_SPECS_T_INDEX\n}  // namespace spconv\n"
  },
  {
    "path": "mmdet3d/ops/spconv/structure.py",
    "content": "import numpy as np\nimport torch\n\n\ndef scatter_nd(indices, updates, shape):\n    \"\"\"pytorch edition of tensorflow scatter_nd.\n\n    this function don't contain except handle code. so use this carefully when\n    indice repeats, don't support repeat add which is supported in tensorflow.\n    \"\"\"\n    ret = torch.zeros(*shape, dtype=updates.dtype, device=updates.device)\n    ndim = indices.shape[-1]\n    output_shape = list(indices.shape[:-1]) + shape[indices.shape[-1]:]\n    flatted_indices = indices.view(-1, ndim)\n    slices = [flatted_indices[:, i] for i in range(ndim)]\n    slices += [Ellipsis]\n    ret[slices] = updates.view(*output_shape)\n    return ret\n\n\nclass SparseConvTensor(object):\n\n    def __init__(self,\n                 features,\n                 indices,\n                 spatial_shape,\n                 batch_size,\n                 grid=None):\n        \"\"\"\n        Args:\n            grid: pre-allocated grid tensor.\n                  should be used when the volume of spatial shape\n                  is very large.\n        \"\"\"\n        self.features = features\n        self.indices = indices\n        if self.indices.dtype != torch.int32:\n            self.indices.int()\n        self.spatial_shape = spatial_shape\n        self.batch_size = batch_size\n        self.indice_dict = {}\n        self.grid = grid\n\n    @property\n    def spatial_size(self):\n        return np.prod(self.spatial_shape)\n\n    def find_indice_pair(self, key):\n        if key is None:\n            return None\n        if key in self.indice_dict:\n            return self.indice_dict[key]\n        return None\n\n    def dense(self, channels_first=True):\n        output_shape = [self.batch_size] + list(\n            self.spatial_shape) + [self.features.shape[1]]\n        res = scatter_nd(self.indices.long(), self.features, output_shape)\n        if not channels_first:\n            return res\n        ndim = len(self.spatial_shape)\n        trans_params = list(range(0, ndim + 1))\n        trans_params.insert(1, ndim + 1)\n        return res.permute(*trans_params).contiguous()\n\n    @property\n    def sparity(self):\n        return (self.indices.shape[0] / np.prod(self.spatial_shape) /\n                self.batch_size)\n"
  },
  {
    "path": "mmdet3d/ops/spconv/test_utils.py",
    "content": "# Copyright 2019 Yan Yan\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport numpy as np\nimport unittest\n\n\nclass TestCase(unittest.TestCase):\n\n    def _GetNdArray(self, a):\n        if not isinstance(a, np.ndarray):\n            a = np.array(a)\n        return a\n\n    def assertAllEqual(self, a, b):\n        \"\"\"Asserts that two numpy arrays have the same values.\n\n        Args:\n        a: the expected numpy ndarray or anything can be converted to one.\n        b: the actual numpy ndarray or anything can be converted to one.\n        \"\"\"\n        a = self._GetNdArray(a)\n        b = self._GetNdArray(b)\n        self.assertEqual(\n            a.shape, b.shape,\n            'Shape mismatch: expected %s, got %s.' % (a.shape, b.shape))\n        same = (a == b)\n\n        if a.dtype == np.float32 or a.dtype == np.float64:\n            same = np.logical_or(same,\n                                 np.logical_and(np.isnan(a), np.isnan(b)))\n        if not np.all(same):\n            # Prints more details than np.testing.assert_array_equal.\n            diff = np.logical_not(same)\n            if a.ndim:\n                x = a[np.where(diff)]\n                y = b[np.where(diff)]\n                print('not equal where = ', np.where(diff))\n            else:\n                # np.where is broken for scalars\n                x, y = a, b\n            print('not equal lhs = ', x)\n            print('not equal rhs = ', y)\n            np.testing.assert_array_equal(a, b)\n\n    def assertAllClose(self, a, b, rtol=1e-6, atol=1e-6):\n        \"\"\"Asserts that two numpy arrays, or dicts of same, have near values.\n\n        This does not support nested dicts.\n        Args:\n        a: The expected numpy ndarray (or anything can be converted to one), or\n            dict of same. Must be a dict iff `b` is a dict.\n        b: The actual numpy ndarray (or anything can be converted to one), or\n            dict of same. Must be a dict iff `a` is a dict.\n        rtol: relative tolerance.\n        atol: absolute tolerance.\n        Raises:\n        ValueError: if only one of `a` and `b` is a dict.\n        \"\"\"\n        is_a_dict = isinstance(a, dict)\n        if is_a_dict != isinstance(b, dict):\n            raise ValueError(\"Can't compare dict to non-dict, %s vs %s.\" %\n                             (a, b))\n        if is_a_dict:\n            self.assertCountEqual(\n                a.keys(),\n                b.keys(),\n                msg='mismatched keys, expected %s, got %s' %\n                (a.keys(), b.keys()))\n            for k in a:\n                self._assertArrayLikeAllClose(\n                    a[k],\n                    b[k],\n                    rtol=rtol,\n                    atol=atol,\n                    msg='%s: expected %s, got %s.' 
% (k, a, b))\n        else:\n            self._assertArrayLikeAllClose(a, b, rtol=rtol, atol=atol)\n\n    def _assertArrayLikeAllClose(self, a, b, rtol=1e-6, atol=1e-6, msg=None):\n        a = self._GetNdArray(a)\n        b = self._GetNdArray(b)\n        self.assertEqual(\n            a.shape, b.shape,\n            'Shape mismatch: expected %s, got %s.' % (a.shape, b.shape))\n        if not np.allclose(a, b, rtol=rtol, atol=atol):\n            # Prints more details than np.testing.assert_allclose.\n            #\n            # NOTE: numpy.allclose (and numpy.testing.assert_allclose)\n            # checks whether two arrays are element-wise equal within a\n            # tolerance. The relative difference (rtol * abs(b)) and the\n            # absolute difference atol are added together to compare against\n            # the absolute difference between a and b.  Here, we want to\n            # print out which elements violate such conditions.\n            cond = np.logical_or(\n                np.abs(a - b) > atol + rtol * np.abs(b),\n                np.isnan(a) != np.isnan(b))\n            if a.ndim:\n                x = a[np.where(cond)]\n                y = b[np.where(cond)]\n                print('not close where = ', np.where(cond))\n            else:\n                # np.where is broken for scalars\n                x, y = a, b\n            print('not close lhs = ', x)\n            print('not close rhs = ', y)\n            print('not close dif = ', np.abs(x - y))\n            print('not close tol = ', atol + rtol * np.abs(y))\n            print('dtype = %s, shape = %s' % (a.dtype, a.shape))\n            np.testing.assert_allclose(a, b, rtol=rtol, atol=atol, err_msg=msg)\n\n\ndef params_grid(*params):\n    size = len(params)\n    length = 1\n    for p in params:\n        length *= len(p)\n    sizes = [len(p) for p in params]\n    counter = [0] * size\n    total = []\n    for i in range(length):\n        total.append([0] * size)\n    for i in range(length):\n        for j in range(size):\n            total[i][j] = params[j][counter[j]]\n        counter[size - 1] += 1\n        for c in range(size - 1, -1, -1):\n            if (counter[c] == sizes[c] and c > 0):\n                counter[c - 1] += 1\n                counter[c] = 0\n    return total\n\n\ndef generate_sparse_data(shape,\n                         num_points,\n                         num_channels,\n                         integer=False,\n                         data_range=(-1, 1),\n                         with_dense=True,\n                         dtype=np.float32):\n    dense_shape = shape\n    ndim = len(dense_shape)\n    # num_points = np.random.randint(10, 100, size=[batch_size, ndim])\n    num_points = np.array(num_points)\n    # num_points = np.array([3, 2])\n    batch_size = len(num_points)\n    batch_indices = []\n    coors_total = np.stack(\n        np.meshgrid(*[np.arange(0, s) for s in shape]), axis=-1)\n    coors_total = coors_total.reshape(-1, ndim)\n    for i in range(batch_size):\n        np.random.shuffle(coors_total)\n        inds_total = coors_total[:num_points[i]]\n        inds_total = np.pad(\n            inds_total, ((0, 0), (0, 1)), mode='constant', constant_values=i)\n        batch_indices.append(inds_total)\n    if integer:\n        sparse_data = np.random.randint(\n            data_range[0],\n            data_range[1],\n            size=[num_points.sum(), num_channels]).astype(dtype)\n    else:\n        sparse_data = np.random.uniform(\n            data_range[0],\n            data_range[1],\n         
   size=[num_points.sum(), num_channels]).astype(dtype)\n\n    res = {\n        'features': sparse_data.astype(dtype),\n    }\n    if with_dense:\n        dense_data = np.zeros([batch_size, num_channels, *dense_shape],\n                              dtype=sparse_data.dtype)\n        start = 0\n        for i, inds in enumerate(batch_indices):\n            for j, ind in enumerate(inds):\n                dense_slice = (i, slice(None), *ind[:-1])\n                dense_data[dense_slice] = sparse_data[start + j]\n            start += len(inds)\n        res['features_dense'] = dense_data.astype(dtype)\n    batch_indices = np.concatenate(batch_indices, axis=0)\n    res['indices'] = batch_indices.astype(np.int32)\n    return res\n"
  },
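The helpers that close the file above (params_grid and generate_sparse_data) are plain NumPy utilities, so they can be exercised without the compiled ops. A minimal sketch, assuming the test-utility module is importable under the placeholder name test_utils (the real module path is the one given in this file's header, earlier in the listing):

from test_utils import generate_sparse_data, params_grid  # placeholder import path

# Cartesian product of the parameter lists, in row-major order:
# [1, 'a'], [1, 'b'], [1, 'c'], [2, 'a'], [2, 'b'], [2, 'c']
grid = params_grid([1, 2], ['a', 'b', 'c'])
assert len(grid) == 6

# Two samples with 30 and 25 random points on a 10x10x10 grid, 4 channels each.
data = generate_sparse_data([10, 10, 10], [30, 25], 4)
assert data['features'].shape == (55, 4)                    # stacked per-point features
assert data['indices'].shape == (55, 4)                     # grid coords + batch id in the last column
assert data['features_dense'].shape == (2, 4, 10, 10, 10)   # dense counterpart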
  {
    "path": "mmdet3d/ops/voxel/__init__.py",
    "content": "from .scatter_points import DynamicScatter, dynamic_scatter\nfrom .voxelize import Voxelization, voxelization\n\n__all__ = ['Voxelization', 'voxelization', 'dynamic_scatter', 'DynamicScatter']\n"
  },
  {
    "path": "mmdet3d/ops/voxel/scatter_points.py",
    "content": "import torch\nfrom torch import nn\nfrom torch.autograd import Function\n\nfrom .voxel_layer import (dynamic_point_to_voxel_backward,\n                          dynamic_point_to_voxel_forward)\n\n\nclass _dynamic_scatter(Function):\n\n    @staticmethod\n    def forward(ctx, feats, coors, reduce_type='max'):\n        \"\"\"convert kitti points(N, >=3) to voxels.\n\n        Args:\n            feats: [N, C] float tensor. points features to be reduced\n                into voxels.\n            coors: [N, ndim] int tensor. corresponding voxel coordinates\n                (specifically multi-dim voxel index) of each points.\n            reduce_type: str. reduce op. support 'max', 'sum' and 'mean'\n        Returns:\n            tuple\n            voxel_feats: [M, C] float tensor. reduced features. input features\n                that shares the same voxel coordinates are reduced to one row\n            coordinates: [M, ndim] int tensor, voxel coordinates.\n        \"\"\"\n        results = dynamic_point_to_voxel_forward(feats, coors, reduce_type)\n        (voxel_feats, voxel_coors, point2voxel_map,\n         voxel_points_count) = results\n        ctx.reduce_type = reduce_type\n        ctx.save_for_backward(feats, voxel_feats, point2voxel_map,\n                              voxel_points_count)\n        ctx.mark_non_differentiable(voxel_coors)\n        return voxel_feats, voxel_coors\n\n    @staticmethod\n    def backward(ctx, grad_voxel_feats, grad_voxel_coors=None):\n        (feats, voxel_feats, point2voxel_map,\n         voxel_points_count) = ctx.saved_tensors\n        grad_feats = torch.zeros_like(feats)\n        # TODO: whether to use index put or use cuda_backward\n        # To use index put, need point to voxel index\n        dynamic_point_to_voxel_backward(grad_feats,\n                                        grad_voxel_feats.contiguous(), feats,\n                                        voxel_feats, point2voxel_map,\n                                        voxel_points_count, ctx.reduce_type)\n        return grad_feats, None, None\n\n\ndynamic_scatter = _dynamic_scatter.apply\n\n\nclass DynamicScatter(nn.Module):\n\n    def __init__(self, voxel_size, point_cloud_range, average_points: bool):\n        super(DynamicScatter, self).__init__()\n        \"\"\"Scatters points into voxels, used in the voxel encoder with\n           dynamic voxelization\n\n        **Note**: The CPU and GPU implementation get the same output, but\n        have numerical difference after summation and division (e.g., 5e-7).\n\n        Args:\n            average_points (bool): whether to use avg pooling to scatter\n                points into voxel voxel_size (list): list [x, y, z] size\n                of three dimension\n            point_cloud_range (list):\n                [x_min, y_min, z_min, x_max, y_max, z_max]\n        \"\"\"\n        self.voxel_size = voxel_size\n        self.point_cloud_range = point_cloud_range\n        self.average_points = average_points\n\n    def forward_single(self, points, coors):\n        reduce = 'mean' if self.average_points else 'max'\n        return dynamic_scatter(points.contiguous(), coors.contiguous(), reduce)\n\n    def forward(self, points, coors):\n        \"\"\"\n        Args:\n            input: NC points\n        \"\"\"\n        if coors.size(-1) == 3:\n            return self.forward_single(points, coors)\n        else:\n            batch_size = coors[-1, 0] + 1\n            voxels, voxel_coors = [], []\n            for i in range(batch_size):\n           
     inds = torch.where(coors[:, 0] == i)\n                voxel, voxel_coor = self.forward_single(\n                    points[inds], coors[inds][:, 1:])\n                coor_pad = nn.functional.pad(\n                    voxel_coor, (1, 0), mode='constant', value=i)\n                voxel_coors.append(coor_pad)\n                voxels.append(voxel)\n            features = torch.cat(voxels, dim=0)\n            feature_coors = torch.cat(voxel_coors, dim=0)\n\n            return features, feature_coors\n\n    def __repr__(self):\n        tmpstr = self.__class__.__name__ + '('\n        tmpstr += 'voxel_size=' + str(self.voxel_size)\n        tmpstr += ', point_cloud_range=' + str(self.point_cloud_range)\n        tmpstr += ', average_points=' + str(self.average_points)\n        tmpstr += ')'\n        return tmpstr\n"
  },
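A minimal usage sketch for DynamicScatter, assuming the voxel_layer extension has been built and a CUDA device is available (the dynamic_point_to_voxel op underneath only has a GPU implementation, see voxelization.h below). Dynamic voxelization from voxelize.py produces the integer voxel coordinates; the sizes and ranges here are illustrative only:

import torch

from mmdet3d.ops.voxel import DynamicScatter, Voxelization

voxel_size = [0.2, 0.2, 4]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]

# random points inside the point cloud range: x, y, z plus one extra feature
points = torch.rand(1000, 4, device='cuda')
points[:, 0] *= 70.4
points[:, 1] = points[:, 1] * 80 - 40
points[:, 2] = points[:, 2] * 4 - 3

# max_num_points=-1 selects dynamic voxelization, which returns only the coords
dynamic_voxelizer = Voxelization(voxel_size, point_cloud_range, max_num_points=-1)
coors = dynamic_voxelizer(points)                  # (1000, 3) int32 voxel coords

scatter = DynamicScatter(voxel_size, point_cloud_range, average_points=True)
voxel_feats, voxel_coors = scatter(points, coors)  # one mean-reduced row per occupied voxel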
  {
    "path": "mmdet3d/ops/voxel/src/scatter_points_cpu.cpp",
    "content": "#include <ATen/TensorUtils.h>\n#include <torch/extension.h>\n// #include \"voxelization.h\"\n\nnamespace {\n\ntemplate <typename T_int>\nvoid determin_max_points_kernel(\n    torch::TensorAccessor<T_int, 2> coor,\n    torch::TensorAccessor<T_int, 1> point_to_voxelidx,\n    torch::TensorAccessor<T_int, 1> num_points_per_voxel,\n    torch::TensorAccessor<T_int, 3> coor_to_voxelidx, int& voxel_num,\n    int& max_points, const int num_points) {\n  int voxelidx, num;\n  for (int i = 0; i < num_points; ++i) {\n    if (coor[i][0] == -1) continue;\n    voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]];\n\n    // record voxel\n    if (voxelidx == -1) {\n      voxelidx = voxel_num;\n      voxel_num += 1;\n      coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]] = voxelidx;\n    }\n\n    // put points into voxel\n    num = num_points_per_voxel[voxelidx];\n    point_to_voxelidx[i] = num;\n    num_points_per_voxel[voxelidx] += 1;\n\n    // update max points per voxel\n    max_points = std::max(max_points, num + 1);\n  }\n\n  return;\n}\n\ntemplate <typename T, typename T_int>\nvoid scatter_point_to_voxel_kernel(\n    const torch::TensorAccessor<T, 2> points,\n    torch::TensorAccessor<T_int, 2> coor,\n    torch::TensorAccessor<T_int, 1> point_to_voxelidx,\n    torch::TensorAccessor<T_int, 3> coor_to_voxelidx,\n    torch::TensorAccessor<T, 3> voxels,\n    torch::TensorAccessor<T_int, 2> voxel_coors, const int num_features,\n    const int num_points, const int NDim) {\n  for (int i = 0; i < num_points; ++i) {\n    int num = point_to_voxelidx[i];\n    int voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]];\n    for (int k = 0; k < num_features; ++k) {\n      voxels[voxelidx][num][k] = points[i][k];\n    }\n    for (int k = 0; k < NDim; ++k) {\n      voxel_coors[voxelidx][k] = coor[i][k];\n    }\n  }\n}\n\n}  // namespace\n\nnamespace voxelization {\n\nstd::vector<at::Tensor> dynamic_point_to_voxel_cpu(\n    const at::Tensor& points, const at::Tensor& voxel_mapping,\n    const std::vector<float> voxel_size, const std::vector<float> coors_range) {\n  // current version tooks about 0.02s_0.03s for one frame on cpu\n  // check device\n  AT_ASSERTM(points.device().is_cpu(), \"points must be a CPU tensor\");\n\n  const int NDim = voxel_mapping.size(1);\n  const int num_points = points.size(0);\n  const int num_features = points.size(1);\n\n  std::vector<int> grid_size(NDim);\n  for (int i = 0; i < NDim; ++i) {\n    grid_size[i] =\n        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);\n  }\n\n  at::Tensor num_points_per_voxel = at::zeros(\n      {\n          num_points,\n      },\n      voxel_mapping.options());\n  at::Tensor coor_to_voxelidx = -at::ones(\n      {grid_size[2], grid_size[1], grid_size[0]}, voxel_mapping.options());\n  at::Tensor point_to_voxelidx = -at::ones(\n      {\n          num_points,\n      },\n      voxel_mapping.options());\n\n  int voxel_num = 0;\n  int max_points = 0;\n  AT_DISPATCH_ALL_TYPES(voxel_mapping.scalar_type(), \"determin_max_point\", [&] {\n    determin_max_points_kernel<scalar_t>(\n        voxel_mapping.accessor<scalar_t, 2>(),\n        point_to_voxelidx.accessor<scalar_t, 1>(),\n        num_points_per_voxel.accessor<scalar_t, 1>(),\n        coor_to_voxelidx.accessor<scalar_t, 3>(), voxel_num, max_points,\n        num_points);\n  });\n\n  at::Tensor voxels =\n      at::zeros({voxel_num, max_points, num_features}, points.options());\n  at::Tensor voxel_coors =\n      at::zeros({voxel_num, NDim}, 
points.options().dtype(at::kInt));\n\n  AT_DISPATCH_ALL_TYPES(points.scalar_type(), \"scatter_point_to_voxel\", [&] {\n    scatter_point_to_voxel_kernel<scalar_t, int>(\n        points.accessor<scalar_t, 2>(), voxel_mapping.accessor<int, 2>(),\n        point_to_voxelidx.accessor<int, 1>(),\n        coor_to_voxelidx.accessor<int, 3>(), voxels.accessor<scalar_t, 3>(),\n        voxel_coors.accessor<int, 2>(), num_features, num_points, NDim);\n  });\n\n  at::Tensor num_points_per_voxel_out =\n      num_points_per_voxel.slice(/*dim=*/0, /*start=*/0, /*end=*/voxel_num);\n  return {voxels, voxel_coors, num_points_per_voxel_out};\n}\n\n}  // namespace voxelization\n"
  },
  {
    "path": "mmdet3d/ops/voxel/src/scatter_points_cuda.cu",
    "content": "#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n#include <torch/types.h>\n\n#include <ATen/cuda/CUDAApplyUtils.cuh>\n\ntypedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;\n\n#define CHECK_CUDA(x) \\\n  TORCH_CHECK(x.device().is_cuda(), #x \" must be a CUDA tensor\")\n#define CHECK_CONTIGUOUS(x) \\\n  TORCH_CHECK(x.is_contiguous(), #x \" must be contiguous\")\n#define CHECK_INPUT(x) \\\n  CHECK_CUDA(x);       \\\n  CHECK_CONTIGUOUS(x)\n\nnamespace {\nint const threadsPerBlock = 512;\nint const maxGridDim = 50000;\n}  // namespace\n\n__device__ __forceinline__ static void reduceMax(float *address, float val) {\n  int *address_as_i = reinterpret_cast<int *>(address);\n  int old = *address_as_i, assumed;\n  do {\n    assumed = old;\n    old = atomicCAS(address_as_i, assumed,\n                    __float_as_int(fmaxf(val, __int_as_float(assumed))));\n  } while (assumed != old || __int_as_float(old) < val);\n}\n\n__device__ __forceinline__ static void reduceMax(double *address, double val) {\n  unsigned long long *address_as_ull =\n      reinterpret_cast<unsigned long long *>(address);\n  unsigned long long old = *address_as_ull, assumed;\n  do {\n    assumed = old;\n    old = atomicCAS(\n        address_as_ull, assumed,\n        __double_as_longlong(fmax(val, __longlong_as_double(assumed))));\n  } while (assumed != old || __longlong_as_double(old) < val);\n}\n\n// get rid of meaningless warnings when compiling host code\n#ifdef __CUDA_ARCH__\n__device__ __forceinline__ static void reduceAdd(float *address, float val) {\n#if (__CUDA_ARCH__ < 200)\n#warning \\\n    \"compute capability lower than 2.x. fall back to use CAS version of atomicAdd for float32\"\n  int *address_as_i = reinterpret_cast<int *>(address);\n  int old = *address_as_i, assumed;\n  do {\n    assumed = old;\n    old = atomicCAS(address_as_i, assumed,\n                    __float_as_int(val + __int_as_float(assumed)));\n  } while (assumed != old);\n#else\n  atomicAdd(address, val);\n#endif\n}\n\n__device__ __forceinline__ static void reduceAdd(double *address, double val) {\n#if (__CUDA_ARCH__ < 600)\n#warning \\\n    \"compute capability lower than 6.x. fall back to use CAS version of atomicAdd for float64\"\n  unsigned long long *address_as_ull =\n      reinterpret_cast<unsigned long long *>(address);\n  unsigned long long old = *address_as_ull, assumed;\n  do {\n    assumed = old;\n    old = atomicCAS(address_as_ull, assumed,\n                    __double_as_longlong(val + __longlong_as_double(assumed)));\n  } while (assumed != old);\n#else\n  atomicAdd(address, val);\n#endif\n}\n#endif\n\ntemplate <typename T_int>\n__global__ void coors_id_kernel(const T_int *coors, const T_int *dim,\n                                int64_t *coors_id, const int num_input,\n                                const int NDim) {\n  for (int x = blockIdx.x * blockDim.x + threadIdx.x; x < num_input;\n       x += gridDim.x * blockDim.x) {\n    const T_int *coor_x = coors + x * NDim;\n    auto coor_id = 0;\n    for (int i = 0; i < NDim && coor_id != -1; i++) {\n      coor_id *= dim[i];\n      auto t = static_cast<int64_t>(coor_x[i]);\n      coor_id = (t < 0) ? 
-1 : coor_id + t;\n    }\n    coors_id[x] = coor_id;\n  }\n}\n\ntemplate <typename T_int>\n__global__ void coors_map_init_kernel(const int64_t *coors_id,\n                                      const T_int *coors_id_argsort,\n                                      int32_t *coors_map, const int num_input) {\n  for (int x = blockIdx.x * blockDim.x + threadIdx.x; x < num_input;\n       x += gridDim.x * blockDim.x) {\n    auto here = coors_id[coors_id_argsort[x]];\n    if (x == 0) {\n      if (here == -1) {  // there is invalid points\n        coors_map[0] = -1;\n      } else {\n        coors_map[0] = 0;\n      }\n      continue;\n    }\n    auto left = coors_id[coors_id_argsort[x - 1]];\n    coors_map[x] = (left < here) ? 1 : 0;\n  }\n}\n\ntemplate <typename T, typename T_int>\n__global__ void feats_reduce_kernel(\n    const T *feats, const T_int *coors, int32_t *coors_map,\n    int32_t *reduce_count,  // shall be 0 at initialization\n    T *reduced_feats,       // shall be 0 at initialization\n    T_int *out_coors, const int num_input, const int num_feats, const int NDim,\n    const reduce_t reduce_type) {\n  for (int x = blockIdx.x * blockDim.x + threadIdx.x; x < num_input;\n       x += gridDim.x * blockDim.x) {\n    int32_t reduce_to = coors_map[x];\n    if (reduce_to == -1) continue;\n\n    const T_int *coors_offset = coors + x * NDim;\n    T_int *out_coors_offset = out_coors + reduce_to * NDim;\n    for (int i = 0; i < NDim; i++) {\n      out_coors_offset[i] = coors_offset[i];\n    }\n\n    const T *feats_offset = feats + x * num_feats;\n    T *reduced_feats_offset = reduced_feats + reduce_to * num_feats;\n    if (reduce_type == reduce_t::MAX) {\n      for (int i = 0; i < num_feats; i++) {\n        reduceMax(&reduced_feats_offset[i], feats_offset[i]);\n      }\n    } else {\n      if (reduce_type == reduce_t::MEAN) {\n        atomicAdd(&reduce_count[reduce_to], static_cast<int32_t>(1));\n      }\n      for (int i = 0; i < num_feats; i++) {\n        reduceAdd(&reduced_feats_offset[i], feats_offset[i]);\n      }\n    }\n  }\n}\n\ntemplate <typename T>\n__global__ void add_reduce_traceback_grad_kernel(\n    T *grad_feats, const T *grad_reduced_feats, const int32_t *coors_map,\n    const int32_t *reduce_count, const int num_input, const int num_feats,\n    const reduce_t reduce_type) {\n  for (int x = blockIdx.x * blockDim.x + threadIdx.x; x < num_input;\n       x += gridDim.x * blockDim.x) {\n    int32_t reduce_to = coors_map[x];\n    if (reduce_to == -1) {\n      continue;\n    }\n\n    const int input_offset = x * num_feats;\n    T *grad_feats_offset = grad_feats + input_offset;\n    const int reduced_offset = reduce_to * num_feats;\n    const T *grad_reduced_feats_offset = grad_reduced_feats + reduced_offset;\n\n    if (reduce_type == reduce_t::SUM) {\n      for (int i = 0; i < num_feats; i++) {\n        grad_feats_offset[i] = grad_reduced_feats_offset[i];\n      }\n    } else if (reduce_type == reduce_t::MEAN) {\n      for (int i = 0; i < num_feats; i++) {\n        grad_feats_offset[i] = grad_reduced_feats_offset[i] /\n                               static_cast<T>(reduce_count[reduce_to]);\n      }\n    }\n  }\n}\n\ntemplate <typename T>\n__global__ void max_reduce_traceback_scatter_idx_kernel(\n    const T *feats, const T *reduced_feats, int32_t *reduce_from,\n    const int32_t *coors_map, const int num_input, const int num_feats) {\n  for (int x = blockIdx.x * blockDim.x + threadIdx.x; x < num_input;\n       x += gridDim.x * blockDim.x) {\n    int32_t reduce_to = coors_map[x];\n\n    
const int input_offset = x * num_feats;\n    const T *feats_offset = feats + input_offset;\n\n    if (reduce_to == -1) {\n      continue;\n    }\n\n    const int reduced_offset = reduce_to * num_feats;\n    const T *reduced_feats_offset = reduced_feats + reduced_offset;\n    int32_t *reduce_from_offset = reduce_from + reduced_offset;\n\n    for (int i = 0; i < num_feats; i++) {\n      if (feats_offset[i] == reduced_feats_offset[i]) {\n        atomicMin(&reduce_from_offset[i], static_cast<int32_t>(x));\n      }\n    }\n  }\n}\n\ntemplate <typename T>\n__global__ void max_reduce_scatter_grad_kernel(T *grad_feats,\n                                               const T *grad_reduced_feats,\n                                               const int32_t *reduce_from,\n                                               const int num_reduced,\n                                               const int num_feats) {\n  for (int x = blockIdx.x * blockDim.x + threadIdx.x; x < num_reduced;\n       x += gridDim.x * blockDim.x) {\n    const int reduced_offset = x * num_feats;\n    const int32_t *scatter_to_offset = reduce_from + reduced_offset;\n    const T *grad_reduced_feats_offset = grad_reduced_feats + reduced_offset;\n\n    for (int i = 0; i < num_feats; i++) {\n      grad_feats[scatter_to_offset[i] * num_feats + i] =\n          grad_reduced_feats_offset[i];\n    }\n  }\n}\n\nnamespace voxelization {\n\nstd::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(\n    const at::Tensor &feats, const at::Tensor &coors,\n    const reduce_t reduce_type) {\n  CHECK_INPUT(feats);\n  CHECK_INPUT(coors);\n\n  const int NDim = coors.size(1);\n  const int num_input = feats.size(0);\n  const int num_feats = feats.size(1);\n\n  auto coors_id = at::empty({num_input}, coors.options().dtype(torch::kInt64));\n  auto coor_space_dim = std::get<0>(coors.max(0)) + 1;\n  auto coors_map_sorted =\n      at::empty({num_input}, coors.options().dtype(torch::kInt32));\n  auto coors_map = at::empty({num_input}, coors.options().dtype(torch::kInt32));\n  auto num_coors = at::zeros({1}, coors.options().dtype(torch::kInt32));\n\n  AT_DISPATCH_INTEGRAL_TYPES(\n      coors.scalar_type(), \"coors_id_kernel\", ([&] {\n        dim3 blocks(std::min(at::cuda::ATenCeilDiv(num_input, threadsPerBlock),\n                             maxGridDim));\n        dim3 threads(threadsPerBlock);\n        coors_id_kernel<<<blocks, threads>>>(\n            coors.data_ptr<scalar_t>(), coor_space_dim.data_ptr<scalar_t>(),\n            coors_id.data_ptr<int64_t>(), num_input, NDim);\n      }));\n  AT_CUDA_CHECK(cudaGetLastError());\n\n  auto coors_id_argsort = coors_id.argsort();\n\n  AT_DISPATCH_INTEGRAL_TYPES(\n      coors_id_argsort.scalar_type(), \"coors_map_init_kernel\", ([&] {\n        dim3 blocks(std::min(at::cuda::ATenCeilDiv(num_input, threadsPerBlock),\n                             maxGridDim));\n        dim3 threads(threadsPerBlock);\n        coors_map_init_kernel<<<blocks, threads>>>(\n            coors_id.data_ptr<int64_t>(), coors_id_argsort.data_ptr<scalar_t>(),\n            coors_map_sorted.data_ptr<int32_t>(), num_input);\n      }));\n  AT_CUDA_CHECK(cudaGetLastError());\n\n  coors_map_sorted = coors_map_sorted.cumsum(0, torch::kInt32);\n  coors_map.index_put_(coors_id_argsort, coors_map_sorted);\n\n  const int num_coors_cpu =\n      coors_map_sorted[-1].cpu().data_ptr<int32_t>()[0] + 1;\n  auto out_coors = at::empty({num_coors_cpu, NDim}, coors.options());\n  auto reduced_feats = at::empty({num_coors_cpu, num_feats}, feats.options());\n  auto 
reduce_count =\n      at::zeros({num_coors_cpu}, coors.options().dtype(torch::kInt32));\n\n  AT_DISPATCH_FLOATING_TYPES(\n      feats.scalar_type(), \"feats_reduce_kernel\", ([&] {\n        using F_t = scalar_t;\n        AT_DISPATCH_INTEGRAL_TYPES(\n            coors.scalar_type(), \"feats_reduce_kernel\", ([&] {\n              using I_t = scalar_t;\n\n              if (reduce_type == reduce_t::MAX)\n                reduced_feats.fill_(-std::numeric_limits<F_t>::infinity());\n              else\n                reduced_feats.fill_(static_cast<F_t>(0));\n\n              dim3 blocks(\n                  std::min(at::cuda::ATenCeilDiv(num_input, threadsPerBlock),\n                           maxGridDim));\n              dim3 threads(threadsPerBlock);\n              feats_reduce_kernel<<<blocks, threads>>>(\n                  feats.data_ptr<F_t>(), coors.data_ptr<I_t>(),\n                  coors_map.data_ptr<int32_t>(),\n                  reduce_count.data_ptr<int32_t>(),\n                  reduced_feats.data_ptr<F_t>(), out_coors.data_ptr<I_t>(),\n                  num_input, num_feats, NDim, reduce_type);\n              if (reduce_type == reduce_t::MEAN)\n                reduced_feats /=\n                    reduce_count.unsqueeze(-1).to(reduced_feats.dtype());\n            }));\n      }));\n  AT_CUDA_CHECK(cudaGetLastError());\n\n  return {reduced_feats, out_coors, coors_map, reduce_count};\n}\n\nvoid dynamic_point_to_voxel_backward_gpu(at::Tensor &grad_feats,\n                                         const at::Tensor &grad_reduced_feats,\n                                         const at::Tensor &feats,\n                                         const at::Tensor &reduced_feats,\n                                         const at::Tensor &coors_map,\n                                         const at::Tensor &reduce_count,\n                                         const reduce_t reduce_type) {\n  CHECK_INPUT(grad_feats);\n  CHECK_INPUT(grad_reduced_feats);\n  CHECK_INPUT(feats);\n  CHECK_INPUT(reduced_feats);\n  CHECK_INPUT(coors_map);\n  CHECK_INPUT(reduce_count);\n\n  const int num_input = feats.size(0);\n  const int num_reduced = reduced_feats.size(0);\n  const int num_feats = feats.size(1);\n\n  grad_feats.fill_(0);\n  // copy voxel grad to points\n\n  if (reduce_type == reduce_t::MEAN || reduce_type == reduce_t::SUM) {\n    AT_DISPATCH_FLOATING_TYPES(\n        grad_reduced_feats.scalar_type(), \"add_reduce_traceback_grad_kernel\",\n        ([&] {\n          dim3 blocks(std::min(\n              at::cuda::ATenCeilDiv(num_input, threadsPerBlock), maxGridDim));\n          dim3 threads(threadsPerBlock);\n          add_reduce_traceback_grad_kernel<<<blocks, threads>>>(\n              grad_feats.data_ptr<scalar_t>(),\n              grad_reduced_feats.data_ptr<scalar_t>(),\n              coors_map.data_ptr<int32_t>(), reduce_count.data_ptr<int32_t>(),\n              num_input, num_feats, reduce_type);\n        }));\n    AT_CUDA_CHECK(cudaGetLastError());\n  } else {\n    auto reduce_from = at::full({num_reduced, num_feats}, num_input,\n                                coors_map.options().dtype(torch::kInt32));\n    AT_DISPATCH_FLOATING_TYPES(\n        grad_reduced_feats.scalar_type(),\n        \"max_reduce_traceback_scatter_idx_kernel\", ([&] {\n          dim3 blocks(std::min(\n              at::cuda::ATenCeilDiv(num_input, threadsPerBlock), maxGridDim));\n          dim3 threads(threadsPerBlock);\n          max_reduce_traceback_scatter_idx_kernel<<<blocks, threads>>>(\n              
feats.data_ptr<scalar_t>(), reduced_feats.data_ptr<scalar_t>(),\n              reduce_from.data_ptr<int32_t>(), coors_map.data_ptr<int32_t>(),\n              num_input, num_feats);\n        }));\n    AT_CUDA_CHECK(cudaGetLastError());\n\n    AT_DISPATCH_FLOATING_TYPES(\n        grad_reduced_feats.scalar_type(),\n        \"max_reduce_traceback_scatter_idx_kernel\", ([&] {\n          dim3 blocks(std::min(\n              at::cuda::ATenCeilDiv(num_reduced, threadsPerBlock), maxGridDim));\n          dim3 threads(threadsPerBlock);\n          max_reduce_scatter_grad_kernel<<<blocks, threads>>>(\n              grad_feats.data_ptr<scalar_t>(),\n              grad_reduced_feats.data_ptr<scalar_t>(),\n              reduce_from.data_ptr<int32_t>(), num_reduced, num_feats);\n        }));\n    AT_CUDA_CHECK(cudaGetLastError());\n  }\n  return;\n}\n\n}  // namespace voxelization\n"
  },
  {
    "path": "mmdet3d/ops/voxel/src/voxelization.cpp",
    "content": "#include <torch/extension.h>\n#include \"voxelization.h\"\n\nnamespace voxelization {\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n  m.def(\"hard_voxelize\", &hard_voxelize, \"hard voxelize\");\n  m.def(\"dynamic_voxelize\", &dynamic_voxelize, \"dynamic voxelization\");\n  m.def(\"dynamic_point_to_voxel_forward\", &dynamic_point_to_voxel_forward, \"dynamic point to voxel forward\");\n  m.def(\"dynamic_point_to_voxel_backward\", &dynamic_point_to_voxel_backward, \"dynamic point to voxel backward\");\n}\n\n} // namespace voxelization\n"
  },
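The bindings above expose the raw ops to Python as mmdet3d.ops.voxel.voxel_layer, which is the module the wrappers in scatter_points.py and voxelize.py import from. A small sketch of calling dynamic_voxelize directly, mirroring the call made inside voxelize.py; the sizes and range are illustrative:

import torch

from mmdet3d.ops.voxel.voxel_layer import dynamic_voxelize

points = torch.rand(100, 4)   # all points fall inside the range used below
coors = points.new_zeros((100, 3), dtype=torch.int)
dynamic_voxelize(points, coors, [0.5, 0.5, 0.5], [0, 0, 0, 10, 10, 10], 3)
# coors is filled in-place with integer voxel indices; any point outside the
# range would have its coordinates marked with -1.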
  {
    "path": "mmdet3d/ops/voxel/src/voxelization.h",
    "content": "#pragma once\n#include <torch/extension.h>\n\ntypedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;\n\nnamespace voxelization {\n\nint hard_voxelize_cpu(const at::Tensor &points, at::Tensor &voxels,\n                      at::Tensor &coors, at::Tensor &num_points_per_voxel,\n                      const std::vector<float> voxel_size,\n                      const std::vector<float> coors_range,\n                      const int max_points, const int max_voxels,\n                      const int NDim = 3);\n\nvoid dynamic_voxelize_cpu(const at::Tensor &points, at::Tensor &coors,\n                          const std::vector<float> voxel_size,\n                          const std::vector<float> coors_range,\n                          const int NDim = 3);\n\nstd::vector<at::Tensor> dynamic_point_to_voxel_cpu(\n    const at::Tensor &points, const at::Tensor &voxel_mapping,\n    const std::vector<float> voxel_size, const std::vector<float> coors_range);\n\n#ifdef WITH_CUDA\nint hard_voxelize_gpu(const at::Tensor &points, at::Tensor &voxels,\n                      at::Tensor &coors, at::Tensor &num_points_per_voxel,\n                      const std::vector<float> voxel_size,\n                      const std::vector<float> coors_range,\n                      const int max_points, const int max_voxels,\n                      const int NDim = 3);\n\nvoid dynamic_voxelize_gpu(const at::Tensor &points, at::Tensor &coors,\n                          const std::vector<float> voxel_size,\n                          const std::vector<float> coors_range,\n                          const int NDim = 3);\n\nstd::vector<torch::Tensor> dynamic_point_to_voxel_forward_gpu(const torch::Tensor &feats,\n                                                              const torch::Tensor &coors,\n                                                              const reduce_t reduce_type);\n\nvoid dynamic_point_to_voxel_backward_gpu(torch::Tensor &grad_feats,\n                                         const torch::Tensor &grad_reduced_feats,\n                                         const torch::Tensor &feats,\n                                         const torch::Tensor &reduced_feats,\n                                         const torch::Tensor &coors_idx,\n                                         const torch::Tensor &reduce_count,\n                                         const reduce_t reduce_type);\n#endif\n\n// Interface for Python\ninline int hard_voxelize(const at::Tensor &points, at::Tensor &voxels,\n                         at::Tensor &coors, at::Tensor &num_points_per_voxel,\n                         const std::vector<float> voxel_size,\n                         const std::vector<float> coors_range,\n                         const int max_points, const int max_voxels,\n                         const int NDim = 3) {\n  if (points.device().is_cuda()) {\n#ifdef WITH_CUDA\n    return hard_voxelize_gpu(points, voxels, coors, num_points_per_voxel,\n                             voxel_size, coors_range, max_points, max_voxels,\n                             NDim);\n#else\n    AT_ERROR(\"Not compiled with GPU support\");\n#endif\n  }\n  return hard_voxelize_cpu(points, voxels, coors, num_points_per_voxel,\n                           voxel_size, coors_range, max_points, max_voxels,\n                           NDim);\n}\n\ninline void dynamic_voxelize(const at::Tensor &points, at::Tensor &coors,\n                             const std::vector<float> voxel_size,\n                             const std::vector<float> 
coors_range,\n                             const int NDim = 3) {\n  if (points.device().is_cuda()) {\n#ifdef WITH_CUDA\n    return dynamic_voxelize_gpu(points, coors, voxel_size, coors_range, NDim);\n#else\n    AT_ERROR(\"Not compiled with GPU support\");\n#endif\n  }\n  return dynamic_voxelize_cpu(points, coors, voxel_size, coors_range, NDim);\n}\n\ninline reduce_t convert_reduce_type(const std::string &reduce_type) {\n  if (reduce_type == \"max\")\n    return reduce_t::MAX;\n  else if (reduce_type == \"sum\")\n    return reduce_t::SUM;\n  else if (reduce_type == \"mean\")\n    return reduce_t::MEAN;\n  else TORCH_CHECK(false, \"do not support reduce type \" + reduce_type)\n  return reduce_t::SUM;\n}\n\ninline std::vector<torch::Tensor> dynamic_point_to_voxel_forward(const torch::Tensor &feats,\n                                                                 const torch::Tensor &coors,\n                                                                 const std::string &reduce_type) {\n  if (feats.device().is_cuda()) {\n#ifdef WITH_CUDA\n    return dynamic_point_to_voxel_forward_gpu(feats, coors, convert_reduce_type(reduce_type));\n#else\n    TORCH_CHECK(false, \"Not compiled with GPU support\");\n#endif\n  }\n  TORCH_CHECK(false, \"do not support cpu yet\");\n  return std::vector<torch::Tensor>();\n}\n\ninline void dynamic_point_to_voxel_backward(torch::Tensor &grad_feats,\n                                            const torch::Tensor &grad_reduced_feats,\n                                            const torch::Tensor &feats,\n                                            const torch::Tensor &reduced_feats,\n                                            const torch::Tensor &coors_idx,\n                                            const torch::Tensor &reduce_count,\n                                            const std::string &reduce_type) {\n  if (grad_feats.device().is_cuda()) {\n#ifdef WITH_CUDA\n    dynamic_point_to_voxel_backward_gpu(\n        grad_feats, grad_reduced_feats, feats, reduced_feats, coors_idx, reduce_count,\n        convert_reduce_type(reduce_type));\n    return;\n#else\n    TORCH_CHECK(false, \"Not compiled with GPU support\");\n#endif\n  }\n  TORCH_CHECK(false, \"do not support cpu yet\");\n}\n\n}  // namespace voxelization\n"
  },
  {
    "path": "mmdet3d/ops/voxel/src/voxelization_cpu.cpp",
    "content": "#include <ATen/TensorUtils.h>\n#include <torch/extension.h>\n// #include \"voxelization.h\"\n\nnamespace {\n\ntemplate <typename T, typename T_int>\nvoid dynamic_voxelize_kernel(const torch::TensorAccessor<T, 2> points,\n                             torch::TensorAccessor<T_int, 2> coors,\n                             const std::vector<float> voxel_size,\n                             const std::vector<float> coors_range,\n                             const std::vector<int> grid_size,\n                             const int num_points, const int num_features,\n                             const int NDim) {\n  const int ndim_minus_1 = NDim - 1;\n  bool failed = false;\n  int coor[NDim];\n  int c;\n\n  for (int i = 0; i < num_points; ++i) {\n    failed = false;\n    for (int j = 0; j < NDim; ++j) {\n      c = floor((points[i][j] - coors_range[j]) / voxel_size[j]);\n      // necessary to rm points out of range\n      if ((c < 0 || c >= grid_size[j])) {\n        failed = true;\n        break;\n      }\n      coor[ndim_minus_1 - j] = c;\n    }\n\n    for (int k = 0; k < NDim; ++k) {\n      if (failed)\n        coors[i][k] = -1;\n      else\n        coors[i][k] = coor[k];\n    }\n  }\n\n  return;\n}\n\ntemplate <typename T, typename T_int>\nvoid hard_voxelize_kernel(const torch::TensorAccessor<T, 2> points,\n                          torch::TensorAccessor<T, 3> voxels,\n                          torch::TensorAccessor<T_int, 2> coors,\n                          torch::TensorAccessor<T_int, 1> num_points_per_voxel,\n                          torch::TensorAccessor<T_int, 3> coor_to_voxelidx,\n                          int& voxel_num, const std::vector<float> voxel_size,\n                          const std::vector<float> coors_range,\n                          const std::vector<int> grid_size,\n                          const int max_points, const int max_voxels,\n                          const int num_points, const int num_features,\n                          const int NDim) {\n  // declare a temp coors\n  at::Tensor temp_coors = at::zeros(\n      {num_points, NDim}, at::TensorOptions().dtype(at::kInt).device(at::kCPU));\n\n  // First use dynamic voxelization to get coors,\n  // then check max points/voxels constraints\n  dynamic_voxelize_kernel<T, int>(points, temp_coors.accessor<int, 2>(),\n                                  voxel_size, coors_range, grid_size,\n                                  num_points, num_features, NDim);\n\n  int voxelidx, num;\n  auto coor = temp_coors.accessor<int, 2>();\n\n  for (int i = 0; i < num_points; ++i) {\n    // T_int* coor = temp_coors.data_ptr<int>() + i * NDim;\n\n    if (coor[i][0] == -1) continue;\n\n    voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]];\n\n    // record voxel\n    if (voxelidx == -1) {\n      voxelidx = voxel_num;\n      if (max_voxels != -1 && voxel_num >= max_voxels) break;\n      voxel_num += 1;\n\n      coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]] = voxelidx;\n\n      for (int k = 0; k < NDim; ++k) {\n        coors[voxelidx][k] = coor[i][k];\n      }\n    }\n\n    // put points into voxel\n    num = num_points_per_voxel[voxelidx];\n    if (max_points == -1 || num < max_points) {\n      for (int k = 0; k < num_features; ++k) {\n        voxels[voxelidx][num][k] = points[i][k];\n      }\n      num_points_per_voxel[voxelidx] += 1;\n    }\n  }\n\n  return;\n}\n\n}  // namespace\n\nnamespace voxelization {\n\nint hard_voxelize_cpu(const at::Tensor& points, at::Tensor& voxels,\n                      
at::Tensor& coors, at::Tensor& num_points_per_voxel,\n                      const std::vector<float> voxel_size,\n                      const std::vector<float> coors_range,\n                      const int max_points, const int max_voxels,\n                      const int NDim = 3) {\n  // current version tooks about 0.02s_0.03s for one frame on cpu\n  // check device\n  AT_ASSERTM(points.device().is_cpu(), \"points must be a CPU tensor\");\n\n  std::vector<int> grid_size(NDim);\n  const int num_points = points.size(0);\n  const int num_features = points.size(1);\n\n  for (int i = 0; i < NDim; ++i) {\n    grid_size[i] =\n        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);\n  }\n\n  // coors, num_points_per_voxel, coor_to_voxelidx are int Tensor\n  // printf(\"cpu coor_to_voxelidx size: [%d, %d, %d]\\n\", grid_size[2],\n  // grid_size[1], grid_size[0]);\n  at::Tensor coor_to_voxelidx =\n      -at::ones({grid_size[2], grid_size[1], grid_size[0]}, coors.options());\n\n  int voxel_num = 0;\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      points.scalar_type(), \"hard_voxelize_forward\", [&] {\n        hard_voxelize_kernel<scalar_t, int>(\n            points.accessor<scalar_t, 2>(), voxels.accessor<scalar_t, 3>(),\n            coors.accessor<int, 2>(), num_points_per_voxel.accessor<int, 1>(),\n            coor_to_voxelidx.accessor<int, 3>(), voxel_num, voxel_size,\n            coors_range, grid_size, max_points, max_voxels, num_points,\n            num_features, NDim);\n      });\n\n  return voxel_num;\n}\n\nvoid dynamic_voxelize_cpu(const at::Tensor& points, at::Tensor& coors,\n                          const std::vector<float> voxel_size,\n                          const std::vector<float> coors_range,\n                          const int NDim = 3) {\n  // check device\n  AT_ASSERTM(points.device().is_cpu(), \"points must be a CPU tensor\");\n\n  std::vector<int> grid_size(NDim);\n  const int num_points = points.size(0);\n  const int num_features = points.size(1);\n\n  for (int i = 0; i < NDim; ++i) {\n    grid_size[i] =\n        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);\n  }\n\n  // coors, num_points_per_voxel, coor_to_voxelidx are int Tensor\n  AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n      points.scalar_type(), \"hard_voxelize_forward\", [&] {\n        dynamic_voxelize_kernel<scalar_t, int>(\n            points.accessor<scalar_t, 2>(), coors.accessor<int, 2>(),\n            voxel_size, coors_range, grid_size, num_points, num_features, NDim);\n      });\n\n  return;\n}\n\n}  // namespace voxelization\n"
  },
  {
    "path": "mmdet3d/ops/voxel/src/voxelization_cuda.cu",
    "content": "#include <ATen/ATen.h>\n#include <ATen/cuda/CUDAContext.h>\n#include <c10/cuda/CUDAGuard.h>\n#include <torch/types.h>\n\n#include <ATen/cuda/CUDAApplyUtils.cuh>\n\n#define CHECK_CUDA(x) \\\n  TORCH_CHECK(x.device().is_cuda(), #x \" must be a CUDA tensor\")\n#define CHECK_CONTIGUOUS(x) \\\n  TORCH_CHECK(x.is_contiguous(), #x \" must be contiguous\")\n#define CHECK_INPUT(x) \\\n  CHECK_CUDA(x);       \\\n  CHECK_CONTIGUOUS(x)\n\nnamespace {\nint const threadsPerBlock = sizeof(unsigned long long) * 8;\n}\n\n#define CUDA_1D_KERNEL_LOOP(i, n)                            \\\n  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \\\n       i += blockDim.x * gridDim.x)\n\ntemplate <typename T, typename T_int>\n__global__ void dynamic_voxelize_kernel(\n    const T* points, T_int* coors, const float voxel_x, const float voxel_y,\n    const float voxel_z, const float coors_x_min, const float coors_y_min,\n    const float coors_z_min, const float coors_x_max, const float coors_y_max,\n    const float coors_z_max, const int grid_x, const int grid_y,\n    const int grid_z, const int num_points, const int num_features,\n    const int NDim) {\n  //   const int index = blockIdx.x * threadsPerBlock + threadIdx.x;\n  CUDA_1D_KERNEL_LOOP(index, num_points) {\n    // To save some computation\n    auto points_offset = points + index * num_features;\n    auto coors_offset = coors + index * NDim;\n    int c_x = floor((points_offset[0] - coors_x_min) / voxel_x);\n    if (c_x < 0 || c_x >= grid_x) {\n      coors_offset[0] = -1;\n      return;\n    }\n\n    int c_y = floor((points_offset[1] - coors_y_min) / voxel_y);\n    if (c_y < 0 || c_y >= grid_y) {\n      coors_offset[0] = -1;\n      coors_offset[1] = -1;\n      return;\n    }\n\n    int c_z = floor((points_offset[2] - coors_z_min) / voxel_z);\n    if (c_z < 0 || c_z >= grid_z) {\n      coors_offset[0] = -1;\n      coors_offset[1] = -1;\n      coors_offset[2] = -1;\n    } else {\n      coors_offset[0] = c_z;\n      coors_offset[1] = c_y;\n      coors_offset[2] = c_x;\n    }\n  }\n}\n\ntemplate <typename T, typename T_int>\n__global__ void assign_point_to_voxel(const int nthreads, const T* points,\n                                      T_int* point_to_voxelidx,\n                                      T_int* coor_to_voxelidx, T* voxels,\n                                      const int max_points,\n                                      const int num_features,\n                                      const int num_points, const int NDim) {\n  CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) {\n    // const int index = blockIdx.x * threadsPerBlock + threadIdx.x;\n    int index = thread_idx / num_features;\n\n    int num = point_to_voxelidx[index];\n    int voxelidx = coor_to_voxelidx[index];\n    if (num > -1 && voxelidx > -1) {\n      auto voxels_offset =\n          voxels + voxelidx * max_points * num_features + num * num_features;\n\n      int k = thread_idx % num_features;\n      voxels_offset[k] = points[thread_idx];\n    }\n  }\n}\n\ntemplate <typename T, typename T_int>\n__global__ void assign_voxel_coors(const int nthreads, T_int* coor,\n                                   T_int* point_to_voxelidx,\n                                   T_int* coor_to_voxelidx, T_int* voxel_coors,\n                                   const int num_points, const int NDim) {\n  CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) {\n    // const int index = blockIdx.x * threadsPerBlock + threadIdx.x;\n    // if (index >= num_points) return;\n    int index = thread_idx / NDim;\n    
int num = point_to_voxelidx[index];\n    int voxelidx = coor_to_voxelidx[index];\n    if (num == 0 && voxelidx > -1) {\n      auto coors_offset = voxel_coors + voxelidx * NDim;\n      int k = thread_idx % NDim;\n      coors_offset[k] = coor[thread_idx];\n    }\n  }\n}\n\ntemplate <typename T_int>\n__global__ void point_to_voxelidx_kernel(const T_int* coor,\n                                         T_int* point_to_voxelidx,\n                                         T_int* point_to_pointidx,\n                                         const int max_points,\n                                         const int max_voxels,\n                                         const int num_points, const int NDim) {\n  CUDA_1D_KERNEL_LOOP(index, num_points) {\n    auto coor_offset = coor + index * NDim;\n    // skip invalid points\n    if ((index >= num_points) || (coor_offset[0] == -1)) return;\n\n    int num = 0;\n    int coor_x = coor_offset[0];\n    int coor_y = coor_offset[1];\n    int coor_z = coor_offset[2];\n    // only calculate the coors before this coor[index]\n    for (int i = 0; i < index; ++i) {\n      auto prev_coor = coor + i * NDim;\n      if (prev_coor[0] == -1) continue;\n\n      // Find all previous points that have the same coors\n      // if find the same coor, record it\n      if ((prev_coor[0] == coor_x) && (prev_coor[1] == coor_y) &&\n          (prev_coor[2] == coor_z)) {\n        num++;\n        if (num == 1) {\n          // point to the same coor that first show up\n          point_to_pointidx[index] = i;\n        } else if (num >= max_points) {\n          // out of boundary\n          return;\n        }\n      }\n    }\n    if (num == 0) {\n      point_to_pointidx[index] = index;\n    }\n    if (num < max_points) {\n      point_to_voxelidx[index] = num;\n    }\n  }\n}\n\ntemplate <typename T_int>\n__global__ void determin_voxel_num(\n    // const T_int* coor,\n    T_int* num_points_per_voxel, T_int* point_to_voxelidx,\n    T_int* point_to_pointidx, T_int* coor_to_voxelidx, T_int* voxel_num,\n    const int max_points, const int max_voxels, const int num_points) {\n  // only calculate the coors before this coor[index]\n  for (int i = 0; i < num_points; ++i) {\n    // if (coor[i][0] == -1)\n    //    continue;\n    int point_pos_in_voxel = point_to_voxelidx[i];\n    // record voxel\n    if (point_pos_in_voxel == -1) {\n      // out of max_points or invalid point\n      continue;\n    } else if (point_pos_in_voxel == 0) {\n      // record new voxel\n      int voxelidx = voxel_num[0];\n      if (voxel_num[0] >= max_voxels) break;\n      voxel_num[0] += 1;\n      coor_to_voxelidx[i] = voxelidx;\n      num_points_per_voxel[voxelidx] = 1;\n    } else {\n      int point_idx = point_to_pointidx[i];\n      int voxelidx = coor_to_voxelidx[point_idx];\n      if (voxelidx != -1) {\n        coor_to_voxelidx[i] = voxelidx;\n        num_points_per_voxel[voxelidx] += 1;\n      }\n    }\n  }\n}\n\nnamespace voxelization {\n\nint hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,\n                      at::Tensor& coors, at::Tensor& num_points_per_voxel,\n                      const std::vector<float> voxel_size,\n                      const std::vector<float> coors_range,\n                      const int max_points, const int max_voxels,\n                      const int NDim = 3) {\n  // current version tooks about 0.04s for one frame on cpu\n  // check device\n  CHECK_INPUT(points);\n\n  at::cuda::CUDAGuard device_guard(points.device());\n\n  const int num_points = points.size(0);\n  
const int num_features = points.size(1);\n\n  const float voxel_x = voxel_size[0];\n  const float voxel_y = voxel_size[1];\n  const float voxel_z = voxel_size[2];\n  const float coors_x_min = coors_range[0];\n  const float coors_y_min = coors_range[1];\n  const float coors_z_min = coors_range[2];\n  const float coors_x_max = coors_range[3];\n  const float coors_y_max = coors_range[4];\n  const float coors_z_max = coors_range[5];\n\n  const int grid_x = round((coors_x_max - coors_x_min) / voxel_x);\n  const int grid_y = round((coors_y_max - coors_y_min) / voxel_y);\n  const int grid_z = round((coors_z_max - coors_z_min) / voxel_z);\n\n  // map points to voxel coors\n  at::Tensor temp_coors =\n      at::zeros({num_points, NDim}, points.options().dtype(at::kInt));\n\n  dim3 grid(std::min(at::cuda::ATenCeilDiv(num_points, 512), 4096));\n  dim3 block(512);\n\n  // 1. link point to corresponding voxel coors\n  AT_DISPATCH_ALL_TYPES(\n      points.scalar_type(), \"hard_voxelize_kernel\", ([&] {\n        dynamic_voxelize_kernel<scalar_t, int>\n            <<<grid, block, 0, at::cuda::getCurrentCUDAStream()>>>(\n                points.contiguous().data_ptr<scalar_t>(),\n                temp_coors.contiguous().data_ptr<int>(), voxel_x, voxel_y,\n                voxel_z, coors_x_min, coors_y_min, coors_z_min, coors_x_max,\n                coors_y_max, coors_z_max, grid_x, grid_y, grid_z, num_points,\n                num_features, NDim);\n      }));\n  cudaDeviceSynchronize();\n  AT_CUDA_CHECK(cudaGetLastError());\n\n  // 2. map point to the idx of the corresponding voxel, find duplicate coor\n  // create some temporary variables\n  auto point_to_pointidx = -at::ones(\n      {\n          num_points,\n      },\n      points.options().dtype(at::kInt));\n  auto point_to_voxelidx = -at::ones(\n      {\n          num_points,\n      },\n      points.options().dtype(at::kInt));\n\n  dim3 map_grid(std::min(at::cuda::ATenCeilDiv(num_points, 512), 4096));\n  dim3 map_block(512);\n  AT_DISPATCH_ALL_TYPES(\n      temp_coors.scalar_type(), \"determin_duplicate\", ([&] {\n        point_to_voxelidx_kernel<int>\n            <<<map_grid, map_block, 0, at::cuda::getCurrentCUDAStream()>>>(\n                temp_coors.contiguous().data_ptr<int>(),\n                point_to_voxelidx.contiguous().data_ptr<int>(),\n                point_to_pointidx.contiguous().data_ptr<int>(), max_points,\n                max_voxels, num_points, NDim);\n      }));\n  cudaDeviceSynchronize();\n  AT_CUDA_CHECK(cudaGetLastError());\n\n  // 3. determin voxel num and voxel's coor index\n  // make the logic in the CUDA device could accelerate about 10 times\n  auto coor_to_voxelidx = -at::ones(\n      {\n          num_points,\n      },\n      points.options().dtype(at::kInt));\n  auto voxel_num = at::zeros(\n      {\n          1,\n      },\n      points.options().dtype(at::kInt));  // must be zero from the begining\n\n  AT_DISPATCH_ALL_TYPES(\n      temp_coors.scalar_type(), \"determin_duplicate\", ([&] {\n        determin_voxel_num<int><<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>(\n            num_points_per_voxel.contiguous().data_ptr<int>(),\n            point_to_voxelidx.contiguous().data_ptr<int>(),\n            point_to_pointidx.contiguous().data_ptr<int>(),\n            coor_to_voxelidx.contiguous().data_ptr<int>(),\n            voxel_num.contiguous().data_ptr<int>(), max_points, max_voxels,\n            num_points);\n      }));\n  cudaDeviceSynchronize();\n  AT_CUDA_CHECK(cudaGetLastError());\n\n  // 4. 
copy point features to voxels\n  // Step 4 & 5 could be parallel\n  auto pts_output_size = num_points * num_features;\n  dim3 cp_grid(std::min(at::cuda::ATenCeilDiv(pts_output_size, 512), 4096));\n  dim3 cp_block(512);\n  AT_DISPATCH_ALL_TYPES(\n      points.scalar_type(), \"assign_point_to_voxel\", ([&] {\n        assign_point_to_voxel<float, int>\n            <<<cp_grid, cp_block, 0, at::cuda::getCurrentCUDAStream()>>>(\n                pts_output_size, points.contiguous().data_ptr<float>(),\n                point_to_voxelidx.contiguous().data_ptr<int>(),\n                coor_to_voxelidx.contiguous().data_ptr<int>(),\n                voxels.contiguous().data_ptr<float>(), max_points, num_features,\n                num_points, NDim);\n      }));\n  //   cudaDeviceSynchronize();\n  //   AT_CUDA_CHECK(cudaGetLastError());\n\n  // 5. copy coors of each voxels\n  auto coors_output_size = num_points * NDim;\n  dim3 coors_cp_grid(\n      std::min(at::cuda::ATenCeilDiv(coors_output_size, 512), 4096));\n  dim3 coors_cp_block(512);\n  AT_DISPATCH_ALL_TYPES(\n      points.scalar_type(), \"assign_point_to_voxel\", ([&] {\n        assign_voxel_coors<float, int><<<coors_cp_grid, coors_cp_block, 0,\n                                         at::cuda::getCurrentCUDAStream()>>>(\n            coors_output_size, temp_coors.contiguous().data_ptr<int>(),\n            point_to_voxelidx.contiguous().data_ptr<int>(),\n            coor_to_voxelidx.contiguous().data_ptr<int>(),\n            coors.contiguous().data_ptr<int>(), num_points, NDim);\n      }));\n  cudaDeviceSynchronize();\n  AT_CUDA_CHECK(cudaGetLastError());\n\n  auto voxel_num_cpu = voxel_num.to(at::kCPU);\n  int voxel_num_int = voxel_num_cpu.data_ptr<int>()[0];\n\n  return voxel_num_int;\n}\n\nvoid dynamic_voxelize_gpu(const at::Tensor& points, at::Tensor& coors,\n                          const std::vector<float> voxel_size,\n                          const std::vector<float> coors_range,\n                          const int NDim = 3) {\n  // current version tooks about 0.04s for one frame on cpu\n  // check device\n  CHECK_INPUT(points);\n\n  at::cuda::CUDAGuard device_guard(points.device());\n\n  const int num_points = points.size(0);\n  const int num_features = points.size(1);\n\n  const float voxel_x = voxel_size[0];\n  const float voxel_y = voxel_size[1];\n  const float voxel_z = voxel_size[2];\n  const float coors_x_min = coors_range[0];\n  const float coors_y_min = coors_range[1];\n  const float coors_z_min = coors_range[2];\n  const float coors_x_max = coors_range[3];\n  const float coors_y_max = coors_range[4];\n  const float coors_z_max = coors_range[5];\n\n  const int grid_x = round((coors_x_max - coors_x_min) / voxel_x);\n  const int grid_y = round((coors_y_max - coors_y_min) / voxel_y);\n  const int grid_z = round((coors_z_max - coors_z_min) / voxel_z);\n\n  const int col_blocks = at::cuda::ATenCeilDiv(num_points, threadsPerBlock);\n  dim3 blocks(col_blocks);\n  dim3 threads(threadsPerBlock);\n  cudaStream_t stream = at::cuda::getCurrentCUDAStream();\n\n  AT_DISPATCH_ALL_TYPES(points.scalar_type(), \"dynamic_voxelize_kernel\", [&] {\n    dynamic_voxelize_kernel<scalar_t, int><<<blocks, threads, 0, stream>>>(\n        points.contiguous().data_ptr<scalar_t>(),\n        coors.contiguous().data_ptr<int>(), voxel_x, voxel_y, voxel_z,\n        coors_x_min, coors_y_min, coors_z_min, coors_x_max, coors_y_max,\n        coors_z_max, grid_x, grid_y, grid_z, num_points, num_features, NDim);\n  });\n  cudaDeviceSynchronize();\n  
AT_CUDA_CHECK(cudaGetLastError());\n\n  return;\n}\n\n}  // namespace voxelization\n"
  },
  {
    "path": "mmdet3d/ops/voxel/voxelize.py",
    "content": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\nimport torch\nfrom torch import nn\nfrom torch.autograd import Function\nfrom torch.nn.modules.utils import _pair\n\nfrom .voxel_layer import dynamic_voxelize, hard_voxelize\n\n\nclass _Voxelization(Function):\n\n    @staticmethod\n    def forward(ctx,\n                points,\n                voxel_size,\n                coors_range,\n                max_points=35,\n                max_voxels=20000):\n        \"\"\"convert kitti points(N, >=3) to voxels.\n\n        Args:\n            points: [N, ndim] float tensor. points[:, :3] contain xyz points\n                and points[:, 3:] contain other information like reflectivity\n            voxel_size: [3] list/tuple or array, float. xyz, indicate voxel\n                size\n            coors_range: [6] list/tuple or array, float. indicate voxel\n                range. format: xyzxyz, minmax\n            max_points: int. indicate maximum points contained in a voxel. if\n                max_points=-1, it means using dynamic_voxelize\n            max_voxels: int. indicate maximum voxels this function create.\n                for second, 20000 is a good choice. Users should shuffle points\n                before call this function because max_voxels may drop points.\n\n        Returns:\n            voxels: [M, max_points, ndim] float tensor. only contain points\n                    and returned when max_points != -1.\n            coordinates: [M, 3] int32 tensor, always returned.\n            num_points_per_voxel: [M] int32 tensor. Only returned when\n                max_points != -1.\n        \"\"\"\n        if max_points == -1 or max_voxels == -1:\n            coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int)\n            dynamic_voxelize(points, coors, voxel_size, coors_range, 3)\n            return coors\n        else:\n            voxels = points.new_zeros(\n                size=(max_voxels, max_points, points.size(1)))\n            coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int)\n            num_points_per_voxel = points.new_zeros(\n                size=(max_voxels, ), dtype=torch.int)\n            voxel_num = hard_voxelize(points, voxels, coors,\n                                      num_points_per_voxel, voxel_size,\n                                      coors_range, max_points, max_voxels, 3)\n            # select the valid voxels\n            voxels_out = voxels[:voxel_num]\n            coors_out = coors[:voxel_num]\n            num_points_per_voxel_out = num_points_per_voxel[:voxel_num]\n            return voxels_out, coors_out, num_points_per_voxel_out\n\n\nvoxelization = _Voxelization.apply\n\n\nclass Voxelization(nn.Module):\n\n    def __init__(self,\n                 voxel_size,\n                 point_cloud_range,\n                 max_num_points,\n                 max_voxels=20000):\n        super(Voxelization, self).__init__()\n        \"\"\"\n        Args:\n            voxel_size (list): list [x, y, z] size of three dimension\n            point_cloud_range (list):\n                [x_min, y_min, z_min, x_max, y_max, z_max]\n            max_num_points (int): max number of points per voxel\n            max_voxels (tuple or int): max number of voxels in\n                (training, testing) time\n        \"\"\"\n        self.voxel_size = voxel_size\n        self.point_cloud_range = point_cloud_range\n        self.max_num_points = max_num_points\n        if isinstance(max_voxels, tuple):\n            
self.max_voxels = max_voxels\n        else:\n            self.max_voxels = _pair(max_voxels)\n\n        point_cloud_range = torch.tensor(\n            point_cloud_range, dtype=torch.float32)\n        # [0, -40, -3, 70.4, 40, 1]\n        voxel_size = torch.tensor(voxel_size, dtype=torch.float32)\n        grid_size = (point_cloud_range[3:] -\n                     point_cloud_range[:3]) / voxel_size\n        grid_size = torch.round(grid_size).long()\n        input_feat_shape = grid_size[:2]\n        self.grid_size = grid_size\n        # the origin shape is as [x-len, y-len, z-len]\n        # [w, h, d] -> [d, h, w]\n        self.pcd_shape = [*input_feat_shape, 1][::-1]\n\n    def forward(self, input):\n        \"\"\"\n        Args:\n            input: NC points\n        \"\"\"\n        if self.training:\n            max_voxels = self.max_voxels[0]\n        else:\n            max_voxels = self.max_voxels[1]\n\n        return voxelization(input, self.voxel_size, self.point_cloud_range,\n                            self.max_num_points, max_voxels)\n\n    def __repr__(self):\n        tmpstr = self.__class__.__name__ + '('\n        tmpstr += 'voxel_size=' + str(self.voxel_size)\n        tmpstr += ', point_cloud_range=' + str(self.point_cloud_range)\n        tmpstr += ', max_num_points=' + str(self.max_num_points)\n        tmpstr += ', max_voxels=' + str(self.max_voxels)\n        tmpstr += ')'\n        return tmpstr\n"
  },
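A minimal sketch of the hard-voxelization path of the module above, which (unlike DynamicScatter) also has a CPU implementation; the voxel size and range values are illustrative:

import torch

from mmdet3d.ops.voxel import Voxelization

voxel_size = [0.2, 0.2, 4]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]

voxel_layer = Voxelization(
    voxel_size,
    point_cloud_range,
    max_num_points=32,            # extra points in a full voxel are dropped
    max_voxels=(16000, 40000))    # (training, testing) caps

points = torch.rand(5000, 4)
points[:, 0] *= 70.4
points[:, 1] = points[:, 1] * 80 - 40
points[:, 2] = points[:, 2] * 4 - 3

voxels, coors, num_points = voxel_layer(points)
# voxels:     (M, 32, 4) zero-padded point features per voxel
# coors:      (M, 3)     int32 voxel indices
# num_points: (M,)       int32 count of real points in each voxel
# As the docstring notes, shuffle points beforehand, since max_voxels may drop points.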
  {
    "path": "mmdet3d/utils/__init__.py",
    "content": "from mmcv.utils import Registry, build_from_cfg, print_log\n\nfrom mmdet.utils import get_root_logger\nfrom .collect_env import collect_env\n\n__all__ = [\n    'Registry', 'build_from_cfg', 'get_root_logger', 'collect_env', 'print_log'\n]\n"
  },
  {
    "path": "mmdet3d/utils/collect_env.py",
    "content": "from mmcv.utils import collect_env as collect_base_env\nfrom mmcv.utils import get_git_hash\n\nimport mmdet\nimport mmdet3d\n\n\ndef collect_env():\n    \"\"\"Collect the information of the running environments.\"\"\"\n    env_info = collect_base_env()\n    env_info['MMDetection'] = mmdet.__version__\n    env_info['MMDetection3D'] = mmdet3d.__version__ + '+' + get_git_hash()[:7]\n\n    return env_info\n\n\nif __name__ == '__main__':\n    for name, val in collect_env().items():\n        print(f'{name}: {val}')\n"
  },
  {
    "path": "mmdet3d/version.py",
    "content": "# Copyright (c) Open-MMLab. All rights reserved.\n\n__version__ = '0.11.0'\nshort_version = __version__\n\n\ndef parse_version_info(version_str):\n    version_info = []\n    for x in version_str.split('.'):\n        if x.isdigit():\n            version_info.append(int(x))\n        elif x.find('rc') != -1:\n            patch_version = x.split('rc')\n            version_info.append(int(patch_version[0]))\n            version_info.append(f'rc{patch_version[1]}')\n    return tuple(version_info)\n\n\nversion_info = parse_version_info(__version__)\n"
  },
  {
    "path": "requirements/build.txt",
    "content": ""
  },
  {
    "path": "requirements/docs.txt",
    "content": "m2r\nrecommonmark\nsphinx==3.1.2\nsphinx_markdown_tables\nsphinx_rtd_theme\n"
  },
  {
    "path": "requirements/optional.txt",
    "content": "open3d\nwaymo-open-dataset-tf-2-1-0==1.2.0\n"
  },
  {
    "path": "requirements/readthedocs.txt",
    "content": "mmcv\ntorch\ntorchvision\n"
  },
  {
    "path": "requirements/runtime.txt",
    "content": "lyft_dataset_sdk\nnetworkx>=2.2,<2.3\n# we may unlock the verion of numba in the future\nnumba==0.48.0\nnumpy<1.20.0\nnuscenes-devkit\nplyfile\nscikit-image\n# by default we also use tensorboard to log results\ntensorboard\ntrimesh>=2.35.39,<2.35.40\n"
  },
  {
    "path": "requirements/tests.txt",
    "content": "asynctest\ncodecov\nflake8\ninterrogate\nisort\n# Note: used for kwarray.group_items, this may be ported to mmcv in the future.\nkwarray\npytest\npytest-cov\npytest-runner\nubelt\nxdoctest >= 0.10.0\nyapf\n"
  },
  {
    "path": "requirements.txt",
    "content": "-r requirements/build.txt\n-r requirements/optional.txt\n-r requirements/runtime.txt\n-r requirements/tests.txt\n"
  },
  {
    "path": "setup.cfg",
    "content": "[yapf]\nBASED_ON_STYLE = pep8\nBLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true\nSPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true\n\n[isort]\nline_length = 79\nmulti_line_output = 0\nknown_standard_library = setuptools\nknown_first_party = mmdet,mmdet3d\nknown_third_party = cv2,load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,recommonmark,scannet_utils,scipy,seaborn,shapely,skimage,tensorflow,terminaltables,torch,trimesh,waymo_open_dataset\nno_lines_before = STDLIB,LOCALFOLDER\ndefault_section = THIRDPARTY\n"
  },
  {
    "path": "setup.py",
    "content": "from setuptools import find_packages, setup\n\nimport os\nimport torch\nfrom torch.utils.cpp_extension import (BuildExtension, CppExtension,\n                                       CUDAExtension)\n\n\ndef readme():\n    with open('README.md', encoding='utf-8') as f:\n        content = f.read()\n    return content\n\n\nversion_file = 'mmdet3d/version.py'\n\n\ndef get_version():\n    with open(version_file, 'r') as f:\n        exec(compile(f.read(), version_file, 'exec'))\n    import sys\n\n    # return short version for sdist\n    if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:\n        return locals()['short_version']\n    else:\n        return locals()['__version__']\n\n\ndef make_cuda_ext(name,\n                  module,\n                  sources,\n                  sources_cuda=[],\n                  extra_args=[],\n                  extra_include_path=[]):\n\n    define_macros = []\n    extra_compile_args = {'cxx': [] + extra_args}\n\n    if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':\n        define_macros += [('WITH_CUDA', None)]\n        extension = CUDAExtension\n        extra_compile_args['nvcc'] = extra_args + [\n            '-D__CUDA_NO_HALF_OPERATORS__',\n            '-D__CUDA_NO_HALF_CONVERSIONS__',\n            '-D__CUDA_NO_HALF2_OPERATORS__',\n        ]\n        sources += sources_cuda\n    else:\n        print('Compiling {} without CUDA'.format(name))\n        extension = CppExtension\n        # raise EnvironmentError('CUDA is required to compile MMDetection!')\n\n    return extension(\n        name='{}.{}'.format(module, name),\n        sources=[os.path.join(*module.split('.'), p) for p in sources],\n        include_dirs=extra_include_path,\n        define_macros=define_macros,\n        extra_compile_args=extra_compile_args)\n\n\ndef parse_requirements(fname='requirements.txt', with_version=True):\n    \"\"\"Parse the package dependencies listed in a requirements file but strips\n    specific versioning information.\n\n    Args:\n        fname (str): path to requirements file\n        with_version (bool, default=False): if True include version specs\n\n    Returns:\n        list[str]: list of requirements items\n\n    CommandLine:\n        python -c \"import setup; print(setup.parse_requirements())\"\n    \"\"\"\n    import re\n    import sys\n    from os.path import exists\n    require_fpath = fname\n\n    def parse_line(line):\n        \"\"\"Parse information from a line in a requirements text file.\"\"\"\n        if line.startswith('-r '):\n            # Allow specifying requirements in other files\n            target = line.split(' ')[1]\n            for info in parse_require_file(target):\n                yield info\n        else:\n            info = {'line': line}\n            if line.startswith('-e '):\n                info['package'] = line.split('#egg=')[1]\n            else:\n                # Remove versioning from the package\n                pat = '(' + '|'.join(['>=', '==', '>']) + ')'\n                parts = re.split(pat, line, maxsplit=1)\n                parts = [p.strip() for p in parts]\n\n                info['package'] = parts[0]\n                if len(parts) > 1:\n                    op, rest = parts[1:]\n                    if ';' in rest:\n                        # Handle platform specific dependencies\n                        # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies\n                        version, platform_deps = map(str.strip,\n             
                                        rest.split(';'))\n                        info['platform_deps'] = platform_deps\n                    else:\n                        version = rest  # NOQA\n                    info['version'] = (op, version)\n            yield info\n\n    def parse_require_file(fpath):\n        with open(fpath, 'r') as f:\n            for line in f.readlines():\n                line = line.strip()\n                if line and not line.startswith('#'):\n                    for info in parse_line(line):\n                        yield info\n\n    def gen_packages_items():\n        if exists(require_fpath):\n            for info in parse_require_file(require_fpath):\n                parts = [info['package']]\n                if with_version and 'version' in info:\n                    parts.extend(info['version'])\n                if not sys.version.startswith('3.4'):\n                    # apparently package_deps are broken in 3.4\n                    platform_deps = info.get('platform_deps')\n                    if platform_deps is not None:\n                        parts.append(';' + platform_deps)\n                item = ''.join(parts)\n                yield item\n\n    packages = list(gen_packages_items())\n    return packages\n\n\nif __name__ == '__main__':\n    setup(\n        name='mmdet3d',\n        version=get_version(),\n        description=(\"OpenMMLab's next-generation platform \"\n                     'for general 3D object detection.'),\n        long_description=readme(),\n        long_description_content_type='text/markdown',\n        author='OpenMMLab',\n        author_email='zwwdev@gmail.com',\n        keywords='computer vision, 3D object detection',\n        url='https://github.com/open-mmlab/mmdetection3d',\n        packages=find_packages(exclude=('configs', 'tools', 'demo')),\n        package_data={'mmdet3d.ops': ['*/*.so']},\n        classifiers=[\n            'Development Status :: 4 - Beta',\n            'License :: OSI Approved :: Apache Software License',\n            'Operating System :: OS Independent',\n            'Programming Language :: Python :: 3',\n            'Programming Language :: Python :: 3.6',\n            'Programming Language :: Python :: 3.7',\n        ],\n        license='Apache License 2.0',\n        setup_requires=parse_requirements('requirements/build.txt'),\n        tests_require=parse_requirements('requirements/tests.txt'),\n        install_requires=parse_requirements('requirements/runtime.txt'),\n        extras_require={\n            'all': parse_requirements('requirements.txt'),\n            'tests': parse_requirements('requirements/tests.txt'),\n            'build': parse_requirements('requirements/build.txt'),\n            'optional': parse_requirements('requirements/optional.txt'),\n        },\n        ext_modules=[\n            make_cuda_ext(\n                name='sparse_conv_ext',\n                module='mmdet3d.ops.spconv',\n                extra_include_path=[\n                    # PyTorch 1.5 uses ninja, which requires the absolute path\n                    # of included files; a relative path will cause failure.\n                    os.path.abspath(\n                        os.path.join(*'mmdet3d.ops.spconv'.split('.'),\n                                     'include/'))\n                ],\n                sources=[\n                    'src/all.cc',\n                    'src/reordering.cc',\n                    'src/reordering_cuda.cu',\n                    'src/indice.cc',\n                    
'src/indice_cuda.cu',\n                    'src/maxpool.cc',\n                    'src/maxpool_cuda.cu',\n                ],\n                extra_args=['-w', '-std=c++14']),\n            make_cuda_ext(\n                name='iou3d_cuda',\n                module='mmdet3d.ops.iou3d',\n                sources=[\n                    'src/iou3d.cpp',\n                    'src/iou3d_kernel.cu',\n                ]),\n            make_cuda_ext(\n                name='voxel_layer',\n                module='mmdet3d.ops.voxel',\n                sources=[\n                    'src/voxelization.cpp',\n                    'src/scatter_points_cpu.cpp',\n                    'src/scatter_points_cuda.cu',\n                    'src/voxelization_cpu.cpp',\n                    'src/voxelization_cuda.cu',\n                ]),\n            make_cuda_ext(\n                name='roiaware_pool3d_ext',\n                module='mmdet3d.ops.roiaware_pool3d',\n                sources=[\n                    'src/roiaware_pool3d.cpp',\n                    'src/points_in_boxes_cpu.cpp',\n                ],\n                sources_cuda=[\n                    'src/roiaware_pool3d_kernel.cu',\n                    'src/points_in_boxes_cuda.cu',\n                ]),\n            make_cuda_ext(\n                name='ball_query_ext',\n                module='mmdet3d.ops.ball_query',\n                sources=['src/ball_query.cpp'],\n                sources_cuda=['src/ball_query_cuda.cu']),\n            make_cuda_ext(\n                name='knn_ext',\n                module='mmdet3d.ops.knn',\n                sources=['src/knn.cpp'],\n                sources_cuda=['src/knn_cuda.cu']),\n            make_cuda_ext(\n                name='group_points_ext',\n                module='mmdet3d.ops.group_points',\n                sources=['src/group_points.cpp'],\n                sources_cuda=['src/group_points_cuda.cu']),\n            make_cuda_ext(\n                name='interpolate_ext',\n                module='mmdet3d.ops.interpolate',\n                sources=['src/interpolate.cpp'],\n                sources_cuda=[\n                    'src/three_interpolate_cuda.cu', 'src/three_nn_cuda.cu'\n                ]),\n            make_cuda_ext(\n                name='furthest_point_sample_ext',\n                module='mmdet3d.ops.furthest_point_sample',\n                sources=['src/furthest_point_sample.cpp'],\n                sources_cuda=['src/furthest_point_sample_cuda.cu']),\n            make_cuda_ext(\n                name='gather_points_ext',\n                module='mmdet3d.ops.gather_points',\n                sources=['src/gather_points.cpp'],\n                sources_cuda=['src/gather_points_cuda.cu'])\n        ],\n        cmdclass={'build_ext': BuildExtension},\n        zip_safe=False)\n"
  },
  {
    "path": "tests/test_data/test_datasets/test_dataset_wrappers.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet3d.datasets.builder import build_dataset\n\n\ndef test_getitem():\n    np.random.seed(1)\n    torch.manual_seed(1)\n    point_cloud_range = [-50, -50, -5, 50, 50, 3]\n    file_client_args = dict(backend='disk')\n    class_names = [\n        'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n        'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n    ]\n    pipeline = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=5,\n            use_dim=5,\n            file_client_args=file_client_args),\n        dict(\n            type='LoadPointsFromMultiSweeps',\n            sweeps_num=9,\n            use_dim=[0, 1, 2, 3, 4],\n            file_client_args=file_client_args,\n            pad_empty_sweeps=True,\n            remove_close=True,\n            test_mode=True),\n        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n        # dict(type='ObjectSample', db_sampler=db_sampler),\n        dict(\n            type='GlobalRotScaleTrans',\n            rot_range=[-0.3925, 0.3925],\n            scale_ratio_range=[0.95, 1.05],\n            translation_std=[0, 0, 0]),\n        dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n        dict(type='ObjectNameFilter', classes=class_names),\n        dict(type='PointShuffle'),\n        dict(type='DefaultFormatBundle3D', class_names=class_names),\n        dict(\n            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n    ]\n    input_modality = dict(\n        use_lidar=True,\n        use_camera=False,\n        use_radar=False,\n        use_map=False,\n        use_external=False)\n    dataset_cfg = dict(\n        type='CBGSDataset',\n        dataset=dict(\n            type='NuScenesDataset',\n            data_root='tests/data/nuscenes',\n            ann_file='tests/data/nuscenes/nus_info.pkl',\n            pipeline=pipeline,\n            classes=class_names,\n            modality=input_modality,\n            test_mode=False,\n            use_valid_flag=True,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR'))\n    nus_dataset = build_dataset(dataset_cfg)\n    assert len(nus_dataset) == 20\n\n    data = nus_dataset[0]\n    assert data['img_metas'].data['flip'] is True\n    assert data['img_metas'].data['pcd_horizontal_flip'] is True\n    assert data['points']._data.shape == (537, 5)\n\n    data = nus_dataset[2]\n    assert data['img_metas'].data['flip'] is False\n    assert data['img_metas'].data['pcd_horizontal_flip'] is False\n    assert data['points']._data.shape == (901, 5)\n"
  },
  {
    "path": "tests/test_data/test_datasets/test_kitti_dataset.py",
    "content": "import numpy as np\nimport os\nimport pytest\nimport tempfile\nimport torch\n\nfrom mmdet3d.core.bbox import LiDARInstance3DBoxes\nfrom mmdet3d.datasets import KittiDataset\n\n\ndef _generate_kitti_dataset_config():\n    data_root = 'tests/data/kitti'\n    ann_file = 'tests/data/kitti/kitti_infos_train.pkl'\n    classes = ['Pedestrian', 'Cyclist', 'Car']\n    pts_prefix = 'velodyne_reduced'\n    pipeline = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=4,\n            use_dim=4,\n            file_client_args=dict(backend='disk')),\n        dict(\n            type='MultiScaleFlipAug3D',\n            img_scale=(1333, 800),\n            pts_scale_ratio=1,\n            flip=False,\n            transforms=[\n                dict(\n                    type='GlobalRotScaleTrans',\n                    rot_range=[0, 0],\n                    scale_ratio_range=[1.0, 1.0],\n                    translation_std=[0, 0, 0]),\n                dict(type='RandomFlip3D'),\n                dict(\n                    type='PointsRangeFilter',\n                    point_cloud_range=[0, -40, -3, 70.4, 40, 1]),\n                dict(\n                    type='DefaultFormatBundle3D',\n                    class_names=['Pedestrian', 'Cyclist', 'Car'],\n                    with_label=False),\n                dict(type='Collect3D', keys=['points'])\n            ])\n    ]\n    modality = dict(use_lidar=True, use_camera=False)\n    split = 'training'\n    return data_root, ann_file, classes, pts_prefix, pipeline, modality, split\n\n\ndef test_getitem():\n    np.random.seed(0)\n    data_root, ann_file, classes, pts_prefix,\\\n        pipeline, modality, split = _generate_kitti_dataset_config()\n    pipeline = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=4,\n            use_dim=4,\n            file_client_args=dict(backend='disk')),\n        dict(\n            type='LoadAnnotations3D',\n            with_bbox_3d=True,\n            with_label_3d=True,\n            file_client_args=dict(backend='disk')),\n        dict(\n            type='ObjectSample',\n            db_sampler=dict(\n                data_root='tests/data/kitti/',\n                info_path='tests/data/kitti/kitti_dbinfos_train.pkl',\n                rate=1.0,\n                prepare=dict(\n                    filter_by_difficulty=[-1],\n                    filter_by_min_points=dict(Pedestrian=10)),\n                classes=['Pedestrian', 'Cyclist', 'Car'],\n                sample_groups=dict(Pedestrian=6))),\n        dict(\n            type='ObjectNoise',\n            num_try=100,\n            translation_std=[1.0, 1.0, 0.5],\n            global_rot_range=[0.0, 0.0],\n            rot_range=[-0.78539816, 0.78539816]),\n        dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n        dict(\n            type='GlobalRotScaleTrans',\n            rot_range=[-0.78539816, 0.78539816],\n            scale_ratio_range=[0.95, 1.05]),\n        dict(\n            type='PointsRangeFilter',\n            point_cloud_range=[0, -40, -3, 70.4, 40, 1]),\n        dict(\n            type='ObjectRangeFilter',\n            point_cloud_range=[0, -40, -3, 70.4, 40, 1]),\n        dict(type='PointShuffle'),\n        dict(\n            type='DefaultFormatBundle3D',\n            class_names=['Pedestrian', 'Cyclist', 'Car']),\n        dict(\n            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n    ]\n    
self = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline,\n                        classes, modality)\n    data = self[0]\n    points = data['points']._data\n    gt_bboxes_3d = data['gt_bboxes_3d']._data\n    gt_labels_3d = data['gt_labels_3d']._data\n    expected_gt_bboxes_3d = torch.tensor(\n        [[9.5081, -5.2269, -1.1370, 0.4915, 1.2288, 1.9353, -2.7136]])\n    expected_gt_labels_3d = torch.tensor([0])\n    assert points.shape == (780, 4)\n    assert torch.allclose(\n        gt_bboxes_3d.tensor, expected_gt_bboxes_3d, atol=1e-4)\n    assert torch.all(gt_labels_3d == expected_gt_labels_3d)\n\n\ndef test_evaluate():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    data_root, ann_file, classes, pts_prefix,\\\n        pipeline, modality, split = _generate_kitti_dataset_config()\n    self = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline,\n                        classes, modality)\n    boxes_3d = LiDARInstance3DBoxes(\n        torch.tensor(\n            [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))\n    labels_3d = torch.tensor([\n        0,\n    ])\n    scores_3d = torch.tensor([0.5])\n    metric = ['mAP']\n    result = dict(boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)\n    ap_dict = self.evaluate([result], metric)\n    assert np.isclose(ap_dict['KITTI/Overall_3D_easy'], 3.0303030303030307)\n    assert np.isclose(ap_dict['KITTI/Overall_3D_moderate'], 3.0303030303030307)\n    assert np.isclose(ap_dict['KITTI/Overall_3D_hard'], 3.0303030303030307)\n\n\ndef test_show():\n    import mmcv\n    import tempfile\n    from os import path as osp\n\n    from mmdet3d.core.bbox import LiDARInstance3DBoxes\n    temp_dir = tempfile.mkdtemp()\n    data_root, ann_file, classes, pts_prefix,\\\n        pipeline, modality, split = _generate_kitti_dataset_config()\n    kitti_dataset = KittiDataset(\n        data_root, ann_file, split=split, modality=modality, pipeline=pipeline)\n    boxes_3d = LiDARInstance3DBoxes(\n        torch.tensor(\n            [[46.1218, -4.6496, -0.9275, 0.5316, 1.4442, 1.7450, 1.1749],\n             [33.3189, 0.1981, 0.3136, 0.5656, 1.2301, 1.7985, 1.5723],\n             [46.1366, -4.6404, -0.9510, 0.5162, 1.6501, 1.7540, 1.3778],\n             [33.2646, 0.2297, 0.3446, 0.5746, 1.3365, 1.7947, 1.5430],\n             [58.9079, 16.6272, -1.5829, 1.5656, 3.9313, 1.4899, 1.5505]]))\n    scores_3d = torch.tensor([0.1815, 0.1663, 0.5792, 0.2194, 0.2780])\n    labels_3d = torch.tensor([0, 0, 1, 1, 2])\n    result = dict(boxes_3d=boxes_3d, scores_3d=scores_3d, labels_3d=labels_3d)\n    results = [result]\n    kitti_dataset.show(results, temp_dir, show=False)\n    pts_file_path = osp.join(temp_dir, '000000', '000000_points.obj')\n    gt_file_path = osp.join(temp_dir, '000000', '000000_gt.ply')\n    pred_file_path = osp.join(temp_dir, '000000', '000000_pred.ply')\n    mmcv.check_file_exist(pts_file_path)\n    mmcv.check_file_exist(gt_file_path)\n    mmcv.check_file_exist(pred_file_path)\n\n\ndef test_format_results():\n    from mmdet3d.core.bbox import LiDARInstance3DBoxes\n    data_root, ann_file, classes, pts_prefix,\\\n        pipeline, modality, split = _generate_kitti_dataset_config()\n    self = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline,\n                        classes, modality)\n    boxes_3d = LiDARInstance3DBoxes(\n        torch.tensor(\n            [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))\n    labels_3d = torch.tensor([\n        
0,\n    ])\n    scores_3d = torch.tensor([0.5])\n    result = dict(boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)\n    results = [result]\n    result_files, _ = self.format_results(results)\n    expected_name = np.array(['Pedestrian'])\n    expected_truncated = np.array([0.])\n    expected_occluded = np.array([0])\n    expected_alpha = np.array([-3.3410306])\n    expected_bbox = np.array([[710.443, 144.00221, 820.29114, 307.58667]])\n    expected_dimensions = np.array([[1.2, 1.89, 0.48]])\n    expected_location = np.array([[1.8399826, 1.4700007, 8.410018]])\n    expected_rotation_y = np.array([-3.1315928])\n    expected_score = np.array([0.5])\n    expected_sample_idx = np.array([0])\n    assert np.all(result_files[0]['name'] == expected_name)\n    assert np.allclose(result_files[0]['truncated'], expected_truncated)\n    assert np.all(result_files[0]['occluded'] == expected_occluded)\n    assert np.allclose(result_files[0]['alpha'], expected_alpha)\n    assert np.allclose(result_files[0]['bbox'], expected_bbox)\n    assert np.allclose(result_files[0]['dimensions'], expected_dimensions)\n    assert np.allclose(result_files[0]['location'], expected_location)\n    assert np.allclose(result_files[0]['rotation_y'], expected_rotation_y)\n    assert np.allclose(result_files[0]['score'], expected_score)\n    assert np.allclose(result_files[0]['sample_idx'], expected_sample_idx)\n\n\ndef test_bbox2result_kitti():\n    data_root, ann_file, classes, pts_prefix,\\\n        pipeline, modality, split = _generate_kitti_dataset_config()\n    self = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline,\n                        classes, modality)\n    boxes_3d = LiDARInstance3DBoxes(\n        torch.tensor(\n            [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))\n    labels_3d = torch.tensor([\n        0,\n    ])\n    scores_3d = torch.tensor([0.5])\n    result = dict(boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)\n    results = [result]\n    temp_kitti_result_dir = tempfile.mkdtemp()\n    det_annos = self.bbox2result_kitti(\n        results, classes, submission_prefix=temp_kitti_result_dir)\n    expected_file_path = os.path.join(temp_kitti_result_dir, '000000.txt')\n    expected_name = np.array(['Pedestrian'])\n    expected_dimensions = np.array([1.2000, 1.8900, 0.4800])\n    expected_rotation_y = np.array([0.0100]) - np.pi\n    expected_score = np.array([0.5])\n    assert np.all(det_annos[0]['name'] == expected_name)\n    assert np.allclose(det_annos[0]['rotation_y'], expected_rotation_y)\n    assert np.allclose(det_annos[0]['score'], expected_score)\n    assert np.allclose(det_annos[0]['dimensions'], expected_dimensions)\n    assert os.path.exists(expected_file_path)\n    os.remove(expected_file_path)\n    os.removedirs(temp_kitti_result_dir)\n\n    temp_kitti_result_dir = tempfile.mkdtemp()\n    boxes_3d = LiDARInstance3DBoxes(torch.tensor([]))\n    labels_3d = torch.tensor([])\n    scores_3d = torch.tensor([])\n    empty_result = dict(\n        boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)\n    results = [empty_result]\n    det_annos = self.bbox2result_kitti(\n        results, classes, submission_prefix=temp_kitti_result_dir)\n    expected_file_path = os.path.join(temp_kitti_result_dir, '000000.txt')\n    assert os.path.exists(expected_file_path)\n    os.remove(expected_file_path)\n    os.removedirs(temp_kitti_result_dir)\n\n\ndef test_bbox2result_kitti2d():\n    data_root, ann_file, classes, pts_prefix,\\\n        pipeline, 
modality, split = _generate_kitti_dataset_config()\n    self = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline,\n                        classes, modality)\n    bboxes = np.array([[[46.1218, -4.6496, -0.9275, 0.5316, 0.5],\n                        [33.3189, 0.1981, 0.3136, 0.5656, 0.5]],\n                       [[46.1366, -4.6404, -0.9510, 0.5162, 0.5],\n                        [33.2646, 0.2297, 0.3446, 0.5746, 0.5]]])\n    det_annos = self.bbox2result_kitti2d([bboxes], classes)\n    expected_name = np.array(\n        ['Pedestrian', 'Pedestrian', 'Cyclist', 'Cyclist'])\n    expected_bbox = np.array([[46.1218, -4.6496, -0.9275, 0.5316],\n                              [33.3189, 0.1981, 0.3136, 0.5656],\n                              [46.1366, -4.6404, -0.951, 0.5162],\n                              [33.2646, 0.2297, 0.3446, 0.5746]])\n    expected_score = np.array([0.5, 0.5, 0.5, 0.5])\n    assert np.all(det_annos[0]['name'] == expected_name)\n    assert np.allclose(det_annos[0]['bbox'], expected_bbox)\n    assert np.allclose(det_annos[0]['score'], expected_score)\n"
  },
  {
    "path": "tests/test_data/test_datasets/test_lyft_dataset.py",
    "content": "import mmcv\nimport numpy as np\nimport torch\n\nfrom mmdet3d.datasets import LyftDataset\n\n\ndef test_getitem():\n    np.random.seed(0)\n    torch.manual_seed(0)\n    root_path = './tests/data/lyft'\n    ann_file = './tests/data/lyft/lyft_infos.pkl'\n    class_names = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',\n                   'motorcycle', 'bicycle', 'pedestrian', 'animal')\n    point_cloud_range = [-80, -80, -10, 80, 80, 10]\n    pipelines = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=5,\n            use_dim=5,\n            file_client_args=dict(backend='disk')),\n        dict(\n            type='LoadPointsFromMultiSweeps',\n            sweeps_num=2,\n            file_client_args=dict(backend='disk')),\n        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n        dict(\n            type='GlobalRotScaleTrans',\n            rot_range=[-0.523599, 0.523599],\n            scale_ratio_range=[0.85, 1.15],\n            translation_std=[0, 0, 0]),\n        dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n        dict(type='PointShuffle'),\n        dict(type='DefaultFormatBundle3D', class_names=class_names),\n        dict(\n            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n    ]\n    lyft_dataset = LyftDataset(ann_file, pipelines, root_path)\n    data = lyft_dataset[0]\n    points = data['points']._data\n    gt_bboxes_3d = data['gt_bboxes_3d']._data\n    gt_labels_3d = data['gt_labels_3d']._data\n    pts_filename = data['img_metas']._data['pts_filename']\n    pcd_horizontal_flip = data['img_metas']._data['pcd_horizontal_flip']\n    pcd_scale_factor = data['img_metas']._data['pcd_scale_factor']\n    pcd_rotation = data['img_metas']._data['pcd_rotation']\n    sample_idx = data['img_metas']._data['sample_idx']\n    pcd_rotation_expected = np.array([[0.99869376, -0.05109515, 0.],\n                                      [0.05109515, 0.99869376, 0.],\n                                      [0., 0., 1.]])\n    assert pts_filename == \\\n        'tests/data/lyft/lidar/host-a017_lidar1_1236118886901125926.bin'\n    assert pcd_horizontal_flip is True\n    assert abs(pcd_scale_factor - 1.0645568099117257) < 1e-5\n    assert np.allclose(pcd_rotation, pcd_rotation_expected, 1e-3)\n    assert sample_idx == \\\n        'b98a05255ba2632e957884758cb31f0e6fcc8d3cd6ee76b6d0ba55b72f08fc54'\n    expected_points = torch.tensor([[61.4785, -3.7393, 6.7699, 0.4001],\n                                    [47.7904, -3.9887, 6.0926, 0.0000],\n                                    [52.5683, -4.2178, 6.7179, 0.0000],\n                                    [52.4867, -4.0315, 6.7057, 0.0000],\n                                    [59.8372, -1.7366, 6.5864, 0.4001],\n                                    [53.0842, -3.7064, 6.7811, 0.0000],\n                                    [60.5549, -3.4978, 6.6578, 0.4001],\n                                    [59.1695, -1.2910, 7.0296, 0.2000],\n                                    [53.0702, -3.8868, 6.7807, 0.0000],\n                                    [47.9579, -4.1648, 5.6219, 0.2000],\n                                    [59.8226, -1.5522, 6.5867, 0.4001],\n                                    [61.2858, -4.2254, 7.3089, 0.2000],\n                                    [49.9896, 
-4.5202, 5.8823, 0.2000],\n                                    [61.4597, -4.6402, 7.3340, 0.2000],\n                                    [59.8244, -1.3499, 6.5895, 0.4001]])\n    expected_gt_bboxes_3d = torch.tensor(\n        [[63.2257, 17.5206, -0.6307, 2.0109, 5.1652, 1.9471, -1.5868],\n         [-25.3804, 27.4598, -2.3297, 2.7412, 8.4792, 3.4343, -1.5939],\n         [-15.2098, -7.0109, -2.2566, 0.7931, 0.8410, 1.7916, 1.5090]])\n    expected_gt_labels = np.array([0, 4, 7])\n    original_classes = lyft_dataset.CLASSES\n\n    assert torch.allclose(points, expected_points, 1e-2)\n    assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)\n    assert np.all(gt_labels_3d.numpy() == expected_gt_labels)\n    assert original_classes == class_names\n\n    lyft_dataset = LyftDataset(\n        ann_file, None, root_path, classes=['car', 'pedestrian'])\n    assert lyft_dataset.CLASSES != original_classes\n    assert lyft_dataset.CLASSES == ['car', 'pedestrian']\n\n    lyft_dataset = LyftDataset(\n        ann_file, None, root_path, classes=('car', 'pedestrian'))\n    assert lyft_dataset.CLASSES != original_classes\n    assert lyft_dataset.CLASSES == ('car', 'pedestrian')\n\n    import tempfile\n    tmp_file = tempfile.NamedTemporaryFile()\n    with open(tmp_file.name, 'w') as f:\n        f.write('car\\npedestrian\\n')\n\n    lyft_dataset = LyftDataset(\n        ann_file, None, root_path, classes=tmp_file.name)\n    assert lyft_dataset.CLASSES != original_classes\n    assert lyft_dataset.CLASSES == ['car', 'pedestrian']\n\n\ndef test_evaluate():\n    root_path = './tests/data/lyft'\n    ann_file = './tests/data/lyft/lyft_infos_val.pkl'\n    lyft_dataset = LyftDataset(ann_file, None, root_path)\n    results = mmcv.load('./tests/data/lyft/sample_results.pkl')\n    ap_dict = lyft_dataset.evaluate(results, 'bbox')\n    car_precision = ap_dict['pts_bbox_Lyft/car_AP']\n    assert car_precision == 0.6\n"
  },
  {
    "path": "tests/test_data/test_datasets/test_nuscene_dataset.py",
    "content": "import numpy as np\n\nfrom mmdet3d.datasets import NuScenesDataset\n\n\ndef test_getitem():\n    np.random.seed(0)\n    point_cloud_range = [-50, -50, -5, 50, 50, 3]\n    file_client_args = dict(backend='disk')\n    class_names = [\n        'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n        'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n    ]\n    pipeline = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=5,\n            use_dim=5,\n            file_client_args=file_client_args),\n        dict(\n            type='LoadPointsFromMultiSweeps',\n            sweeps_num=2,\n            file_client_args=file_client_args),\n        dict(\n            type='MultiScaleFlipAug3D',\n            img_scale=(1333, 800),\n            pts_scale_ratio=1,\n            flip=False,\n            transforms=[\n                dict(\n                    type='GlobalRotScaleTrans',\n                    rot_range=[0, 0],\n                    scale_ratio_range=[1., 1.],\n                    translation_std=[0, 0, 0]),\n                dict(type='RandomFlip3D'),\n                dict(\n                    type='PointsRangeFilter',\n                    point_cloud_range=point_cloud_range),\n                dict(\n                    type='DefaultFormatBundle3D',\n                    class_names=class_names,\n                    with_label=False),\n                dict(type='Collect3D', keys=['points'])\n            ])\n    ]\n\n    nus_dataset = NuScenesDataset(\n        'tests/data/nuscenes/nus_info.pkl',\n        pipeline,\n        'tests/data/nuscenes',\n        test_mode=True)\n    data = nus_dataset[0]\n    assert data['img_metas'][0].data['flip'] is False\n    assert data['img_metas'][0].data['pcd_horizontal_flip'] is False\n    assert data['points'][0]._data.shape == (100, 4)\n\n    data = nus_dataset[1]\n    assert data['img_metas'][0].data['flip'] is False\n    assert data['img_metas'][0].data['pcd_horizontal_flip'] is False\n    assert data['points'][0]._data.shape == (597, 4)\n"
  },
  {
    "path": "tests/test_data/test_datasets/test_scannet_dataset.py",
    "content": "import numpy as np\nimport pytest\nimport torch\n\nfrom mmdet3d.datasets import ScanNetDataset\n\n\ndef test_getitem():\n    np.random.seed(0)\n    root_path = './tests/data/scannet/'\n    ann_file = './tests/data/scannet/scannet_infos.pkl'\n    class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',\n                   'window', 'bookshelf', 'picture', 'counter', 'desk',\n                   'curtain', 'refrigerator', 'showercurtrain', 'toilet',\n                   'sink', 'bathtub', 'garbagebin')\n    pipelines = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='DEPTH',\n            shift_height=True,\n            load_dim=6,\n            use_dim=[0, 1, 2]),\n        dict(\n            type='LoadAnnotations3D',\n            with_bbox_3d=True,\n            with_label_3d=True,\n            with_mask_3d=True,\n            with_seg_3d=True),\n        dict(type='IndoorPointSample', num_points=5),\n        dict(\n            type='RandomFlip3D',\n            sync_2d=False,\n            flip_ratio_bev_horizontal=1.0,\n            flip_ratio_bev_vertical=1.0),\n        dict(\n            type='GlobalRotScaleTrans',\n            rot_range=[-0.087266, 0.087266],\n            scale_ratio_range=[1.0, 1.0],\n            shift_height=True),\n        dict(type='DefaultFormatBundle3D', class_names=class_names),\n        dict(\n            type='Collect3D',\n            keys=[\n                'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',\n                'pts_instance_mask'\n            ],\n            meta_keys=['file_name', 'sample_idx', 'pcd_rotation']),\n    ]\n\n    scannet_dataset = ScanNetDataset(root_path, ann_file, pipelines)\n    data = scannet_dataset[0]\n    points = data['points']._data\n    gt_bboxes_3d = data['gt_bboxes_3d']._data\n    gt_labels = data['gt_labels_3d']._data\n    pts_semantic_mask = data['pts_semantic_mask']._data\n    pts_instance_mask = data['pts_instance_mask']._data\n    file_name = data['img_metas']._data['file_name']\n    pcd_rotation = data['img_metas']._data['pcd_rotation']\n    sample_idx = data['img_metas']._data['sample_idx']\n    expected_rotation = np.array([[0.99654, 0.08311407, 0.],\n                                  [-0.08311407, 0.99654, 0.], [0., 0., 1.]])\n    assert file_name == './tests/data/scannet/points/scene0000_00.bin'\n    assert np.allclose(pcd_rotation, expected_rotation, 1e-3)\n    assert sample_idx == 'scene0000_00'\n    expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],\n                                    [-0.4065, -3.4857, 2.1330, 2.1682],\n                                    [-1.4578, 1.3510, -0.0441, -0.0089],\n                                    [2.2428, -1.1323, -0.0288, 0.0064],\n                                    [0.7052, -2.9752, 1.5560, 1.5912]])\n    expected_gt_bboxes_3d = torch.tensor(\n        [[-1.1835, -3.6317, 1.5704, 1.7577, 0.3761, 0.5724, 0.0000],\n         [-3.1832, 3.2269, 1.1911, 0.6727, 0.2251, 0.6715, 0.0000],\n         [-0.9598, -2.2864, 0.0093, 0.7506, 2.5709, 1.2145, 0.0000],\n         [-2.6988, -2.7354, 0.8288, 0.7680, 1.8877, 0.2870, 0.0000],\n         [3.2989, 0.2885, -0.0090, 0.7600, 3.8814, 2.1603, 0.0000]])\n    expected_gt_labels = np.array([\n        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,\n        0, 0, 0, 5, 5, 5\n    ])\n    expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])\n    expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])\n    original_classes = 
scannet_dataset.CLASSES\n\n    assert scannet_dataset.CLASSES == class_names\n    assert torch.allclose(points, expected_points, 1e-2)\n    assert gt_bboxes_3d.tensor[:5].shape == (5, 7)\n    assert torch.allclose(gt_bboxes_3d.tensor[:5], expected_gt_bboxes_3d, 1e-2)\n    assert np.all(gt_labels.numpy() == expected_gt_labels)\n    assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)\n    assert np.all(pts_instance_mask.numpy() == expected_pts_instance_mask)\n    assert original_classes == class_names\n\n    scannet_dataset = ScanNetDataset(\n        root_path, ann_file, pipeline=None, classes=['cabinet', 'bed'])\n    assert scannet_dataset.CLASSES != original_classes\n    assert scannet_dataset.CLASSES == ['cabinet', 'bed']\n\n    scannet_dataset = ScanNetDataset(\n        root_path, ann_file, pipeline=None, classes=('cabinet', 'bed'))\n    assert scannet_dataset.CLASSES != original_classes\n    assert scannet_dataset.CLASSES == ('cabinet', 'bed')\n\n    # Test load classes from file\n    import tempfile\n    tmp_file = tempfile.NamedTemporaryFile()\n    with open(tmp_file.name, 'w') as f:\n        f.write('cabinet\\nbed\\n')\n\n    scannet_dataset = ScanNetDataset(\n        root_path, ann_file, pipeline=None, classes=tmp_file.name)\n    assert scannet_dataset.CLASSES != original_classes\n    assert scannet_dataset.CLASSES == ['cabinet', 'bed']\n\n\ndef test_evaluate():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    from mmdet3d.core.bbox.structures import DepthInstance3DBoxes\n    root_path = './tests/data/scannet'\n    ann_file = './tests/data/scannet/scannet_infos.pkl'\n    scannet_dataset = ScanNetDataset(root_path, ann_file)\n    results = []\n    pred_boxes = dict()\n    pred_boxes['boxes_3d'] = DepthInstance3DBoxes(\n        torch.tensor([[\n            1.4813e+00, 3.5207e+00, 1.5704e+00, 1.7445e+00, 2.3196e-01,\n            5.7235e-01, 0.0000e+00\n        ],\n                      [\n                          2.9040e+00, -3.4803e+00, 1.1911e+00, 6.6078e-01,\n                          1.7072e-01, 6.7154e-01, 0.0000e+00\n                      ],\n                      [\n                          1.1466e+00, 2.1987e+00, 9.2576e-03, 5.4184e-01,\n                          2.5346e+00, 1.2145e+00, 0.0000e+00\n                      ],\n                      [\n                          2.9168e+00, 2.5016e+00, 8.2875e-01, 6.1697e-01,\n                          1.8428e+00, 2.8697e-01, 0.0000e+00\n                      ],\n                      [\n                          -3.3114e+00, -1.3351e-02, -8.9524e-03, 4.4082e-01,\n                          3.8582e+00, 2.1603e+00, 0.0000e+00\n                      ],\n                      [\n                          -2.0135e+00, -3.4857e+00, 9.3848e-01, 1.9911e+00,\n                          2.1603e-01, 1.2767e+00, 0.0000e+00\n                      ],\n                      [\n                          -2.1945e+00, -3.1402e+00, -3.8165e-02, 1.4801e+00,\n                          6.8676e-01, 1.0586e+00, 0.0000e+00\n                      ],\n                      [\n                          -2.7553e+00, 2.4055e+00, -2.9972e-02, 1.4764e+00,\n                          1.4927e+00, 2.3380e+00, 0.0000e+00\n                      ]]))\n    pred_boxes['labels_3d'] = torch.tensor([6, 6, 4, 9, 11, 11])\n    pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0, 1.0, 1.0, 0.5])\n    results.append(pred_boxes)\n    metric = [0.25, 0.5]\n    ret_dict = scannet_dataset.evaluate(results, metric)\n    assert 
abs(ret_dict['table_AP_0.25'] - 0.3333) < 0.01\n    assert abs(ret_dict['window_AP_0.25'] - 1.0) < 0.01\n    assert abs(ret_dict['counter_AP_0.25'] - 1.0) < 0.01\n    assert abs(ret_dict['curtain_AP_0.25'] - 1.0) < 0.01\n\n\ndef test_show():\n    import mmcv\n    import tempfile\n    from os import path as osp\n\n    from mmdet3d.core.bbox import DepthInstance3DBoxes\n    temp_dir = tempfile.mkdtemp()\n    root_path = './tests/data/scannet'\n    ann_file = './tests/data/scannet/scannet_infos.pkl'\n    scannet_dataset = ScanNetDataset(root_path, ann_file)\n    boxes_3d = DepthInstance3DBoxes(\n        torch.tensor([[\n            -2.4053e+00, 9.2295e-01, 8.0661e-02, 2.4054e+00, 2.1468e+00,\n            8.5990e-01, 0.0000e+00\n        ],\n                      [\n                          -1.9341e+00, -2.0741e+00, 3.0698e-03, 3.2206e-01,\n                          2.5322e-01, 3.5144e-01, 0.0000e+00\n                      ],\n                      [\n                          -3.6908e+00, 8.0684e-03, 2.6201e-01, 4.1515e-01,\n                          7.6489e-01, 5.3585e-01, 0.0000e+00\n                      ],\n                      [\n                          2.6332e+00, 8.5143e-01, -4.9964e-03, 3.0367e-01,\n                          1.3448e+00, 1.8329e+00, 0.0000e+00\n                      ],\n                      [\n                          2.0221e-02, 2.6153e+00, 1.5109e-02, 7.3335e-01,\n                          1.0429e+00, 1.0251e+00, 0.0000e+00\n                      ]]))\n    scores_3d = torch.tensor(\n        [1.2058e-04, 2.3012e-03, 6.2324e-06, 6.6139e-06, 6.7965e-05])\n    labels_3d = torch.tensor([0, 0, 0, 0, 0])\n    result = dict(boxes_3d=boxes_3d, scores_3d=scores_3d, labels_3d=labels_3d)\n    results = [result]\n    scannet_dataset.show(results, temp_dir, show=False)\n    pts_file_path = osp.join(temp_dir, 'scene0000_00',\n                             'scene0000_00_points.obj')\n    gt_file_path = osp.join(temp_dir, 'scene0000_00', 'scene0000_00_gt.ply')\n    pred_file_path = osp.join(temp_dir, 'scene0000_00',\n                              'scene0000_00_pred.ply')\n    mmcv.check_file_exist(pts_file_path)\n    mmcv.check_file_exist(gt_file_path)\n    mmcv.check_file_exist(pred_file_path)\n"
  },
  {
    "path": "tests/test_data/test_datasets/test_semantickitti_dataset.py",
    "content": "import numpy as np\n\nfrom mmdet3d.datasets import SemanticKITTIDataset\n\n\ndef test_getitem():\n    np.random.seed(0)\n    root_path = './tests/data/semantickitti/'\n    ann_file = './tests/data/semantickitti/semantickitti_infos.pkl'\n    class_names = ('unlabeled', 'car', 'bicycle', 'motorcycle', 'truck', 'bus',\n                   'person', 'bicyclist', 'motorcyclist', 'road', 'parking',\n                   'sidewalk', 'other-ground', 'building', 'fence',\n                   'vegetation', 'trunck', 'terrian', 'pole', 'traffic-sign')\n    pipelines = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            shift_height=True,\n            load_dim=4,\n            use_dim=[0, 1, 2]),\n        dict(\n            type='LoadAnnotations3D',\n            with_bbox_3d=False,\n            with_label_3d=False,\n            with_mask_3d=False,\n            with_seg_3d=True,\n            seg_3d_dtype=np.int32),\n        dict(\n            type='RandomFlip3D',\n            sync_2d=False,\n            flip_ratio_bev_horizontal=1.0,\n            flip_ratio_bev_vertical=1.0),\n        dict(\n            type='GlobalRotScaleTrans',\n            rot_range=[-0.087266, 0.087266],\n            scale_ratio_range=[1.0, 1.0],\n            shift_height=True),\n        dict(type='DefaultFormatBundle3D', class_names=class_names),\n        dict(\n            type='Collect3D',\n            keys=[\n                'points',\n                'pts_semantic_mask',\n            ],\n            meta_keys=['file_name', 'sample_idx', 'pcd_rotation']),\n    ]\n\n    semantickitti_dataset = SemanticKITTIDataset(root_path, ann_file,\n                                                 pipelines)\n    data = semantickitti_dataset[0]\n    assert data['points']._data.shape[0] == data[\n        'pts_semantic_mask']._data.shape[0]\n"
  },
  {
    "path": "tests/test_data/test_datasets/test_sunrgbd_dataset.py",
    "content": "import numpy as np\nimport pytest\nimport torch\n\nfrom mmdet3d.datasets import SUNRGBDDataset\n\n\ndef test_getitem():\n    np.random.seed(0)\n    root_path = './tests/data/sunrgbd'\n    ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'\n    class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',\n                   'dresser', 'night_stand', 'bookshelf', 'bathtub')\n    pipelines = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='DEPTH',\n            shift_height=True,\n            load_dim=6,\n            use_dim=[0, 1, 2]),\n        dict(type='LoadAnnotations3D'),\n        dict(\n            type='RandomFlip3D',\n            sync_2d=False,\n            flip_ratio_bev_horizontal=0.5,\n        ),\n        dict(\n            type='GlobalRotScaleTrans',\n            rot_range=[-0.523599, 0.523599],\n            scale_ratio_range=[0.85, 1.15],\n            shift_height=True),\n        dict(type='IndoorPointSample', num_points=5),\n        dict(type='DefaultFormatBundle3D', class_names=class_names),\n        dict(\n            type='Collect3D',\n            keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],\n            meta_keys=[\n                'file_name', 'pcd_horizontal_flip', 'sample_idx',\n                'pcd_scale_factor', 'pcd_rotation'\n            ]),\n    ]\n\n    sunrgbd_dataset = SUNRGBDDataset(root_path, ann_file, pipelines)\n    data = sunrgbd_dataset[0]\n    points = data['points']._data\n    gt_bboxes_3d = data['gt_bboxes_3d']._data\n    gt_labels_3d = data['gt_labels_3d']._data\n    file_name = data['img_metas']._data['file_name']\n    pcd_horizontal_flip = data['img_metas']._data['pcd_horizontal_flip']\n    pcd_scale_factor = data['img_metas']._data['pcd_scale_factor']\n    pcd_rotation = data['img_metas']._data['pcd_rotation']\n    sample_idx = data['img_metas']._data['sample_idx']\n    pcd_rotation_expected = np.array([[0.99889565, 0.04698427, 0.],\n                                      [-0.04698427, 0.99889565, 0.],\n                                      [0., 0., 1.]])\n    assert file_name == './tests/data/sunrgbd/points/000001.bin'\n    assert pcd_horizontal_flip is False\n    assert abs(pcd_scale_factor - 0.9770964398016714) < 1e-5\n    assert np.allclose(pcd_rotation, pcd_rotation_expected, 1e-3)\n    assert sample_idx == 1\n    expected_points = torch.tensor([[-0.9904, 1.2596, 0.1105, 0.0905],\n                                    [-0.9948, 1.2758, 0.0437, 0.0238],\n                                    [-0.9866, 1.2641, 0.0504, 0.0304],\n                                    [-0.9915, 1.2586, 0.1265, 0.1065],\n                                    [-0.9890, 1.2561, 0.1216, 0.1017]])\n    expected_gt_bboxes_3d = torch.tensor(\n        [[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486],\n         [2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030],\n         [-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]])\n    expected_gt_labels = np.array([0, 7, 6])\n    original_classes = sunrgbd_dataset.CLASSES\n\n    assert torch.allclose(points, expected_points, 1e-2)\n    assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)\n    assert np.all(gt_labels_3d.numpy() == expected_gt_labels)\n    assert original_classes == class_names\n\n    SUNRGBD_dataset = SUNRGBDDataset(\n        root_path, ann_file, pipeline=None, classes=['bed', 'table'])\n    assert SUNRGBD_dataset.CLASSES != original_classes\n    assert SUNRGBD_dataset.CLASSES == ['bed', 'table']\n\n    SUNRGBD_dataset = 
SUNRGBDDataset(\n        root_path, ann_file, pipeline=None, classes=('bed', 'table'))\n    assert SUNRGBD_dataset.CLASSES != original_classes\n    assert SUNRGBD_dataset.CLASSES == ('bed', 'table')\n\n    import tempfile\n    tmp_file = tempfile.NamedTemporaryFile()\n    with open(tmp_file.name, 'w') as f:\n        f.write('bed\\ntable\\n')\n\n    SUNRGBD_dataset = SUNRGBDDataset(\n        root_path, ann_file, pipeline=None, classes=tmp_file.name)\n    assert SUNRGBD_dataset.CLASSES != original_classes\n    assert SUNRGBD_dataset.CLASSES == ['bed', 'table']\n\n\ndef test_evaluate():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    from mmdet3d.core.bbox.structures import DepthInstance3DBoxes\n    root_path = './tests/data/sunrgbd'\n    ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'\n    sunrgbd_dataset = SUNRGBDDataset(root_path, ann_file)\n    results = []\n    pred_boxes = dict()\n    pred_boxes['boxes_3d'] = DepthInstance3DBoxes(\n        torch.tensor(\n            [[1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.6956],\n             [2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.6500],\n             [-1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 0.1022]]))\n    pred_boxes['labels_3d'] = torch.tensor([0, 7, 6])\n    pred_boxes['scores_3d'] = torch.tensor([0.5, 1.0, 1.0])\n    results.append(pred_boxes)\n    metric = [0.25, 0.5]\n    ap_dict = sunrgbd_dataset.evaluate(results, metric)\n    bed_precision_25 = ap_dict['bed_AP_0.25']\n    dresser_precision_25 = ap_dict['dresser_AP_0.25']\n    night_stand_precision_25 = ap_dict['night_stand_AP_0.25']\n    assert abs(bed_precision_25 - 1) < 0.01\n    assert abs(dresser_precision_25 - 1) < 0.01\n    assert abs(night_stand_precision_25 - 1) < 0.01\n\n\ndef test_show():\n    import mmcv\n    import tempfile\n    from os import path as osp\n\n    from mmdet3d.core.bbox import DepthInstance3DBoxes\n    temp_dir = tempfile.mkdtemp()\n    root_path = './tests/data/sunrgbd'\n    ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'\n    sunrgbd_dataset = SUNRGBDDataset(root_path, ann_file)\n    boxes_3d = DepthInstance3DBoxes(\n        torch.tensor(\n            [[1.1500, 4.2614, -1.0669, 1.3219, 2.1593, 1.0267, 1.6473],\n             [-0.9583, 2.1916, -1.0881, 0.6213, 1.3022, 1.6275, -3.0720],\n             [2.5697, 4.8152, -1.1157, 0.5421, 0.7019, 0.7896, 1.6712],\n             [0.7283, 2.5448, -1.0356, 0.7691, 0.9056, 0.5771, 1.7121],\n             [-0.9860, 3.2413, -1.2349, 0.5110, 0.9940, 1.1245, 0.3295]]))\n    scores_3d = torch.tensor(\n        [1.5280e-01, 1.6682e-03, 6.2811e-04, 1.2860e-03, 9.4229e-06])\n    labels_3d = torch.tensor([0, 0, 0, 0, 0])\n    result = dict(boxes_3d=boxes_3d, scores_3d=scores_3d, labels_3d=labels_3d)\n    results = [result]\n    sunrgbd_dataset.show(results, temp_dir, show=False)\n    pts_file_path = osp.join(temp_dir, '000001', '000001_points.obj')\n    gt_file_path = osp.join(temp_dir, '000001', '000001_gt.ply')\n    pred_file_path = osp.join(temp_dir, '000001', '000001_pred.ply')\n    mmcv.check_file_exist(pts_file_path)\n    mmcv.check_file_exist(gt_file_path)\n    mmcv.check_file_exist(pred_file_path)\n"
  },
  {
    "path": "tests/test_data/test_pipelines/test_augmentations/test_data_augment_utils.py",
    "content": "import mmcv\nimport numpy as np\n\nfrom mmdet3d.datasets.pipelines.data_augment_utils import (\n    noise_per_object_v3_, points_transform_)\n\n\ndef test_noise_per_object_v3_():\n    np.random.seed(0)\n    points = np.fromfile(\n        './tests/data/kitti/training/velodyne_reduced/000000.bin',\n        np.float32).reshape(-1, 4)\n    annos = mmcv.load('./tests/data/kitti/kitti_infos_train.pkl')\n    info = annos[0]\n    annos = info['annos']\n    loc = annos['location']\n    dims = annos['dimensions']\n    rots = annos['rotation_y']\n    gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                  axis=1).astype(np.float32)\n\n    noise_per_object_v3_(gt_boxes=gt_bboxes_3d, points=points)\n    expected_gt_bboxes_3d = np.array(\n        [[3.3430212, 2.1475432, 9.388738, 1.2, 1.89, 0.48, 0.05056486]])\n\n    assert points.shape == (800, 4)\n    assert np.allclose(gt_bboxes_3d, expected_gt_bboxes_3d)\n\n\ndef test_points_transform():\n    points = np.array([[46.5090, 6.1140, -0.7790, 0.0000],\n                       [42.9490, 6.4050, -0.7050, 0.0000],\n                       [42.9010, 6.5360, -0.7050, 0.0000],\n                       [46.1960, 6.0960, -1.0100, 0.0000],\n                       [43.3080, 6.2680, -0.9360, 0.0000]])\n    gt_boxes = np.array([[\n        1.5340e+01, 8.4691e+00, -1.6855e+00, 1.6400e+00, 3.7000e+00,\n        1.4900e+00, 3.1300e+00\n    ],\n                         [\n                             1.7999e+01, 8.2386e+00, -1.5802e+00, 1.5500e+00,\n                             4.0200e+00, 1.5200e+00, 3.1300e+00\n                         ],\n                         [\n                             2.9620e+01, 8.2617e+00, -1.6185e+00, 1.7800e+00,\n                             4.2500e+00, 1.9000e+00, -3.1200e+00\n                         ],\n                         [\n                             4.8218e+01, 7.8035e+00, -1.3790e+00, 1.6400e+00,\n                             3.7000e+00, 1.5200e+00, -1.0000e-02\n                         ],\n                         [\n                             3.3079e+01, -8.4817e+00, -1.3092e+00, 4.3000e-01,\n                             1.7000e+00, 1.6200e+00, -1.5700e+00\n                         ]])\n    point_masks = np.array([[False, False, False, False, False],\n                            [False, False, False, False, False],\n                            [False, False, False, False, False],\n                            [False, False, False, False, False],\n                            [False, False, False, False, False]])\n    loc_transforms = np.array([[-1.8635, -0.2774, -0.1774],\n                               [-1.0297, -1.0302, -0.3062],\n                               [1.6680, 0.2597, 0.0551],\n                               [0.2230, 0.7257, -0.0097],\n                               [-0.1403, 0.8300, 0.3431]])\n    rot_transforms = np.array([0.6888, -0.3858, 0.1910, -0.0044, -0.0036])\n    valid_mask = np.array([True, True, True, True, True])\n    points_transform_(points, gt_boxes[:, :3], point_masks, loc_transforms,\n                      rot_transforms, valid_mask)\n    assert points.shape == (5, 4)\n    assert gt_boxes.shape == (5, 7)\n"
  },
  {
    "path": "tests/test_data/test_pipelines/test_augmentations/test_test_augment_utils.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet3d.core.points import DepthPoints\nfrom mmdet3d.datasets.pipelines import MultiScaleFlipAug3D\n\n\ndef test_multi_scale_flip_aug_3D():\n    np.random.seed(0)\n    transforms = [{\n        'type': 'GlobalRotScaleTrans',\n        'rot_range': [-0.1, 0.1],\n        'scale_ratio_range': [0.9, 1.1],\n        'translation_std': [0, 0, 0]\n    }, {\n        'type': 'RandomFlip3D',\n        'sync_2d': False,\n        'flip_ratio_bev_horizontal': 0.5\n    }, {\n        'type': 'IndoorPointSample',\n        'num_points': 5\n    }, {\n        'type':\n        'DefaultFormatBundle3D',\n        'class_names': ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',\n                        'dresser', 'night_stand', 'bookshelf', 'bathtub'),\n        'with_label':\n        False\n    }, {\n        'type': 'Collect3D',\n        'keys': ['points']\n    }]\n    img_scale = (1333, 800)\n    pts_scale_ratio = 1\n    multi_scale_flip_aug_3D = MultiScaleFlipAug3D(transforms, img_scale,\n                                                  pts_scale_ratio)\n    pts_file_name = 'tests/data/sunrgbd/points/000001.bin'\n    sample_idx = 4\n    file_name = 'tests/data/sunrgbd/points/000001.bin'\n    bbox3d_fields = []\n    points = np.array([[0.20397437, 1.4267826, -1.0503972, 0.16195858],\n                       [-2.2095256, 3.3159535, -0.7706928, 0.4416629],\n                       [1.5090443, 3.2764456, -1.1913797, 0.02097607],\n                       [-1.373904, 3.8711405, 0.8524302, 2.064786],\n                       [-1.8139812, 3.538856, -1.0056694, 0.20668638]])\n    points = DepthPoints(points, points_dim=4, attribute_dims=dict(height=3))\n    results = dict(\n        points=points,\n        pts_file_name=pts_file_name,\n        sample_idx=sample_idx,\n        file_name=file_name,\n        bbox3d_fields=bbox3d_fields)\n    results = multi_scale_flip_aug_3D(results)\n    expected_points = torch.tensor(\n        [[-2.2095, 3.3160, -0.7707, 0.4417], [-1.3739, 3.8711, 0.8524, 2.0648],\n         [-1.8140, 3.5389, -1.0057, 0.2067], [0.2040, 1.4268, -1.0504, 0.1620],\n         [1.5090, 3.2764, -1.1914, 0.0210]],\n        dtype=torch.float32)\n\n    assert torch.allclose(\n        results['points'][0]._data, expected_points, atol=1e-4)\n"
  },
  {
    "path": "tests/test_data/test_pipelines/test_augmentations/test_transforms_3d.py",
    "content": "import mmcv\nimport numpy as np\nimport pytest\nimport torch\n\nfrom mmdet3d.core import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes\nfrom mmdet3d.core.points import LiDARPoints\nfrom mmdet3d.datasets import (BackgroundPointsFilter, ObjectNoise,\n                              ObjectSample, RandomFlip3D,\n                              VoxelBasedPointSampler)\n\n\ndef test_remove_points_in_boxes():\n    points = np.array([[68.1370, 3.3580, 2.5160, 0.0000],\n                       [67.6970, 3.5500, 2.5010, 0.0000],\n                       [67.6490, 3.7600, 2.5000, 0.0000],\n                       [66.4140, 3.9010, 2.4590, 0.0000],\n                       [66.0120, 4.0850, 2.4460, 0.0000],\n                       [65.8340, 4.1780, 2.4400, 0.0000],\n                       [65.8410, 4.3860, 2.4400, 0.0000],\n                       [65.7450, 4.5870, 2.4380, 0.0000],\n                       [65.5510, 4.7800, 2.4320, 0.0000],\n                       [65.4860, 4.9820, 2.4300, 0.0000]])\n\n    boxes = np.array(\n        [[30.0285, 10.5110, -1.5304, 0.5100, 0.8700, 1.6000, 1.6400],\n         [7.8369, 1.6053, -1.5605, 0.5800, 1.2300, 1.8200, -3.1000],\n         [10.8740, -1.0827, -1.3310, 0.6000, 0.5200, 1.7100, 1.3500],\n         [14.9783, 2.2466, -1.4950, 0.6100, 0.7300, 1.5300, -1.9200],\n         [11.0656, 0.6195, -1.5202, 0.6600, 1.0100, 1.7600, -1.4600],\n         [10.5994, -7.9049, -1.4980, 0.5300, 1.9600, 1.6800, 1.5600],\n         [28.7068, -8.8244, -1.1485, 0.6500, 1.7900, 1.7500, 3.1200],\n         [20.2630, 5.1947, -1.4799, 0.7300, 1.7600, 1.7300, 1.5100],\n         [18.2496, 3.1887, -1.6109, 0.5600, 1.6800, 1.7100, 1.5600],\n         [7.7396, -4.3245, -1.5801, 0.5600, 1.7900, 1.8000, -0.8300]])\n    points = LiDARPoints(points, points_dim=4)\n    points = ObjectSample.remove_points_in_boxes(points, boxes)\n    assert points.tensor.numpy().shape == (10, 4)\n\n\ndef test_object_sample():\n    db_sampler = mmcv.ConfigDict({\n        'data_root': './tests/data/kitti/',\n        'info_path': './tests/data/kitti/kitti_dbinfos_train.pkl',\n        'rate': 1.0,\n        'prepare': {\n            'filter_by_difficulty': [-1],\n            'filter_by_min_points': {\n                'Pedestrian': 10\n            }\n        },\n        'classes': ['Pedestrian', 'Cyclist', 'Car'],\n        'sample_groups': {\n            'Pedestrian': 6\n        }\n    })\n    np.random.seed(0)\n    object_sample = ObjectSample(db_sampler)\n    points = np.fromfile(\n        './tests/data/kitti/training/velodyne_reduced/000000.bin',\n        np.float32).reshape(-1, 4)\n    annos = mmcv.load('./tests/data/kitti/kitti_infos_train.pkl')\n    info = annos[0]\n    rect = info['calib']['R0_rect'].astype(np.float32)\n    Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n    annos = info['annos']\n    loc = annos['location']\n    dims = annos['dimensions']\n    rots = annos['rotation_y']\n    gt_names = annos['name']\n\n    gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                  axis=1).astype(np.float32)\n    gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(\n        Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))\n    CLASSES = ('Pedestrian', 'Cyclist', 'Car')\n    gt_labels = []\n    for cat in gt_names:\n        if cat in CLASSES:\n            gt_labels.append(CLASSES.index(cat))\n        else:\n            gt_labels.append(-1)\n    gt_labels = np.array(gt_labels, dtype=np.long)\n    points = LiDARPoints(points, points_dim=4)\n    
input_dict = dict(\n        points=points, gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels)\n    input_dict = object_sample(input_dict)\n    points = input_dict['points']\n    gt_bboxes_3d = input_dict['gt_bboxes_3d']\n    gt_labels_3d = input_dict['gt_labels_3d']\n    repr_str = repr(object_sample)\n    expected_repr_str = 'ObjectSample sample_2d=False, ' \\\n                        'data_root=./tests/data/kitti/, ' \\\n                        'info_path=./tests/data/kitti/kitti' \\\n                        '_dbinfos_train.pkl, rate=1.0, ' \\\n                        'prepare={\\'filter_by_difficulty\\': [-1], ' \\\n                        '\\'filter_by_min_points\\': {\\'Pedestrian\\': 10}}, ' \\\n                        'classes=[\\'Pedestrian\\', \\'Cyclist\\', \\'Car\\'], ' \\\n                        'sample_groups={\\'Pedestrian\\': 6}'\n    assert repr_str == expected_repr_str\n    assert points.tensor.numpy().shape == (800, 4)\n    assert gt_bboxes_3d.tensor.shape == (1, 7)\n    assert np.all(gt_labels_3d == [0])\n\n\ndef test_object_noise():\n    np.random.seed(0)\n    object_noise = ObjectNoise()\n    points = np.fromfile(\n        './tests/data/kitti/training/velodyne_reduced/000000.bin',\n        np.float32).reshape(-1, 4)\n    annos = mmcv.load('./tests/data/kitti/kitti_infos_train.pkl')\n    info = annos[0]\n    rect = info['calib']['R0_rect'].astype(np.float32)\n    Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n    annos = info['annos']\n    loc = annos['location']\n    dims = annos['dimensions']\n    rots = annos['rotation_y']\n    gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                  axis=1).astype(np.float32)\n    gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(\n        Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))\n    points = LiDARPoints(points, points_dim=4)\n    input_dict = dict(points=points, gt_bboxes_3d=gt_bboxes_3d)\n    input_dict = object_noise(input_dict)\n    points = input_dict['points']\n    gt_bboxes_3d = input_dict['gt_bboxes_3d'].tensor\n    expected_gt_bboxes_3d = torch.tensor(\n        [[9.1724, -1.7559, -1.3550, 0.4800, 1.2000, 1.8900, 0.0505]])\n    repr_str = repr(object_noise)\n    expected_repr_str = 'ObjectNoise(num_try=100, ' \\\n                        'translation_std=[0.25, 0.25, 0.25], ' \\\n                        'global_rot_range=[0.0, 0.0], ' \\\n                        'rot_range=[-0.15707963267, 0.15707963267])'\n\n    assert repr_str == expected_repr_str\n    assert points.tensor.numpy().shape == (800, 4)\n    assert torch.allclose(gt_bboxes_3d, expected_gt_bboxes_3d, 1e-3)\n\n\ndef test_random_flip_3d():\n    random_flip_3d = RandomFlip3D(\n        flip_ratio_bev_horizontal=1.0, flip_ratio_bev_vertical=1.0)\n    points = np.array([[22.7035, 9.3901, -0.2848, 0.0000],\n                       [21.9826, 9.1766, -0.2698, 0.0000],\n                       [21.4329, 9.0209, -0.2578, 0.0000],\n                       [21.3068, 9.0205, -0.2558, 0.0000],\n                       [21.3400, 9.1305, -0.2578, 0.0000],\n                       [21.3291, 9.2099, -0.2588, 0.0000],\n                       [21.2759, 9.2599, -0.2578, 0.0000],\n                       [21.2686, 9.2982, -0.2588, 0.0000],\n                       [21.2334, 9.3607, -0.2588, 0.0000],\n                       [21.2179, 9.4372, -0.2598, 0.0000]])\n    bbox3d_fields = ['gt_bboxes_3d']\n    img_fields = []\n    box_type_3d = LiDARInstance3DBoxes\n    gt_bboxes_3d = LiDARInstance3DBoxes(\n        
torch.tensor(\n            [[38.9229, 18.4417, -1.1459, 0.7100, 1.7600, 1.8600, -2.2652],\n             [12.7768, 0.5795, -2.2682, 0.5700, 0.9900, 1.7200, -2.5029],\n             [12.7557, 2.2996, -1.4869, 0.6100, 1.1100, 1.9000, -1.9390],\n             [10.6677, 0.8064, -1.5435, 0.7900, 0.9600, 1.7900, 1.0856],\n             [5.0903, 5.1004, -1.2694, 0.7100, 1.7000, 1.8300, -1.9136]]))\n    points = LiDARPoints(points, points_dim=4)\n    input_dict = dict(\n        points=points,\n        bbox3d_fields=bbox3d_fields,\n        box_type_3d=box_type_3d,\n        img_fields=img_fields,\n        gt_bboxes_3d=gt_bboxes_3d)\n    input_dict = random_flip_3d(input_dict)\n    points = input_dict['points'].tensor.numpy()\n    gt_bboxes_3d = input_dict['gt_bboxes_3d'].tensor\n    expected_points = np.array([[22.7035, -9.3901, -0.2848, 0.0000],\n                                [21.9826, -9.1766, -0.2698, 0.0000],\n                                [21.4329, -9.0209, -0.2578, 0.0000],\n                                [21.3068, -9.0205, -0.2558, 0.0000],\n                                [21.3400, -9.1305, -0.2578, 0.0000],\n                                [21.3291, -9.2099, -0.2588, 0.0000],\n                                [21.2759, -9.2599, -0.2578, 0.0000],\n                                [21.2686, -9.2982, -0.2588, 0.0000],\n                                [21.2334, -9.3607, -0.2588, 0.0000],\n                                [21.2179, -9.4372, -0.2598, 0.0000]])\n    expected_gt_bboxes_3d = torch.tensor(\n        [[38.9229, -18.4417, -1.1459, 0.7100, 1.7600, 1.8600, 5.4068],\n         [12.7768, -0.5795, -2.2682, 0.5700, 0.9900, 1.7200, 5.6445],\n         [12.7557, -2.2996, -1.4869, 0.6100, 1.1100, 1.9000, 5.0806],\n         [10.6677, -0.8064, -1.5435, 0.7900, 0.9600, 1.7900, 2.0560],\n         [5.0903, -5.1004, -1.2694, 0.7100, 1.7000, 1.8300, 5.0552]])\n    repr_str = repr(random_flip_3d)\n    expected_repr_str = 'RandomFlip3D(sync_2d=True,' \\\n                        'flip_ratio_bev_vertical=1.0)'\n    assert np.allclose(points, expected_points)\n    assert torch.allclose(gt_bboxes_3d, expected_gt_bboxes_3d)\n    assert repr_str == expected_repr_str\n\n\ndef test_background_points_filter():\n    np.random.seed(0)\n    background_points_filter = BackgroundPointsFilter((0.5, 2.0, 0.5))\n    points = np.fromfile(\n        './tests/data/kitti/training/velodyne_reduced/000000.bin',\n        np.float32).reshape(-1, 4)\n    orig_points = points.copy()\n    annos = mmcv.load('./tests/data/kitti/kitti_infos_train.pkl')\n    info = annos[0]\n    rect = info['calib']['R0_rect'].astype(np.float32)\n    Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)\n    annos = info['annos']\n    loc = annos['location']\n    dims = annos['dimensions']\n    rots = annos['rotation_y']\n    gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                  axis=1).astype(np.float32)\n    gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(\n        Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))\n    extra_points = gt_bboxes_3d.corners.reshape(8, 3)[[1, 2, 5, 6], :]\n    extra_points[:, 2] += 0.1\n    extra_points = torch.cat([extra_points, extra_points.new_zeros(4, 1)], 1)\n    points = np.concatenate([points, extra_points.numpy()], 0)\n    points = LiDARPoints(points, points_dim=4)\n    input_dict = dict(points=points, gt_bboxes_3d=gt_bboxes_3d)\n    input_dict = background_points_filter(input_dict)\n\n    points = input_dict['points'].tensor.numpy()\n    repr_str = 
repr(background_points_filter)\n    expected_repr_str = 'BackgroundPointsFilter(bbox_enlarge_range=' \\\n                        '[[0.5, 2.0, 0.5]])'\n    assert repr_str == expected_repr_str\n    assert points.shape == (800, 4)\n    assert np.allclose(orig_points, points)\n\n    # test single float config\n    BackgroundPointsFilter(0.5)\n\n    # The length of bbox_enlarge_range should be 3\n    with pytest.raises(AssertionError):\n        BackgroundPointsFilter((0.5, 2.0))\n\n\ndef test_voxel_based_point_filter():\n    np.random.seed(0)\n    cur_sweep_cfg = dict(\n        voxel_size=[0.1, 0.1, 0.1],\n        point_cloud_range=[-50, -50, -4, 50, 50, 2],\n        max_num_points=1,\n        max_voxels=1024)\n    prev_sweep_cfg = dict(\n        voxel_size=[0.1, 0.1, 0.1],\n        point_cloud_range=[-50, -50, -4, 50, 50, 2],\n        max_num_points=1,\n        max_voxels=1024)\n    voxel_based_points_filter = VoxelBasedPointSampler(\n        cur_sweep_cfg, prev_sweep_cfg, time_dim=3)\n    points = np.stack([\n        np.random.rand(4096) * 120 - 60,\n        np.random.rand(4096) * 120 - 60,\n        np.random.rand(4096) * 10 - 6\n    ],\n                      axis=-1)\n\n    input_time = np.concatenate([np.zeros([2048, 1]), np.ones([2048, 1])], 0)\n    input_points = np.concatenate([points, input_time], 1)\n    input_points = LiDARPoints(input_points, points_dim=4)\n    input_dict = dict(\n        points=input_points, pts_mask_fields=[], pts_seg_fields=[])\n    input_dict = voxel_based_points_filter(input_dict)\n\n    points = input_dict['points']\n    repr_str = repr(voxel_based_points_filter)\n    expected_repr_str = \"\"\"VoxelBasedPointSampler(\n    num_cur_sweep=1024,\n    num_prev_sweep=1024,\n    time_dim=3,\n    cur_voxel_generator=\n        VoxelGenerator(voxel_size=[0.1 0.1 0.1],\n                       point_cloud_range=[-50.0, -50.0, -4.0, 50.0, 50.0, 2.0],\n                       max_num_points=1,\n                       max_voxels=1024,\n                       grid_size=[1000, 1000, 60]),\n    prev_voxel_generator=\n        VoxelGenerator(voxel_size=[0.1 0.1 0.1],\n                       point_cloud_range=[-50.0, -50.0, -4.0, 50.0, 50.0, 2.0],\n                       max_num_points=1,\n                       max_voxels=1024,\n                       grid_size=[1000, 1000, 60]))\"\"\"\n\n    assert repr_str == expected_repr_str\n    assert points.shape == (2048, 4)\n    assert (points.tensor[:, :3].min(0)[0].numpy() <\n            cur_sweep_cfg['point_cloud_range'][0:3]).sum() == 0\n    assert (points.tensor[:, :3].max(0)[0].numpy() >\n            cur_sweep_cfg['point_cloud_range'][3:6]).sum() == 0\n\n    # Test instance mask and semantic mask\n    input_dict = dict(points=input_points)\n    input_dict['pts_instance_mask'] = np.random.randint(0, 10, [4096])\n    input_dict['pts_semantic_mask'] = np.random.randint(0, 6, [4096])\n    input_dict['pts_mask_fields'] = ['pts_instance_mask']\n    input_dict['pts_seg_fields'] = ['pts_semantic_mask']\n\n    input_dict = voxel_based_points_filter(input_dict)\n    pts_instance_mask = input_dict['pts_instance_mask']\n    pts_semantic_mask = input_dict['pts_semantic_mask']\n    assert pts_instance_mask.shape == (2048, )\n    assert pts_semantic_mask.shape == (2048, )\n    assert pts_instance_mask.max() < 10\n    assert pts_instance_mask.min() >= 0\n    assert pts_semantic_mask.max() < 6\n    assert pts_semantic_mask.min() >= 0\n"
  },
  {
    "path": "tests/test_data/test_pipelines/test_indoor_pipeline.py",
    "content": "import mmcv\nimport numpy as np\nimport torch\nfrom os import path as osp\n\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes\nfrom mmdet3d.datasets.pipelines import Compose\n\n\ndef test_scannet_pipeline():\n    class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',\n                   'window', 'bookshelf', 'picture', 'counter', 'desk',\n                   'curtain', 'refrigerator', 'showercurtrain', 'toilet',\n                   'sink', 'bathtub', 'garbagebin')\n\n    np.random.seed(0)\n    pipelines = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='DEPTH',\n            shift_height=True,\n            load_dim=6,\n            use_dim=[0, 1, 2]),\n        dict(\n            type='LoadAnnotations3D',\n            with_bbox_3d=True,\n            with_label_3d=True,\n            with_mask_3d=True,\n            with_seg_3d=True),\n        dict(type='IndoorPointSample', num_points=5),\n        dict(\n            type='RandomFlip3D',\n            sync_2d=False,\n            flip_ratio_bev_horizontal=1.0,\n            flip_ratio_bev_vertical=1.0),\n        dict(\n            type='GlobalRotScaleTrans',\n            rot_range=[-0.087266, 0.087266],\n            scale_ratio_range=[1.0, 1.0],\n            shift_height=True),\n        dict(type='DefaultFormatBundle3D', class_names=class_names),\n        dict(\n            type='Collect3D',\n            keys=[\n                'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',\n                'pts_instance_mask'\n            ]),\n    ]\n    pipeline = Compose(pipelines)\n    info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]\n    results = dict()\n    data_path = './tests/data/scannet'\n    results['pts_filename'] = osp.join(data_path, info['pts_path'])\n    if info['annos']['gt_num'] != 0:\n        scannet_gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(\n            np.float32)\n        scannet_gt_labels_3d = info['annos']['class'].astype(np.long)\n    else:\n        scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)\n        scannet_gt_labels_3d = np.zeros((1, ), dtype=np.long)\n    results['ann_info'] = dict()\n    results['ann_info']['pts_instance_mask_path'] = osp.join(\n        data_path, info['pts_instance_mask_path'])\n    results['ann_info']['pts_semantic_mask_path'] = osp.join(\n        data_path, info['pts_semantic_mask_path'])\n    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(\n        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)\n    results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d\n\n    results['img_fields'] = []\n    results['bbox3d_fields'] = []\n    results['pts_mask_fields'] = []\n    results['pts_seg_fields'] = []\n\n    results = pipeline(results)\n\n    points = results['points']._data\n    gt_bboxes_3d = results['gt_bboxes_3d']._data\n    gt_labels_3d = results['gt_labels_3d']._data\n    pts_semantic_mask = results['pts_semantic_mask']._data\n    pts_instance_mask = results['pts_instance_mask']._data\n    expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],\n                                    [-0.4065, -3.4857, 2.1330, 2.1682],\n                                    [-1.4578, 1.3510, -0.0441, -0.0089],\n                                    [2.2428, -1.1323, -0.0288, 0.0064],\n                                    [0.7052, -2.9752, 1.5560, 1.5912]])\n    expected_gt_bboxes_3d = torch.tensor(\n        [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],\n         [-3.1832, 
3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],\n         [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],\n         [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],\n         [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])\n    expected_gt_labels_3d = np.array([\n        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,\n        0, 0, 0, 5, 5, 5\n    ])\n    expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])\n    expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])\n    assert torch.allclose(points, expected_points, 1e-2)\n    assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,\n                          1e-2)\n    assert np.all(gt_labels_3d.numpy() == expected_gt_labels_3d)\n    assert np.all(pts_semantic_mask.numpy() == expected_pts_semantic_mask)\n    assert np.all(pts_instance_mask.numpy() == expected_pts_instance_mask)\n\n\ndef test_sunrgbd_pipeline():\n    class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',\n                   'dresser', 'night_stand', 'bookshelf', 'bathtub')\n    np.random.seed(0)\n    pipelines = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='DEPTH',\n            shift_height=True,\n            load_dim=6,\n            use_dim=[0, 1, 2]),\n        dict(type='LoadAnnotations3D'),\n        dict(\n            type='RandomFlip3D',\n            sync_2d=False,\n            flip_ratio_bev_horizontal=1.0,\n        ),\n        dict(\n            type='GlobalRotScaleTrans',\n            rot_range=[-0.523599, 0.523599],\n            scale_ratio_range=[0.85, 1.15],\n            shift_height=True),\n        dict(type='IndoorPointSample', num_points=5),\n        dict(type='DefaultFormatBundle3D', class_names=class_names),\n        dict(\n            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),\n    ]\n    pipeline = Compose(pipelines)\n    results = dict()\n    info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')[0]\n    data_path = './tests/data/sunrgbd'\n    results['pts_filename'] = osp.join(data_path, info['pts_path'])\n\n    if info['annos']['gt_num'] != 0:\n        gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype(\n            np.float32)\n        gt_labels_3d = info['annos']['class'].astype(np.long)\n    else:\n        gt_bboxes_3d = np.zeros((1, 7), dtype=np.float32)\n        gt_labels_3d = np.zeros((1, ), dtype=np.long)\n\n    # prepare input of pipeline\n    results['ann_info'] = dict()\n    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(gt_bboxes_3d)\n    results['ann_info']['gt_labels_3d'] = gt_labels_3d\n    results['img_fields'] = []\n    results['bbox3d_fields'] = []\n    results['pts_mask_fields'] = []\n    results['pts_seg_fields'] = []\n\n    results = pipeline(results)\n    points = results['points']._data\n    gt_bboxes_3d = results['gt_bboxes_3d']._data\n    gt_labels_3d = results['gt_labels_3d']._data\n    expected_points = torch.tensor([[0.8678, 1.3470, 0.1105, 0.0905],\n                                    [0.8707, 1.3635, 0.0437, 0.0238],\n                                    [0.8636, 1.3511, 0.0504, 0.0304],\n                                    [0.8690, 1.3461, 0.1265, 0.1065],\n                                    [0.8668, 1.3434, 0.1216, 0.1017]])\n    expected_gt_bboxes_3d = torch.tensor(\n        [[-1.2136, 4.0206, -0.2412, 2.2493, 1.8444, 1.9245, 1.3989],\n         [-2.7420, 4.5777, -0.7686, 0.5718, 0.8629, 0.9510, 1.4446],\n         [0.9729, 1.9087, -0.1443, 
0.6965, 1.5273, 2.0563, 2.9924]])\n    expected_gt_labels_3d = np.array([0, 7, 6])\n    assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)\n    assert np.allclose(gt_labels_3d.flatten(), expected_gt_labels_3d)\n    assert torch.allclose(points, expected_points, 1e-2)\n"
  },
  {
    "path": "tests/test_data/test_pipelines/test_indoor_sample.py",
    "content": "import numpy as np\n\nfrom mmdet3d.core.points import DepthPoints\nfrom mmdet3d.datasets.pipelines import IndoorPointSample\n\n\ndef test_indoor_sample():\n    np.random.seed(0)\n    scannet_sample_points = IndoorPointSample(5)\n    scannet_results = dict()\n    scannet_points = np.array([[1.0719866, -0.7870435, 0.8408122, 0.9196809],\n                               [1.103661, 0.81065744, 2.6616862, 2.7405548],\n                               [1.0276475, 1.5061463, 2.6174362, 2.6963048],\n                               [-0.9709588, 0.6750515, 0.93901765, 1.0178864],\n                               [1.0578915, 1.1693821, 0.87503505, 0.95390373],\n                               [0.05560996, -1.5688863, 1.2440368, 1.3229055],\n                               [-0.15731563, -1.7735453, 2.7535574, 2.832426],\n                               [1.1188195, -0.99211365, 2.5551798, 2.6340485],\n                               [-0.9186557, -1.7041215, 2.0562649, 2.1351335],\n                               [-1.0128691, -1.3394243, 0.040936, 0.1198047]])\n    scannet_results['points'] = DepthPoints(\n        scannet_points, points_dim=4, attribute_dims=dict(height=3))\n    scannet_pts_instance_mask = np.array(\n        [15, 12, 11, 38, 0, 18, 17, 12, 17, 0])\n    scannet_results['pts_instance_mask'] = scannet_pts_instance_mask\n    scannet_pts_semantic_mask = np.array([38, 1, 1, 40, 0, 40, 1, 1, 1, 0])\n    scannet_results['pts_semantic_mask'] = scannet_pts_semantic_mask\n    scannet_results = scannet_sample_points(scannet_results)\n    scannet_points_result = scannet_results['points'].tensor.numpy()\n    scannet_instance_labels_result = scannet_results['pts_instance_mask']\n    scannet_semantic_labels_result = scannet_results['pts_semantic_mask']\n    scannet_choices = np.array([2, 8, 4, 9, 1])\n    assert np.allclose(scannet_points[scannet_choices], scannet_points_result)\n    assert np.all(scannet_pts_instance_mask[scannet_choices] ==\n                  scannet_instance_labels_result)\n    assert np.all(scannet_pts_semantic_mask[scannet_choices] ==\n                  scannet_semantic_labels_result)\n\n    np.random.seed(0)\n    sunrgbd_sample_points = IndoorPointSample(5)\n    sunrgbd_results = dict()\n    sunrgbd_point_cloud = np.array(\n        [[-1.8135729e-01, 1.4695230e+00, -1.2780589e+00, 7.8938007e-03],\n         [1.2581362e-03, 2.0561588e+00, -1.0341064e+00, 2.5184631e-01],\n         [6.8236995e-01, 3.3611867e+00, -9.2599887e-01, 3.5995382e-01],\n         [-2.9432583e-01, 1.8714852e+00, -9.0929651e-01, 3.7665617e-01],\n         [-0.5024875, 1.8032674, -1.1403012, 0.14565146],\n         [-0.520559, 1.6324949, -0.9896099, 0.2963428],\n         [0.95929825, 2.9402404, -0.8746674, 0.41128528],\n         [-0.74624217, 1.5244724, -0.8678476, 0.41810507],\n         [0.56485355, 1.5747732, -0.804522, 0.4814307],\n         [-0.0913099, 1.3673826, -1.2800645, 0.00588822]])\n    sunrgbd_results['points'] = DepthPoints(\n        sunrgbd_point_cloud, points_dim=4, attribute_dims=dict(height=3))\n    sunrgbd_results = sunrgbd_sample_points(sunrgbd_results)\n    sunrgbd_choices = np.array([2, 8, 4, 9, 1])\n    sunrgbd_points_result = sunrgbd_results['points'].tensor.numpy()\n    repr_str = repr(sunrgbd_sample_points)\n    expected_repr_str = 'IndoorPointSample(num_points=5)'\n    assert repr_str == expected_repr_str\n    assert np.allclose(sunrgbd_point_cloud[sunrgbd_choices],\n                       sunrgbd_points_result)\n"
  },
  {
    "path": "tests/test_data/test_pipelines/test_loadings/test_load_points_from_multi_sweeps.py",
    "content": "import numpy as np\n\nfrom mmdet3d.core.points import LiDARPoints\nfrom mmdet3d.datasets.pipelines.loading import LoadPointsFromMultiSweeps\n\n\ndef test_load_points_from_multi_sweeps():\n    np.random.seed(0)\n\n    file_client_args = dict(backend='disk')\n    load_points_from_multi_sweeps_1 = LoadPointsFromMultiSweeps(\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args)\n\n    load_points_from_multi_sweeps_2 = LoadPointsFromMultiSweeps(\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True)\n\n    load_points_from_multi_sweeps_3 = LoadPointsFromMultiSweeps(\n        sweeps_num=9,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args,\n        pad_empty_sweeps=True,\n        remove_close=True,\n        test_mode=True)\n\n    points = np.random.random([100, 5]) * 2\n    points = LiDARPoints(points, points_dim=5)\n    input_results = dict(points=points, sweeps=[], timestamp=None)\n    results = load_points_from_multi_sweeps_1(input_results)\n    assert results['points'].tensor.numpy().shape == (100, 5)\n\n    input_results = dict(points=points, sweeps=[], timestamp=None)\n    results = load_points_from_multi_sweeps_2(input_results)\n    assert results['points'].tensor.numpy().shape == (775, 5)\n\n    sensor2lidar_rotation = np.array(\n        [[9.99999967e-01, 1.13183067e-05, 2.56845368e-04],\n         [-1.12839618e-05, 9.99999991e-01, -1.33719456e-04],\n         [-2.56846879e-04, 1.33716553e-04, 9.99999958e-01]])\n    sensor2lidar_translation = np.array([-0.0009198, -0.03964854, -0.00190136])\n    sweep = dict(\n        data_path='tests/data/nuscenes/sweeps/LIDAR_TOP/'\n        'n008-2018-09-18-12-07-26-0400__LIDAR_TOP__'\n        '1537287083900561.pcd.bin',\n        sensor2lidar_rotation=sensor2lidar_rotation,\n        sensor2lidar_translation=sensor2lidar_translation,\n        timestamp=0)\n\n    input_results = dict(points=points, sweeps=[sweep], timestamp=1.0)\n    results = load_points_from_multi_sweeps_1(input_results)\n    assert results['points'].tensor.numpy().shape == (500, 5)\n\n    input_results = dict(points=points, sweeps=[sweep], timestamp=1.0)\n    results = load_points_from_multi_sweeps_2(input_results)\n    assert results['points'].tensor.numpy().shape == (451, 5)\n\n    input_results = dict(points=points, sweeps=[sweep] * 10, timestamp=1.0)\n    results = load_points_from_multi_sweeps_2(input_results)\n    assert results['points'].tensor.numpy().shape == (3259, 5)\n\n    input_results = dict(points=points, sweeps=[sweep] * 10, timestamp=1.0)\n    results = load_points_from_multi_sweeps_3(input_results)\n    assert results['points'].tensor.numpy().shape == (3259, 5)\n"
  },
  {
    "path": "tests/test_data/test_pipelines/test_loadings/test_loading.py",
    "content": "import mmcv\nimport numpy as np\nimport pytest\nfrom os import path as osp\n\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes\nfrom mmdet3d.core.points import LiDARPoints\nfrom mmdet3d.datasets.pipelines import (LoadAnnotations3D, LoadPointsFromFile,\n                                        LoadPointsFromMultiSweeps)\n\n\ndef test_load_points_from_indoor_file():\n    sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')\n    sunrgbd_load_points_from_file = LoadPointsFromFile(\n        coord_type='DEPTH', load_dim=6, shift_height=True)\n    sunrgbd_results = dict()\n    data_path = './tests/data/sunrgbd'\n    sunrgbd_info = sunrgbd_info[0]\n    sunrgbd_results['pts_filename'] = osp.join(data_path,\n                                               sunrgbd_info['pts_path'])\n    sunrgbd_results = sunrgbd_load_points_from_file(sunrgbd_results)\n    sunrgbd_point_cloud = sunrgbd_results['points'].tensor.numpy()\n    assert sunrgbd_point_cloud.shape == (100, 4)\n\n    scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')\n    scannet_load_data = LoadPointsFromFile(\n        coord_type='DEPTH', shift_height=True)\n    scannet_results = dict()\n    data_path = './tests/data/scannet'\n    scannet_info = scannet_info[0]\n\n    scannet_results['pts_filename'] = osp.join(data_path,\n                                               scannet_info['pts_path'])\n    scannet_results = scannet_load_data(scannet_results)\n    scannet_point_cloud = scannet_results['points'].tensor.numpy()\n    repr_str = repr(scannet_load_data)\n    expected_repr_str = 'LoadPointsFromFile(shift_height=True, ' \\\n                        'file_client_args={\\'backend\\': \\'disk\\'}), ' \\\n                        'load_dim=6, use_dim=[0, 1, 2])'\n    assert repr_str == expected_repr_str\n    assert scannet_point_cloud.shape == (100, 4)\n\n\ndef test_load_points_from_outdoor_file():\n    data_path = 'tests/data/kitti/a.bin'\n    load_points_from_file = LoadPointsFromFile(\n        coord_type='LIDAR', load_dim=4, use_dim=4)\n    results = dict()\n    results['pts_filename'] = data_path\n    results = load_points_from_file(results)\n    points = results['points'].tensor.numpy()\n    assert points.shape == (50, 4)\n    assert np.allclose(points.sum(), 2637.479)\n\n    load_points_from_file = LoadPointsFromFile(\n        coord_type='LIDAR', load_dim=4, use_dim=[0, 1, 2, 3])\n    results = dict()\n    results['pts_filename'] = data_path\n    results = load_points_from_file(results)\n    new_points = results['points'].tensor.numpy()\n    assert new_points.shape == (50, 4)\n    assert np.allclose(points.sum(), 2637.479)\n    np.equal(points, new_points)\n\n    with pytest.raises(AssertionError):\n        LoadPointsFromFile(coord_type='LIDAR', load_dim=4, use_dim=5)\n\n\ndef test_load_annotations3D():\n    # Test scannet LoadAnnotations3D\n    scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]\n    scannet_load_annotations3D = LoadAnnotations3D(\n        with_bbox_3d=True,\n        with_label_3d=True,\n        with_mask_3d=True,\n        with_seg_3d=True)\n    scannet_results = dict()\n    data_path = './tests/data/scannet'\n\n    if scannet_info['annos']['gt_num'] != 0:\n        scannet_gt_bboxes_3d = scannet_info['annos']['gt_boxes_upright_depth']\n        scannet_gt_labels_3d = scannet_info['annos']['class']\n    else:\n        scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)\n        scannet_gt_labels_3d = np.zeros((1, ))\n\n    # prepare input of loading 
pipeline\n    scannet_results['ann_info'] = dict()\n    scannet_results['ann_info']['pts_instance_mask_path'] = osp.join(\n        data_path, scannet_info['pts_instance_mask_path'])\n    scannet_results['ann_info']['pts_semantic_mask_path'] = osp.join(\n        data_path, scannet_info['pts_semantic_mask_path'])\n    scannet_results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(\n        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)\n    scannet_results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d\n\n    scannet_results['bbox3d_fields'] = []\n    scannet_results['pts_mask_fields'] = []\n    scannet_results['pts_seg_fields'] = []\n\n    scannet_results = scannet_load_annotations3D(scannet_results)\n    scannet_gt_boxes = scannet_results['gt_bboxes_3d']\n    scannet_gt_labels = scannet_results['gt_labels_3d']\n\n    scannet_pts_instance_mask = scannet_results['pts_instance_mask']\n    scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']\n    repr_str = repr(scannet_load_annotations3D)\n    expected_repr_str = 'LoadAnnotations3D(\\n    with_bbox_3d=True,     ' \\\n                        'with_label_3d=True,     with_mask_3d=True,     ' \\\n                        'with_seg_3d=True,     with_bbox=False,     ' \\\n                        'with_label=False,     with_mask=False,     ' \\\n                        'with_seg=False,     poly2mask=True)'\n    assert repr_str == expected_repr_str\n    assert scannet_gt_boxes.tensor.shape == (27, 7)\n    assert scannet_gt_labels.shape == (27, )\n    assert scannet_pts_instance_mask.shape == (100, )\n    assert scannet_pts_semantic_mask.shape == (100, )\n\n\ndef test_load_points_from_multi_sweeps():\n    load_points_from_multi_sweeps = LoadPointsFromMultiSweeps()\n    sweep = dict(\n        data_path='./tests/data/nuscenes/sweeps/LIDAR_TOP/'\n        'n008-2018-09-18-12-07-26-0400__LIDAR_TOP__1537287083900561.pcd.bin',\n        timestamp=1537290014899034,\n        sensor2lidar_translation=[-0.02344713, -3.88266051, -0.17151584],\n        sensor2lidar_rotation=np.array(\n            [[9.99979347e-01, 3.99870769e-04, 6.41441690e-03],\n             [-4.42034222e-04, 9.99978299e-01, 6.57316197e-03],\n             [-6.41164929e-03, -6.57586161e-03, 9.99957824e-01]]))\n    points = LiDARPoints(\n        np.array([[1., 2., 3., 4., 5.], [1., 2., 3., 4., 5.],\n                  [1., 2., 3., 4., 5.]]),\n        points_dim=5)\n    results = dict(points=points, timestamp=1537290014899034, sweeps=[sweep])\n\n    results = load_points_from_multi_sweeps(results)\n    points = results['points'].tensor.numpy()\n    repr_str = repr(load_points_from_multi_sweeps)\n    expected_repr_str = 'LoadPointsFromMultiSweeps(sweeps_num=10)'\n    assert repr_str == expected_repr_str\n    assert points.shape == (403, 4)\n"
  },
  {
    "path": "tests/test_data/test_pipelines/test_outdoor_pipeline.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet3d.core.bbox import LiDARInstance3DBoxes\nfrom mmdet3d.datasets.pipelines import Compose\n\n\ndef test_outdoor_aug_pipeline():\n    point_cloud_range = [0, -40, -3, 70.4, 40, 1]\n    class_names = ['Car']\n    np.random.seed(0)\n\n    train_pipeline = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=4,\n            use_dim=4),\n        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n        dict(\n            type='ObjectNoise',\n            num_try=100,\n            translation_std=[1.0, 1.0, 0.5],\n            global_rot_range=[0.0, 0.0],\n            rot_range=[-0.78539816, 0.78539816]),\n        dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n        dict(\n            type='GlobalRotScaleTrans',\n            rot_range=[-0.78539816, 0.78539816],\n            scale_ratio_range=[0.95, 1.05]),\n        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n        dict(type='PointShuffle'),\n        dict(type='DefaultFormatBundle3D', class_names=class_names),\n        dict(\n            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n    ]\n    pipeline = Compose(train_pipeline)\n\n    gt_bboxes_3d = LiDARInstance3DBoxes(\n        torch.tensor([\n            [\n                2.16902428e+01, -4.06038128e-02, -1.61906636e+00,\n                1.65999997e+00, 3.20000005e+00, 1.61000001e+00, -1.53999996e+00\n            ],\n            [\n                7.05006886e+00, -6.57459593e+00, -1.60107934e+00,\n                2.27999997e+00, 1.27799997e+01, 3.66000009e+00, 1.54999995e+00\n            ],\n            [\n                2.24698811e+01, -6.69203758e+00, -1.50118136e+00,\n                2.31999993e+00, 1.47299995e+01, 3.64000010e+00, 1.59000003e+00\n            ],\n            [\n                3.48291969e+01, -7.09058380e+00, -1.36622977e+00,\n                2.31999993e+00, 1.00400000e+01, 3.60999990e+00, 1.61000001e+00\n            ],\n            [\n                4.62394600e+01, -7.75838804e+00, -1.32405007e+00,\n                2.33999991e+00, 1.28299999e+01, 3.63000011e+00, 1.63999999e+00\n            ],\n            [\n                2.82966995e+01, -5.55755794e-01, -1.30332506e+00,\n                1.47000003e+00, 2.23000002e+00, 1.48000002e+00, -1.57000005e+00\n            ],\n            [\n                2.66690197e+01, 2.18230209e+01, -1.73605704e+00,\n                1.55999994e+00, 3.48000002e+00, 1.39999998e+00, -1.69000006e+00\n            ],\n            [\n                3.13197803e+01, 8.16214371e+00, -1.62177873e+00,\n                1.74000001e+00, 3.76999998e+00, 1.48000002e+00, 2.78999996e+00\n            ],\n            [\n                4.34395561e+01, -1.95209332e+01, -1.20757008e+00,\n                1.69000006e+00, 4.09999990e+00, 1.40999997e+00, -1.53999996e+00\n            ],\n            [\n                3.29882965e+01, -3.79360509e+00, -1.69245458e+00,\n                1.74000001e+00, 4.09000015e+00, 1.49000001e+00, -1.52999997e+00\n            ],\n            [\n                3.85469360e+01, 8.35060215e+00, -1.31423414e+00,\n                1.59000003e+00, 4.28000021e+00, 1.45000005e+00, 1.73000002e+00\n            ],\n            [\n                2.22492104e+01, -1.13536005e+01, -1.38272512e+00,\n                1.62000000e+00, 3.55999994e+00, 1.71000004e+00, 
2.48000002e+00\n            ],\n            [\n                3.36115799e+01, -1.97708054e+01, -4.92827654e-01,\n                1.64999998e+00, 3.54999995e+00, 1.79999995e+00, -1.57000005e+00\n            ],\n            [\n                9.85029602e+00, -1.51294518e+00, -1.66834795e+00,\n                1.59000003e+00, 3.17000008e+00, 1.38999999e+00, -8.39999974e-01\n            ]\n        ],\n                     dtype=torch.float32))\n    gt_labels_3d = np.array([0, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n    results = dict(\n        pts_filename='tests/data/kitti/a.bin',\n        ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),\n        bbox3d_fields=[],\n        img_fields=[])\n\n    output = pipeline(results)\n\n    expected_tensor = torch.tensor(\n        [[20.6514, -8.8250, -1.0816, 1.5893, 3.0637, 1.5414, -1.9216],\n         [7.9374, 4.9457, -1.2008, 2.1829, 12.2357, 3.5041, 1.6629],\n         [20.8115, -2.0273, -1.8893, 2.2212, 14.1026, 3.4850, 2.6513],\n         [32.3850, -5.2135, -1.1321, 2.2212, 9.6124, 3.4562, 2.6498],\n         [43.7022, -7.8316, -0.5090, 2.2403, 12.2836, 3.4754, 2.0146],\n         [25.3300, -9.6670, -1.0855, 1.4074, 2.1350, 1.4170, -0.7141],\n         [16.5414, -29.0583, -0.9768, 1.4936, 3.3318, 1.3404, -0.7153],\n         [24.6548, -18.9226, -1.3567, 1.6659, 3.6094, 1.4170, 1.3970],\n         [45.8403, 1.8183, -1.1626, 1.6180, 3.9254, 1.3499, -0.6886],\n         [30.6288, -8.4497, -1.4881, 1.6659, 3.9158, 1.4265, -0.7241],\n         [32.3316, -22.4611, -1.3131, 1.5223, 4.0977, 1.3882, 2.4186],\n         [22.4492, 3.2944, -2.1674, 1.5510, 3.4084, 1.6372, 0.3928],\n         [37.3824, 5.0472, -0.6579, 1.5797, 3.3988, 1.7233, -1.4862],\n         [8.9259, -1.2578, -1.6081, 1.5223, 3.0350, 1.3308, -1.7212]])\n    assert torch.allclose(\n        output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)\n\n\ndef test_outdoor_velocity_aug_pipeline():\n    point_cloud_range = [-50, -50, -5, 50, 50, 3]\n    class_names = [\n        'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n        'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n    ]\n    np.random.seed(0)\n\n    train_pipeline = [\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=4,\n            use_dim=4),\n        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n        dict(\n            type='GlobalRotScaleTrans',\n            rot_range=[-0.3925, 0.3925],\n            scale_ratio_range=[0.95, 1.05],\n            translation_std=[0, 0, 0]),\n        dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n        dict(type='PointShuffle'),\n        dict(type='DefaultFormatBundle3D', class_names=class_names),\n        dict(\n            type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n    ]\n    pipeline = Compose(train_pipeline)\n\n    gt_bboxes_3d = LiDARInstance3DBoxes(\n        torch.tensor(\n            [[\n                -5.2422e+00, 4.0021e+01, -4.7643e-01, 2.0620e+00, 4.4090e+00,\n                1.5480e+00, -1.4880e+00, 8.5338e-03, 4.4934e-02\n            ],\n             [\n                 -2.6675e+01, 5.5950e+00, -1.3053e+00, 3.4300e-01, 4.5800e-01,\n                 7.8200e-01, -4.6276e+00, -4.3284e-04, -1.8543e-03\n             ],\n             [\n                 
-5.8098e+00, 3.5409e+01, -6.6511e-01, 2.3960e+00, 3.9690e+00,\n                 1.7320e+00, -4.6520e+00, 0.0000e+00, 0.0000e+00\n             ],\n             [\n                 -3.1309e+01, 1.0901e+00, -1.0561e+00, 1.9440e+00, 3.8570e+00,\n                 1.7230e+00, -2.8143e+00, -2.7606e-02, -8.0573e-02\n             ],\n             [\n                 -4.5642e+01, 2.0136e+01, -2.4681e-02, 1.9870e+00, 4.4400e+00,\n                 1.9420e+00, 2.8336e-01, 0.0000e+00, 0.0000e+00\n             ],\n             [\n                 -5.1617e+00, 1.8305e+01, -1.0879e+00, 2.3230e+00, 4.8510e+00,\n                 1.3710e+00, -1.5803e+00, 0.0000e+00, 0.0000e+00\n             ],\n             [\n                 -2.5285e+01, 4.1442e+00, -1.2713e+00, 1.7550e+00, 1.9890e+00,\n                 2.2200e+00, -4.4900e+00, -3.1784e-02, -1.5291e-01\n             ],\n             [\n                 -2.2611e+00, 1.9170e+01, -1.1452e+00, 9.1900e-01, 1.1230e+00,\n                 1.9310e+00, 4.7790e-02, 6.7684e-02, -1.7537e+00\n             ],\n             [\n                 -6.5878e+01, 1.3500e+01, -2.2528e-01, 1.8200e+00, 3.8520e+00,\n                 1.5450e+00, -2.8757e+00, 0.0000e+00, 0.0000e+00\n             ],\n             [\n                 -5.4490e+00, 2.8363e+01, -7.7275e-01, 2.2360e+00, 3.7540e+00,\n                 1.5590e+00, -4.6520e+00, -7.9736e-03, 7.7207e-03\n             ]],\n            dtype=torch.float32),\n        box_dim=9)\n\n    gt_labels_3d = np.array([0, 8, 0, 0, 0, 0, -1, 7, 0, 0])\n    results = dict(\n        pts_filename='tests/data/kitti/a.bin',\n        ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),\n        bbox3d_fields=[],\n        img_fields=[])\n\n    output = pipeline(results)\n\n    expected_tensor = torch.tensor(\n        [[\n            -3.7849e+00, -4.1057e+01, -4.8668e-01, 2.1064e+00, 4.5039e+00,\n            1.5813e+00, -1.6919e+00, 1.0469e-02, -4.5533e-02\n        ],\n         [\n             -2.7010e+01, -6.7551e+00, -1.3334e+00, 3.5038e-01, 4.6786e-01,\n             7.9883e-01, 1.4477e+00, -5.1440e-04, 1.8758e-03\n         ],\n         [\n             -4.5448e+00, -3.6372e+01, -6.7942e-01, 2.4476e+00, 4.0544e+00,\n             1.7693e+00, 1.4721e+00, 0.0000e+00, -0.0000e+00\n         ],\n         [\n             -3.1916e+01, -2.3379e+00, -1.0788e+00, 1.9858e+00, 3.9400e+00,\n             1.7601e+00, -3.6564e-01, -3.1333e-02, 8.1166e-02\n         ],\n         [\n             -4.5802e+01, -2.2340e+01, -2.5213e-02, 2.0298e+00, 4.5355e+00,\n             1.9838e+00, 2.8199e+00, 0.0000e+00, -0.0000e+00\n         ],\n         [\n             -4.5526e+00, -1.8887e+01, -1.1114e+00, 2.3730e+00, 4.9554e+00,\n             1.4005e+00, -1.5997e+00, 0.0000e+00, -0.0000e+00\n         ],\n         [\n             -2.5648e+01, -5.2197e+00, -1.2987e+00, 1.7928e+00, 2.0318e+00,\n             2.2678e+00, 1.3100e+00, -3.8428e-02, 1.5485e-01\n         ],\n         [\n             -1.5578e+00, -1.9657e+01, -1.1699e+00, 9.3878e-01, 1.1472e+00,\n             1.9726e+00, 3.0555e+00, 4.5907e-04, 1.7928e+00\n         ],\n         [\n             -4.4522e+00, -2.9166e+01, -7.8938e-01, 2.2841e+00, 3.8348e+00,\n             1.5925e+00, 1.4721e+00, -7.8371e-03, -8.1931e-03\n         ]])\n    assert torch.allclose(\n        output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)\n"
  },
  {
    "path": "tests/test_metrics/test_indoor_eval.py",
    "content": "import numpy as np\nimport pytest\nimport torch\n\nfrom mmdet3d.core.evaluation.indoor_eval import average_precision, indoor_eval\n\n\ndef test_indoor_eval():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    from mmdet3d.core.bbox.structures import Box3DMode, DepthInstance3DBoxes\n    det_infos = [{\n        'labels_3d':\n        torch.tensor([0, 1, 2, 2, 0, 3, 1, 2, 3, 2]),\n        'boxes_3d':\n        DepthInstance3DBoxes(\n            torch.tensor([[\n                -2.4089e-03, -3.3174e+00, 4.9438e-01, 2.1668e+00, 2.8431e-01,\n                1.6506e+00, 0.0000e+00\n            ],\n                          [\n                              -3.4269e-01, -2.7565e+00, 2.8144e-02, 6.8554e-01,\n                              9.6854e-01, 6.1755e-01, 0.0000e+00\n                          ],\n                          [\n                              -3.8320e+00, -1.0646e+00, 1.7074e-01, 2.4981e-01,\n                              4.4708e-01, 6.2538e-01, 0.0000e+00\n                          ],\n                          [\n                              4.1073e-01, 3.3757e+00, 3.4311e-01, 8.0617e-01,\n                              2.8679e-01, 1.6060e+00, 0.0000e+00\n                          ],\n                          [\n                              6.1199e-01, -3.1041e+00, 4.1873e-01, 1.2310e+00,\n                              4.0162e-01, 1.7303e+00, 0.0000e+00\n                          ],\n                          [\n                              -5.9877e-01, -2.6011e+00, 1.1148e+00, 1.5704e-01,\n                              7.5957e-01, 9.6930e-01, 0.0000e+00\n                          ],\n                          [\n                              2.7462e-01, -3.0088e+00, 6.5231e-02, 8.1208e-01,\n                              4.1861e-01, 3.7339e-01, 0.0000e+00\n                          ],\n                          [\n                              -1.4704e+00, -2.0024e+00, 2.7479e-01, 1.7888e+00,\n                              1.0566e+00, 1.3704e+00, 0.0000e+00\n                          ],\n                          [\n                              8.2727e-02, -3.1160e+00, 2.5690e-01, 1.4054e+00,\n                              2.0772e-01, 9.6792e-01, 0.0000e+00\n                          ],\n                          [\n                              2.6896e+00, 1.9881e+00, 1.1566e+00, 9.9885e-02,\n                              3.5713e-01, 4.5638e-01, 0.0000e+00\n                          ]]),\n            origin=(0.5, 0.5, 0)),\n        'scores_3d':\n        torch.tensor([\n            1.7516e-05, 1.0167e-06, 8.4486e-07, 7.1048e-02, 6.4274e-05,\n            1.5003e-07, 5.8102e-06, 1.9399e-08, 5.3126e-07, 1.8630e-09\n        ])\n    }]\n\n    label2cat = {\n        0: 'cabinet',\n        1: 'bed',\n        2: 'chair',\n        3: 'sofa',\n    }\n    gt_annos = [{\n        'gt_num':\n        10,\n        'gt_boxes_upright_depth':\n        np.array([[\n            -2.4089e-03, -3.3174e+00, 4.9438e-01, 2.1668e+00, 2.8431e-01,\n            1.6506e+00, 0.0000e+00\n        ],\n                  [\n                      -3.4269e-01, -2.7565e+00, 2.8144e-02, 6.8554e-01,\n                      9.6854e-01, 6.1755e-01, 0.0000e+00\n                  ],\n                  [\n                      -3.8320e+00, -1.0646e+00, 1.7074e-01, 2.4981e-01,\n                      4.4708e-01, 6.2538e-01, 0.0000e+00\n                  ],\n                  [\n                      4.1073e-01, 3.3757e+00, 3.4311e-01, 8.0617e-01,\n                      2.8679e-01, 1.6060e+00, 
0.0000e+00\n                  ],\n                  [\n                      6.1199e-01, -3.1041e+00, 4.1873e-01, 1.2310e+00,\n                      4.0162e-01, 1.7303e+00, 0.0000e+00\n                  ],\n                  [\n                      -5.9877e-01, -2.6011e+00, 1.1148e+00, 1.5704e-01,\n                      7.5957e-01, 9.6930e-01, 0.0000e+00\n                  ],\n                  [\n                      2.7462e-01, -3.0088e+00, 6.5231e-02, 8.1208e-01,\n                      4.1861e-01, 3.7339e-01, 0.0000e+00\n                  ],\n                  [\n                      -1.4704e+00, -2.0024e+00, 2.7479e-01, 1.7888e+00,\n                      1.0566e+00, 1.3704e+00, 0.0000e+00\n                  ],\n                  [\n                      8.2727e-02, -3.1160e+00, 2.5690e-01, 1.4054e+00,\n                      2.0772e-01, 9.6792e-01, 0.0000e+00\n                  ],\n                  [\n                      2.6896e+00, 1.9881e+00, 1.1566e+00, 9.9885e-02,\n                      3.5713e-01, 4.5638e-01, 0.0000e+00\n                  ]]),\n        'class':\n        np.array([0, 1, 2, 0, 0, 3, 1, 3, 3, 2])\n    }]\n\n    ret_value = indoor_eval(\n        gt_annos,\n        det_infos, [0.25, 0.5],\n        label2cat,\n        box_type_3d=DepthInstance3DBoxes,\n        box_mode_3d=Box3DMode.DEPTH)\n\n    assert np.isclose(ret_value['cabinet_AP_0.25'], 0.666667)\n    assert np.isclose(ret_value['bed_AP_0.25'], 1.0)\n    assert np.isclose(ret_value['chair_AP_0.25'], 0.5)\n    assert np.isclose(ret_value['mAP_0.25'], 0.708333)\n    assert np.isclose(ret_value['mAR_0.25'], 0.833333)\n\n\ndef test_indoor_eval_less_classes():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    from mmdet3d.core.bbox.structures import Box3DMode, DepthInstance3DBoxes\n    det_infos = [{\n        'labels_3d':\n        torch.tensor([0]),\n        'boxes_3d':\n        DepthInstance3DBoxes(torch.tensor([[1., 1., 1., 1., 1., 1., 1.]])),\n        'scores_3d':\n        torch.tensor([.5])\n    }, {\n        'labels_3d':\n        torch.tensor([1]),\n        'boxes_3d':\n        DepthInstance3DBoxes(torch.tensor([[1., 1., 1., 1., 1., 1., 1.]])),\n        'scores_3d':\n        torch.tensor([.5])\n    }]\n\n    label2cat = {0: 'cabinet', 1: 'bed', 2: 'chair'}\n    gt_annos = [{\n        'gt_num':\n        2,\n        'gt_boxes_upright_depth':\n        np.array([[0., 0., 0., 1., 1., 1., 1.], [1., 1., 1., 1., 1., 1., 1.]]),\n        'class':\n        np.array([2, 0])\n    }, {\n        'gt_num':\n        1,\n        'gt_boxes_upright_depth':\n        np.array([\n            [1., 1., 1., 1., 1., 1., 1.],\n        ]),\n        'class':\n        np.array([1])\n    }]\n\n    ret_value = indoor_eval(\n        gt_annos,\n        det_infos, [0.25, 0.5],\n        label2cat,\n        box_type_3d=DepthInstance3DBoxes,\n        box_mode_3d=Box3DMode.DEPTH)\n\n    assert np.isclose(ret_value['mAP_0.25'], 0.666667)\n    assert np.isclose(ret_value['mAR_0.25'], 0.666667)\n\n\ndef test_average_precision():\n    ap = average_precision(\n        np.array([[0.25, 0.5, 0.75], [0.25, 0.5, 0.75]]),\n        np.array([[1., 1., 1.], [1., 1., 1.]]), '11points')\n    assert abs(ap[0] - 0.06611571) < 0.001\n"
  },
  {
    "path": "tests/test_metrics/test_kitti_eval.py",
    "content": "import numpy as np\nimport pytest\nimport torch\n\nfrom mmdet3d.core.evaluation.kitti_utils.eval import (do_eval, eval_class,\n                                                      kitti_eval)\n\n\ndef test_do_eval():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and CUDA')\n    gt_name = np.array(\n        ['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car', 'DontCare', 'DontCare'])\n    gt_truncated = np.array([0., 0., 0., -1., -1., -1., -1.])\n    gt_occluded = np.array([0, 0, 3, -1, -1, -1, -1])\n    gt_alpha = np.array([-1.57, 1.85, -1.65, -10., -10., -10., -10.])\n    gt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134],\n                        [676.21954, 165.70988, 691.63745, 193.83748],\n                        [389.4093, 182.48041, 421.49072, 202.13422],\n                        [232.0577, 186.16724, 301.94623, 217.4024],\n                        [758.6537, 172.98509, 816.32434, 212.76743],\n                        [532.37, 176.35, 542.68, 185.27],\n                        [559.62, 175.83, 575.4, 183.15]])\n    gt_dimensions = np.array([[12.34, 2.85, 2.63], [3.69, 1.67, 1.87],\n                              [2.02, 1.86, 0.6], [-1., -1., -1.],\n                              [-1., -1., -1.], [-1., -1., -1.],\n                              [-1., -1., -1.]])\n    gt_location = np.array([[4.700e-01, 1.490e+00, 6.944e+01],\n                            [-1.653e+01, 2.390e+00, 5.849e+01],\n                            [4.590e+00, 1.320e+00, 4.584e+01],\n                            [-1.000e+03, -1.000e+03, -1.000e+03],\n                            [-1.000e+03, -1.000e+03, -1.000e+03],\n                            [-1.000e+03, -1.000e+03, -1.000e+03],\n                            [-1.000e+03, -1.000e+03, -1.000e+03]])\n    gt_rotation_y = [-1.56, 1.57, -1.55, -10., -10., -10., -10.]\n    gt_anno = dict(\n        name=gt_name,\n        truncated=gt_truncated,\n        occluded=gt_occluded,\n        alpha=gt_alpha,\n        bbox=gt_bbox,\n        dimensions=gt_dimensions,\n        location=gt_location,\n        rotation_y=gt_rotation_y)\n\n    dt_name = np.array(['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car'])\n    dt_truncated = np.array([0., 0., 0., 0., 0.])\n    dt_occluded = np.array([0, 0, 0, 0, 0])\n    dt_alpha = np.array([1.0744612, 1.2775835, 1.82563, 2.1145396, -1.7676563])\n    dt_dimensions = np.array([[1.4441837, 1.7450154, 0.53160036],\n                              [1.6501029, 1.7540325, 0.5162356],\n                              [3.9313498, 1.4899347, 1.5655756],\n                              [4.0111866, 1.5350999, 1.585221],\n                              [3.7337692, 1.5117968, 1.5515774]])\n    dt_location = np.array([[4.6671643, 1.285098, 45.836895],\n                            [4.658241, 1.3088846, 45.85148],\n                            [-16.598526, 2.298814, 58.618088],\n                            [-18.629122, 2.2990575, 39.305355],\n                            [7.0964046, 1.5178275, 29.32426]])\n    dt_rotation_y = np.array(\n        [1.174933, 1.3778262, 1.550529, 1.6742425, -1.5330327])\n    dt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134],\n                        [676.21954, 165.70988, 691.63745, 193.83748],\n                        [389.4093, 182.48041, 421.49072, 202.13422],\n                        [232.0577, 186.16724, 301.94623, 217.4024],\n                        [758.6537, 172.98509, 816.32434, 212.76743]])\n    dt_score = np.array(\n        [0.18151495, 0.57920843, 0.27795696, 
0.23100418, 0.21541929])\n    dt_anno = dict(\n        name=dt_name,\n        truncated=dt_truncated,\n        occluded=dt_occluded,\n        alpha=dt_alpha,\n        bbox=dt_bbox,\n        dimensions=dt_dimensions,\n        location=dt_location,\n        rotation_y=dt_rotation_y,\n        score=dt_score)\n    current_classes = [1, 2, 0]\n    min_overlaps = np.array([[[0.5, 0.5, 0.7], [0.5, 0.5, 0.7],\n                              [0.5, 0.5, 0.7]],\n                             [[0.5, 0.5, 0.7], [0.25, 0.25, 0.5],\n                              [0.25, 0.25, 0.5]]])\n    eval_types = ['bbox', 'bev', '3d', 'aos']\n    mAP_bbox, mAP_bev, mAP_3d, mAP_aos = do_eval([gt_anno], [dt_anno],\n                                                 current_classes, min_overlaps,\n                                                 eval_types)\n    expected_mAP_bbox = np.array([[[0., 0.], [9.09090909, 9.09090909],\n                                   [9.09090909, 9.09090909]],\n                                  [[0., 0.], [9.09090909, 9.09090909],\n                                   [9.09090909, 9.09090909]],\n                                  [[0., 0.], [9.09090909, 9.09090909],\n                                   [9.09090909, 9.09090909]]])\n    expected_mAP_bev = np.array([[[0., 0.], [0., 0.], [0., 0.]],\n                                 [[0., 0.], [0., 0.], [0., 0.]],\n                                 [[0., 0.], [0., 0.], [0., 0.]]])\n    expected_mAP_3d = np.array([[[0., 0.], [0., 0.], [0., 0.]],\n                                [[0., 0.], [0., 0.], [0., 0.]],\n                                [[0., 0.], [0., 0.], [0., 0.]]])\n    expected_mAP_aos = np.array([[[0., 0.], [0.55020816, 0.55020816],\n                                  [0.55020816, 0.55020816]],\n                                 [[0., 0.], [8.36633862, 8.36633862],\n                                  [8.36633862, 8.36633862]],\n                                 [[0., 0.], [8.63476893, 8.63476893],\n                                  [8.63476893, 8.63476893]]])\n    assert np.allclose(mAP_bbox, expected_mAP_bbox)\n    assert np.allclose(mAP_bev, expected_mAP_bev)\n    assert np.allclose(mAP_3d, expected_mAP_3d)\n    assert np.allclose(mAP_aos, expected_mAP_aos)\n\n\ndef test_kitti_eval():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and CUDA')\n    gt_name = np.array(\n        ['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car', 'DontCare', 'DontCare'])\n    gt_truncated = np.array([0., 0., 0., -1., -1., -1., -1.])\n    gt_occluded = np.array([0, 0, 3, -1, -1, -1, -1])\n    gt_alpha = np.array([-1.57, 1.85, -1.65, -10., -10., -10., -10.])\n    gt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134],\n                        [676.21954, 165.70988, 691.63745, 193.83748],\n                        [389.4093, 182.48041, 421.49072, 202.13422],\n                        [232.0577, 186.16724, 301.94623, 217.4024],\n                        [758.6537, 172.98509, 816.32434, 212.76743],\n                        [532.37, 176.35, 542.68, 185.27],\n                        [559.62, 175.83, 575.4, 183.15]])\n    gt_dimensions = np.array([[12.34, 2.85, 2.63], [3.69, 1.67, 1.87],\n                              [2.02, 1.86, 0.6], [-1., -1., -1.],\n                              [-1., -1., -1.], [-1., -1., -1.],\n                              [-1., -1., -1.]])\n    gt_location = np.array([[4.700e-01, 1.490e+00, 6.944e+01],\n                            [-1.653e+01, 2.390e+00, 5.849e+01],\n                            [4.590e+00, 1.320e+00, 
4.584e+01],\n                            [-1.000e+03, -1.000e+03, -1.000e+03],\n                            [-1.000e+03, -1.000e+03, -1.000e+03],\n                            [-1.000e+03, -1.000e+03, -1.000e+03],\n                            [-1.000e+03, -1.000e+03, -1.000e+03]])\n    gt_rotation_y = [-1.56, 1.57, -1.55, -10., -10., -10., -10.]\n    gt_anno = dict(\n        name=gt_name,\n        truncated=gt_truncated,\n        occluded=gt_occluded,\n        alpha=gt_alpha,\n        bbox=gt_bbox,\n        dimensions=gt_dimensions,\n        location=gt_location,\n        rotation_y=gt_rotation_y)\n\n    dt_name = np.array(['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car'])\n    dt_truncated = np.array([0., 0., 0., 0., 0.])\n    dt_occluded = np.array([0, 0, 0, 0, 0])\n    dt_alpha = np.array([1.0744612, 1.2775835, 1.82563, 2.1145396, -1.7676563])\n    dt_dimensions = np.array([[1.4441837, 1.7450154, 0.53160036],\n                              [1.6501029, 1.7540325, 0.5162356],\n                              [3.9313498, 1.4899347, 1.5655756],\n                              [4.0111866, 1.5350999, 1.585221],\n                              [3.7337692, 1.5117968, 1.5515774]])\n    dt_location = np.array([[4.6671643, 1.285098, 45.836895],\n                            [4.658241, 1.3088846, 45.85148],\n                            [-16.598526, 2.298814, 58.618088],\n                            [-18.629122, 2.2990575, 39.305355],\n                            [7.0964046, 1.5178275, 29.32426]])\n    dt_rotation_y = np.array(\n        [1.174933, 1.3778262, 1.550529, 1.6742425, -1.5330327])\n    dt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134],\n                        [676.21954, 165.70988, 691.63745, 193.83748],\n                        [389.4093, 182.48041, 421.49072, 202.13422],\n                        [232.0577, 186.16724, 301.94623, 217.4024],\n                        [758.6537, 172.98509, 816.32434, 212.76743]])\n    dt_score = np.array(\n        [0.18151495, 0.57920843, 0.27795696, 0.23100418, 0.21541929])\n    dt_anno = dict(\n        name=dt_name,\n        truncated=dt_truncated,\n        occluded=dt_occluded,\n        alpha=dt_alpha,\n        bbox=dt_bbox,\n        dimensions=dt_dimensions,\n        location=dt_location,\n        rotation_y=dt_rotation_y,\n        score=dt_score)\n\n    current_classes = [1, 2, 0]\n    result, ret_dict = kitti_eval([gt_anno], [dt_anno], current_classes)\n    assert np.isclose(ret_dict['KITTI/Overall_2D_moderate'], 9.090909090909092)\n    assert np.isclose(ret_dict['KITTI/Overall_2D_hard'], 9.090909090909092)\n\n\ndef test_eval_class():\n    gt_name = np.array(\n        ['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car', 'DontCare', 'DontCare'])\n    gt_truncated = np.array([0., 0., 0., -1., -1., -1., -1.])\n    gt_occluded = np.array([0, 0, 3, -1, -1, -1, -1])\n    gt_alpha = np.array([-1.57, 1.85, -1.65, -10., -10., -10., -10.])\n    gt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134],\n                        [676.21954, 165.70988, 691.63745, 193.83748],\n                        [389.4093, 182.48041, 421.49072, 202.13422],\n                        [232.0577, 186.16724, 301.94623, 217.4024],\n                        [758.6537, 172.98509, 816.32434, 212.76743],\n                        [532.37, 176.35, 542.68, 185.27],\n                        [559.62, 175.83, 575.4, 183.15]])\n    gt_anno = dict(\n        name=gt_name,\n        truncated=gt_truncated,\n        occluded=gt_occluded,\n        alpha=gt_alpha,\n        bbox=gt_bbox)\n\n    
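# Only the 2D annotation fields are provided in this test; eval_class is\n    # called below with metric=0 (2D bbox), so 3D dimensions/location/rotation_y\n    # are not needed.\n    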
dt_name = np.array(['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car'])\n    dt_truncated = np.array([0., 0., 0., 0., 0.])\n    dt_occluded = np.array([0, 0, 0, 0, 0])\n    dt_alpha = np.array([1.0744612, 1.2775835, 1.82563, 2.1145396, -1.7676563])\n    dt_bbox = np.array([[674.9179, 165.48549, 693.23694, 193.42134],\n                        [676.21954, 165.70988, 691.63745, 193.83748],\n                        [389.4093, 182.48041, 421.49072, 202.13422],\n                        [232.0577, 186.16724, 301.94623, 217.4024],\n                        [758.6537, 172.98509, 816.32434, 212.76743]])\n    dt_score = np.array(\n        [0.18151495, 0.57920843, 0.27795696, 0.23100418, 0.21541929])\n    dt_anno = dict(\n        name=dt_name,\n        truncated=dt_truncated,\n        occluded=dt_occluded,\n        alpha=dt_alpha,\n        bbox=dt_bbox,\n        score=dt_score)\n    current_classes = [1, 2, 0]\n    difficultys = [0, 1, 2]\n    metric = 0\n    min_overlaps = np.array([[[0.5, 0.5, 0.7], [0.5, 0.5, 0.7],\n                              [0.5, 0.5, 0.7]],\n                             [[0.5, 0.5, 0.7], [0.25, 0.25, 0.5],\n                              [0.25, 0.25, 0.5]]])\n\n    ret_dict = eval_class([gt_anno], [dt_anno], current_classes, difficultys,\n                          metric, min_overlaps, True, 1)\n    recall_sum = np.sum(ret_dict['recall'])\n    precision_sum = np.sum(ret_dict['precision'])\n    orientation_sum = np.sum(ret_dict['orientation'])\n    assert np.isclose(recall_sum, 16)\n    assert np.isclose(precision_sum, 16)\n    assert np.isclose(orientation_sum, 10.252829201850309)\n"
  },
  {
    "path": "tests/test_metrics/test_losses.py",
    "content": "import pytest\nimport torch\n\n\ndef test_chamfer_disrance():\n    from mmdet3d.models.losses import ChamferDistance, chamfer_distance\n\n    with pytest.raises(AssertionError):\n        # test invalid mode\n        ChamferDistance(mode='smoothl1')\n        # test invalid type of reduction\n        ChamferDistance(mode='l2', reduction=None)\n\n    self = ChamferDistance(\n        mode='l2', reduction='sum', loss_src_weight=1.0, loss_dst_weight=1.0)\n    source = torch.tensor([[[-0.9888, 0.9683, -0.8494],\n                            [-6.4536, 4.5146,\n                             1.6861], [2.0482, 5.6936, -1.4701],\n                            [-0.5173, 5.6472, 2.1748],\n                            [-2.8010, 5.4423, -1.2158],\n                            [2.4018, 2.4389, -0.2403],\n                            [-2.8811, 3.8486, 1.4750],\n                            [-0.2031, 3.8969,\n                             -1.5245], [1.3827, 4.9295, 1.1537],\n                            [-2.6961, 2.2621, -1.0976]],\n                           [[0.3692, 1.8409,\n                             -1.4983], [1.9995, 6.3602, 0.1798],\n                            [-2.1317, 4.6011,\n                             -0.7028], [2.4158, 3.1482, 0.3169],\n                            [-0.5836, 3.6250, -1.2650],\n                            [-1.9862, 1.6182, -1.4901],\n                            [2.5992, 1.2847, -0.8471],\n                            [-0.3467, 5.3681, -1.4755],\n                            [-0.8576, 3.3400, -1.7399],\n                            [2.7447, 4.6349, 0.1994]]])\n\n    target = torch.tensor([[[-0.4758, 1.0094, -0.8645],\n                            [-0.3130, 0.8564, -0.9061],\n                            [-0.1560, 2.0394, -0.8936],\n                            [-0.3685, 1.6467, -0.8271],\n                            [-0.2740, 2.2212, -0.7980]],\n                           [[1.4856, 2.5299,\n                             -1.0047], [2.3262, 3.3065, -0.9475],\n                            [2.4593, 2.5870,\n                             -0.9423], [0.0000, 0.0000, 0.0000],\n                            [0.0000, 0.0000, 0.0000]]])\n\n    loss_source, loss_target, indices1, indices2 = self(\n        source, target, return_indices=True)\n\n    assert torch.allclose(loss_source, torch.tensor(219.5936))\n    assert torch.allclose(loss_target, torch.tensor(22.3705))\n\n    expected_inds1 = [[0, 4, 4, 4, 4, 2, 4, 4, 4, 3],\n                      [0, 1, 0, 1, 0, 4, 2, 0, 0, 1]]\n    expected_inds2 = [[0, 4, 4, 4, 4, 2, 4, 4, 4, 3],\n                      [0, 1, 0, 1, 0, 3, 2, 0, 0, 1]]\n    assert (torch.equal(indices1, indices1.new_tensor(expected_inds1))\n            or torch.equal(indices1, indices1.new_tensor(expected_inds2)))\n    assert torch.equal(indices2,\n                       indices2.new_tensor([[0, 0, 0, 0, 0], [0, 3, 6, 0, 0]]))\n\n    loss_source, loss_target, indices1, indices2 = chamfer_distance(\n        source, target, reduction='sum')\n\n    assert torch.allclose(loss_source, torch.tensor(219.5936))\n    assert torch.allclose(loss_target, torch.tensor(22.3705))\n    assert (torch.equal(indices1, indices1.new_tensor(expected_inds1))\n            or torch.equal(indices1, indices1.new_tensor(expected_inds2)))\n    assert (indices2 == indices2.new_tensor([[0, 0, 0, 0, 0], [0, 3, 6, 0,\n                                                               0]])).all()\n"
  },
  {
    "path": "tests/test_metrics/test_seg_eval.py",
    "content": "import numpy as np\nimport pytest\nimport torch\n\nfrom mmdet3d.core.evaluation.seg_eval import seg_eval\n\n\ndef test_indoor_eval():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    seg_preds = [\n        torch.Tensor(\n            [0, 0, 1, 0, 2, 1, 3, 1, 1, 0, 2, 2, 2, 2, 1, 3, 0, 3, 3, 3])\n    ]\n    gt_labels = [\n        torch.Tensor(\n            [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])\n    ]\n\n    label2cat = {\n        0: 'car',\n        1: 'bicycle',\n        2: 'motorcycle',\n        3: 'truck',\n    }\n    ret_value = seg_eval(gt_labels, seg_preds, label2cat)\n\n    assert np.isclose(ret_value['car'], 0.428571429)\n    assert np.isclose(ret_value['bicycle'], 0.428571429)\n    assert np.isclose(ret_value['motorcycle'], 0.6666667)\n    assert np.isclose(ret_value['truck'], 0.6666667)\n\n    assert np.isclose(ret_value['acc'], 0.7)\n    assert np.isclose(ret_value['acc_cls'], 0.7)\n    assert np.isclose(ret_value['miou'], 0.547619048)\n"
  },
  {
    "path": "tests/test_models/test_backbones.py",
    "content": "import numpy as np\nimport pytest\nimport torch\n\nfrom mmdet3d.models import build_backbone\n\n\ndef test_pointnet2_sa_ssg():\n    if not torch.cuda.is_available():\n        pytest.skip()\n\n    cfg = dict(\n        type='PointNet2SASSG',\n        in_channels=6,\n        num_points=(32, 16),\n        radius=(0.8, 1.2),\n        num_samples=(16, 8),\n        sa_channels=((8, 16), (16, 16)),\n        fp_channels=((16, 16), (16, 16)))\n    self = build_backbone(cfg)\n    self.cuda()\n    assert self.SA_modules[0].mlps[0].layer0.conv.in_channels == 6\n    assert self.SA_modules[0].mlps[0].layer0.conv.out_channels == 8\n    assert self.SA_modules[0].mlps[0].layer1.conv.out_channels == 16\n    assert self.SA_modules[1].mlps[0].layer1.conv.out_channels == 16\n    assert self.FP_modules[0].mlps.layer0.conv.in_channels == 32\n    assert self.FP_modules[0].mlps.layer0.conv.out_channels == 16\n    assert self.FP_modules[1].mlps.layer0.conv.in_channels == 19\n\n    xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', dtype=np.float32)\n    xyz = torch.from_numpy(xyz).view(1, -1, 6).cuda()  # (B, N, 6)\n    # test forward\n    ret_dict = self(xyz)\n    fp_xyz = ret_dict['fp_xyz']\n    fp_features = ret_dict['fp_features']\n    fp_indices = ret_dict['fp_indices']\n    assert len(fp_xyz) == len(fp_features) == len(fp_indices) == 3\n    assert fp_xyz[0].shape == torch.Size([1, 16, 3])\n    assert fp_xyz[1].shape == torch.Size([1, 32, 3])\n    assert fp_xyz[2].shape == torch.Size([1, 100, 3])\n    assert fp_features[2].shape == torch.Size([1, 16, 100])\n    assert fp_indices[2].shape == torch.Size([1, 100])\n\n\ndef test_multi_backbone():\n    if not torch.cuda.is_available():\n        pytest.skip()\n\n    # test list config\n    cfg_list = dict(\n        type='MultiBackbone',\n        num_streams=4,\n        suffixes=['net0', 'net1', 'net2', 'net3'],\n        backbones=[\n            dict(\n                type='PointNet2SASSG',\n                in_channels=4,\n                num_points=(256, 128, 64, 32),\n                radius=(0.2, 0.4, 0.8, 1.2),\n                num_samples=(64, 32, 16, 16),\n                sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                             (128, 128, 256)),\n                fp_channels=((256, 256), (256, 256)),\n                norm_cfg=dict(type='BN2d')),\n            dict(\n                type='PointNet2SASSG',\n                in_channels=4,\n                num_points=(256, 128, 64, 32),\n                radius=(0.2, 0.4, 0.8, 1.2),\n                num_samples=(64, 32, 16, 16),\n                sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                             (128, 128, 256)),\n                fp_channels=((256, 256), (256, 256)),\n                norm_cfg=dict(type='BN2d')),\n            dict(\n                type='PointNet2SASSG',\n                in_channels=4,\n                num_points=(256, 128, 64, 32),\n                radius=(0.2, 0.4, 0.8, 1.2),\n                num_samples=(64, 32, 16, 16),\n                sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                             (128, 128, 256)),\n                fp_channels=((256, 256), (256, 256)),\n                norm_cfg=dict(type='BN2d')),\n            dict(\n                type='PointNet2SASSG',\n                in_channels=4,\n                num_points=(256, 128, 64, 32),\n                radius=(0.2, 0.4, 0.8, 1.2),\n                num_samples=(64, 32, 16, 16),\n                
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                             (128, 128, 256)),\n                fp_channels=((256, 256), (256, 256)),\n                norm_cfg=dict(type='BN2d'))\n        ])\n\n    self = build_backbone(cfg_list)\n    self.cuda()\n\n    assert len(self.backbone_list) == 4\n\n    xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', dtype=np.float32)\n    xyz = torch.from_numpy(xyz).view(1, -1, 6).cuda()  # (B, N, 6)\n    # test forward\n    ret_dict = self(xyz[:, :, :4])\n\n    assert ret_dict['hd_feature'].shape == torch.Size([1, 256, 128])\n    assert ret_dict['fp_xyz_net0'][-1].shape == torch.Size([1, 128, 3])\n    assert ret_dict['fp_features_net0'][-1].shape == torch.Size([1, 256, 128])\n\n    # test dict config\n    cfg_dict = dict(\n        type='MultiBackbone',\n        num_streams=2,\n        suffixes=['net0', 'net1'],\n        aggregation_mlp_channels=[512, 128],\n        backbones=dict(\n            type='PointNet2SASSG',\n            in_channels=4,\n            num_points=(256, 128, 64, 32),\n            radius=(0.2, 0.4, 0.8, 1.2),\n            num_samples=(64, 32, 16, 16),\n            sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                         (128, 128, 256)),\n            fp_channels=((256, 256), (256, 256)),\n            norm_cfg=dict(type='BN2d')))\n\n    self = build_backbone(cfg_dict)\n    self.cuda()\n\n    assert len(self.backbone_list) == 2\n\n    # test forward\n    ret_dict = self(xyz[:, :, :4])\n\n    assert ret_dict['hd_feature'].shape == torch.Size([1, 128, 128])\n    assert ret_dict['fp_xyz_net0'][-1].shape == torch.Size([1, 128, 3])\n    assert ret_dict['fp_features_net0'][-1].shape == torch.Size([1, 256, 128])\n\n    # Length of backbone configs list should be equal to num_streams\n    with pytest.raises(AssertionError):\n        cfg_list['num_streams'] = 3\n        build_backbone(cfg_list)\n\n    # Length of suffixes list should be equal to num_streams\n    with pytest.raises(AssertionError):\n        cfg_dict['suffixes'] = ['net0', 'net1', 'net2']\n        build_backbone(cfg_dict)\n\n    # Type of 'backbones' should be Dict or List[Dict].\n    with pytest.raises(AssertionError):\n        cfg_dict['backbones'] = 'PointNet2SASSG'\n        build_backbone(cfg_dict)\n\n\ndef test_pointnet2_sa_msg():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    cfg = dict(\n        type='PointNet2SAMSG',\n        in_channels=4,\n        num_points=(256, 64, (32, 32)),\n        radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),\n        num_samples=((8, 8, 16), (8, 8, 16), (8, 8, 8)),\n        sa_channels=(((8, 8, 16), (8, 8, 16),\n                      (8, 8, 16)), ((16, 16, 32), (16, 16, 32), (16, 24, 32)),\n                     ((32, 32, 64), (32, 24, 64), (32, 64, 64))),\n        aggregation_channels=(16, 32, 64),\n        fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),\n        fps_sample_range_lists=((-1), (-1), (64, -1)),\n        norm_cfg=dict(type='BN2d'),\n        sa_cfg=dict(\n            type='PointSAModuleMSG',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False))\n\n    self = build_backbone(cfg)\n    self.cuda()\n    assert self.SA_modules[0].mlps[0].layer0.conv.in_channels == 4\n    assert self.SA_modules[0].mlps[0].layer0.conv.out_channels == 8\n    assert self.SA_modules[0].mlps[1].layer1.conv.out_channels == 8\n    assert self.SA_modules[2].mlps[2].layer2.conv.out_channels == 64\n\n    xyz = 
np.fromfile('tests/data/sunrgbd/points/000001.bin', dtype=np.float32)\n    xyz = torch.from_numpy(xyz).view(1, -1, 6).cuda()  # (B, N, 6)\n    # test forward\n    ret_dict = self(xyz[:, :, :4])\n    sa_xyz = ret_dict['sa_xyz'][-1]\n    sa_features = ret_dict['sa_features'][-1]\n    sa_indices = ret_dict['sa_indices'][-1]\n\n    assert sa_xyz.shape == torch.Size([1, 64, 3])\n    assert sa_features.shape == torch.Size([1, 64, 64])\n    assert sa_indices.shape == torch.Size([1, 64])\n\n    # out_indices should be smaller than the length of SA Modules.\n    with pytest.raises(AssertionError):\n        build_backbone(\n            dict(\n                type='PointNet2SAMSG',\n                in_channels=4,\n                num_points=(256, 64, (32, 32)),\n                radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),\n                num_samples=((8, 8, 16), (8, 8, 16), (8, 8, 8)),\n                sa_channels=(((8, 8, 16), (8, 8, 16), (8, 8, 16)),\n                             ((16, 16, 32), (16, 16, 32), (16, 24, 32)),\n                             ((32, 32, 64), (32, 24, 64), (32, 64, 64))),\n                aggregation_channels=(16, 32, 64),\n                fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),\n                fps_sample_range_lists=((-1), (-1), (64, -1)),\n                out_indices=(2, 3),\n                norm_cfg=dict(type='BN2d'),\n                sa_cfg=dict(\n                    type='PointSAModuleMSG',\n                    pool_mod='max',\n                    use_xyz=True,\n                    normalize_xyz=False)))\n"
  },
  {
    "path": "tests/test_models/test_common_modules/test_middle_encoders.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet3d.models.builder import build_middle_encoder\n\n\ndef test_sparse_encoder():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    sparse_encoder_cfg = dict(\n        type='SparseEncoder',\n        in_channels=5,\n        sparse_shape=[40, 1024, 1024],\n        order=('conv', 'norm', 'act'),\n        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,\n                                                                      128)),\n        encoder_paddings=((1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1,\n                                                                       1)),\n        block_type='basicblock')\n\n    sparse_encoder = build_middle_encoder(sparse_encoder_cfg).cuda()\n    voxel_features = torch.rand([207842, 5]).cuda()\n    coors = torch.randint(0, 4, [207842, 4]).cuda()\n\n    ret = sparse_encoder(voxel_features, coors, 4)\n    assert ret.shape == torch.Size([4, 256, 128, 128])\n"
  },
  {
    "path": "tests/test_models/test_common_modules/test_pointnet_modules.py",
    "content": "import numpy as np\nimport pytest\nimport torch\n\n\ndef test_pointnet_sa_module_msg():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    from mmdet3d.ops import PointSAModuleMSG\n\n    self = PointSAModuleMSG(\n        num_point=16,\n        radii=[0.2, 0.4],\n        sample_nums=[4, 8],\n        mlp_channels=[[12, 16], [12, 32]],\n        norm_cfg=dict(type='BN2d'),\n        use_xyz=False,\n        pool_mod='max').cuda()\n\n    assert self.mlps[0].layer0.conv.in_channels == 12\n    assert self.mlps[0].layer0.conv.out_channels == 16\n    assert self.mlps[1].layer0.conv.in_channels == 12\n    assert self.mlps[1].layer0.conv.out_channels == 32\n\n    xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32)\n\n    # (B, N, 3)\n    xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda()\n    # (B, C, N)\n    features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()\n\n    # test forward\n    new_xyz, new_features, inds = self(xyz, features)\n    assert new_xyz.shape == torch.Size([1, 16, 3])\n    assert new_features.shape == torch.Size([1, 48, 16])\n    assert inds.shape == torch.Size([1, 16])\n\n    # test D-FPS mod\n    self = PointSAModuleMSG(\n        num_point=16,\n        radii=[0.2, 0.4],\n        sample_nums=[4, 8],\n        mlp_channels=[[12, 16], [12, 32]],\n        norm_cfg=dict(type='BN2d'),\n        use_xyz=False,\n        pool_mod='max',\n        fps_mod=['D-FPS'],\n        fps_sample_range_list=[-1]).cuda()\n\n    # test forward\n    new_xyz, new_features, inds = self(xyz, features)\n    assert new_xyz.shape == torch.Size([1, 16, 3])\n    assert new_features.shape == torch.Size([1, 48, 16])\n    assert inds.shape == torch.Size([1, 16])\n\n    # test F-FPS mod\n    self = PointSAModuleMSG(\n        num_point=16,\n        radii=[0.2, 0.4],\n        sample_nums=[4, 8],\n        mlp_channels=[[12, 16], [12, 32]],\n        norm_cfg=dict(type='BN2d'),\n        use_xyz=False,\n        pool_mod='max',\n        fps_mod=['F-FPS'],\n        fps_sample_range_list=[-1]).cuda()\n\n    # test forward\n    new_xyz, new_features, inds = self(xyz, features)\n    assert new_xyz.shape == torch.Size([1, 16, 3])\n    assert new_features.shape == torch.Size([1, 48, 16])\n    assert inds.shape == torch.Size([1, 16])\n\n    # test FS mod\n    self = PointSAModuleMSG(\n        num_point=8,\n        radii=[0.2, 0.4],\n        sample_nums=[4, 8],\n        mlp_channels=[[12, 16], [12, 32]],\n        norm_cfg=dict(type='BN2d'),\n        use_xyz=False,\n        pool_mod='max',\n        fps_mod=['FS'],\n        fps_sample_range_list=[-1]).cuda()\n\n    # test forward\n    new_xyz, new_features, inds = self(xyz, features)\n    assert new_xyz.shape == torch.Size([1, 16, 3])\n    assert new_features.shape == torch.Size([1, 48, 16])\n    assert inds.shape == torch.Size([1, 16])\n\n    # test using F-FPS mod and D-FPS mod simultaneously\n    self = PointSAModuleMSG(\n        num_point=[8, 12],\n        radii=[0.2, 0.4],\n        sample_nums=[4, 8],\n        mlp_channels=[[12, 16], [12, 32]],\n        norm_cfg=dict(type='BN2d'),\n        use_xyz=False,\n        pool_mod='max',\n        fps_mod=['F-FPS', 'D-FPS'],\n        fps_sample_range_list=[64, -1]).cuda()\n\n    # test forward\n    new_xyz, new_features, inds = self(xyz, features)\n    assert new_xyz.shape == torch.Size([1, 20, 3])\n    assert new_features.shape == torch.Size([1, 48, 20])\n    assert inds.shape == torch.Size([1, 20])\n\n    # length of 'fps_mod' should be same as 'fps_sample_range_list'\n    
with pytest.raises(AssertionError):\n        PointSAModuleMSG(\n            num_point=8,\n            radii=[0.2, 0.4],\n            sample_nums=[4, 8],\n            mlp_channels=[[12, 16], [12, 32]],\n            norm_cfg=dict(type='BN2d'),\n            use_xyz=False,\n            pool_mod='max',\n            fps_mod=['F-FPS', 'D-FPS'],\n            fps_sample_range_list=[-1]).cuda()\n\n    # length of 'num_point' should be same as 'fps_sample_range_list'\n    with pytest.raises(AssertionError):\n        PointSAModuleMSG(\n            num_point=[8, 8],\n            radii=[0.2, 0.4],\n            sample_nums=[4, 8],\n            mlp_channels=[[12, 16], [12, 32]],\n            norm_cfg=dict(type='BN2d'),\n            use_xyz=False,\n            pool_mod='max',\n            fps_mod=['F-FPS'],\n            fps_sample_range_list=[-1]).cuda()\n\n\ndef test_pointnet_sa_module():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    from mmdet3d.ops import build_sa_module\n    sa_cfg = dict(\n        type='PointSAModule',\n        num_point=16,\n        radius=0.2,\n        num_sample=8,\n        mlp_channels=[12, 32],\n        norm_cfg=dict(type='BN2d'),\n        use_xyz=True,\n        pool_mod='max')\n    self = build_sa_module(sa_cfg).cuda()\n\n    assert self.mlps[0].layer0.conv.in_channels == 15\n    assert self.mlps[0].layer0.conv.out_channels == 32\n\n    xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32)\n\n    # (B, N, 3)\n    xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda()\n    # (B, C, N)\n    features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()\n\n    # test forward\n    new_xyz, new_features, inds = self(xyz, features)\n    assert new_xyz.shape == torch.Size([1, 16, 3])\n    assert new_features.shape == torch.Size([1, 32, 16])\n    assert inds.shape == torch.Size([1, 16])\n\n\ndef test_pointnet_fp_module():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    from mmdet3d.ops import PointFPModule\n\n    self = PointFPModule(mlp_channels=[24, 16]).cuda()\n    assert self.mlps.layer0.conv.in_channels == 24\n    assert self.mlps.layer0.conv.out_channels == 16\n\n    xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin',\n                      np.float32).reshape((-1, 6))\n\n    # (B, N, 3)\n    xyz1 = torch.from_numpy(xyz[0::2, :3]).view(1, -1, 3).cuda()\n    # (B, C1, N)\n    features1 = xyz1.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()\n\n    # (B, M, 3)\n    xyz2 = torch.from_numpy(xyz[1::3, :3]).view(1, -1, 3).cuda()\n    # (B, C2, N)\n    features2 = xyz2.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda()\n\n    fp_features = self(xyz1, xyz2, features1, features2)\n    assert fp_features.shape == torch.Size([1, 16, 50])\n"
  },
  {
    "path": "tests/test_models/test_common_modules/test_pointnet_ops.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet3d.ops import (ball_query, furthest_point_sample,\n                         furthest_point_sample_with_dist, gather_points,\n                         grouping_operation, knn, three_interpolate, three_nn)\n\n\ndef test_fps():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    xyz = torch.tensor([[[-0.2748, 1.0020, -1.1674], [0.1015, 1.3952, -1.2681],\n                         [-0.8070, 2.4137,\n                          -0.5845], [-1.0001, 2.1982, -0.5859],\n                         [0.3841, 1.8983, -0.7431]],\n                        [[-1.0696, 3.0758,\n                          -0.1899], [-0.2559, 3.5521, -0.1402],\n                         [0.8164, 4.0081, -0.1839], [-1.1000, 3.0213, -0.8205],\n                         [-0.0518, 3.7251, -0.3950]]]).cuda()\n\n    idx = furthest_point_sample(xyz, 3)\n    expected_idx = torch.tensor([[0, 2, 4], [0, 2, 1]]).cuda()\n    assert torch.all(idx == expected_idx)\n\n\ndef test_ball_query():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    new_xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625],\n                             [-2.2769, 2.7817, -0.2334],\n                             [-0.4003, 2.4666, -0.5116],\n                             [-0.0740, 1.3147, -1.3625],\n                             [-0.0740, 1.3147, -1.3625]],\n                            [[-2.0289, 2.4952, -0.1708],\n                             [-2.0668, 6.0278, -0.4875],\n                             [0.4066, 1.4211, -0.2947],\n                             [-2.0289, 2.4952, -0.1708],\n                             [-2.0289, 2.4952, -0.1708]]]).cuda()\n\n    xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625], [0.5555, 1.0399, -1.3634],\n                         [-0.4003, 2.4666,\n                          -0.5116], [-0.5251, 2.4379, -0.8466],\n                         [-0.9691, 1.1418,\n                          -1.3733], [-0.2232, 0.9561, -1.3626],\n                         [-2.2769, 2.7817, -0.2334],\n                         [-0.2822, 1.3192, -1.3645], [0.1533, 1.5024, -1.0432],\n                         [0.4917, 1.1529, -1.3496]],\n                        [[-2.0289, 2.4952,\n                          -0.1708], [-0.7188, 0.9956, -0.5096],\n                         [-2.0668, 6.0278, -0.4875], [-1.9304, 3.3092, 0.6610],\n                         [0.0949, 1.4332, 0.3140], [-1.2879, 2.0008, -0.7791],\n                         [-0.7252, 0.9611, -0.6371], [0.4066, 1.4211, -0.2947],\n                         [0.3220, 1.4447, 0.3548], [-0.9744, 2.3856,\n                                                    -1.2000]]]).cuda()\n\n    idx = ball_query(0, 0.2, 5, xyz, new_xyz)\n    expected_idx = torch.tensor([[[0, 0, 0, 0, 0], [6, 6, 6, 6, 6],\n                                  [2, 2, 2, 2, 2], [0, 0, 0, 0, 0],\n                                  [0, 0, 0, 0, 0]],\n                                 [[0, 0, 0, 0, 0], [2, 2, 2, 2, 2],\n                                  [7, 7, 7, 7, 7], [0, 0, 0, 0, 0],\n                                  [0, 0, 0, 0, 0]]]).cuda()\n    assert torch.all(idx == expected_idx)\n\n    # test dilated ball query\n    idx = ball_query(0.2, 0.4, 5, xyz, new_xyz)\n    expected_idx = torch.tensor([[[0, 5, 7, 0, 0], [6, 6, 6, 6, 6],\n                                  [2, 3, 2, 2, 2], [0, 5, 7, 0, 0],\n                                  [0, 5, 7, 0, 0]],\n                                 [[0, 0, 0, 0, 0], [2, 2, 2, 2, 2],\n                                  [7, 7, 7, 7, 7], [0, 0, 0, 0, 0],\n        
                          [0, 0, 0, 0, 0]]]).cuda()\n    assert torch.all(idx == expected_idx)\n\n\ndef test_knn():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    new_xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625],\n                             [-2.2769, 2.7817, -0.2334],\n                             [-0.4003, 2.4666, -0.5116],\n                             [-0.0740, 1.3147, -1.3625],\n                             [-0.0740, 1.3147, -1.3625]],\n                            [[-2.0289, 2.4952, -0.1708],\n                             [-2.0668, 6.0278, -0.4875],\n                             [0.4066, 1.4211, -0.2947],\n                             [-2.0289, 2.4952, -0.1708],\n                             [-2.0289, 2.4952, -0.1708]]]).cuda()\n\n    xyz = torch.tensor([[[-0.0740, 1.3147, -1.3625], [0.5555, 1.0399, -1.3634],\n                         [-0.4003, 2.4666,\n                          -0.5116], [-0.5251, 2.4379, -0.8466],\n                         [-0.9691, 1.1418,\n                          -1.3733], [-0.2232, 0.9561, -1.3626],\n                         [-2.2769, 2.7817, -0.2334],\n                         [-0.2822, 1.3192, -1.3645], [0.1533, 1.5024, -1.0432],\n                         [0.4917, 1.1529, -1.3496]],\n                        [[-2.0289, 2.4952,\n                          -0.1708], [-0.7188, 0.9956, -0.5096],\n                         [-2.0668, 6.0278, -0.4875], [-1.9304, 3.3092, 0.6610],\n                         [0.0949, 1.4332, 0.3140], [-1.2879, 2.0008, -0.7791],\n                         [-0.7252, 0.9611, -0.6371], [0.4066, 1.4211, -0.2947],\n                         [0.3220, 1.4447, 0.3548], [-0.9744, 2.3856,\n                                                    -1.2000]]]).cuda()\n\n    idx = knn(5, xyz, new_xyz)\n    new_xyz_ = new_xyz.unsqueeze(2).repeat(1, 1, xyz.shape[1], 1)\n    xyz_ = xyz.unsqueeze(1).repeat(1, new_xyz.shape[1], 1, 1)\n    dist = ((new_xyz_ - xyz_) * (new_xyz_ - xyz_)).sum(-1)\n    expected_idx = dist.topk(k=5, dim=2, largest=False)[1].transpose(2, 1)\n    assert torch.all(idx == expected_idx)\n\n    idx = knn(5,\n              xyz.transpose(1, 2).contiguous(),\n              new_xyz.transpose(1, 2).contiguous(), True)\n    assert torch.all(idx == expected_idx)\n\n    idx = knn(5, xyz, xyz)\n    xyz_ = xyz.unsqueeze(2).repeat(1, 1, xyz.shape[1], 1)\n    xyz__ = xyz.unsqueeze(1).repeat(1, xyz.shape[1], 1, 1)\n    dist = ((xyz_ - xyz__) * (xyz_ - xyz__)).sum(-1)\n    expected_idx = dist.topk(k=5, dim=2, largest=False)[1].transpose(2, 1)\n    assert torch.all(idx == expected_idx)\n\n\ndef test_grouping_points():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    idx = torch.tensor([[[0, 0, 0], [3, 3, 3], [8, 8, 8], [0, 0, 0], [0, 0, 0],\n                         [0, 0, 0]],\n                        [[0, 0, 0], [6, 6, 6], [9, 9, 9], [0, 0, 0], [0, 0, 0],\n                         [0, 0, 0]]]).int().cuda()\n    festures = torch.tensor([[[\n        0.5798, -0.7981, -0.9280, -1.3311, 1.3687, 0.9277, -0.4164, -1.8274,\n        0.9268, 0.8414\n    ],\n                              [\n                                  5.4247, 1.5113, 2.3944, 1.4740, 5.0300,\n                                  5.1030, 1.9360, 2.1939, 2.1581, 3.4666\n                              ],\n                              [\n                                  -1.6266, -1.0281, -1.0393, -1.6931, -1.3982,\n                                  -0.5732, -1.0830, -1.7561, -1.6786, -1.6967\n                              ]],\n                             [[\n  
                               -0.0380, -0.1880, -1.5724, 0.6905, -0.3190,\n                                 0.7798, -0.3693, -0.9457, -0.2942, -1.8527\n                             ],\n                              [\n                                  1.1773, 1.5009, 2.6399, 5.9242, 1.0962,\n                                  2.7346, 6.0865, 1.5555, 4.3303, 2.8229\n                              ],\n                              [\n                                  -0.6646, -0.6870, -0.1125, -0.2224, -0.3445,\n                                  -1.4049, 0.4990, -0.7037, -0.9924, 0.0386\n                              ]]]).cuda()\n\n    output = grouping_operation(festures, idx)\n    expected_output = torch.tensor([[[[0.5798, 0.5798, 0.5798],\n                                      [-1.3311, -1.3311, -1.3311],\n                                      [0.9268, 0.9268, 0.9268],\n                                      [0.5798, 0.5798, 0.5798],\n                                      [0.5798, 0.5798, 0.5798],\n                                      [0.5798, 0.5798, 0.5798]],\n                                     [[5.4247, 5.4247, 5.4247],\n                                      [1.4740, 1.4740, 1.4740],\n                                      [2.1581, 2.1581, 2.1581],\n                                      [5.4247, 5.4247, 5.4247],\n                                      [5.4247, 5.4247, 5.4247],\n                                      [5.4247, 5.4247, 5.4247]],\n                                     [[-1.6266, -1.6266, -1.6266],\n                                      [-1.6931, -1.6931, -1.6931],\n                                      [-1.6786, -1.6786, -1.6786],\n                                      [-1.6266, -1.6266, -1.6266],\n                                      [-1.6266, -1.6266, -1.6266],\n                                      [-1.6266, -1.6266, -1.6266]]],\n                                    [[[-0.0380, -0.0380, -0.0380],\n                                      [-0.3693, -0.3693, -0.3693],\n                                      [-1.8527, -1.8527, -1.8527],\n                                      [-0.0380, -0.0380, -0.0380],\n                                      [-0.0380, -0.0380, -0.0380],\n                                      [-0.0380, -0.0380, -0.0380]],\n                                     [[1.1773, 1.1773, 1.1773],\n                                      [6.0865, 6.0865, 6.0865],\n                                      [2.8229, 2.8229, 2.8229],\n                                      [1.1773, 1.1773, 1.1773],\n                                      [1.1773, 1.1773, 1.1773],\n                                      [1.1773, 1.1773, 1.1773]],\n                                     [[-0.6646, -0.6646, -0.6646],\n                                      [0.4990, 0.4990, 0.4990],\n                                      [0.0386, 0.0386, 0.0386],\n                                      [-0.6646, -0.6646, -0.6646],\n                                      [-0.6646, -0.6646, -0.6646],\n                                      [-0.6646, -0.6646, -0.6646]]]]).cuda()\n    assert torch.allclose(output, expected_output)\n\n\ndef test_gather_points():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    features = torch.tensor([[[\n        -1.6095, -0.1029, -0.8876, -1.2447, -2.4031, 0.3708, -1.1586, -1.4967,\n        -0.4800, 0.2252\n    ],\n                              [\n                                  1.9138, 3.4979, 1.6854, 1.5631, 3.6776,\n                                  3.1154, 2.1705, 2.5221, 
2.0411, 3.1446\n                              ],\n                              [\n                                  -1.4173, 0.3073, -1.4339, -1.4340, -1.2770,\n                                  -0.2867, -1.4162, -1.4044, -1.4245, -1.4074\n                              ]],\n                             [[\n                                 0.2160, 0.0842, 0.3661, -0.2749, -0.4909,\n                                 -0.6066, -0.8773, -0.0745, -0.9496, 0.1434\n                             ],\n                              [\n                                  1.3644, 1.8087, 1.6855, 1.9563, 1.2746,\n                                  1.9662, 0.9566, 1.8778, 1.1437, 1.3639\n                              ],\n                              [\n                                  -0.7172, 0.1692, 0.2241, 0.0721, -0.7540,\n                                  0.0462, -0.6227, 0.3223, -0.6944, -0.5294\n                              ]]]).cuda()\n\n    idx = torch.tensor([[0, 1, 4, 0, 0, 0], [0, 5, 6, 0, 0, 0]]).int().cuda()\n\n    output = gather_points(features, idx)\n    expected_output = torch.tensor(\n        [[[-1.6095, -0.1029, -2.4031, -1.6095, -1.6095, -1.6095],\n          [1.9138, 3.4979, 3.6776, 1.9138, 1.9138, 1.9138],\n          [-1.4173, 0.3073, -1.2770, -1.4173, -1.4173, -1.4173]],\n         [[0.2160, -0.6066, -0.8773, 0.2160, 0.2160, 0.2160],\n          [1.3644, 1.9662, 0.9566, 1.3644, 1.3644, 1.3644],\n          [-0.7172, 0.0462, -0.6227, -0.7172, -0.7172, -0.7172]]]).cuda()\n\n    assert torch.allclose(output, expected_output)\n\n\ndef test_three_interpolate():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    features = torch.tensor([[[2.4350, 4.7516, 4.4995, 2.4350, 2.4350, 2.4350],\n                              [3.1236, 2.6278, 3.0447, 3.1236, 3.1236, 3.1236],\n                              [2.6732, 2.8677, 2.6436, 2.6732, 2.6732, 2.6732],\n                              [0.0124, 7.0150, 7.0199, 0.0124, 0.0124, 0.0124],\n                              [0.3207, 0.0000, 0.3411, 0.3207, 0.3207,\n                               0.3207]],\n                             [[0.0000, 0.9544, 2.4532, 0.0000, 0.0000, 0.0000],\n                              [0.5346, 1.9176, 1.4715, 0.5346, 0.5346, 0.5346],\n                              [0.0000, 0.2744, 2.0842, 0.0000, 0.0000, 0.0000],\n                              [0.3414, 1.5063, 1.6209, 0.3414, 0.3414, 0.3414],\n                              [0.5814, 0.0103, 0.0000, 0.5814, 0.5814,\n                               0.5814]]]).cuda()\n\n    idx = torch.tensor([[[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2], [0, 1, 2],\n                         [0, 1, 3]],\n                        [[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4], [0, 2, 4],\n                         [0, 1, 2]]]).int().cuda()\n\n    weight = torch.tensor([[[3.3333e-01, 3.3333e-01, 3.3333e-01],\n                            [1.0000e+00, 5.8155e-08, 2.2373e-08],\n                            [1.0000e+00, 1.7737e-08, 1.7356e-08],\n                            [3.3333e-01, 3.3333e-01, 3.3333e-01],\n                            [3.3333e-01, 3.3333e-01, 3.3333e-01],\n                            [3.3333e-01, 3.3333e-01, 3.3333e-01]],\n                           [[3.3333e-01, 3.3333e-01, 3.3333e-01],\n                            [1.0000e+00, 1.3651e-08, 7.7312e-09],\n                            [1.0000e+00, 1.7148e-08, 1.4070e-08],\n                            [3.3333e-01, 3.3333e-01, 3.3333e-01],\n                            [3.3333e-01, 3.3333e-01, 3.3333e-01],\n                            
[3.3333e-01, 3.3333e-01, 3.3333e-01]]]).cuda()\n\n    output = three_interpolate(features, idx, weight)\n    expected_output = torch.tensor([[[\n        3.8953e+00, 4.4995e+00, 4.4995e+00, 3.8953e+00, 3.8953e+00, 3.2072e+00\n    ], [\n        2.9320e+00, 3.0447e+00, 3.0447e+00, 2.9320e+00, 2.9320e+00, 2.9583e+00\n    ], [\n        2.7281e+00, 2.6436e+00, 2.6436e+00, 2.7281e+00, 2.7281e+00, 2.7380e+00\n    ], [\n        4.6824e+00, 7.0199e+00, 7.0199e+00, 4.6824e+00, 4.6824e+00, 2.3466e+00\n    ], [\n        2.2060e-01, 3.4110e-01, 3.4110e-01, 2.2060e-01, 2.2060e-01, 2.1380e-01\n    ]],\n                                    [[\n                                        8.1773e-01, 9.5440e-01, 2.4532e+00,\n                                        8.1773e-01, 8.1773e-01, 1.1359e+00\n                                    ],\n                                     [\n                                         8.4689e-01, 1.9176e+00, 1.4715e+00,\n                                         8.4689e-01, 8.4689e-01, 1.3079e+00\n                                     ],\n                                     [\n                                         6.9473e-01, 2.7440e-01, 2.0842e+00,\n                                         6.9473e-01, 6.9473e-01, 7.8619e-01\n                                     ],\n                                     [\n                                         7.6789e-01, 1.5063e+00, 1.6209e+00,\n                                         7.6789e-01, 7.6789e-01, 1.1562e+00\n                                     ],\n                                     [\n                                         3.8760e-01, 1.0300e-02, 8.3569e-09,\n                                         3.8760e-01, 3.8760e-01, 1.9723e-01\n                                     ]]]).cuda()\n\n    assert torch.allclose(output, expected_output, 1e-4)\n\n\ndef test_three_nn():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    known = torch.tensor([[[-1.8373, 3.5605,\n                            -0.7867], [0.7615, 2.9420, 0.2314],\n                           [-0.6503, 3.6637, -1.0622],\n                           [-1.8373, 3.5605, -0.7867],\n                           [-1.8373, 3.5605, -0.7867]],\n                          [[-1.3399, 1.9991, -0.3698],\n                           [-0.0799, 0.9698,\n                            -0.8457], [0.0858, 2.4721, -0.1928],\n                           [-1.3399, 1.9991, -0.3698],\n                           [-1.3399, 1.9991, -0.3698]]]).cuda()\n\n    unknown = torch.tensor([[[-1.8373, 3.5605, -0.7867],\n                             [0.7615, 2.9420, 0.2314],\n                             [-0.6503, 3.6637, -1.0622],\n                             [-1.5237, 2.3976, -0.8097],\n                             [-0.0722, 3.4017, -0.2880],\n                             [0.5198, 3.0661, -0.4605],\n                             [-2.0185, 3.5019, -0.3236],\n                             [0.5098, 3.1020, 0.5799],\n                             [-1.6137, 3.8443, -0.5269],\n                             [0.7341, 2.9626, -0.3189]],\n                            [[-1.3399, 1.9991, -0.3698],\n                             [-0.0799, 0.9698, -0.8457],\n                             [0.0858, 2.4721, -0.1928],\n                             [-0.9022, 1.6560, -1.3090],\n                             [0.1156, 1.6901, -0.4366],\n                             [-0.6477, 2.3576, -0.1563],\n                             [-0.8482, 1.1466, -1.2704],\n                             [-0.8753, 2.0845, -0.3460],\n                   
          [-0.5621, 1.4233, -1.2858],\n                             [-0.5883, 1.3114, -1.2899]]]).cuda()\n\n    dist, idx = three_nn(unknown, known)\n    expected_dist = torch.tensor([[[0.0000, 0.0000, 0.0000],\n                                   [0.0000, 2.0463, 2.8588],\n                                   [0.0000, 1.2229, 1.2229],\n                                   [1.2047, 1.2047, 1.2047],\n                                   [1.0011, 1.0845, 1.8411],\n                                   [0.7433, 1.4451, 2.4304],\n                                   [0.5007, 0.5007, 0.5007],\n                                   [0.4587, 2.0875, 2.7544],\n                                   [0.4450, 0.4450, 0.4450],\n                                   [0.5514, 1.7206, 2.6811]],\n                                  [[0.0000, 0.0000, 0.0000],\n                                   [0.0000, 1.6464, 1.6952],\n                                   [0.0000, 1.5125, 1.5125],\n                                   [1.0915, 1.0915, 1.0915],\n                                   [0.8197, 0.8511, 1.4894],\n                                   [0.7433, 0.8082, 0.8082],\n                                   [0.8955, 1.3340, 1.3340],\n                                   [0.4730, 0.4730, 0.4730],\n                                   [0.7949, 1.3325, 1.3325],\n                                   [0.7566, 1.3727, 1.3727]]]).cuda()\n    expected_idx = torch.tensor([[[0, 3, 4], [1, 2, 0], [2, 0, 3], [0, 3, 4],\n                                  [2, 1, 0], [1, 2, 0], [0, 3, 4], [1, 2, 0],\n                                  [0, 3, 4], [1, 2, 0]],\n                                 [[0, 3, 4], [1, 2, 0], [2, 0, 3], [0, 3, 4],\n                                  [2, 1, 0], [2, 0, 3], [1, 0, 3], [0, 3, 4],\n                                  [1, 0, 3], [1, 0, 3]]]).cuda()\n\n    assert torch.allclose(dist, expected_dist, 1e-4)\n    assert torch.all(idx == expected_idx)\n\n\ndef test_fps_with_dist():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    xyz = torch.tensor([[[-0.2748, 1.0020, -1.1674], [0.1015, 1.3952, -1.2681],\n                         [-0.8070, 2.4137,\n                          -0.5845], [-1.0001, 2.1982, -0.5859],\n                         [0.3841, 1.8983, -0.7431]],\n                        [[-1.0696, 3.0758,\n                          -0.1899], [-0.2559, 3.5521, -0.1402],\n                         [0.8164, 4.0081, -0.1839], [-1.1000, 3.0213, -0.8205],\n                         [-0.0518, 3.7251, -0.3950]]]).cuda()\n\n    expected_idx = torch.tensor([[0, 2, 4], [0, 2, 1]]).cuda()\n    xyz_square_dist = ((xyz.unsqueeze(dim=1) -\n                        xyz.unsqueeze(dim=2))**2).sum(-1)\n    idx = furthest_point_sample_with_dist(xyz_square_dist, 3)\n    assert torch.all(idx == expected_idx)\n\n    import numpy as np\n    fps_idx = np.load('tests/data/ops/fps_idx.npy')\n    features_for_fps_distance = np.load(\n        'tests/data/ops/features_for_fps_distance.npy')\n    expected_idx = torch.from_numpy(fps_idx).cuda()\n    features_for_fps_distance = torch.from_numpy(\n        features_for_fps_distance).cuda()\n\n    idx = furthest_point_sample_with_dist(features_for_fps_distance, 16)\n    assert torch.all(idx == expected_idx)\n"
  },
  {
    "path": "tests/test_models/test_common_modules/test_roiaware_pool3d.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet3d.ops.roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch,\n                                         points_in_boxes_cpu,\n                                         points_in_boxes_gpu)\n\n\ndef test_RoIAwarePool3d():\n    # RoIAwarePool3d only support gpu version currently.\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    roiaware_pool3d_max = RoIAwarePool3d(\n        out_size=4, max_pts_per_voxel=128, mode='max')\n    roiaware_pool3d_avg = RoIAwarePool3d(\n        out_size=4, max_pts_per_voxel=128, mode='avg')\n    rois = torch.tensor(\n        [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],\n         [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]],\n        dtype=torch.float32).cuda(\n        )  # boxes (m, 7) with bottom center in lidar coordinate\n    pts = torch.tensor(\n        [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],\n         [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],\n         [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9],\n         [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]],\n        dtype=torch.float32).cuda()  # points (n, 3) in lidar coordinate\n    pts_feature = pts.clone()\n\n    pooled_features_max = roiaware_pool3d_max(\n        rois=rois, pts=pts, pts_feature=pts_feature)\n    assert pooled_features_max.shape == torch.Size([2, 4, 4, 4, 3])\n    assert torch.allclose(pooled_features_max.sum(),\n                          torch.tensor(51.100).cuda(), 1e-3)\n\n    pooled_features_avg = roiaware_pool3d_avg(\n        rois=rois, pts=pts, pts_feature=pts_feature)\n    assert pooled_features_avg.shape == torch.Size([2, 4, 4, 4, 3])\n    assert torch.allclose(pooled_features_avg.sum(),\n                          torch.tensor(49.750).cuda(), 1e-3)\n\n\ndef test_points_in_boxes_gpu():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    boxes = torch.tensor(\n        [[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3]],\n         [[-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]],\n        dtype=torch.float32).cuda(\n        )  # boxes (b, t, 7) with bottom center in lidar coordinate\n    pts = torch.tensor(\n        [[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],\n          [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],\n          [4.7, 3.5, -12.2]],\n         [[3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9], [-21.3, -52, -5],\n          [0, 0, 0], [6, 7, 8], [-2, -3, -4], [6, 4, 9]]],\n        dtype=torch.float32).cuda()  # points (b, m, 3) in lidar coordinate\n\n    point_indices = points_in_boxes_gpu(points=pts, boxes=boxes)\n    expected_point_indices = torch.tensor(\n        [[0, 0, 0, 0, 0, -1, -1, -1], [-1, -1, -1, -1, -1, -1, -1, -1]],\n        dtype=torch.int32).cuda()\n    assert point_indices.shape == torch.Size([2, 8])\n    assert (point_indices == expected_point_indices).all()\n\n    if torch.cuda.device_count() > 1:\n        pts = pts.to('cuda:1')\n        boxes = boxes.to('cuda:1')\n        expected_point_indices = expected_point_indices.to('cuda:1')\n        point_indices = points_in_boxes_gpu(points=pts, boxes=boxes)\n        assert point_indices.shape == torch.Size([2, 8])\n        assert (point_indices == expected_point_indices).all()\n\n\ndef test_points_in_boxes_cpu():\n    boxes = torch.tensor(\n        [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],\n         [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]],\n        dtype=torch.float32\n    )  # boxes (m, 7) with 
bottom center in lidar coordinate\n    pts = torch.tensor(\n        [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],\n         [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],\n         [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9],\n         [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]],\n        dtype=torch.float32)  # points (n, 3) in lidar coordinate\n\n    point_indices = points_in_boxes_cpu(points=pts, boxes=boxes)\n    expected_point_indices = torch.tensor(\n        [[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]],\n        dtype=torch.int32)\n    assert point_indices.shape == torch.Size([2, 15])\n    assert (point_indices == expected_point_indices).all()\n\n\ndef test_points_in_boxes_batch():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n\n    boxes = torch.tensor(\n        [[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],\n          [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]],\n        dtype=torch.float32).cuda(\n        )  # boxes (m, 7) with bottom center in lidar coordinate\n    pts = torch.tensor(\n        [[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],\n          [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],\n          [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [\n              -16, -18, 9\n          ], [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]]],\n        dtype=torch.float32).cuda()  # points (n, 3) in lidar coordinate\n\n    point_indices = points_in_boxes_batch(points=pts, boxes=boxes)\n    expected_point_indices = torch.tensor(\n        [[[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0], [0, 0],\n          [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],\n        dtype=torch.int32).cuda()\n    assert point_indices.shape == torch.Size([1, 15, 2])\n    assert (point_indices == expected_point_indices).all()\n\n    if torch.cuda.device_count() > 1:\n        pts = pts.to('cuda:1')\n        boxes = boxes.to('cuda:1')\n        expected_point_indices = expected_point_indices.to('cuda:1')\n        point_indices = points_in_boxes_batch(points=pts, boxes=boxes)\n        assert point_indices.shape == torch.Size([1, 15, 2])\n        assert (point_indices == expected_point_indices).all()\n"
  },
  {
    "path": "tests/test_models/test_common_modules/test_sparse_unet.py",
    "content": "import torch\n\nfrom mmdet3d.ops import SparseBasicBlock\nfrom mmdet3d.ops import spconv as spconv\n\n\ndef test_SparseUNet():\n    from mmdet3d.models.middle_encoders.sparse_unet import SparseUNet\n    self = SparseUNet(in_channels=4, sparse_shape=[41, 1600, 1408])\n\n    # test encoder layers\n    assert len(self.encoder_layers) == 4\n    assert self.encoder_layers.encoder_layer1[0][0].in_channels == 16\n    assert self.encoder_layers.encoder_layer1[0][0].out_channels == 16\n    assert isinstance(self.encoder_layers.encoder_layer1[0][0],\n                      spconv.conv.SubMConv3d)\n    assert isinstance(self.encoder_layers.encoder_layer1[0][1],\n                      torch.nn.modules.batchnorm.BatchNorm1d)\n    assert isinstance(self.encoder_layers.encoder_layer1[0][2],\n                      torch.nn.modules.activation.ReLU)\n    assert self.encoder_layers.encoder_layer4[0][0].in_channels == 64\n    assert self.encoder_layers.encoder_layer4[0][0].out_channels == 64\n    assert isinstance(self.encoder_layers.encoder_layer4[0][0],\n                      spconv.conv.SparseConv3d)\n    assert isinstance(self.encoder_layers.encoder_layer4[2][0],\n                      spconv.conv.SubMConv3d)\n\n    # test decoder layers\n    assert isinstance(self.lateral_layer1, SparseBasicBlock)\n    assert isinstance(self.merge_layer1[0], spconv.conv.SubMConv3d)\n    assert isinstance(self.upsample_layer1[0], spconv.conv.SubMConv3d)\n    assert isinstance(self.upsample_layer2[0], spconv.conv.SparseInverseConv3d)\n\n    voxel_features = torch.tensor([[6.56126, 0.9648336, -1.7339306, 0.315],\n                                   [6.8162713, -2.480431, -1.3616394, 0.36],\n                                   [11.643568, -4.744306, -1.3580885, 0.16],\n                                   [23.482342, 6.5036807, 0.5806964, 0.35]],\n                                  dtype=torch.float32)  # n, point_features\n    coordinates = torch.tensor(\n        [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],\n         [1, 35, 930, 469]],\n        dtype=torch.int32)  # n, 4(batch, ind_x, ind_y, ind_z)\n\n    unet_ret_dict = self.forward(voxel_features, coordinates, 2)\n    seg_features = unet_ret_dict['seg_features']\n    spatial_features = unet_ret_dict['spatial_features']\n\n    assert seg_features.shape == torch.Size([4, 16])\n    assert spatial_features.shape == torch.Size([2, 256, 200, 176])\n\n\ndef test_SparseBasicBlock():\n    voxel_features = torch.tensor([[6.56126, 0.9648336, -1.7339306, 0.315],\n                                   [6.8162713, -2.480431, -1.3616394, 0.36],\n                                   [11.643568, -4.744306, -1.3580885, 0.16],\n                                   [23.482342, 6.5036807, 0.5806964, 0.35]],\n                                  dtype=torch.float32)  # n, point_features\n    coordinates = torch.tensor(\n        [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],\n         [1, 35, 930, 469]],\n        dtype=torch.int32)  # n, 4(batch, ind_x, ind_y, ind_z)\n\n    # test\n    input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,\n                                              [41, 1600, 1408], 2)\n    self = SparseBasicBlock(\n        4,\n        4,\n        conv_cfg=dict(type='SubMConv3d', indice_key='subm1'),\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01))\n    # test conv and bn layer\n    assert isinstance(self.conv1, spconv.conv.SubMConv3d)\n    assert self.conv1.in_channels == 4\n    assert self.conv1.out_channels == 4\n    
assert isinstance(self.conv2, spconv.conv.SubMConv3d)\n    assert self.conv2.in_channels == 4\n    assert self.conv2.out_channels == 4\n    assert self.bn1.eps == 1e-3\n    assert self.bn1.momentum == 0.01\n\n    out_features = self(input_sp_tensor)\n    assert out_features.features.shape == torch.Size([4, 4])\n\n\ndef test_make_sparse_convmodule():\n    from mmdet3d.ops import make_sparse_convmodule\n\n    voxel_features = torch.tensor([[6.56126, 0.9648336, -1.7339306, 0.315],\n                                   [6.8162713, -2.480431, -1.3616394, 0.36],\n                                   [11.643568, -4.744306, -1.3580885, 0.16],\n                                   [23.482342, 6.5036807, 0.5806964, 0.35]],\n                                  dtype=torch.float32)  # n, point_features\n    coordinates = torch.tensor(\n        [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],\n         [1, 35, 930, 469]],\n        dtype=torch.int32)  # n, 4(batch, ind_x, ind_y, ind_z)\n\n    # test\n    input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,\n                                              [41, 1600, 1408], 2)\n\n    sparse_block0 = make_sparse_convmodule(\n        4,\n        16,\n        3,\n        'test0',\n        stride=1,\n        padding=0,\n        conv_type='SubMConv3d',\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n        order=('conv', 'norm', 'act'))\n    assert isinstance(sparse_block0[0], spconv.SubMConv3d)\n    assert sparse_block0[0].in_channels == 4\n    assert sparse_block0[0].out_channels == 16\n    assert isinstance(sparse_block0[1], torch.nn.BatchNorm1d)\n    assert sparse_block0[1].eps == 0.001\n    assert sparse_block0[1].momentum == 0.01\n    assert isinstance(sparse_block0[2], torch.nn.ReLU)\n\n    # test forward\n    out_features = sparse_block0(input_sp_tensor)\n    assert out_features.features.shape == torch.Size([4, 16])\n\n    sparse_block1 = make_sparse_convmodule(\n        4,\n        16,\n        3,\n        'test1',\n        stride=1,\n        padding=0,\n        conv_type='SparseInverseConv3d',\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n        order=('norm', 'act', 'conv'))\n    assert isinstance(sparse_block1[0], torch.nn.BatchNorm1d)\n    assert isinstance(sparse_block1[1], torch.nn.ReLU)\n    assert isinstance(sparse_block1[2], spconv.SparseInverseConv3d)\n"
  },
  {
    "path": "tests/test_models/test_common_modules/test_vote_module.py",
    "content": "import torch\n\n\ndef test_vote_module():\n    from mmdet3d.models.model_utils import VoteModule\n\n    vote_loss = dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='none',\n        loss_dst_weight=10.0)\n    self = VoteModule(vote_per_seed=3, in_channels=8, vote_loss=vote_loss)\n\n    seed_xyz = torch.rand([2, 64, 3], dtype=torch.float32)  # (b, npoints, 3)\n    seed_features = torch.rand(\n        [2, 8, 64], dtype=torch.float32)  # (b, in_channels, npoints)\n\n    # test forward\n    vote_xyz, vote_features, vote_offset = self(seed_xyz, seed_features)\n    assert vote_xyz.shape == torch.Size([2, 192, 3])\n    assert vote_features.shape == torch.Size([2, 8, 192])\n    assert vote_offset.shape == torch.Size([2, 3, 192])\n\n    # test clip offset and without feature residual\n    self = VoteModule(\n        vote_per_seed=1,\n        in_channels=8,\n        num_points=32,\n        with_res_feat=False,\n        vote_xyz_range=(2.0, 2.0, 2.0))\n\n    vote_xyz, vote_features, vote_offset = self(seed_xyz, seed_features)\n    assert vote_xyz.shape == torch.Size([2, 32, 3])\n    assert vote_features.shape == torch.Size([2, 8, 32])\n    assert vote_offset.shape == torch.Size([2, 3, 32])\n    assert torch.allclose(seed_features[..., :32], vote_features)\n    assert vote_offset.max() <= 2.0\n    assert vote_offset.min() >= -2.0\n"
  },
  {
    "path": "tests/test_models/test_detectors.py",
    "content": "import copy\nimport numpy as np\nimport pytest\nimport random\nimport torch\nfrom os.path import dirname, exists, join\n\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes, LiDARInstance3DBoxes\nfrom mmdet3d.models.builder import build_detector\n\n\ndef _setup_seed(seed):\n    torch.manual_seed(seed)\n    torch.cuda.manual_seed_all(seed)\n    np.random.seed(seed)\n    random.seed(seed)\n    torch.backends.cudnn.deterministic = True\n\n\ndef _get_config_directory():\n    \"\"\"Find the predefined detector config directory.\"\"\"\n    try:\n        # Assume we are running in the source mmdetection3d repo\n        repo_dpath = dirname(dirname(dirname(__file__)))\n    except NameError:\n        # For IPython development when this __file__ is not defined\n        import mmdet3d\n        repo_dpath = dirname(dirname(mmdet3d.__file__))\n    config_dpath = join(repo_dpath, 'configs')\n    if not exists(config_dpath):\n        raise Exception('Cannot find config path')\n    return config_dpath\n\n\ndef _get_config_module(fname):\n    \"\"\"Load a configuration as a python module.\"\"\"\n    from mmcv import Config\n    config_dpath = _get_config_directory()\n    config_fpath = join(config_dpath, fname)\n    config_mod = Config.fromfile(config_fpath)\n    return config_mod\n\n\ndef _get_model_cfg(fname):\n    \"\"\"Grab configs necessary to create a model.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n\n    return model\n\n\ndef _get_detector_cfg(fname):\n    \"\"\"Grab configs necessary to create a detector.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    import mmcv\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n    train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))\n    test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))\n\n    model.update(train_cfg=train_cfg)\n    model.update(test_cfg=test_cfg)\n    return model\n\n\ndef test_get_dynamic_voxelnet():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n\n    dynamic_voxelnet_cfg = _get_model_cfg(\n        'dynamic_voxelization/dv_second_secfpn_6x8_80e_kitti-3d-car.py')\n    self = build_detector(dynamic_voxelnet_cfg).cuda()\n    points_0 = torch.rand([2010, 4], device='cuda')\n    points_1 = torch.rand([2020, 4], device='cuda')\n    points = [points_0, points_1]\n    feats = self.extract_feat(points, None)\n    assert feats[0].shape == torch.Size([2, 512, 200, 176])\n\n\ndef test_voxel_net():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    _setup_seed(0)\n    voxel_net_cfg = _get_detector_cfg(\n        'second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py')\n\n    self = build_detector(voxel_net_cfg).cuda()\n    points_0 = torch.rand([2010, 4], device='cuda')\n    points_1 = torch.rand([2020, 4], device='cuda')\n    points = [points_0, points_1]\n    gt_bbox_0 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))\n    gt_bbox_1 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))\n    gt_bboxes = [gt_bbox_0, gt_bbox_1]\n    gt_labels_0 = torch.randint(0, 3, [10], device='cuda')\n    gt_labels_1 = torch.randint(0, 3, [10], device='cuda')\n    gt_labels = [gt_labels_0, gt_labels_1]\n    img_meta_0 = 
dict(box_type_3d=LiDARInstance3DBoxes)\n    img_meta_1 = dict(box_type_3d=LiDARInstance3DBoxes)\n    img_metas = [img_meta_0, img_meta_1]\n\n    # test forward_train\n    losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)\n    assert losses['loss_cls'][0] >= 0\n    assert losses['loss_bbox'][0] >= 0\n    assert losses['loss_dir'][0] >= 0\n\n    # test simple_test\n    results = self.simple_test(points, img_metas)\n    boxes_3d = results[0]['boxes_3d']\n    scores_3d = results[0]['scores_3d']\n    labels_3d = results[0]['labels_3d']\n    assert boxes_3d.tensor.shape == (50, 7)\n    assert scores_3d.shape == torch.Size([50])\n    assert labels_3d.shape == torch.Size([50])\n\n\ndef test_3dssd():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    _setup_seed(0)\n    ssd3d_cfg = _get_detector_cfg('3dssd/3dssd_kitti-3d-car.py')\n    self = build_detector(ssd3d_cfg).cuda()\n    points_0 = torch.rand([2000, 4], device='cuda')\n    points_1 = torch.rand([2000, 4], device='cuda')\n    points = [points_0, points_1]\n    img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)\n    img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)\n    img_metas = [img_meta_0, img_meta_1]\n    gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))\n    gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))\n    gt_bboxes = [gt_bbox_0, gt_bbox_1]\n    gt_labels_0 = torch.randint(0, 10, [10], device='cuda')\n    gt_labels_1 = torch.randint(0, 10, [10], device='cuda')\n    gt_labels = [gt_labels_0, gt_labels_1]\n\n    # test forward_train\n    losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)\n    assert losses['vote_loss'] >= 0\n    assert losses['objectness_loss'] >= 0\n    assert losses['semantic_loss'] >= 0\n    assert losses['center_loss'] >= 0\n    assert losses['dir_class_loss'] >= 0\n    assert losses['dir_res_loss'] >= 0\n    assert losses['size_class_loss'] >= 0\n    assert losses['size_res_loss'] >= 0\n\n    # test simple_test\n    results = self.simple_test(points, img_metas)\n    boxes_3d = results[0]['boxes_3d']\n    scores_3d = results[0]['scores_3d']\n    labels_3d = results[0]['labels_3d']\n    assert boxes_3d.tensor.shape[0] >= 0\n    assert boxes_3d.tensor.shape[1] == 7\n    assert scores_3d.shape[0] >= 0\n    assert labels_3d.shape[0] >= 0\n\n\ndef test_vote_net():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n\n    _setup_seed(0)\n    vote_net_cfg = _get_detector_cfg(\n        'votenet/votenet_16x8_sunrgbd-3d-10class.py')\n    self = build_detector(vote_net_cfg).cuda()\n    points_0 = torch.rand([2000, 4], device='cuda')\n    points_1 = torch.rand([2000, 4], device='cuda')\n    points = [points_0, points_1]\n    img_meta_0 = dict(box_type_3d=DepthInstance3DBoxes)\n    img_meta_1 = dict(box_type_3d=DepthInstance3DBoxes)\n    img_metas = [img_meta_0, img_meta_1]\n    gt_bbox_0 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))\n    gt_bbox_1 = DepthInstance3DBoxes(torch.rand([10, 7], device='cuda'))\n    gt_bboxes = [gt_bbox_0, gt_bbox_1]\n    gt_labels_0 = torch.randint(0, 10, [10], device='cuda')\n    gt_labels_1 = torch.randint(0, 10, [10], device='cuda')\n    gt_labels = [gt_labels_0, gt_labels_1]\n\n    # test forward_train\n    losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)\n    assert losses['vote_loss'] >= 0\n    assert losses['objectness_loss'] >= 0\n    assert losses['semantic_loss'] >= 0\n    assert 
losses['center_loss'] >= 0\n    assert losses['dir_class_loss'] >= 0\n    assert losses['dir_res_loss'] >= 0\n    assert losses['size_class_loss'] >= 0\n    assert losses['size_res_loss'] >= 0\n\n    # test simple_test\n    results = self.simple_test(points, img_metas)\n    boxes_3d = results[0]['boxes_3d']\n    scores_3d = results[0]['scores_3d']\n    labels_3d = results[0]['labels_3d']\n    assert boxes_3d.tensor.shape[0] >= 0\n    assert boxes_3d.tensor.shape[1] == 7\n    assert scores_3d.shape[0] >= 0\n    assert labels_3d.shape[0] >= 0\n\n\ndef test_parta2():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    _setup_seed(0)\n    parta2 = _get_detector_cfg(\n        'parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')\n    self = build_detector(parta2).cuda()\n    points_0 = torch.rand([1000, 4], device='cuda')\n    points_1 = torch.rand([1000, 4], device='cuda')\n    points = [points_0, points_1]\n    img_meta_0 = dict(box_type_3d=LiDARInstance3DBoxes)\n    img_meta_1 = dict(box_type_3d=LiDARInstance3DBoxes)\n    img_metas = [img_meta_0, img_meta_1]\n    gt_bbox_0 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))\n    gt_bbox_1 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))\n    gt_bboxes = [gt_bbox_0, gt_bbox_1]\n    gt_labels_0 = torch.randint(0, 3, [10], device='cuda')\n    gt_labels_1 = torch.randint(0, 3, [10], device='cuda')\n    gt_labels = [gt_labels_0, gt_labels_1]\n\n    # test_forward_train\n    losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)\n    assert losses['loss_rpn_cls'][0] >= 0\n    assert losses['loss_rpn_bbox'][0] >= 0\n    assert losses['loss_rpn_dir'][0] >= 0\n    assert losses['loss_seg'] >= 0\n    assert losses['loss_part'] >= 0\n    assert losses['loss_cls'] >= 0\n    assert losses['loss_bbox'] >= 0\n    assert losses['loss_corner'] >= 0\n\n    # test_simple_test\n    results = self.simple_test(points, img_metas)\n    boxes_3d = results[0]['boxes_3d']\n    scores_3d = results[0]['scores_3d']\n    labels_3d = results[0]['labels_3d']\n    assert boxes_3d.tensor.shape[0] >= 0\n    assert boxes_3d.tensor.shape[1] == 7\n    assert scores_3d.shape[0] >= 0\n    assert labels_3d.shape[0] >= 0\n\n\ndef test_centerpoint():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    centerpoint = _get_detector_cfg(\n        'centerpoint/centerpoint_0075voxel_second_secfpn_'\n        'dcn_4x8_cyclic_flip-tta_20e_nus.py')\n    self = build_detector(centerpoint).cuda()\n    points_0 = torch.rand([1000, 5], device='cuda')\n    points_1 = torch.rand([1000, 5], device='cuda')\n    points = [points_0, points_1]\n    img_meta_0 = dict(\n        box_type_3d=LiDARInstance3DBoxes,\n        flip=True,\n        pcd_horizontal_flip=True,\n        pcd_vertical_flip=False)\n    img_meta_1 = dict(\n        box_type_3d=LiDARInstance3DBoxes,\n        flip=True,\n        pcd_horizontal_flip=False,\n        pcd_vertical_flip=True)\n    img_metas = [img_meta_0, img_meta_1]\n    gt_bbox_0 = LiDARInstance3DBoxes(\n        torch.rand([10, 9], device='cuda'), box_dim=9)\n    gt_bbox_1 = LiDARInstance3DBoxes(\n        torch.rand([10, 9], device='cuda'), box_dim=9)\n    gt_bboxes = [gt_bbox_0, gt_bbox_1]\n    gt_labels_0 = torch.randint(0, 3, [10], device='cuda')\n    gt_labels_1 = torch.randint(0, 3, [10], device='cuda')\n    gt_labels = [gt_labels_0, gt_labels_1]\n\n    # test_forward_train\n    losses = self.forward_train(points, img_metas, gt_bboxes, 
gt_labels)\n    for key, value in losses.items():\n        assert value >= 0\n\n    # test_simple_test\n    results = self.simple_test(points, img_metas)\n    boxes_3d_0 = results[0]['pts_bbox']['boxes_3d']\n    scores_3d_0 = results[0]['pts_bbox']['scores_3d']\n    labels_3d_0 = results[0]['pts_bbox']['labels_3d']\n    assert boxes_3d_0.tensor.shape[0] >= 0\n    assert boxes_3d_0.tensor.shape[1] == 9\n    assert scores_3d_0.shape[0] >= 0\n    assert labels_3d_0.shape[0] >= 0\n    boxes_3d_1 = results[1]['pts_bbox']['boxes_3d']\n    scores_3d_1 = results[1]['pts_bbox']['scores_3d']\n    labels_3d_1 = results[1]['pts_bbox']['labels_3d']\n    assert boxes_3d_1.tensor.shape[0] >= 0\n    assert boxes_3d_1.tensor.shape[1] == 9\n    assert scores_3d_1.shape[0] >= 0\n    assert labels_3d_1.shape[0] >= 0\n\n    # test_aug_test\n    points = [[torch.rand([1000, 5], device='cuda')]]\n    img_metas = [[\n        dict(\n            box_type_3d=LiDARInstance3DBoxes,\n            pcd_scale_factor=1.0,\n            flip=True,\n            pcd_horizontal_flip=True,\n            pcd_vertical_flip=False)\n    ]]\n    results = self.aug_test(points, img_metas)\n    boxes_3d_0 = results[0]['pts_bbox']['boxes_3d']\n    scores_3d_0 = results[0]['pts_bbox']['scores_3d']\n    labels_3d_0 = results[0]['pts_bbox']['labels_3d']\n    assert boxes_3d_0.tensor.shape[0] >= 0\n    assert boxes_3d_0.tensor.shape[1] == 9\n    assert scores_3d_0.shape[0] >= 0\n    assert labels_3d_0.shape[0] >= 0\n"
  },
  {
    "path": "tests/test_models/test_forward.py",
    "content": "\"\"\"Test model forward process.\n\nCommandLine:\n    pytest tests/test_models/test_forward.py\n    xdoctest tests/test_models/test_forward.py zero\n\"\"\"\nimport copy\nimport numpy as np\nimport torch\nfrom os.path import dirname, exists, join\n\n\ndef _get_config_directory():\n    \"\"\"Find the predefined detector config directory.\"\"\"\n    try:\n        # Assume we are running in the source mmdetection3d repo\n        repo_dpath = dirname(dirname(dirname(__file__)))\n    except NameError:\n        # For IPython development when this __file__ is not defined\n        import mmdet3d\n        repo_dpath = dirname(dirname(mmdet3d.__file__))\n    config_dpath = join(repo_dpath, 'configs')\n    if not exists(config_dpath):\n        raise Exception('Cannot find config path')\n    return config_dpath\n\n\ndef _get_config_module(fname):\n    \"\"\"Load a configuration as a python module.\"\"\"\n    from mmcv import Config\n    config_dpath = _get_config_directory()\n    config_fpath = join(config_dpath, fname)\n    config_mod = Config.fromfile(config_fpath)\n    return config_mod\n\n\ndef _get_detector_cfg(fname):\n    \"\"\"Grab configs necessary to create a detector.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n    return model\n\n\ndef _test_two_stage_forward(cfg_file):\n    model = _get_detector_cfg(cfg_file)\n    model['pretrained'] = None\n\n    from mmdet.models import build_detector\n    detector = build_detector(model)\n\n    input_shape = (1, 3, 256, 256)\n\n    # Test forward train with a non-empty truth batch\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    gt_masks = mm_inputs['gt_masks']\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        gt_masks=gt_masks,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    loss.requires_grad_(True)\n    assert float(loss.item()) > 0\n    loss.backward()\n\n    # Test forward train with an empty truth batch\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    gt_masks = mm_inputs['gt_masks']\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        gt_masks=gt_masks,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    assert float(loss.item()) > 0\n    loss.backward()\n\n    # Test forward test\n    with torch.no_grad():\n        img_list = [g[None, :] for g in imgs]\n        batch_results = []\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      return_loss=False)\n            batch_results.append(result)\n\n\ndef _test_single_stage_forward(cfg_file):\n    model = _get_detector_cfg(cfg_file)\n    model['pretrained'] = None\n\n    from mmdet.models import build_detector\n    detector = build_detector(model)\n\n    input_shape = (1, 3, 300, 
300)\n    mm_inputs = _demo_mm_inputs(input_shape)\n\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n\n    # Test forward train\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    assert float(loss.item()) > 0\n\n    # Test forward test\n    with torch.no_grad():\n        img_list = [g[None, :] for g in imgs]\n        batch_results = []\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      return_loss=False)\n            batch_results.append(result)\n\n\ndef _demo_mm_inputs(input_shape=(1, 3, 300, 300),\n                    num_items=None, num_classes=10):  # yapf: disable\n    \"\"\"Create a superset of inputs needed to run test or train batches.\n\n    Args:\n        input_shape (tuple):\n            input batch dimensions\n\n        num_items (None | List[int]):\n            specifies the number of boxes in each batch item\n\n        num_classes (int):\n            number of different labels a box might have\n    \"\"\"\n    from mmdet.core import BitmapMasks\n\n    (N, C, H, W) = input_shape\n\n    rng = np.random.RandomState(0)\n\n    imgs = rng.rand(*input_shape)\n\n    img_metas = [{\n        'img_shape': (H, W, C),\n        'ori_shape': (H, W, C),\n        'pad_shape': (H, W, C),\n        'filename': '<demo>.png',\n        'scale_factor': 1.0,\n        'flip': False,\n    } for _ in range(N)]\n\n    gt_bboxes = []\n    gt_labels = []\n    gt_masks = []\n\n    for batch_idx in range(N):\n        if num_items is None:\n            num_boxes = rng.randint(1, 10)\n        else:\n            num_boxes = num_items[batch_idx]\n\n        cx, cy, bw, bh = rng.rand(num_boxes, 4).T\n\n        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)\n        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)\n        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)\n        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)\n\n        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T\n        class_idxs = rng.randint(1, num_classes, size=num_boxes)\n\n        gt_bboxes.append(torch.FloatTensor(boxes))\n        gt_labels.append(torch.LongTensor(class_idxs))\n\n    mask = np.random.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)\n    gt_masks.append(BitmapMasks(mask, H, W))\n\n    mm_inputs = {\n        'imgs': torch.FloatTensor(imgs).requires_grad_(True),\n        'img_metas': img_metas,\n        'gt_bboxes': gt_bboxes,\n        'gt_labels': gt_labels,\n        'gt_bboxes_ignore': None,\n        'gt_masks': gt_masks,\n    }\n    return mm_inputs\n"
  },
  {
    "path": "tests/test_models/test_fusion/test_fusion_coord_trans.py",
    "content": "\"\"\"Tests coords transformation in fusion modules.\n\nCommandLine:\n    pytest tests/test_models/test_fusion/test_fusion_coord_trans.py\n\"\"\"\n\nimport torch\n\nfrom mmdet3d.models.fusion_layers import apply_3d_transformation\n\n\ndef test_coords_transformation():\n    \"\"\"Test the transformation of 3d coords.\"\"\"\n\n    # H+R+S+T, not reverse, depth\n    img_meta = {\n        'pcd_scale_factor':\n        1.2311e+00,\n        'pcd_rotation': [[8.660254e-01, 0.5, 0], [-0.5, 8.660254e-01, 0],\n                         [0, 0, 1.0e+00]],\n        'pcd_trans': [1.111e-02, -8.88e-03, 0.0],\n        'pcd_horizontal_flip':\n        True,\n        'transformation_3d_flow': ['HF', 'R', 'S', 'T']\n    }\n\n    pcd = torch.tensor([[-5.2422e+00, -2.9757e-01, 4.0021e+01],\n                        [-9.1435e-01, 2.6675e+01, -5.5950e+00],\n                        [2.0089e-01, 5.8098e+00, -3.5409e+01],\n                        [-1.9461e-01, 3.1309e+01, -1.0901e+00]])\n\n    pcd_transformed = apply_3d_transformation(\n        pcd, 'DEPTH', img_meta, reverse=False)\n\n    expected_tensor = torch.tensor(\n        [[5.78332345e+00, 2.900697e+00, 4.92698531e+01],\n         [-1.5433839e+01, 2.8993850e+01, -6.8880045e+00],\n         [-3.77929405e+00, 6.061661e+00, -4.35920199e+01],\n         [-1.9053658e+01, 3.3491436e+01, -1.34202211e+00]])\n\n    assert torch.allclose(expected_tensor, pcd_transformed, 1e-4)\n\n    # H+R+S+T, reverse, depth\n    img_meta = {\n        'pcd_scale_factor':\n        7.07106781e-01,\n        'pcd_rotation': [[7.07106781e-01, 7.07106781e-01, 0.0],\n                         [-7.07106781e-01, 7.07106781e-01, 0.0],\n                         [0.0, 0.0, 1.0e+00]],\n        'pcd_trans': [0.0, 0.0, 0.0],\n        'pcd_horizontal_flip':\n        False,\n        'transformation_3d_flow': ['HF', 'R', 'S', 'T']\n    }\n\n    pcd = torch.tensor([[-5.2422e+00, -2.9757e-01, 4.0021e+01],\n                        [-9.1435e+01, 2.6675e+01, -5.5950e+00],\n                        [6.061661e+00, -0.0, -1.0e+02]])\n\n    pcd_transformed = apply_3d_transformation(\n        pcd, 'DEPTH', img_meta, reverse=True)\n\n    expected_tensor = torch.tensor(\n        [[-5.53977e+00, 4.94463e+00, 5.65982409e+01],\n         [-6.476e+01, 1.1811e+02, -7.91252488e+00],\n         [6.061661e+00, -6.061661e+00, -1.41421356e+02]])\n    assert torch.allclose(expected_tensor, pcd_transformed, 1e-4)\n\n    # H+R+S+T, not reverse, camera\n    img_meta = {\n        'pcd_scale_factor':\n        1.0 / 7.07106781e-01,\n        'pcd_rotation': [[7.07106781e-01, 0.0, 7.07106781e-01],\n                         [0.0, 1.0e+00, 0.0],\n                         [-7.07106781e-01, 0.0, 7.07106781e-01]],\n        'pcd_trans': [1.0e+00, -1.0e+00, 0.0],\n        'pcd_horizontal_flip':\n        True,\n        'transformation_3d_flow': ['HF', 'S', 'R', 'T']\n    }\n\n    pcd = torch.tensor([[-5.2422e+00, 4.0021e+01, -2.9757e-01],\n                        [-9.1435e+01, -5.5950e+00, 2.6675e+01],\n                        [6.061661e+00, -1.0e+02, -0.0]])\n\n    pcd_transformed = apply_3d_transformation(\n        pcd, 'CAMERA', img_meta, reverse=False)\n\n    expected_tensor = torch.tensor(\n        [[6.53977e+00, 5.55982409e+01, 4.94463e+00],\n         [6.576e+01, -8.91252488e+00, 1.1811e+02],\n         [-5.061661e+00, -1.42421356e+02, -6.061661e+00]])\n\n    assert torch.allclose(expected_tensor, pcd_transformed, 1e-4)\n\n    # V, reverse, camera\n    img_meta = {'pcd_vertical_flip': True, 'transformation_3d_flow': ['VF']}\n\n  
  pcd_transformed = apply_3d_transformation(\n        pcd, 'CAMERA', img_meta, reverse=True)\n\n    expected_tensor = torch.tensor([[-5.2422e+00, 4.0021e+01, 2.9757e-01],\n                                    [-9.1435e+01, -5.5950e+00, -2.6675e+01],\n                                    [6.061661e+00, -1.0e+02, 0.0]])\n\n    assert torch.allclose(expected_tensor, pcd_transformed, 1e-4)\n\n    # V+H, not reverse, depth\n    img_meta = {\n        'pcd_vertical_flip': True,\n        'pcd_horizontal_flip': True,\n        'transformation_3d_flow': ['VF', 'HF']\n    }\n\n    pcd_transformed = apply_3d_transformation(\n        pcd, 'DEPTH', img_meta, reverse=False)\n\n    expected_tensor = torch.tensor([[5.2422e+00, -4.0021e+01, -2.9757e-01],\n                                    [9.1435e+01, 5.5950e+00, 2.6675e+01],\n                                    [-6.061661e+00, 1.0e+02, 0.0]])\n    assert torch.allclose(expected_tensor, pcd_transformed, 1e-4)\n\n    # V+H, reverse, lidar\n    img_meta = {\n        'pcd_vertical_flip': True,\n        'pcd_horizontal_flip': True,\n        'transformation_3d_flow': ['VF', 'HF']\n    }\n\n    pcd_transformed = apply_3d_transformation(\n        pcd, 'LIDAR', img_meta, reverse=True)\n\n    expected_tensor = torch.tensor([[5.2422e+00, -4.0021e+01, -2.9757e-01],\n                                    [9.1435e+01, 5.5950e+00, 2.6675e+01],\n                                    [-6.061661e+00, 1.0e+02, 0.0]])\n    assert torch.allclose(expected_tensor, pcd_transformed, 1e-4)\n"
  },
  {
    "path": "tests/test_models/test_fusion/test_point_fusion.py",
    "content": "\"\"\"Tests the core function of point fusion.\n\nCommandLine:\n    pytest tests/test_models/test_fusion/test_point_fusion.py\n\"\"\"\n\nimport torch\n\nfrom mmdet3d.models.fusion_layers import PointFusion\n\n\ndef test_sample_single():\n    # this function makes sure the rewriting of 3d coords transformation\n    # in point fusion does not change the original behaviour\n    lidar2img = torch.tensor(\n        [[6.0294e+02, -7.0791e+02, -1.2275e+01, -1.7094e+02],\n         [1.7678e+02, 8.8088e+00, -7.0794e+02, -1.0257e+02],\n         [9.9998e-01, -1.5283e-03, -5.2907e-03, -3.2757e-01],\n         [0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]])\n\n    #  all use default\n    img_meta = {\n        'transformation_3d_flow': ['R', 'S', 'T', 'HF'],\n        'input_shape': [370, 1224],\n        'img_shape': [370, 1224],\n        'lidar2img': lidar2img,\n    }\n\n    #  dummy parameters\n    fuse = PointFusion(1, 1, 1, 1)\n    img_feat = torch.arange(370 * 1224)[None, ...].view(\n        370, 1224)[None, None, ...].float() / (370 * 1224)\n    pts = torch.tensor([[8.356, -4.312, -0.445], [11.777, -6.724, -0.564],\n                        [6.453, 2.53, -1.612], [6.227, -3.839, -0.563]])\n    out = fuse.sample_single(img_feat, pts, img_meta)\n\n    expected_tensor = torch.tensor(\n        [0.5560822, 0.5476625, 0.9687978, 0.6241757])\n    assert torch.allclose(expected_tensor, out, 1e-4)\n\n    pcd_rotation = torch.tensor([[8.660254e-01, 0.5, 0],\n                                 [-0.5, 8.660254e-01, 0], [0, 0, 1.0e+00]])\n    pcd_scale_factor = 1.111\n    pcd_trans = torch.tensor([1.0, -1.0, 0.5])\n    pts = pts @ pcd_rotation\n    pts *= pcd_scale_factor\n    pts += pcd_trans\n    pts[:, 1] = -pts[:, 1]\n\n    # not use default\n    img_meta.update({\n        'pcd_scale_factor': pcd_scale_factor,\n        'pcd_rotation': pcd_rotation,\n        'pcd_trans': pcd_trans,\n        'pcd_horizontal_flip': True\n    })\n    out = fuse.sample_single(img_feat, pts, img_meta)\n    expected_tensor = torch.tensor(\n        [0.5560822, 0.5476625, 0.9687978, 0.6241757])\n    assert torch.allclose(expected_tensor, out, 1e-4)\n"
  },
  {
    "path": "tests/test_models/test_fusion/test_vote_fusion.py",
    "content": "\"\"\"Tests the core function of vote fusion.\n\nCommandLine:\n    pytest tests/test_models/test_fusion/test_vote_fusion.py\n\"\"\"\n\nimport torch\n\nfrom mmdet3d.models.fusion_layers import VoteFusion\n\n\ndef test_vote_fusion():\n    img_meta = {\n        'ori_shape': (530, 730, 3),\n        'img_shape': (600, 826, 3),\n        'pad_shape': (608, 832, 3),\n        'scale_factor':\n        torch.tensor([1.1315, 1.1321, 1.1315, 1.1321]),\n        'flip':\n        False,\n        'pcd_horizontal_flip':\n        False,\n        'pcd_vertical_flip':\n        False,\n        'pcd_trans':\n        torch.tensor([0., 0., 0.]),\n        'pcd_scale_factor':\n        1.0308290128214932,\n        'pcd_rotation':\n        torch.tensor([[0.9747, 0.2234, 0.0000], [-0.2234, 0.9747, 0.0000],\n                      [0.0000, 0.0000, 1.0000]]),\n        'transformation_3d_flow': ['HF', 'R', 'S', 'T']\n    }\n\n    calibs = {\n        'Rt':\n        torch.tensor([[[0.979570, 0.047954, -0.195330],\n                       [0.047954, 0.887470, 0.458370],\n                       [0.195330, -0.458370, 0.867030]]]),\n        'K':\n        torch.tensor([[[529.5000, 0.0000, 365.0000],\n                       [0.0000, 529.5000, 265.0000], [0.0000, 0.0000,\n                                                      1.0000]]])\n    }\n\n    bboxes = torch.tensor([[[\n        5.4286e+02, 9.8283e+01, 6.1700e+02, 1.6742e+02, 9.7922e-01, 3.0000e+00\n    ], [\n        4.2613e+02, 8.4646e+01, 4.9091e+02, 1.6237e+02, 9.7848e-01, 3.0000e+00\n    ], [\n        2.5606e+02, 7.3244e+01, 3.7883e+02, 1.8471e+02, 9.7317e-01, 3.0000e+00\n    ], [\n        6.0104e+02, 1.0648e+02, 6.6757e+02, 1.9216e+02, 8.4607e-01, 3.0000e+00\n    ], [\n        2.2923e+02, 1.4984e+02, 7.0163e+02, 4.6537e+02, 3.5719e-01, 0.0000e+00\n    ], [\n        2.5614e+02, 7.4965e+01, 3.3275e+02, 1.5908e+02, 2.8688e-01, 3.0000e+00\n    ], [\n        9.8718e+00, 1.4142e+02, 2.0213e+02, 3.3878e+02, 1.0935e-01, 3.0000e+00\n    ], [\n        6.1930e+02, 1.1768e+02, 6.8505e+02, 2.0318e+02, 1.0720e-01, 3.0000e+00\n    ]]])\n\n    seeds_3d = torch.tensor([[[0.044544, 1.675476, -1.531831],\n                              [2.500625, 7.238662, -0.737675],\n                              [-0.600003, 4.827733, -0.084022],\n                              [1.396212, 3.994484, -1.551180],\n                              [-2.054746, 2.012759, -0.357472],\n                              [-0.582477, 6.580470, -1.466052],\n                              [1.313331, 5.722039, 0.123904],\n                              [-1.107057, 3.450359, -1.043422],\n                              [1.759746, 5.655951, -1.519564],\n                              [-0.203003, 6.453243, 0.137703],\n                              [-0.910429, 0.904407, -0.512307],\n                              [0.434049, 3.032374, -0.763842],\n                              [1.438146, 2.289263, -1.546332],\n                              [0.575622, 5.041906, -0.891143],\n                              [-1.675931, 1.417597, -1.588347]]])\n\n    imgs = torch.linspace(\n        -1, 1, steps=608 * 832).reshape(1, 608, 832).repeat(3, 1, 1)[None]\n\n    expected_tensor1 = torch.tensor(\n        [[[\n            0.000000e+00, -0.000000e+00, 0.000000e+00, -0.000000e+00,\n            0.000000e+00, 1.193706e-01, -0.000000e+00, -2.879214e-01,\n            -0.000000e+00, 0.000000e+00, 1.422463e-01, -6.474612e-01,\n            -0.000000e+00, 1.490057e-02, 0.000000e+00\n        ],\n          [\n              0.000000e+00, -0.000000e+00, 
-0.000000e+00, 0.000000e+00,\n              0.000000e+00, -1.873745e+00, -0.000000e+00, 1.576240e-01,\n              0.000000e+00, -0.000000e+00, -3.646177e-02, -7.751858e-01,\n              0.000000e+00, 9.593642e-02, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, -6.263277e-02, 0.000000e+00, -3.646387e-01,\n              0.000000e+00, 0.000000e+00, -5.875812e-01, -6.263450e-02,\n              0.000000e+00, 1.149264e-01, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 8.899736e-01, 0.000000e+00, 9.019017e-01,\n              0.000000e+00, 0.000000e+00, 6.917775e-01, 8.899733e-01,\n              0.000000e+00, 9.812444e-01, 0.000000e+00\n          ],\n          [\n              -0.000000e+00, -0.000000e+00, -0.000000e+00, -0.000000e+00,\n              -0.000000e+00, -4.516903e-01, -0.000000e+00, -2.315422e-01,\n              -0.000000e+00, -0.000000e+00, -4.197519e-01, -4.516906e-01,\n              -0.000000e+00, -1.547615e-01, -0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 3.571937e-01, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 3.571937e-01,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 9.731653e-01,\n              0.000000e+00, 0.000000e+00, 1.093455e-01, 0.000000e+00,\n              0.000000e+00, 8.460656e-01, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              
0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              2.316288e-03, -1.948284e-03, -3.694394e-03, 2.176163e-04,\n              -3.882605e-03, -1.901490e-03, -3.355042e-03, -1.774631e-03,\n              -6.981542e-04, -3.886823e-03, -1.302233e-03, -1.189933e-03,\n              2.540967e-03, -1.834944e-03, 1.032048e-03\n          ],\n          [\n              2.316288e-03, -1.948284e-03, -3.694394e-03, 2.176163e-04,\n              -3.882605e-03, -1.901490e-03, -3.355042e-03, -1.774631e-03,\n              -6.981542e-04, -3.886823e-03, -1.302233e-03, -1.189933e-03,\n              2.540967e-03, -1.834944e-03, 1.032048e-03\n          ],\n          [\n              2.316288e-03, -1.948284e-03, -3.694394e-03, 2.176163e-04,\n              -3.882605e-03, -1.901490e-03, -3.355042e-03, -1.774631e-03,\n              -6.981542e-04, -3.886823e-03, -1.302233e-03, -1.189933e-03,\n              2.540967e-03, -1.834944e-03, 1.032048e-03\n          ]]])\n\n    expected_tensor2 = torch.tensor([[\n        False, False, False, False, False, True, False, True, False, False,\n        True, True, False, True, False, False, False, False, False, False,\n        False, False, True, False, False, False, False, False, True, False,\n        False, False, False, False, False, False, False, False, False, False,\n        False, False, False, True, False\n    ]])\n\n    expected_tensor3 = torch.tensor(\n        [[[\n            -0.000000e+00, -0.000000e+00, -0.000000e+00, -0.000000e+00,\n            0.000000e+00, -0.000000e+00, -0.000000e+00, 0.000000e+00,\n            -0.000000e+00, -0.000000e+00, 0.000000e+00, -0.000000e+00,\n            -0.000000e+00, 1.720988e-01, 0.000000e+00\n        ],\n          [\n              0.000000e+00, -0.000000e+00, -0.000000e+00, 0.000000e+00,\n              -0.000000e+00, 0.000000e+00, -0.000000e+00, 0.000000e+00,\n              0.000000e+00, -0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 4.824460e-02, 0.000000e+00\n          ],\n          [\n              -0.000000e+00, -0.000000e+00, -0.000000e+00, -0.000000e+00,\n              -0.000000e+00, -0.000000e+00, -0.000000e+00, 0.000000e+00,\n              -0.000000e+00, -0.000000e+00, -0.000000e+00, -0.000000e+00,\n              -0.000000e+00, 1.447314e-01, -0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 9.759269e-01, 0.000000e+00\n          ],\n          [\n              -0.000000e+00, -0.000000e+00, -0.000000e+00, -0.000000e+00,\n              -0.000000e+00, -0.000000e+00, -0.000000e+00, -0.000000e+00,\n              -0.000000e+00, -0.000000e+00, -0.000000e+00, -0.000000e+00,\n              -0.000000e+00, -1.631542e-01, -0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 
0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 1.072001e-01, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,\n              0.000000e+00, 0.000000e+00, 0.000000e+00\n          ],\n          [\n              2.316288e-03, -1.948284e-03, -3.694394e-03, 2.176163e-04,\n              -3.882605e-03, -1.901490e-03, -3.355042e-03, -1.774631e-03,\n              -6.981542e-04, -3.886823e-03, -1.302233e-03, -1.189933e-03,\n              2.540967e-03, -1.834944e-03, 1.032048e-03\n          ],\n          [\n              2.316288e-03, -1.948284e-03, -3.694394e-03, 2.176163e-04,\n              -3.882605e-03, -1.901490e-03, -3.355042e-03, -1.774631e-03,\n              -6.981542e-04, -3.886823e-03, -1.302233e-03, -1.189933e-03,\n              2.540967e-03, -1.834944e-03, 1.032048e-03\n          ],\n          [\n              2.316288e-03, -1.948284e-03, -3.694394e-03, 2.176163e-04,\n       
       -3.882605e-03, -1.901490e-03, -3.355042e-03, -1.774631e-03,\n              -6.981542e-04, -3.886823e-03, -1.302233e-03, -1.189933e-03,\n              2.540967e-03, -1.834944e-03, 1.032048e-03\n          ]]])\n\n    fusion = VoteFusion()\n    out1, out2 = fusion(imgs, bboxes, seeds_3d, [img_meta], calibs)\n    assert torch.allclose(expected_tensor1, out1[:, :, :15], 1e-3)\n    assert torch.allclose(expected_tensor2.float(), out2.float(), 1e-3)\n    assert torch.allclose(expected_tensor3, out1[:, :, 30:45], 1e-3)\n\n    out1, out2 = fusion(imgs, bboxes[:, :2], seeds_3d, [img_meta], calibs)\n    out1 = out1[:, :15, 30:45]\n    out2 = out2[:, 30:45].float()\n    assert torch.allclose(torch.zeros_like(out1), out1, 1e-3)\n    assert torch.allclose(torch.zeros_like(out2), out2, 1e-3)\n"
  },
  {
    "path": "tests/test_models/test_heads/test_heads.py",
    "content": "import copy\nimport numpy as np\nimport pytest\nimport random\nimport torch\nfrom os.path import dirname, exists, join\n\nfrom mmdet3d.core.bbox import (Box3DMode, DepthInstance3DBoxes,\n                               LiDARInstance3DBoxes)\nfrom mmdet3d.models.builder import build_head\nfrom mmdet.apis import set_random_seed\n\n\ndef _setup_seed(seed):\n    torch.manual_seed(seed)\n    torch.cuda.manual_seed_all(seed)\n    np.random.seed(seed)\n    random.seed(seed)\n    torch.backends.cudnn.deterministic = True\n\n\ndef _get_config_directory():\n    \"\"\"Find the predefined detector config directory.\"\"\"\n    try:\n        # Assume we are running in the source mmdetection3d repo\n        repo_dpath = dirname(dirname(dirname(dirname(__file__))))\n    except NameError:\n        # For IPython development when this __file__ is not defined\n        import mmdet3d\n        repo_dpath = dirname(dirname(mmdet3d.__file__))\n    config_dpath = join(repo_dpath, 'configs')\n    if not exists(config_dpath):\n        raise Exception('Cannot find config path')\n    return config_dpath\n\n\ndef _get_config_module(fname):\n    \"\"\"Load a configuration as a python module.\"\"\"\n    from mmcv import Config\n    config_dpath = _get_config_directory()\n    config_fpath = join(config_dpath, fname)\n    config_mod = Config.fromfile(config_fpath)\n    return config_mod\n\n\ndef _get_head_cfg(fname):\n    \"\"\"Grab configs necessary to create a bbox_head.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    import mmcv\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n    train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))\n    test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))\n\n    bbox_head = model.bbox_head\n    bbox_head.update(train_cfg=train_cfg)\n    bbox_head.update(test_cfg=test_cfg)\n    return bbox_head\n\n\ndef _get_rpn_head_cfg(fname):\n    \"\"\"Grab configs necessary to create a rpn_head.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    import mmcv\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n    train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))\n    test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))\n\n    rpn_head = model.rpn_head\n    rpn_head.update(train_cfg=train_cfg.rpn)\n    rpn_head.update(test_cfg=test_cfg.rpn)\n    return rpn_head, train_cfg.rpn_proposal\n\n\ndef _get_roi_head_cfg(fname):\n    \"\"\"Grab configs necessary to create a roi_head.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    import mmcv\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n    train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))\n    test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))\n\n    roi_head = model.roi_head\n    roi_head.update(train_cfg=train_cfg.rcnn)\n    roi_head.update(test_cfg=test_cfg.rcnn)\n    return roi_head\n\n\ndef _get_pts_bbox_head_cfg(fname):\n    \"\"\"Grab configs necessary to create a pts_bbox_head.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    import mmcv\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n    train_cfg = 
mmcv.Config(copy.deepcopy(config.model.train_cfg.pts))\n    test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg.pts))\n\n    pts_bbox_head = model.pts_bbox_head\n    pts_bbox_head.update(train_cfg=train_cfg)\n    pts_bbox_head.update(test_cfg=test_cfg)\n    return pts_bbox_head\n\n\ndef _get_vote_head_cfg(fname):\n    \"\"\"Grab configs necessary to create a vote_head.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    import mmcv\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n    train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))\n    test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))\n\n    vote_head = model.bbox_head\n    vote_head.update(train_cfg=train_cfg)\n    vote_head.update(test_cfg=test_cfg)\n    return vote_head\n\n\ndef _get_parta2_bbox_head_cfg(fname):\n    \"\"\"Grab configs necessary to create a parta2_bbox_head.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n\n    vote_head = model.roi_head.bbox_head\n    return vote_head\n\n\ndef test_anchor3d_head_loss():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    bbox_head_cfg = _get_head_cfg(\n        'second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py')\n\n    from mmdet3d.models.builder import build_head\n    self = build_head(bbox_head_cfg)\n    self.cuda()\n    assert isinstance(self.conv_cls, torch.nn.modules.conv.Conv2d)\n    assert self.conv_cls.in_channels == 512\n    assert self.conv_cls.out_channels == 18\n    assert self.conv_reg.out_channels == 42\n    assert self.conv_dir_cls.out_channels == 12\n\n    # test forward\n    feats = list()\n    feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())\n    (cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)\n    assert cls_score[0].shape == torch.Size([2, 18, 200, 176])\n    assert bbox_pred[0].shape == torch.Size([2, 42, 200, 176])\n    assert dir_cls_preds[0].shape == torch.Size([2, 12, 200, 176])\n\n    # test loss\n    gt_bboxes = list(\n        torch.tensor(\n            [[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],\n             [[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]]],\n            dtype=torch.float32).cuda())\n    gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())\n    input_metas = [{\n        'sample_idx': 1234\n    }, {\n        'sample_idx': 2345\n    }]  # fake input_metas\n\n    losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,\n                       gt_labels, input_metas)\n    assert losses['loss_cls'][0] > 0\n    assert losses['loss_bbox'][0] > 0\n    assert losses['loss_dir'][0] > 0\n\n    # test empty ground truth case\n    gt_bboxes = list(torch.empty((2, 0, 7)).cuda())\n    gt_labels = list(torch.empty((2, 0)).cuda())\n    empty_gt_losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,\n                                gt_labels, input_metas)\n    assert empty_gt_losses['loss_cls'][0] > 0\n    assert empty_gt_losses['loss_bbox'][0] == 0\n    assert empty_gt_losses['loss_dir'][0] == 0\n\n\ndef test_anchor3d_head_getboxes():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    bbox_head_cfg = _get_head_cfg(\n        
'second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py')\n\n    from mmdet3d.models.builder import build_head\n    self = build_head(bbox_head_cfg)\n    self.cuda()\n\n    feats = list()\n    feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())\n    # fake input_metas\n    input_metas = [{\n        'sample_idx': 1234,\n        'box_type_3d': LiDARInstance3DBoxes,\n        'box_mode_3d': Box3DMode.LIDAR\n    }, {\n        'sample_idx': 2345,\n        'box_type_3d': LiDARInstance3DBoxes,\n        'box_mode_3d': Box3DMode.LIDAR\n    }]\n    (cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)\n\n    # test get_boxes\n    cls_score[0] -= 1.5  # too many positive samples may cause cuda oom\n    result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,\n                                  input_metas)\n    assert (result_list[0][1] > 0.3).all()\n\n\ndef test_parta2_rpnhead_getboxes():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    rpn_head_cfg, proposal_cfg = _get_rpn_head_cfg(\n        'parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')\n\n    self = build_head(rpn_head_cfg)\n    self.cuda()\n\n    feats = list()\n    feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())\n    # fake input_metas\n    input_metas = [{\n        'sample_idx': 1234,\n        'box_type_3d': LiDARInstance3DBoxes,\n        'box_mode_3d': Box3DMode.LIDAR\n    }, {\n        'sample_idx': 2345,\n        'box_type_3d': LiDARInstance3DBoxes,\n        'box_mode_3d': Box3DMode.LIDAR\n    }]\n    (cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)\n\n    # test get_boxes\n    cls_score[0] -= 1.5  # too many positive samples may cause cuda oom\n    result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,\n                                  input_metas, proposal_cfg)\n    assert result_list[0]['scores_3d'].shape == torch.Size([512])\n    assert result_list[0]['labels_3d'].shape == torch.Size([512])\n    assert result_list[0]['cls_preds'].shape == torch.Size([512, 3])\n    assert result_list[0]['boxes_3d'].tensor.shape == torch.Size([512, 7])\n\n\ndef test_vote_head():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    _setup_seed(0)\n    vote_head_cfg = _get_vote_head_cfg(\n        'votenet/votenet_8x8_scannet-3d-18class.py')\n    self = build_head(vote_head_cfg).cuda()\n    fp_xyz = [torch.rand([2, 256, 3], dtype=torch.float32).cuda()]\n    fp_features = [torch.rand([2, 256, 256], dtype=torch.float32).cuda()]\n    fp_indices = [torch.randint(0, 128, [2, 256]).cuda()]\n\n    input_dict = dict(\n        fp_xyz=fp_xyz, fp_features=fp_features, fp_indices=fp_indices)\n\n    # test forward\n    ret_dict = self(input_dict, 'vote')\n    assert ret_dict['center'].shape == torch.Size([2, 256, 3])\n    assert ret_dict['obj_scores'].shape == torch.Size([2, 256, 2])\n    assert ret_dict['size_res'].shape == torch.Size([2, 256, 18, 3])\n    assert ret_dict['dir_res'].shape == torch.Size([2, 256, 1])\n\n    # test loss\n    points = [torch.rand([40000, 4], device='cuda') for i in range(2)]\n    gt_bbox1 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))\n    gt_bbox2 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))\n    gt_bboxes = [gt_bbox1, gt_bbox2]\n    gt_labels = [torch.randint(0, 18, [10], device='cuda') for i in range(2)]\n    pts_semantic_mask = [\n        torch.randint(0, 18, [40000], device='cuda') for i in range(2)\n    ]\n    
pts_instance_mask = [\n        torch.randint(0, 10, [40000], device='cuda') for i in range(2)\n    ]\n    losses = self.loss(ret_dict, points, gt_bboxes, gt_labels,\n                       pts_semantic_mask, pts_instance_mask)\n    assert losses['vote_loss'] >= 0\n    assert losses['objectness_loss'] >= 0\n    assert losses['semantic_loss'] >= 0\n    assert losses['center_loss'] >= 0\n    assert losses['dir_class_loss'] >= 0\n    assert losses['dir_res_loss'] >= 0\n    assert losses['size_class_loss'] >= 0\n    assert losses['size_res_loss'] >= 0\n\n    # test multiclass_nms_single\n    obj_scores = torch.rand([256], device='cuda')\n    sem_scores = torch.rand([256, 18], device='cuda')\n    points = torch.rand([40000, 3], device='cuda')\n    bbox = torch.rand([256, 7], device='cuda')\n    input_meta = dict(box_type_3d=DepthInstance3DBoxes)\n    bbox_selected, score_selected, labels = self.multiclass_nms_single(\n        obj_scores, sem_scores, bbox, points, input_meta)\n    assert bbox_selected.shape[0] >= 0\n    assert bbox_selected.shape[1] == 7\n    assert score_selected.shape[0] >= 0\n    assert labels.shape[0] >= 0\n\n    # test get_boxes\n    points = torch.rand([1, 40000, 4], device='cuda')\n    seed_points = torch.rand([1, 1024, 3], device='cuda')\n    seed_indices = torch.randint(0, 40000, [1, 1024], device='cuda')\n    vote_points = torch.rand([1, 1024, 3], device='cuda')\n    vote_features = torch.rand([1, 256, 1024], device='cuda')\n    aggregated_points = torch.rand([1, 256, 3], device='cuda')\n    aggregated_indices = torch.range(0, 256, device='cuda')\n    obj_scores = torch.rand([1, 256, 2], device='cuda')\n    center = torch.rand([1, 256, 3], device='cuda')\n    dir_class = torch.rand([1, 256, 1], device='cuda')\n    dir_res_norm = torch.rand([1, 256, 1], device='cuda')\n    dir_res = torch.rand([1, 256, 1], device='cuda')\n    size_class = torch.rand([1, 256, 18], device='cuda')\n    size_res = torch.rand([1, 256, 18, 3], device='cuda')\n    sem_scores = torch.rand([1, 256, 18], device='cuda')\n    bbox_preds = dict(\n        seed_points=seed_points,\n        seed_indices=seed_indices,\n        vote_points=vote_points,\n        vote_features=vote_features,\n        aggregated_points=aggregated_points,\n        aggregated_indices=aggregated_indices,\n        obj_scores=obj_scores,\n        center=center,\n        dir_class=dir_class,\n        dir_res_norm=dir_res_norm,\n        dir_res=dir_res,\n        size_class=size_class,\n        size_res=size_res,\n        sem_scores=sem_scores)\n    results = self.get_bboxes(points, bbox_preds, [input_meta])\n    assert results[0][0].tensor.shape[0] >= 0\n    assert results[0][0].tensor.shape[1] == 7\n    assert results[0][1].shape[0] >= 0\n    assert results[0][2].shape[0] >= 0\n\n\ndef test_parta2_bbox_head():\n    parta2_bbox_head_cfg = _get_parta2_bbox_head_cfg(\n        './parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')\n    self = build_head(parta2_bbox_head_cfg)\n    seg_feats = torch.rand([256, 14, 14, 14, 16])\n    part_feats = torch.rand([256, 14, 14, 14, 4])\n\n    cls_score, bbox_pred = self.forward(seg_feats, part_feats)\n    assert cls_score.shape == (256, 1)\n    assert bbox_pred.shape == (256, 7)\n\n\ndef test_part_aggregation_ROI_head():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n\n    roi_head_cfg = _get_roi_head_cfg(\n        'parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py')\n    self = build_head(roi_head_cfg).cuda()\n\n    features = 
np.load('./tests/test_samples/parta2_roihead_inputs.npz')\n    seg_features = torch.tensor(\n        features['seg_features'], dtype=torch.float32, device='cuda')\n    feats_dict = dict(seg_features=seg_features)\n\n    voxels = torch.tensor(\n        features['voxels'], dtype=torch.float32, device='cuda')\n    num_points = torch.ones([500], device='cuda')\n    coors = torch.zeros([500, 4], device='cuda')\n    voxel_centers = torch.zeros([500, 3], device='cuda')\n    box_type_3d = LiDARInstance3DBoxes\n    img_metas = [dict(box_type_3d=box_type_3d)]\n    voxels_dict = dict(\n        voxels=voxels,\n        num_points=num_points,\n        coors=coors,\n        voxel_centers=voxel_centers)\n\n    pred_bboxes = LiDARInstance3DBoxes(\n        torch.tensor(\n            [[0.3990, 0.5167, 0.0249, 0.9401, 0.9459, 0.7967, 0.4150],\n             [0.8203, 0.2290, 0.9096, 0.1183, 0.0752, 0.4092, 0.9601],\n             [0.2093, 0.1940, 0.8909, 0.4387, 0.3570, 0.5454, 0.8299],\n             [0.2099, 0.7684, 0.4290, 0.2117, 0.6606, 0.1654, 0.4250],\n             [0.9927, 0.6964, 0.2472, 0.7028, 0.7494, 0.9303, 0.0494]],\n            dtype=torch.float32,\n            device='cuda'))\n    pred_scores = torch.tensor([0.9722, 0.7910, 0.4690, 0.3300, 0.3345],\n                               dtype=torch.float32,\n                               device='cuda')\n    pred_labels = torch.tensor([0, 1, 0, 2, 1],\n                               dtype=torch.int64,\n                               device='cuda')\n    pred_clses = torch.tensor(\n        [[0.7874, 0.1344, 0.2190], [0.8193, 0.6969, 0.7304],\n         [0.2328, 0.9028, 0.3900], [0.6177, 0.5012, 0.2330],\n         [0.8985, 0.4894, 0.7152]],\n        dtype=torch.float32,\n        device='cuda')\n    proposal = dict(\n        boxes_3d=pred_bboxes,\n        scores_3d=pred_scores,\n        labels_3d=pred_labels,\n        cls_preds=pred_clses)\n    proposal_list = [proposal]\n    gt_bboxes_3d = [LiDARInstance3DBoxes(torch.rand([5, 7], device='cuda'))]\n    gt_labels_3d = [torch.randint(0, 3, [5], device='cuda')]\n\n    losses = self.forward_train(feats_dict, voxels_dict, {}, proposal_list,\n                                gt_bboxes_3d, gt_labels_3d)\n    assert losses['loss_seg'] >= 0\n    assert losses['loss_part'] >= 0\n    assert losses['loss_cls'] >= 0\n    assert losses['loss_bbox'] >= 0\n    assert losses['loss_corner'] >= 0\n\n    bbox_results = self.simple_test(feats_dict, voxels_dict, img_metas,\n                                    proposal_list)\n    boxes_3d = bbox_results[0]['boxes_3d']\n    scores_3d = bbox_results[0]['scores_3d']\n    labels_3d = bbox_results[0]['labels_3d']\n    assert boxes_3d.tensor.shape == (12, 7)\n    assert scores_3d.shape == (12, )\n    assert labels_3d.shape == (12, )\n\n\ndef test_free_anchor_3D_head():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    _setup_seed(0)\n    pts_bbox_head_cfg = _get_pts_bbox_head_cfg(\n        './free_anchor/hv_pointpillars_fpn_sbn-all_'\n        'free-anchor_4x8_2x_nus-3d.py')\n    self = build_head(pts_bbox_head_cfg)\n    cls_scores = [\n        torch.rand([4, 80, 200, 200], device='cuda') for i in range(3)\n    ]\n    bbox_preds = [\n        torch.rand([4, 72, 200, 200], device='cuda') for i in range(3)\n    ]\n    dir_cls_preds = [\n        torch.rand([4, 16, 200, 200], device='cuda') for i in range(3)\n    ]\n    gt_bboxes = [\n        LiDARInstance3DBoxes(torch.rand([8, 9], device='cuda'), box_dim=9)\n        for i in range(4)\n    ]\n  
  gt_labels = [\n        torch.randint(0, 10, [8], device='cuda', dtype=torch.long)\n        for i in range(4)\n    ]\n    input_metas = [0]\n    losses = self.loss(cls_scores, bbox_preds, dir_cls_preds, gt_bboxes,\n                       gt_labels, input_metas, None)\n    assert losses['positive_bag_loss'] >= 0\n    assert losses['negative_bag_loss'] >= 0\n\n\ndef test_primitive_head():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    _setup_seed(0)\n\n    primitive_head_cfg = dict(\n        type='PrimitiveHead',\n        num_dims=2,\n        num_classes=18,\n        primitive_mode='z',\n        vote_module_cfg=dict(\n            in_channels=256,\n            vote_per_seed=1,\n            gt_per_seed=1,\n            conv_channels=(256, 256),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            norm_feats=True,\n            vote_loss=dict(\n                type='ChamferDistance',\n                mode='l1',\n                reduction='none',\n                loss_dst_weight=10.0)),\n        vote_aggregation_cfg=dict(\n            type='PointSAModule',\n            num_point=64,\n            radius=0.3,\n            num_sample=16,\n            mlp_channels=[256, 128, 128, 128],\n            use_xyz=True,\n            normalize_xyz=True),\n        feat_channels=(128, 128),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        objectness_loss=dict(\n            type='CrossEntropyLoss',\n            class_weight=[0.4, 0.6],\n            reduction='mean',\n            loss_weight=1.0),\n        center_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='sum',\n            loss_src_weight=1.0,\n            loss_dst_weight=1.0),\n        semantic_reg_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='sum',\n            loss_src_weight=1.0,\n            loss_dst_weight=1.0),\n        semantic_cls_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        train_cfg=dict(\n            dist_thresh=0.2,\n            var_thresh=1e-2,\n            lower_thresh=1e-6,\n            num_point=100,\n            num_point_line=10,\n            line_thresh=0.2))\n\n    self = build_head(primitive_head_cfg).cuda()\n    fp_xyz = [torch.rand([2, 64, 3], dtype=torch.float32).cuda()]\n    hd_features = torch.rand([2, 256, 64], dtype=torch.float32).cuda()\n    fp_indices = [torch.randint(0, 64, [2, 64]).cuda()]\n    input_dict = dict(\n        fp_xyz_net0=fp_xyz, hd_feature=hd_features, fp_indices_net0=fp_indices)\n\n    # test forward\n    ret_dict = self(input_dict, 'vote')\n    assert ret_dict['center_z'].shape == torch.Size([2, 64, 3])\n    assert ret_dict['size_residuals_z'].shape == torch.Size([2, 64, 2])\n    assert ret_dict['sem_cls_scores_z'].shape == torch.Size([2, 64, 18])\n    assert ret_dict['aggregated_points_z'].shape == torch.Size([2, 64, 3])\n\n    # test loss\n    points = torch.rand([2, 1024, 3], dtype=torch.float32).cuda()\n    ret_dict['seed_points'] = fp_xyz[0]\n    ret_dict['seed_indices'] = fp_indices[0]\n\n    from mmdet3d.core.bbox import DepthInstance3DBoxes\n    gt_bboxes_3d = [\n        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()),\n        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda())\n    ]\n    gt_labels_3d = torch.randint(0, 18, [2, 4]).cuda()\n    gt_labels_3d = [gt_labels_3d[0], 
gt_labels_3d[1]]\n    pts_semantic_mask = torch.randint(0, 19, [2, 1024]).cuda()\n    pts_semantic_mask = [pts_semantic_mask[0], pts_semantic_mask[1]]\n    pts_instance_mask = torch.randint(0, 4, [2, 1024]).cuda()\n    pts_instance_mask = [pts_instance_mask[0], pts_instance_mask[1]]\n\n    loss_input_dict = dict(\n        bbox_preds=ret_dict,\n        points=points,\n        gt_bboxes_3d=gt_bboxes_3d,\n        gt_labels_3d=gt_labels_3d,\n        pts_semantic_mask=pts_semantic_mask,\n        pts_instance_mask=pts_instance_mask)\n    losses_dict = self.loss(**loss_input_dict)\n\n    assert losses_dict['flag_loss_z'] >= 0\n    assert losses_dict['vote_loss_z'] >= 0\n    assert losses_dict['center_loss_z'] >= 0\n    assert losses_dict['size_loss_z'] >= 0\n    assert losses_dict['sem_loss_z'] >= 0\n\n    # 'Primitive_mode' should be one of ['z', 'xy', 'line']\n    with pytest.raises(AssertionError):\n        primitive_head_cfg['vote_module_cfg']['in_channels'] = 'xyz'\n        build_head(primitive_head_cfg)\n\n\ndef test_h3d_head():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    _setup_seed(0)\n\n    h3d_head_cfg = _get_roi_head_cfg('h3dnet/h3dnet_8x3_scannet-3d-18class.py')\n\n    num_point = 128\n    num_proposal = 64\n    h3d_head_cfg.primitive_list[0].vote_aggregation_cfg.num_point = num_point\n    h3d_head_cfg.primitive_list[1].vote_aggregation_cfg.num_point = num_point\n    h3d_head_cfg.primitive_list[2].vote_aggregation_cfg.num_point = num_point\n    h3d_head_cfg.bbox_head.num_proposal = num_proposal\n    self = build_head(h3d_head_cfg).cuda()\n\n    # prepare roi outputs\n    fp_xyz = [torch.rand([1, num_point, 3], dtype=torch.float32).cuda()]\n    hd_features = torch.rand([1, 256, num_point], dtype=torch.float32).cuda()\n    fp_indices = [torch.randint(0, 128, [1, num_point]).cuda()]\n    aggregated_points = torch.rand([1, num_proposal, 3],\n                                   dtype=torch.float32).cuda()\n    aggregated_features = torch.rand([1, 128, num_proposal],\n                                     dtype=torch.float32).cuda()\n    proposal_list = torch.cat([\n        torch.rand([1, num_proposal, 3], dtype=torch.float32).cuda() * 4 - 2,\n        torch.rand([1, num_proposal, 3], dtype=torch.float32).cuda() * 4,\n        torch.zeros([1, num_proposal, 1]).cuda()\n    ],\n                              dim=-1)\n\n    input_dict = dict(\n        fp_xyz_net0=fp_xyz,\n        hd_feature=hd_features,\n        aggregated_points=aggregated_points,\n        aggregated_features=aggregated_features,\n        seed_points=fp_xyz[0],\n        seed_indices=fp_indices[0],\n        proposal_list=proposal_list)\n\n    # prepare gt label\n    from mmdet3d.core.bbox import DepthInstance3DBoxes\n    gt_bboxes_3d = [\n        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda()),\n        DepthInstance3DBoxes(torch.rand([4, 7], dtype=torch.float32).cuda())\n    ]\n    gt_labels_3d = torch.randint(0, 18, [1, 4]).cuda()\n    gt_labels_3d = [gt_labels_3d[0]]\n    pts_semantic_mask = torch.randint(0, 19, [1, num_point]).cuda()\n    pts_semantic_mask = [pts_semantic_mask[0]]\n    pts_instance_mask = torch.randint(0, 4, [1, num_point]).cuda()\n    pts_instance_mask = [pts_instance_mask[0]]\n    points = torch.rand([1, num_point, 3], dtype=torch.float32).cuda()\n\n    # prepare rpn targets\n    vote_targets = torch.rand([1, num_point, 9], dtype=torch.float32).cuda()\n    vote_target_masks = torch.rand([1, num_point], dtype=torch.float32).cuda()\n  
  size_class_targets = torch.rand([1, num_proposal],\n                                    dtype=torch.float32).cuda().long()\n    size_res_targets = torch.rand([1, num_proposal, 3],\n                                  dtype=torch.float32).cuda()\n    dir_class_targets = torch.rand([1, num_proposal],\n                                   dtype=torch.float32).cuda().long()\n    dir_res_targets = torch.rand([1, num_proposal], dtype=torch.float32).cuda()\n    center_targets = torch.rand([1, 4, 3], dtype=torch.float32).cuda()\n    mask_targets = torch.rand([1, num_proposal],\n                              dtype=torch.float32).cuda().long()\n    valid_gt_masks = torch.rand([1, 4], dtype=torch.float32).cuda()\n    objectness_targets = torch.rand([1, num_proposal],\n                                    dtype=torch.float32).cuda().long()\n    objectness_weights = torch.rand([1, num_proposal],\n                                    dtype=torch.float32).cuda()\n    box_loss_weights = torch.rand([1, num_proposal],\n                                  dtype=torch.float32).cuda()\n    valid_gt_weights = torch.rand([1, 4], dtype=torch.float32).cuda()\n\n    targets = (vote_targets, vote_target_masks, size_class_targets,\n               size_res_targets, dir_class_targets, dir_res_targets,\n               center_targets, mask_targets, valid_gt_masks,\n               objectness_targets, objectness_weights, box_loss_weights,\n               valid_gt_weights)\n\n    input_dict['targets'] = targets\n\n    # train forward\n    ret_dict = self.forward_train(\n        input_dict,\n        points=points,\n        gt_bboxes_3d=gt_bboxes_3d,\n        gt_labels_3d=gt_labels_3d,\n        pts_semantic_mask=pts_semantic_mask,\n        pts_instance_mask=pts_instance_mask,\n        img_metas=None)\n\n    assert ret_dict['flag_loss_z'] >= 0\n    assert ret_dict['vote_loss_z'] >= 0\n    assert ret_dict['center_loss_z'] >= 0\n    assert ret_dict['size_loss_z'] >= 0\n    assert ret_dict['sem_loss_z'] >= 0\n    assert ret_dict['objectness_loss_optimized'] >= 0\n    assert ret_dict['primitive_sem_matching_loss'] >= 0\n\n\ndef test_center_head():\n    tasks = [\n        dict(num_class=1, class_names=['car']),\n        dict(num_class=2, class_names=['truck', 'construction_vehicle']),\n        dict(num_class=2, class_names=['bus', 'trailer']),\n        dict(num_class=1, class_names=['barrier']),\n        dict(num_class=2, class_names=['motorcycle', 'bicycle']),\n        dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),\n    ]\n    bbox_cfg = dict(\n        type='CenterPointBBoxCoder',\n        post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n        max_num=500,\n        score_threshold=0.1,\n        pc_range=[-51.2, -51.2],\n        out_size_factor=8,\n        voxel_size=[0.2, 0.2])\n    train_cfg = dict(\n        grid_size=[1024, 1024, 40],\n        point_cloud_range=[-51.2, -51.2, -5., 51.2, 51.2, 3.],\n        voxel_size=[0.1, 0.1, 0.2],\n        out_size_factor=8,\n        dense_reg=1,\n        gaussian_overlap=0.1,\n        max_objs=500,\n        code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 1.0, 1.0],\n        min_radius=2)\n    test_cfg = dict(\n        post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n        max_per_img=500,\n        max_pool_nms=False,\n        min_radius=[4, 12, 10, 1, 0.85, 0.175],\n        post_max_size=83,\n        score_threshold=0.1,\n        pc_range=[-51.2, -51.2],\n        out_size_factor=8,\n        voxel_size=[0.2, 0.2],\n        nms_type='circle')\n   
 center_head_cfg = dict(\n        type='CenterHead',\n        in_channels=sum([256, 256]),\n        tasks=tasks,\n        train_cfg=train_cfg,\n        test_cfg=test_cfg,\n        bbox_coder=bbox_cfg,\n        common_heads=dict(\n            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        share_conv_channel=64,\n        norm_bbox=True)\n\n    center_head = build_head(center_head_cfg)\n\n    x = torch.rand([2, 512, 128, 128])\n    output = center_head([x])\n    for i in range(6):\n        assert output[i][0]['reg'].shape == torch.Size([2, 2, 128, 128])\n        assert output[i][0]['height'].shape == torch.Size([2, 1, 128, 128])\n        assert output[i][0]['dim'].shape == torch.Size([2, 3, 128, 128])\n        assert output[i][0]['rot'].shape == torch.Size([2, 2, 128, 128])\n        assert output[i][0]['vel'].shape == torch.Size([2, 2, 128, 128])\n        assert output[i][0]['heatmap'].shape == torch.Size(\n            [2, tasks[i]['num_class'], 128, 128])\n\n    # test get_bboxes\n    img_metas = [\n        dict(box_type_3d=LiDARInstance3DBoxes),\n        dict(box_type_3d=LiDARInstance3DBoxes)\n    ]\n    ret_lists = center_head.get_bboxes(output, img_metas)\n    for ret_list in ret_lists:\n        assert ret_list[0].tensor.shape[0] <= 500\n        assert ret_list[1].shape[0] <= 500\n        assert ret_list[2].shape[0] <= 500\n\n\ndef test_dcn_center_head():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and CUDA')\n    set_random_seed(0)\n    tasks = [\n        dict(num_class=1, class_names=['car']),\n        dict(num_class=2, class_names=['truck', 'construction_vehicle']),\n        dict(num_class=2, class_names=['bus', 'trailer']),\n        dict(num_class=1, class_names=['barrier']),\n        dict(num_class=2, class_names=['motorcycle', 'bicycle']),\n        dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),\n    ]\n    voxel_size = [0.2, 0.2, 8]\n    dcn_center_head_cfg = dict(\n        type='CenterHead',\n        in_channels=sum([128, 128, 128]),\n        tasks=[\n            dict(num_class=1, class_names=['car']),\n            dict(num_class=2, class_names=['truck', 'construction_vehicle']),\n            dict(num_class=2, class_names=['bus', 'trailer']),\n            dict(num_class=1, class_names=['barrier']),\n            dict(num_class=2, class_names=['motorcycle', 'bicycle']),\n            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),\n        ],\n        common_heads={\n            'reg': (2, 2),\n            'height': (1, 2),\n            'dim': (3, 2),\n            'rot': (2, 2),\n            'vel': (2, 2)\n        },\n        share_conv_channel=64,\n        bbox_coder=dict(\n            type='CenterPointBBoxCoder',\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_num=500,\n            score_threshold=0.1,\n            pc_range=[-51.2, -51.2],\n            out_size_factor=4,\n            voxel_size=voxel_size[:2],\n            code_size=9),\n        separate_head=dict(\n            type='DCNSeparateHead',\n            dcn_config=dict(\n                type='DCN',\n                in_channels=64,\n                out_channels=64,\n                kernel_size=3,\n                padding=1,\n                groups=4,\n                bias=False),  # mmcv 1.2.6 doesn't support bias=True anymore\n            init_bias=-2.19,\n            final_kernel=3),\n        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n        loss_bbox=dict(type='L1Loss', 
reduction='none', loss_weight=0.25),\n        norm_bbox=True)\n    # model training and testing settings\n    train_cfg = dict(\n        grid_size=[512, 512, 1],\n        point_cloud_range=[-51.2, -51.2, -5., 51.2, 51.2, 3.],\n        voxel_size=voxel_size,\n        out_size_factor=4,\n        dense_reg=1,\n        gaussian_overlap=0.1,\n        max_objs=500,\n        min_radius=2,\n        code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 1.0, 1.0])\n\n    test_cfg = dict(\n        post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n        max_per_img=500,\n        max_pool_nms=False,\n        min_radius=[4, 12, 10, 1, 0.85, 0.175],\n        post_max_size=83,\n        score_threshold=0.1,\n        pc_range=[-51.2, -51.2],\n        out_size_factor=4,\n        voxel_size=voxel_size[:2],\n        nms_type='circle')\n    dcn_center_head_cfg.update(train_cfg=train_cfg, test_cfg=test_cfg)\n\n    dcn_center_head = build_head(dcn_center_head_cfg).cuda()\n\n    x = torch.ones([2, 384, 128, 128]).cuda()\n    output = dcn_center_head([x])\n    for i in range(6):\n        assert output[i][0]['reg'].shape == torch.Size([2, 2, 128, 128])\n        assert output[i][0]['height'].shape == torch.Size([2, 1, 128, 128])\n        assert output[i][0]['dim'].shape == torch.Size([2, 3, 128, 128])\n        assert output[i][0]['rot'].shape == torch.Size([2, 2, 128, 128])\n        assert output[i][0]['vel'].shape == torch.Size([2, 2, 128, 128])\n        assert output[i][0]['heatmap'].shape == torch.Size(\n            [2, tasks[i]['num_class'], 128, 128])\n\n    # Test loss.\n    gt_bboxes_0 = LiDARInstance3DBoxes(torch.rand([10, 9]).cuda(), box_dim=9)\n    gt_bboxes_1 = LiDARInstance3DBoxes(torch.rand([20, 9]).cuda(), box_dim=9)\n    gt_labels_0 = torch.randint(1, 11, [10]).cuda()\n    gt_labels_1 = torch.randint(1, 11, [20]).cuda()\n    gt_bboxes_3d = [gt_bboxes_0, gt_bboxes_1]\n    gt_labels_3d = [gt_labels_0, gt_labels_1]\n    loss = dcn_center_head.loss(gt_bboxes_3d, gt_labels_3d, output)\n    for key, item in loss.items():\n        if 'heatmap' in key:\n            assert item >= 0\n        else:\n            assert torch.sum(item) >= 0\n\n    # test get_bboxes\n    img_metas = [\n        dict(box_type_3d=LiDARInstance3DBoxes),\n        dict(box_type_3d=LiDARInstance3DBoxes)\n    ]\n    ret_lists = dcn_center_head.get_bboxes(output, img_metas)\n    for ret_list in ret_lists:\n        assert ret_list[0].tensor.shape[0] <= 500\n        assert ret_list[1].shape[0] <= 500\n        assert ret_list[2].shape[0] <= 500\n\n\ndef test_ssd3d_head():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    _setup_seed(0)\n    ssd3d_head_cfg = _get_vote_head_cfg('3dssd/3dssd_kitti-3d-car.py')\n    ssd3d_head_cfg.vote_module_cfg.num_points = 64\n    self = build_head(ssd3d_head_cfg).cuda()\n    sa_xyz = [torch.rand([2, 128, 3], dtype=torch.float32).cuda()]\n    sa_features = [torch.rand([2, 256, 128], dtype=torch.float32).cuda()]\n    sa_indices = [torch.randint(0, 64, [2, 128]).cuda()]\n\n    input_dict = dict(\n        sa_xyz=sa_xyz, sa_features=sa_features, sa_indices=sa_indices)\n\n    # test forward\n    ret_dict = self(input_dict, 'spec')\n    assert ret_dict['center'].shape == torch.Size([2, 64, 3])\n    assert ret_dict['obj_scores'].shape == torch.Size([2, 1, 64])\n    assert ret_dict['size'].shape == torch.Size([2, 64, 3])\n    assert ret_dict['dir_res'].shape == torch.Size([2, 64, 12])\n\n    # test loss\n    points = [torch.rand([4000, 4], 
device='cuda') for i in range(2)]\n    gt_bbox1 = LiDARInstance3DBoxes(torch.rand([5, 7], device='cuda'))\n    gt_bbox2 = LiDARInstance3DBoxes(torch.rand([5, 7], device='cuda'))\n    gt_bboxes = [gt_bbox1, gt_bbox2]\n    gt_labels = [\n        torch.zeros([5], dtype=torch.long, device='cuda') for i in range(2)\n    ]\n    img_metas = [dict(box_type_3d=LiDARInstance3DBoxes) for i in range(2)]\n    losses = self.loss(\n        ret_dict, points, gt_bboxes, gt_labels, img_metas=img_metas)\n\n    assert losses['centerness_loss'] >= 0\n    assert losses['center_loss'] >= 0\n    assert losses['dir_class_loss'] >= 0\n    assert losses['dir_res_loss'] >= 0\n    assert losses['size_res_loss'] >= 0\n    assert losses['corner_loss'] >= 0\n    assert losses['vote_loss'] >= 0\n\n    # test multiclass_nms_single\n    sem_scores = ret_dict['obj_scores'].transpose(1, 2)[0]\n    obj_scores = sem_scores.max(-1)[0]\n    bbox = self.bbox_coder.decode(ret_dict)[0]\n    input_meta = img_metas[0]\n    bbox_selected, score_selected, labels = self.multiclass_nms_single(\n        obj_scores, sem_scores, bbox, points[0], input_meta)\n    assert bbox_selected.shape[0] >= 0\n    assert bbox_selected.shape[1] == 7\n    assert score_selected.shape[0] >= 0\n    assert labels.shape[0] >= 0\n\n    # test get_boxes\n    points = torch.stack(points, 0)\n    results = self.get_bboxes(points, ret_dict, img_metas)\n    assert results[0][0].tensor.shape[0] >= 0\n    assert results[0][0].tensor.shape[1] == 7\n    assert results[0][1].shape[0] >= 0\n    assert results[0][2].shape[0] >= 0\n\n\ndef test_shape_aware_head_loss():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    bbox_head_cfg = _get_pts_bbox_head_cfg(\n        'ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py')\n    # modify bn config to avoid bugs caused by syncbn\n    for task in bbox_head_cfg['tasks']:\n        task['norm_cfg'] = dict(type='BN2d')\n\n    from mmdet3d.models.builder import build_head\n    self = build_head(bbox_head_cfg)\n    self.cuda()\n    assert len(self.heads) == 4\n    assert isinstance(self.heads[0].conv_cls, torch.nn.modules.conv.Conv2d)\n    assert self.heads[0].conv_cls.in_channels == 64\n    assert self.heads[0].conv_cls.out_channels == 36\n    assert self.heads[0].conv_reg.out_channels == 28\n    assert self.heads[0].conv_dir_cls.out_channels == 8\n\n    # test forward\n    feats = list()\n    feats.append(torch.rand([2, 384, 200, 200], dtype=torch.float32).cuda())\n    (cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)\n    assert cls_score[0].shape == torch.Size([2, 420000, 9])\n    assert bbox_pred[0].shape == torch.Size([2, 420000, 7])\n    assert dir_cls_preds[0].shape == torch.Size([2, 420000, 2])\n\n    # test loss\n    gt_bboxes = [\n        LiDARInstance3DBoxes(\n            torch.tensor(\n                [[-14.5695, -6.4169, -2.1054, 1.8830, 4.6720, 1.4840, 1.5587],\n                 [25.7215, 3.4581, -1.3456, 1.6720, 4.4090, 1.5830, 1.5301]],\n                dtype=torch.float32).cuda()),\n        LiDARInstance3DBoxes(\n            torch.tensor(\n                [[-50.763, -3.5517, -0.99658, 1.7430, 4.4020, 1.6990, 1.7874],\n                 [-68.720, 0.033, -0.75276, 1.7860, 4.9100, 1.6610, 1.7525]],\n                dtype=torch.float32).cuda())\n    ]\n    gt_labels = list(torch.tensor([[4, 4], [4, 4]], dtype=torch.int64).cuda())\n    input_metas = [{\n        'sample_idx': 1234\n    }, {\n        'sample_idx': 2345\n    }]  # fake input_metas\n\n    losses = 
self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,\n                       gt_labels, input_metas)\n\n    assert losses['loss_cls'][0] > 0\n    assert losses['loss_bbox'][0] > 0\n    assert losses['loss_dir'][0] > 0\n\n    # test empty ground truth case\n    gt_bboxes = list(torch.empty((2, 0, 7)).cuda())\n    gt_labels = list(torch.empty((2, 0)).cuda())\n    empty_gt_losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,\n                                gt_labels, input_metas)\n    assert empty_gt_losses['loss_cls'][0] > 0\n    assert empty_gt_losses['loss_bbox'][0] == 0\n    assert empty_gt_losses['loss_dir'][0] == 0\n\n\ndef test_shape_aware_head_getboxes():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    bbox_head_cfg = _get_pts_bbox_head_cfg(\n        'ssn/hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d.py')\n    # modify bn config to avoid bugs caused by syncbn\n    for task in bbox_head_cfg['tasks']:\n        task['norm_cfg'] = dict(type='BN2d')\n\n    from mmdet3d.models.builder import build_head\n    self = build_head(bbox_head_cfg)\n    self.cuda()\n\n    feats = list()\n    feats.append(torch.rand([2, 384, 200, 200], dtype=torch.float32).cuda())\n    # fake input_metas\n    input_metas = [{\n        'sample_idx': 1234,\n        'box_type_3d': LiDARInstance3DBoxes,\n        'box_mode_3d': Box3DMode.LIDAR\n    }, {\n        'sample_idx': 2345,\n        'box_type_3d': LiDARInstance3DBoxes,\n        'box_mode_3d': Box3DMode.LIDAR\n    }]\n    (cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)\n\n    # test get_bboxes\n    cls_score[0] -= 1.5  # too many positive samples may cause cuda oom\n    result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,\n                                  input_metas)\n    assert len(result_list[0][1]) > 0  # ensure not all boxes are filtered\n    assert (result_list[0][1] > 0.3).all()\n"
  },
  {
    "path": "tests/test_models/test_heads/test_parta2_bbox_head.py",
    "content": "import pytest\nimport torch\nfrom mmcv import Config\nfrom torch.nn import BatchNorm1d, ReLU\n\nfrom mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes\nfrom mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler\nfrom mmdet3d.models import PartA2BboxHead\nfrom mmdet3d.ops import make_sparse_convmodule\nfrom mmdet3d.ops.spconv.conv import SubMConv3d\n\n\ndef test_loss():\n    self = PartA2BboxHead(\n        num_classes=3,\n        seg_in_channels=16,\n        part_in_channels=4,\n        seg_conv_channels=[64, 64],\n        part_conv_channels=[64, 64],\n        merge_conv_channels=[128, 128],\n        down_conv_channels=[128, 256],\n        shared_fc_channels=[256, 512, 512, 512],\n        cls_channels=[256, 256],\n        reg_channels=[256, 256])\n\n    cls_score = torch.Tensor([[-3.6810], [-3.9413], [-5.3971], [-17.1281],\n                              [-5.9434], [-6.2251]])\n    bbox_pred = torch.Tensor(\n        [[\n            -6.3016e-03, -5.2294e-03, -1.2793e-02, -1.0602e-02, -7.4086e-04,\n            9.2471e-03, 7.3514e-03\n        ],\n         [\n             -1.1975e-02, -1.1578e-02, -3.1219e-02, 2.7754e-02, 6.9775e-03,\n             9.4042e-04, 9.0472e-04\n         ],\n         [\n             3.7539e-03, -9.1897e-03, -5.3666e-03, -1.0380e-05, 4.3467e-03,\n             4.2470e-03, 1.8355e-03\n         ],\n         [\n             -7.6093e-02, -1.2497e-01, -9.2942e-02, 2.1404e-02, 2.3750e-02,\n             1.0365e-01, -1.3042e-02\n         ],\n         [\n             2.7577e-03, -1.1514e-02, -1.1097e-02, -2.4946e-03, 2.3268e-03,\n             1.6797e-03, -1.4076e-03\n         ],\n         [\n             3.9635e-03, -7.8551e-03, -3.5125e-03, 2.1229e-04, 9.7042e-03,\n             1.7499e-03, -5.1254e-03\n         ]])\n    rois = torch.Tensor([\n        [0.0000, 13.3711, -12.5483, -1.9306, 1.7027, 4.2836, 1.4283, -1.1499],\n        [0.0000, 19.2472, -7.2655, -10.6641, 3.3078, 83.1976, 29.3337, 2.4501],\n        [0.0000, 13.8012, -10.9791, -3.0617, 0.2504, 1.2518, 0.8807, 3.1034],\n        [0.0000, 16.2736, -9.0284, -2.0494, 8.2697, 31.2336, 9.1006, 1.9208],\n        [0.0000, 10.4462, -13.6879, -3.1869, 7.3366, 0.3518, 1.7199, -0.7225],\n        [0.0000, 11.3374, -13.6671, -3.2332, 4.9934, 0.3750, 1.6033, -0.9665]\n    ])\n    labels = torch.Tensor([0.7100, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])\n    bbox_targets = torch.Tensor(\n        [[0.0598, 0.0243, -0.0984, -0.0454, 0.0066, 0.1114, 0.1714]])\n    pos_gt_bboxes = torch.Tensor(\n        [[13.6686, -12.5586, -2.1553, 1.6271, 4.3119, 1.5966, 2.1631]])\n    reg_mask = torch.Tensor([1, 0, 0, 0, 0, 0])\n    label_weights = torch.Tensor(\n        [0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078])\n    bbox_weights = torch.Tensor([1., 0., 0., 0., 0., 0.])\n\n    loss = self.loss(cls_score, bbox_pred, rois, labels, bbox_targets,\n                     pos_gt_bboxes, reg_mask, label_weights, bbox_weights)\n\n    expected_loss_cls = torch.Tensor([\n        2.0579e-02, 1.5005e-04, 3.5252e-05, 0.0000e+00, 2.0433e-05, 1.5422e-05\n    ])\n    expected_loss_bbox = torch.as_tensor(0.0622)\n    expected_loss_corner = torch.Tensor([0.1379])\n\n    assert torch.allclose(loss['loss_cls'], expected_loss_cls, 1e-3)\n    assert torch.allclose(loss['loss_bbox'], expected_loss_bbox, 1e-3)\n    assert torch.allclose(loss['loss_corner'], expected_loss_corner, 1e-3)\n\n\ndef test_get_targets():\n    self = PartA2BboxHead(\n        num_classes=3,\n        seg_in_channels=16,\n        part_in_channels=4,\n        
seg_conv_channels=[64, 64],\n        part_conv_channels=[64, 64],\n        merge_conv_channels=[128, 128],\n        down_conv_channels=[128, 256],\n        shared_fc_channels=[256, 512, 512, 512],\n        cls_channels=[256, 256],\n        reg_channels=[256, 256])\n\n    sampling_result = IoUNegPiecewiseSampler(\n        1,\n        pos_fraction=0.55,\n        neg_piece_fractions=[0.8, 0.2],\n        neg_iou_piece_thrs=[0.55, 0.1],\n        return_iou=True)\n    sampling_result.pos_bboxes = torch.Tensor(\n        [[8.1517, 0.0384, -1.9496, 1.5271, 4.1131, 1.4879, 1.2076]])\n    sampling_result.pos_gt_bboxes = torch.Tensor(\n        [[7.8417, -0.1405, -1.9652, 1.6122, 3.2838, 1.5331, -2.0835]])\n    sampling_result.iou = torch.Tensor([\n        6.7787e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,\n        0.0000e+00, 1.2839e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,\n        0.0000e+00, 0.0000e+00, 0.0000e+00, 7.0261e-04, 0.0000e+00, 0.0000e+00,\n        0.0000e+00, 0.0000e+00, 5.8915e-02, 0.0000e+00, 0.0000e+00, 0.0000e+00,\n        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 5.6628e-06,\n        5.0271e-02, 0.0000e+00, 1.9608e-01, 0.0000e+00, 0.0000e+00, 2.3519e-01,\n        1.6589e-02, 0.0000e+00, 1.0162e-01, 2.1634e-02, 0.0000e+00, 0.0000e+00,\n        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 5.6326e-02,\n        1.3810e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,\n        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,\n        4.5455e-02, 0.0000e+00, 1.0929e-03, 0.0000e+00, 8.8191e-02, 1.1012e-01,\n        0.0000e+00, 0.0000e+00, 0.0000e+00, 1.6236e-01, 0.0000e+00, 1.1342e-01,\n        1.0636e-01, 9.9803e-02, 5.7394e-02, 0.0000e+00, 1.6773e-01, 0.0000e+00,\n        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 6.3464e-03,\n        0.0000e+00, 2.7977e-01, 0.0000e+00, 3.1252e-01, 2.1642e-01, 2.2945e-01,\n        0.0000e+00, 1.8297e-01, 0.0000e+00, 2.1908e-01, 1.1661e-01, 1.3513e-01,\n        1.5898e-01, 7.4368e-03, 1.2523e-01, 1.4735e-04, 0.0000e+00, 0.0000e+00,\n        0.0000e+00, 1.0948e-01, 2.5889e-01, 4.4585e-04, 8.6483e-02, 1.6376e-01,\n        0.0000e+00, 2.2894e-01, 2.7489e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,\n        1.8334e-01, 1.0193e-01, 2.3389e-01, 1.1035e-01, 3.3700e-01, 1.4397e-01,\n        1.0379e-01, 0.0000e+00, 1.1226e-01, 0.0000e+00, 0.0000e+00, 1.6201e-01,\n        0.0000e+00, 1.3569e-01\n    ])\n\n    rcnn_train_cfg = Config({\n        'assigner': [{\n            'type': 'MaxIoUAssigner',\n            'iou_calculator': {\n                'type': 'BboxOverlaps3D',\n                'coordinate': 'lidar'\n            },\n            'pos_iou_thr': 0.55,\n            'neg_iou_thr': 0.55,\n            'min_pos_iou': 0.55,\n            'ignore_iof_thr': -1\n        }, {\n            'type': 'MaxIoUAssigner',\n            'iou_calculator': {\n                'type': 'BboxOverlaps3D',\n                'coordinate': 'lidar'\n            },\n            'pos_iou_thr': 0.55,\n            'neg_iou_thr': 0.55,\n            'min_pos_iou': 0.55,\n            'ignore_iof_thr': -1\n        }, {\n            'type': 'MaxIoUAssigner',\n            'iou_calculator': {\n                'type': 'BboxOverlaps3D',\n                'coordinate': 'lidar'\n            },\n            'pos_iou_thr': 0.55,\n            'neg_iou_thr': 0.55,\n            'min_pos_iou': 0.55,\n            'ignore_iof_thr': -1\n        }],\n        'sampler': {\n            'type': 
'IoUNegPiecewiseSampler',\n            'num': 128,\n            'pos_fraction': 0.55,\n            'neg_piece_fractions': [0.8, 0.2],\n            'neg_iou_piece_thrs': [0.55, 0.1],\n            'neg_pos_ub': -1,\n            'add_gt_as_proposals': False,\n            'return_iou': True\n        },\n        'cls_pos_thr':\n        0.75,\n        'cls_neg_thr':\n        0.25\n    })\n\n    label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights, bbox_weights\\\n        = self.get_targets([sampling_result], rcnn_train_cfg)\n\n    expected_label = torch.Tensor([\n        0.8557, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0595, 0.0000, 0.1250, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0178, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000, 0.0498, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.1740, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,\n        0.0000, 0.0000\n    ])\n\n    expected_bbox_targets = torch.Tensor(\n        [[0.0805, 0.0130, 0.0047, 0.0542, -0.2252, 0.0299, -0.1495]])\n\n    expected_pos_gt_bboxes = torch.Tensor(\n        [[7.8417, -0.1405, -1.9652, 1.6122, 3.2838, 1.5331, -2.0835]])\n\n    expected_reg_mask = torch.LongTensor([\n        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n        0, 0, 0, 0, 0, 0, 0, 0\n    ])\n\n    expected_label_weights = torch.Tensor([\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 
0.0078,\n        0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078,\n        0.0078, 0.0078\n    ])\n\n    expected_bbox_weights = torch.Tensor([\n        1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n        0., 0.\n    ])\n\n    assert torch.allclose(label, expected_label, 1e-2)\n    assert torch.allclose(bbox_targets, expected_bbox_targets, 1e-2)\n    assert torch.allclose(pos_gt_bboxes, expected_pos_gt_bboxes)\n    assert torch.all(reg_mask == expected_reg_mask)\n    assert torch.allclose(label_weights, expected_label_weights, 1e-2)\n    assert torch.allclose(bbox_weights, expected_bbox_weights)\n\n\ndef test_get_bboxes():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    self = PartA2BboxHead(\n        num_classes=3,\n        seg_in_channels=16,\n        part_in_channels=4,\n        seg_conv_channels=[64, 64],\n        part_conv_channels=[64, 64],\n        merge_conv_channels=[128, 128],\n        down_conv_channels=[128, 256],\n        shared_fc_channels=[256, 512, 512, 512],\n        cls_channels=[256, 256],\n        reg_channels=[256, 256])\n\n    rois = torch.Tensor([[\n        0.0000e+00, 5.6284e+01, 2.5712e+01, -1.3196e+00, 1.5943e+00,\n        3.7509e+00, 1.4969e+00, 1.2105e-03\n    ],\n                         [\n                             0.0000e+00, 5.4685e+01, 2.9132e+01, -1.9178e+00,\n                             1.6337e+00, 4.1116e+00, 1.5472e+00, -1.7312e+00\n                         ],\n                         [\n                             0.0000e+00, 5.5927e+01, 2.5830e+01, -1.4099e+00,\n                             1.5958e+00, 3.8861e+00, 1.4911e+00, -2.9276e+00\n                         ],\n                         [\n                             0.0000e+00, 5.6306e+01, 2.6310e+01, -1.3729e+00,\n                             1.5893e+00, 3.7448e+00, 1.4924e+00, 1.6071e-01\n                         ],\n                         [\n                             0.0000e+00, 3.1633e+01, -5.8557e+00, -1.2541e+00,\n                             1.6517e+00, 4.1829e+00, 1.5593e+00, -1.6037e+00\n                         ],\n                         [\n                             0.0000e+00, 3.1789e+01, -5.5308e+00, -1.3012e+00,\n                             1.6412e+00, 4.1070e+00, 1.5487e+00, -1.6517e+00\n                         ]]).cuda()\n\n    cls_score = torch.Tensor([[-2.2061], [-2.1121], [-1.4478], [-2.9614],\n                              [-0.1761], [0.7357]]).cuda()\n\n    bbox_pred = torch.Tensor(\n        [[\n            -4.7917e-02, -1.6504e-02, -2.2340e-02, 5.1296e-03, -2.0984e-02,\n            1.0598e-02, -1.1907e-01\n        ],\n         [\n             -1.6261e-02, -5.4005e-02, 6.2480e-03, 1.5496e-03, -1.3285e-02,\n             8.1482e-03, -2.2707e-03\n         ],\n         [\n             -3.9423e-02, 2.0151e-02, -2.1138e-02, -1.1845e-03, -1.5343e-02,\n             5.7208e-03, 8.5646e-03\n         ],\n         [\n             6.3104e-02, -3.9307e-02, 2.3005e-02, -7.0528e-03, -9.2637e-05,\n             2.2656e-02, 
1.6358e-02\n         ],\n         [\n             -1.4864e-03, 5.6840e-02, 5.8247e-03, -3.5541e-03, -4.9658e-03,\n             2.5036e-03, 3.0302e-02\n         ],\n         [\n             -4.3259e-02, -1.9963e-02, 3.5004e-02, 3.7546e-03, 1.0876e-02,\n             -3.9637e-04, 2.0445e-02\n         ]]).cuda()\n\n    class_labels = [torch.Tensor([2, 2, 2, 2, 2, 2]).cuda()]\n\n    class_pred = [\n        torch.Tensor([[1.0877e-05, 1.0318e-05, 2.6599e-01],\n                      [1.3105e-05, 1.1904e-05, 2.4432e-01],\n                      [1.4530e-05, 1.4619e-05, 2.4395e-01],\n                      [1.3251e-05, 1.3038e-05, 2.3703e-01],\n                      [2.9156e-05, 2.5521e-05, 2.2826e-01],\n                      [3.1665e-05, 2.9054e-05, 2.2077e-01]]).cuda()\n    ]\n\n    cfg = Config(\n        dict(\n            use_rotate_nms=True,\n            use_raw_score=True,\n            nms_thr=0.01,\n            score_thr=0.1))\n    input_meta = dict(\n        box_type_3d=LiDARInstance3DBoxes, box_mode_3d=Box3DMode.LIDAR)\n    result_list = self.get_bboxes(rois, cls_score, bbox_pred, class_labels,\n                                  class_pred, [input_meta], cfg)\n    selected_bboxes, selected_scores, selected_label_preds = result_list[0]\n\n    expected_selected_bboxes = torch.Tensor(\n        [[56.2170, 25.9074, -1.3610, 1.6025, 3.6730, 1.5128, -0.1179],\n         [54.6521, 28.8846, -1.9145, 1.6362, 4.0573, 1.5599, -1.7335],\n         [31.6179, -5.6004, -1.2470, 1.6458, 4.1622, 1.5632, -1.5734]]).cuda()\n    expected_selected_scores = torch.Tensor([-2.2061, -2.1121, -0.1761]).cuda()\n    expected_selected_label_preds = torch.Tensor([2., 2., 2.]).cuda()\n\n    assert torch.allclose(selected_bboxes.tensor, expected_selected_bboxes,\n                          1e-3)\n    assert torch.allclose(selected_scores, expected_selected_scores, 1e-3)\n    assert torch.allclose(selected_label_preds, expected_selected_label_preds)\n\n\ndef test_multi_class_nms():\n    if not torch.cuda.is_available():\n        pytest.skip()\n\n    self = PartA2BboxHead(\n        num_classes=3,\n        seg_in_channels=16,\n        part_in_channels=4,\n        seg_conv_channels=[64, 64],\n        part_conv_channels=[64, 64],\n        merge_conv_channels=[128, 128],\n        down_conv_channels=[128, 256],\n        shared_fc_channels=[256, 512, 512, 512],\n        cls_channels=[256, 256],\n        reg_channels=[256, 256])\n\n    box_probs = torch.Tensor([[1.0877e-05, 1.0318e-05, 2.6599e-01],\n                              [1.3105e-05, 1.1904e-05, 2.4432e-01],\n                              [1.4530e-05, 1.4619e-05, 2.4395e-01],\n                              [1.3251e-05, 1.3038e-05, 2.3703e-01],\n                              [2.9156e-05, 2.5521e-05, 2.2826e-01],\n                              [3.1665e-05, 2.9054e-05, 2.2077e-01],\n                              [5.5738e-06, 6.2453e-06, 2.1978e-01],\n                              [9.0193e-06, 9.2154e-06, 2.1418e-01],\n                              [1.4004e-05, 1.3209e-05, 2.1316e-01],\n                              [7.9210e-06, 8.1767e-06, 2.1304e-01]]).cuda()\n\n    box_preds = torch.Tensor(\n        [[\n            5.6217e+01, 2.5908e+01, -1.3611e+00, 1.6025e+00, 3.6730e+00,\n            1.5129e+00, -1.1786e-01\n        ],\n         [\n             5.4653e+01, 2.8885e+01, -1.9145e+00, 1.6362e+00, 4.0574e+00,\n             1.5599e+00, -1.7335e+00\n         ],\n         [\n             5.5809e+01, 2.5686e+01, -1.4457e+00, 1.5939e+00, 3.8270e+00,\n             1.4997e+00, 
-2.9191e+00\n         ],\n         [\n             5.6107e+01, 2.6082e+01, -1.3557e+00, 1.5782e+00, 3.7444e+00,\n             1.5266e+00, 1.7707e-01\n         ],\n         [\n             3.1618e+01, -5.6004e+00, -1.2470e+00, 1.6459e+00, 4.1622e+00,\n             1.5632e+00, -1.5734e+00\n         ],\n         [\n             3.1605e+01, -5.6342e+00, -1.2467e+00, 1.6474e+00, 4.1519e+00,\n             1.5481e+00, -1.6313e+00\n         ],\n         [\n             5.6211e+01, 2.7294e+01, -1.5350e+00, 1.5422e+00, 3.7733e+00,\n             1.5140e+00, 9.5846e-02\n         ],\n         [\n             5.5907e+01, 2.7155e+01, -1.4712e+00, 1.5416e+00, 3.7611e+00,\n             1.5142e+00, -5.2059e-02\n         ],\n         [\n             5.4000e+01, 3.0585e+01, -1.6874e+00, 1.6495e+00, 4.0376e+00,\n             1.5554e+00, -1.7900e+00\n         ],\n         [\n             5.6007e+01, 2.6300e+01, -1.3945e+00, 1.5716e+00, 3.7064e+00,\n             1.4715e+00, -2.9639e+00\n         ]]).cuda()\n\n    input_meta = dict(\n        box_type_3d=LiDARInstance3DBoxes, box_mode_3d=Box3DMode.LIDAR)\n    selected = self.multi_class_nms(box_probs, box_preds, 0.1, 0.001,\n                                    input_meta)\n    expected_selected = torch.Tensor([0, 1, 4, 8]).cuda()\n\n    assert torch.all(selected == expected_selected)\n\n\ndef test_make_sparse_convmodule():\n    with pytest.raises(AssertionError):\n        # assert invalid order setting\n        make_sparse_convmodule(\n            in_channels=4,\n            out_channels=8,\n            kernel_size=3,\n            indice_key='rcnn_part2',\n            norm_cfg=dict(type='BN1d'),\n            order=('norm', 'act', 'conv', 'norm'))\n\n        # assert invalid type of order\n        make_sparse_convmodule(\n            in_channels=4,\n            out_channels=8,\n            kernel_size=3,\n            indice_key='rcnn_part2',\n            norm_cfg=dict(type='BN1d'),\n            order=['norm', 'conv'])\n\n        # assert invalid elements of order\n        make_sparse_convmodule(\n            in_channels=4,\n            out_channels=8,\n            kernel_size=3,\n            indice_key='rcnn_part2',\n            norm_cfg=dict(type='BN1d'),\n            order=('conv', 'normal', 'activate'))\n\n    sparse_convmodule = make_sparse_convmodule(\n        in_channels=4,\n        out_channels=64,\n        kernel_size=3,\n        padding=1,\n        indice_key='rcnn_part0',\n        norm_cfg=dict(type='BN1d', eps=0.001, momentum=0.01))\n\n    assert isinstance(sparse_convmodule[0], SubMConv3d)\n    assert isinstance(sparse_convmodule[1], BatchNorm1d)\n    assert isinstance(sparse_convmodule[2], ReLU)\n    assert sparse_convmodule[1].num_features == 64\n    assert sparse_convmodule[1].eps == 0.001\n    assert sparse_convmodule[1].affine is True\n    assert sparse_convmodule[1].track_running_stats is True\n    assert isinstance(sparse_convmodule[2], ReLU)\n    assert sparse_convmodule[2].inplace is True\n\n    pre_act = make_sparse_convmodule(\n        in_channels=4,\n        out_channels=8,\n        kernel_size=3,\n        indice_key='rcnn_part1',\n        norm_cfg=dict(type='BN1d'),\n        order=('norm', 'act', 'conv'))\n    assert isinstance(pre_act[0], BatchNorm1d)\n    assert isinstance(pre_act[1], ReLU)\n    assert isinstance(pre_act[2], SubMConv3d)\n"
  },
  {
    "path": "tests/test_models/test_heads/test_roi_extractors.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet3d.models.roi_heads.roi_extractors import Single3DRoIAwareExtractor\n\n\ndef test_single_roiaware_extractor():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n\n    roi_layer_cfg = dict(\n        type='RoIAwarePool3d', out_size=4, max_pts_per_voxel=128, mode='max')\n\n    self = Single3DRoIAwareExtractor(roi_layer=roi_layer_cfg)\n    feats = torch.tensor(\n        [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],\n         [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],\n         [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9],\n         [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]],\n        dtype=torch.float32).cuda()\n    coordinate = feats.clone()\n    batch_inds = torch.zeros(feats.shape[0]).cuda()\n    rois = torch.tensor([[0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],\n                         [0, -10.0, 23.0, 16.0, 10, 20, 20, 0.5]],\n                        dtype=torch.float32).cuda()\n    # test forward\n    pooled_feats = self(feats, coordinate, batch_inds, rois)\n    assert pooled_feats.shape == torch.Size([2, 4, 4, 4, 3])\n    assert torch.allclose(pooled_feats.sum(),\n                          torch.tensor(51.100).cuda(), 1e-3)\n"
  },
  {
    "path": "tests/test_models/test_heads/test_semantic_heads.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet3d.core.bbox import LiDARInstance3DBoxes\n\n\ndef test_PointwiseSemanticHead():\n    # PointwiseSemanticHead only support gpu version currently.\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    from mmdet3d.models.builder import build_head\n\n    head_cfg = dict(\n        type='PointwiseSemanticHead',\n        in_channels=8,\n        extra_width=0.2,\n        seg_score_thr=0.3,\n        num_classes=3,\n        loss_seg=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            reduction='sum',\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_part=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))\n\n    self = build_head(head_cfg)\n    self.cuda()\n\n    # test forward\n    voxel_features = torch.rand([4, 8], dtype=torch.float32).cuda()\n    feats_dict = self.forward(voxel_features)\n    assert feats_dict['seg_preds'].shape == torch.Size(\n        [voxel_features.shape[0], 1])\n    assert feats_dict['part_preds'].shape == torch.Size(\n        [voxel_features.shape[0], 3])\n    assert feats_dict['part_feats'].shape == torch.Size(\n        [voxel_features.shape[0], 4])\n\n    voxel_centers = torch.tensor(\n        [[6.56126, 0.9648336, -1.7339306], [6.8162713, -2.480431, -1.3616394],\n         [11.643568, -4.744306, -1.3580885], [23.482342, 6.5036807, 0.5806964]\n         ],\n        dtype=torch.float32).cuda()  # n, point_features\n    coordinates = torch.tensor(\n        [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],\n         [1, 35, 930, 469]],\n        dtype=torch.int32).cuda()  # n, 4(batch, ind_x, ind_y, ind_z)\n    voxel_dict = dict(voxel_centers=voxel_centers, coors=coordinates)\n    gt_bboxes = [\n        LiDARInstance3DBoxes(\n            torch.tensor(\n                [[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],\n                dtype=torch.float32).cuda()),\n        LiDARInstance3DBoxes(\n            torch.tensor(\n                [[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]],\n                dtype=torch.float32).cuda())\n    ]\n    # batch size is 2 in the unit test\n    gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())\n\n    # test get_targets\n    target_dict = self.get_targets(voxel_dict, gt_bboxes, gt_labels)\n\n    assert target_dict['seg_targets'].shape == torch.Size(\n        [voxel_features.shape[0]])\n    assert torch.allclose(target_dict['seg_targets'],\n                          target_dict['seg_targets'].new_tensor([3, -1, 3, 3]))\n    assert target_dict['part_targets'].shape == torch.Size(\n        [voxel_features.shape[0], 3])\n    assert target_dict['part_targets'].sum() == 0\n\n    # test loss\n    loss_dict = self.loss(feats_dict, target_dict)\n    assert loss_dict['loss_seg'] > 0\n    assert loss_dict['loss_part'] == 0  # no points in gt_boxes\n    total_loss = loss_dict['loss_seg'] + loss_dict['loss_part']\n    total_loss.backward()\n"
  },
  {
    "path": "tests/test_models/test_necks/test_fpn.py",
    "content": "import pytest\n\n\ndef test_secfpn():\n    neck_cfg = dict(\n        type='SECONDFPN',\n        in_channels=[2, 3],\n        upsample_strides=[1, 2],\n        out_channels=[4, 6],\n    )\n    from mmdet.models.builder import build_neck\n    neck = build_neck(neck_cfg)\n    assert neck.deblocks[0][0].in_channels == 2\n    assert neck.deblocks[1][0].in_channels == 3\n    assert neck.deblocks[0][0].out_channels == 4\n    assert neck.deblocks[1][0].out_channels == 6\n    assert neck.deblocks[0][0].stride == (1, 1)\n    assert neck.deblocks[1][0].stride == (2, 2)\n    assert neck is not None\n\n    neck_cfg = dict(\n        type='SECONDFPN',\n        in_channels=[2, 2],\n        upsample_strides=[1, 2, 4],\n        out_channels=[2, 2],\n    )\n    with pytest.raises(AssertionError):\n        build_neck(neck_cfg)\n\n    neck_cfg = dict(\n        type='SECONDFPN',\n        in_channels=[2, 2, 4],\n        upsample_strides=[1, 2, 4],\n        out_channels=[2, 2],\n    )\n    with pytest.raises(AssertionError):\n        build_neck(neck_cfg)\n"
  },
  {
    "path": "tests/test_models/test_necks/test_necks.py",
    "content": "import torch\n\nfrom mmdet3d.models.builder import build_backbone, build_neck\n\n\ndef test_centerpoint_fpn():\n\n    second_cfg = dict(\n        type='SECOND',\n        in_channels=64,\n        out_channels=[64, 128, 256],\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False))\n\n    second = build_backbone(second_cfg)\n\n    # centerpoint usage of fpn\n    centerpoint_fpn_cfg = dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        out_channels=[128, 128, 128],\n        upsample_strides=[0.5, 1, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True)\n\n    # original usage of fpn\n    fpn_cfg = dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128])\n\n    second_fpn = build_neck(fpn_cfg)\n\n    centerpoint_second_fpn = build_neck(centerpoint_fpn_cfg)\n\n    input = torch.rand([4, 64, 512, 512])\n    sec_output = second(input)\n    centerpoint_output = centerpoint_second_fpn(sec_output)\n    second_output = second_fpn(sec_output)\n    assert centerpoint_output[0].shape == torch.Size([4, 384, 128, 128])\n    assert second_output[0].shape == torch.Size([4, 384, 256, 256])\n"
  },
  {
    "path": "tests/test_models/test_voxel_encoder/test_dynamic_scatter.py",
    "content": "import pytest\nimport torch\nfrom torch.autograd import gradcheck\n\nfrom mmdet3d.ops import DynamicScatter\n\n\ndef test_dynamic_scatter():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n\n    feats = torch.rand(\n        size=(200000, 3), dtype=torch.float32, device='cuda') * 100 - 50\n    coors = torch.randint(\n        low=-1, high=20, size=(200000, 3), dtype=torch.int32, device='cuda')\n    coors[coors.min(dim=-1).values < 0] = -1\n\n    dsmean = DynamicScatter([0.32, 0.32, 6],\n                            [-74.88, -74.88, -2, 74.88, 74.88, 4], True)\n    dsmax = DynamicScatter([0.32, 0.32, 6],\n                           [-74.88, -74.88, -2, 74.88, 74.88, 4], False)\n\n    ref_voxel_coors = coors.unique(dim=0, sorted=True)\n    ref_voxel_coors = ref_voxel_coors[ref_voxel_coors.min(dim=-1).values >= 0]\n    ref_voxel_feats_mean = []\n    ref_voxel_feats_max = []\n    for ref_voxel_coor in ref_voxel_coors:\n        voxel_mask = (coors == ref_voxel_coor).all(dim=-1)\n        ref_voxel_feats_mean.append(feats[voxel_mask].mean(dim=0))\n        ref_voxel_feats_max.append(feats[voxel_mask].max(dim=0).values)\n    ref_voxel_feats_mean = torch.stack(ref_voxel_feats_mean)\n    ref_voxel_feats_max = torch.stack(ref_voxel_feats_max)\n\n    feats_out_mean, coors_out_mean = dsmean(feats, coors)\n    seq_mean = (coors_out_mean[:, 0] * 400 + coors_out_mean[:, 1] * 20 +\n                coors_out_mean[:, 2]).argsort()\n    feats_out_mean = feats_out_mean[seq_mean]\n    coors_out_mean = coors_out_mean[seq_mean]\n\n    feats_out_max, coors_out_max = dsmax(feats, coors)\n    seq_max = (coors_out_max[:, 0] * 400 + coors_out_max[:, 1] * 20 +\n               coors_out_max[:, 2]).argsort()\n    feats_out_max = feats_out_max[seq_max]\n    coors_cout_max = coors_out_max[seq_max]\n\n    assert (coors_out_mean == ref_voxel_coors).all()\n    assert torch.allclose(\n        feats_out_mean, ref_voxel_feats_mean, atol=1e-2, rtol=1e-5)\n    assert (coors_cout_max == ref_voxel_coors).all()\n    assert torch.allclose(\n        feats_out_max, ref_voxel_feats_max, atol=1e-2, rtol=1e-5)\n\n    # test grad #\n    feats = torch.rand(\n        size=(100, 4), dtype=torch.float32, device='cuda') * 100 - 50\n    coors = torch.randint(\n        low=-1, high=3, size=(100, 3), dtype=torch.int32, device='cuda')\n    feats.requires_grad_()\n    gradcheck(dsmean, (feats, coors), eps=1e-2, atol=1e-2, rtol=1e-5)\n    gradcheck(dsmax, (feats, coors), eps=1e-2, atol=1e-2, rtol=1e-5)\n"
  },
  {
    "path": "tests/test_models/test_voxel_encoder/test_voxel_encoders.py",
    "content": "import torch\n\nfrom mmdet3d.models.builder import build_voxel_encoder\n\n\ndef test_pillar_feature_net():\n    pillar_feature_net_cfg = dict(\n        type='PillarFeatureNet',\n        in_channels=5,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=(0.2, 0.2, 8),\n        point_cloud_range=(-51.2, -51.2, -5.0, 51.2, 51.2, 3.0),\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01))\n\n    pillar_feature_net = build_voxel_encoder(pillar_feature_net_cfg)\n\n    features = torch.rand([97297, 20, 5])\n    num_voxels = torch.randint(1, 100, [97297])\n    coors = torch.randint(0, 100, [97297, 4])\n\n    features = pillar_feature_net(features, num_voxels, coors)\n    assert features.shape == torch.Size([97297, 64])\n\n\ndef test_hard_simple_VFE():\n    hard_simple_VFE_cfg = dict(type='HardSimpleVFE', num_features=5)\n    hard_simple_VFE = build_voxel_encoder(hard_simple_VFE_cfg)\n    features = torch.rand([240000, 10, 5])\n    num_voxels = torch.randint(1, 10, [240000])\n\n    outputs = hard_simple_VFE(features, num_voxels, None)\n    assert outputs.shape == torch.Size([240000, 5])\n"
  },
  {
    "path": "tests/test_models/test_voxel_encoder/test_voxel_generator.py",
    "content": "import numpy as np\n\nfrom mmdet3d.core.voxel.voxel_generator import VoxelGenerator\n\n\ndef test_voxel_generator():\n    np.random.seed(0)\n    voxel_size = [0.5, 0.5, 0.5]\n    point_cloud_range = [0, -40, -3, 70.4, 40, 1]\n    max_num_points = 1000\n    self = VoxelGenerator(voxel_size, point_cloud_range, max_num_points)\n    points = np.random.rand(1000, 4)\n    voxels = self.generate(points)\n    coors, voxels, num_points_per_voxel = voxels\n    expected_voxels = np.array([[7, 81, 1], [6, 81, 0], [7, 80, 1], [6, 81, 1],\n                                [7, 81, 0], [6, 80, 1], [7, 80, 0], [6, 80,\n                                                                     0]])\n    expected_num_points_per_voxel = np.array(\n        [120, 121, 127, 134, 115, 127, 125, 131])\n    assert np.all(voxels == expected_voxels)\n    assert coors.shape == (8, 1000, 4)\n    assert np.all(num_points_per_voxel == expected_num_points_per_voxel)\n"
  },
  {
    "path": "tests/test_models/test_voxel_encoder/test_voxelize.py",
    "content": "import numpy as np\nimport pytest\nimport torch\n\nfrom mmdet3d.core.voxel.voxel_generator import VoxelGenerator\nfrom mmdet3d.datasets.pipelines import LoadPointsFromFile\nfrom mmdet3d.ops.voxel.voxelize import Voxelization\n\n\ndef _get_voxel_points_indices(points, coors, voxel):\n    result_form = np.equal(coors, voxel)\n    return result_form[:, 0] & result_form[:, 1] & result_form[:, 2]\n\n\ndef test_voxelization():\n    voxel_size = [0.5, 0.5, 0.5]\n    point_cloud_range = [0, -40, -3, 70.4, 40, 1]\n    max_num_points = 1000\n    self = VoxelGenerator(voxel_size, point_cloud_range, max_num_points)\n    data_path = './tests/data/kitti/training/velodyne_reduced/000000.bin'\n    load_points_from_file = LoadPointsFromFile(\n        coord_type='LIDAR', load_dim=4, use_dim=4)\n    results = dict()\n    results['pts_filename'] = data_path\n    results = load_points_from_file(results)\n    points = results['points'].tensor.numpy()\n    voxels_generator = self.generate(points)\n    coors, voxels, num_points_per_voxel = voxels_generator\n    expected_coors = coors\n    expected_voxels = voxels\n    expected_num_points_per_voxel = num_points_per_voxel\n\n    points = torch.tensor(points)\n    max_num_points = -1\n    dynamic_voxelization = Voxelization(voxel_size, point_cloud_range,\n                                        max_num_points)\n    max_num_points = 1000\n    hard_voxelization = Voxelization(voxel_size, point_cloud_range,\n                                     max_num_points)\n    # test hard_voxelization on cpu\n    coors, voxels, num_points_per_voxel = hard_voxelization.forward(points)\n    coors = coors.detach().numpy()\n    voxels = voxels.detach().numpy()\n    num_points_per_voxel = num_points_per_voxel.detach().numpy()\n    assert np.all(coors == expected_coors)\n    assert np.all(voxels == expected_voxels)\n    assert np.all(num_points_per_voxel == expected_num_points_per_voxel)\n\n    # test dynamic_voxelization on cpu\n    coors = dynamic_voxelization.forward(points)\n    coors = coors.detach().numpy()\n    points = points.detach().numpy()\n    for i in range(expected_voxels.shape[0]):\n        indices = _get_voxel_points_indices(points, coors, expected_voxels[i])\n        num_points_current_voxel = points[indices].shape[0]\n        assert num_points_current_voxel > 0\n        assert np.all(\n            points[indices] == expected_coors[i][:num_points_current_voxel])\n        assert num_points_current_voxel == expected_num_points_per_voxel[i]\n\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    # test hard_voxelization on gpu\n    points = torch.tensor(points).contiguous().to(device='cuda:0')\n    coors, voxels, num_points_per_voxel = hard_voxelization.forward(points)\n    coors = coors.cpu().detach().numpy()\n    voxels = voxels.cpu().detach().numpy()\n    num_points_per_voxel = num_points_per_voxel.cpu().detach().numpy()\n    assert np.all(coors == expected_coors)\n    assert np.all(voxels == expected_voxels)\n    assert np.all(num_points_per_voxel == expected_num_points_per_voxel)\n\n    # test dynamic_voxelization on gpu\n    coors = dynamic_voxelization.forward(points)\n    coors = coors.cpu().detach().numpy()\n    points = points.cpu().detach().numpy()\n    for i in range(expected_voxels.shape[0]):\n        indices = _get_voxel_points_indices(points, coors, expected_voxels[i])\n        num_points_current_voxel = points[indices].shape[0]\n        assert num_points_current_voxel > 0\n        assert 
np.all(\n            points[indices] == expected_coors[i][:num_points_current_voxel])\n        assert num_points_current_voxel == expected_num_points_per_voxel[i]\n"
  },
  {
    "path": "tests/test_runtime/test_apis.py",
    "content": "import numpy as np\nimport os\nimport pytest\nimport tempfile\nimport torch\nfrom mmcv.parallel import MMDataParallel\nfrom os.path import dirname, exists, join\n\nfrom mmdet3d.apis import (convert_SyncBN, inference_detector, init_detector,\n                          show_result_meshlab, single_gpu_test)\nfrom mmdet3d.core import Box3DMode\nfrom mmdet3d.core.bbox import LiDARInstance3DBoxes\nfrom mmdet3d.datasets import build_dataloader, build_dataset\nfrom mmdet3d.models import build_detector\n\n\ndef _get_config_directory():\n    \"\"\"Find the predefined detector config directory.\"\"\"\n    try:\n        # Assume we are running in the source mmdetection3d repo\n        repo_dpath = dirname(dirname(dirname(__file__)))\n    except NameError:\n        # For IPython development when this __file__ is not defined\n        import mmdet3d\n        repo_dpath = dirname(dirname(mmdet3d.__file__))\n    config_dpath = join(repo_dpath, 'configs')\n    if not exists(config_dpath):\n        raise Exception('Cannot find config path')\n    return config_dpath\n\n\ndef _get_config_module(fname):\n    \"\"\"Load a configuration as a python module.\"\"\"\n    from mmcv import Config\n    config_dpath = _get_config_directory()\n    config_fpath = join(config_dpath, fname)\n    config_mod = Config.fromfile(config_fpath)\n    return config_mod\n\n\ndef test_convert_SyncBN():\n    cfg = _get_config_module(\n        'pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py')\n    model_cfg = cfg.model\n    convert_SyncBN(model_cfg)\n    assert model_cfg['pts_voxel_encoder']['norm_cfg']['type'] == 'BN1d'\n    assert model_cfg['pts_backbone']['norm_cfg']['type'] == 'BN2d'\n    assert model_cfg['pts_neck']['norm_cfg']['type'] == 'BN2d'\n\n\ndef test_show_result_meshlab():\n    pcd = 'tests/data/nuscenes/samples/LIDAR_TOP/n015-2018-08-02-17-16-37+' \\\n              '0800__LIDAR_TOP__1533201470948018.pcd.bin'\n    box_3d = LiDARInstance3DBoxes(\n        torch.tensor(\n            [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))\n    labels_3d = torch.tensor([0])\n    scores_3d = torch.tensor([0.5])\n    points = np.random.rand(100, 4)\n    img_meta = dict(\n        pts_filename=pcd, boxes_3d=box_3d, box_mode_3d=Box3DMode.LIDAR)\n    data = dict(points=[[torch.tensor(points)]], img_metas=[[img_meta]])\n    result = [\n        dict(\n            pts_bbox=dict(\n                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))\n    ]\n    temp_out_dir = tempfile.mkdtemp()\n    out_dir, file_name = show_result_meshlab(data, result, temp_out_dir)\n    expected_outfile_ply = file_name + '_pred.ply'\n    expected_outfile_obj = file_name + '_points.obj'\n    expected_outfile_ply_path = os.path.join(out_dir, file_name,\n                                             expected_outfile_ply)\n    expected_outfile_obj_path = os.path.join(out_dir, file_name,\n                                             expected_outfile_obj)\n    assert os.path.exists(expected_outfile_ply_path)\n    assert os.path.exists(expected_outfile_obj_path)\n    os.remove(expected_outfile_obj_path)\n    os.remove(expected_outfile_ply_path)\n    os.removedirs(os.path.join(temp_out_dir, file_name))\n\n\ndef test_inference_detector():\n    pcd = 'tests/data/kitti/training/velodyne_reduced/000000.bin'\n    detector_cfg = 'configs/pointpillars/hv_pointpillars_secfpn_' \\\n                   '6x8_160e_kitti-3d-3class.py'\n    detector = init_detector(detector_cfg, device='cpu')\n    results = inference_detector(detector, 
pcd)\n    bboxes_3d = results[0][0]['boxes_3d']\n    scores_3d = results[0][0]['scores_3d']\n    labels_3d = results[0][0]['labels_3d']\n    assert bboxes_3d.tensor.shape[0] >= 0\n    assert bboxes_3d.tensor.shape[1] == 7\n    assert scores_3d.shape[0] >= 0\n    assert labels_3d.shape[0] >= 0\n\n\ndef test_single_gpu_test():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    cfg = _get_config_module('votenet/votenet_16x8_sunrgbd-3d-10class.py')\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    dataset_cfg = cfg.data.test\n    dataset_cfg.data_root = './tests/data/sunrgbd'\n    dataset_cfg.ann_file = 'tests/data/sunrgbd/sunrgbd_infos.pkl'\n    dataset = build_dataset(dataset_cfg)\n    data_loader = build_dataloader(\n        dataset,\n        samples_per_gpu=1,\n        workers_per_gpu=cfg.data.workers_per_gpu,\n        dist=False,\n        shuffle=False)\n    model = MMDataParallel(model, device_ids=[0])\n    results = single_gpu_test(model, data_loader)\n    bboxes_3d = results[0]['boxes_3d']\n    scores_3d = results[0]['scores_3d']\n    labels_3d = results[0]['labels_3d']\n    assert bboxes_3d.tensor.shape[0] >= 0\n    assert bboxes_3d.tensor.shape[1] == 7\n    assert scores_3d.shape[0] >= 0\n    assert labels_3d.shape[0] >= 0\n"
  },
  {
    "path": "tests/test_runtime/test_config.py",
    "content": "from os.path import dirname, exists, join, relpath\n\n\ndef _get_config_directory():\n    \"\"\"Find the predefined detector config directory.\"\"\"\n    try:\n        # Assume we are running in the source mmdetection3d repo\n        repo_dpath = dirname(dirname(dirname(__file__)))\n    except NameError:\n        # For IPython development when this __file__ is not defined\n        import mmdet3d\n        repo_dpath = dirname(dirname(mmdet3d.__file__))\n    config_dpath = join(repo_dpath, 'configs')\n    if not exists(config_dpath):\n        raise Exception('Cannot find config path')\n    return config_dpath\n\n\ndef test_config_build_detector():\n    \"\"\"Test that all detection models defined in the configs can be\n    initialized.\"\"\"\n    from mmcv import Config\n\n    from mmdet3d.models import build_detector\n\n    config_dpath = _get_config_directory()\n    print('Found config_dpath = {!r}'.format(config_dpath))\n\n    import glob\n    config_fpaths = list(glob.glob(join(config_dpath, '**', '*.py')))\n    config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1]\n    config_names = [relpath(p, config_dpath) for p in config_fpaths]\n\n    print('Using {} config files'.format(len(config_names)))\n\n    for config_fname in config_names:\n        config_fpath = join(config_dpath, config_fname)\n        config_mod = Config.fromfile(config_fpath)\n\n        config_mod.model\n        config_mod.model.train_cfg\n        config_mod.model.test_cfg\n        print('Building detector, config_fpath = {!r}'.format(config_fpath))\n\n        # Remove pretrained keys to allow for testing in an offline environment\n        if 'pretrained' in config_mod.model:\n            config_mod.model['pretrained'] = None\n\n        detector = build_detector(config_mod.model)\n        assert detector is not None\n\n        if 'roi_head' in config_mod.model.keys():\n            # for two stage detector\n            # detectors must have bbox head\n            assert detector.roi_head.with_bbox and detector.with_bbox\n            assert detector.roi_head.with_mask == detector.with_mask\n\n            head_config = config_mod.model['roi_head']\n            if head_config.type == 'PartAggregationROIHead':\n                check_parta2_roi_head(head_config, detector.roi_head)\n            elif head_config.type == 'H3DRoIHead':\n                check_h3d_roi_head(head_config, detector.roi_head)\n            else:\n                _check_roi_head(head_config, detector.roi_head)\n        # else:\n        #     # for single stage detector\n        #     # detectors must have bbox head\n        #     # assert detector.with_bbox\n        #     head_config = config_mod.model['bbox_head']\n        #     _check_bbox_head(head_config, detector.bbox_head)\n\n\ndef test_config_build_pipeline():\n    \"\"\"Test that all detection models defined in the configs can be\n    initialized.\"\"\"\n    from mmcv import Config\n\n    from mmdet3d.datasets.pipelines import Compose\n\n    config_dpath = _get_config_directory()\n    print('Found config_dpath = {!r}'.format(config_dpath))\n\n    # Other configs needs database sampler.\n    config_names = [\n        'pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py',\n    ]\n\n    print('Using {} config files'.format(len(config_names)))\n\n    for config_fname in config_names:\n        config_fpath = join(config_dpath, config_fname)\n        config_mod = Config.fromfile(config_fpath)\n\n        # build train_pipeline\n        train_pipeline = 
Compose(config_mod.train_pipeline)\n        test_pipeline = Compose(config_mod.test_pipeline)\n        assert train_pipeline is not None\n        assert test_pipeline is not None\n\n\ndef _check_roi_head(config, head):\n    # check consistency between head_config and roi_head\n    assert config['type'] == head.__class__.__name__\n\n    # check roi_align\n    bbox_roi_cfg = config.bbox_roi_extractor\n    bbox_roi_extractor = head.bbox_roi_extractor\n    _check_roi_extractor(bbox_roi_cfg, bbox_roi_extractor)\n\n    # check bbox head infos\n    bbox_cfg = config.bbox_head\n    bbox_head = head.bbox_head\n    _check_bbox_head(bbox_cfg, bbox_head)\n\n    if head.with_mask:\n        # check roi_align\n        if config.mask_roi_extractor:\n            mask_roi_cfg = config.mask_roi_extractor\n            mask_roi_extractor = head.mask_roi_extractor\n            _check_roi_extractor(mask_roi_cfg, mask_roi_extractor,\n                                 bbox_roi_extractor)\n\n        # check mask head infos\n        mask_head = head.mask_head\n        mask_cfg = config.mask_head\n        _check_mask_head(mask_cfg, mask_head)\n\n\ndef _check_roi_extractor(config, roi_extractor, prev_roi_extractor=None):\n    from torch import nn as nn\n    if isinstance(roi_extractor, nn.ModuleList):\n        if prev_roi_extractor:\n            prev_roi_extractor = prev_roi_extractor[0]\n        roi_extractor = roi_extractor[0]\n\n    assert (len(config.featmap_strides) == len(roi_extractor.roi_layers))\n    assert (config.out_channels == roi_extractor.out_channels)\n    from torch.nn.modules.utils import _pair\n    assert (_pair(config.roi_layer.output_size) ==\n            roi_extractor.roi_layers[0].output_size)\n\n    if 'use_torchvision' in config.roi_layer:\n        assert (config.roi_layer.use_torchvision ==\n                roi_extractor.roi_layers[0].use_torchvision)\n    elif 'aligned' in config.roi_layer:\n        assert (\n            config.roi_layer.aligned == roi_extractor.roi_layers[0].aligned)\n\n    if prev_roi_extractor:\n        assert (roi_extractor.roi_layers[0].aligned ==\n                prev_roi_extractor.roi_layers[0].aligned)\n        assert (roi_extractor.roi_layers[0].use_torchvision ==\n                prev_roi_extractor.roi_layers[0].use_torchvision)\n\n\ndef _check_mask_head(mask_cfg, mask_head):\n    from torch import nn as nn\n    if isinstance(mask_cfg, list):\n        for single_mask_cfg, single_mask_head in zip(mask_cfg, mask_head):\n            _check_mask_head(single_mask_cfg, single_mask_head)\n    elif isinstance(mask_head, nn.ModuleList):\n        for single_mask_head in mask_head:\n            _check_mask_head(mask_cfg, single_mask_head)\n    else:\n        assert mask_cfg['type'] == mask_head.__class__.__name__\n        assert mask_cfg.in_channels == mask_head.in_channels\n        assert (\n            mask_cfg.conv_out_channels == mask_head.conv_logits.in_channels)\n        class_agnostic = mask_cfg.get('class_agnostic', False)\n        out_dim = (1 if class_agnostic else mask_cfg.num_classes)\n        assert mask_head.conv_logits.out_channels == out_dim\n\n\ndef _check_bbox_head(bbox_cfg, bbox_head):\n    from torch import nn as nn\n    if isinstance(bbox_cfg, list):\n        for single_bbox_cfg, single_bbox_head in zip(bbox_cfg, bbox_head):\n            _check_bbox_head(single_bbox_cfg, single_bbox_head)\n    elif isinstance(bbox_head, nn.ModuleList):\n        for single_bbox_head in bbox_head:\n            _check_bbox_head(bbox_cfg, single_bbox_head)\n    else:\n        
assert bbox_cfg['type'] == bbox_head.__class__.__name__\n        assert bbox_cfg.in_channels == bbox_head.in_channels\n        with_cls = bbox_cfg.get('with_cls', True)\n        if with_cls:\n            fc_out_channels = bbox_cfg.get('fc_out_channels', 2048)\n            assert (fc_out_channels == bbox_head.fc_cls.in_features)\n            assert bbox_cfg.num_classes + 1 == bbox_head.fc_cls.out_features\n\n        with_reg = bbox_cfg.get('with_reg', True)\n        if with_reg:\n            out_dim = (4 if bbox_cfg.reg_class_agnostic else 4 *\n                       bbox_cfg.num_classes)\n            assert bbox_head.fc_reg.out_features == out_dim\n\n\ndef check_parta2_roi_head(config, head):\n    assert config['type'] == head.__class__.__name__\n\n    # check seg_roi_extractor\n    seg_roi_cfg = config.seg_roi_extractor\n    seg_roi_extractor = head.seg_roi_extractor\n    _check_parta2_roi_extractor(seg_roi_cfg, seg_roi_extractor)\n\n    # check part_roi_extractor\n    part_roi_cfg = config.part_roi_extractor\n    part_roi_extractor = head.part_roi_extractor\n    _check_parta2_roi_extractor(part_roi_cfg, part_roi_extractor)\n\n    # check bbox head infos\n    bbox_cfg = config.bbox_head\n    bbox_head = head.bbox_head\n    _check_parta2_bbox_head(bbox_cfg, bbox_head)\n\n\ndef _check_parta2_roi_extractor(config, roi_extractor):\n    assert config['type'] == roi_extractor.__class__.__name__\n    assert (config.roi_layer.out_size == roi_extractor.roi_layer.out_size)\n    assert (config.roi_layer.max_pts_per_voxel ==\n            roi_extractor.roi_layer.max_pts_per_voxel)\n\n\ndef _check_parta2_bbox_head(bbox_cfg, bbox_head):\n    from torch import nn as nn\n    if isinstance(bbox_cfg, list):\n        for single_bbox_cfg, single_bbox_head in zip(bbox_cfg, bbox_head):\n            _check_bbox_head(single_bbox_cfg, single_bbox_head)\n    elif isinstance(bbox_head, nn.ModuleList):\n        for single_bbox_head in bbox_head:\n            _check_bbox_head(bbox_cfg, single_bbox_head)\n    else:\n        assert bbox_cfg['type'] == bbox_head.__class__.__name__\n        assert bbox_cfg.seg_in_channels == bbox_head.seg_conv[0][0].in_channels\n        assert bbox_cfg.part_in_channels == bbox_head.part_conv[0][\n            0].in_channels\n\n\ndef check_h3d_roi_head(config, head):\n    assert config['type'] == head.__class__.__name__\n\n    # check seg_roi_extractor\n    primitive_z_cfg = config.primitive_list[0]\n    primitive_z_extractor = head.primitive_z\n    _check_primitive_extractor(primitive_z_cfg, primitive_z_extractor)\n\n    primitive_xy_cfg = config.primitive_list[1]\n    primitive_xy_extractor = head.primitive_xy\n    _check_primitive_extractor(primitive_xy_cfg, primitive_xy_extractor)\n\n    primitive_line_cfg = config.primitive_list[2]\n    primitive_line_extractor = head.primitive_line\n    _check_primitive_extractor(primitive_line_cfg, primitive_line_extractor)\n\n    # check bbox head infos\n    bbox_cfg = config.bbox_head\n    bbox_head = head.bbox_head\n    _check_h3d_bbox_head(bbox_cfg, bbox_head)\n\n\ndef _check_primitive_extractor(config, primitive_extractor):\n    assert config['type'] == primitive_extractor.__class__.__name__\n    assert (config.num_dims == primitive_extractor.num_dims)\n    assert (config.num_classes == primitive_extractor.num_classes)\n\n\ndef _check_h3d_bbox_head(bbox_cfg, bbox_head):\n    assert bbox_cfg['type'] == bbox_head.__class__.__name__\n    assert bbox_cfg.num_proposal * \\\n        6 == bbox_head.surface_center_matcher.num_point[0]\n    assert 
bbox_cfg.num_proposal * \\\n        12 == bbox_head.line_center_matcher.num_point[0]\n    assert bbox_cfg.suface_matching_cfg.mlp_channels[-1] * \\\n        18 == bbox_head.bbox_pred[0].in_channels\n"
  },
  {
    "path": "tests/test_utils/test_anchors.py",
    "content": "\"\"\"\nCommandLine:\n    pytest tests/test_utils/test_anchor.py\n    xdoctest tests/test_utils/test_anchor.py zero\n\n\"\"\"\nimport torch\n\nfrom mmdet3d.core.anchor import build_anchor_generator\n\n\ndef test_anchor_3d_range_generator():\n    if torch.cuda.is_available():\n        device = 'cuda'\n    else:\n        device = 'cpu'\n    anchor_generator_cfg = dict(\n        type='Anchor3DRangeGenerator',\n        ranges=[\n            [0, -39.68, -0.6, 70.4, 39.68, -0.6],\n            [0, -39.68, -0.6, 70.4, 39.68, -0.6],\n            [0, -39.68, -1.78, 70.4, 39.68, -1.78],\n        ],\n        sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n        rotations=[0, 1.57],\n        reshape_out=False)\n\n    anchor_generator = build_anchor_generator(anchor_generator_cfg)\n    repr_str = repr(anchor_generator)\n    expected_repr_str = 'Anchor3DRangeGenerator(anchor_range=' \\\n                        '[[0, -39.68, -0.6, 70.4, 39.68, -0.6], ' \\\n                        '[0, -39.68, -0.6, 70.4, 39.68, -0.6], ' \\\n                        '[0, -39.68, -1.78, 70.4, 39.68, -1.78]],' \\\n                        '\\nscales=[1],\\nsizes=[[0.6, 0.8, 1.73], ' \\\n                        '[0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],' \\\n                        '\\nrotations=[0, 1.57],\\nreshape_out=False,' \\\n                        '\\nsize_per_range=True)'\n    assert repr_str == expected_repr_str\n    featmap_size = (256, 256)\n    mr_anchors = anchor_generator.single_level_grid_anchors(\n        featmap_size, 1.1, device=device)\n    assert mr_anchors.shape == torch.Size([1, 256, 256, 3, 2, 7])\n\n\ndef test_aligned_anchor_generator():\n    if torch.cuda.is_available():\n        device = 'cuda'\n    else:\n        device = 'cpu'\n\n    anchor_generator_cfg = dict(\n        type='AlignedAnchor3DRangeGenerator',\n        ranges=[[-51.2, -51.2, -1.80, 51.2, 51.2, -1.80]],\n        scales=[1, 2, 4],\n        sizes=[\n            [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)\n            [0.5774, 1.7321, 1.],  # 1/sqrt(3)\n            [1., 1., 1.],\n            [0.4, 0.4, 1],\n        ],\n        custom_values=[0, 0],\n        rotations=[0, 1.57],\n        size_per_range=False,\n        reshape_out=True)\n\n    featmap_sizes = [(256, 256), (128, 128), (64, 64)]\n    anchor_generator = build_anchor_generator(anchor_generator_cfg)\n    assert anchor_generator.num_base_anchors == 8\n\n    # check base anchors\n    expected_grid_anchors = [\n        torch.tensor([[\n            -51.0000, -51.0000, -1.8000, 0.8660, 2.5981, 1.0000, 0.0000,\n            0.0000, 0.0000\n        ],\n                      [\n                          -51.0000, -51.0000, -1.8000, 0.4000, 0.4000, 1.0000,\n                          1.5700, 0.0000, 0.0000\n                      ],\n                      [\n                          -50.6000, -51.0000, -1.8000, 0.4000, 0.4000, 1.0000,\n                          0.0000, 0.0000, 0.0000\n                      ],\n                      [\n                          -50.2000, -51.0000, -1.8000, 1.0000, 1.0000, 1.0000,\n                          1.5700, 0.0000, 0.0000\n                      ],\n                      [\n                          -49.8000, -51.0000, -1.8000, 1.0000, 1.0000, 1.0000,\n                          0.0000, 0.0000, 0.0000\n                      ],\n                      [\n                          -49.4000, -51.0000, -1.8000, 0.5774, 1.7321, 1.0000,\n                          1.5700, 0.0000, 0.0000\n                      ],\n                      
[\n                          -49.0000, -51.0000, -1.8000, 0.5774, 1.7321, 1.0000,\n                          0.0000, 0.0000, 0.0000\n                      ],\n                      [\n                          -48.6000, -51.0000, -1.8000, 0.8660, 2.5981, 1.0000,\n                          1.5700, 0.0000, 0.0000\n                      ]],\n                     device=device),\n        torch.tensor([[\n            -50.8000, -50.8000, -1.8000, 1.7320, 5.1962, 2.0000, 0.0000,\n            0.0000, 0.0000\n        ],\n                      [\n                          -50.8000, -50.8000, -1.8000, 0.8000, 0.8000, 2.0000,\n                          1.5700, 0.0000, 0.0000\n                      ],\n                      [\n                          -50.0000, -50.8000, -1.8000, 0.8000, 0.8000, 2.0000,\n                          0.0000, 0.0000, 0.0000\n                      ],\n                      [\n                          -49.2000, -50.8000, -1.8000, 2.0000, 2.0000, 2.0000,\n                          1.5700, 0.0000, 0.0000\n                      ],\n                      [\n                          -48.4000, -50.8000, -1.8000, 2.0000, 2.0000, 2.0000,\n                          0.0000, 0.0000, 0.0000\n                      ],\n                      [\n                          -47.6000, -50.8000, -1.8000, 1.1548, 3.4642, 2.0000,\n                          1.5700, 0.0000, 0.0000\n                      ],\n                      [\n                          -46.8000, -50.8000, -1.8000, 1.1548, 3.4642, 2.0000,\n                          0.0000, 0.0000, 0.0000\n                      ],\n                      [\n                          -46.0000, -50.8000, -1.8000, 1.7320, 5.1962, 2.0000,\n                          1.5700, 0.0000, 0.0000\n                      ]],\n                     device=device),\n        torch.tensor([[\n            -50.4000, -50.4000, -1.8000, 3.4640, 10.3924, 4.0000, 0.0000,\n            0.0000, 0.0000\n        ],\n                      [\n                          -50.4000, -50.4000, -1.8000, 1.6000, 1.6000, 4.0000,\n                          1.5700, 0.0000, 0.0000\n                      ],\n                      [\n                          -48.8000, -50.4000, -1.8000, 1.6000, 1.6000, 4.0000,\n                          0.0000, 0.0000, 0.0000\n                      ],\n                      [\n                          -47.2000, -50.4000, -1.8000, 4.0000, 4.0000, 4.0000,\n                          1.5700, 0.0000, 0.0000\n                      ],\n                      [\n                          -45.6000, -50.4000, -1.8000, 4.0000, 4.0000, 4.0000,\n                          0.0000, 0.0000, 0.0000\n                      ],\n                      [\n                          -44.0000, -50.4000, -1.8000, 2.3096, 6.9284, 4.0000,\n                          1.5700, 0.0000, 0.0000\n                      ],\n                      [\n                          -42.4000, -50.4000, -1.8000, 2.3096, 6.9284, 4.0000,\n                          0.0000, 0.0000, 0.0000\n                      ],\n                      [\n                          -40.8000, -50.4000, -1.8000, 3.4640, 10.3924, 4.0000,\n                          1.5700, 0.0000, 0.0000\n                      ]],\n                     device=device)\n    ]\n    multi_level_anchors = anchor_generator.grid_anchors(\n        featmap_sizes, device=device)\n    expected_multi_level_shapes = [\n        torch.Size([524288, 9]),\n        torch.Size([131072, 9]),\n        torch.Size([32768, 9])\n    ]\n    for i, single_level_anchor in 
enumerate(multi_level_anchors):\n        assert single_level_anchor.shape == expected_multi_level_shapes[i]\n        # set [:56:7] thus it could cover 8 (len(size) * len(rotations))\n        # anchors on 8 location\n        assert single_level_anchor[:56:7].allclose(expected_grid_anchors[i])\n\n\ndef test_aligned_anchor_generator_per_cls():\n    if torch.cuda.is_available():\n        device = 'cuda'\n    else:\n        device = 'cpu'\n\n    anchor_generator_cfg = dict(\n        type='AlignedAnchor3DRangeGeneratorPerCls',\n        ranges=[[-100, -100, -1.80, 100, 100, -1.80],\n                [-100, -100, -1.30, 100, 100, -1.30]],\n        sizes=[[0.63, 1.76, 1.44], [0.96, 2.35, 1.59]],\n        custom_values=[0, 0],\n        rotations=[0, 1.57],\n        reshape_out=False)\n\n    featmap_sizes = [(100, 100), (50, 50)]\n    anchor_generator = build_anchor_generator(anchor_generator_cfg)\n\n    # check base anchors\n    expected_grid_anchors = [[\n        torch.tensor([[\n            -99.0000, -99.0000, -1.8000, 0.6300, 1.7600, 1.4400, 0.0000,\n            0.0000, 0.0000\n        ],\n                      [\n                          -99.0000, -99.0000, -1.8000, 0.6300, 1.7600, 1.4400,\n                          1.5700, 0.0000, 0.0000\n                      ]],\n                     device=device),\n        torch.tensor([[\n            -98.0000, -98.0000, -1.3000, 0.9600, 2.3500, 1.5900, 0.0000,\n            0.0000, 0.0000\n        ],\n                      [\n                          -98.0000, -98.0000, -1.3000, 0.9600, 2.3500, 1.5900,\n                          1.5700, 0.0000, 0.0000\n                      ]],\n                     device=device)\n    ]]\n    multi_level_anchors = anchor_generator.grid_anchors(\n        featmap_sizes, device=device)\n    expected_multi_level_shapes = [[\n        torch.Size([20000, 9]), torch.Size([5000, 9])\n    ]]\n    for i, single_level_anchor in enumerate(multi_level_anchors):\n        assert len(single_level_anchor) == len(expected_multi_level_shapes[i])\n        # set [:2*interval:interval] thus it could cover\n        # 2 (len(size) * len(rotations)) anchors on 2 location\n        # Note that len(size) for each class is always 1 in this case\n        for j in range(len(single_level_anchor)):\n            interval = int(expected_multi_level_shapes[i][j][0] / 2)\n            assert single_level_anchor[j][:2 * interval:interval].allclose(\n                expected_grid_anchors[i][j])\n"
  },
  {
    "path": "tests/test_utils/test_assigners.py",
    "content": "\"\"\"Tests the Assigner objects.\n\nCommandLine:\n    pytest tests/test_utils/test_assigner.py\n    xdoctest tests/test_utils/test_assigner.py zero\n\"\"\"\nimport torch\n\nfrom mmdet3d.core.bbox.assigners import MaxIoUAssigner\n\n\ndef test_max_iou_assigner():\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_labels = torch.LongTensor([2, 3])\n    assign_result = self.assign(bboxes, gt_bboxes, gt_labels=gt_labels)\n    assert len(assign_result.gt_inds) == 4\n    assert len(assign_result.labels) == 4\n\n    expected_gt_inds = torch.LongTensor([1, 0, 2, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_max_iou_assigner_with_ignore():\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n        ignore_wrt_candidates=False,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [30, 32, 40, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_bboxes_ignore = torch.Tensor([\n        [30, 30, 40, 40],\n    ])\n    assign_result = self.assign(\n        bboxes, gt_bboxes, gt_bboxes_ignore=gt_bboxes_ignore)\n\n    expected_gt_inds = torch.LongTensor([1, 0, 2, -1])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_max_iou_assigner_with_empty_gt():\n    \"\"\"Test corner case where an image might have no true detections.\"\"\"\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.FloatTensor(size=(0, 4))\n    assign_result = self.assign(bboxes, gt_bboxes)\n\n    expected_gt_inds = torch.LongTensor([0, 0, 0, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_max_iou_assigner_with_empty_boxes():\n    \"\"\"Test corner case where an network might predict no boxes.\"\"\"\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.empty((0, 4))\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_labels = torch.LongTensor([2, 3])\n\n    # Test with gt_labels\n    assign_result = self.assign(bboxes, gt_bboxes, gt_labels=gt_labels)\n    assert len(assign_result.gt_inds) == 0\n    assert tuple(assign_result.labels.shape) == (0, )\n\n    # Test without gt_labels\n    assign_result = self.assign(bboxes, gt_bboxes, gt_labels=None)\n    assert len(assign_result.gt_inds) == 0\n    assert assign_result.labels is None\n\n\ndef test_max_iou_assigner_with_empty_boxes_and_ignore():\n    \"\"\"Test corner case where an network might predict no boxes and\n    ignore_iof_thr is on.\"\"\"\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n    )\n    bboxes = torch.empty((0, 4))\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_bboxes_ignore = torch.Tensor([\n        [30, 30, 40, 40],\n    ])\n    gt_labels = torch.LongTensor([2, 3])\n\n    
# Test with gt_labels\n    assign_result = self.assign(\n        bboxes,\n        gt_bboxes,\n        gt_labels=gt_labels,\n        gt_bboxes_ignore=gt_bboxes_ignore)\n    assert len(assign_result.gt_inds) == 0\n    assert tuple(assign_result.labels.shape) == (0, )\n\n    # Test without gt_labels\n    assign_result = self.assign(\n        bboxes, gt_bboxes, gt_labels=None, gt_bboxes_ignore=gt_bboxes_ignore)\n    assert len(assign_result.gt_inds) == 0\n    assert assign_result.labels is None\n\n\ndef test_max_iou_assigner_with_empty_boxes_and_gt():\n    \"\"\"Test corner case where an network might predict no boxes and no gt.\"\"\"\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.empty((0, 4))\n    gt_bboxes = torch.empty((0, 4))\n    assign_result = self.assign(bboxes, gt_bboxes)\n    assert len(assign_result.gt_inds) == 0\n"
  },
  {
    "path": "tests/test_utils/test_bbox_coders.py",
    "content": "import torch\n\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes, LiDARInstance3DBoxes\nfrom mmdet.core import build_bbox_coder\n\n\ndef test_partial_bin_based_box_coder():\n    box_coder_cfg = dict(\n        type='PartialBinBasedBBoxCoder',\n        num_sizes=10,\n        num_dir_bins=12,\n        with_rot=True,\n        mean_sizes=[[2.114256, 1.620300, 0.927272],\n                    [0.791118, 1.279516, 0.718182],\n                    [0.923508, 1.867419, 0.845495],\n                    [0.591958, 0.552978, 0.827272],\n                    [0.699104, 0.454178, 0.75625],\n                    [0.69519, 1.346299, 0.736364],\n                    [0.528526, 1.002642, 1.172878],\n                    [0.500618, 0.632163, 0.683424],\n                    [0.404671, 1.071108, 1.688889],\n                    [0.76584, 1.398258, 0.472728]])\n    box_coder = build_bbox_coder(box_coder_cfg)\n\n    # test eocode\n    gt_bboxes = DepthInstance3DBoxes(\n        [[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486],\n         [2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030],\n         [-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]])\n\n    gt_labels = torch.tensor([0, 1, 2])\n    center_target, size_class_target, size_res_target, dir_class_target, \\\n        dir_res_target = box_coder.encode(gt_bboxes, gt_labels)\n    expected_center_target = torch.tensor([[0.8308, 4.1168, -0.2413],\n                                           [2.3002, 4.8149, -0.7687],\n                                           [-1.1477, 1.8090, -0.1444]])\n    expected_size_class_target = torch.tensor([0, 1, 2])\n    expected_size_res_target = torch.tensor([[0.1350, 0.2241, 0.9972],\n                                             [-0.2193, -0.4166, 0.2328],\n                                             [-0.2270, -0.3401, 1.2108]])\n    expected_dir_class_target = torch.tensor([3, 3, 0])\n    expected_dir_res_target = torch.tensor([0.0778, 0.0322, 0.0552])\n    assert torch.allclose(center_target, expected_center_target, atol=1e-4)\n    assert torch.all(size_class_target == expected_size_class_target)\n    assert torch.allclose(size_res_target, expected_size_res_target, atol=1e-4)\n    assert torch.all(dir_class_target == expected_dir_class_target)\n    assert torch.allclose(dir_res_target, expected_dir_res_target, atol=1e-4)\n\n    # test decode\n    center = torch.tensor([[[0.8014, 3.4134,\n                             -0.6133], [2.6375, 8.4191, 2.0438],\n                            [4.2017, 5.2504,\n                             -0.7851], [-1.0088, 5.4107, 1.6293],\n                            [1.4837, 4.0268, 0.6222]]])\n\n    size_class = torch.tensor([[[\n        -1.0061, -2.2788, 1.1322, -4.4380, -11.0526, -2.8113, -2.0642, -7.5886,\n        -4.8627, -5.0437\n    ],\n                                [\n                                    -2.2058, -0.3527, -1.9976, 0.8815, -2.7980,\n                                    -1.9053, -0.5097, -2.0232, -1.4242, -4.1192\n                                ],\n                                [\n                                    -1.4783, -0.1009, -1.1537, 0.3052, -4.3147,\n                                    -2.6529, 0.2729, -0.3755, -2.6479, -3.7548\n                                ],\n                                [\n                                    -6.1809, -3.5024, -8.3273, 1.1252, -4.3315,\n                                    -7.8288, -4.6091, -5.8153, 0.7480, -10.1396\n                                ],\n                                [\n    
                                -9.0424, -3.7883, -6.0788, -1.8855,\n                                    -10.2493, -9.7164, -1.0658, -4.1713,\n                                    1.1173, -10.6204\n                                ]]])\n\n    size_res = torch.tensor([[[[-9.8976e-02, -5.2152e-01, -7.6421e-02],\n                               [1.4593e-01, 5.6099e-01, 8.9421e-02],\n                               [5.1481e-02, 3.9280e-01, 1.2705e-01],\n                               [3.6869e-01, 7.0558e-01, 1.4647e-01],\n                               [4.7683e-01, 3.3644e-01, 2.3481e-01],\n                               [8.7346e-02, 8.4987e-01, 3.3265e-01],\n                               [2.1393e-01, 8.5585e-01, 9.8948e-02],\n                               [7.8530e-02, 5.9694e-02, -8.7211e-02],\n                               [1.8551e-01, 1.1308e+00, -5.1864e-01],\n                               [3.6485e-01, 7.3757e-01, 1.5264e-01]],\n                              [[-9.5593e-01, -5.0455e-01, 1.9554e-01],\n                               [-1.0870e-01, 1.8025e-01, 1.0228e-01],\n                               [-8.2882e-02, -4.3771e-01, 9.2135e-02],\n                               [-4.0840e-02, -5.9841e-02, 1.1982e-01],\n                               [7.3448e-02, 5.2045e-02, 1.7301e-01],\n                               [-4.0440e-02, 4.9532e-02, 1.1266e-01],\n                               [3.5857e-02, 1.3564e-02, 1.0212e-01],\n                               [-1.0407e-01, -5.9321e-02, 9.2622e-02],\n                               [7.4691e-03, 9.3080e-02, -4.4077e-01],\n                               [-6.0121e-02, -1.3381e-01, -6.8083e-02]],\n                              [[-9.3970e-01, -9.7823e-01, -5.1075e-02],\n                               [-1.2843e-01, -1.8381e-01, 7.1327e-02],\n                               [-1.2247e-01, -8.1115e-01, 3.6495e-02],\n                               [4.9154e-02, -4.5440e-02, 8.9520e-02],\n                               [1.5653e-01, 3.5990e-02, 1.6414e-01],\n                               [-5.9621e-02, 4.9357e-03, 1.4264e-01],\n                               [8.5235e-04, -1.0030e-01, -3.0712e-02],\n                               [-3.7255e-02, 2.8996e-02, 5.5545e-02],\n                               [3.9298e-02, -4.7420e-02, -4.9147e-01],\n                               [-1.1548e-01, -1.5895e-01, -3.9155e-02]],\n                              [[-1.8725e+00, -7.4102e-01, 1.0524e+00],\n                               [-3.3210e-01, 4.7828e-02, -3.2666e-02],\n                               [-2.7949e-01, 5.5541e-02, -1.0059e-01],\n                               [-8.5533e-02, 1.4870e-01, -1.6709e-01],\n                               [3.8283e-01, 2.6609e-01, 2.1361e-01],\n                               [-4.2156e-01, 3.2455e-01, 6.7309e-01],\n                               [-2.4336e-02, -8.3366e-02, 3.9913e-01],\n                               [8.2142e-03, 4.8323e-02, -1.5247e-01],\n                               [-4.8142e-02, -3.0074e-01, -1.6829e-01],\n                               [1.3274e-01, -2.3825e-01, -1.8127e-01]],\n                              [[-1.2576e+00, -6.1550e-01, 7.9430e-01],\n                               [-4.7222e-01, 1.5634e+00, -5.9460e-02],\n                               [-3.5367e-01, 1.3616e+00, -1.6421e-01],\n                               [-1.6611e-02, 2.4231e-01, -9.6188e-02],\n                               [5.4486e-01, 4.6833e-01, 5.1151e-01],\n                               [-6.1755e-01, 1.0292e+00, 1.2458e+00],\n                               
[-6.8152e-02, 2.4786e-01, 9.5088e-01],\n                               [-4.8745e-02, 1.5134e-01, -9.9962e-02],\n                               [2.4485e-03, -7.5991e-02, 1.3545e-01],\n                               [4.1608e-01, -1.2093e-01, -3.1643e-01]]]])\n\n    dir_class = torch.tensor([[[\n        -1.0230, -5.1965, -5.2195, 2.4030, -2.7661, -7.3399, -1.1640, -4.0630,\n        -5.2940, 0.8245, -3.1869, -6.1743\n    ],\n                               [\n                                   -1.9503, -1.6940, -0.8716, -1.1494, -0.8196,\n                                   0.2862, -0.2921, -0.7894, -0.2481, -0.9916,\n                                   -1.4304, -1.2466\n                               ],\n                               [\n                                   -1.7435, -1.2043, -0.1265, 0.5083, -0.0717,\n                                   -0.9560, -1.6171, -2.6463, -2.3863, -2.1358,\n                                   -1.8812, -2.3117\n                               ],\n                               [\n                                   -1.9282, 0.3792, -1.8426, -1.4587, -0.8582,\n                                   -3.4639, -3.2133, -3.7867, -7.6781, -6.4459,\n                                   -6.2455, -5.4797\n                               ],\n                               [\n                                   -3.1869, 0.4456, -0.5824, 0.9994, -1.0554,\n                                   -8.4232, -7.7019, -7.1382, -10.2724,\n                                   -7.8229, -8.1860, -8.6194\n                               ]]])\n\n    dir_res = torch.tensor(\n        [[[\n            1.1022e-01, -2.3750e-01, 2.0381e-01, 1.2177e-01, -2.8501e-01,\n            1.5351e-01, 1.2218e-01, -2.0677e-01, 1.4468e-01, 1.1593e-01,\n            -2.6864e-01, 1.1290e-01\n        ],\n          [\n              -1.5788e-02, 4.1538e-02, -2.2857e-04, -1.4011e-02, 4.2560e-02,\n              -3.1186e-03, -5.0343e-02, 6.8110e-03, -2.6728e-02, -3.2781e-02,\n              3.6889e-02, -1.5609e-03\n          ],\n          [\n              1.9004e-02, 5.7105e-03, 6.0329e-02, 1.3074e-02, -2.5546e-02,\n              -1.1456e-02, -3.2484e-02, -3.3487e-02, 1.6609e-03, 1.7095e-02,\n              1.2647e-05, 2.4814e-02\n          ],\n          [\n              1.4482e-01, -6.3083e-02, 5.8307e-02, 9.1396e-02, -8.4571e-02,\n              4.5890e-02, 5.6243e-02, -1.2448e-01, -9.5244e-02, 4.5746e-02,\n              -1.7390e-02, 9.0267e-02\n          ],\n          [\n              1.8065e-01, -2.0078e-02, 8.5401e-02, 1.0784e-01, -1.2495e-01,\n              2.2796e-02, 1.1310e-01, -8.4364e-02, -1.1904e-01, 6.1180e-02,\n              -1.8109e-02, 1.1229e-01\n          ]]])\n    bbox_out = dict(\n        center=center,\n        size_class=size_class,\n        size_res=size_res,\n        dir_class=dir_class,\n        dir_res=dir_res)\n\n    bbox3d = box_coder.decode(bbox_out)\n    expected_bbox3d = torch.tensor(\n        [[[0.8014, 3.4134, -0.6133, 0.9750, 2.2602, 0.9725, 1.6926],\n          [2.6375, 8.4191, 2.0438, 0.5511, 0.4931, 0.9471, 2.6149],\n          [4.2017, 5.2504, -0.7851, 0.6411, 0.5075, 0.9168, 1.5839],\n          [-1.0088, 5.4107, 1.6293, 0.5064, 0.7017, 0.6602, 0.4605],\n          [1.4837, 4.0268, 0.6222, 0.4071, 0.9951, 1.8243, 1.6786]]])\n    assert torch.allclose(bbox3d, expected_bbox3d, atol=1e-4)\n\n    # test split_pred\n    cls_preds = torch.rand(2, 12, 256)\n    reg_preds = torch.rand(2, 67, 256)\n    base_xyz = torch.rand(2, 256, 3)\n    results = box_coder.split_pred(cls_preds, reg_preds, base_xyz)\n    
obj_scores = results['obj_scores']\n    center = results['center']\n    dir_class = results['dir_class']\n    dir_res_norm = results['dir_res_norm']\n    dir_res = results['dir_res']\n    size_class = results['size_class']\n    size_res_norm = results['size_res_norm']\n    size_res = results['size_res']\n    sem_scores = results['sem_scores']\n    assert obj_scores.shape == torch.Size([2, 256, 2])\n    assert center.shape == torch.Size([2, 256, 3])\n    assert dir_class.shape == torch.Size([2, 256, 12])\n    assert dir_res_norm.shape == torch.Size([2, 256, 12])\n    assert dir_res.shape == torch.Size([2, 256, 12])\n    assert size_class.shape == torch.Size([2, 256, 10])\n    assert size_res_norm.shape == torch.Size([2, 256, 10, 3])\n    assert size_res.shape == torch.Size([2, 256, 10, 3])\n    assert sem_scores.shape == torch.Size([2, 256, 10])\n\n\ndef test_anchor_free_box_coder():\n    box_coder_cfg = dict(\n        type='AnchorFreeBBoxCoder', num_dir_bins=12, with_rot=True)\n    box_coder = build_bbox_coder(box_coder_cfg)\n\n    # test encode\n    gt_bboxes = LiDARInstance3DBoxes([[\n        2.1227e+00, 5.7951e+00, -9.9900e-01, 1.6736e+00, 4.2419e+00,\n        1.5473e+00, -1.5501e+00\n    ],\n                                      [\n                                          1.1791e+01, 9.0276e+00, -8.5772e-01,\n                                          1.6210e+00, 3.5367e+00, 1.4841e+00,\n                                          -1.7369e+00\n                                      ],\n                                      [\n                                          2.3638e+01, 9.6997e+00, -5.6713e-01,\n                                          1.7578e+00, 4.6103e+00, 1.5999e+00,\n                                          -1.4556e+00\n                                      ]])\n    gt_labels = torch.tensor([0, 0, 0])\n\n    (center_targets, size_targets, dir_class_targets,\n     dir_res_targets) = box_coder.encode(gt_bboxes, gt_labels)\n\n    expected_center_target = torch.tensor([[2.1227, 5.7951, -0.2253],\n                                           [11.7908, 9.0276, -0.1156],\n                                           [23.6380, 9.6997, 0.2328]])\n    expected_size_targets = torch.tensor([[0.8368, 2.1210, 0.7736],\n                                          [0.8105, 1.7683, 0.7421],\n                                          [0.8789, 2.3052, 0.8000]])\n    expected_dir_class_target = torch.tensor([9, 9, 9])\n    expected_dir_res_target = torch.tensor([0.0394, -0.3172, 0.2199])\n    assert torch.allclose(center_targets, expected_center_target, atol=1e-4)\n    assert torch.allclose(size_targets, expected_size_targets, atol=1e-4)\n    assert torch.all(dir_class_targets == expected_dir_class_target)\n    assert torch.allclose(dir_res_targets, expected_dir_res_target, atol=1e-3)\n\n    # test decode\n    center = torch.tensor([[[14.5954, 6.3312, 0.7671],\n                            [67.5245, 22.4422, 1.5610],\n                            [47.7693, -6.7980, 1.4395]]])\n\n    size_res = torch.tensor([[[-1.0752, 1.8760, 0.7715],\n                              [-0.8016, 1.1754, 0.0102],\n                              [-1.2789, 0.5948, 0.4728]]])\n\n    dir_class = torch.tensor([[[\n        0.1512, 1.7914, -1.7658, 2.1572, -0.9215, 1.2139, 0.1749, 0.8606,\n        1.1743, -0.7679, -1.6005, 0.4623\n    ],\n                               [\n                                   -0.3957, 1.2026, -1.2677, 1.3863, -0.5754,\n                                   1.7083, 0.2601, 0.1129, 0.7146, -0.1367,\n     
                              -1.2892, -0.0083\n                               ],\n                               [\n                                   -0.8862, 1.2050, -1.3881, 1.6604, -0.9087,\n                                   1.1907, -0.0280, 0.2027, 1.0644, -0.7205,\n                                   -1.0738, 0.4748\n                               ]]])\n\n    dir_res = torch.tensor([[[\n        1.1151, 0.5535, -0.2053, -0.6582, -0.1616, -0.1821, 0.4675, 0.6621,\n        0.8146, -0.0448, -0.7253, -0.7171\n    ],\n                             [\n                                 0.7888, 0.2478, -0.1962, -0.7267, 0.0573,\n                                 -0.2398, 0.6984, 0.5859, 0.7507, -0.1980,\n                                 -0.6538, -0.6602\n                             ],\n                             [\n                                 0.9039, 0.6109, 0.1960, -0.5016, 0.0551,\n                                 -0.4086, 0.3398, 0.2759, 0.7247, -0.0655,\n                                 -0.5052, -0.9026\n                             ]]])\n    bbox_out = dict(\n        center=center, size=size_res, dir_class=dir_class, dir_res=dir_res)\n\n    bbox3d = box_coder.decode(bbox_out)\n    expected_bbox3d = torch.tensor(\n        [[[14.5954, 6.3312, 0.7671, 0.1000, 3.7521, 1.5429, 0.9126],\n          [67.5245, 22.4422, 1.5610, 0.1000, 2.3508, 0.1000, 2.3782],\n          [47.7693, -6.7980, 1.4395, 0.1000, 1.1897, 0.9456, 1.0692]]])\n    assert torch.allclose(bbox3d, expected_bbox3d, atol=1e-4)\n\n    # test split_pred\n    cls_preds = torch.rand(2, 1, 256)\n    reg_preds = torch.rand(2, 30, 256)\n    base_xyz = torch.rand(2, 256, 3)\n    results = box_coder.split_pred(cls_preds, reg_preds, base_xyz)\n    obj_scores = results['obj_scores']\n    center = results['center']\n    center_offset = results['center_offset']\n    dir_class = results['dir_class']\n    dir_res_norm = results['dir_res_norm']\n    dir_res = results['dir_res']\n    size = results['size']\n    assert obj_scores.shape == torch.Size([2, 1, 256])\n    assert center.shape == torch.Size([2, 256, 3])\n    assert center_offset.shape == torch.Size([2, 256, 3])\n    assert dir_class.shape == torch.Size([2, 256, 12])\n    assert dir_res_norm.shape == torch.Size([2, 256, 12])\n    assert dir_res.shape == torch.Size([2, 256, 12])\n    assert size.shape == torch.Size([2, 256, 3])\n\n\ndef test_centerpoint_bbox_coder():\n    bbox_coder_cfg = dict(\n        type='CenterPointBBoxCoder',\n        post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n        max_num=500,\n        score_threshold=0.1,\n        pc_range=[-51.2, -51.2],\n        out_size_factor=4,\n        voxel_size=[0.2, 0.2])\n\n    bbox_coder = build_bbox_coder(bbox_coder_cfg)\n\n    batch_dim = torch.rand([2, 3, 128, 128])\n    batch_hei = torch.rand([2, 1, 128, 128])\n    batch_hm = torch.rand([2, 2, 128, 128])\n    batch_reg = torch.rand([2, 2, 128, 128])\n    batch_rotc = torch.rand([2, 1, 128, 128])\n    batch_rots = torch.rand([2, 1, 128, 128])\n    batch_vel = torch.rand([2, 2, 128, 128])\n\n    temp = bbox_coder.decode(batch_hm, batch_rots, batch_rotc, batch_hei,\n                             batch_dim, batch_vel, batch_reg, 5)\n    for i in range(len(temp)):\n        assert temp[i]['bboxes'].shape == torch.Size([500, 9])\n        assert temp[i]['scores'].shape == torch.Size([500])\n        assert temp[i]['labels'].shape == torch.Size([500])\n"
  },
  {
    "path": "tests/test_utils/test_box3d.py",
    "content": "import numpy as np\nimport pytest\nimport torch\nimport unittest\n\nfrom mmdet3d.core.bbox import (BaseInstance3DBoxes, Box3DMode,\n                               CameraInstance3DBoxes, DepthInstance3DBoxes,\n                               LiDARInstance3DBoxes, bbox3d2roi,\n                               bbox3d_mapping_back)\nfrom mmdet3d.core.bbox.structures.utils import (get_box_type, limit_period,\n                                                points_cam2img,\n                                                rotation_3d_in_axis,\n                                                xywhr2xyxyr)\n\n\ndef test_bbox3d_mapping_back():\n    bboxes = BaseInstance3DBoxes(\n        [[\n            -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 2.06200000e+00,\n            4.40900000e+00, 1.54800000e+00, -1.48801203e+00\n        ],\n         [\n             -2.66751588e+01, 5.59499564e+00, -9.14345860e-01, 3.43000000e-01,\n             4.58000000e-01, 7.82000000e-01, -4.62759755e+00\n         ],\n         [\n             -5.80979675e+00, 3.54092357e+01, 2.00889888e-01, 2.39600000e+00,\n             3.96900000e+00, 1.73200000e+00, -4.65203216e+00\n         ],\n         [\n             -3.13086877e+01, 1.09007628e+00, -1.94612112e-01, 1.94400000e+00,\n             3.85700000e+00, 1.72300000e+00, -2.81427027e+00\n         ]])\n    new_bboxes = bbox3d_mapping_back(bboxes, 1.1, True, True)\n    expected_new_bboxes = torch.tensor(\n        [[-4.7657, 36.3827, 0.2705, 1.8745, 4.0082, 1.4073, -1.4880],\n         [-24.2501, 5.0864, -0.8312, 0.3118, 0.4164, 0.7109, -4.6276],\n         [-5.2816, 32.1902, 0.1826, 2.1782, 3.6082, 1.5745, -4.6520],\n         [-28.4624, 0.9910, -0.1769, 1.7673, 3.5064, 1.5664, -2.8143]])\n    assert torch.allclose(new_bboxes.tensor, expected_new_bboxes, atol=1e-4)\n\n\ndef test_bbox3d2roi():\n    bbox_0 = torch.tensor(\n        [[-5.2422, 4.0020, 2.9757, 2.0620, 4.4090, 1.5480, -1.4880],\n         [-5.8097, 3.5409, 2.0088, 2.3960, 3.9690, 1.7320, -4.6520]])\n    bbox_1 = torch.tensor(\n        [[-2.6675, 5.5949, -9.1434, 3.4300, 4.5800, 7.8200, -4.6275],\n         [-3.1308, 1.0900, -1.9461, 1.9440, 3.8570, 1.7230, -2.8142]])\n    bbox_list = [bbox_0, bbox_1]\n    rois = bbox3d2roi(bbox_list)\n    expected_rois = torch.tensor(\n        [[0.0000, -5.2422, 4.0020, 2.9757, 2.0620, 4.4090, 1.5480, -1.4880],\n         [0.0000, -5.8097, 3.5409, 2.0088, 2.3960, 3.9690, 1.7320, -4.6520],\n         [1.0000, -2.6675, 5.5949, -9.1434, 3.4300, 4.5800, 7.8200, -4.6275],\n         [1.0000, -3.1308, 1.0900, -1.9461, 1.9440, 3.8570, 1.7230, -2.8142]])\n    assert torch.all(torch.eq(rois, expected_rois))\n\n\ndef test_base_boxes3d():\n    # test empty initialization\n    empty_boxes = []\n    boxes = BaseInstance3DBoxes(empty_boxes)\n    assert boxes.tensor.shape[0] == 0\n    assert boxes.tensor.shape[1] == 7\n\n    # Test init with origin\n    gravity_center_box = np.array(\n        [[\n            -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 2.06200000e+00,\n            4.40900000e+00, 1.54800000e+00, -1.48801203e+00\n        ],\n         [\n             -2.66751588e+01, 5.59499564e+00, -9.14345860e-01, 3.43000000e-01,\n             4.58000000e-01, 7.82000000e-01, -4.62759755e+00\n         ],\n         [\n             -5.80979675e+00, 3.54092357e+01, 2.00889888e-01, 2.39600000e+00,\n             3.96900000e+00, 1.73200000e+00, -4.65203216e+00\n         ],\n         [\n             -3.13086877e+01, 1.09007628e+00, -1.94612112e-01, 1.94400000e+00,\n             3.85700000e+00, 
1.72300000e+00, -2.81427027e+00\n         ]],\n        dtype=np.float32)\n\n    bottom_center_box = BaseInstance3DBoxes(\n        gravity_center_box, origin=(0.5, 0.5, 0.5))\n\n    assert bottom_center_box.yaw.shape[0] == 4\n\n\ndef test_lidar_boxes3d():\n    # test empty initialization\n    empty_boxes = []\n    boxes = LiDARInstance3DBoxes(empty_boxes)\n    assert boxes.tensor.shape[0] == 0\n    assert boxes.tensor.shape[1] == 7\n\n    # Test init with origin\n    gravity_center_box = np.array(\n        [[\n            -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 2.06200000e+00,\n            4.40900000e+00, 1.54800000e+00, -1.48801203e+00\n        ],\n         [\n             -2.66751588e+01, 5.59499564e+00, -9.14345860e-01, 3.43000000e-01,\n             4.58000000e-01, 7.82000000e-01, -4.62759755e+00\n         ],\n         [\n             -5.80979675e+00, 3.54092357e+01, 2.00889888e-01, 2.39600000e+00,\n             3.96900000e+00, 1.73200000e+00, -4.65203216e+00\n         ],\n         [\n             -3.13086877e+01, 1.09007628e+00, -1.94612112e-01, 1.94400000e+00,\n             3.85700000e+00, 1.72300000e+00, -2.81427027e+00\n         ]],\n        dtype=np.float32)\n    bottom_center_box = LiDARInstance3DBoxes(\n        gravity_center_box, origin=(0.5, 0.5, 0.5))\n    expected_tensor = torch.tensor(\n        [[\n            -5.24223238e+00, 4.00209696e+01, -4.76429619e-01, 2.06200000e+00,\n            4.40900000e+00, 1.54800000e+00, -1.48801203e+00\n        ],\n         [\n             -2.66751588e+01, 5.59499564e+00, -1.30534586e+00, 3.43000000e-01,\n             4.58000000e-01, 7.82000000e-01, -4.62759755e+00\n         ],\n         [\n             -5.80979675e+00, 3.54092357e+01, -6.65110112e-01, 2.39600000e+00,\n             3.96900000e+00, 1.73200000e+00, -4.65203216e+00\n         ],\n         [\n             -3.13086877e+01, 1.09007628e+00, -1.05611211e+00, 1.94400000e+00,\n             3.85700000e+00, 1.72300000e+00, -2.81427027e+00\n         ]])\n    assert torch.allclose(expected_tensor, bottom_center_box.tensor)\n\n    # Test init with numpy array\n    np_boxes = np.array(\n        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],\n         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]],\n        dtype=np.float32)\n    boxes_1 = LiDARInstance3DBoxes(np_boxes)\n    assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes))\n\n    # test properties\n    assert boxes_1.volume.size(0) == 2\n    assert (boxes_1.center == boxes_1.bottom_center).all()\n    assert repr(boxes) == (\n        'LiDARInstance3DBoxes(\\n    tensor([], size=(0, 7)))')\n\n    # test init with torch.Tensor\n    th_boxes = torch.tensor(\n        [[\n            28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002,\n            1.48000002, -1.57000005\n        ],\n         [\n             26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002,\n             1.39999998, -1.69000006\n         ],\n         [\n             31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998,\n             1.48000002, 2.78999996\n         ]],\n        dtype=torch.float32)\n    boxes_2 = LiDARInstance3DBoxes(th_boxes)\n    assert torch.allclose(boxes_2.tensor, th_boxes)\n\n    # test clone/to/device\n    boxes_2 = boxes_2.clone()\n    boxes_1 = boxes_1.to(boxes_2.device)\n\n    # test box concatenation\n    expected_tensor = torch.tensor(\n        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],\n         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],\n   
      [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],\n         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],\n         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])\n    boxes = LiDARInstance3DBoxes.cat([boxes_1, boxes_2])\n    assert torch.allclose(boxes.tensor, expected_tensor)\n    # concatenate empty list\n    empty_boxes = LiDARInstance3DBoxes.cat([])\n    assert empty_boxes.tensor.shape[0] == 0\n    assert empty_boxes.tensor.shape[-1] == 7\n\n    # test box flip\n    points = torch.tensor([[1.2559, -0.6762, -1.4658],\n                           [4.7814, -0.8784,\n                            -1.3857], [6.7053, 0.2517, -0.9697],\n                           [0.6533, -0.5520, -0.5265],\n                           [4.5870, 0.5358, -1.4741]])\n    expected_tensor = torch.tensor(\n        [[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927],\n         [8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927],\n         [28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],\n         [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],\n         [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]])\n    expected_points = torch.tensor([[1.2559, 0.6762, -1.4658],\n                                    [4.7814, 0.8784, -1.3857],\n                                    [6.7053, -0.2517, -0.9697],\n                                    [0.6533, 0.5520, -0.5265],\n                                    [4.5870, -0.5358, -1.4741]])\n    points = boxes.flip('horizontal', points)\n    assert torch.allclose(boxes.tensor, expected_tensor)\n    assert torch.allclose(points, expected_points, 1e-3)\n\n    expected_tensor = torch.tensor(\n        [[-1.7802, -2.5162, -1.7501, 1.7500, 3.3900, 1.6500, -1.6616],\n         [-8.9594, -2.4567, -1.6357, 1.5400, 4.0100, 1.5700, -1.5216],\n         [-28.2967, 0.5558, -1.3033, 1.4700, 2.2300, 1.4800, -4.7116],\n         [-26.6690, -21.8230, -1.7361, 1.5600, 3.4800, 1.4000, -4.8316],\n         [-31.3198, -8.1621, -1.6218, 1.7400, 3.7700, 1.4800, -0.3516]])\n    boxes_flip_vert = boxes.clone()\n    points = boxes_flip_vert.flip('vertical', points)\n    expected_points = torch.tensor([[-1.2559, 0.6762, -1.4658],\n                                    [-4.7814, 0.8784, -1.3857],\n                                    [-6.7053, -0.2517, -0.9697],\n                                    [-0.6533, 0.5520, -0.5265],\n                                    [-4.5870, -0.5358, -1.4741]])\n    assert torch.allclose(boxes_flip_vert.tensor, expected_tensor, 1e-4)\n    assert torch.allclose(points, expected_points)\n\n    # test box rotation\n    expected_tensor = torch.tensor(\n        [[1.4225, -2.7344, -1.7501, 1.7500, 3.3900, 1.6500, 1.7976],\n         [8.5435, -3.6491, -1.6357, 1.5400, 4.0100, 1.5700, 1.6576],\n         [28.1106, -3.2869, -1.3033, 1.4700, 2.2300, 1.4800, 4.8476],\n         [23.4630, -25.2382, -1.7361, 1.5600, 3.4800, 1.4000, 4.9676],\n         [29.9235, -12.3342, -1.6218, 1.7400, 3.7700, 1.4800, 0.4876]])\n    points, rot_mat_T = boxes.rotate(0.13603681398218053, points)\n    expected_points = torch.tensor([[-1.1526, 0.8403, -1.4658],\n                                    [-4.6181, 1.5187, -1.3857],\n                                    [-6.6775, 0.6600, -0.9697],\n                                    [-0.5724, 0.6355, -0.5265],\n                                    [-4.6173, 0.0912, -1.4741]])\n    expected_rot_mat_T = torch.tensor([[0.9908, -0.1356, 0.0000],\n                                       
[0.1356, 0.9908, 0.0000],\n                                       [0.0000, 0.0000, 1.0000]])\n    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)\n    assert torch.allclose(points, expected_points, 1e-3)\n    assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3)\n\n    points_np = np.array([[-1.0280, 0.9888,\n                           -1.4658], [-4.3695, 2.1310, -1.3857],\n                          [-6.5263, 1.5595,\n                           -0.9697], [-0.4809, 0.7073, -0.5265],\n                          [-4.5623, 0.7166, -1.4741]])\n    points_np, rot_mat_T_np = boxes.rotate(0.13603681398218053, points_np)\n    expected_points_np = np.array([[-0.8844, 1.1191, -1.4658],\n                                   [-4.0401, 2.7039, -1.3857],\n                                   [-6.2545, 2.4302, -0.9697],\n                                   [-0.3805, 0.7660, -0.5265],\n                                   [-4.4230, 1.3287, -1.4741]])\n    expected_rot_mat_T_np = np.array([[0.9908, -0.1356, 0.0000],\n                                      [0.1356, 0.9908, 0.0000],\n                                      [0.0000, 0.0000, 1.0000]])\n\n    assert np.allclose(points_np, expected_points_np, 1e-3)\n    assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3)\n\n    # test box scaling\n    expected_tensor = torch.tensor([[\n        1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797, 1.6592377,\n        1.9336663\n    ],\n                                    [\n                                        8.014273, -4.8007393, -1.6448704,\n                                        1.5486219, 4.0324507, 1.57879,\n                                        1.7936664\n                                    ],\n                                    [\n                                        27.558605, -7.1084175, -1.310622,\n                                        1.4782301, 2.242485, 1.488286,\n                                        4.9836664\n                                    ],\n                                    [\n                                        19.934517, -28.344835, -1.7457767,\n                                        1.5687338, 3.4994833, 1.4078381,\n                                        5.1036663\n                                    ],\n                                    [\n                                        28.130915, -16.369587, -1.6308585,\n                                        1.7497417, 3.791107, 1.488286,\n                                        0.6236664\n                                    ]])\n    boxes.scale(1.00559866335275)\n    assert torch.allclose(boxes.tensor, expected_tensor)\n\n    # test box translation\n    expected_tensor = torch.tensor([[\n        1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797, 1.6592377,\n        1.9336663\n    ],\n                                    [\n                                        8.098079, -4.9332013, -1.8018866,\n                                        1.5486219, 4.0324507, 1.57879,\n                                        1.7936664\n                                    ],\n                                    [\n                                        27.64241, -7.2408795, -1.4676381,\n                                        1.4782301, 2.242485, 1.488286,\n                                        4.9836664\n                                    ],\n                                    [\n                                        20.018322, -28.477297, -1.9027928,\n                                        1.5687338, 
3.4994833, 1.4078381,\n                                        5.1036663\n                                    ],\n                                    [\n                                        28.21472, -16.502048, -1.7878747,\n                                        1.7497417, 3.791107, 1.488286,\n                                        0.6236664\n                                    ]])\n    boxes.translate([0.0838056, -0.13246193, -0.15701613])\n    assert torch.allclose(boxes.tensor, expected_tensor)\n\n    # test bbox in_range_bev\n    expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool)\n    mask = boxes.in_range_bev([0., -40., 70.4, 40.])\n    assert (mask == expected_tensor).all()\n    mask = boxes.nonempty()\n    assert (mask == expected_tensor).all()\n\n    # test bbox in_range\n    expected_tensor = torch.tensor([1, 1, 0, 0, 0], dtype=torch.bool)\n    mask = boxes.in_range_3d([0, -20, -2, 22, 2, 5])\n    assert (mask == expected_tensor).all()\n\n    # test bbox indexing\n    index_boxes = boxes[2:5]\n    expected_tensor = torch.tensor([[\n        27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,\n        4.9836664\n    ],\n                                    [\n                                        20.018322, -28.477297, -1.9027928,\n                                        1.5687338, 3.4994833, 1.4078381,\n                                        5.1036663\n                                    ],\n                                    [\n                                        28.21472, -16.502048, -1.7878747,\n                                        1.7497417, 3.791107, 1.488286,\n                                        0.6236664\n                                    ]])\n    assert len(index_boxes) == 3\n    assert torch.allclose(index_boxes.tensor, expected_tensor)\n\n    index_boxes = boxes[2]\n    expected_tensor = torch.tensor([[\n        27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,\n        4.9836664\n    ]])\n    assert len(index_boxes) == 1\n    assert torch.allclose(index_boxes.tensor, expected_tensor)\n\n    index_boxes = boxes[[2, 4]]\n    expected_tensor = torch.tensor([[\n        27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,\n        4.9836664\n    ],\n                                    [\n                                        28.21472, -16.502048, -1.7878747,\n                                        1.7497417, 3.791107, 1.488286,\n                                        0.6236664\n                                    ]])\n    assert len(index_boxes) == 2\n    assert torch.allclose(index_boxes.tensor, expected_tensor)\n\n    # test iteration\n    for i, box in enumerate(index_boxes):\n        torch.allclose(box, expected_tensor[i])\n\n    # test properties\n    assert torch.allclose(boxes.bottom_center, boxes.tensor[:, :3])\n    expected_tensor = (\n        boxes.tensor[:, :3] - boxes.tensor[:, 3:6] *\n        (torch.tensor([0.5, 0.5, 0]) - torch.tensor([0.5, 0.5, 0.5])))\n    assert torch.allclose(boxes.gravity_center, expected_tensor)\n\n    boxes.limit_yaw()\n    assert (boxes.tensor[:, 6] <= np.pi / 2).all()\n    assert (boxes.tensor[:, 6] >= -np.pi / 2).all()\n\n    Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR)\n    expected_tesor = boxes.tensor.clone()\n    assert torch.allclose(expected_tesor, boxes.tensor)\n\n    boxes.flip()\n    boxes.flip()\n    boxes.limit_yaw()\n    assert torch.allclose(expected_tesor, boxes.tensor)\n\n    # test nearest_bev\n    expected_tensor = 
torch.tensor([[-0.5763, -3.9307, 2.8326, -2.1709],\n                                    [6.0819, -5.7075, 10.1143, -4.1589],\n                                    [26.5212, -7.9800, 28.7637, -6.5018],\n                                    [18.2686, -29.2617, 21.7681, -27.6929],\n                                    [27.3398, -18.3976, 29.0896, -14.6065]])\n    # the pytorch print loses some precision\n    assert torch.allclose(\n        boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)\n\n    # obtained by the print of the original implementation\n    expected_tensor = torch.tensor([[[2.4093e+00, -4.4784e+00, -1.9169e+00],\n                                     [2.4093e+00, -4.4784e+00, -2.5769e-01],\n                                     [-7.7767e-01, -3.2684e+00, -2.5769e-01],\n                                     [-7.7767e-01, -3.2684e+00, -1.9169e+00],\n                                     [3.0340e+00, -2.8332e+00, -1.9169e+00],\n                                     [3.0340e+00, -2.8332e+00, -2.5769e-01],\n                                     [-1.5301e-01, -1.6232e+00, -2.5769e-01],\n                                     [-1.5301e-01, -1.6232e+00, -1.9169e+00]],\n                                    [[9.8933e+00, -6.1340e+00, -1.8019e+00],\n                                     [9.8933e+00, -6.1340e+00, -2.2310e-01],\n                                     [5.9606e+00, -5.2427e+00, -2.2310e-01],\n                                     [5.9606e+00, -5.2427e+00, -1.8019e+00],\n                                     [1.0236e+01, -4.6237e+00, -1.8019e+00],\n                                     [1.0236e+01, -4.6237e+00, -2.2310e-01],\n                                     [6.3029e+00, -3.7324e+00, -2.2310e-01],\n                                     [6.3029e+00, -3.7324e+00, -1.8019e+00]],\n                                    [[2.8525e+01, -8.2534e+00, -1.4676e+00],\n                                     [2.8525e+01, -8.2534e+00, 2.0648e-02],\n                                     [2.6364e+01, -7.6525e+00, 2.0648e-02],\n                                     [2.6364e+01, -7.6525e+00, -1.4676e+00],\n                                     [2.8921e+01, -6.8292e+00, -1.4676e+00],\n                                     [2.8921e+01, -6.8292e+00, 2.0648e-02],\n                                     [2.6760e+01, -6.2283e+00, 2.0648e-02],\n                                     [2.6760e+01, -6.2283e+00, -1.4676e+00]],\n                                    [[2.1337e+01, -2.9870e+01, -1.9028e+00],\n                                     [2.1337e+01, -2.9870e+01, -4.9495e-01],\n                                     [1.8102e+01, -2.8535e+01, -4.9495e-01],\n                                     [1.8102e+01, -2.8535e+01, -1.9028e+00],\n                                     [2.1935e+01, -2.8420e+01, -1.9028e+00],\n                                     [2.1935e+01, -2.8420e+01, -4.9495e-01],\n                                     [1.8700e+01, -2.7085e+01, -4.9495e-01],\n                                     [1.8700e+01, -2.7085e+01, -1.9028e+00]],\n                                    [[2.6398e+01, -1.7530e+01, -1.7879e+00],\n                                     [2.6398e+01, -1.7530e+01, -2.9959e-01],\n                                     [2.8612e+01, -1.4452e+01, -2.9959e-01],\n                                     [2.8612e+01, -1.4452e+01, -1.7879e+00],\n                                     [2.7818e+01, -1.8552e+01, -1.7879e+00],\n                                     [2.7818e+01, -1.8552e+01, -2.9959e-01],\n                            
         [3.0032e+01, -1.5474e+01, -2.9959e-01],\n                                     [3.0032e+01, -1.5474e+01, -1.7879e+00]]])\n    # the pytorch print loses some precision\n    assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)\n\n    # test new_box\n    new_box1 = boxes.new_box([[1, 2, 3, 4, 5, 6, 7]])\n    assert torch.allclose(\n        new_box1.tensor,\n        torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))\n    assert new_box1.device == boxes.device\n    assert new_box1.with_yaw == boxes.with_yaw\n    assert new_box1.box_dim == boxes.box_dim\n\n    new_box2 = boxes.new_box(np.array([[1, 2, 3, 4, 5, 6, 7]]))\n    assert torch.allclose(\n        new_box2.tensor,\n        torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))\n\n    new_box3 = boxes.new_box(torch.tensor([[1, 2, 3, 4, 5, 6, 7]]))\n    assert torch.allclose(\n        new_box3.tensor,\n        torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))\n\n\ndef test_boxes_conversion():\n    \"\"\"Test the conversion of boxes between different modes.\n\n    ComandLine:\n        xdoctest tests/test_box3d.py::test_boxes_conversion zero\n    \"\"\"\n    lidar_boxes = LiDARInstance3DBoxes(\n        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],\n         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],\n         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],\n         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],\n         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])\n    cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR,\n                                       Box3DMode.CAM)\n    expected_box = lidar_boxes.convert_to(Box3DMode.CAM)\n    assert torch.equal(expected_box.tensor, cam_box_tensor)\n\n    # Some properties should be the same\n    cam_boxes = CameraInstance3DBoxes(cam_box_tensor)\n    assert torch.equal(cam_boxes.height, lidar_boxes.height)\n    assert torch.equal(cam_boxes.top_height, -lidar_boxes.top_height)\n    assert torch.equal(cam_boxes.bottom_height, -lidar_boxes.bottom_height)\n    assert torch.allclose(cam_boxes.volume, lidar_boxes.volume)\n\n    lidar_box_tensor = Box3DMode.convert(cam_box_tensor, Box3DMode.CAM,\n                                         Box3DMode.LIDAR)\n    expected_tensor = torch.tensor(\n        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],\n         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],\n         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],\n         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],\n         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])\n\n    assert torch.allclose(expected_tensor, lidar_box_tensor)\n    assert torch.allclose(lidar_boxes.tensor, lidar_box_tensor)\n\n    depth_box_tensor = Box3DMode.convert(cam_box_tensor, Box3DMode.CAM,\n                                         Box3DMode.DEPTH)\n    depth_to_cam_box_tensor = Box3DMode.convert(depth_box_tensor,\n                                                Box3DMode.DEPTH, Box3DMode.CAM)\n    assert torch.allclose(cam_box_tensor, depth_to_cam_box_tensor)\n\n    # test similar mode conversion\n    same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH,\n                                     Box3DMode.DEPTH)\n    assert torch.equal(same_results, depth_box_tensor)\n\n    # test conversion with a given rt_mat\n    camera_boxes = CameraInstance3DBoxes(\n        [[0.06, 1.77, 21.4, 3.2, 1.61, 1.66, 
-1.54],\n         [6.59, 1.53, 6.76, 12.78, 3.66, 2.28, 1.55],\n         [6.71, 1.59, 22.18, 14.73, 3.64, 2.32, 1.59],\n         [7.11, 1.58, 34.54, 10.04, 3.61, 2.32, 1.61],\n         [7.78, 1.65, 45.95, 12.83, 3.63, 2.34, 1.64]])\n\n    rect = torch.tensor(\n        [[0.9999239, 0.00983776, -0.00744505, 0.],\n         [-0.0098698, 0.9999421, -0.00427846, 0.],\n         [0.00740253, 0.00435161, 0.9999631, 0.], [0., 0., 0., 1.]],\n        dtype=torch.float32)\n\n    Trv2c = torch.tensor(\n        [[7.533745e-03, -9.999714e-01, -6.166020e-04, -4.069766e-03],\n         [1.480249e-02, 7.280733e-04, -9.998902e-01, -7.631618e-02],\n         [9.998621e-01, 7.523790e-03, 1.480755e-02, -2.717806e-01],\n         [0.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00]],\n        dtype=torch.float32)\n\n    expected_tensor = torch.tensor(\n        [[\n            2.16902434e+01, -4.06038554e-02, -1.61906639e+00, 1.65999997e+00,\n            3.20000005e+00, 1.61000001e+00, -1.53999996e+00\n        ],\n         [\n             7.05006905e+00, -6.57459601e+00, -1.60107949e+00, 2.27999997e+00,\n             1.27799997e+01, 3.66000009e+00, 1.54999995e+00\n         ],\n         [\n             2.24698818e+01, -6.69203759e+00, -1.50118145e+00, 2.31999993e+00,\n             1.47299995e+01, 3.64000010e+00, 1.59000003e+00\n         ],\n         [\n             3.48291965e+01, -7.09058388e+00, -1.36622983e+00, 2.31999993e+00,\n             1.00400000e+01, 3.60999990e+00, 1.61000001e+00\n         ],\n         [\n             4.62394617e+01, -7.75838800e+00, -1.32405020e+00, 2.33999991e+00,\n             1.28299999e+01, 3.63000011e+00, 1.63999999e+00\n         ]],\n        dtype=torch.float32)\n\n    rt_mat = rect @ Trv2c\n    # test coversion with Box type\n    cam_to_lidar_box = Box3DMode.convert(camera_boxes, Box3DMode.CAM,\n                                         Box3DMode.LIDAR, rt_mat.inverse())\n    assert torch.allclose(cam_to_lidar_box.tensor, expected_tensor)\n\n    lidar_to_cam_box = Box3DMode.convert(cam_to_lidar_box.tensor,\n                                         Box3DMode.LIDAR, Box3DMode.CAM,\n                                         rt_mat)\n    assert torch.allclose(lidar_to_cam_box, camera_boxes.tensor)\n\n    # test numpy convert\n    cam_to_lidar_box = Box3DMode.convert(camera_boxes.tensor.numpy(),\n                                         Box3DMode.CAM, Box3DMode.LIDAR,\n                                         rt_mat.inverse().numpy())\n    assert np.allclose(cam_to_lidar_box, expected_tensor.numpy())\n\n    # test list convert\n    cam_to_lidar_box = Box3DMode.convert(\n        camera_boxes.tensor[0].numpy().tolist(), Box3DMode.CAM,\n        Box3DMode.LIDAR,\n        rt_mat.inverse().numpy())\n    assert np.allclose(np.array(cam_to_lidar_box), expected_tensor[0].numpy())\n\n    # test convert from depth to lidar\n    depth_boxes = torch.tensor(\n        [[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],\n         [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]],\n        dtype=torch.float32)\n    depth_boxes = DepthInstance3DBoxes(depth_boxes)\n    depth_to_lidar_box = depth_boxes.convert_to(Box3DMode.LIDAR)\n    expected_box = depth_to_lidar_box.convert_to(Box3DMode.DEPTH)\n    assert torch.equal(depth_boxes.tensor, expected_box.tensor)\n\n    lidar_to_depth_box = Box3DMode.convert(depth_to_lidar_box, Box3DMode.LIDAR,\n                                           Box3DMode.DEPTH)\n    assert torch.allclose(depth_boxes.tensor, lidar_to_depth_box.tensor)\n    assert 
torch.allclose(depth_boxes.volume, lidar_to_depth_box.volume)\n\n    # test convert from depth to camera\n    depth_to_cam_box = Box3DMode.convert(depth_boxes, Box3DMode.DEPTH,\n                                         Box3DMode.CAM)\n    cam_to_depth_box = Box3DMode.convert(depth_to_cam_box, Box3DMode.CAM,\n                                         Box3DMode.DEPTH)\n    expected_tensor = depth_to_cam_box.convert_to(Box3DMode.DEPTH)\n    assert torch.equal(expected_tensor.tensor, cam_to_depth_box.tensor)\n    assert torch.allclose(depth_boxes.tensor, cam_to_depth_box.tensor)\n    assert torch.allclose(depth_boxes.volume, cam_to_depth_box.volume)\n\n    with pytest.raises(NotImplementedError):\n        # assert invalid convert mode\n        Box3DMode.convert(depth_boxes, Box3DMode.DEPTH, 3)\n\n\ndef test_camera_boxes3d():\n    # Test init with numpy array\n    np_boxes = np.array(\n        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],\n         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]],\n        dtype=np.float32)\n\n    boxes_1 = Box3DMode.convert(\n        LiDARInstance3DBoxes(np_boxes), Box3DMode.LIDAR, Box3DMode.CAM)\n    assert isinstance(boxes_1, CameraInstance3DBoxes)\n\n    cam_np_boxes = Box3DMode.convert(np_boxes, Box3DMode.LIDAR, Box3DMode.CAM)\n    assert torch.allclose(boxes_1.tensor,\n                          boxes_1.tensor.new_tensor(cam_np_boxes))\n\n    # test init with torch.Tensor\n    th_boxes = torch.tensor(\n        [[\n            28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002,\n            1.48000002, -1.57000005\n        ],\n         [\n             26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002,\n             1.39999998, -1.69000006\n         ],\n         [\n             31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998,\n             1.48000002, 2.78999996\n         ]],\n        dtype=torch.float32)\n    cam_th_boxes = Box3DMode.convert(th_boxes, Box3DMode.LIDAR, Box3DMode.CAM)\n    boxes_2 = CameraInstance3DBoxes(cam_th_boxes)\n    assert torch.allclose(boxes_2.tensor, cam_th_boxes)\n\n    # test clone/to/device\n    boxes_2 = boxes_2.clone()\n    boxes_1 = boxes_1.to(boxes_2.device)\n\n    # test box concatenation\n    expected_tensor = Box3DMode.convert(\n        torch.tensor(\n            [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],\n             [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],\n             [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],\n             [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],\n             [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]]),\n        Box3DMode.LIDAR, Box3DMode.CAM)\n    boxes = CameraInstance3DBoxes.cat([boxes_1, boxes_2])\n    assert torch.allclose(boxes.tensor, expected_tensor)\n\n    # test box flip\n    points = torch.tensor([[0.6762, 1.4658, 1.2559], [0.8784, 1.3857, 4.7814],\n                           [-0.2517, 0.9697, 6.7053], [0.5520, 0.5265, 0.6533],\n                           [-0.5358, 1.4741, 4.5870]])\n    expected_tensor = Box3DMode.convert(\n        torch.tensor(\n            [[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927],\n             [8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927],\n             [28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],\n             [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],\n             [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]),\n        
Box3DMode.LIDAR, Box3DMode.CAM)\n    points = boxes.flip('horizontal', points)\n    expected_points = torch.tensor([[-0.6762, 1.4658, 1.2559],\n                                    [-0.8784, 1.3857, 4.7814],\n                                    [0.2517, 0.9697, 6.7053],\n                                    [-0.5520, 0.5265, 0.6533],\n                                    [0.5358, 1.4741, 4.5870]])\n    assert torch.allclose(boxes.tensor, expected_tensor)\n    assert torch.allclose(points, expected_points, 1e-3)\n\n    expected_tensor = torch.tensor(\n        [[2.5162, 1.7501, -1.7802, 3.3900, 1.6500, 1.7500, -1.6616],\n         [2.4567, 1.6357, -8.9594, 4.0100, 1.5700, 1.5400, -1.5216],\n         [-0.5558, 1.3033, -28.2967, 2.2300, 1.4800, 1.4700, -4.7116],\n         [21.8230, 1.7361, -26.6690, 3.4800, 1.4000, 1.5600, -4.8316],\n         [8.1621, 1.6218, -31.3198, 3.7700, 1.4800, 1.7400, -0.3516]])\n    boxes_flip_vert = boxes.clone()\n    points = boxes_flip_vert.flip('vertical', points)\n    expected_points = torch.tensor([[-0.6762, 1.4658, -1.2559],\n                                    [-0.8784, 1.3857, -4.7814],\n                                    [0.2517, 0.9697, -6.7053],\n                                    [-0.5520, 0.5265, -0.6533],\n                                    [0.5358, 1.4741, -4.5870]])\n    assert torch.allclose(boxes_flip_vert.tensor, expected_tensor, 1e-4)\n    assert torch.allclose(points, expected_points)\n\n    # test box rotation\n    expected_tensor = Box3DMode.convert(\n        torch.tensor(\n            [[1.4225, -2.7344, -1.7501, 1.7500, 3.3900, 1.6500, 1.7976],\n             [8.5435, -3.6491, -1.6357, 1.5400, 4.0100, 1.5700, 1.6576],\n             [28.1106, -3.2869, -1.3033, 1.4700, 2.2300, 1.4800, 4.8476],\n             [23.4630, -25.2382, -1.7361, 1.5600, 3.4800, 1.4000, 4.9676],\n             [29.9235, -12.3342, -1.6218, 1.7400, 3.7700, 1.4800, 0.4876]]),\n        Box3DMode.LIDAR, Box3DMode.CAM)\n    points, rot_mat_T = boxes.rotate(torch.tensor(0.13603681398218053), points)\n    expected_points = torch.tensor([[-0.8403, 1.4658, -1.1526],\n                                    [-1.5187, 1.3857, -4.6181],\n                                    [-0.6600, 0.9697, -6.6775],\n                                    [-0.6355, 0.5265, -0.5724],\n                                    [-0.0912, 1.4741, -4.6173]])\n    expected_rot_mat_T = torch.tensor([[0.9908, 0.0000, -0.1356],\n                                       [0.0000, 1.0000, 0.0000],\n                                       [0.1356, 0.0000, 0.9908]])\n    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)\n    assert torch.allclose(points, expected_points, 1e-3)\n    assert torch.allclose(rot_mat_T, expected_rot_mat_T, 1e-3)\n\n    points_np = np.array([[0.6762, 1.2559, -1.4658, 2.5359],\n                          [0.8784, 4.7814, -1.3857, 0.7167],\n                          [-0.2517, 6.7053, -0.9697, 0.5599],\n                          [0.5520, 0.6533, -0.5265, 1.0032],\n                          [-0.5358, 4.5870, -1.4741, 0.0556]])\n    points_np, rot_mat_T_np = boxes.rotate(\n        torch.tensor(0.13603681398218053), points_np)\n    expected_points_np = np.array([[0.4712, 1.2559, -1.5440, 2.5359],\n                                   [0.6824, 4.7814, -1.4920, 0.7167],\n                                   [-0.3809, 6.7053, -0.9266, 0.5599],\n                                   [0.4755, 0.6533, -0.5965, 1.0032],\n                                   [-0.7308, 4.5870, -1.3878, 0.0556]])\n    expected_rot_mat_T_np 
= np.array([[0.9908, 0.0000, -0.1356],\n                                      [0.0000, 1.0000, 0.0000],\n                                      [0.1356, 0.0000, 0.9908]])\n\n    assert np.allclose(points_np, expected_points_np, 1e-3)\n    assert np.allclose(rot_mat_T_np, expected_rot_mat_T_np, 1e-3)\n\n    # test box scaling\n    expected_tensor = Box3DMode.convert(\n        torch.tensor([[\n            1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797, 1.6592377,\n            1.9336663\n        ],\n                      [\n                          8.014273, -4.8007393, -1.6448704, 1.5486219,\n                          4.0324507, 1.57879, 1.7936664\n                      ],\n                      [\n                          27.558605, -7.1084175, -1.310622, 1.4782301,\n                          2.242485, 1.488286, 4.9836664\n                      ],\n                      [\n                          19.934517, -28.344835, -1.7457767, 1.5687338,\n                          3.4994833, 1.4078381, 5.1036663\n                      ],\n                      [\n                          28.130915, -16.369587, -1.6308585, 1.7497417,\n                          3.791107, 1.488286, 0.6236664\n                      ]]), Box3DMode.LIDAR, Box3DMode.CAM)\n    boxes.scale(1.00559866335275)\n    assert torch.allclose(boxes.tensor, expected_tensor)\n\n    # test box translation\n    expected_tensor = Box3DMode.convert(\n        torch.tensor([[\n            1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797, 1.6592377,\n            1.9336663\n        ],\n                      [\n                          8.098079, -4.9332013, -1.8018866, 1.5486219,\n                          4.0324507, 1.57879, 1.7936664\n                      ],\n                      [\n                          27.64241, -7.2408795, -1.4676381, 1.4782301,\n                          2.242485, 1.488286, 4.9836664\n                      ],\n                      [\n                          20.018322, -28.477297, -1.9027928, 1.5687338,\n                          3.4994833, 1.4078381, 5.1036663\n                      ],\n                      [\n                          28.21472, -16.502048, -1.7878747, 1.7497417,\n                          3.791107, 1.488286, 0.6236664\n                      ]]), Box3DMode.LIDAR, Box3DMode.CAM)\n    boxes.translate(torch.tensor([0.13246193, 0.15701613, 0.0838056]))\n    assert torch.allclose(boxes.tensor, expected_tensor)\n\n    # test bbox in_range_bev\n    expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool)\n    mask = boxes.in_range_bev([0., -40., 70.4, 40.])\n    assert (mask == expected_tensor).all()\n    mask = boxes.nonempty()\n    assert (mask == expected_tensor).all()\n\n    # test bbox in_range\n    expected_tensor = torch.tensor([1, 1, 0, 0, 0], dtype=torch.bool)\n    mask = boxes.in_range_3d([-2, -5, 0, 20, 2, 22])\n    assert (mask == expected_tensor).all()\n\n    # test properties\n    assert torch.allclose(boxes.bottom_center, boxes.tensor[:, :3])\n    expected_tensor = (\n        boxes.tensor[:, :3] - boxes.tensor[:, 3:6] *\n        (torch.tensor([0.5, 1.0, 0.5]) - torch.tensor([0.5, 0.5, 0.5])))\n    assert torch.allclose(boxes.gravity_center, expected_tensor)\n\n    boxes.limit_yaw()\n    assert (boxes.tensor[:, 6] <= np.pi / 2).all()\n    assert (boxes.tensor[:, 6] >= -np.pi / 2).all()\n\n    Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR)\n    expected_tesor = boxes.tensor.clone()\n    assert torch.allclose(expected_tesor, boxes.tensor)\n\n    
boxes.flip()\n    boxes.flip()\n    boxes.limit_yaw()\n    assert torch.allclose(expected_tesor, boxes.tensor)\n\n    # test nearest_bev\n    # BEV box in lidar coordinates (x, y)\n    lidar_expected_tensor = torch.tensor(\n        [[-0.5763, -3.9307, 2.8326, -2.1709],\n         [6.0819, -5.7075, 10.1143, -4.1589],\n         [26.5212, -7.9800, 28.7637, -6.5018],\n         [18.2686, -29.2617, 21.7681, -27.6929],\n         [27.3398, -18.3976, 29.0896, -14.6065]])\n    # BEV box in camera coordinate (-y, x)\n    expected_tensor = lidar_expected_tensor.clone()\n    expected_tensor[:, 0::2] = -lidar_expected_tensor[:, [3, 1]]\n    expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2]\n    # the pytorch print loses some precision\n    assert torch.allclose(\n        boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)\n\n    # obtained by the print of the original implementation\n    expected_tensor = torch.tensor([[[3.2684e+00, 2.5769e-01, -7.7767e-01],\n                                     [1.6232e+00, 2.5769e-01, -1.5301e-01],\n                                     [1.6232e+00, 1.9169e+00, -1.5301e-01],\n                                     [3.2684e+00, 1.9169e+00, -7.7767e-01],\n                                     [4.4784e+00, 2.5769e-01, 2.4093e+00],\n                                     [2.8332e+00, 2.5769e-01, 3.0340e+00],\n                                     [2.8332e+00, 1.9169e+00, 3.0340e+00],\n                                     [4.4784e+00, 1.9169e+00, 2.4093e+00]],\n                                    [[5.2427e+00, 2.2310e-01, 5.9606e+00],\n                                     [3.7324e+00, 2.2310e-01, 6.3029e+00],\n                                     [3.7324e+00, 1.8019e+00, 6.3029e+00],\n                                     [5.2427e+00, 1.8019e+00, 5.9606e+00],\n                                     [6.1340e+00, 2.2310e-01, 9.8933e+00],\n                                     [4.6237e+00, 2.2310e-01, 1.0236e+01],\n                                     [4.6237e+00, 1.8019e+00, 1.0236e+01],\n                                     [6.1340e+00, 1.8019e+00, 9.8933e+00]],\n                                    [[7.6525e+00, -2.0648e-02, 2.6364e+01],\n                                     [6.2283e+00, -2.0648e-02, 2.6760e+01],\n                                     [6.2283e+00, 1.4676e+00, 2.6760e+01],\n                                     [7.6525e+00, 1.4676e+00, 2.6364e+01],\n                                     [8.2534e+00, -2.0648e-02, 2.8525e+01],\n                                     [6.8292e+00, -2.0648e-02, 2.8921e+01],\n                                     [6.8292e+00, 1.4676e+00, 2.8921e+01],\n                                     [8.2534e+00, 1.4676e+00, 2.8525e+01]],\n                                    [[2.8535e+01, 4.9495e-01, 1.8102e+01],\n                                     [2.7085e+01, 4.9495e-01, 1.8700e+01],\n                                     [2.7085e+01, 1.9028e+00, 1.8700e+01],\n                                     [2.8535e+01, 1.9028e+00, 1.8102e+01],\n                                     [2.9870e+01, 4.9495e-01, 2.1337e+01],\n                                     [2.8420e+01, 4.9495e-01, 2.1935e+01],\n                                     [2.8420e+01, 1.9028e+00, 2.1935e+01],\n                                     [2.9870e+01, 1.9028e+00, 2.1337e+01]],\n                                    [[1.4452e+01, 2.9959e-01, 2.8612e+01],\n                                     [1.5474e+01, 2.9959e-01, 3.0032e+01],\n                                     [1.5474e+01, 1.7879e+00, 
3.0032e+01],\n                                     [1.4452e+01, 1.7879e+00, 2.8612e+01],\n                                     [1.7530e+01, 2.9959e-01, 2.6398e+01],\n                                     [1.8552e+01, 2.9959e-01, 2.7818e+01],\n                                     [1.8552e+01, 1.7879e+00, 2.7818e+01],\n                                     [1.7530e+01, 1.7879e+00, 2.6398e+01]]])\n\n    # the pytorch print loses some precision\n    assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)\n\n    # test init with a given origin\n    boxes_origin_given = CameraInstance3DBoxes(\n        th_boxes.clone(), box_dim=7, origin=(0.5, 0.5, 0.5))\n    expected_tensor = th_boxes.clone()\n    expected_tensor[:, :3] = th_boxes[:, :3] + th_boxes[:, 3:6] * (\n        th_boxes.new_tensor((0.5, 1.0, 0.5)) - th_boxes.new_tensor(\n            (0.5, 0.5, 0.5)))\n    assert torch.allclose(boxes_origin_given.tensor, expected_tensor)\n\n\ndef test_boxes3d_overlaps():\n    \"\"\"Test the iou calculation of boxes in different modes.\n\n    ComandLine:\n        xdoctest tests/test_box3d.py::test_boxes3d_overlaps zero\n    \"\"\"\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n\n    # Test LiDAR boxes 3D overlaps\n    boxes1_tensor = torch.tensor(\n        [[1.8, -2.5, -1.8, 1.75, 3.39, 1.65, 1.6615927],\n         [8.9, -2.5, -1.6, 1.54, 4.01, 1.57, 1.5215927],\n         [28.3, 0.5, -1.3, 1.47, 2.23, 1.48, 4.7115927],\n         [31.3, -8.2, -1.6, 1.74, 3.77, 1.48, 0.35]],\n        device='cuda')\n    boxes1 = LiDARInstance3DBoxes(boxes1_tensor)\n\n    boxes2_tensor = torch.tensor([[1.2, -3.0, -1.9, 1.8, 3.4, 1.7, 1.9],\n                                  [8.1, -2.9, -1.8, 1.5, 4.1, 1.6, 1.8],\n                                  [31.3, -8.2, -1.6, 1.74, 3.77, 1.48, 0.35],\n                                  [20.1, -28.5, -1.9, 1.6, 3.5, 1.4, 5.1]],\n                                 device='cuda')\n    boxes2 = LiDARInstance3DBoxes(boxes2_tensor)\n\n    expected_iou_tensor = torch.tensor(\n        [[0.3710, 0.0000, 0.0000, 0.0000], [0.0000, 0.3322, 0.0000, 0.0000],\n         [0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 1.0000, 0.0000]],\n        device='cuda')\n    overlaps_3d_iou = boxes1.overlaps(boxes1, boxes2)\n    assert torch.allclose(\n        expected_iou_tensor, overlaps_3d_iou, rtol=1e-4, atol=1e-7)\n\n    expected_iof_tensor = torch.tensor(\n        [[0.5582, 0.0000, 0.0000, 0.0000], [0.0000, 0.5025, 0.0000, 0.0000],\n         [0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 1.0000, 0.0000]],\n        device='cuda')\n    overlaps_3d_iof = boxes1.overlaps(boxes1, boxes2, mode='iof')\n    assert torch.allclose(\n        expected_iof_tensor, overlaps_3d_iof, rtol=1e-4, atol=1e-7)\n\n    empty_boxes = []\n    boxes3 = LiDARInstance3DBoxes(empty_boxes)\n    overlaps_3d_empty = boxes1.overlaps(boxes3, boxes2)\n    assert overlaps_3d_empty.shape[0] == 0\n    assert overlaps_3d_empty.shape[1] == 4\n    # Test camera boxes 3D overlaps\n    cam_boxes1_tensor = Box3DMode.convert(boxes1_tensor, Box3DMode.LIDAR,\n                                          Box3DMode.CAM)\n    cam_boxes1 = CameraInstance3DBoxes(cam_boxes1_tensor)\n\n    cam_boxes2_tensor = Box3DMode.convert(boxes2_tensor, Box3DMode.LIDAR,\n                                          Box3DMode.CAM)\n    cam_boxes2 = CameraInstance3DBoxes(cam_boxes2_tensor)\n    cam_overlaps_3d = cam_boxes1.overlaps(cam_boxes1, cam_boxes2)\n\n    # same boxes under different coordinates should 
have the same iou\n    assert torch.allclose(\n        expected_iou_tensor, cam_overlaps_3d, rtol=1e-4, atol=1e-7)\n    assert torch.allclose(cam_overlaps_3d, overlaps_3d_iou)\n\n    with pytest.raises(AssertionError):\n        cam_boxes1.overlaps(cam_boxes1, boxes1)\n    with pytest.raises(AssertionError):\n        boxes1.overlaps(cam_boxes1, boxes1)\n\n\ndef test_depth_boxes3d():\n    # test empty initialization\n    empty_boxes = []\n    boxes = DepthInstance3DBoxes(empty_boxes)\n    assert boxes.tensor.shape[0] == 0\n    assert boxes.tensor.shape[1] == 7\n\n    # Test init with numpy array\n    np_boxes = np.array(\n        [[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],\n         [2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971]],\n        dtype=np.float32)\n    boxes_1 = DepthInstance3DBoxes(np_boxes)\n    assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes))\n\n    # test properties\n\n    assert boxes_1.volume.size(0) == 2\n    assert (boxes_1.center == boxes_1.bottom_center).all()\n    expected_tensor = torch.tensor([[1.4856, 2.5299, -0.1093],\n                                    [2.3262, 3.3065, 0.9475]])\n    assert torch.allclose(boxes_1.gravity_center, expected_tensor)\n    expected_tensor = torch.tensor([[1.4856, 2.5299, 0.9385, 2.1404, 3.0601],\n                                    [2.3262, 3.3065, 0.8234, 0.5325, 2.9971]])\n    assert torch.allclose(boxes_1.bev, expected_tensor)\n    expected_tensor = torch.tensor([[1.0164, 1.4597, 1.9548, 3.6001],\n                                    [1.9145, 3.0402, 2.7379, 3.5728]])\n    assert torch.allclose(boxes_1.nearest_bev, expected_tensor, 1e-4)\n    assert repr(boxes) == (\n        'DepthInstance3DBoxes(\\n    tensor([], size=(0, 7)))')\n\n    # test init with torch.Tensor\n    th_boxes = torch.tensor(\n        [[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],\n         [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]],\n        dtype=torch.float32)\n    boxes_2 = DepthInstance3DBoxes(th_boxes)\n    assert torch.allclose(boxes_2.tensor, th_boxes)\n\n    # test clone/to/device\n    boxes_2 = boxes_2.clone()\n    boxes_1 = boxes_1.to(boxes_2.device)\n\n    # test box concatenation\n    expected_tensor = torch.tensor(\n        [[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],\n         [2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971],\n         [2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],\n         [1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]])\n    boxes = DepthInstance3DBoxes.cat([boxes_1, boxes_2])\n    assert torch.allclose(boxes.tensor, expected_tensor)\n    # concatenate empty list\n    empty_boxes = DepthInstance3DBoxes.cat([])\n    assert empty_boxes.tensor.shape[0] == 0\n    assert empty_boxes.tensor.shape[-1] == 7\n\n    # test box flip\n    points = torch.tensor([[0.6762, 1.2559, -1.4658, 2.5359],\n                           [0.8784, 4.7814, -1.3857, 0.7167],\n                           [-0.2517, 6.7053, -0.9697, 0.5599],\n                           [0.5520, 0.6533, -0.5265, 1.0032],\n                           [-0.5358, 4.5870, -1.4741, 0.0556]])\n    expected_tensor = torch.tensor(\n        [[-1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 0.0815],\n         [-2.3262, 3.3065, 0.4426, 0.8234, 0.5325, 1.0099, 0.1445],\n         [-2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 0.0723],\n         [-1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 0.0815]])\n    points = boxes.flip(bev_direction='horizontal', 
points=points)\n    expected_points = torch.tensor([[-0.6762, 1.2559, -1.4658, 2.5359],\n                                    [-0.8784, 4.7814, -1.3857, 0.7167],\n                                    [0.2517, 6.7053, -0.9697, 0.5599],\n                                    [-0.5520, 0.6533, -0.5265, 1.0032],\n                                    [0.5358, 4.5870, -1.4741, 0.0556]])\n    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)\n    assert torch.allclose(points, expected_points)\n    expected_tensor = torch.tensor(\n        [[-1.4856, -2.5299, -0.5570, 0.9385, 2.1404, 0.8954, -0.0815],\n         [-2.3262, -3.3065, 0.4426, 0.8234, 0.5325, 1.0099, -0.1445],\n         [-2.4593, -2.5870, -0.4321, 0.8597, 0.6193, 1.0204, -0.0723],\n         [-1.4856, -2.5299, -0.5570, 0.9385, 2.1404, 0.8954, -0.0815]])\n    points = boxes.flip(bev_direction='vertical', points=points)\n    expected_points = torch.tensor([[-0.6762, -1.2559, -1.4658, 2.5359],\n                                    [-0.8784, -4.7814, -1.3857, 0.7167],\n                                    [0.2517, -6.7053, -0.9697, 0.5599],\n                                    [-0.5520, -0.6533, -0.5265, 1.0032],\n                                    [0.5358, -4.5870, -1.4741, 0.0556]])\n    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)\n    assert torch.allclose(points, expected_points)\n    # test box rotation\n    boxes_rot = boxes.clone()\n    expected_tensor = torch.tensor(\n        [[-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585],\n         [-2.4016, -3.2521, 0.4426, 0.8234, 0.5325, 1.0099, -0.1215],\n         [-2.5181, -2.5298, -0.4321, 0.8597, 0.6193, 1.0204, -0.0493],\n         [-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585]])\n    points, rot_mar_T = boxes_rot.rotate(-0.022998953275003075, points)\n    expected_points = torch.tensor([[-0.7049, -1.2400, -1.4658, 2.5359],\n                                    [-0.9881, -4.7599, -1.3857, 0.7167],\n                                    [0.0974, -6.7093, -0.9697, 0.5599],\n                                    [-0.5669, -0.6404, -0.5265, 1.0032],\n                                    [0.4302, -4.5981, -1.4741, 0.0556]])\n    expected_rot_mat_T = torch.tensor([[0.9997, -0.0230, 0.0000],\n                                       [0.0230, 0.9997, 0.0000],\n                                       [0.0000, 0.0000, 1.0000]])\n    assert torch.allclose(boxes_rot.tensor, expected_tensor, 1e-3)\n    assert torch.allclose(points, expected_points, 1e-3)\n    assert torch.allclose(rot_mar_T, expected_rot_mat_T, 1e-3)\n\n    points_np = np.array([[0.6762, 1.2559, -1.4658, 2.5359],\n                          [0.8784, 4.7814, -1.3857, 0.7167],\n                          [-0.2517, 6.7053, -0.9697, 0.5599],\n                          [0.5520, 0.6533, -0.5265, 1.0032],\n                          [-0.5358, 4.5870, -1.4741, 0.0556]])\n    points_np, rot_mar_T_np = boxes.rotate(-0.022998953275003075, points_np)\n    expected_points_np = np.array([[0.7049, 1.2400, -1.4658, 2.5359],\n                                   [0.9881, 4.7599, -1.3857, 0.7167],\n                                   [-0.0974, 6.7093, -0.9697, 0.5599],\n                                   [0.5669, 0.6404, -0.5265, 1.0032],\n                                   [-0.4302, 4.5981, -1.4741, 0.0556]])\n    expected_rot_mat_T_np = np.array([[0.9997, -0.0230, 0.0000],\n                                      [0.0230, 0.9997, 0.0000],\n                                      [0.0000, 0.0000, 1.0000]])\n    expected_tensor = 
torch.tensor(\n        [[-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585],\n         [-2.4016, -3.2521, 0.4426, 0.8234, 0.5325, 1.0099, -0.1215],\n         [-2.5181, -2.5298, -0.4321, 0.8597, 0.6193, 1.0204, -0.0493],\n         [-1.5434, -2.4951, -0.5570, 0.9385, 2.1404, 0.8954, -0.0585]])\n    assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)\n    assert np.allclose(points_np, expected_points_np, 1e-3)\n    assert np.allclose(rot_mar_T_np, expected_rot_mat_T_np, 1e-3)\n    th_boxes = torch.tensor(\n        [[0.61211395, 0.8129094, 0.10563634, 1.497534, 0.16927195, 0.27956772],\n         [1.430009, 0.49797538, 0.9382923, 0.07694054, 0.9312509, 1.8919173]],\n        dtype=torch.float32)\n    boxes = DepthInstance3DBoxes(th_boxes, box_dim=6, with_yaw=False)\n    expected_tensor = torch.tensor([[\n        0.64884546, 0.78390356, 0.10563634, 1.50373348, 0.23795205, 0.27956772,\n        0\n    ],\n                                    [\n                                        1.45139421, 0.43169443, 0.93829232,\n                                        0.11967964, 0.93380373, 1.89191735, 0\n                                    ]])\n    boxes_3 = boxes.clone()\n    boxes_3.rotate(-0.04599790655000615)\n    assert torch.allclose(boxes_3.tensor, expected_tensor)\n    boxes.rotate(torch.tensor(-0.04599790655000615))\n    assert torch.allclose(boxes.tensor, expected_tensor)\n\n    # test bbox in_range_bev\n    expected_tensor = torch.tensor([1, 1], dtype=torch.bool)\n    mask = boxes.in_range_bev([0., -40., 70.4, 40.])\n    assert (mask == expected_tensor).all()\n    mask = boxes.nonempty()\n    assert (mask == expected_tensor).all()\n\n    expected_tensor = torch.tensor([[[-0.1030, 0.6649, 0.1056],\n                                     [-0.1030, 0.6649, 0.3852],\n                                     [-0.1030, 0.9029, 0.3852],\n                                     [-0.1030, 0.9029, 0.1056],\n                                     [1.4007, 0.6649, 0.1056],\n                                     [1.4007, 0.6649, 0.3852],\n                                     [1.4007, 0.9029, 0.3852],\n                                     [1.4007, 0.9029, 0.1056]],\n                                    [[1.3916, -0.0352, 0.9383],\n                                     [1.3916, -0.0352, 2.8302],\n                                     [1.3916, 0.8986, 2.8302],\n                                     [1.3916, 0.8986, 0.9383],\n                                     [1.5112, -0.0352, 0.9383],\n                                     [1.5112, -0.0352, 2.8302],\n                                     [1.5112, 0.8986, 2.8302],\n                                     [1.5112, 0.8986, 0.9383]]])\n    torch.allclose(boxes.corners, expected_tensor)\n\n    # test points in boxes\n    if torch.cuda.is_available():\n        box_idxs_of_pts = boxes.points_in_boxes(points.cuda())\n        expected_idxs_of_pts = torch.tensor(\n            [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],\n            device='cuda:0',\n            dtype=torch.int32)\n        assert torch.all(box_idxs_of_pts == expected_idxs_of_pts)\n\n    # test get_surface_line_center\n    boxes = torch.tensor(\n        [[0.3294, 1.0359, 0.1171, 1.0822, 1.1247, 1.3721, 0.4916],\n         [-2.4630, -2.6324, -0.1616, 0.9202, 1.7896, 0.1992, 0.3185]])\n    boxes = DepthInstance3DBoxes(\n        boxes, box_dim=boxes.shape[-1], with_yaw=True, origin=(0.5, 0.5, 0.5))\n    surface_center, line_center = boxes.get_surface_line_center()\n\n    expected_surface_center = 
torch.tensor([[0.3294, 1.0359, 0.8031],\n                                            [0.3294, 1.0359, -0.5689],\n                                            [0.5949, 1.5317, 0.1171],\n                                            [0.1533, 0.5018, 0.1171],\n                                            [0.8064, 0.7805, 0.1171],\n                                            [-0.1845, 1.2053, 0.1171],\n                                            [-2.4630, -2.6324, -0.0620],\n                                            [-2.4630, -2.6324, -0.2612],\n                                            [-2.0406, -1.8436, -0.1616],\n                                            [-2.7432, -3.4822, -0.1616],\n                                            [-2.0574, -2.8496, -0.1616],\n                                            [-2.9000, -2.4883, -0.1616]])\n\n    expected_line_center = torch.tensor([[0.8064, 0.7805, 0.8031],\n                                         [-0.1845, 1.2053, 0.8031],\n                                         [0.5949, 1.5317, 0.8031],\n                                         [0.1533, 0.5018, 0.8031],\n                                         [0.8064, 0.7805, -0.5689],\n                                         [-0.1845, 1.2053, -0.5689],\n                                         [0.5949, 1.5317, -0.5689],\n                                         [0.1533, 0.5018, -0.5689],\n                                         [1.0719, 1.2762, 0.1171],\n                                         [0.6672, 0.3324, 0.1171],\n                                         [0.1178, 1.7871, 0.1171],\n                                         [-0.3606, 0.6713, 0.1171],\n                                         [-2.0574, -2.8496, -0.0620],\n                                         [-2.9000, -2.4883, -0.0620],\n                                         [-2.0406, -1.8436, -0.0620],\n                                         [-2.7432, -3.4822, -0.0620],\n                                         [-2.0574, -2.8496, -0.2612],\n                                         [-2.9000, -2.4883, -0.2612],\n                                         [-2.0406, -1.8436, -0.2612],\n                                         [-2.7432, -3.4822, -0.2612],\n                                         [-1.6350, -2.0607, -0.1616],\n                                         [-2.3062, -3.6263, -0.1616],\n                                         [-2.4462, -1.6264, -0.1616],\n                                         [-3.1802, -3.3381, -0.1616]])\n\n    assert torch.allclose(surface_center, expected_surface_center, atol=1e-04)\n    assert torch.allclose(line_center, expected_line_center, atol=1e-04)\n\n\ndef test_rotation_3d_in_axis():\n    points = torch.tensor([[[-0.4599, -0.0471, 0.0000],\n                            [-0.4599, -0.0471, 1.8433],\n                            [-0.4599, 0.0471, 1.8433]],\n                           [[-0.2555, -0.2683, 0.0000],\n                            [-0.2555, -0.2683, 0.9072],\n                            [-0.2555, 0.2683, 0.9072]]])\n    rotated = rotation_3d_in_axis(\n        points, torch.tensor([-np.pi / 10, np.pi / 10]), axis=0)\n    expected_rotated = torch.tensor([[[0.0000, -0.4228, -0.1869],\n                                      [1.8433, -0.4228, -0.1869],\n                                      [1.8433, -0.4519, -0.0973]],\n                                     [[0.0000, -0.3259, -0.1762],\n                                      [0.9072, -0.3259, -0.1762],\n                                      [0.9072, -0.1601, 
0.3341]]])\n    assert torch.allclose(rotated, expected_rotated, 1e-3)\n\n\ndef test_limit_period():\n    torch.manual_seed(0)\n    val = torch.rand([5, 1])\n    result = limit_period(val)\n    expected_result = torch.tensor([[0.4963], [0.7682], [0.0885], [0.1320],\n                                    [0.3074]])\n    assert torch.allclose(result, expected_result, 1e-3)\n\n\ndef test_xywhr2xyxyr():\n    torch.manual_seed(0)\n    xywhr = torch.tensor([[1., 2., 3., 4., 5.], [0., 1., 2., 3., 4.]])\n    xyxyr = xywhr2xyxyr(xywhr)\n    expected_xyxyr = torch.tensor([[-0.5000, 0.0000, 2.5000, 4.0000, 5.0000],\n                                   [-1.0000, -0.5000, 1.0000, 2.5000, 4.0000]])\n\n    assert torch.allclose(xyxyr, expected_xyxyr)\n\n\nclass test_get_box_type(unittest.TestCase):\n\n    def test_get_box_type(self):\n        box_type_3d, box_mode_3d = get_box_type('camera')\n        assert box_type_3d == CameraInstance3DBoxes\n        assert box_mode_3d == Box3DMode.CAM\n\n        box_type_3d, box_mode_3d = get_box_type('depth')\n        assert box_type_3d == DepthInstance3DBoxes\n        assert box_mode_3d == Box3DMode.DEPTH\n\n        box_type_3d, box_mode_3d = get_box_type('lidar')\n        assert box_type_3d == LiDARInstance3DBoxes\n        assert box_mode_3d == Box3DMode.LIDAR\n\n    def test_bad_box_type(self):\n        self.assertRaises(ValueError, get_box_type, 'test')\n\n\ndef test_points_cam2img():\n    torch.manual_seed(0)\n    points = torch.rand([5, 3])\n    proj_mat = torch.rand([4, 4])\n    point_2d_res = points_cam2img(points, proj_mat)\n    expected_point_2d_res = torch.tensor([[0.5832, 0.6496], [0.6146, 0.7910],\n                                          [0.6994, 0.7782], [0.5623, 0.6303],\n                                          [0.4359, 0.6532]])\n    assert torch.allclose(point_2d_res, expected_point_2d_res, 1e-3)\n"
  },
  {
    "path": "tests/test_utils/test_box_np_ops.py",
    "content": "import numpy as np\n\n\ndef test_camera_to_lidar():\n    from mmdet3d.core.bbox.box_np_ops import camera_to_lidar\n    points = np.array([[1.84, 1.47, 8.41]])\n    rect = np.array([[0.9999128, 0.01009263, -0.00851193, 0.],\n                     [-0.01012729, 0.9999406, -0.00403767, 0.],\n                     [0.00847068, 0.00412352, 0.9999556, 0.], [0., 0., 0.,\n                                                               1.]])\n    Trv2c = np.array([[0.00692796, -0.9999722, -0.00275783, -0.02457729],\n                      [-0.00116298, 0.00274984, -0.9999955, -0.06127237],\n                      [0.9999753, 0.00693114, -0.0011439, -0.3321029],\n                      [0., 0., 0., 1.]])\n    points_lidar = camera_to_lidar(points, rect, Trv2c)\n    expected_points = np.array([[8.73138192, -1.85591746, -1.59969933]])\n    assert np.allclose(points_lidar, expected_points)\n\n\ndef test_box_camera_to_lidar():\n    from mmdet3d.core.bbox.box_np_ops import box_camera_to_lidar\n    box = np.array([[1.84, 1.47, 8.41, 1.2, 1.89, 0.48, 0.01]])\n    rect = np.array([[0.9999128, 0.01009263, -0.00851193, 0.],\n                     [-0.01012729, 0.9999406, -0.00403767, 0.],\n                     [0.00847068, 0.00412352, 0.9999556, 0.], [0., 0., 0.,\n                                                               1.]])\n    Trv2c = np.array([[0.00692796, -0.9999722, -0.00275783, -0.02457729],\n                      [-0.00116298, 0.00274984, -0.9999955, -0.06127237],\n                      [0.9999753, 0.00693114, -0.0011439, -0.3321029],\n                      [0., 0., 0., 1.]])\n    box_lidar = box_camera_to_lidar(box, rect, Trv2c)\n    expected_box = np.array(\n        [[8.73138192, -1.85591746, -1.59969933, 0.48, 1.2, 1.89, 0.01]])\n    assert np.allclose(box_lidar, expected_box)\n\n\ndef test_corners_nd():\n    from mmdet3d.core.bbox.box_np_ops import corners_nd\n    dims = np.array([[0.47, 0.98]])\n    corners = corners_nd(dims)\n    expected_corners = np.array([[[-0.235, -0.49], [-0.235, 0.49],\n                                  [0.235, 0.49], [0.235, -0.49]]])\n    assert np.allclose(corners, expected_corners)\n\n\ndef test_center_to_corner_box2d():\n    from mmdet3d.core.bbox.box_np_ops import center_to_corner_box2d\n    center = np.array([[9.348705, -3.6271024]])\n    dims = np.array([[0.47, 0.98]])\n    angles = np.array([-3.14])\n    corner = center_to_corner_box2d(center, dims, angles)\n    expected_corner = np.array([[[9.584485, -3.1374772], [9.582925, -4.117476],\n                                 [9.112926, -4.1167274],\n                                 [9.114486, -3.1367288]]])\n    assert np.allclose(corner, expected_corner)\n\n\ndef test_rotation_2d():\n    from mmdet3d.core.bbox.box_np_ops import rotation_2d\n    angles = np.array([-3.14])\n    corners = np.array([[[-0.235, -0.49], [-0.235, 0.49], [0.235, 0.49],\n                         [0.235, -0.49]]])\n    corners_rotated = rotation_2d(corners, angles)\n    expected_corners = np.array([[[0.2357801, 0.48962511],\n                                  [0.2342193, -0.49037365],\n                                  [-0.2357801, -0.48962511],\n                                  [-0.2342193, 0.49037365]]])\n    assert np.allclose(corners_rotated, expected_corners)\n"
  },
  {
    "path": "tests/test_utils/test_coord_3d_mode.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet3d.core.bbox import (CameraInstance3DBoxes, Coord3DMode,\n                               DepthInstance3DBoxes, LiDARInstance3DBoxes)\nfrom mmdet3d.core.points import CameraPoints, DepthPoints, LiDARPoints\n\n\ndef test_points_conversion():\n    \"\"\"Test the conversion of points between different modes.\"\"\"\n    points_np = np.array([[\n        -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,\n        0.4974, 0.9409\n    ],\n                          [\n                              -2.66751588e+01, 5.59499564e+00, -9.14345860e-01,\n                              0.1502, 0.3707, 0.1086, 0.6297\n                          ],\n                          [\n                              -5.80979675e+00, 3.54092357e+01, 2.00889888e-01,\n                              0.6565, 0.6248, 0.6954, 0.2538\n                          ],\n                          [\n                              -3.13086877e+01, 1.09007628e+00, -1.94612112e-01,\n                              0.2803, 0.0258, 0.4896, 0.3269\n                          ]],\n                         dtype=np.float32)\n\n    # test CAM to LIDAR and DEPTH\n    cam_points = CameraPoints(\n        points_np,\n        points_dim=7,\n        attribute_dims=dict(color=[3, 4, 5], height=6))\n\n    convert_lidar_points = cam_points.convert_to(Coord3DMode.LIDAR)\n    expected_tensor = torch.tensor([[\n        2.9757e-01, 5.2422e+00, -4.0021e+01, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -9.1435e-01, 2.6675e+01, -5.5950e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        2.0089e-01, 5.8098e+00, -3.5409e+01,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -1.9461e-01, 3.1309e+01, -1.0901e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n\n    lidar_point_tensor = Coord3DMode.convert_point(cam_points.tensor,\n                                                   Coord3DMode.CAM,\n                                                   Coord3DMode.LIDAR)\n    assert torch.allclose(expected_tensor, convert_lidar_points.tensor, 1e-4)\n    assert torch.allclose(lidar_point_tensor, convert_lidar_points.tensor,\n                          1e-4)\n\n    convert_depth_points = cam_points.convert_to(Coord3DMode.DEPTH)\n    expected_tensor = torch.tensor([[\n        -5.2422e+00, 2.9757e-01, -4.0021e+01, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.6675e+01, -9.1435e-01, -5.5950e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        -5.8098e+00, 2.0089e-01, -3.5409e+01,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        
2.5380e-01\n                                    ],\n                                    [\n                                        -3.1309e+01, -1.9461e-01, -1.0901e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n\n    depth_point_tensor = Coord3DMode.convert_point(cam_points.tensor,\n                                                   Coord3DMode.CAM,\n                                                   Coord3DMode.DEPTH)\n    assert torch.allclose(expected_tensor, convert_depth_points.tensor, 1e-4)\n    assert torch.allclose(depth_point_tensor, convert_depth_points.tensor,\n                          1e-4)\n\n    # test LIDAR to CAM and DEPTH\n    lidar_points = LiDARPoints(\n        points_np,\n        points_dim=7,\n        attribute_dims=dict(color=[3, 4, 5], height=6))\n\n    convert_cam_points = lidar_points.convert_to(Coord3DMode.CAM)\n    expected_tensor = torch.tensor([[\n        -4.0021e+01, -2.9757e-01, -5.2422e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -5.5950e+00, 9.1435e-01, -2.6675e+01,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        -3.5409e+01, -2.0089e-01, -5.8098e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -1.0901e+00, 1.9461e-01, -3.1309e+01,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n\n    cam_point_tensor = Coord3DMode.convert_point(lidar_points.tensor,\n                                                 Coord3DMode.LIDAR,\n                                                 Coord3DMode.CAM)\n    assert torch.allclose(expected_tensor, convert_cam_points.tensor, 1e-4)\n    assert torch.allclose(cam_point_tensor, convert_cam_points.tensor, 1e-4)\n\n    convert_depth_points = lidar_points.convert_to(Coord3DMode.DEPTH)\n    expected_tensor = torch.tensor([[\n        -4.0021e+01, -5.2422e+00, 2.9757e-01, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -5.5950e+00, -2.6675e+01, -9.1435e-01,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        -3.5409e+01, -5.8098e+00, 2.0089e-01,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -1.0901e+00, -3.1309e+01, -1.9461e-01,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n\n    depth_point_tensor = Coord3DMode.convert_point(lidar_points.tensor,\n                                  
                 Coord3DMode.LIDAR,\n                                                   Coord3DMode.DEPTH)\n    assert torch.allclose(expected_tensor, convert_depth_points.tensor, 1e-4)\n    assert torch.allclose(depth_point_tensor, convert_depth_points.tensor,\n                          1e-4)\n\n    # test DEPTH to CAM and LIDAR\n    depth_points = DepthPoints(\n        points_np,\n        points_dim=7,\n        attribute_dims=dict(color=[3, 4, 5], height=6))\n\n    convert_cam_points = depth_points.convert_to(Coord3DMode.CAM)\n    expected_tensor = torch.tensor([[\n        -5.2422e+00, -2.9757e-01, 4.0021e+01, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.6675e+01, 9.1435e-01, 5.5950e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        -5.8098e+00, -2.0089e-01, 3.5409e+01,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -3.1309e+01, 1.9461e-01, 1.0901e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n\n    cam_point_tensor = Coord3DMode.convert_point(depth_points.tensor,\n                                                 Coord3DMode.DEPTH,\n                                                 Coord3DMode.CAM)\n    assert torch.allclose(expected_tensor, convert_cam_points.tensor, 1e-4)\n    assert torch.allclose(cam_point_tensor, convert_cam_points.tensor, 1e-4)\n\n    rt_mat_provided = torch.tensor([[0.99789, -0.012698, -0.063678],\n                                    [-0.012698, 0.92359, -0.38316],\n                                    [0.063678, 0.38316, 0.92148]])\n\n    depth_points_new = torch.cat([\n        depth_points.tensor[:, :3] @ rt_mat_provided.t(),\n        depth_points.tensor[:, 3:]\n    ],\n                                 dim=1)\n    cam_point_tensor_new = Coord3DMode.convert_point(\n        depth_points_new,\n        Coord3DMode.DEPTH,\n        Coord3DMode.CAM,\n        rt_mat=rt_mat_provided)\n    assert torch.allclose(expected_tensor, cam_point_tensor_new, 1e-4)\n\n    convert_lidar_points = depth_points.convert_to(Coord3DMode.LIDAR)\n    expected_tensor = torch.tensor([[\n        4.0021e+01, 5.2422e+00, 2.9757e-01, 6.6660e-01, 1.9560e-01, 4.9740e-01,\n        9.4090e-01\n    ],\n                                    [\n                                        5.5950e+00, 2.6675e+01, -9.1435e-01,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        3.5409e+01, 5.8098e+00, 2.0089e-01,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        1.0901e+00, 3.1309e+01, -1.9461e-01,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                            
             3.2690e-01\n                                    ]])\n\n    lidar_point_tensor = Coord3DMode.convert_point(depth_points.tensor,\n                                                   Coord3DMode.DEPTH,\n                                                   Coord3DMode.LIDAR)\n    assert torch.allclose(expected_tensor, convert_lidar_points.tensor, 1e-4)\n    assert torch.allclose(lidar_point_tensor, convert_lidar_points.tensor,\n                          1e-4)\n\n\ndef test_boxes_conversion():\n    # test CAM to LIDAR and DEPTH\n    cam_boxes = CameraInstance3DBoxes(\n        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],\n         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],\n         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],\n         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],\n         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])\n    convert_lidar_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,\n                                              Coord3DMode.LIDAR)\n\n    expected_tensor = torch.tensor(\n        [[-1.7501, -1.7802, -2.5162, 1.6500, 1.7500, 3.3900, 1.4800],\n         [-1.6357, -8.9594, -2.4567, 1.5700, 1.5400, 4.0100, 1.6200],\n         [-1.3033, -28.2967, 0.5558, 1.4800, 1.4700, 2.2300, -1.5700],\n         [-1.7361, -26.6690, -21.8230, 1.4000, 1.5600, 3.4800, -1.6900],\n         [-1.6218, -31.3198, -8.1621, 1.4800, 1.7400, 3.7700, 2.7900]])\n    assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)\n\n    convert_depth_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,\n                                              Coord3DMode.DEPTH)\n    expected_tensor = torch.tensor(\n        [[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, 1.4800],\n         [8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, 1.6200],\n         [28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, -1.5700],\n         [26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, -1.6900],\n         [31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])\n    assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)\n\n    # test LIDAR to CAM and DEPTH\n    lidar_boxes = LiDARInstance3DBoxes(\n        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],\n         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],\n         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],\n         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],\n         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])\n    convert_cam_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,\n                                            Coord3DMode.CAM)\n    expected_tensor = torch.tensor(\n        [[-2.5162, 1.7501, 1.7802, 3.3900, 1.6500, 1.7500, 1.4800],\n         [-2.4567, 1.6357, 8.9594, 4.0100, 1.5700, 1.5400, 1.6200],\n         [0.5558, 1.3033, 28.2967, 2.2300, 1.4800, 1.4700, -1.5700],\n         [-21.8230, 1.7361, 26.6690, 3.4800, 1.4000, 1.5600, -1.6900],\n         [-8.1621, 1.6218, 31.3198, 3.7700, 1.4800, 1.7400, 2.7900]])\n    assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)\n\n    convert_depth_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,\n                                              Coord3DMode.DEPTH)\n    expected_tensor = torch.tensor(\n        [[-2.5162, 1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],\n         [-2.4567, 8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],\n         [0.5558, 28.2967, 
-1.3033, 2.2300, 1.4700, 1.4800, -1.5700],\n         [-21.8230, 26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],\n         [-8.1621, 31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])\n    assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)\n\n    # test DEPTH to CAM and LIDAR\n    depth_boxes = DepthInstance3DBoxes(\n        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],\n         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],\n         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],\n         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],\n         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])\n    convert_cam_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,\n                                            Coord3DMode.CAM)\n    expected_tensor = torch.tensor(\n        [[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, 1.4800],\n         [8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, 1.6200],\n         [28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, -1.5700],\n         [26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, -1.6900],\n         [31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])\n    assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)\n\n    convert_lidar_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,\n                                              Coord3DMode.LIDAR)\n    expected_tensor = torch.tensor(\n        [[2.5162, -1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],\n         [2.4567, -8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],\n         [-0.5558, -28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],\n         [21.8230, -26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],\n         [8.1621, -31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])\n    assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)\n"
  },
  {
    "path": "tests/test_utils/test_merge_augs.py",
    "content": "import mmcv\nimport pytest\nimport torch\n\nfrom mmdet3d.core import merge_aug_bboxes_3d\nfrom mmdet3d.core.bbox import DepthInstance3DBoxes\n\n\ndef test_merge_aug_bboxes_3d():\n    if not torch.cuda.is_available():\n        pytest.skip('test requires GPU and torch+cuda')\n    img_meta_0 = dict(\n        pcd_horizontal_flip=False,\n        pcd_vertical_flip=True,\n        pcd_scale_factor=1.0)\n    img_meta_1 = dict(\n        pcd_horizontal_flip=True,\n        pcd_vertical_flip=False,\n        pcd_scale_factor=1.0)\n    img_meta_2 = dict(\n        pcd_horizontal_flip=False,\n        pcd_vertical_flip=False,\n        pcd_scale_factor=0.5)\n    img_metas = [[img_meta_0], [img_meta_1], [img_meta_2]]\n    boxes_3d = DepthInstance3DBoxes(\n        torch.tensor(\n            [[1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.6956],\n             [2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.6500],\n             [-1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 0.1022]],\n            device='cuda'))\n    labels_3d = torch.tensor([0, 7, 6])\n    scores_3d = torch.tensor([0.5, 1.0, 1.0])\n    aug_result = dict(\n        boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)\n    aug_results = [aug_result, aug_result, aug_result]\n    test_cfg = mmcv.ConfigDict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_thr=0.01,\n        score_thr=0.1,\n        min_bbox_size=0,\n        nms_pre=100,\n        max_num=50)\n    results = merge_aug_bboxes_3d(aug_results, img_metas, test_cfg)\n    expected_boxes_3d = torch.tensor(\n        [[-1.0864, -1.9045, -1.2000, 0.7128, 1.5631, 2.1045, -0.1022],\n         [1.0864, 1.9045, -1.2000, 0.7128, 1.5631, 2.1045, 3.0394],\n         [-2.1728, 3.8090, -2.4000, 1.4256, 3.1262, 4.2090, 0.1022],\n         [2.5831, -4.8117, -1.2733, 0.5852, 0.8832, 0.9733, -1.6500],\n         [-2.5831, 4.8117, -1.2733, 0.5852, 0.8832, 0.9733, 1.4916],\n         [5.1662, 9.6234, -2.5466, 1.1704, 1.7664, 1.9466, 1.6500],\n         [1.0473, -4.1687, -1.2317, 2.3021, 1.8876, 1.9696, -1.6956],\n         [-1.0473, 4.1687, -1.2317, 2.3021, 1.8876, 1.9696, 1.4460],\n         [2.0946, 8.3374, -2.4634, 4.6042, 3.7752, 3.9392, 1.6956]])\n    expected_scores_3d = torch.tensor([\n        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.5000, 0.5000, 0.5000\n    ])\n    expected_labels_3d = torch.tensor([6, 6, 6, 7, 7, 7, 0, 0, 0])\n    assert torch.allclose(results['boxes_3d'].tensor, expected_boxes_3d)\n    assert torch.allclose(results['scores_3d'], expected_scores_3d)\n    assert torch.all(results['labels_3d'] == expected_labels_3d)\n"
  },
  {
    "path": "tests/test_utils/test_nms.py",
    "content": "import numpy as np\nimport torch\n\n\ndef test_aligned_3d_nms():\n    from mmdet3d.core.post_processing import aligned_3d_nms\n\n    boxes = torch.tensor([[1.2261, 0.6679, -1.2678, 2.6547, 1.0428, 0.1000],\n                          [5.0919, 0.6512, 0.7238, 5.4821, 1.2451, 2.1095],\n                          [6.8392, -1.2205, 0.8570, 7.6920, 0.3220, 3.2223],\n                          [3.6900, -0.4235, -1.0380, 4.4415, 0.2671, -0.1442],\n                          [4.8071, -1.4311, 0.7004, 5.5788, -0.6837, 1.2487],\n                          [2.1807, -1.5811, -1.1289, 3.0151, -0.1346, -0.5351],\n                          [4.4631, -4.2588, -1.1403, 5.3012, -3.4463, -0.3212],\n                          [4.7607, -3.3311, 0.5993, 5.2976, -2.7874, 1.2273],\n                          [3.1265, 0.7113, -0.0296, 3.8944, 1.3532, 0.9785],\n                          [5.5828, -3.5350, 1.0105, 8.2841, -0.0405, 3.3614],\n                          [3.0003, -2.1099, -1.0608, 5.3423, 0.0328, 0.6252],\n                          [2.7148, 0.6082, -1.1738, 3.6995, 1.2375, -0.0209],\n                          [4.9263, -0.2152, 0.2889, 5.6963, 0.3416, 1.3471],\n                          [5.0713, 1.3459, -0.2598, 5.6278, 1.9300, 1.2835],\n                          [4.5985, -2.3996, -0.3393, 5.2705, -1.7306, 0.5698],\n                          [4.1386, 0.5658, 0.0422, 4.8937, 1.1983, 0.9911],\n                          [2.7694, -1.9822, -1.0637, 4.0691, 0.3575, -0.1393],\n                          [4.6464, -3.0123, -1.0694, 5.1421, -2.4450, -0.3758],\n                          [3.4754, 0.4443, -1.1282, 4.6727, 1.3786, 0.2550],\n                          [2.5905, -0.3504, -1.1202, 3.1599, 0.1153, -0.3036],\n                          [4.1336, -3.4813, 1.1477, 6.2091, -0.8776, 2.6757],\n                          [3.9966, 0.2069, -1.1148, 5.0841, 1.0525, -0.0648],\n                          [4.3216, -1.8647, 0.4733, 6.2069, 0.6671, 3.3363],\n                          [4.7683, 0.4286, -0.0500, 5.5642, 1.2906, 0.8902],\n                          [1.7337, 0.7625, -1.0058, 3.0675, 1.3617, 0.3849],\n                          [4.7193, -3.3687, -0.9635, 5.1633, -2.7656, 1.1001],\n                          [4.4704, -2.7744, -1.1127, 5.0971, -2.0228, -0.3150],\n                          [2.7027, 0.6122, -0.9169, 3.3083, 1.2117, 0.6129],\n                          [4.8789, -2.0025, 0.8385, 5.5214, -1.3668, 1.3552],\n                          [3.7856, -1.7582, -0.1738, 5.3373, -0.6300, 0.5558]])\n\n    scores = torch.tensor([\n        3.6414e-03, 2.2901e-02, 2.7576e-04, 1.2238e-02, 5.9310e-04, 1.2659e-01,\n        2.4104e-02, 5.0742e-03, 2.3581e-03, 2.0946e-07, 8.8039e-01, 1.9127e-01,\n        5.0469e-05, 9.3638e-03, 3.0663e-03, 9.4350e-03, 5.3380e-02, 1.7895e-01,\n        2.0048e-01, 1.1294e-03, 3.0304e-08, 2.0237e-01, 1.0894e-08, 6.7972e-02,\n        6.7156e-01, 9.3986e-04, 7.9470e-01, 3.9736e-01, 1.8000e-04, 7.9151e-04\n    ])\n\n    cls = torch.tensor([\n        8, 8, 8, 3, 3, 1, 3, 3, 7, 8, 0, 6, 7, 8, 3, 7, 2, 7, 6, 3, 8, 6, 6, 7,\n        6, 8, 7, 6, 3, 1\n    ])\n\n    pick = aligned_3d_nms(boxes, scores, cls, 0.25)\n    expected_pick = torch.tensor([\n        10, 26, 24, 27, 21, 18, 17, 5, 23, 16, 6, 1, 3, 15, 13, 7, 0, 14, 8,\n        19, 25, 29, 4, 2, 28, 12, 9, 20, 22\n    ])\n\n    assert torch.all(pick == expected_pick)\n\n\ndef test_circle_nms():\n    from mmdet3d.core.post_processing import circle_nms\n    boxes = torch.tensor([[-11.1100, 2.1300, 0.8823],\n                          [-11.2810, 
2.2422, 0.8914],\n                          [-10.3966, -0.3198, 0.8643],\n                          [-10.2906, -13.3159,\n                           0.8401], [5.6518, 9.9791, 0.8271],\n                          [-11.2652, 13.3637, 0.8267],\n                          [4.7768, -13.0409, 0.7810], [5.6621, 9.0422, 0.7753],\n                          [-10.5561, 18.9627, 0.7518],\n                          [-10.5643, 13.2293, 0.7200]])\n    keep = circle_nms(boxes.numpy(), 0.175)\n    expected_keep = [1, 2, 3, 4, 5, 6, 7, 8, 9]\n    assert np.all(keep == expected_keep)\n"
  },
  {
    "path": "tests/test_utils/test_points.py",
    "content": "import numpy as np\nimport torch\n\nfrom mmdet3d.core.points import (BasePoints, CameraPoints, DepthPoints,\n                                 LiDARPoints)\n\n\ndef test_base_points():\n    # test empty initialization\n    empty_boxes = []\n    points = BasePoints(empty_boxes)\n    assert points.tensor.shape[0] == 0\n    assert points.tensor.shape[1] == 3\n\n    # Test init with origin\n    points_np = np.array([[-5.24223238e+00, 4.00209696e+01, 2.97570381e-01],\n                          [-2.66751588e+01, 5.59499564e+00, -9.14345860e-01],\n                          [-5.80979675e+00, 3.54092357e+01, 2.00889888e-01],\n                          [-3.13086877e+01, 1.09007628e+00, -1.94612112e-01]],\n                         dtype=np.float32)\n    base_points = BasePoints(points_np, points_dim=3)\n    assert base_points.tensor.shape[0] == 4\n\n    # Test init with color and height\n    points_np = np.array([[\n        -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,\n        0.4974, 0.9409\n    ],\n                          [\n                              -2.66751588e+01, 5.59499564e+00, -9.14345860e-01,\n                              0.1502, 0.3707, 0.1086, 0.6297\n                          ],\n                          [\n                              -5.80979675e+00, 3.54092357e+01, 2.00889888e-01,\n                              0.6565, 0.6248, 0.6954, 0.2538\n                          ],\n                          [\n                              -3.13086877e+01, 1.09007628e+00, -1.94612112e-01,\n                              0.2803, 0.0258, 0.4896, 0.3269\n                          ]],\n                         dtype=np.float32)\n    base_points = BasePoints(\n        points_np,\n        points_dim=7,\n        attribute_dims=dict(color=[3, 4, 5], height=6))\n    expected_tensor = torch.tensor([[\n        -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,\n        0.4974, 0.9409\n    ],\n                                    [\n                                        -2.66751588e+01, 5.59499564e+00,\n                                        -9.14345860e-01, 0.1502, 0.3707,\n                                        0.1086, 0.6297\n                                    ],\n                                    [\n                                        -5.80979675e+00, 3.54092357e+01,\n                                        2.00889888e-01, 0.6565, 0.6248, 0.6954,\n                                        0.2538\n                                    ],\n                                    [\n                                        -3.13086877e+01, 1.09007628e+00,\n                                        -1.94612112e-01, 0.2803, 0.0258,\n                                        0.4896, 0.3269\n                                    ]])\n\n    assert torch.allclose(expected_tensor, base_points.tensor)\n    assert torch.allclose(expected_tensor[:, :3], base_points.coord)\n    assert torch.allclose(expected_tensor[:, 3:6], base_points.color)\n    assert torch.allclose(expected_tensor[:, 6], base_points.height)\n\n    # test points clone\n    new_base_points = base_points.clone()\n    assert torch.allclose(new_base_points.tensor, base_points.tensor)\n\n    # test points shuffle\n    new_base_points.shuffle()\n    assert new_base_points.tensor.shape == torch.Size([4, 7])\n\n    # test points rotation\n    rot_mat = torch.tensor([[0.93629336, -0.27509585, 0.21835066],\n                            [0.28962948, 0.95642509, -0.03695701],\n                            
[-0.19866933, 0.0978434, 0.97517033]])\n\n    base_points.rotate(rot_mat)\n    expected_tensor = torch.tensor([[\n        6.6239e+00, 3.9748e+01, -2.3335e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.3174e+01, 1.2600e+01, -6.9230e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        4.7760e+00, 3.5484e+01, -2.3813e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.8960e+01, 9.6364e+00, -7.0663e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, base_points.tensor, 1e-3)\n\n    new_base_points = base_points.clone()\n    new_base_points.rotate(0.1, axis=2)\n    expected_tensor = torch.tensor([[\n        2.6226e+00, 4.0211e+01, -2.3335e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.4316e+01, 1.0224e+01, -6.9230e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        1.2096e+00, 3.5784e+01, -2.3813e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.9777e+01, 6.6971e+00, -7.0663e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, new_base_points.tensor, 1e-3)\n\n    # test points translation\n    translation_vector = torch.tensor([0.93629336, -0.27509585, 0.21835066])\n    base_points.translate(translation_vector)\n    expected_tensor = torch.tensor([[\n        7.5602e+00, 3.9473e+01, -2.1152e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.2237e+01, 1.2325e+01, -6.7046e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        5.7123e+00, 3.5209e+01, -2.1629e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.8023e+01, 9.3613e+00, -6.8480e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, 
base_points.tensor, 1e-4)\n\n    # test points filter\n    point_range = [-10, -40, -10, 10, 40, 10]\n    in_range_flags = base_points.in_range_3d(point_range)\n    expected_flags = torch.tensor([True, False, True, False])\n    assert torch.all(in_range_flags == expected_flags)\n\n    # test points scale\n    base_points.scale(1.2)\n    expected_tensor = torch.tensor([[\n        9.0722e+00, 4.7368e+01, -2.5382e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.6685e+01, 1.4790e+01, -8.0455e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        6.8547e+00, 4.2251e+01, -2.5955e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -3.3628e+01, 1.1234e+01, -8.2176e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, base_points.tensor, 1e-3)\n\n    # test get_item\n    expected_tensor = torch.tensor(\n        [[-26.6848, 14.7898, -8.0455, 0.1502, 0.3707, 0.1086, 0.6297]])\n    assert torch.allclose(expected_tensor, base_points[1].tensor, 1e-4)\n    expected_tensor = torch.tensor(\n        [[-26.6848, 14.7898, -8.0455, 0.1502, 0.3707, 0.1086, 0.6297],\n         [6.8547, 42.2509, -2.5955, 0.6565, 0.6248, 0.6954, 0.2538]])\n    assert torch.allclose(expected_tensor, base_points[1:3].tensor, 1e-4)\n    mask = torch.tensor([True, False, True, False])\n    expected_tensor = torch.tensor(\n        [[9.0722, 47.3678, -2.5382, 0.6666, 0.1956, 0.4974, 0.9409],\n         [6.8547, 42.2509, -2.5955, 0.6565, 0.6248, 0.6954, 0.2538]])\n    assert torch.allclose(expected_tensor, base_points[mask].tensor, 1e-4)\n\n    # test length\n    assert len(base_points) == 4\n\n    # test repr\n    expected_repr = 'BasePoints(\\n    '\\\n        'tensor([[ 9.0722e+00,  4.7368e+01, -2.5382e+00,  '\\\n        '6.6660e-01,  1.9560e-01,\\n          4.9740e-01,  '\\\n        '9.4090e-01],\\n        '\\\n        '[-2.6685e+01,  1.4790e+01, -8.0455e+00,  1.5020e-01,  '\\\n        '3.7070e-01,\\n          '\\\n        '1.0860e-01,  6.2970e-01],\\n        '\\\n        '[ 6.8547e+00,  4.2251e+01, -2.5955e+00,  6.5650e-01,  '\\\n        '6.2480e-01,\\n          '\\\n        '6.9540e-01,  2.5380e-01],\\n        '\\\n        '[-3.3628e+01,  1.1234e+01, -8.2176e+00,  2.8030e-01,  '\\\n        '2.5800e-02,\\n          '\\\n        '4.8960e-01,  3.2690e-01]]))'\n    assert expected_repr == str(base_points)\n\n    # test concatenate\n    base_points_clone = base_points.clone()\n    cat_points = BasePoints.cat([base_points, base_points_clone])\n    assert torch.allclose(cat_points.tensor[:len(base_points)],\n                          base_points.tensor)\n\n    # test iteration\n    for i, point in enumerate(base_points):\n        assert torch.allclose(point, base_points.tensor[i])\n\n    # test new_point\n    new_points = base_points.new_point([[1, 2, 3, 4, 5, 6, 7]])\n    assert torch.allclose(\n        new_points.tensor,\n        torch.tensor([[1, 2, 3, 4, 5, 6, 7]], 
dtype=base_points.tensor.dtype))\n\n\ndef test_cam_points():\n    # test empty initialization\n    empty_boxes = []\n    points = CameraPoints(empty_boxes)\n    assert points.tensor.shape[0] == 0\n    assert points.tensor.shape[1] == 3\n\n    # Test init with origin\n    points_np = np.array([[-5.24223238e+00, 4.00209696e+01, 2.97570381e-01],\n                          [-2.66751588e+01, 5.59499564e+00, -9.14345860e-01],\n                          [-5.80979675e+00, 3.54092357e+01, 2.00889888e-01],\n                          [-3.13086877e+01, 1.09007628e+00, -1.94612112e-01]],\n                         dtype=np.float32)\n    cam_points = CameraPoints(points_np, points_dim=3)\n    assert cam_points.tensor.shape[0] == 4\n\n    # Test init with color and height\n    points_np = np.array([[\n        -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,\n        0.4974, 0.9409\n    ],\n                          [\n                              -2.66751588e+01, 5.59499564e+00, -9.14345860e-01,\n                              0.1502, 0.3707, 0.1086, 0.6297\n                          ],\n                          [\n                              -5.80979675e+00, 3.54092357e+01, 2.00889888e-01,\n                              0.6565, 0.6248, 0.6954, 0.2538\n                          ],\n                          [\n                              -3.13086877e+01, 1.09007628e+00, -1.94612112e-01,\n                              0.2803, 0.0258, 0.4896, 0.3269\n                          ]],\n                         dtype=np.float32)\n    cam_points = CameraPoints(\n        points_np,\n        points_dim=7,\n        attribute_dims=dict(color=[3, 4, 5], height=6))\n    expected_tensor = torch.tensor([[\n        -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,\n        0.4974, 0.9409\n    ],\n                                    [\n                                        -2.66751588e+01, 5.59499564e+00,\n                                        -9.14345860e-01, 0.1502, 0.3707,\n                                        0.1086, 0.6297\n                                    ],\n                                    [\n                                        -5.80979675e+00, 3.54092357e+01,\n                                        2.00889888e-01, 0.6565, 0.6248, 0.6954,\n                                        0.2538\n                                    ],\n                                    [\n                                        -3.13086877e+01, 1.09007628e+00,\n                                        -1.94612112e-01, 0.2803, 0.0258,\n                                        0.4896, 0.3269\n                                    ]])\n\n    assert torch.allclose(expected_tensor, cam_points.tensor)\n    assert torch.allclose(expected_tensor[:, :3], cam_points.coord)\n    assert torch.allclose(expected_tensor[:, 3:6], cam_points.color)\n    assert torch.allclose(expected_tensor[:, 6], cam_points.height)\n\n    # test points clone\n    new_cam_points = cam_points.clone()\n    assert torch.allclose(new_cam_points.tensor, cam_points.tensor)\n\n    # test points shuffle\n    new_cam_points.shuffle()\n    assert new_cam_points.tensor.shape == torch.Size([4, 7])\n\n    # test points rotation\n    rot_mat = torch.tensor([[0.93629336, -0.27509585, 0.21835066],\n                            [0.28962948, 0.95642509, -0.03695701],\n                            [-0.19866933, 0.0978434, 0.97517033]])\n    cam_points.rotate(rot_mat)\n    expected_tensor = torch.tensor([[\n        6.6239e+00, 3.9748e+01, 
-2.3335e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.3174e+01, 1.2600e+01, -6.9230e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        4.7760e+00, 3.5484e+01, -2.3813e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.8960e+01, 9.6364e+00, -7.0663e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, cam_points.tensor, 1e-3)\n\n    new_cam_points = cam_points.clone()\n    new_cam_points.rotate(0.1, axis=2)\n    expected_tensor = torch.tensor([[\n        2.6226e+00, 4.0211e+01, -2.3335e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.4316e+01, 1.0224e+01, -6.9230e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        1.2096e+00, 3.5784e+01, -2.3813e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.9777e+01, 6.6971e+00, -7.0663e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, new_cam_points.tensor, 1e-3)\n\n    # test points translation\n    translation_vector = torch.tensor([0.93629336, -0.27509585, 0.21835066])\n    cam_points.translate(translation_vector)\n    expected_tensor = torch.tensor([[\n        7.5602e+00, 3.9473e+01, -2.1152e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.2237e+01, 1.2325e+01, -6.7046e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        5.7123e+00, 3.5209e+01, -2.1629e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.8023e+01, 9.3613e+00, -6.8480e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, cam_points.tensor, 1e-4)\n\n    # test points filter\n    point_range = [-10, -40, -10, 10, 40, 10]\n    in_range_flags = cam_points.in_range_3d(point_range)\n    
expected_flags = torch.tensor([True, False, True, False])\n    assert torch.all(in_range_flags == expected_flags)\n\n    # test points scale\n    cam_points.scale(1.2)\n    expected_tensor = torch.tensor([[\n        9.0722e+00, 4.7368e+01, -2.5382e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.6685e+01, 1.4790e+01, -8.0455e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        6.8547e+00, 4.2251e+01, -2.5955e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -3.3628e+01, 1.1234e+01, -8.2176e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, cam_points.tensor, 1e-3)\n\n    # test get_item\n    expected_tensor = torch.tensor(\n        [[-26.6848, 14.7898, -8.0455, 0.1502, 0.3707, 0.1086, 0.6297]])\n    assert torch.allclose(expected_tensor, cam_points[1].tensor, 1e-4)\n    expected_tensor = torch.tensor(\n        [[-26.6848, 14.7898, -8.0455, 0.1502, 0.3707, 0.1086, 0.6297],\n         [6.8547, 42.2509, -2.5955, 0.6565, 0.6248, 0.6954, 0.2538]])\n    assert torch.allclose(expected_tensor, cam_points[1:3].tensor, 1e-4)\n    mask = torch.tensor([True, False, True, False])\n    expected_tensor = torch.tensor(\n        [[9.0722, 47.3678, -2.5382, 0.6666, 0.1956, 0.4974, 0.9409],\n         [6.8547, 42.2509, -2.5955, 0.6565, 0.6248, 0.6954, 0.2538]])\n    assert torch.allclose(expected_tensor, cam_points[mask].tensor, 1e-4)\n\n    # test length\n    assert len(cam_points) == 4\n\n    # test repr\n    expected_repr = 'CameraPoints(\\n    '\\\n        'tensor([[ 9.0722e+00,  4.7368e+01, -2.5382e+00,  '\\\n        '6.6660e-01,  1.9560e-01,\\n          4.9740e-01,  '\\\n        '9.4090e-01],\\n        '\\\n        '[-2.6685e+01,  1.4790e+01, -8.0455e+00,  1.5020e-01,  '\\\n        '3.7070e-01,\\n          '\\\n        '1.0860e-01,  6.2970e-01],\\n        '\\\n        '[ 6.8547e+00,  4.2251e+01, -2.5955e+00,  6.5650e-01,  '\\\n        '6.2480e-01,\\n          '\\\n        '6.9540e-01,  2.5380e-01],\\n        '\\\n        '[-3.3628e+01,  1.1234e+01, -8.2176e+00,  2.8030e-01,  '\\\n        '2.5800e-02,\\n          '\\\n        '4.8960e-01,  3.2690e-01]]))'\n    assert expected_repr == str(cam_points)\n\n    # test concatenate\n    cam_points_clone = cam_points.clone()\n    cat_points = CameraPoints.cat([cam_points, cam_points_clone])\n    assert torch.allclose(cat_points.tensor[:len(cam_points)],\n                          cam_points.tensor)\n\n    # test iteration\n    for i, point in enumerate(cam_points):\n        assert torch.allclose(point, cam_points.tensor[i])\n\n    # test new_point\n    new_points = cam_points.new_point([[1, 2, 3, 4, 5, 6, 7]])\n    assert torch.allclose(\n        new_points.tensor,\n        torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=cam_points.tensor.dtype))\n\n    # test in_range_bev\n    point_bev_range = [-10, -10, 10, 10]\n    in_range_flags = cam_points.in_range_bev(point_bev_range)\n    expected_flags = 
torch.tensor([True, False, True, False])\n    assert torch.all(in_range_flags == expected_flags)\n\n    # test flip\n    cam_points.flip(bev_direction='horizontal')\n    expected_tensor = torch.tensor([[\n        -9.0722e+00, 4.7368e+01, -2.5382e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        2.6685e+01, 1.4790e+01, -8.0455e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        -6.8547e+00, 4.2251e+01, -2.5955e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        3.3628e+01, 1.1234e+01, -8.2176e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, cam_points.tensor, 1e-4)\n\n    cam_points.flip(bev_direction='vertical')\n    expected_tensor = torch.tensor([[\n        -9.0722e+00, 4.7368e+01, 2.5382e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        2.6685e+01, 1.4790e+01, 8.0455e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        -6.8547e+00, 4.2251e+01, 2.5955e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        3.3628e+01, 1.1234e+01, 8.2176e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, cam_points.tensor, 1e-4)\n\n\ndef test_lidar_points():\n    # test empty initialization\n    empty_boxes = []\n    points = LiDARPoints(empty_boxes)\n    assert points.tensor.shape[0] == 0\n    assert points.tensor.shape[1] == 3\n\n    # Test init with origin\n    points_np = np.array([[-5.24223238e+00, 4.00209696e+01, 2.97570381e-01],\n                          [-2.66751588e+01, 5.59499564e+00, -9.14345860e-01],\n                          [-5.80979675e+00, 3.54092357e+01, 2.00889888e-01],\n                          [-3.13086877e+01, 1.09007628e+00, -1.94612112e-01]],\n                         dtype=np.float32)\n    lidar_points = LiDARPoints(points_np, points_dim=3)\n    assert lidar_points.tensor.shape[0] == 4\n\n    # Test init with color and height\n    points_np = np.array([[\n        -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,\n        0.4974, 0.9409\n    ],\n                          [\n                              -2.66751588e+01, 5.59499564e+00, -9.14345860e-01,\n                              0.1502, 0.3707, 0.1086, 0.6297\n                          ],\n                          [\n                              -5.80979675e+00, 3.54092357e+01, 2.00889888e-01,\n  
                            0.6565, 0.6248, 0.6954, 0.2538\n                          ],\n                          [\n                              -3.13086877e+01, 1.09007628e+00, -1.94612112e-01,\n                              0.2803, 0.0258, 0.4896, 0.3269\n                          ]],\n                         dtype=np.float32)\n    lidar_points = LiDARPoints(\n        points_np,\n        points_dim=7,\n        attribute_dims=dict(color=[3, 4, 5], height=6))\n    expected_tensor = torch.tensor([[\n        -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,\n        0.4974, 0.9409\n    ],\n                                    [\n                                        -2.66751588e+01, 5.59499564e+00,\n                                        -9.14345860e-01, 0.1502, 0.3707,\n                                        0.1086, 0.6297\n                                    ],\n                                    [\n                                        -5.80979675e+00, 3.54092357e+01,\n                                        2.00889888e-01, 0.6565, 0.6248, 0.6954,\n                                        0.2538\n                                    ],\n                                    [\n                                        -3.13086877e+01, 1.09007628e+00,\n                                        -1.94612112e-01, 0.2803, 0.0258,\n                                        0.4896, 0.3269\n                                    ]])\n\n    assert torch.allclose(expected_tensor, lidar_points.tensor)\n    assert torch.allclose(expected_tensor[:, :3], lidar_points.coord)\n    assert torch.allclose(expected_tensor[:, 3:6], lidar_points.color)\n    assert torch.allclose(expected_tensor[:, 6], lidar_points.height)\n\n    # test points clone\n    new_lidar_points = lidar_points.clone()\n    assert torch.allclose(new_lidar_points.tensor, lidar_points.tensor)\n\n    # test points shuffle\n    new_lidar_points.shuffle()\n    assert new_lidar_points.tensor.shape == torch.Size([4, 7])\n\n    # test points rotation\n    rot_mat = torch.tensor([[0.93629336, -0.27509585, 0.21835066],\n                            [0.28962948, 0.95642509, -0.03695701],\n                            [-0.19866933, 0.0978434, 0.97517033]])\n    lidar_points.rotate(rot_mat)\n    expected_tensor = torch.tensor([[\n        6.6239e+00, 3.9748e+01, -2.3335e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.3174e+01, 1.2600e+01, -6.9230e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        4.7760e+00, 3.5484e+01, -2.3813e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.8960e+01, 9.6364e+00, -7.0663e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, lidar_points.tensor, 1e-3)\n\n    new_lidar_points = lidar_points.clone()\n    new_lidar_points.rotate(0.1, axis=2)\n    expected_tensor = torch.tensor([[\n        2.6226e+00, 4.0211e+01, -2.3335e+00, 6.6660e-01, 
1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.4316e+01, 1.0224e+01, -6.9230e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        1.2096e+00, 3.5784e+01, -2.3813e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.9777e+01, 6.6971e+00, -7.0663e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, new_lidar_points.tensor, 1e-3)\n\n    # test points translation\n    translation_vector = torch.tensor([0.93629336, -0.27509585, 0.21835066])\n    lidar_points.translate(translation_vector)\n    expected_tensor = torch.tensor([[\n        7.5602e+00, 3.9473e+01, -2.1152e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.2237e+01, 1.2325e+01, -6.7046e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        5.7123e+00, 3.5209e+01, -2.1629e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.8023e+01, 9.3613e+00, -6.8480e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, lidar_points.tensor, 1e-4)\n\n    # test points filter\n    point_range = [-10, -40, -10, 10, 40, 10]\n    in_range_flags = lidar_points.in_range_3d(point_range)\n    expected_flags = torch.tensor([True, False, True, False])\n    assert torch.all(in_range_flags == expected_flags)\n\n    # test points scale\n    lidar_points.scale(1.2)\n    expected_tensor = torch.tensor([[\n        9.0722e+00, 4.7368e+01, -2.5382e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.6685e+01, 1.4790e+01, -8.0455e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        6.8547e+00, 4.2251e+01, -2.5955e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -3.3628e+01, 1.1234e+01, -8.2176e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    
assert torch.allclose(expected_tensor, lidar_points.tensor, 1e-3)\n\n    # test get_item\n    expected_tensor = torch.tensor(\n        [[-26.6848, 14.7898, -8.0455, 0.1502, 0.3707, 0.1086, 0.6297]])\n    assert torch.allclose(expected_tensor, lidar_points[1].tensor, 1e-4)\n    expected_tensor = torch.tensor(\n        [[-26.6848, 14.7898, -8.0455, 0.1502, 0.3707, 0.1086, 0.6297],\n         [6.8547, 42.2509, -2.5955, 0.6565, 0.6248, 0.6954, 0.2538]])\n    assert torch.allclose(expected_tensor, lidar_points[1:3].tensor, 1e-4)\n    mask = torch.tensor([True, False, True, False])\n    expected_tensor = torch.tensor(\n        [[9.0722, 47.3678, -2.5382, 0.6666, 0.1956, 0.4974, 0.9409],\n         [6.8547, 42.2509, -2.5955, 0.6565, 0.6248, 0.6954, 0.2538]])\n    assert torch.allclose(expected_tensor, lidar_points[mask].tensor, 1e-4)\n\n    # test length\n    assert len(lidar_points) == 4\n\n    # test repr\n    expected_repr = 'LiDARPoints(\\n    '\\\n        'tensor([[ 9.0722e+00,  4.7368e+01, -2.5382e+00,  '\\\n        '6.6660e-01,  1.9560e-01,\\n          4.9740e-01,  '\\\n        '9.4090e-01],\\n        '\\\n        '[-2.6685e+01,  1.4790e+01, -8.0455e+00,  1.5020e-01,  '\\\n        '3.7070e-01,\\n          '\\\n        '1.0860e-01,  6.2970e-01],\\n        '\\\n        '[ 6.8547e+00,  4.2251e+01, -2.5955e+00,  6.5650e-01,  '\\\n        '6.2480e-01,\\n          '\\\n        '6.9540e-01,  2.5380e-01],\\n        '\\\n        '[-3.3628e+01,  1.1234e+01, -8.2176e+00,  2.8030e-01,  '\\\n        '2.5800e-02,\\n          '\\\n        '4.8960e-01,  3.2690e-01]]))'\n    assert expected_repr == str(lidar_points)\n\n    # test concatenate\n    lidar_points_clone = lidar_points.clone()\n    cat_points = LiDARPoints.cat([lidar_points, lidar_points_clone])\n    assert torch.allclose(cat_points.tensor[:len(lidar_points)],\n                          lidar_points.tensor)\n\n    # test iteration\n    for i, point in enumerate(lidar_points):\n        assert torch.allclose(point, lidar_points.tensor[i])\n\n    # test new_point\n    new_points = lidar_points.new_point([[1, 2, 3, 4, 5, 6, 7]])\n    assert torch.allclose(\n        new_points.tensor,\n        torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=lidar_points.tensor.dtype))\n\n    # test in_range_bev\n    point_bev_range = [-30, -40, 30, 40]\n    in_range_flags = lidar_points.in_range_bev(point_bev_range)\n    expected_flags = torch.tensor([False, True, False, False])\n    assert torch.all(in_range_flags == expected_flags)\n\n    # test flip\n    lidar_points.flip(bev_direction='horizontal')\n    expected_tensor = torch.tensor([[\n        9.0722e+00, -4.7368e+01, -2.5382e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.6685e+01, -1.4790e+01, -8.0455e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        6.8547e+00, -4.2251e+01, -2.5955e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -3.3628e+01, -1.1234e+01, -8.2176e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                
                    ]])\n    assert torch.allclose(expected_tensor, lidar_points.tensor, 1e-4)\n\n    lidar_points.flip(bev_direction='vertical')\n    expected_tensor = torch.tensor([[\n        -9.0722e+00, -4.7368e+01, -2.5382e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        2.6685e+01, -1.4790e+01, -8.0455e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        -6.8547e+00, -4.2251e+01, -2.5955e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        3.3628e+01, -1.1234e+01, -8.2176e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, lidar_points.tensor, 1e-4)\n\n\ndef test_depth_points():\n    # test empty initialization\n    empty_boxes = []\n    points = DepthPoints(empty_boxes)\n    assert points.tensor.shape[0] == 0\n    assert points.tensor.shape[1] == 3\n\n    # Test init with origin\n    points_np = np.array([[-5.24223238e+00, 4.00209696e+01, 2.97570381e-01],\n                          [-2.66751588e+01, 5.59499564e+00, -9.14345860e-01],\n                          [-5.80979675e+00, 3.54092357e+01, 2.00889888e-01],\n                          [-3.13086877e+01, 1.09007628e+00, -1.94612112e-01]],\n                         dtype=np.float32)\n    depth_points = DepthPoints(points_np, points_dim=3)\n    assert depth_points.tensor.shape[0] == 4\n\n    # Test init with color and height\n    points_np = np.array([[\n        -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,\n        0.4974, 0.9409\n    ],\n                          [\n                              -2.66751588e+01, 5.59499564e+00, -9.14345860e-01,\n                              0.1502, 0.3707, 0.1086, 0.6297\n                          ],\n                          [\n                              -5.80979675e+00, 3.54092357e+01, 2.00889888e-01,\n                              0.6565, 0.6248, 0.6954, 0.2538\n                          ],\n                          [\n                              -3.13086877e+01, 1.09007628e+00, -1.94612112e-01,\n                              0.2803, 0.0258, 0.4896, 0.3269\n                          ]],\n                         dtype=np.float32)\n    depth_points = DepthPoints(\n        points_np,\n        points_dim=7,\n        attribute_dims=dict(color=[3, 4, 5], height=6))\n    expected_tensor = torch.tensor([[\n        -5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,\n        0.4974, 0.9409\n    ],\n                                    [\n                                        -2.66751588e+01, 5.59499564e+00,\n                                        -9.14345860e-01, 0.1502, 0.3707,\n                                        0.1086, 0.6297\n                                    ],\n                                    [\n                                        -5.80979675e+00, 3.54092357e+01,\n                                        2.00889888e-01, 0.6565, 0.6248, 0.6954,\n                                        
0.2538\n                                    ],\n                                    [\n                                        -3.13086877e+01, 1.09007628e+00,\n                                        -1.94612112e-01, 0.2803, 0.0258,\n                                        0.4896, 0.3269\n                                    ]])\n\n    assert torch.allclose(expected_tensor, depth_points.tensor)\n    assert torch.allclose(expected_tensor[:, :3], depth_points.coord)\n    assert torch.allclose(expected_tensor[:, 3:6], depth_points.color)\n    assert torch.allclose(expected_tensor[:, 6], depth_points.height)\n\n    # test points clone\n    new_depth_points = depth_points.clone()\n    assert torch.allclose(new_depth_points.tensor, depth_points.tensor)\n\n    # test points shuffle\n    new_depth_points.shuffle()\n    assert new_depth_points.tensor.shape == torch.Size([4, 7])\n\n    # test points rotation\n    rot_mat = torch.tensor([[0.93629336, -0.27509585, 0.21835066],\n                            [0.28962948, 0.95642509, -0.03695701],\n                            [-0.19866933, 0.0978434, 0.97517033]])\n    depth_points.rotate(rot_mat)\n    expected_tensor = torch.tensor([[\n        6.6239e+00, 3.9748e+01, -2.3335e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.3174e+01, 1.2600e+01, -6.9230e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        4.7760e+00, 3.5484e+01, -2.3813e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.8960e+01, 9.6364e+00, -7.0663e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, depth_points.tensor, 1e-3)\n\n    new_depth_points = depth_points.clone()\n    new_depth_points.rotate(0.1, axis=2)\n    expected_tensor = torch.tensor([[\n        2.6226e+00, 4.0211e+01, -2.3335e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.4316e+01, 1.0224e+01, -6.9230e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        1.2096e+00, 3.5784e+01, -2.3813e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.9777e+01, 6.6971e+00, -7.0663e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, new_depth_points.tensor, 1e-3)\n\n    # test points translation\n    translation_vector = torch.tensor([0.93629336, -0.27509585, 0.21835066])\n    
depth_points.translate(translation_vector)\n    expected_tensor = torch.tensor([[\n        7.5602e+00, 3.9473e+01, -2.1152e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.2237e+01, 1.2325e+01, -6.7046e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        5.7123e+00, 3.5209e+01, -2.1629e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -2.8023e+01, 9.3613e+00, -6.8480e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, depth_points.tensor, 1e-4)\n\n    # test points filter\n    point_range = [-10, -40, -10, 10, 40, 10]\n    in_range_flags = depth_points.in_range_3d(point_range)\n    expected_flags = torch.tensor([True, False, True, False])\n    assert torch.all(in_range_flags == expected_flags)\n\n    # test points scale\n    depth_points.scale(1.2)\n    expected_tensor = torch.tensor([[\n        9.0722e+00, 4.7368e+01, -2.5382e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        -2.6685e+01, 1.4790e+01, -8.0455e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        6.8547e+00, 4.2251e+01, -2.5955e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        -3.3628e+01, 1.1234e+01, -8.2176e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, depth_points.tensor, 1e-3)\n\n    # test get_item\n    expected_tensor = torch.tensor(\n        [[-26.6848, 14.7898, -8.0455, 0.1502, 0.3707, 0.1086, 0.6297]])\n    assert torch.allclose(expected_tensor, depth_points[1].tensor, 1e-4)\n    expected_tensor = torch.tensor(\n        [[-26.6848, 14.7898, -8.0455, 0.1502, 0.3707, 0.1086, 0.6297],\n         [6.8547, 42.2509, -2.5955, 0.6565, 0.6248, 0.6954, 0.2538]])\n    assert torch.allclose(expected_tensor, depth_points[1:3].tensor, 1e-4)\n    mask = torch.tensor([True, False, True, False])\n    expected_tensor = torch.tensor(\n        [[9.0722, 47.3678, -2.5382, 0.6666, 0.1956, 0.4974, 0.9409],\n         [6.8547, 42.2509, -2.5955, 0.6565, 0.6248, 0.6954, 0.2538]])\n    assert torch.allclose(expected_tensor, depth_points[mask].tensor, 1e-4)\n\n    # test length\n    assert len(depth_points) == 4\n\n    # test repr\n    expected_repr = 'DepthPoints(\\n    '\\\n        'tensor([[ 9.0722e+00,  4.7368e+01, -2.5382e+00,  '\\\n        '6.6660e-01,  1.9560e-01,\\n          4.9740e-01,  '\\\n        
'9.4090e-01],\\n        '\\\n        '[-2.6685e+01,  1.4790e+01, -8.0455e+00,  1.5020e-01,  '\\\n        '3.7070e-01,\\n          '\\\n        '1.0860e-01,  6.2970e-01],\\n        '\\\n        '[ 6.8547e+00,  4.2251e+01, -2.5955e+00,  6.5650e-01,  '\\\n        '6.2480e-01,\\n          '\\\n        '6.9540e-01,  2.5380e-01],\\n        '\\\n        '[-3.3628e+01,  1.1234e+01, -8.2176e+00,  2.8030e-01,  '\\\n        '2.5800e-02,\\n          '\\\n        '4.8960e-01,  3.2690e-01]]))'\n    assert expected_repr == str(depth_points)\n\n    # test concatenate\n    depth_points_clone = depth_points.clone()\n    cat_points = DepthPoints.cat([depth_points, depth_points_clone])\n    assert torch.allclose(cat_points.tensor[:len(depth_points)],\n                          depth_points.tensor)\n\n    # test iteration\n    for i, point in enumerate(depth_points):\n        assert torch.allclose(point, depth_points.tensor[i])\n\n    # test new_point\n    new_points = depth_points.new_point([[1, 2, 3, 4, 5, 6, 7]])\n    assert torch.allclose(\n        new_points.tensor,\n        torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=depth_points.tensor.dtype))\n\n    # test in_range_bev\n    point_bev_range = [-30, -40, 30, 40]\n    in_range_flags = depth_points.in_range_bev(point_bev_range)\n    expected_flags = torch.tensor([False, True, False, False])\n    assert torch.all(in_range_flags == expected_flags)\n\n    # test flip\n    depth_points.flip(bev_direction='horizontal')\n    expected_tensor = torch.tensor([[\n        -9.0722e+00, 4.7368e+01, -2.5382e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        2.6685e+01, 1.4790e+01, -8.0455e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        -6.8547e+00, 4.2251e+01, -2.5955e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        3.3628e+01, 1.1234e+01, -8.2176e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                                        3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, depth_points.tensor, 1e-4)\n\n    depth_points.flip(bev_direction='vertical')\n    expected_tensor = torch.tensor([[\n        -9.0722e+00, -4.7368e+01, -2.5382e+00, 6.6660e-01, 1.9560e-01,\n        4.9740e-01, 9.4090e-01\n    ],\n                                    [\n                                        2.6685e+01, -1.4790e+01, -8.0455e+00,\n                                        1.5020e-01, 3.7070e-01, 1.0860e-01,\n                                        6.2970e-01\n                                    ],\n                                    [\n                                        -6.8547e+00, -4.2251e+01, -2.5955e+00,\n                                        6.5650e-01, 6.2480e-01, 6.9540e-01,\n                                        2.5380e-01\n                                    ],\n                                    [\n                                        3.3628e+01, -1.1234e+01, -8.2176e+00,\n                                        2.8030e-01, 2.5800e-02, 4.8960e-01,\n                             
           3.2690e-01\n                                    ]])\n    assert torch.allclose(expected_tensor, depth_points.tensor, 1e-4)\n"
  },
  {
    "path": "tests/test_utils/test_samplers.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet3d.core.bbox.assigners import MaxIoUAssigner\nfrom mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler\n\n\ndef test_iou_piecewise_sampler():\n    if not torch.cuda.is_available():\n        pytest.skip()\n    assigner = MaxIoUAssigner(\n        pos_iou_thr=0.55,\n        neg_iou_thr=0.55,\n        min_pos_iou=0.55,\n        ignore_iof_thr=-1,\n        iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'))\n    bboxes = torch.tensor(\n        [[32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3],\n         [32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3],\n         [0, 0, 0, 10, 10, 10, 0.2], [10, 10, 10, 20, 20, 15, 0.6],\n         [5, 5, 5, 15, 15, 15, 0.7], [5, 5, 5, 15, 15, 15, 0.7],\n         [5, 5, 5, 15, 15, 15, 0.7], [32, 32, 16, 8, 38, 42, -0.3],\n         [32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3]],\n        dtype=torch.float32).cuda()\n    gt_bboxes = torch.tensor(\n        [[0, 0, 0, 10, 10, 9, 0.2], [5, 10, 10, 20, 20, 15, 0.6]],\n        dtype=torch.float32).cuda()\n    gt_labels = torch.tensor([1, 1], dtype=torch.int64).cuda()\n    assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)\n\n    sampler = IoUNegPiecewiseSampler(\n        num=10,\n        pos_fraction=0.55,\n        neg_piece_fractions=[0.8, 0.2],\n        neg_iou_piece_thrs=[0.55, 0.1],\n        neg_pos_ub=-1,\n        add_gt_as_proposals=False)\n\n    sample_result = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)\n\n    assert sample_result.pos_inds == 4\n    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)\n    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)\n"
  },
  {
    "path": "tests/test_utils/test_utils.py",
    "content": "import torch\n\nfrom mmdet3d.core import draw_heatmap_gaussian\n\n\ndef test_gaussian():\n    heatmap = torch.zeros((128, 128))\n    ct_int = torch.tensor([64, 64], dtype=torch.int32)\n    radius = 2\n    draw_heatmap_gaussian(heatmap, ct_int, radius)\n    assert torch.isclose(torch.sum(heatmap), torch.tensor(4.3505), atol=1e-3)\n"
  },
  {
    "path": "tools/analysis_tools/analyze_logs.py",
    "content": "import argparse\nimport json\nimport numpy as np\nimport seaborn as sns\nfrom collections import defaultdict\nfrom matplotlib import pyplot as plt\n\n\ndef cal_train_time(log_dicts, args):\n    for i, log_dict in enumerate(log_dicts):\n        print(f'{\"-\" * 5}Analyze train time of {args.json_logs[i]}{\"-\" * 5}')\n        all_times = []\n        for epoch in log_dict.keys():\n            if args.include_outliers:\n                all_times.append(log_dict[epoch]['time'])\n            else:\n                all_times.append(log_dict[epoch]['time'][1:])\n        all_times = np.array(all_times)\n        epoch_ave_time = all_times.mean(-1)\n        slowest_epoch = epoch_ave_time.argmax()\n        fastest_epoch = epoch_ave_time.argmin()\n        std_over_epoch = epoch_ave_time.std()\n        print(f'slowest epoch {slowest_epoch + 1}, '\n              f'average time is {epoch_ave_time[slowest_epoch]:.4f}')\n        print(f'fastest epoch {fastest_epoch + 1}, '\n              f'average time is {epoch_ave_time[fastest_epoch]:.4f}')\n        print(f'time std over epochs is {std_over_epoch:.4f}')\n        print(f'average iter time: {np.mean(all_times):.4f} s/iter')\n        print()\n\n\ndef plot_curve(log_dicts, args):\n    if args.backend is not None:\n        plt.switch_backend(args.backend)\n    sns.set_style(args.style)\n    # if legend is None, use {filename}_{key} as legend\n    legend = args.legend\n    if legend is None:\n        legend = []\n        for json_log in args.json_logs:\n            for metric in args.keys:\n                legend.append(f'{json_log}_{metric}')\n    assert len(legend) == (len(args.json_logs) * len(args.keys))\n    metrics = args.keys\n\n    num_metrics = len(metrics)\n    for i, log_dict in enumerate(log_dicts):\n        epochs = list(log_dict.keys())\n        for j, metric in enumerate(metrics):\n            print(f'plot curve of {args.json_logs[i]}, metric is {metric}')\n            if metric not in log_dict[epochs[args.interval - 1]]:\n                raise KeyError(\n                    f'{args.json_logs[i]} does not contain metric {metric}')\n\n            if args.mode == 'eval':\n                xs = np.arange(args.interval, max(epochs) + 1, args.interval)\n                ys = []\n                for epoch in epochs[args.interval - 1::args.interval]:\n                    ys += log_dict[epoch][metric]\n                ax = plt.gca()\n                ax.set_xticks(xs)\n                plt.xlabel('epoch')\n                plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o')\n            else:\n                xs = []\n                ys = []\n                num_iters_per_epoch = \\\n                    log_dict[epochs[args.interval-1]]['iter'][-1]\n                for epoch in epochs[args.interval - 1::args.interval]:\n                    iters = log_dict[epoch]['iter']\n                    if log_dict[epoch]['mode'][-1] == 'val':\n                        iters = iters[:-1]\n                    xs.append(\n                        np.array(iters) + (epoch - 1) * num_iters_per_epoch)\n                    ys.append(np.array(log_dict[epoch][metric][:len(iters)]))\n                xs = np.concatenate(xs)\n                ys = np.concatenate(ys)\n                plt.xlabel('iter')\n                plt.plot(\n                    xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)\n            plt.legend()\n        if args.title is not None:\n            plt.title(args.title)\n    if args.out is None:\n        plt.show()\n    
else:\n        print(f'save curve to: {args.out}')\n        plt.savefig(args.out)\n        plt.cla()\n\n\ndef add_plot_parser(subparsers):\n    parser_plt = subparsers.add_parser(\n        'plot_curve', help='parser for plotting curves')\n    parser_plt.add_argument(\n        'json_logs',\n        type=str,\n        nargs='+',\n        help='path of train log in json format')\n    parser_plt.add_argument(\n        '--keys',\n        type=str,\n        nargs='+',\n        default=['mAP_0.25'],\n        help='the metric that you want to plot')\n    parser_plt.add_argument('--title', type=str, help='title of figure')\n    parser_plt.add_argument(\n        '--legend',\n        type=str,\n        nargs='+',\n        default=None,\n        help='legend of each plot')\n    parser_plt.add_argument(\n        '--backend', type=str, default=None, help='backend of plt')\n    parser_plt.add_argument(\n        '--style', type=str, default='dark', help='style of plt')\n    parser_plt.add_argument('--out', type=str, default=None)\n    parser_plt.add_argument('--mode', type=str, default='train')\n    parser_plt.add_argument('--interval', type=int, default=1)\n\n\ndef add_time_parser(subparsers):\n    parser_time = subparsers.add_parser(\n        'cal_train_time',\n        help='parser for computing the average time per training iteration')\n    parser_time.add_argument(\n        'json_logs',\n        type=str,\n        nargs='+',\n        help='path of train log in json format')\n    parser_time.add_argument(\n        '--include-outliers',\n        action='store_true',\n        help='include the first value of every epoch when computing '\n        'the average time')\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Analyze Json Log')\n    # currently only support plot curve and calculate average train time\n    subparsers = parser.add_subparsers(dest='task', help='task parser')\n    add_plot_parser(subparsers)\n    add_time_parser(subparsers)\n    args = parser.parse_args()\n    return args\n\n\ndef load_json_logs(json_logs):\n    # load and convert json_logs to log_dict, key is epoch, value is a sub dict\n    # keys of sub dict is different metrics, e.g. memory, bbox_mAP\n    # value of sub dict is a list of corresponding values of all iterations\n    log_dicts = [dict() for _ in json_logs]\n    for json_log, log_dict in zip(json_logs, log_dicts):\n        with open(json_log, 'r') as log_file:\n            for line in log_file:\n                log = json.loads(line.strip())\n                # skip lines without `epoch` field\n                if 'epoch' not in log:\n                    continue\n                epoch = log.pop('epoch')\n                if epoch not in log_dict:\n                    log_dict[epoch] = defaultdict(list)\n                for k, v in log.items():\n                    log_dict[epoch][k].append(v)\n    return log_dicts\n\n\ndef main():\n    args = parse_args()\n\n    json_logs = args.json_logs\n    for json_log in json_logs:\n        assert json_log.endswith('.json')\n\n    log_dicts = load_json_logs(json_logs)\n\n    eval(args.task)(log_dicts, args)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/analysis_tools/benchmark.py",
    "content": "import argparse\nimport time\nimport torch\nfrom mmcv import Config\nfrom mmcv.parallel import MMDataParallel\nfrom mmcv.runner import load_checkpoint\n\nfrom mmdet3d.datasets import build_dataloader, build_dataset\nfrom mmdet3d.models import build_detector\nfrom mmdet.core import wrap_fp16_model\nfrom tools.misc.fuse_conv_bn import fuse_module\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='MMDet benchmark a model')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--samples', default=2000, help='samples to benchmark')\n    parser.add_argument(\n        '--log-interval', default=50, help='interval of logging')\n    parser.add_argument(\n        '--fuse-conv-bn',\n        action='store_true',\n        help='Whether to fuse conv and bn, this will slightly increase'\n        'the inference speed')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    cfg.model.pretrained = None\n    cfg.data.test.test_mode = True\n\n    # build the dataloader\n    # TODO: support multiple images per gpu (only minor changes are needed)\n    dataset = build_dataset(cfg.data.test)\n    data_loader = build_dataloader(\n        dataset,\n        samples_per_gpu=1,\n        workers_per_gpu=cfg.data.workers_per_gpu,\n        dist=False,\n        shuffle=False)\n\n    # build the model and load checkpoint\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        wrap_fp16_model(model)\n    load_checkpoint(model, args.checkpoint, map_location='cpu')\n    if args.fuse_conv_bn:\n        model = fuse_module(model)\n\n    model = MMDataParallel(model, device_ids=[0])\n\n    model.eval()\n\n    # the first several iterations may be very slow so skip them\n    num_warmup = 5\n    pure_inf_time = 0\n\n    # benchmark with several samples and take the average\n    for i, data in enumerate(data_loader):\n\n        torch.cuda.synchronize()\n        start_time = time.perf_counter()\n\n        with torch.no_grad():\n            model(return_loss=False, rescale=True, **data)\n\n        torch.cuda.synchronize()\n        elapsed = time.perf_counter() - start_time\n\n        if i >= num_warmup:\n            pure_inf_time += elapsed\n            if (i + 1) % args.log_interval == 0:\n                fps = (i + 1 - num_warmup) / pure_inf_time\n                print(f'Done image [{i + 1:<3}/ {args.samples}], '\n                      f'fps: {fps:.1f} img / s')\n\n        if (i + 1) == args.samples:\n            pure_inf_time += elapsed\n            fps = (i + 1 - num_warmup) / pure_inf_time\n            print(f'Overall fps: {fps:.1f} img / s')\n            break\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/analysis_tools/get_flops.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\r\nimport argparse\r\n\r\nimport torch\r\nfrom mmcv import Config, DictAction\r\n\r\nfrom mmdet3d.models import build_detector\r\n\r\ntry:\r\n    from mmcv.cnn import get_model_complexity_info\r\nexcept ImportError:\r\n    raise ImportError('Please upgrade mmcv to >0.6.2')\r\n\r\n\r\ndef parse_args():\r\n    parser = argparse.ArgumentParser(description='Train a detector')\r\n    parser.add_argument('config', help='train config file path')\r\n    parser.add_argument(\r\n        '--shape',\r\n        type=int,\r\n        nargs='+',\r\n        default=[40000, 4],\r\n        help='input point cloud size')\r\n    parser.add_argument(\r\n        '--modality',\r\n        type=str,\r\n        default='point',\r\n        choices=['point', 'image', 'multi'],\r\n        help='input data modality')\r\n    parser.add_argument(\r\n        '--cfg-options',\r\n        nargs='+',\r\n        action=DictAction,\r\n        help='override some settings in the used config, the key-value pair '\r\n        'in xxx=yyy format will be merged into config file. If the value to '\r\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\r\n        'It also allows nested list/tuple values, e.g. key=\"[(a,b),(c,d)]\" '\r\n        'Note that the quotation marks are necessary and that no white space '\r\n        'is allowed.')\r\n    args = parser.parse_args()\r\n    return args\r\n\r\n\r\ndef main():\r\n\r\n    args = parse_args()\r\n\r\n    if args.modality == 'point':\r\n        assert len(args.shape) == 2, 'invalid input shape'\r\n        input_shape = tuple(args.shape)\r\n    elif args.modality == 'image':\r\n        if len(args.shape) == 1:\r\n            input_shape = (3, args.shape[0], args.shape[0])\r\n        elif len(args.shape) == 2:\r\n            input_shape = (3, ) + tuple(args.shape)\r\n        else:\r\n            raise ValueError('invalid input shape')\r\n    elif args.modality == 'multi':\r\n        raise NotImplementedError(\r\n            'FLOPs counter is currently not supported for models with '\r\n            'multi-modality input')\r\n\r\n    cfg = Config.fromfile(args.config)\r\n    if args.cfg_options is not None:\r\n        cfg.merge_from_dict(args.cfg_options)\r\n\r\n    model = build_detector(\r\n        cfg.model,\r\n        train_cfg=cfg.get('train_cfg'),\r\n        test_cfg=cfg.get('test_cfg'))\r\n    if torch.cuda.is_available():\r\n        model.cuda()\r\n    model.eval()\r\n\r\n    if hasattr(model, 'forward_dummy'):\r\n        model.forward = model.forward_dummy\r\n    else:\r\n        raise NotImplementedError(\r\n            'FLOPs counter is currently not supported for {}'.format(\r\n                model.__class__.__name__))\r\n\r\n    flops, params = get_model_complexity_info(model, input_shape)\r\n    split_line = '=' * 30\r\n    print(f'{split_line}\\nInput shape: {input_shape}\\n'\r\n          f'Flops: {flops}\\nParams: {params}\\n{split_line}')\r\n    print('!!!Please be cautious if you use the results in papers. '\r\n          'You may need to check if all ops are supported and verify that the '\r\n          'flops computation is correct.')\r\n\r\n\r\nif __name__ == '__main__':\r\n    main()"
  },
  {
    "path": "tools/combine_view_info.py",
    "content": "import os\r\nimport pickle\r\nimport json\r\nimport numpy as np\r\nimport torch\r\nimport argparse\r\nfrom mmdet3d.core import Box3DMode, LiDARInstance3DBoxes\r\n\r\nfrom mmdet3d.core.bbox.structures.utils import limit_period\r\n\r\ncam_orders = ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_RIGHT', 'CAM_BACK', 'CAM_BACK_LEFT']\r\n\r\ndef project_to_image(points, cam_int, cam_ext, img_h, img_w):\r\n    num_points = points.shape[0]\r\n    pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)\r\n    pts_cam_4d = pts_4d @ cam_ext.T\r\n    pts_2d = pts_cam_4d @ cam_int.T\r\n    # cam_points is Tensor of Nx4 whose last column is 1\r\n    # transform camera coordinate to image coordinate\r\n\r\n    depth_mask = pts_2d[:, 2] > 0\r\n    pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)\r\n    pts_2d[:, 0] /= pts_2d[:, 2]\r\n    pts_2d[:, 1] /= pts_2d[:, 2]\r\n\r\n    fov_inds = ((pts_2d[:, 0] < img_w)\r\n                & (pts_2d[:, 0] >= 0)\r\n                & (pts_2d[:, 1] < img_h)\r\n                & (pts_2d[:, 1] >= 0)\r\n                & depth_mask)\r\n    return pts_2d, pts_cam_4d, fov_inds\r\n\r\n\r\ndef combine_data(data_root, info_file, coco_file, output_file):\r\n    info_path = os.path.join(data_root, info_file)\r\n    coco_file_path = os.path.join(data_root, coco_file)\r\n\r\n    with open(info_path, \"rb\") as file:\r\n        info = pickle.load(file, encoding=\"bytes\")\r\n\r\n    with open(coco_file_path, \"r\") as file:\r\n        coco = json.load(file)\r\n\r\n    id2image = {}\r\n    for image in coco[\"images\"]:\r\n        image_id = image['id']\r\n        if image_id not in id2image:\r\n            id2image[image_id] = image\r\n    print(\"Generate new info file\")\r\n    all_depths = []\r\n\r\n    for info_id, info_dict in enumerate(info['infos']):\r\n        if info_id % 1000 == 1:\r\n            print(info_id, \"/\", len(info['infos']))\r\n        valid_flag = info_dict['num_lidar_pts'] > 0\r\n\r\n        gt_visible_3d = np.zeros((info_dict['gt_boxes'].shape[0], ), dtype=np.int32)\r\n\r\n        gt_boxes_3d_tensor = info_dict['gt_boxes'][valid_flag]\r\n\r\n        gt_names_3d = info_dict['gt_names'][valid_flag]\r\n        gt_vel_2d = info_dict['gt_velocity'][valid_flag]\r\n        gt_vel_3d = np.concatenate([gt_vel_2d, np.zeros_like(gt_vel_2d[:,:1])], axis=1)\r\n        gt_visible_3d_valid = np.zeros((gt_boxes_3d_tensor.shape[0], ), dtype=np.int32)\r\n\r\n        if gt_boxes_3d_tensor.shape[0] == 0:\r\n            info_dict['gt_bboxes2d_view'] = np.zeros((0, 4))\r\n            info_dict['gt_names2d_view'] = []\r\n            info_dict['gt_viewsIDs'] = np.zeros(0)\r\n            info_dict['gt_pts_centers_view'] = np.zeros((0, 3))\r\n            info_dict['gt_img_centers_view'] = np.zeros((0, 3))\r\n            info_dict['gt_bboxes_cam_view'] = np.zeros((0, 7))\r\n            info_dict['gt_velocity_cam_view'] = np.zeros((0, 2))\r\n            info_dict['gt_visible'] = gt_visible_3d\r\n            info_dict['gt_bboxes_lidar_view'] = np.zeros((0, 7))\r\n            info_dict['gt_velocity_lidar_view'] = np.zeros((0, 2))\r\n            continue\r\n\r\n        gt_boxes_3d = LiDARInstance3DBoxes(gt_boxes_3d_tensor, box_dim=7, origin=(0.5, 0.5, 0.5)).convert_to(Box3DMode.LIDAR)\r\n\r\n        corners = gt_boxes_3d.corners\r\n        centers = gt_boxes_3d.gravity_center\r\n        dims = gt_boxes_3d.dims\r\n        yaws = gt_boxes_3d.yaw[:,None]\r\n\r\n        view_ids = []\r\n        bboxes = []\r\n        gt_names = 
[]\r\n        pts_centers = []\r\n        img_centers = []\r\n\r\n        bboxes_cam_3d = []\r\n        vel_cam = []\r\n\r\n        bboxes_lidar = []\r\n        vels_lidar = []\r\n\r\n        for view_id, cam in enumerate(cam_orders):\r\n            cam_info = info_dict['cams'][cam]\r\n            lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])\r\n            lidar2cam_t = cam_info['sensor2lidar_translation'] @ lidar2cam_r.T\r\n            lidar2cam_rt = np.eye(4)\r\n            lidar2cam_rt[:3, :3] = lidar2cam_r.T\r\n            lidar2cam_rt[3, :3] = -lidar2cam_t\r\n            intrinsic = cam_info['cam_intrinsic']\r\n            viewpad = np.eye(4)\r\n            viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic\r\n            lidar2img_rt = (viewpad @ lidar2cam_rt.T)\r\n\r\n            image_token = cam_info['sample_data_token']\r\n            image_info = id2image[image_token]\r\n            img_h = image_info['height']\r\n            img_w = image_info['width']\r\n\r\n            centers_2d, center_cam_3d, view_mask = project_to_image(centers, viewpad, lidar2cam_rt.T, img_h, img_w)\r\n            centers_2d = centers_2d[..., :3]\r\n            center_cam_3d = center_cam_3d[..., :3]\r\n\r\n            corners_view = corners[view_mask].cpu().numpy()\r\n            names_view = gt_names_3d[view_mask]\r\n            centers_view = centers[view_mask].cpu().numpy()\r\n            centers_2d_view = centers_2d[view_mask]\r\n            dims_view = dims[view_mask].cpu().numpy()\r\n            yaw_view = -yaws[view_mask] - np.pi / 2\r\n            center_cam_3d_view = center_cam_3d[view_mask]\r\n            vel_3d_view = gt_vel_3d[view_mask]\r\n\r\n            boxes_lidar_view = gt_boxes_3d_tensor[view_mask]\r\n            vels_lidar_view = gt_vel_2d[view_mask]\r\n\r\n            vel_cam_3d_view = vel_3d_view @ lidar2cam_r.T\r\n            vel_cam_view = vel_cam_3d_view[:, [0, 2]]\r\n\r\n            dims_view = dims_view[:, [1, 2, 0]]\r\n\r\n            rot_dir_view = torch.cat([torch.cos(yaw_view), torch.sin(yaw_view), torch.zeros_like(yaw_view)], dim=1)\r\n            rot_dir_view = rot_dir_view @ lidar2cam_r.T\r\n            rot_dir_view = rot_dir_view[:, [0, 2]]\r\n\r\n            yaw_view = -torch.atan2(rot_dir_view[:, 1:2], rot_dir_view[:, 0:1])\r\n            yaw_view = limit_period(yaw_view, period=2*np.pi).cpu().numpy()\r\n\r\n            bboxes_cam_3d_view = np.concatenate([center_cam_3d_view, dims_view, yaw_view], axis=1)\r\n\r\n            gt_visible_3d_valid_view = np.zeros((view_mask.sum(),), dtype=np.int32)\r\n\r\n            ann_num = corners_view.shape[0]\r\n            if ann_num == 0:\r\n                continue\r\n\r\n            for ann_id in range(ann_num):\r\n                corner_2d, _, _ = project_to_image(corners_view[ann_id], viewpad, lidar2cam_rt.T, img_h, img_w)  # (8, 2)\r\n                coord_min = np.min(corner_2d, axis=0)\r\n                coord_max = np.max(corner_2d, axis=0)\r\n\r\n                x1, y1 = coord_min[0], coord_min[1]\r\n                x2, y2 = coord_max[0], coord_max[1]\r\n\r\n                x1 = max(x1, 0)\r\n                y1 = max(y1, 0)\r\n                x2 = min(x2, img_w)\r\n                y2 = min(y2, img_h)\r\n                w = x2 - x1\r\n                h = y2 - y1\r\n\r\n                bboxes.append([x1, y1, w, h])\r\n                view_ids.append(view_id)\r\n                gt_names.append(names_view[ann_id])\r\n                pts_centers.append(centers_view[ann_id])\r\n                
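# the projected center is stored as (u, v, depth) in the image plane\r\n                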
img_centers.append(centers_2d_view[ann_id])\r\n                all_depths.append(centers_2d_view[ann_id][2])\r\n                bboxes_cam_3d.append(bboxes_cam_3d_view[ann_id])\r\n                vel_cam.append(vel_cam_view[ann_id])\r\n                bboxes_lidar.append(boxes_lidar_view[ann_id])\r\n                vels_lidar.append(vels_lidar_view[ann_id])\r\n                gt_visible_3d_valid_view[ann_id] = 1\r\n\r\n            gt_visible_3d_valid[view_mask] = gt_visible_3d_valid_view\r\n\r\n\r\n        view_ids = np.array(view_ids)\r\n        bboxes = np.array(bboxes).reshape(-1, 4)\r\n        pts_centers = np.array(pts_centers).reshape(-1, 3)\r\n        img_centers = np.array(img_centers).reshape(-1, 3)\r\n        bboxes_cam_3d = np.vstack(bboxes_cam_3d)\r\n        vel_cam = np.vstack(vel_cam)\r\n\r\n        bboxes_lidar = np.vstack(bboxes_lidar)\r\n        vels_lidar = np.vstack(vels_lidar)\r\n\r\n        gt_visible_3d[valid_flag] = gt_visible_3d_valid\r\n\r\n        info_dict['gt_bboxes2d_view'] = bboxes\r\n        info_dict['gt_names2d_view'] = gt_names\r\n        info_dict['gt_viewsIDs'] = view_ids\r\n        info_dict['gt_pts_centers_view'] = pts_centers\r\n        info_dict['gt_img_centers_view'] = img_centers\r\n\r\n        info_dict['gt_bboxes_cam_view'] = bboxes_cam_3d\r\n        info_dict['gt_velocity_cam_view'] = vel_cam\r\n        info_dict['gt_visible'] = gt_visible_3d\r\n\r\n        info_dict['gt_bboxes_lidar_view'] = bboxes_lidar\r\n        info_dict['gt_velocity_lidar_view'] = vels_lidar\r\n\r\n    output_path = os.path.join(data_root, output_file)\r\n    with open(output_path, \"wb\") as file:\r\n        pickle.dump(info, file)\r\n\r\n\r\nif __name__ == \"__main__\":\r\n    parser = argparse.ArgumentParser(description='Combine the 3D bboxes in the camera coordinates into the ann file')\r\n    parser.add_argument('--data_root', type=str, default='./data/nuscenes/', help='root path of dataset')\r\n    parser.add_argument('--info_tag', type=str, default='nuscenes_infos', help='data info filename prefix')\r\n    parser.add_argument('--output_tag', type=str, default='nuscenes_infos_w_views', help='output filename prefix')\r\n    parser.add_argument('--output_file', type=str, default='./data/nuscenes/', help='root path of dataset')\r\n    args = parser.parse_args()\r\n\r\n    for split in [\"train\", \"val\"]:\r\n        info_file = args.info_tag + \"_%s.pkl\"%split\r\n        coco_file = args.info_tag + \"_%s.coco.json\"%split\r\n        output_file = args.output_tag + \"_%s.pkl\"%split\r\n\r\n        print(\"Processing %s data\"%split)\r\n        combine_data(args.data_root, info_file, coco_file, output_file)\r\n"
  },
  {
    "path": "tools/create_data.py",
    "content": "import argparse\nfrom os import path as osp\n\nfrom tools.data_converter import indoor_converter as indoor\nfrom tools.data_converter import kitti_converter as kitti\nfrom tools.data_converter import lyft_converter as lyft_converter\nfrom tools.data_converter import nuscenes_converter as nuscenes_converter\nfrom tools.data_converter.create_gt_database import create_groundtruth_database\n\n\ndef kitti_data_prep(root_path, info_prefix, version, out_dir):\n    \"\"\"Prepare data related to Kitti dataset.\n\n    Related data consists of '.pkl' files recording basic infos,\n    2D annotations and groundtruth database.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        version (str): Dataset version.\n        out_dir (str): Output directory of the groundtruth database info.\n    \"\"\"\n    kitti.create_kitti_info_file(root_path, info_prefix)\n    kitti.create_reduced_point_cloud(root_path, info_prefix)\n    create_groundtruth_database(\n        'KittiDataset',\n        root_path,\n        info_prefix,\n        f'{out_dir}/{info_prefix}_infos_train.pkl',\n        relative_path=False,\n        mask_anno_path='instances_train.json',\n        with_mask=(version == 'mask'))\n\n\ndef nuscenes_data_prep(root_path,\n                       info_prefix,\n                       version,\n                       dataset_name,\n                       out_dir,\n                       max_sweeps=10):\n    \"\"\"Prepare data related to nuScenes dataset.\n\n    Related data consists of '.pkl' files recording basic infos,\n    2D annotations and groundtruth database.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        version (str): Dataset version.\n        dataset_name (str): The dataset class name.\n        out_dir (str): Output directory of the groundtruth database info.\n        max_sweeps (int): Number of input consecutive frames. 
Default: 10\n    \"\"\"\n    nuscenes_converter.create_nuscenes_infos(\n        root_path, info_prefix, version=version, max_sweeps=max_sweeps)\n\n    if version == 'v1.0-test':\n        return\n\n    info_train_path = osp.join(root_path, f'{info_prefix}_infos_train.pkl')\n    info_val_path = osp.join(root_path, f'{info_prefix}_infos_val.pkl')\n    nuscenes_converter.export_2d_annotation(\n        root_path, info_train_path, version=version)\n    nuscenes_converter.export_2d_annotation(\n        root_path, info_val_path, version=version)\n    create_groundtruth_database(dataset_name, root_path, info_prefix,\n                                f'{out_dir}/{info_prefix}_infos_train.pkl')\n\n\ndef lyft_data_prep(root_path,\n                   info_prefix,\n                   version,\n                   dataset_name,\n                   out_dir,\n                   max_sweeps=10):\n    \"\"\"Prepare data related to Lyft dataset.\n\n    Related data consists of '.pkl' files recording basic infos,\n    and 2D annotations.\n    Although the ground truth database is not used in Lyft, it can also be\n    generated like nuScenes.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        version (str): Dataset version.\n        dataset_name (str): The dataset class name.\n        out_dir (str): Output directory of the groundtruth database info.\n            Not used here if the groundtruth database is not generated.\n        max_sweeps (int): Number of input consecutive frames. Default: 10\n    \"\"\"\n    lyft_converter.create_lyft_infos(\n        root_path, info_prefix, version=version, max_sweeps=max_sweeps)\n\n    if version == 'v1.01-test':\n        return\n\n    train_info_name = f'{info_prefix}_infos_train'\n    val_info_name = f'{info_prefix}_infos_val'\n\n    info_train_path = osp.join(root_path, f'{train_info_name}.pkl')\n    info_val_path = osp.join(root_path, f'{val_info_name}.pkl')\n\n    lyft_converter.export_2d_annotation(\n        root_path, info_train_path, version=version)\n    lyft_converter.export_2d_annotation(\n        root_path, info_val_path, version=version)\n\n\ndef scannet_data_prep(root_path, info_prefix, out_dir, workers):\n    \"\"\"Prepare the info file for scannet dataset.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        out_dir (str): Output directory of the generated info file.\n        workers (int): Number of threads to be used.\n    \"\"\"\n    indoor.create_indoor_info_file(\n        root_path, info_prefix, out_dir, workers=workers)\n\n\ndef sunrgbd_data_prep(root_path, info_prefix, out_dir, workers):\n    \"\"\"Prepare the info file for sunrgbd dataset.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        out_dir (str): Output directory of the generated info file.\n        workers (int): Number of threads to be used.\n    \"\"\"\n    indoor.create_indoor_info_file(\n        root_path, info_prefix, out_dir, workers=workers)\n\n\ndef waymo_data_prep(root_path,\n                    info_prefix,\n                    version,\n                    out_dir,\n                    workers,\n                    max_sweeps=5):\n    \"\"\"Prepare the info file for waymo dataset.\n\n    Args:\n        root_path (str): Path of dataset root.\n        info_prefix (str): The prefix of info filenames.\n        out_dir (str): Output directory of the generated info 
file.\n        workers (int): Number of threads to be used.\n        max_sweeps (int): Number of input consecutive frames. Default: 5 \\\n            Here we store pose information of these frames for later use.\n    \"\"\"\n    from tools.data_converter import waymo_converter as waymo\n\n    splits = ['training', 'validation', 'testing']\n    for i, split in enumerate(splits):\n        load_dir = osp.join(root_path, 'waymo_format', split)\n        if split == 'validation':\n            save_dir = osp.join(out_dir, 'kitti_format', 'training')\n        else:\n            save_dir = osp.join(out_dir, 'kitti_format', split)\n        converter = waymo.Waymo2KITTI(\n            load_dir,\n            save_dir,\n            prefix=str(i),\n            workers=workers,\n            test_mode=(split == 'testing'))\n        converter.convert()\n    # Generate waymo infos\n    out_dir = osp.join(out_dir, 'kitti_format')\n    kitti.create_waymo_info_file(out_dir, info_prefix, max_sweeps=max_sweeps)\n    create_groundtruth_database(\n        'WaymoDataset',\n        out_dir,\n        info_prefix,\n        f'{out_dir}/{info_prefix}_infos_train.pkl',\n        relative_path=False,\n        with_mask=False)\n\n\nparser = argparse.ArgumentParser(description='Data converter arg parser')\nparser.add_argument('dataset', metavar='kitti', help='name of the dataset')\nparser.add_argument(\n    '--root-path',\n    type=str,\n    default='./data/kitti',\n    help='specify the root path of dataset')\nparser.add_argument(\n    '--version',\n    type=str,\n    default='v1.0',\n    required=False,\n    help='specify the dataset version, no need for kitti')\nparser.add_argument(\n    '--max-sweeps',\n    type=int,\n    default=10,\n    required=False,\n    help='specify sweeps of lidar per example')\nparser.add_argument(\n    '--out-dir',\n    type=str,\n    default='./data/kitti',\n    required=False,\n    help='specify the output directory of generated infos')\nparser.add_argument('--extra-tag', type=str, default='kitti')\nparser.add_argument(\n    '--workers', type=int, default=4, help='number of threads to be used')\nargs = parser.parse_args()\n\nif __name__ == '__main__':\n    if args.dataset == 'kitti':\n        kitti_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=args.version,\n            out_dir=args.out_dir)\n    elif args.dataset == 'nuscenes' and args.version != 'v1.0-mini':\n        train_version = f'{args.version}-trainval'\n        nuscenes_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=train_version,\n            dataset_name='NuScenesDataset',\n            out_dir=args.out_dir,\n            max_sweeps=args.max_sweeps)\n        test_version = f'{args.version}-test'\n        nuscenes_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=test_version,\n            dataset_name='NuScenesDataset',\n            out_dir=args.out_dir,\n            max_sweeps=args.max_sweeps)\n    elif args.dataset == 'nuscenes' and args.version == 'v1.0-mini':\n        train_version = f'{args.version}'\n        nuscenes_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=train_version,\n            dataset_name='NuScenesDataset',\n            out_dir=args.out_dir,\n            max_sweeps=args.max_sweeps)\n    elif args.dataset == 'lyft':\n        train_version = f'{args.version}-train'\n        lyft_data_prep(\n    
        root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=train_version,\n            dataset_name='LyftDataset',\n            out_dir=args.out_dir,\n            max_sweeps=args.max_sweeps)\n        test_version = f'{args.version}-test'\n        lyft_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=test_version,\n            dataset_name='LyftDataset',\n            out_dir=args.out_dir,\n            max_sweeps=args.max_sweeps)\n    elif args.dataset == 'waymo':\n        waymo_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            version=args.version,\n            out_dir=args.out_dir,\n            workers=args.workers,\n            max_sweeps=args.max_sweeps)\n    elif args.dataset == 'scannet':\n        scannet_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            out_dir=args.out_dir,\n            workers=args.workers)\n    elif args.dataset == 'sunrgbd':\n        sunrgbd_data_prep(\n            root_path=args.root_path,\n            info_prefix=args.extra_tag,\n            out_dir=args.out_dir,\n            workers=args.workers)\n"
  },
  {
    "path": "tools/create_data.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\nexport PYTHONPATH=`pwd`:$PYTHONPATH\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nWORK_DIR=$4\nGPUS=${GPUS:-1}\nGPUS_PER_NODE=${GPUS_PER_NODE:-1}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\nJOB_NAME=create_data\n\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/create_data.py kitti \\\n            --root-path ./data/kitti \\\n            --out-dir ./data/kitti \\\n            --extra-tag kitti\n"
  },
  {
    "path": "tools/data_converter/__init__.py",
    "content": ""
  },
  {
    "path": "tools/data_converter/create_gt_database.py",
    "content": "import mmcv\nimport numpy as np\nimport pickle\nfrom mmcv import track_iter_progress\nfrom mmcv.ops import roi_align\nfrom os import path as osp\nfrom pycocotools import mask as maskUtils\nfrom pycocotools.coco import COCO\n\nfrom mmdet3d.core.bbox import box_np_ops as box_np_ops\nfrom mmdet3d.datasets import build_dataset\nfrom mmdet.core.evaluation.bbox_overlaps import bbox_overlaps\n\n\ndef _poly2mask(mask_ann, img_h, img_w):\n    if isinstance(mask_ann, list):\n        # polygon -- a single object might consist of multiple parts\n        # we merge all parts into one mask rle code\n        rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)\n        rle = maskUtils.merge(rles)\n    elif isinstance(mask_ann['counts'], list):\n        # uncompressed RLE\n        rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)\n    else:\n        # rle\n        rle = mask_ann\n    mask = maskUtils.decode(rle)\n    return mask\n\n\ndef _parse_coco_ann_info(ann_info):\n    gt_bboxes = []\n    gt_labels = []\n    gt_bboxes_ignore = []\n    gt_masks_ann = []\n\n    for i, ann in enumerate(ann_info):\n        if ann.get('ignore', False):\n            continue\n        x1, y1, w, h = ann['bbox']\n        if ann['area'] <= 0:\n            continue\n        bbox = [x1, y1, x1 + w, y1 + h]\n        if ann.get('iscrowd', False):\n            gt_bboxes_ignore.append(bbox)\n        else:\n            gt_bboxes.append(bbox)\n            gt_masks_ann.append(ann['segmentation'])\n\n    if gt_bboxes:\n        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)\n        gt_labels = np.array(gt_labels, dtype=np.int64)\n    else:\n        gt_bboxes = np.zeros((0, 4), dtype=np.float32)\n        gt_labels = np.array([], dtype=np.int64)\n\n    if gt_bboxes_ignore:\n        gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)\n    else:\n        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)\n\n    ann = dict(\n        bboxes=gt_bboxes, bboxes_ignore=gt_bboxes_ignore, masks=gt_masks_ann)\n\n    return ann\n\n\ndef crop_image_patch_v2(pos_proposals, pos_assigned_gt_inds, gt_masks):\n    import torch\n    from torch.nn.modules.utils import _pair\n    device = pos_proposals.device\n    num_pos = pos_proposals.size(0)\n    fake_inds = (\n        torch.arange(num_pos,\n                     device=device).to(dtype=pos_proposals.dtype)[:, None])\n    rois = torch.cat([fake_inds, pos_proposals], dim=1)  # Nx5\n    mask_size = _pair(28)\n    rois = rois.to(device=device)\n    gt_masks_th = (\n        torch.from_numpy(gt_masks).to(device).index_select(\n            0, pos_assigned_gt_inds).to(dtype=rois.dtype))\n    # Use RoIAlign could apparently accelerate the training (~0.1s/iter)\n    targets = (\n        roi_align(gt_masks_th, rois, mask_size[::-1], 1.0, 0, True).squeeze(1))\n    return targets\n\n\ndef crop_image_patch(pos_proposals, gt_masks, pos_assigned_gt_inds, org_img):\n    num_pos = pos_proposals.shape[0]\n    masks = []\n    img_patches = []\n    for i in range(num_pos):\n        gt_mask = gt_masks[pos_assigned_gt_inds[i]]\n        bbox = pos_proposals[i, :].astype(np.int32)\n        x1, y1, x2, y2 = bbox\n        w = np.maximum(x2 - x1 + 1, 1)\n        h = np.maximum(y2 - y1 + 1, 1)\n\n        mask_patch = gt_mask[y1:y1 + h, x1:x1 + w]\n        masked_img = gt_mask[..., None] * org_img\n        img_patch = masked_img[y1:y1 + h, x1:x1 + w]\n\n        img_patches.append(img_patch)\n        masks.append(mask_patch)\n    return img_patches, masks\n\n\ndef 
create_groundtruth_database(dataset_class_name,\n                                data_path,\n                                info_prefix,\n                                info_path=None,\n                                mask_anno_path=None,\n                                used_classes=None,\n                                database_save_path=None,\n                                db_info_save_path=None,\n                                relative_path=True,\n                                add_rgb=False,\n                                lidar_only=False,\n                                bev_only=False,\n                                coors_range=None,\n                                with_mask=False):\n    \"\"\"Given the raw data, generate the ground truth database.\n\n    Args:\n        dataset_class_name (str): Name of the input dataset.\n        data_path (str): Path of the data.\n        info_prefix (str): Prefix of the info file.\n        info_path (str): Path of the info file.\n            Default: None.\n        mask_anno_path (str): Path of the mask_anno.\n            Default: None.\n        used_classes (list[str]): Classes that have been used.\n            Default: None.\n        database_save_path (str): Path to save database.\n            Default: None.\n        db_info_save_path (str): Path to save db_info.\n            Default: None.\n        relative_path (bool): Whether to use relative path.\n            Default: True.\n        with_mask (bool): Whether to use mask.\n            Default: False.\n    \"\"\"\n    print(f'Create GT Database of {dataset_class_name}')\n    dataset_cfg = dict(\n        type=dataset_class_name, data_root=data_path, ann_file=info_path)\n    if dataset_class_name == 'KittiDataset':\n        file_client_args = dict(backend='disk')\n        dataset_cfg.update(\n            test_mode=False,\n            split='training',\n            modality=dict(\n                use_lidar=True,\n                use_depth=False,\n                use_lidar_intensity=True,\n                use_camera=with_mask,\n            ),\n            pipeline=[\n                dict(\n                    type='LoadPointsFromFile',\n                    coord_type='LIDAR',\n                    load_dim=4,\n                    use_dim=4,\n                    file_client_args=file_client_args),\n                dict(\n                    type='LoadAnnotations3D',\n                    with_bbox_3d=True,\n                    with_label_3d=True,\n                    file_client_args=file_client_args)\n            ])\n\n    elif dataset_class_name == 'NuScenesDataset':\n        dataset_cfg.update(\n            use_valid_flag=True,\n            pipeline=[\n                dict(\n                    type='LoadPointsFromFile',\n                    coord_type='LIDAR',\n                    load_dim=5,\n                    use_dim=5),\n                dict(\n                    type='LoadPointsFromMultiSweeps',\n                    sweeps_num=10,\n                    use_dim=[0, 1, 2, 3, 4],\n                    pad_empty_sweeps=True,\n                    remove_close=True),\n                dict(\n                    type='LoadAnnotations3D',\n                    with_bbox_3d=True,\n                    with_label_3d=True)\n            ])\n\n    elif dataset_class_name == 'WaymoDataset':\n        file_client_args = dict(backend='disk')\n        dataset_cfg.update(\n            test_mode=False,\n            split='training',\n            modality=dict(\n                use_lidar=True,\n             
   use_depth=False,\n                use_lidar_intensity=True,\n                use_camera=False,\n            ),\n            pipeline=[\n                dict(\n                    type='LoadPointsFromFile',\n                    coord_type='LIDAR',\n                    load_dim=6,\n                    use_dim=5,\n                    file_client_args=file_client_args),\n                dict(\n                    type='LoadAnnotations3D',\n                    with_bbox_3d=True,\n                    with_label_3d=True,\n                    file_client_args=file_client_args)\n            ])\n\n    dataset = build_dataset(dataset_cfg)\n\n    if database_save_path is None:\n        database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')\n    if db_info_save_path is None:\n        db_info_save_path = osp.join(data_path,\n                                     f'{info_prefix}_dbinfos_train.pkl')\n    mmcv.mkdir_or_exist(database_save_path)\n    all_db_infos = dict()\n    if with_mask:\n        coco = COCO(osp.join(data_path, mask_anno_path))\n        imgIds = coco.getImgIds()\n        file2id = dict()\n        for i in imgIds:\n            info = coco.loadImgs([i])[0]\n            file2id.update({info['file_name']: i})\n\n    group_counter = 0\n    for j in track_iter_progress(list(range(len(dataset)))):\n        input_dict = dataset.get_data_info(j)\n        dataset.pre_pipeline(input_dict)\n        example = dataset.pipeline(input_dict)\n        annos = example['ann_info']\n        image_idx = example['sample_idx']\n        points = example['points'].tensor.numpy()\n        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()\n        names = annos['gt_names']\n        group_dict = dict()\n        if 'group_ids' in annos:\n            group_ids = annos['group_ids']\n        else:\n            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)\n        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)\n        if 'difficulty' in annos:\n            difficulty = annos['difficulty']\n\n        num_obj = gt_boxes_3d.shape[0]\n        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)\n\n        if with_mask:\n            # prepare masks\n            gt_boxes = annos['gt_bboxes']\n            img_path = osp.split(example['img_info']['filename'])[-1]\n            if img_path not in file2id.keys():\n                print(f'skip image {img_path} for empty mask')\n                continue\n            img_id = file2id[img_path]\n            kins_annIds = coco.getAnnIds(imgIds=img_id)\n            kins_raw_info = coco.loadAnns(kins_annIds)\n            kins_ann_info = _parse_coco_ann_info(kins_raw_info)\n            h, w = annos['img_shape'][:2]\n            gt_masks = [\n                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']\n            ]\n            # get mask inds based on iou mapping\n            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)\n            mask_inds = bbox_iou.argmax(axis=0)\n            valid_inds = (bbox_iou.max(axis=0) > 0.5)\n\n            # mask the image\n            # use more precise crop when it is ready\n            # object_img_patches = np.ascontiguousarray(\n            #     np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))\n            # crop image patches using roi_align\n            # object_img_patches = crop_image_patch_v2(\n            #     torch.Tensor(gt_boxes),\n            #     torch.Tensor(mask_inds).long(), object_img_patches)\n            object_img_patches, object_masks 
= crop_image_patch(\n                gt_boxes, gt_masks, mask_inds, annos['img'])\n\n        for i in range(num_obj):\n            filename = f'{image_idx}_{names[i]}_{i}.bin'\n            abs_filepath = osp.join(database_save_path, filename)\n            rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)\n\n            # save point clouds and image patches for each object\n            gt_points = points[point_indices[:, i]]\n            gt_points[:, :3] -= gt_boxes_3d[i, :3]\n\n            if with_mask:\n                if object_masks[i].sum() == 0 or not valid_inds[i]:\n                    # Skip object for empty or invalid mask\n                    continue\n                img_patch_path = abs_filepath + '.png'\n                mask_patch_path = abs_filepath + '.mask.png'\n                mmcv.imwrite(object_img_patches[i], img_patch_path)\n                mmcv.imwrite(object_masks[i], mask_patch_path)\n\n            with open(abs_filepath, 'w') as f:\n                gt_points.tofile(f)\n\n            if (used_classes is None) or names[i] in used_classes:\n                db_info = {\n                    'name': names[i],\n                    'path': rel_filepath,\n                    'image_idx': image_idx,\n                    'gt_idx': i,\n                    'box3d_lidar': gt_boxes_3d[i],\n                    'num_points_in_gt': gt_points.shape[0],\n                    'difficulty': difficulty[i],\n                }\n                local_group_id = group_ids[i]\n                # if local_group_id >= 0:\n                if local_group_id not in group_dict:\n                    group_dict[local_group_id] = group_counter\n                    group_counter += 1\n                db_info['group_id'] = group_dict[local_group_id]\n                if 'score' in annos:\n                    db_info['score'] = annos['score'][i]\n                if with_mask:\n                    db_info.update({'box2d_camera': gt_boxes[i]})\n                if names[i] in all_db_infos:\n                    all_db_infos[names[i]].append(db_info)\n                else:\n                    all_db_infos[names[i]] = [db_info]\n\n    for k, v in all_db_infos.items():\n        print(f'load {len(v)} {k} database infos')\n\n    with open(db_info_save_path, 'wb') as f:\n        pickle.dump(all_db_infos, f)\n"
  },
  {
    "path": "tools/data_converter/indoor_converter.py",
    "content": "import mmcv\nimport os\n\nfrom tools.data_converter.scannet_data_utils import ScanNetData\nfrom tools.data_converter.sunrgbd_data_utils import SUNRGBDData\n\n\ndef create_indoor_info_file(data_path,\n                            pkl_prefix='sunrgbd',\n                            save_path=None,\n                            use_v1=False,\n                            workers=4):\n    \"\"\"Create indoor information file.\n\n    Get information of the raw data and save it to the pkl file.\n\n    Args:\n        data_path (str): Path of the data.\n        pkl_prefix (str): Prefix of the pkl to be saved. Default: 'sunrgbd'.\n        save_path (str): Path of the pkl to be saved. Default: None.\n        use_v1 (bool): Whether to use v1. Default: False.\n        workers (int): Number of threads to be used. Default: 4.\n    \"\"\"\n    assert os.path.exists(data_path)\n    assert pkl_prefix in ['sunrgbd', 'scannet']\n    save_path = data_path if save_path is None else save_path\n    assert os.path.exists(save_path)\n\n    train_filename = os.path.join(save_path, f'{pkl_prefix}_infos_train.pkl')\n    val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')\n    if pkl_prefix == 'sunrgbd':\n        train_dataset = SUNRGBDData(\n            root_path=data_path, split='train', use_v1=use_v1)\n        val_dataset = SUNRGBDData(\n            root_path=data_path, split='val', use_v1=use_v1)\n    else:\n        train_dataset = ScanNetData(root_path=data_path, split='train')\n        val_dataset = ScanNetData(root_path=data_path, split='val')\n\n    infos_train = train_dataset.get_infos(num_workers=workers, has_label=True)\n    mmcv.dump(infos_train, train_filename, 'pkl')\n    print(f'{pkl_prefix} info train file is saved to {train_filename}')\n\n    infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)\n    mmcv.dump(infos_val, val_filename, 'pkl')\n    print(f'{pkl_prefix} info val file is saved to {val_filename}')\n"
  },
  {
    "path": "tools/data_converter/kitti_converter.py",
    "content": "import mmcv\nimport numpy as np\nfrom pathlib import Path\n\nfrom mmdet3d.core.bbox import box_np_ops\nfrom .kitti_data_utils import get_kitti_image_info, get_waymo_image_info\n\n\ndef convert_to_kitti_info_version2(info):\n    \"\"\"convert kitti info v1 to v2 if possible.\n\n    Args:\n        info (dict): Info of the input kitti data.\n            - image (dict): image info\n            - calib (dict): calibration info\n            - point_cloud (dict): point cloud info\n    \"\"\"\n    if 'image' not in info or 'calib' not in info or 'point_cloud' not in info:\n        info['image'] = {\n            'image_shape': info['img_shape'],\n            'image_idx': info['image_idx'],\n            'image_path': info['img_path'],\n        }\n        info['calib'] = {\n            'R0_rect': info['calib/R0_rect'],\n            'Tr_velo_to_cam': info['calib/Tr_velo_to_cam'],\n            'P2': info['calib/P2'],\n        }\n        info['point_cloud'] = {\n            'velodyne_path': info['velodyne_path'],\n        }\n\n\ndef _read_imageset_file(path):\n    with open(path, 'r') as f:\n        lines = f.readlines()\n    return [int(line) for line in lines]\n\n\ndef _calculate_num_points_in_gt(data_path,\n                                infos,\n                                relative_path,\n                                remove_outside=True,\n                                num_features=4):\n    for info in mmcv.track_iter_progress(infos):\n        pc_info = info['point_cloud']\n        image_info = info['image']\n        calib = info['calib']\n        if relative_path:\n            v_path = str(Path(data_path) / pc_info['velodyne_path'])\n        else:\n            v_path = pc_info['velodyne_path']\n        points_v = np.fromfile(\n            v_path, dtype=np.float32, count=-1).reshape([-1, num_features])\n        rect = calib['R0_rect']\n        Trv2c = calib['Tr_velo_to_cam']\n        P2 = calib['P2']\n        if remove_outside:\n            points_v = box_np_ops.remove_outside_points(\n                points_v, rect, Trv2c, P2, image_info['image_shape'])\n\n        # points_v = points_v[points_v[:, 0] > 0]\n        annos = info['annos']\n        num_obj = len([n for n in annos['name'] if n != 'DontCare'])\n        # annos = kitti.filter_kitti_anno(annos, ['DontCare'])\n        dims = annos['dimensions'][:num_obj]\n        loc = annos['location'][:num_obj]\n        rots = annos['rotation_y'][:num_obj]\n        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],\n                                         axis=1)\n        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(\n            gt_boxes_camera, rect, Trv2c)\n        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)\n        num_points_in_gt = indices.sum(0)\n        num_ignored = len(annos['dimensions']) - num_obj\n        num_points_in_gt = np.concatenate(\n            [num_points_in_gt, -np.ones([num_ignored])])\n        annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)\n\n\ndef create_kitti_info_file(data_path,\n                           pkl_prefix='kitti',\n                           save_path=None,\n                           relative_path=True):\n    \"\"\"Create info file of KITTI dataset.\n\n    Given the raw data, generate its related info file in pkl format.\n\n    Args:\n        data_path (str): Path of the data root.\n        pkl_prefix (str): Prefix of the info file to be generated.\n        save_path (str): Path to save the info file.\n        relative_path 
(bool): Whether to use relative path.\n    \"\"\"\n    imageset_folder = Path(data_path) / 'ImageSets'\n    train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt'))\n    val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))\n    test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))\n\n    print('Generate info. this may take several minutes.')\n    if save_path is None:\n        save_path = Path(data_path)\n    else:\n        save_path = Path(save_path)\n    kitti_infos_train = get_kitti_image_info(\n        data_path,\n        training=True,\n        velodyne=True,\n        calib=True,\n        image_ids=train_img_ids,\n        relative_path=relative_path)\n    _calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)\n    filename = save_path / f'{pkl_prefix}_infos_train.pkl'\n    print(f'Kitti info train file is saved to {filename}')\n    mmcv.dump(kitti_infos_train, filename)\n    kitti_infos_val = get_kitti_image_info(\n        data_path,\n        training=True,\n        velodyne=True,\n        calib=True,\n        image_ids=val_img_ids,\n        relative_path=relative_path)\n    _calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)\n    filename = save_path / f'{pkl_prefix}_infos_val.pkl'\n    print(f'Kitti info val file is saved to {filename}')\n    mmcv.dump(kitti_infos_val, filename)\n    filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'\n    print(f'Kitti info trainval file is saved to {filename}')\n    mmcv.dump(kitti_infos_train + kitti_infos_val, filename)\n\n    kitti_infos_test = get_kitti_image_info(\n        data_path,\n        training=False,\n        label_info=False,\n        velodyne=True,\n        calib=True,\n        image_ids=test_img_ids,\n        relative_path=relative_path)\n    filename = save_path / f'{pkl_prefix}_infos_test.pkl'\n    print(f'Kitti info test file is saved to {filename}')\n    mmcv.dump(kitti_infos_test, filename)\n\n\ndef create_waymo_info_file(data_path,\n                           pkl_prefix='waymo',\n                           save_path=None,\n                           relative_path=True,\n                           max_sweeps=5):\n    \"\"\"Create info file of waymo dataset.\n\n    Given the raw data, generate its related info file in pkl format.\n\n    Args:\n        data_path (str): Path of the data root.\n        pkl_prefix (str): Prefix of the info file to be generated.\n        save_path (str | None): Path to save the info file.\n        relative_path (bool): Whether to use relative path.\n        max_sweeps (int): Max sweeps before the detection frame to be used.\n    \"\"\"\n    imageset_folder = Path(data_path) / 'ImageSets'\n    train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt'))\n    val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))\n    test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))\n\n    print('Generate info. 
this may take several minutes.')\n    if save_path is None:\n        save_path = Path(data_path)\n    else:\n        save_path = Path(save_path)\n    waymo_infos_train = get_waymo_image_info(\n        data_path,\n        training=True,\n        velodyne=True,\n        calib=True,\n        pose=True,\n        image_ids=train_img_ids,\n        relative_path=relative_path,\n        max_sweeps=max_sweeps)\n    _calculate_num_points_in_gt(\n        data_path,\n        waymo_infos_train,\n        relative_path,\n        num_features=6,\n        remove_outside=False)\n    filename = save_path / f'{pkl_prefix}_infos_train.pkl'\n    print(f'Waymo info train file is saved to {filename}')\n    mmcv.dump(waymo_infos_train, filename)\n    waymo_infos_val = get_waymo_image_info(\n        data_path,\n        training=True,\n        velodyne=True,\n        calib=True,\n        pose=True,\n        image_ids=val_img_ids,\n        relative_path=relative_path,\n        max_sweeps=max_sweeps)\n    _calculate_num_points_in_gt(\n        data_path,\n        waymo_infos_val,\n        relative_path,\n        num_features=6,\n        remove_outside=False)\n    filename = save_path / f'{pkl_prefix}_infos_val.pkl'\n    print(f'Waymo info val file is saved to {filename}')\n    mmcv.dump(waymo_infos_val, filename)\n    filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'\n    print(f'Waymo info trainval file is saved to {filename}')\n    mmcv.dump(waymo_infos_train + waymo_infos_val, filename)\n    waymo_infos_test = get_waymo_image_info(\n        data_path,\n        training=False,\n        label_info=False,\n        velodyne=True,\n        calib=True,\n        pose=True,\n        image_ids=test_img_ids,\n        relative_path=relative_path,\n        max_sweeps=max_sweeps)\n    filename = save_path / f'{pkl_prefix}_infos_test.pkl'\n    print(f'Waymo info test file is saved to {filename}')\n    mmcv.dump(waymo_infos_test, filename)\n\n\ndef _create_reduced_point_cloud(data_path,\n                                info_path,\n                                save_path=None,\n                                back=False,\n                                num_features=4,\n                                front_camera_id=2):\n    \"\"\"Create reduced point clouds for given info.\n\n    Args:\n        data_path (str): Path of original data.\n        info_path (str): Path of data info.\n        save_path (str | None): Path to save reduced point cloud data.\n            Default: None.\n        back (bool): Whether to flip the points to back.\n        num_features (int): Number of point features. Default: 4.\n        front_camera_id (int): The referenced/front camera ID. 
Default: 2.\n    \"\"\"\n    kitti_infos = mmcv.load(info_path)\n\n    for info in mmcv.track_iter_progress(kitti_infos):\n        pc_info = info['point_cloud']\n        image_info = info['image']\n        calib = info['calib']\n\n        v_path = pc_info['velodyne_path']\n        v_path = Path(data_path) / v_path\n        points_v = np.fromfile(\n            str(v_path), dtype=np.float32,\n            count=-1).reshape([-1, num_features])\n        rect = calib['R0_rect']\n        if front_camera_id == 2:\n            P2 = calib['P2']\n        else:\n            P2 = calib[f'P{str(front_camera_id)}']\n        Trv2c = calib['Tr_velo_to_cam']\n        # first remove z < 0 points\n        # keep = points_v[:, -1] > 0\n        # points_v = points_v[keep]\n        # then remove outside.\n        if back:\n            points_v[:, 0] = -points_v[:, 0]\n        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,\n                                                    image_info['image_shape'])\n        if save_path is None:\n            save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced')\n            if not save_dir.exists():\n                save_dir.mkdir()\n            save_filename = str(save_dir / v_path.name)\n            # save_filename = str(v_path) + '_reduced'\n            if back:\n                save_filename += '_back'\n        else:\n            save_filename = str(Path(save_path) / v_path.name)\n            if back:\n                save_filename += '_back'\n        with open(save_filename, 'w') as f:\n            points_v.tofile(f)\n\n\ndef create_reduced_point_cloud(data_path,\n                               pkl_prefix,\n                               train_info_path=None,\n                               val_info_path=None,\n                               test_info_path=None,\n                               save_path=None,\n                               with_back=False):\n    \"\"\"Create reduced point clouds for training/validation/testing.\n\n    Args:\n        data_path (str): Path of original data.\n        pkl_prefix (str): Prefix of info files.\n        train_info_path (str | None): Path of training set info.\n            Default: None.\n        val_info_path (str | None): Path of validation set info.\n            Default: None.\n        test_info_path (str | None): Path of test set info.\n            Default: None.\n        save_path (str | None): Path to save reduced point cloud data.\n        with_back (bool): Whether to flip the points to back.\n    \"\"\"\n    if train_info_path is None:\n        train_info_path = Path(data_path) / f'{pkl_prefix}_infos_train.pkl'\n    if val_info_path is None:\n        val_info_path = Path(data_path) / f'{pkl_prefix}_infos_val.pkl'\n    if test_info_path is None:\n        test_info_path = Path(data_path) / f'{pkl_prefix}_infos_test.pkl'\n\n    print('create reduced point cloud for training set')\n    _create_reduced_point_cloud(data_path, train_info_path, save_path)\n    print('create reduced point cloud for validation set')\n    _create_reduced_point_cloud(data_path, val_info_path, save_path)\n    print('create reduced point cloud for testing set')\n    _create_reduced_point_cloud(data_path, test_info_path, save_path)\n    if with_back:\n        _create_reduced_point_cloud(\n            data_path, train_info_path, save_path, back=True)\n        _create_reduced_point_cloud(\n            data_path, val_info_path, save_path, back=True)\n        _create_reduced_point_cloud(\n            data_path, 
test_info_path, save_path, back=True)\n"
  },
  {
    "path": "tools/data_converter/kitti_data_utils.py",
    "content": "import numpy as np\nfrom collections import OrderedDict\nfrom concurrent import futures as futures\nfrom os import path as osp\nfrom pathlib import Path\nfrom skimage import io\n\n\ndef get_image_index_str(img_idx, use_prefix_id=False):\n    if use_prefix_id:\n        return '{:07d}'.format(img_idx)\n    else:\n        return '{:06d}'.format(img_idx)\n\n\ndef get_kitti_info_path(idx,\n                        prefix,\n                        info_type='image_2',\n                        file_tail='.png',\n                        training=True,\n                        relative_path=True,\n                        exist_check=True,\n                        use_prefix_id=False):\n    img_idx_str = get_image_index_str(idx, use_prefix_id)\n    img_idx_str += file_tail\n    prefix = Path(prefix)\n    if training:\n        file_path = Path('training') / info_type / img_idx_str\n    else:\n        file_path = Path('testing') / info_type / img_idx_str\n    if exist_check and not (prefix / file_path).exists():\n        raise ValueError('file not exist: {}'.format(file_path))\n    if relative_path:\n        return str(file_path)\n    else:\n        return str(prefix / file_path)\n\n\ndef get_image_path(idx,\n                   prefix,\n                   training=True,\n                   relative_path=True,\n                   exist_check=True,\n                   info_type='image_2',\n                   use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, info_type, '.png', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_label_path(idx,\n                   prefix,\n                   training=True,\n                   relative_path=True,\n                   exist_check=True,\n                   info_type='label_2',\n                   use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, info_type, '.txt', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_velodyne_path(idx,\n                      prefix,\n                      training=True,\n                      relative_path=True,\n                      exist_check=True,\n                      use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, 'velodyne', '.bin', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_calib_path(idx,\n                   prefix,\n                   training=True,\n                   relative_path=True,\n                   exist_check=True,\n                   use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, 'calib', '.txt', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_pose_path(idx,\n                  prefix,\n                  training=True,\n                  relative_path=True,\n                  exist_check=True,\n                  use_prefix_id=False):\n    return get_kitti_info_path(idx, prefix, 'pose', '.txt', training,\n                               relative_path, exist_check, use_prefix_id)\n\n\ndef get_label_anno(label_path):\n    annotations = {}\n    annotations.update({\n        'name': [],\n        'truncated': [],\n        'occluded': [],\n        'alpha': [],\n        'bbox': [],\n        'dimensions': [],\n        'location': [],\n        'rotation_y': []\n    })\n    with open(label_path, 'r') as f:\n        lines = f.readlines()\n    # if len(lines) == 0 or len(lines[0]) < 15:\n    #     content = []\n    # else:\n    content = 
[line.strip().split(' ') for line in lines]\n    num_objects = len([x[0] for x in content if x[0] != 'DontCare'])\n    annotations['name'] = np.array([x[0] for x in content])\n    num_gt = len(annotations['name'])\n    annotations['truncated'] = np.array([float(x[1]) for x in content])\n    annotations['occluded'] = np.array([int(x[2]) for x in content])\n    annotations['alpha'] = np.array([float(x[3]) for x in content])\n    annotations['bbox'] = np.array([[float(info) for info in x[4:8]]\n                                    for x in content]).reshape(-1, 4)\n    # dimensions will convert hwl format to standard lhw(camera) format.\n    annotations['dimensions'] = np.array([[float(info) for info in x[8:11]]\n                                          for x in content\n                                          ]).reshape(-1, 3)[:, [2, 0, 1]]\n    annotations['location'] = np.array([[float(info) for info in x[11:14]]\n                                        for x in content]).reshape(-1, 3)\n    annotations['rotation_y'] = np.array([float(x[14])\n                                          for x in content]).reshape(-1)\n    if len(content) != 0 and len(content[0]) == 16:  # have score\n        annotations['score'] = np.array([float(x[15]) for x in content])\n    else:\n        annotations['score'] = np.zeros((annotations['bbox'].shape[0], ))\n    index = list(range(num_objects)) + [-1] * (num_gt - num_objects)\n    annotations['index'] = np.array(index, dtype=np.int32)\n    annotations['group_ids'] = np.arange(num_gt, dtype=np.int32)\n    return annotations\n\n\ndef _extend_matrix(mat):\n    mat = np.concatenate([mat, np.array([[0., 0., 0., 1.]])], axis=0)\n    return mat\n\n\ndef get_kitti_image_info(path,\n                         training=True,\n                         label_info=True,\n                         velodyne=False,\n                         calib=False,\n                         image_ids=7481,\n                         extend_matrix=True,\n                         num_worker=8,\n                         relative_path=True,\n                         with_imageshape=True):\n    \"\"\"\n    KITTI annotation format version 2:\n    {\n        [optional]points: [N, 3+] point cloud\n        [optional, for kitti]image: {\n            image_idx: ...\n            image_path: ...\n            image_shape: ...\n        }\n        point_cloud: {\n            num_features: 4\n            velodyne_path: ...\n        }\n        [optional, for kitti]calib: {\n            R0_rect: ...\n            Tr_velo_to_cam: ...\n            P2: ...\n        }\n        annos: {\n            location: [num_gt, 3] array\n            dimensions: [num_gt, 3] array\n            rotation_y: [num_gt] angle array\n            name: [num_gt] ground truth name array\n            [optional]difficulty: kitti difficulty\n            [optional]group_ids: used for multi-part object\n        }\n    }\n    \"\"\"\n    root_path = Path(path)\n    if not isinstance(image_ids, list):\n        image_ids = list(range(image_ids))\n\n    def map_func(idx):\n        info = {}\n        pc_info = {'num_features': 4}\n        calib_info = {}\n\n        image_info = {'image_idx': idx}\n        annotations = None\n        if velodyne:\n            pc_info['velodyne_path'] = get_velodyne_path(\n                idx, path, training, relative_path)\n        image_info['image_path'] = get_image_path(idx, path, training,\n                                                  relative_path)\n        if with_imageshape:\n            img_path = 
image_info['image_path']\n            if relative_path:\n                img_path = str(root_path / img_path)\n            image_info['image_shape'] = np.array(\n                io.imread(img_path).shape[:2], dtype=np.int32)\n        if label_info:\n            label_path = get_label_path(idx, path, training, relative_path)\n            if relative_path:\n                label_path = str(root_path / label_path)\n            annotations = get_label_anno(label_path)\n        info['image'] = image_info\n        info['point_cloud'] = pc_info\n        if calib:\n            calib_path = get_calib_path(\n                idx, path, training, relative_path=False)\n            with open(calib_path, 'r') as f:\n                lines = f.readlines()\n            P0 = np.array([float(info) for info in lines[0].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P1 = np.array([float(info) for info in lines[1].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P2 = np.array([float(info) for info in lines[2].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P3 = np.array([float(info) for info in lines[3].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            if extend_matrix:\n                P0 = _extend_matrix(P0)\n                P1 = _extend_matrix(P1)\n                P2 = _extend_matrix(P2)\n                P3 = _extend_matrix(P3)\n            R0_rect = np.array([\n                float(info) for info in lines[4].split(' ')[1:10]\n            ]).reshape([3, 3])\n            if extend_matrix:\n                rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)\n                rect_4x4[3, 3] = 1.\n                rect_4x4[:3, :3] = R0_rect\n            else:\n                rect_4x4 = R0_rect\n\n            Tr_velo_to_cam = np.array([\n                float(info) for info in lines[5].split(' ')[1:13]\n            ]).reshape([3, 4])\n            Tr_imu_to_velo = np.array([\n                float(info) for info in lines[6].split(' ')[1:13]\n            ]).reshape([3, 4])\n            if extend_matrix:\n                Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)\n                Tr_imu_to_velo = _extend_matrix(Tr_imu_to_velo)\n            calib_info['P0'] = P0\n            calib_info['P1'] = P1\n            calib_info['P2'] = P2\n            calib_info['P3'] = P3\n            calib_info['R0_rect'] = rect_4x4\n            calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam\n            calib_info['Tr_imu_to_velo'] = Tr_imu_to_velo\n            info['calib'] = calib_info\n\n        if annotations is not None:\n            info['annos'] = annotations\n            add_difficulty_to_annos(info)\n        return info\n\n    with futures.ThreadPoolExecutor(num_worker) as executor:\n        image_infos = executor.map(map_func, image_ids)\n\n    return list(image_infos)\n\n\ndef get_waymo_image_info(path,\n                         training=True,\n                         label_info=True,\n                         velodyne=False,\n                         calib=False,\n                         pose=False,\n                         image_ids=7481,\n                         extend_matrix=True,\n                         num_worker=8,\n                         relative_path=True,\n                         with_imageshape=True,\n                         max_sweeps=5):\n    \"\"\"\n    Waymo annotation format version like KITTI:\n    {\n        [optional]points: [N, 3+] point cloud\n        [optional, for kitti]image: {\n    
        image_idx: ...\n            image_path: ...\n            image_shape: ...\n        }\n        point_cloud: {\n            num_features: 6\n            velodyne_path: ...\n        }\n        [optional, for kitti]calib: {\n            R0_rect: ...\n            Tr_velo_to_cam0: ...\n            P0: ...\n        }\n        annos: {\n            location: [num_gt, 3] array\n            dimensions: [num_gt, 3] array\n            rotation_y: [num_gt] angle array\n            name: [num_gt] ground truth name array\n            [optional]difficulty: kitti difficulty\n            [optional]group_ids: used for multi-part object\n        }\n    }\n    \"\"\"\n    root_path = Path(path)\n    if not isinstance(image_ids, list):\n        image_ids = list(range(image_ids))\n\n    def map_func(idx):\n        info = {}\n        pc_info = {'num_features': 6}\n        calib_info = {}\n\n        image_info = {'image_idx': idx}\n        annotations = None\n        if velodyne:\n            pc_info['velodyne_path'] = get_velodyne_path(\n                idx, path, training, relative_path, use_prefix_id=True)\n            points = np.fromfile(\n                Path(path) / pc_info['velodyne_path'], dtype=np.float32)\n            points = np.copy(points).reshape(-1, pc_info['num_features'])\n            info['timestamp'] = np.int64(points[0, -1])\n            # values of the last dim are all the timestamp\n        image_info['image_path'] = get_image_path(\n            idx,\n            path,\n            training,\n            relative_path,\n            info_type='image_0',\n            use_prefix_id=True)\n        if with_imageshape:\n            img_path = image_info['image_path']\n            if relative_path:\n                img_path = str(root_path / img_path)\n            image_info['image_shape'] = np.array(\n                io.imread(img_path).shape[:2], dtype=np.int32)\n        if label_info:\n            label_path = get_label_path(\n                idx,\n                path,\n                training,\n                relative_path,\n                info_type='label_all',\n                use_prefix_id=True)\n            if relative_path:\n                label_path = str(root_path / label_path)\n            annotations = get_label_anno(label_path)\n        info['image'] = image_info\n        info['point_cloud'] = pc_info\n        if calib:\n            calib_path = get_calib_path(\n                idx, path, training, relative_path=False, use_prefix_id=True)\n            with open(calib_path, 'r') as f:\n                lines = f.readlines()\n            P0 = np.array([float(info) for info in lines[0].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P1 = np.array([float(info) for info in lines[1].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P2 = np.array([float(info) for info in lines[2].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P3 = np.array([float(info) for info in lines[3].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            P4 = np.array([float(info) for info in lines[4].split(' ')[1:13]\n                           ]).reshape([3, 4])\n            if extend_matrix:\n                P0 = _extend_matrix(P0)\n                P1 = _extend_matrix(P1)\n                P2 = _extend_matrix(P2)\n                P3 = _extend_matrix(P3)\n                P4 = _extend_matrix(P4)\n            R0_rect = np.array([\n                float(info) for info in lines[5].split(' ')[1:10]\n      
      ]).reshape([3, 3])\n            if extend_matrix:\n                rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)\n                rect_4x4[3, 3] = 1.\n                rect_4x4[:3, :3] = R0_rect\n            else:\n                rect_4x4 = R0_rect\n\n            Tr_velo_to_cam = np.array([\n                float(info) for info in lines[6].split(' ')[1:13]\n            ]).reshape([3, 4])\n            if extend_matrix:\n                Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)\n            calib_info['P0'] = P0\n            calib_info['P1'] = P1\n            calib_info['P2'] = P2\n            calib_info['P3'] = P3\n            calib_info['P4'] = P4\n            calib_info['R0_rect'] = rect_4x4\n            calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam\n            info['calib'] = calib_info\n        if pose:\n            pose_path = get_pose_path(\n                idx, path, training, relative_path=False, use_prefix_id=True)\n            info['pose'] = np.loadtxt(pose_path)\n\n        if annotations is not None:\n            info['annos'] = annotations\n            info['annos']['camera_id'] = info['annos'].pop('score')\n            add_difficulty_to_annos(info)\n\n        sweeps = []\n        prev_idx = idx\n        while len(sweeps) < max_sweeps:\n            prev_info = {}\n            prev_idx -= 1\n            prev_info['velodyne_path'] = get_velodyne_path(\n                prev_idx,\n                path,\n                training,\n                relative_path,\n                exist_check=False,\n                use_prefix_id=True)\n            if_prev_exists = osp.exists(\n                Path(path) / prev_info['velodyne_path'])\n            if if_prev_exists:\n                prev_points = np.fromfile(\n                    Path(path) / prev_info['velodyne_path'], dtype=np.float32)\n                prev_points = np.copy(prev_points).reshape(\n                    -1, pc_info['num_features'])\n                prev_info['timestamp'] = np.int64(prev_points[0, -1])\n                prev_pose_path = get_pose_path(\n                    prev_idx,\n                    path,\n                    training,\n                    relative_path=False,\n                    use_prefix_id=True)\n                prev_info['pose'] = np.loadtxt(prev_pose_path)\n                sweeps.append(prev_info)\n            else:\n                break\n        info['sweeps'] = sweeps\n\n        return info\n\n    with futures.ThreadPoolExecutor(num_worker) as executor:\n        image_infos = executor.map(map_func, image_ids)\n\n    return list(image_infos)\n\n\ndef kitti_anno_to_label_file(annos, folder):\n    folder = Path(folder)\n    for anno in annos:\n        image_idx = anno['metadata']['image_idx']\n        label_lines = []\n        for j in range(anno['bbox'].shape[0]):\n            label_dict = {\n                'name': anno['name'][j],\n                'alpha': anno['alpha'][j],\n                'bbox': anno['bbox'][j],\n                'location': anno['location'][j],\n                'dimensions': anno['dimensions'][j],\n                'rotation_y': anno['rotation_y'][j],\n                'score': anno['score'][j],\n            }\n            label_line = kitti_result_line(label_dict)\n            label_lines.append(label_line)\n        label_file = folder / f'{get_image_index_str(image_idx)}.txt'\n        label_str = '\\n'.join(label_lines)\n        with open(label_file, 'w') as f:\n            f.write(label_str)\n\n\ndef add_difficulty_to_annos(info):\n    min_height = [40, 25,\n 
                 25]  # minimum height for evaluated groundtruth/detections\n    max_occlusion = [\n        0, 1, 2\n    ]  # maximum occlusion level of the groundtruth used for evaluation\n    max_trunc = [\n        0.15, 0.3, 0.5\n    ]  # maximum truncation level of the groundtruth used for evaluation\n    annos = info['annos']\n    dims = annos['dimensions']  # lhw format\n    bbox = annos['bbox']\n    height = bbox[:, 3] - bbox[:, 1]\n    occlusion = annos['occluded']\n    truncation = annos['truncated']\n    diff = []\n    easy_mask = np.ones((len(dims), ), dtype=np.bool)\n    moderate_mask = np.ones((len(dims), ), dtype=np.bool)\n    hard_mask = np.ones((len(dims), ), dtype=np.bool)\n    i = 0\n    for h, o, t in zip(height, occlusion, truncation):\n        if o > max_occlusion[0] or h <= min_height[0] or t > max_trunc[0]:\n            easy_mask[i] = False\n        if o > max_occlusion[1] or h <= min_height[1] or t > max_trunc[1]:\n            moderate_mask[i] = False\n        if o > max_occlusion[2] or h <= min_height[2] or t > max_trunc[2]:\n            hard_mask[i] = False\n        i += 1\n    is_easy = easy_mask\n    is_moderate = np.logical_xor(easy_mask, moderate_mask)\n    is_hard = np.logical_xor(hard_mask, moderate_mask)\n\n    for i in range(len(dims)):\n        if is_easy[i]:\n            diff.append(0)\n        elif is_moderate[i]:\n            diff.append(1)\n        elif is_hard[i]:\n            diff.append(2)\n        else:\n            diff.append(-1)\n    annos['difficulty'] = np.array(diff, np.int32)\n    return diff\n\n\ndef kitti_result_line(result_dict, precision=4):\n    prec_float = '{' + ':.{}f'.format(precision) + '}'\n    res_line = []\n    all_field_default = OrderedDict([\n        ('name', None),\n        ('truncated', -1),\n        ('occluded', -1),\n        ('alpha', -10),\n        ('bbox', None),\n        ('dimensions', [-1, -1, -1]),\n        ('location', [-1000, -1000, -1000]),\n        ('rotation_y', -10),\n        ('score', 0.0),\n    ])\n    res_dict = [(key, None) for key, val in all_field_default.items()]\n    res_dict = OrderedDict(res_dict)\n    for key, val in result_dict.items():\n        if all_field_default[key] is None and val is None:\n            raise ValueError('you must specify a value for {}'.format(key))\n        res_dict[key] = val\n\n    for key, val in res_dict.items():\n        if key == 'name':\n            res_line.append(val)\n        elif key in ['truncated', 'alpha', 'rotation_y', 'score']:\n            if val is None:\n                res_line.append(str(all_field_default[key]))\n            else:\n                res_line.append(prec_float.format(val))\n        elif key == 'occluded':\n            if val is None:\n                res_line.append(str(all_field_default[key]))\n            else:\n                res_line.append('{}'.format(val))\n        elif key in ['bbox', 'dimensions', 'location']:\n            if val is None:\n                res_line += [str(v) for v in all_field_default[key]]\n            else:\n                res_line += [prec_float.format(v) for v in val]\n        else:\n            raise ValueError('unknown key. supported key:{}'.format(\n                res_dict.keys()))\n    return ' '.join(res_line)\n"
  },
  {
    "path": "tools/data_converter/lyft_converter.py",
    "content": "import mmcv\nimport numpy as np\nimport os\nfrom lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft\nfrom os import path as osp\nfrom pyquaternion import Quaternion\n\nfrom mmdet3d.datasets import LyftDataset\nfrom .nuscenes_converter import (get_2d_boxes, get_available_scenes,\n                                 obtain_sensor2top)\n\nlyft_categories = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',\n                   'motorcycle', 'bicycle', 'pedestrian', 'animal')\n\n\ndef create_lyft_infos(root_path,\n                      info_prefix,\n                      version='v1.01-train',\n                      max_sweeps=10):\n    \"\"\"Create info file of lyft dataset.\n\n    Given the raw data, generate its related info file in pkl format.\n\n    Args:\n        root_path (str): Path of the data root.\n        info_prefix (str): Prefix of the info file to be generated.\n        version (str): Version of the data.\n            Default: 'v1.01-train'\n        max_sweeps (int): Max number of sweeps.\n            Default: 10\n    \"\"\"\n    lyft = Lyft(\n        data_path=osp.join(root_path, version),\n        json_path=osp.join(root_path, version, version),\n        verbose=True)\n    available_vers = ['v1.01-train', 'v1.01-test']\n    assert version in available_vers\n    if version == 'v1.01-train':\n        train_scenes = mmcv.list_from_file('data/lyft/train.txt')\n        val_scenes = mmcv.list_from_file('data/lyft/val.txt')\n    elif version == 'v1.01-test':\n        train_scenes = mmcv.list_from_file('data/lyft/test.txt')\n        val_scenes = []\n    else:\n        raise ValueError('unknown')\n\n    # filter existing scenes.\n    available_scenes = get_available_scenes(lyft)\n    available_scene_names = [s['name'] for s in available_scenes]\n    train_scenes = list(\n        filter(lambda x: x in available_scene_names, train_scenes))\n    val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))\n    train_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in train_scenes\n    ])\n    val_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in val_scenes\n    ])\n\n    test = 'test' in version\n    if test:\n        print(f'test scene: {len(train_scenes)}')\n    else:\n        print(f'train scene: {len(train_scenes)}, \\\n                val scene: {len(val_scenes)}')\n    train_lyft_infos, val_lyft_infos = _fill_trainval_infos(\n        lyft, train_scenes, val_scenes, test, max_sweeps=max_sweeps)\n\n    metadata = dict(version=version)\n    if test:\n        print(f'test sample: {len(train_lyft_infos)}')\n        data = dict(infos=train_lyft_infos, metadata=metadata)\n        info_name = f'{info_prefix}_infos_test'\n        info_path = osp.join(root_path, f'{info_name}.pkl')\n        mmcv.dump(data, info_path)\n    else:\n        print(f'train sample: {len(train_lyft_infos)}, \\\n                val sample: {len(val_lyft_infos)}')\n        data = dict(infos=train_lyft_infos, metadata=metadata)\n        train_info_name = f'{info_prefix}_infos_train'\n        info_path = osp.join(root_path, f'{train_info_name}.pkl')\n        mmcv.dump(data, info_path)\n        data['infos'] = val_lyft_infos\n        val_info_name = f'{info_prefix}_infos_val'\n        info_val_path = osp.join(root_path, f'{val_info_name}.pkl')\n        mmcv.dump(data, info_val_path)\n\n\ndef _fill_trainval_infos(lyft,\n                         train_scenes,\n                         
val_scenes,\n                         test=False,\n                         max_sweeps=10):\n    \"\"\"Generate the train/val infos from the raw data.\n\n    Args:\n        lyft (:obj:`LyftDataset`): Dataset class in the Lyft dataset.\n        train_scenes (list[str]): Basic information of training scenes.\n        val_scenes (list[str]): Basic information of validation scenes.\n        test (bool): Whether to use the test mode. In the test mode, no\n            annotations can be accessed. Default: False.\n        max_sweeps (int): Max number of sweeps. Default: 10.\n\n    Returns:\n        tuple[list[dict]]: Information of training set and\n            validation set that will be saved to the info file.\n    \"\"\"\n    train_lyft_infos = []\n    val_lyft_infos = []\n\n    for sample in mmcv.track_iter_progress(lyft.sample):\n        lidar_token = sample['data']['LIDAR_TOP']\n        sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP'])\n        cs_record = lyft.get('calibrated_sensor',\n                             sd_rec['calibrated_sensor_token'])\n        pose_record = lyft.get('ego_pose', sd_rec['ego_pose_token'])\n        abs_lidar_path, boxes, _ = lyft.get_sample_data(lidar_token)\n        # nuScenes devkit returns more convenient relative paths while\n        # lyft devkit returns absolute paths\n        abs_lidar_path = str(abs_lidar_path)  # absolute path\n        lidar_path = abs_lidar_path.split(f'{os.getcwd()}/')[-1]\n        # relative path\n\n        mmcv.check_file_exist(lidar_path)\n\n        info = {\n            'lidar_path': lidar_path,\n            'token': sample['token'],\n            'sweeps': [],\n            'cams': dict(),\n            'lidar2ego_translation': cs_record['translation'],\n            'lidar2ego_rotation': cs_record['rotation'],\n            'ego2global_translation': pose_record['translation'],\n            'ego2global_rotation': pose_record['rotation'],\n            'timestamp': sample['timestamp'],\n        }\n\n        l2e_r = info['lidar2ego_rotation']\n        l2e_t = info['lidar2ego_translation']\n        e2g_r = info['ego2global_rotation']\n        e2g_t = info['ego2global_translation']\n        l2e_r_mat = Quaternion(l2e_r).rotation_matrix\n        e2g_r_mat = Quaternion(e2g_r).rotation_matrix\n\n        # obtain information of 6 images per frame\n        camera_types = [\n            'CAM_FRONT',\n            'CAM_FRONT_RIGHT',\n            'CAM_FRONT_LEFT',\n            'CAM_BACK',\n            'CAM_BACK_LEFT',\n            'CAM_BACK_RIGHT',\n        ]\n        for cam in camera_types:\n            cam_token = sample['data'][cam]\n            cam_path, _, cam_intrinsic = lyft.get_sample_data(cam_token)\n            cam_info = obtain_sensor2top(lyft, cam_token, l2e_t, l2e_r_mat,\n                                         e2g_t, e2g_r_mat, cam)\n            cam_info.update(cam_intrinsic=cam_intrinsic)\n            info['cams'].update({cam: cam_info})\n\n        # obtain sweeps for a single key-frame\n        sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP'])\n        sweeps = []\n        while len(sweeps) < max_sweeps:\n            if not sd_rec['prev'] == '':\n                sweep = obtain_sensor2top(lyft, sd_rec['prev'], l2e_t,\n                                          l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')\n                sweeps.append(sweep)\n                sd_rec = lyft.get('sample_data', sd_rec['prev'])\n            else:\n                break\n        info['sweeps'] = sweeps\n        # obtain annotation\n        if 
not test:\n            annotations = [\n                lyft.get('sample_annotation', token)\n                for token in sample['anns']\n            ]\n            locs = np.array([b.center for b in boxes]).reshape(-1, 3)\n            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)\n            rots = np.array([b.orientation.yaw_pitch_roll[0]\n                             for b in boxes]).reshape(-1, 1)\n\n            names = [b.name for b in boxes]\n            for i in range(len(names)):\n                if names[i] in LyftDataset.NameMapping:\n                    names[i] = LyftDataset.NameMapping[names[i]]\n            names = np.array(names)\n\n            # we need to convert rot to SECOND format.\n            gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)\n            assert len(gt_boxes) == len(\n                annotations), f'{len(gt_boxes)}, {len(annotations)}'\n            info['gt_boxes'] = gt_boxes\n            info['gt_names'] = names\n            info['num_lidar_pts'] = np.array(\n                [a['num_lidar_pts'] for a in annotations])\n            info['num_radar_pts'] = np.array(\n                [a['num_radar_pts'] for a in annotations])\n\n        if sample['scene_token'] in train_scenes:\n            train_lyft_infos.append(info)\n        else:\n            val_lyft_infos.append(info)\n\n    return train_lyft_infos, val_lyft_infos\n\n\ndef export_2d_annotation(root_path, info_path, version):\n    \"\"\"Export 2d annotation from the info file and raw data.\n\n    Args:\n        root_path (str): Root path of the raw data.\n        info_path (str): Path of the info file.\n        version (str): Dataset version.\n    \"\"\"\n    # get bbox annotations for camera\n    camera_types = [\n        'CAM_FRONT',\n        'CAM_FRONT_RIGHT',\n        'CAM_FRONT_LEFT',\n        'CAM_BACK',\n        'CAM_BACK_LEFT',\n        'CAM_BACK_RIGHT',\n    ]\n    lyft_infos = mmcv.load(info_path)['infos']\n    lyft = Lyft(\n        data_path=osp.join(root_path, version),\n        json_path=osp.join(root_path, version, version),\n        verbose=True)\n    # info_2d_list = []\n    cat2Ids = [\n        dict(id=lyft_categories.index(cat_name), name=cat_name)\n        for cat_name in lyft_categories\n    ]\n    coco_ann_id = 0\n    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)\n    for info in mmcv.track_iter_progress(lyft_infos):\n        for cam in camera_types:\n            cam_info = info['cams'][cam]\n            coco_infos = get_2d_boxes(\n                lyft,\n                cam_info['sample_data_token'],\n                visibilities=['', '1', '2', '3', '4'])\n            (height, width, _) = mmcv.imread(cam_info['data_path']).shape\n            coco_2d_dict['images'].append(\n                dict(\n                    file_name=cam_info['data_path'],\n                    id=cam_info['sample_data_token'],\n                    width=width,\n                    height=height))\n            for coco_info in coco_infos:\n                if coco_info is None:\n                    continue\n                # add an empty key for coco format\n                coco_info['segmentation'] = []\n                coco_info['id'] = coco_ann_id\n                coco_2d_dict['annotations'].append(coco_info)\n                coco_ann_id += 1\n    mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')\n"
  },
  {
    "path": "tools/data_converter/nuimage_converter.py",
    "content": "import argparse\nimport base64\nimport mmcv\nimport numpy as np\nfrom nuimages import NuImages\nfrom nuimages.utils.utils import mask_decode, name_to_index_mapping\nfrom os import path as osp\n\nnus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',\n                  'barrier')\n\nNAME_MAPPING = {\n    'movable_object.barrier': 'barrier',\n    'vehicle.bicycle': 'bicycle',\n    'vehicle.bus.bendy': 'bus',\n    'vehicle.bus.rigid': 'bus',\n    'vehicle.car': 'car',\n    'vehicle.construction': 'construction_vehicle',\n    'vehicle.motorcycle': 'motorcycle',\n    'human.pedestrian.adult': 'pedestrian',\n    'human.pedestrian.child': 'pedestrian',\n    'human.pedestrian.construction_worker': 'pedestrian',\n    'human.pedestrian.police_officer': 'pedestrian',\n    'movable_object.trafficcone': 'traffic_cone',\n    'vehicle.trailer': 'trailer',\n    'vehicle.truck': 'truck',\n}\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Data converter arg parser')\n    parser.add_argument(\n        '--data-root',\n        type=str,\n        default='./data/nuimages',\n        help='specify the root path of dataset')\n    parser.add_argument(\n        '--version',\n        type=str,\n        nargs='+',\n        default=['v1.0-mini'],\n        required=False,\n        help='specify the dataset version')\n    parser.add_argument(\n        '--out-dir',\n        type=str,\n        default='./data/nuimages/annotations/',\n        required=False,\n        help='path to save the exported json')\n    parser.add_argument(\n        '--nproc',\n        type=int,\n        default=4,\n        required=False,\n        help='workers to process semantic masks')\n    parser.add_argument('--extra-tag', type=str, default='nuimages')\n    args = parser.parse_args()\n    return args\n\n\ndef get_img_annos(nuim, img_info, cat2id, out_dir, data_root, seg_root):\n    \"\"\"Get semantic segmentation map for an image.\n\n    Args:\n        nuim (obj:`NuImages`): NuImages dataset object\n        img_info (dict): Meta information of img\n\n    Returns:\n        np.ndarray: Semantic segmentation map of the image\n    \"\"\"\n    sd_token = img_info['token']\n    image_id = img_info['id']\n    name_to_index = name_to_index_mapping(nuim.category)\n\n    # Get image data.\n    width, height = img_info['width'], img_info['height']\n    semseg_mask = np.zeros((height, width)).astype('uint8')\n\n    # Load stuff / surface regions.\n    surface_anns = [\n        o for o in nuim.surface_ann if o['sample_data_token'] == sd_token\n    ]\n\n    # Draw stuff / surface regions.\n    for ann in surface_anns:\n        # Get color and mask.\n        category_token = ann['category_token']\n        category_name = nuim.get('category', category_token)['name']\n        if ann['mask'] is None:\n            continue\n        mask = mask_decode(ann['mask'])\n\n        # Draw mask for semantic segmentation.\n        semseg_mask[mask == 1] = name_to_index[category_name]\n\n    # Load object instances.\n    object_anns = [\n        o for o in nuim.object_ann if o['sample_data_token'] == sd_token\n    ]\n\n    # Sort by token to ensure that objects always appear in the\n    # instance mask in the same order.\n    object_anns = sorted(object_anns, key=lambda k: k['token'])\n\n    # Draw object instances.\n    # The 0 index is reserved for background; thus, the instances\n    # should start from index 1.\n    annotations = []\n  
  for i, ann in enumerate(object_anns, start=1):\n        # Get color, box, mask and name.\n        category_token = ann['category_token']\n        category_name = nuim.get('category', category_token)['name']\n        if ann['mask'] is None:\n            continue\n        mask = mask_decode(ann['mask'])\n\n        # Draw masks for semantic segmentation and instance segmentation.\n        semseg_mask[mask == 1] = name_to_index[category_name]\n\n        if category_name in NAME_MAPPING:\n            cat_name = NAME_MAPPING[category_name]\n            cat_id = cat2id[cat_name]\n\n            x_min, y_min, x_max, y_max = ann['bbox']\n            # encode calibrated instance mask\n            mask_anno = dict()\n            mask_anno['counts'] = base64.b64decode(\n                ann['mask']['counts']).decode()\n            mask_anno['size'] = ann['mask']['size']\n\n            data_anno = dict(\n                image_id=image_id,\n                category_id=cat_id,\n                bbox=[x_min, y_min, x_max - x_min, y_max - y_min],\n                area=(x_max - x_min) * (y_max - y_min),\n                segmentation=mask_anno,\n                iscrowd=0)\n            annotations.append(data_anno)\n\n    # after process, save semantic masks\n    img_filename = img_info['file_name']\n    seg_filename = img_filename.replace('jpg', 'png')\n    seg_filename = osp.join(seg_root, seg_filename)\n    mmcv.imwrite(semseg_mask, seg_filename)\n    return annotations, np.max(semseg_mask)\n\n\ndef export_nuim_to_coco(nuim, data_root, out_dir, extra_tag, version, nproc):\n    print('Process category information')\n    categories = []\n    categories = [\n        dict(id=nus_categories.index(cat_name), name=cat_name)\n        for cat_name in nus_categories\n    ]\n    cat2id = {k_v['name']: k_v['id'] for k_v in categories}\n\n    images = []\n    print('Process image meta information...')\n    for sample_info in mmcv.track_iter_progress(nuim.sample_data):\n        if sample_info['is_key_frame']:\n            img_idx = len(images)\n            images.append(\n                dict(\n                    id=img_idx,\n                    token=sample_info['token'],\n                    file_name=sample_info['filename'],\n                    width=sample_info['width'],\n                    height=sample_info['height']))\n\n    seg_root = f'{out_dir}semantic_masks'\n    mmcv.mkdir_or_exist(seg_root)\n    mmcv.mkdir_or_exist(osp.join(data_root, 'calibrated'))\n\n    global process_img_anno\n\n    def process_img_anno(img_info):\n        single_img_annos, max_cls_id = get_img_annos(nuim, img_info, cat2id,\n                                                     out_dir, data_root,\n                                                     seg_root)\n        return single_img_annos, max_cls_id\n\n    print('Process img annotations...')\n    if nproc > 1:\n        outputs = mmcv.track_parallel_progress(\n            process_img_anno, images, nproc=nproc)\n    else:\n        outputs = []\n        for img_info in mmcv.track_iter_progress(images):\n            outputs.append(process_img_anno(img_info))\n\n    # Determine the index of object annotation\n    print('Process annotation information...')\n    annotations = []\n    max_cls_ids = []\n    for single_img_annos, max_cls_id in outputs:\n        max_cls_ids.append(max_cls_id)\n        for img_anno in single_img_annos:\n            img_anno.update(id=len(annotations))\n            annotations.append(img_anno)\n\n    max_cls_id = max(max_cls_ids)\n    print(f'Max ID of class 
in the semantic map: {max_cls_id}')\n\n    coco_format_json = dict(\n        images=images, annotations=annotations, categories=categories)\n\n    mmcv.mkdir_or_exist(out_dir)\n    out_file = osp.join(out_dir, f'{extra_tag}_{version}.json')\n    print(f'Annotation dumped to {out_file}')\n    mmcv.dump(coco_format_json, out_file)\n\n\ndef main():\n    args = parse_args()\n    for version in args.version:\n        nuim = NuImages(\n            dataroot=args.data_root, version=version, verbose=True, lazy=True)\n        export_nuim_to_coco(nuim, args.data_root, args.out_dir, args.extra_tag,\n                            version, args.nproc)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/data_converter/nuscenes_converter.py",
    "content": "import mmcv\nimport numpy as np\nimport os\nfrom collections import OrderedDict\nfrom nuscenes.nuscenes import NuScenes\nfrom nuscenes.utils.geometry_utils import view_points\nfrom os import path as osp\nfrom pyquaternion import Quaternion\nfrom shapely.geometry import MultiPoint, box\nfrom typing import List, Tuple, Union\n\nfrom mmdet3d.datasets import NuScenesDataset\n\nnus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',\n                  'barrier')\n\n\ndef create_nuscenes_infos(root_path,\n                          info_prefix,\n                          version='v1.0-trainval',\n                          max_sweeps=10):\n    \"\"\"Create info file of nuscene dataset.\n\n    Given the raw data, generate its related info file in pkl format.\n\n    Args:\n        root_path (str): Path of the data root.\n        info_prefix (str): Prefix of the info file to be generated.\n        version (str): Version of the data.\n            Default: 'v1.0-trainval'\n        max_sweeps (int): Max number of sweeps.\n            Default: 10\n    \"\"\"\n    from nuscenes.nuscenes import NuScenes\n    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)\n    from nuscenes.utils import splits\n    available_vers = ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']\n    assert version in available_vers\n    if version == 'v1.0-trainval':\n        train_scenes = splits.train\n        val_scenes = splits.val\n    elif version == 'v1.0-test':\n        train_scenes = splits.test\n        val_scenes = []\n    elif version == 'v1.0-mini':\n        train_scenes = splits.mini_train\n        val_scenes = splits.mini_val\n    else:\n        raise ValueError('unknown')\n\n    # filter existing scenes.\n    available_scenes = get_available_scenes(nusc)\n    available_scene_names = [s['name'] for s in available_scenes]\n    train_scenes = list(\n        filter(lambda x: x in available_scene_names, train_scenes))\n    val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))\n    train_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in train_scenes\n    ])\n    val_scenes = set([\n        available_scenes[available_scene_names.index(s)]['token']\n        for s in val_scenes\n    ])\n\n    test = 'test' in version\n    if test:\n        print('test scene: {}'.format(len(train_scenes)))\n    else:\n        print('train scene: {}, val scene: {}'.format(\n            len(train_scenes), len(val_scenes)))\n    train_nusc_infos, val_nusc_infos = _fill_trainval_infos(\n        nusc, train_scenes, val_scenes, test, max_sweeps=max_sweeps)\n\n    metadata = dict(version=version)\n    if test:\n        print('test sample: {}'.format(len(train_nusc_infos)))\n        data = dict(infos=train_nusc_infos, metadata=metadata)\n        info_path = osp.join(root_path,\n                             '{}_infos_test.pkl'.format(info_prefix))\n        mmcv.dump(data, info_path)\n    else:\n        print('train sample: {}, val sample: {}'.format(\n            len(train_nusc_infos), len(val_nusc_infos)))\n        data = dict(infos=train_nusc_infos, metadata=metadata)\n        info_path = osp.join(root_path,\n                             '{}_infos_train.pkl'.format(info_prefix))\n        mmcv.dump(data, info_path)\n        data['infos'] = val_nusc_infos\n        info_val_path = osp.join(root_path,\n                                 
'{}_infos_val.pkl'.format(info_prefix))\n        mmcv.dump(data, info_val_path)\n\n\ndef get_available_scenes(nusc):\n    \"\"\"Get available scenes from the input nuscenes class.\n\n    Given the raw data, get the information of available scenes for\n    further info generation.\n\n    Args:\n        nusc (class): Dataset class in the nuScenes dataset.\n\n    Returns:\n        available_scenes (list[dict]): List of basic information for the\n            available scenes.\n    \"\"\"\n    available_scenes = []\n    print('total scene num: {}'.format(len(nusc.scene)))\n    for scene in nusc.scene:\n        scene_token = scene['token']\n        scene_rec = nusc.get('scene', scene_token)\n        sample_rec = nusc.get('sample', scene_rec['first_sample_token'])\n        sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])\n        has_more_frames = True\n        scene_not_exist = False\n        while has_more_frames:\n            lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token'])\n            lidar_path = str(lidar_path)\n            if os.getcwd() in lidar_path:\n                # path from lyftdataset is absolute path\n                lidar_path = lidar_path.split(f'{os.getcwd()}/')[-1]\n                # relative path\n            if not mmcv.is_filepath(lidar_path):\n                scene_not_exist = True\n                break\n            else:\n                break\n        if scene_not_exist:\n            continue\n        available_scenes.append(scene)\n    print('exist scene num: {}'.format(len(available_scenes)))\n    return available_scenes\n\n\ndef _fill_trainval_infos(nusc,\n                         train_scenes,\n                         val_scenes,\n                         test=False,\n                         max_sweeps=10):\n    \"\"\"Generate the train/val infos from the raw data.\n\n    Args:\n        nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.\n        train_scenes (list[str]): Basic information of training scenes.\n        val_scenes (list[str]): Basic information of validation scenes.\n        test (bool): Whether use the test mode. In the test mode, no\n            annotations can be accessed. Default: False.\n        max_sweeps (int): Max number of sweeps. 
Default: 10.\n\n    Returns:\n        tuple[list[dict]]: Information of training set and validation set\n            that will be saved to the info file.\n    \"\"\"\n    train_nusc_infos = []\n    val_nusc_infos = []\n\n    for sample in mmcv.track_iter_progress(nusc.sample):\n        lidar_token = sample['data']['LIDAR_TOP']\n        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])\n        cs_record = nusc.get('calibrated_sensor',\n                             sd_rec['calibrated_sensor_token'])\n        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])\n        lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)\n\n        mmcv.check_file_exist(lidar_path)\n\n        info = {\n            'lidar_path': lidar_path,\n            'token': sample['token'],\n            'sweeps': [],\n            'cams': dict(),\n            'lidar2ego_translation': cs_record['translation'],\n            'lidar2ego_rotation': cs_record['rotation'],\n            'ego2global_translation': pose_record['translation'],\n            'ego2global_rotation': pose_record['rotation'],\n            'timestamp': sample['timestamp'],\n        }\n\n        l2e_r = info['lidar2ego_rotation']\n        l2e_t = info['lidar2ego_translation']\n        e2g_r = info['ego2global_rotation']\n        e2g_t = info['ego2global_translation']\n        l2e_r_mat = Quaternion(l2e_r).rotation_matrix\n        e2g_r_mat = Quaternion(e2g_r).rotation_matrix\n\n        # obtain 6 image's information per frame\n        camera_types = [\n            'CAM_FRONT',\n            'CAM_FRONT_RIGHT',\n            'CAM_FRONT_LEFT',\n            'CAM_BACK',\n            'CAM_BACK_LEFT',\n            'CAM_BACK_RIGHT',\n        ]\n        for cam in camera_types:\n            cam_token = sample['data'][cam]\n            cam_path, _, cam_intrinsic = nusc.get_sample_data(cam_token)\n            cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,\n                                         e2g_t, e2g_r_mat, cam)\n            cam_info.update(cam_intrinsic=cam_intrinsic)\n            info['cams'].update({cam: cam_info})\n\n        # obtain sweeps for a single key-frame\n        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])\n        sweeps = []\n        while len(sweeps) < max_sweeps:\n            if not sd_rec['prev'] == '':\n                sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,\n                                          l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')\n                sweeps.append(sweep)\n                sd_rec = nusc.get('sample_data', sd_rec['prev'])\n            else:\n                break\n        info['sweeps'] = sweeps\n        # obtain annotation\n        if not test:\n            annotations = [\n                nusc.get('sample_annotation', token)\n                for token in sample['anns']\n            ]\n            locs = np.array([b.center for b in boxes]).reshape(-1, 3)\n            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)\n            rots = np.array([b.orientation.yaw_pitch_roll[0]\n                             for b in boxes]).reshape(-1, 1)\n            velocity = np.array(\n                [nusc.box_velocity(token)[:2] for token in sample['anns']])\n            valid_flag = np.array(\n                [(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0\n                 for anno in annotations],\n                dtype=bool).reshape(-1)\n            # convert velo from global to lidar\n            for i in range(len(boxes)):\n                velo = 
np.array([*velocity[i], 0.0])\n                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(\n                    l2e_r_mat).T\n                velocity[i] = velo[:2]\n\n            names = [b.name for b in boxes]\n            for i in range(len(names)):\n                if names[i] in NuScenesDataset.NameMapping:\n                    names[i] = NuScenesDataset.NameMapping[names[i]]\n            names = np.array(names)\n            # we need to convert rot to SECOND format.\n            gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)\n            assert len(gt_boxes) == len(\n                annotations), f'{len(gt_boxes)}, {len(annotations)}'\n            info['gt_boxes'] = gt_boxes\n            info['gt_names'] = names\n            info['gt_velocity'] = velocity.reshape(-1, 2)\n            info['num_lidar_pts'] = np.array(\n                [a['num_lidar_pts'] for a in annotations])\n            info['num_radar_pts'] = np.array(\n                [a['num_radar_pts'] for a in annotations])\n            info['valid_flag'] = valid_flag\n\n        if sample['scene_token'] in train_scenes:\n            train_nusc_infos.append(info)\n        else:\n            val_nusc_infos.append(info)\n\n    return train_nusc_infos, val_nusc_infos\n\n\ndef obtain_sensor2top(nusc,\n                      sensor_token,\n                      l2e_t,\n                      l2e_r_mat,\n                      e2g_t,\n                      e2g_r_mat,\n                      sensor_type='lidar'):\n    \"\"\"Obtain the info with RT matric from general sensor to Top LiDAR.\n\n    Args:\n        nusc (class): Dataset class in the nuScenes dataset.\n        sensor_token (str): Sample data token corresponding to the\n            specific sensor type.\n        l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3).\n        l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego\n            in shape (3, 3).\n        e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).\n        e2g_r_mat (np.ndarray): Rotation matrix from ego to global\n            in shape (3, 3).\n        sensor_type (str): Sensor to calibrate. 
Default: 'lidar'.\n\n    Returns:\n        sweep (dict): Sweep information after transformation.\n    \"\"\"\n    sd_rec = nusc.get('sample_data', sensor_token)\n    cs_record = nusc.get('calibrated_sensor',\n                         sd_rec['calibrated_sensor_token'])\n    pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])\n    data_path = str(nusc.get_sample_data_path(sd_rec['token']))\n    if os.getcwd() in data_path:  # path from lyftdataset is absolute path\n        data_path = data_path.split(f'{os.getcwd()}/')[-1]  # relative path\n    sweep = {\n        'data_path': data_path,\n        'type': sensor_type,\n        'sample_data_token': sd_rec['token'],\n        'sensor2ego_translation': cs_record['translation'],\n        'sensor2ego_rotation': cs_record['rotation'],\n        'ego2global_translation': pose_record['translation'],\n        'ego2global_rotation': pose_record['rotation'],\n        'timestamp': sd_rec['timestamp']\n    }\n    l2e_r_s = sweep['sensor2ego_rotation']\n    l2e_t_s = sweep['sensor2ego_translation']\n    e2g_r_s = sweep['ego2global_rotation']\n    e2g_t_s = sweep['ego2global_translation']\n\n    # obtain the RT from sensor to Top LiDAR\n    # sweep->ego->global->ego'->lidar\n    l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix\n    e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix\n    R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (\n        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)\n    T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (\n        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)\n    T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T\n                  ) + l2e_t @ np.linalg.inv(l2e_r_mat).T\n    sweep['sensor2lidar_rotation'] = R.T  # points @ R.T + T\n    sweep['sensor2lidar_translation'] = T\n    return sweep\n\n\ndef export_2d_annotation(root_path, info_path, version):\n    \"\"\"Export 2d annotation from the info file and raw data.\n\n    Args:\n        root_path (str): Root path of the raw data.\n        info_path (str): Path of the info file.\n        version (str): Dataset version.\n    \"\"\"\n    # get bbox annotations for camera\n    camera_types = [\n        'CAM_FRONT',\n        'CAM_FRONT_RIGHT',\n        'CAM_FRONT_LEFT',\n        'CAM_BACK',\n        'CAM_BACK_LEFT',\n        'CAM_BACK_RIGHT',\n    ]\n    nusc_infos = mmcv.load(info_path)['infos']\n    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)\n    # info_2d_list = []\n    cat2Ids = [\n        dict(id=nus_categories.index(cat_name), name=cat_name)\n        for cat_name in nus_categories\n    ]\n    coco_ann_id = 0\n    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)\n    for info in mmcv.track_iter_progress(nusc_infos):\n        for cam in camera_types:\n            cam_info = info['cams'][cam]\n            coco_infos = get_2d_boxes(\n                nusc,\n                cam_info['sample_data_token'],\n                visibilities=['', '1', '2', '3', '4'])\n            (height, width, _) = mmcv.imread(cam_info['data_path']).shape\n            coco_2d_dict['images'].append(\n                dict(\n                    file_name=cam_info['data_path'],\n                    id=cam_info['sample_data_token'],\n                    width=width,\n                    height=height))\n            for coco_info in coco_infos:\n                if coco_info is None:\n                    continue\n                # add an empty key for coco format\n                coco_info['segmentation'] = []\n                
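# assign a running integer index as the COCO annotation id\n                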
coco_info['id'] = coco_ann_id\n                coco_2d_dict['annotations'].append(coco_info)\n                coco_ann_id += 1\n    mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')\n\n\ndef get_2d_boxes(nusc, sample_data_token: str,\n                 visibilities: List[str]) -> List[OrderedDict]:\n    \"\"\"Get the 2D annotation records for a given `sample_data_token`.\n\n    Args:\n        sample_data_token: Sample data token belonging to a camera keyframe.\n        visibilities: Visibility filter.\n\n    Return:\n        list[dict]: List of 2D annotation record that belongs to the input\n            `sample_data_token`.\n    \"\"\"\n\n    # Get the sample data and the sample corresponding to that sample data.\n    sd_rec = nusc.get('sample_data', sample_data_token)\n\n    assert sd_rec[\n        'sensor_modality'] == 'camera', 'Error: get_2d_boxes only works' \\\n        ' for camera sample_data!'\n    if not sd_rec['is_key_frame']:\n        raise ValueError(\n            'The 2D re-projections are available only for keyframes.')\n\n    s_rec = nusc.get('sample', sd_rec['sample_token'])\n\n    # Get the calibrated sensor and ego pose\n    # record to get the transformation matrices.\n    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])\n    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])\n    camera_intrinsic = np.array(cs_rec['camera_intrinsic'])\n\n    # Get all the annotation with the specified visibilties.\n    ann_recs = [\n        nusc.get('sample_annotation', token) for token in s_rec['anns']\n    ]\n    ann_recs = [\n        ann_rec for ann_rec in ann_recs\n        if (ann_rec['visibility_token'] in visibilities)\n    ]\n\n    repro_recs = []\n\n    for ann_rec in ann_recs:\n        # Augment sample_annotation with token information.\n        ann_rec['sample_annotation_token'] = ann_rec['token']\n        ann_rec['sample_data_token'] = sample_data_token\n\n        # Get the box in global coordinates.\n        box = nusc.get_box(ann_rec['token'])\n\n        # Move them to the ego-pose frame.\n        box.translate(-np.array(pose_rec['translation']))\n        box.rotate(Quaternion(pose_rec['rotation']).inverse)\n\n        # Move them to the calibrated sensor frame.\n        box.translate(-np.array(cs_rec['translation']))\n        box.rotate(Quaternion(cs_rec['rotation']).inverse)\n\n        # Filter out the corners that are not in front of the calibrated\n        # sensor.\n        corners_3d = box.corners()\n        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()\n        corners_3d = corners_3d[:, in_front]\n\n        # Project 3d box to 2d.\n        corner_coords = view_points(corners_3d, camera_intrinsic,\n                                    True).T[:, :2].tolist()\n\n        # Keep only corners that fall within the image.\n        final_coords = post_process_coords(corner_coords)\n\n        # Skip if the convex hull of the re-projected corners\n        # does not intersect the image canvas.\n        if final_coords is None:\n            continue\n        else:\n            min_x, min_y, max_x, max_y = final_coords\n\n        # Generate dictionary record to be included in the .json file.\n        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,\n                                    sample_data_token, sd_rec['filename'])\n        repro_recs.append(repro_rec)\n\n    return repro_recs\n\n\ndef post_process_coords(\n    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)\n) -> Union[Tuple[float, float, float, 
float], None]:\n    \"\"\"Get the intersection of the convex hull of the reprojected bbox corners\n    and the image canvas, return None if no intersection.\n\n    Args:\n        corner_coords (list[int]): Corner coordinates of reprojected\n            bounding box.\n        imsize (tuple[int]): Size of the image canvas.\n\n    Return:\n        tuple [float]: Intersection of the convex hull of the 2D box\n            corners and the image canvas.\n    \"\"\"\n    polygon_from_2d_box = MultiPoint(corner_coords).convex_hull\n    img_canvas = box(0, 0, imsize[0], imsize[1])\n\n    if polygon_from_2d_box.intersects(img_canvas):\n        img_intersection = polygon_from_2d_box.intersection(img_canvas)\n        intersection_coords = np.array(\n            [coord for coord in img_intersection.exterior.coords])\n\n        min_x = min(intersection_coords[:, 0])\n        min_y = min(intersection_coords[:, 1])\n        max_x = max(intersection_coords[:, 0])\n        max_y = max(intersection_coords[:, 1])\n\n        return min_x, min_y, max_x, max_y\n    else:\n        return None\n\n\ndef generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,\n                    sample_data_token: str, filename: str) -> OrderedDict:\n    \"\"\"Generate one 2D annotation record given various information on top of\n    the 2D bounding box coordinates.\n\n    Args:\n        ann_rec (dict): Original 3d annotation record.\n        x1 (float): Minimum value of the x coordinate.\n        y1 (float): Minimum value of the y coordinate.\n        x2 (float): Maximum value of the x coordinate.\n        y2 (float): Maximum value of the y coordinate.\n        sample_data_token (str): Sample data token.\n        filename (str): The corresponding image file where the annotation\n            is present.\n\n    Returns:\n        dict: A sample 2D annotation record.\n            - file_name (str): file name\n            - image_id (str): sample data token\n            - area (float): 2d box area\n            - category_name (str): category name\n            - category_id (int): category id\n            - bbox (list[float]): left x, top y, dx, dy of 2d box\n            - iscrowd (int): whether the area is crowd\n    \"\"\"\n    repro_rec = OrderedDict()\n    repro_rec['sample_data_token'] = sample_data_token\n    coco_rec = dict()\n\n    relevant_keys = [\n        'attribute_tokens',\n        'category_name',\n        'instance_token',\n        'next',\n        'num_lidar_pts',\n        'num_radar_pts',\n        'prev',\n        'sample_annotation_token',\n        'sample_data_token',\n        'visibility_token',\n    ]\n\n    for key, value in ann_rec.items():\n        if key in relevant_keys:\n            repro_rec[key] = value\n\n    repro_rec['bbox_corners'] = [x1, y1, x2, y2]\n    repro_rec['filename'] = filename\n\n    coco_rec['file_name'] = filename\n    coco_rec['image_id'] = sample_data_token\n    coco_rec['area'] = (y2 - y1) * (x2 - x1)\n\n    if repro_rec['category_name'] not in NuScenesDataset.NameMapping:\n        return None\n    cat_name = NuScenesDataset.NameMapping[repro_rec['category_name']]\n    coco_rec['category_name'] = cat_name\n    coco_rec['category_id'] = nus_categories.index(cat_name)\n    coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]\n    coco_rec['iscrowd'] = 0\n\n    return coco_rec\n"
  },
  {
    "path": "tools/data_converter/scannet_data_utils.py",
    "content": "import mmcv\nimport numpy as np\nfrom concurrent import futures as futures\nfrom os import path as osp\n\n\nclass ScanNetData(object):\n    \"\"\"ScanNet data.\n\n    Generate scannet infos for scannet_converter.\n\n    Args:\n        root_path (str): Root path of the raw data.\n        split (str): Set split type of the data. Default: 'train'.\n    \"\"\"\n\n    def __init__(self, root_path, split='train'):\n        self.root_dir = root_path\n        self.split = split\n        self.split_dir = osp.join(root_path)\n        self.classes = [\n            'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',\n            'bookshelf', 'picture', 'counter', 'desk', 'curtain',\n            'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',\n            'garbagebin'\n        ]\n        self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}\n        self.label2cat = {self.cat2label[t]: t for t in self.cat2label}\n        self.cat_ids = np.array(\n            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39])\n        self.cat_ids2class = {\n            nyu40id: i\n            for i, nyu40id in enumerate(list(self.cat_ids))\n        }\n        assert split in ['train', 'val', 'test']\n        split_file = osp.join(self.root_dir, 'meta_data',\n                              f'scannetv2_{split}.txt')\n        mmcv.check_file_exist(split_file)\n        self.sample_id_list = mmcv.list_from_file(split_file)\n\n    def __len__(self):\n        return len(self.sample_id_list)\n\n    def get_box_label(self, idx):\n        box_file = osp.join(self.root_dir, 'scannet_train_instance_data',\n                            f'{idx}_bbox.npy')\n        mmcv.check_file_exist(box_file)\n        return np.load(box_file)\n\n    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):\n        \"\"\"Get data infos.\n\n        This method gets information from the raw data.\n\n        Args:\n            num_workers (int): Number of threads to be used. Default: 4.\n            has_label (bool): Whether the data has label. 
Default: True.\n            sample_id_list (list[int]): Index list of the sample.\n                Default: None.\n\n        Returns:\n            infos (list[dict]): Information of the raw data.\n        \"\"\"\n\n        def process_single_scene(sample_idx):\n            print(f'{self.split} sample_idx: {sample_idx}')\n            info = dict()\n            pc_info = {'num_features': 6, 'lidar_idx': sample_idx}\n            info['point_cloud'] = pc_info\n            pts_filename = osp.join(self.root_dir,\n                                    'scannet_train_instance_data',\n                                    f'{sample_idx}_vert.npy')\n            pts_instance_mask_path = osp.join(self.root_dir,\n                                              'scannet_train_instance_data',\n                                              f'{sample_idx}_ins_label.npy')\n            pts_semantic_mask_path = osp.join(self.root_dir,\n                                              'scannet_train_instance_data',\n                                              f'{sample_idx}_sem_label.npy')\n\n            points = np.load(pts_filename)\n            pts_instance_mask = np.load(pts_instance_mask_path).astype(np.long)\n            pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.long)\n\n            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))\n            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))\n            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))\n\n            points.tofile(\n                osp.join(self.root_dir, 'points', f'{sample_idx}.bin'))\n            pts_instance_mask.tofile(\n                osp.join(self.root_dir, 'instance_mask', f'{sample_idx}.bin'))\n            pts_semantic_mask.tofile(\n                osp.join(self.root_dir, 'semantic_mask', f'{sample_idx}.bin'))\n\n            info['pts_path'] = osp.join('points', f'{sample_idx}.bin')\n            info['pts_instance_mask_path'] = osp.join('instance_mask',\n                                                      f'{sample_idx}.bin')\n            info['pts_semantic_mask_path'] = osp.join('semantic_mask',\n                                                      f'{sample_idx}.bin')\n\n            if has_label:\n                annotations = {}\n                boxes_with_classes = self.get_box_label(\n                    sample_idx)  # k, 6 + class\n                annotations['gt_num'] = boxes_with_classes.shape[0]\n                if annotations['gt_num'] != 0:\n                    minmax_boxes3d = boxes_with_classes[:, :-1]  # k, 6\n                    classes = boxes_with_classes[:, -1]  # k, 1\n                    annotations['name'] = np.array([\n                        self.label2cat[self.cat_ids2class[classes[i]]]\n                        for i in range(annotations['gt_num'])\n                    ])\n                    annotations['location'] = minmax_boxes3d[:, :3]\n                    annotations['dimensions'] = minmax_boxes3d[:, 3:6]\n                    annotations['gt_boxes_upright_depth'] = minmax_boxes3d\n                    annotations['index'] = np.arange(\n                        annotations['gt_num'], dtype=np.int32)\n                    annotations['class'] = np.array([\n                        self.cat_ids2class[classes[i]]\n                        for i in range(annotations['gt_num'])\n                    ])\n                info['annos'] = annotations\n            return info\n\n        sample_id_list = sample_id_list if sample_id_list is not None \\\n            else 
self.sample_id_list\n        with futures.ThreadPoolExecutor(num_workers) as executor:\n            infos = executor.map(process_single_scene, sample_id_list)\n        return list(infos)\n"
  },
  {
    "path": "tools/data_converter/sunrgbd_data_utils.py",
    "content": "import mmcv\nimport numpy as np\nfrom concurrent import futures as futures\nfrom os import path as osp\nfrom scipy import io as sio\n\n\ndef random_sampling(points, num_points, replace=None, return_choices=False):\n    \"\"\"Random sampling.\n\n    Sampling point cloud to a certain number of points.\n\n    Args:\n        points (ndarray): Point cloud.\n        num_points (int): The number of samples.\n        replace (bool): Whether the sample is with or without replacement.\n        return_choices (bool): Whether to return choices.\n\n    Returns:\n        points (ndarray): Point cloud after sampling.\n    \"\"\"\n\n    if replace is None:\n        replace = (points.shape[0] < num_points)\n    choices = np.random.choice(points.shape[0], num_points, replace=replace)\n    if return_choices:\n        return points[choices], choices\n    else:\n        return points[choices]\n\n\nclass SUNRGBDInstance(object):\n\n    def __init__(self, line):\n        data = line.split(' ')\n        data[1:] = [float(x) for x in data[1:]]\n        self.classname = data[0]\n        self.xmin = data[1]\n        self.ymin = data[2]\n        self.xmax = data[1] + data[3]\n        self.ymax = data[2] + data[4]\n        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])\n        self.centroid = np.array([data[5], data[6], data[7]])\n        self.w = data[8]\n        self.l = data[9]  # noqa: E741\n        self.h = data[10]\n        self.orientation = np.zeros((3, ))\n        self.orientation[0] = data[11]\n        self.orientation[1] = data[12]\n        self.heading_angle = -1 * np.arctan2(self.orientation[1],\n                                             self.orientation[0])\n        self.box3d = np.concatenate([\n            self.centroid,\n            np.array([self.l * 2, self.w * 2, self.h * 2, self.heading_angle])\n        ])\n\n\nclass SUNRGBDData(object):\n    \"\"\"SUNRGBD data.\n\n    Generate scannet infos for sunrgbd_converter.\n\n    Args:\n        root_path (str): Root path of the raw data.\n        split (str): Set split type of the data. Default: 'train'.\n        use_v1 (bool): Whether to use v1. 
Default: False.\n    \"\"\"\n\n    def __init__(self, root_path, split='train', use_v1=False):\n        self.root_dir = root_path\n        self.split = split\n        self.split_dir = osp.join(root_path, 'sunrgbd_trainval')\n        self.classes = [\n            'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',\n            'night_stand', 'bookshelf', 'bathtub'\n        ]\n        self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}\n        self.label2cat = {\n            label: self.classes[label]\n            for label in range(len(self.classes))\n        }\n        assert split in ['train', 'val', 'test']\n        split_file = osp.join(self.split_dir, f'{split}_data_idx.txt')\n        mmcv.check_file_exist(split_file)\n        self.sample_id_list = list(map(int, mmcv.list_from_file(split_file)))\n        self.image_dir = osp.join(self.split_dir, 'image')\n        self.calib_dir = osp.join(self.split_dir, 'calib')\n        self.depth_dir = osp.join(self.split_dir, 'depth')\n        if use_v1:\n            self.label_dir = osp.join(self.split_dir, 'label_v1')\n        else:\n            self.label_dir = osp.join(self.split_dir, 'label')\n\n    def __len__(self):\n        return len(self.sample_id_list)\n\n    def get_image(self, idx):\n        img_filename = osp.join(self.image_dir, f'{idx:06d}.jpg')\n        return mmcv.imread(img_filename)\n\n    def get_image_shape(self, idx):\n        image = self.get_image(idx)\n        return np.array(image.shape[:2], dtype=np.int32)\n\n    def get_depth(self, idx):\n        depth_filename = osp.join(self.depth_dir, f'{idx:06d}.mat')\n        depth = sio.loadmat(depth_filename)['instance']\n        return depth\n\n    def get_calibration(self, idx):\n        calib_filepath = osp.join(self.calib_dir, f'{idx:06d}.txt')\n        lines = [line.rstrip() for line in open(calib_filepath)]\n        Rt = np.array([float(x) for x in lines[0].split(' ')])\n        Rt = np.reshape(Rt, (3, 3), order='F').astype(np.float32)\n        K = np.array([float(x) for x in lines[1].split(' ')])\n        K = np.reshape(K, (3, 3), order='F').astype(np.float32)\n        return K, Rt\n\n    def get_label_objects(self, idx):\n        label_filename = osp.join(self.label_dir, f'{idx:06d}.txt')\n        lines = [line.rstrip() for line in open(label_filename)]\n        objects = [SUNRGBDInstance(line) for line in lines]\n        return objects\n\n    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):\n        \"\"\"Get data infos.\n\n        This method gets information from the raw data.\n\n        Args:\n            num_workers (int): Number of threads to be used. Default: 4.\n            has_label (bool): Whether the data has label. 
Default: True.\n            sample_id_list (list[int]): Index list of the sample.\n                Default: None.\n\n        Returns:\n            infos (list[dict]): Information of the raw data.\n        \"\"\"\n\n        def process_single_scene(sample_idx):\n            print(f'{self.split} sample_idx: {sample_idx}')\n            # convert depth to points\n            SAMPLE_NUM = 50000\n            # TODO: Check whether can move the point\n            #  sampling process during training.\n            pc_upright_depth = self.get_depth(sample_idx)\n            pc_upright_depth_subsampled = random_sampling(\n                pc_upright_depth, SAMPLE_NUM)\n\n            info = dict()\n            pc_info = {'num_features': 6, 'lidar_idx': sample_idx}\n            info['point_cloud'] = pc_info\n\n            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))\n            pc_upright_depth_subsampled.tofile(\n                osp.join(self.root_dir, 'points', f'{sample_idx:06d}.bin'))\n\n            info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin')\n            img_path = osp.join('image', f'{sample_idx:06d}.jpg')\n            image_info = {\n                'image_idx': sample_idx,\n                'image_shape': self.get_image_shape(sample_idx),\n                'image_path': img_path\n            }\n            info['image'] = image_info\n\n            K, Rt = self.get_calibration(sample_idx)\n            calib_info = {'K': K, 'Rt': Rt}\n            info['calib'] = calib_info\n\n            if has_label:\n                obj_list = self.get_label_objects(sample_idx)\n                annotations = {}\n                annotations['gt_num'] = len([\n                    obj.classname for obj in obj_list\n                    if obj.classname in self.cat2label.keys()\n                ])\n                if annotations['gt_num'] != 0:\n                    annotations['name'] = np.array([\n                        obj.classname for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ])\n                    annotations['bbox'] = np.concatenate([\n                        obj.box2d.reshape(1, 4) for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ],\n                                                         axis=0)\n                    annotations['location'] = np.concatenate([\n                        obj.centroid.reshape(1, 3) for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ],\n                                                             axis=0)\n                    annotations['dimensions'] = 2 * np.array([\n                        [obj.l, obj.h, obj.w] for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ])  # lhw(depth) format\n                    annotations['rotation_y'] = np.array([\n                        obj.heading_angle for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ])\n                    annotations['index'] = np.arange(\n                        len(obj_list), dtype=np.int32)\n                    annotations['class'] = np.array([\n                        self.cat2label[obj.classname] for obj in obj_list\n                        if obj.classname in self.cat2label.keys()\n                    ])\n                    annotations['gt_boxes_upright_depth'] = np.stack(\n                        [\n     
                       obj.box3d for obj in obj_list\n                            if obj.classname in self.cat2label.keys()\n                        ],\n                        axis=0)  # (K,8)\n                info['annos'] = annotations\n            return info\n\n        sample_id_list = sample_id_list if \\\n            sample_id_list is not None else self.sample_id_list\n        with futures.ThreadPoolExecutor(num_workers) as executor:\n            infos = executor.map(process_single_scene, sample_id_list)\n        return list(infos)\n"
  },
  {
    "path": "tools/data_converter/waymo_converter.py",
    "content": "r\"\"\"Adapted from `Waymo to KITTI converter\n    <https://github.com/caizhongang/waymo_kitti_converter>`_.\n\"\"\"\n\ntry:\n    from waymo_open_dataset import dataset_pb2\nexcept ImportError:\n    raise ImportError(\n        'Please run \"pip install waymo-open-dataset-tf-2-1-0==1.2.0\" '\n        'to install the official devkit first.')\n\nimport mmcv\nimport numpy as np\nimport tensorflow as tf\nfrom glob import glob\nfrom os.path import join\nfrom waymo_open_dataset.utils import range_image_utils, transform_utils\nfrom waymo_open_dataset.utils.frame_utils import \\\n    parse_range_image_and_camera_projection\n\n\nclass Waymo2KITTI(object):\n    \"\"\"Waymo to KITTI converter.\n\n    This class serves as the converter to change the waymo raw data to KITTI\n    format.\n\n    Args:\n        load_dir (str): Directory to load waymo raw data.\n        save_dir (str): Directory to save data in KITTI format.\n        prefix (str): Prefix of filename. In general, 0 for training, 1 for\n            validation and 2 for testing.\n        workers (str): Number of workers for the parallel process.\n        test_mode (bool): Whether in the test_mode. Default: False.\n    \"\"\"\n\n    def __init__(self,\n                 load_dir,\n                 save_dir,\n                 prefix,\n                 workers=64,\n                 test_mode=False):\n        self.filter_empty_3dboxes = True\n        self.filter_no_label_zone_points = True\n\n        self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST']\n\n        # Only data collected in specific locations will be converted\n        # If set None, this filter is disabled\n        # Available options: location_sf (main dataset)\n        self.selected_waymo_locations = None\n        self.save_track_id = False\n\n        # turn on eager execution for older tensorflow versions\n        if int(tf.__version__.split('.')[0]) < 2:\n            tf.enable_eager_execution()\n\n        self.lidar_list = [\n            '_FRONT', '_FRONT_RIGHT', '_FRONT_LEFT', '_SIDE_RIGHT',\n            '_SIDE_LEFT'\n        ]\n        self.type_list = [\n            'UNKNOWN', 'VEHICLE', 'PEDESTRIAN', 'SIGN', 'CYCLIST'\n        ]\n        self.waymo_to_kitti_class_map = {\n            'UNKNOWN': 'DontCare',\n            'PEDESTRIAN': 'Pedestrian',\n            'VEHICLE': 'Car',\n            'CYCLIST': 'Cyclist',\n            'SIGN': 'Sign'  # not in kitti\n        }\n\n        self.load_dir = load_dir\n        self.save_dir = save_dir\n        self.prefix = prefix\n        self.workers = int(workers)\n        self.test_mode = test_mode\n\n        self.tfrecord_pathnames = sorted(\n            glob(join(self.load_dir, '*.tfrecord')))\n\n        self.label_save_dir = f'{self.save_dir}/label_'\n        self.label_all_save_dir = f'{self.save_dir}/label_all'\n        self.image_save_dir = f'{self.save_dir}/image_'\n        self.calib_save_dir = f'{self.save_dir}/calib'\n        self.point_cloud_save_dir = f'{self.save_dir}/velodyne'\n        self.pose_save_dir = f'{self.save_dir}/pose'\n\n        self.create_folder()\n\n    def convert(self):\n        \"\"\"Convert action.\"\"\"\n        print('Start converting ...')\n        mmcv.track_parallel_progress(self.convert_one, range(len(self)),\n                                     self.workers)\n        print('\\nFinished ...')\n\n    def convert_one(self, file_idx):\n        \"\"\"Convert action for single file.\n\n        Args:\n            file_idx (int): Index of the file to be converted.\n        
\"\"\"\n        pathname = self.tfrecord_pathnames[file_idx]\n        dataset = tf.data.TFRecordDataset(pathname, compression_type='')\n\n        for frame_idx, data in enumerate(dataset):\n\n            frame = dataset_pb2.Frame()\n            frame.ParseFromString(bytearray(data.numpy()))\n            if (self.selected_waymo_locations is not None\n                    and frame.context.stats.location\n                    not in self.selected_waymo_locations):\n                continue\n\n            self.save_image(frame, file_idx, frame_idx)\n            self.save_calib(frame, file_idx, frame_idx)\n            self.save_lidar(frame, file_idx, frame_idx)\n            self.save_pose(frame, file_idx, frame_idx)\n\n            if not self.test_mode:\n                self.save_label(frame, file_idx, frame_idx)\n\n    def __len__(self):\n        \"\"\"Length of the filename list.\"\"\"\n        return len(self.tfrecord_pathnames)\n\n    def save_image(self, frame, file_idx, frame_idx):\n        \"\"\"Parse and save the images in png format.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        for img in frame.images:\n            img_path = f'{self.image_save_dir}{str(img.name - 1)}/' + \\\n                f'{self.prefix}{str(file_idx).zfill(3)}' + \\\n                f'{str(frame_idx).zfill(3)}.png'\n            img = mmcv.imfrombytes(img.image)\n            mmcv.imwrite(img, img_path)\n\n    def save_calib(self, frame, file_idx, frame_idx):\n        \"\"\"Parse and save the calibration data.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        # waymo front camera to kitti reference camera\n        T_front_cam_to_ref = np.array([[0.0, -1.0, 0.0], [0.0, 0.0, -1.0],\n                                       [1.0, 0.0, 0.0]])\n        camera_calibs = []\n        R0_rect = [f'{i:e}' for i in np.eye(3).flatten()]\n        Tr_velo_to_cams = []\n        calib_context = ''\n\n        for camera in frame.context.camera_calibrations:\n            # extrinsic parameters\n            T_cam_to_vehicle = np.array(camera.extrinsic.transform).reshape(\n                4, 4)\n            T_vehicle_to_cam = np.linalg.inv(T_cam_to_vehicle)\n            Tr_velo_to_cam = \\\n                self.cart_to_homo(T_front_cam_to_ref) @ T_vehicle_to_cam\n            if camera.name == 1:  # FRONT = 1, see dataset.proto for details\n                self.T_velo_to_front_cam = Tr_velo_to_cam.copy()\n            Tr_velo_to_cam = Tr_velo_to_cam[:3, :].reshape((12, ))\n            Tr_velo_to_cams.append([f'{i:e}' for i in Tr_velo_to_cam])\n\n            # intrinsic parameters\n            camera_calib = np.zeros((3, 4))\n            camera_calib[0, 0] = camera.intrinsic[0]\n            camera_calib[1, 1] = camera.intrinsic[1]\n            camera_calib[0, 2] = camera.intrinsic[2]\n            camera_calib[1, 2] = camera.intrinsic[3]\n            camera_calib[2, 2] = 1\n            camera_calib = list(camera_calib.reshape(12))\n            camera_calib = [f'{i:e}' for i in camera_calib]\n            camera_calibs.append(camera_calib)\n\n        # all camera ids are saved as id-1 in the result because\n        # camera 0 is unknown in the proto\n        for i in range(5):\n            calib_context += 'P' + str(i) + ': ' + \\\n          
      ' '.join(camera_calibs[i]) + '\\n'\n        calib_context += 'R0_rect' + ': ' + ' '.join(R0_rect) + '\\n'\n        for i in range(5):\n            calib_context += 'Tr_velo_to_cam_' + str(i) + ': ' + \\\n                ' '.join(Tr_velo_to_cams[i]) + '\\n'\n\n        with open(\n                f'{self.calib_save_dir}/{self.prefix}' +\n                f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt',\n                'w+') as fp_calib:\n            fp_calib.write(calib_context)\n            fp_calib.close()\n\n    def save_lidar(self, frame, file_idx, frame_idx):\n        \"\"\"Parse and save the lidar data in psd format.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        range_images, camera_projections, range_image_top_pose = \\\n            parse_range_image_and_camera_projection(frame)\n\n        # First return\n        points_0, cp_points_0, intensity_0, elongation_0 = \\\n            self.convert_range_image_to_point_cloud(\n                frame,\n                range_images,\n                camera_projections,\n                range_image_top_pose,\n                ri_index=0\n            )\n        points_0 = np.concatenate(points_0, axis=0)\n        intensity_0 = np.concatenate(intensity_0, axis=0)\n        elongation_0 = np.concatenate(elongation_0, axis=0)\n\n        # Second return\n        points_1, cp_points_1, intensity_1, elongation_1 = \\\n            self.convert_range_image_to_point_cloud(\n                frame,\n                range_images,\n                camera_projections,\n                range_image_top_pose,\n                ri_index=1\n            )\n        points_1 = np.concatenate(points_1, axis=0)\n        intensity_1 = np.concatenate(intensity_1, axis=0)\n        elongation_1 = np.concatenate(elongation_1, axis=0)\n\n        points = np.concatenate([points_0, points_1], axis=0)\n        intensity = np.concatenate([intensity_0, intensity_1], axis=0)\n        elongation = np.concatenate([elongation_0, elongation_1], axis=0)\n        timestamp = frame.timestamp_micros * np.ones_like(intensity)\n\n        # concatenate x,y,z, intensity, elongation, timestamp (6-dim)\n        point_cloud = np.column_stack(\n            (points, intensity, elongation, timestamp))\n\n        pc_path = f'{self.point_cloud_save_dir}/{self.prefix}' + \\\n            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin'\n        point_cloud.astype(np.float32).tofile(pc_path)\n\n    def save_label(self, frame, file_idx, frame_idx):\n        \"\"\"Parse and save the label data in txt format.\n        The relation between waymo and kitti coordinates is noteworthy:\n        1. x, y, z correspond to l, w, h (waymo) -> l, h, w (kitti)\n        2. x-y-z: front-left-up (waymo) -> right-down-front(kitti)\n        3. bbox origin at volumetric center (waymo) -> bottom center (kitti)\n        4. 
rotation: +x around y-axis (kitti) -> +x around z-axis (waymo)\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        fp_label_all = open(\n            f'{self.label_all_save_dir}/{self.prefix}' +\n            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'w+')\n        id_to_bbox = dict()\n        id_to_name = dict()\n        for labels in frame.projected_lidar_labels:\n            name = labels.name\n            for label in labels.labels:\n                # TODO: need a workaround as bbox may not belong to front cam\n                bbox = [\n                    label.box.center_x - label.box.length / 2,\n                    label.box.center_y - label.box.width / 2,\n                    label.box.center_x + label.box.length / 2,\n                    label.box.center_y + label.box.width / 2\n                ]\n                id_to_bbox[label.id] = bbox\n                id_to_name[label.id] = name - 1\n\n        for obj in frame.laser_labels:\n            bounding_box = None\n            name = None\n            id = obj.id\n            for lidar in self.lidar_list:\n                if id + lidar in id_to_bbox:\n                    bounding_box = id_to_bbox.get(id + lidar)\n                    name = str(id_to_name.get(id + lidar))\n                    break\n\n            if bounding_box is None or name is None:\n                name = '0'\n                bounding_box = (0, 0, 0, 0)\n\n            my_type = self.type_list[obj.type]\n\n            if my_type not in self.selected_waymo_classes:\n                continue\n\n            if self.filter_empty_3dboxes and obj.num_lidar_points_in_box < 1:\n                continue\n\n            my_type = self.waymo_to_kitti_class_map[my_type]\n\n            height = obj.box.height\n            width = obj.box.width\n            length = obj.box.length\n\n            x = obj.box.center_x\n            y = obj.box.center_y\n            z = obj.box.center_z - height / 2\n\n            # project bounding box to the virtual reference frame\n            pt_ref = self.T_velo_to_front_cam @ \\\n                np.array([x, y, z, 1]).reshape((4, 1))\n            x, y, z, _ = pt_ref.flatten().tolist()\n\n            rotation_y = -obj.box.heading - np.pi / 2\n            track_id = obj.id\n\n            # not available\n            truncated = 0\n            occluded = 0\n            alpha = -10\n\n            line = my_type + \\\n                ' {} {} {} {} {} {} {} {} {} {} {} {} {} {}\\n'.format(\n                    round(truncated, 2), occluded, round(alpha, 2),\n                    round(bounding_box[0], 2), round(bounding_box[1], 2),\n                    round(bounding_box[2], 2), round(bounding_box[3], 2),\n                    round(height, 2), round(width, 2), round(length, 2),\n                    round(x, 2), round(y, 2), round(z, 2),\n                    round(rotation_y, 2))\n\n            if self.save_track_id:\n                line_all = line[:-1] + ' ' + name + ' ' + track_id + '\\n'\n            else:\n                line_all = line[:-1] + ' ' + name + '\\n'\n\n            fp_label = open(\n                f'{self.label_save_dir}{name}/{self.prefix}' +\n                f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'a')\n            fp_label.write(line)\n            fp_label.close()\n\n            fp_label_all.write(line_all)\n\n        fp_label_all.close()\n\n    
def save_pose(self, frame, file_idx, frame_idx):\n        \"\"\"Parse and save the pose data.\n\n        Note that SDC's own pose is not included in the regular training\n        of KITTI dataset. KITTI raw dataset contains ego motion files\n        but are not often used. Pose is important for algorithms that\n        take advantage of the temporal information.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame proto.\n            file_idx (int): Current file index.\n            frame_idx (int): Current frame index.\n        \"\"\"\n        pose = np.array(frame.pose.transform).reshape(4, 4)\n        np.savetxt(\n            join(f'{self.pose_save_dir}/{self.prefix}' +\n                 f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'),\n            pose)\n\n    def create_folder(self):\n        \"\"\"Create folder for data preprocessing.\"\"\"\n        if not self.test_mode:\n            dir_list1 = [\n                self.label_all_save_dir, self.calib_save_dir,\n                self.point_cloud_save_dir, self.pose_save_dir\n            ]\n            dir_list2 = [self.label_save_dir, self.image_save_dir]\n        else:\n            dir_list1 = [\n                self.calib_save_dir, self.point_cloud_save_dir,\n                self.pose_save_dir\n            ]\n            dir_list2 = [self.image_save_dir]\n        for d in dir_list1:\n            mmcv.mkdir_or_exist(d)\n        for d in dir_list2:\n            for i in range(5):\n                mmcv.mkdir_or_exist(f'{d}{str(i)}')\n\n    def convert_range_image_to_point_cloud(self,\n                                           frame,\n                                           range_images,\n                                           camera_projections,\n                                           range_image_top_pose,\n                                           ri_index=0):\n        \"\"\"Convert range images to point cloud.\n\n        Args:\n            frame (:obj:`Frame`): Open dataset frame.\n            range_images (dict): Mapping from laser_name to list of two\n                range images corresponding with two returns.\n            camera_projections (dict): Mapping from laser_name to list of two\n                camera projections corresponding with two returns.\n            range_image_top_pose (:obj:`Transform`): Range image pixel pose for\n                top lidar.\n            ri_index (int): 0 for the first return, 1 for the second return.\n                Default: 0.\n\n        Returns:\n            tuple[list[np.ndarray]]: (List of points with shape [N, 3],\n                camera projections of points with shape [N, 6], intensity\n                with shape [N, 1], elongation with shape [N, 1]). 
All the\n                lists have the length of lidar numbers (5).\n        \"\"\"\n        calibrations = sorted(\n            frame.context.laser_calibrations, key=lambda c: c.name)\n        points = []\n        cp_points = []\n        intensity = []\n        elongation = []\n\n        frame_pose = tf.convert_to_tensor(\n            value=np.reshape(np.array(frame.pose.transform), [4, 4]))\n        # [H, W, 6]\n        range_image_top_pose_tensor = tf.reshape(\n            tf.convert_to_tensor(value=range_image_top_pose.data),\n            range_image_top_pose.shape.dims)\n        # [H, W, 3, 3]\n        range_image_top_pose_tensor_rotation = \\\n            transform_utils.get_rotation_matrix(\n                range_image_top_pose_tensor[..., 0],\n                range_image_top_pose_tensor[..., 1],\n                range_image_top_pose_tensor[..., 2])\n        range_image_top_pose_tensor_translation = \\\n            range_image_top_pose_tensor[..., 3:]\n        range_image_top_pose_tensor = transform_utils.get_transform(\n            range_image_top_pose_tensor_rotation,\n            range_image_top_pose_tensor_translation)\n        for c in calibrations:\n            range_image = range_images[c.name][ri_index]\n            if len(c.beam_inclinations) == 0:\n                beam_inclinations = range_image_utils.compute_inclination(\n                    tf.constant(\n                        [c.beam_inclination_min, c.beam_inclination_max]),\n                    height=range_image.shape.dims[0])\n            else:\n                beam_inclinations = tf.constant(c.beam_inclinations)\n\n            beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])\n            extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])\n\n            range_image_tensor = tf.reshape(\n                tf.convert_to_tensor(value=range_image.data),\n                range_image.shape.dims)\n            pixel_pose_local = None\n            frame_pose_local = None\n            if c.name == dataset_pb2.LaserName.TOP:\n                pixel_pose_local = range_image_top_pose_tensor\n                pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)\n                frame_pose_local = tf.expand_dims(frame_pose, axis=0)\n            range_image_mask = range_image_tensor[..., 0] > 0\n\n            if self.filter_no_label_zone_points:\n                nlz_mask = range_image_tensor[..., 3] != 1.0  # 1.0: in NLZ\n                range_image_mask = range_image_mask & nlz_mask\n\n            range_image_cartesian = \\\n                range_image_utils.extract_point_cloud_from_range_image(\n                    tf.expand_dims(range_image_tensor[..., 0], axis=0),\n                    tf.expand_dims(extrinsic, axis=0),\n                    tf.expand_dims(tf.convert_to_tensor(\n                        value=beam_inclinations), axis=0),\n                    pixel_pose=pixel_pose_local,\n                    frame_pose=frame_pose_local)\n\n            range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)\n            points_tensor = tf.gather_nd(range_image_cartesian,\n                                         tf.compat.v1.where(range_image_mask))\n\n            cp = camera_projections[c.name][ri_index]\n            cp_tensor = tf.reshape(\n                tf.convert_to_tensor(value=cp.data), cp.shape.dims)\n            cp_points_tensor = tf.gather_nd(\n                cp_tensor, tf.compat.v1.where(range_image_mask))\n            points.append(points_tensor.numpy())\n            
cp_points.append(cp_points_tensor.numpy())\n\n            intensity_tensor = tf.gather_nd(range_image_tensor[..., 1],\n                                            tf.where(range_image_mask))\n            intensity.append(intensity_tensor.numpy())\n\n            elongation_tensor = tf.gather_nd(range_image_tensor[..., 2],\n                                             tf.where(range_image_mask))\n            elongation.append(elongation_tensor.numpy())\n\n        return points, cp_points, intensity, elongation\n\n    def cart_to_homo(self, mat):\n        \"\"\"Convert transformation matrix in Cartesian coordinates to\n        homogeneous format.\n\n        Args:\n            mat (np.ndarray): Transformation matrix in Cartesian.\n                The input matrix shape is 3x3 or 3x4.\n\n        Returns:\n            np.ndarray: Transformation matrix in homogeneous format.\n                The matrix shape is 4x4.\n        \"\"\"\n        ret = np.eye(4)\n        if mat.shape == (3, 3):\n            ret[:3, :3] = mat\n        elif mat.shape == (3, 4):\n            ret[:3, :] = mat\n        else:\n            raise ValueError(mat.shape)\n        return ret\n"
  },
  {
    "path": "tools/dist_test.sh",
    "content": "#!/usr/bin/env bash\n\nCONFIG=$1\nCHECKPOINT=$2\nGPUS=$3\nPORT=${PORT:-29502}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\npython -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \\\n    $(dirname \"$0\")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}\n"
  },
  {
    "path": "tools/dist_train.sh",
    "content": "#!/usr/bin/env bash\n\nCONFIG=$1\nGPUS=$2\nPORT=${PORT:-29501}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\npython -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \\\n    $(dirname \"$0\")/train.py $CONFIG --launcher pytorch ${@:3}\n"
  },
  {
    "path": "tools/misc/fuse_conv_bn.py",
    "content": "import argparse\nimport torch\nfrom mmcv.runner import save_checkpoint\nfrom torch import nn as nn\n\nfrom mmdet.apis import init_detector\n\n\ndef fuse_conv_bn(conv, bn):\n    \"\"\"During inference, the functionality of batch norm layers is turned\n    off; only the per-channel running mean and variance are used. This makes\n    it possible to fuse the BN into the preceding conv layer to save\n    computation and simplify the network structure.\"\"\"\n    conv_w = conv.weight\n    conv_b = conv.bias if conv.bias is not None else torch.zeros_like(\n        bn.running_mean)\n\n    factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)\n    conv.weight = nn.Parameter(conv_w *\n                               factor.reshape([conv.out_channels, 1, 1, 1]))\n    conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)\n    return conv\n\n\ndef fuse_module(m):\n    last_conv = None\n    last_conv_name = None\n\n    for name, child in m.named_children():\n        if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)):\n            if last_conv is None:  # only fuse BN that is after Conv\n                continue\n            fused_conv = fuse_conv_bn(last_conv, child)\n            m._modules[last_conv_name] = fused_conv\n            # To reduce changes, set BN as Identity instead of deleting it.\n            m._modules[name] = nn.Identity()\n            last_conv = None\n        elif isinstance(child, nn.Conv2d):\n            last_conv = child\n            last_conv_name = name\n        else:\n            fuse_module(child)\n    return m\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='fuse Conv and BN layers in a model')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument('checkpoint', help='checkpoint file path')\n    parser.add_argument('out', help='output path of the converted model')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n    # build the model from a config file and a checkpoint file\n    model = init_detector(args.config, args.checkpoint)\n    # fuse conv and bn layers of the model\n    fused_model = fuse_module(model)\n    save_checkpoint(fused_model, args.out)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/misc/print_config.py",
    "content": "import argparse\nfrom mmcv import Config, DictAction\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Print the whole config')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument(\n        '--options', nargs='+', action=DictAction, help='arguments in dict')\n    args = parser.parse_args()\n\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    if args.options is not None:\n        cfg.merge_from_dict(args.options)\n    print(f'Config:\\n{cfg.pretty_text}')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/misc/visualize_results.py",
    "content": "import argparse\nimport mmcv\nfrom mmcv import Config\n\nfrom mmdet3d.datasets import build_dataset\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet3D visualize the results')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('--result', help='results file in pickle format')\n    parser.add_argument(\n        '--show-dir', help='directory where visualized results will be saved')\n    args = parser.parse_args()\n\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    if args.result is not None and \\\n            not args.result.endswith(('.pkl', '.pickle')):\n        raise ValueError('The results file must be a pkl file.')\n\n    cfg = Config.fromfile(args.config)\n    cfg.data.test.test_mode = True\n\n    # build the dataset\n    dataset = build_dataset(cfg.data.test)\n    results = mmcv.load(args.result)\n\n    if getattr(dataset, 'show', None) is not None:\n        dataset.show(results, args.show_dir)\n    else:\n        raise NotImplementedError(\n            'Show is not implemented for dataset {}!'.format(\n                type(dataset).__name__))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/model_converters/convert_votenet_checkpoints.py",
    "content": "import argparse\nimport tempfile\nimport torch\nfrom mmcv import Config\nfrom mmcv.runner import load_state_dict\n\nfrom mmdet3d.models import build_detector\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet3D upgrade model version(before v0.6.0) of VoteNet')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--out', help='path of the output checkpoint file')\n    args = parser.parse_args()\n    return args\n\n\ndef parse_config(config_strings):\n    \"\"\"Parse config from strings.\n\n    Args:\n        config_strings (string): strings of model config.\n\n    Returns:\n        Config: model config\n    \"\"\"\n    temp_file = tempfile.NamedTemporaryFile()\n    config_path = f'{temp_file.name}.py'\n    with open(config_path, 'w') as f:\n        f.write(config_strings)\n\n    config = Config.fromfile(config_path)\n\n    # Update backbone config\n    if 'pool_mod' in config.model.backbone:\n        config.model.backbone.pop('pool_mod')\n\n    if 'sa_cfg' not in config.model.backbone:\n        config.model.backbone['sa_cfg'] = dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=True)\n\n    if 'type' not in config.model.bbox_head.vote_aggregation_cfg:\n        config.model.bbox_head.vote_aggregation_cfg['type'] = 'PointSAModule'\n\n    # Update bbox_head config\n    if 'pred_layer_cfg' not in config.model.bbox_head:\n        config.model.bbox_head['pred_layer_cfg'] = dict(\n            in_channels=128, shared_conv_channels=(128, 128), bias=True)\n\n    if 'feat_channels' in config.model.bbox_head:\n        config.model.bbox_head.pop('feat_channels')\n\n    if 'vote_moudule_cfg' in config.model.bbox_head:\n        config.model.bbox_head['vote_module_cfg'] = config.model.bbox_head.pop(\n            'vote_moudule_cfg')\n\n    if config.model.bbox_head.vote_aggregation_cfg.use_xyz:\n        config.model.bbox_head.vote_aggregation_cfg.mlp_channels[0] -= 3\n\n    temp_file.close()\n\n    return config\n\n\ndef main():\n    \"\"\"Convert keys in checkpoints for VoteNet.\n\n    There can be some breaking changes during the development of mmdetection3d,\n    and this tool is used for upgrading checkpoints trained with old versions\n    (before v0.6.0) to the latest one.\n    \"\"\"\n    args = parse_args()\n    checkpoint = torch.load(args.checkpoint)\n    cfg = parse_config(checkpoint['meta']['config'])\n    # Build the model and load checkpoint\n    model = build_detector(\n        cfg.model,\n        train_cfg=cfg.get('train_cfg'),\n        test_cfg=cfg.get('test_cfg'))\n    orig_ckpt = checkpoint['state_dict']\n    converted_ckpt = orig_ckpt.copy()\n\n    if cfg['dataset_type'] == 'ScanNetDataset':\n        NUM_CLASSES = 18\n    elif cfg['dataset_type'] == 'SUNRGBDDataset':\n        NUM_CLASSES = 10\n    else:\n        raise NotImplementedError\n\n    RENAME_PREFIX = {\n        'bbox_head.conv_pred.0': 'bbox_head.conv_pred.shared_convs.layer0',\n        'bbox_head.conv_pred.1': 'bbox_head.conv_pred.shared_convs.layer1'\n    }\n\n    DEL_KEYS = [\n        'bbox_head.conv_pred.0.bn.num_batches_tracked',\n        'bbox_head.conv_pred.1.bn.num_batches_tracked'\n    ]\n\n    EXTRACT_KEYS = {\n        'bbox_head.conv_pred.conv_cls.weight':\n        ('bbox_head.conv_pred.conv_out.weight', [(0, 2), (-NUM_CLASSES, -1)]),\n        'bbox_head.conv_pred.conv_cls.bias':\n        ('bbox_head.conv_pred.conv_out.bias', [(0, 2), (-NUM_CLASSES, 
-1)]),\n        'bbox_head.conv_pred.conv_reg.weight':\n        ('bbox_head.conv_pred.conv_out.weight', [(2, -NUM_CLASSES)]),\n        'bbox_head.conv_pred.conv_reg.bias':\n        ('bbox_head.conv_pred.conv_out.bias', [(2, -NUM_CLASSES)])\n    }\n\n    # Delete some useless keys\n    for key in DEL_KEYS:\n        converted_ckpt.pop(key)\n\n    # Rename keys with specific prefix\n    RENAME_KEYS = dict()\n    for old_key in converted_ckpt.keys():\n        for rename_prefix in RENAME_PREFIX.keys():\n            if rename_prefix in old_key:\n                new_key = old_key.replace(rename_prefix,\n                                          RENAME_PREFIX[rename_prefix])\n                RENAME_KEYS[new_key] = old_key\n    for new_key, old_key in RENAME_KEYS.items():\n        converted_ckpt[new_key] = converted_ckpt.pop(old_key)\n\n    # Extract weights and rename the keys\n    for new_key, (old_key, indices) in EXTRACT_KEYS.items():\n        cur_layers = orig_ckpt[old_key]\n        converted_layers = []\n        for (start, end) in indices:\n            if end != -1:\n                converted_layers.append(cur_layers[start:end])\n            else:\n                converted_layers.append(cur_layers[start:])\n        converted_layers = torch.cat(converted_layers, 0)\n        converted_ckpt[new_key] = converted_layers\n        if old_key in converted_ckpt.keys():\n            converted_ckpt.pop(old_key)\n\n    # Check the converted checkpoint by loading to the model\n    load_state_dict(model, converted_ckpt, strict=True)\n    checkpoint['state_dict'] = converted_ckpt\n    torch.save(checkpoint, args.out)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/model_converters/publish_model.py",
    "content": "import argparse\nimport subprocess\nimport torch\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Process a checkpoint to be published')\n    parser.add_argument('in_file', help='input checkpoint filename')\n    parser.add_argument('out_file', help='output checkpoint filename')\n    args = parser.parse_args()\n    return args\n\n\ndef process_checkpoint(in_file, out_file):\n    checkpoint = torch.load(in_file, map_location='cpu')\n    # remove optimizer for smaller file size\n    if 'optimizer' in checkpoint:\n        del checkpoint['optimizer']\n    # if it is necessary to remove some sensitive data in checkpoint['meta'],\n    # add the code here.\n    torch.save(checkpoint, out_file)\n    sha = subprocess.check_output(['sha256sum', out_file]).decode()\n    # strip the '.pth' suffix explicitly; str.rstrip would remove any\n    # trailing '.', 'p', 't' or 'h' characters instead of the suffix\n    out_name = out_file[:-4] if out_file.endswith('.pth') else out_file\n    final_file = out_name + '-{}.pth'.format(sha[:8])\n    subprocess.Popen(['mv', out_file, final_file])\n\n\ndef main():\n    args = parse_args()\n    process_checkpoint(args.in_file, args.out_file)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/model_converters/regnet2mmdet.py",
    "content": "import argparse\nimport torch\nfrom collections import OrderedDict\n\n\ndef convert_stem(model_key, model_weight, state_dict, converted_names):\n    new_key = model_key.replace('stem.conv', 'conv1')\n    new_key = new_key.replace('stem.bn', 'bn1')\n    state_dict[new_key] = model_weight\n    converted_names.add(model_key)\n    print(f'Convert {model_key} to {new_key}')\n\n\ndef convert_head(model_key, model_weight, state_dict, converted_names):\n    new_key = model_key.replace('head.fc', 'fc')\n    state_dict[new_key] = model_weight\n    converted_names.add(model_key)\n    print(f'Convert {model_key} to {new_key}')\n\n\ndef convert_reslayer(model_key, model_weight, state_dict, converted_names):\n    split_keys = model_key.split('.')\n    layer, block, module = split_keys[:3]\n    block_id = int(block[1:])\n    layer_name = f'layer{int(layer[1:])}'\n    block_name = f'{block_id - 1}'\n\n    if block_id == 1 and module == 'bn':\n        new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}'\n    elif block_id == 1 and module == 'proj':\n        new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}'\n    elif module == 'f':\n        if split_keys[3] == 'a_bn':\n            module_name = 'bn1'\n        elif split_keys[3] == 'b_bn':\n            module_name = 'bn2'\n        elif split_keys[3] == 'c_bn':\n            module_name = 'bn3'\n        elif split_keys[3] == 'a':\n            module_name = 'conv1'\n        elif split_keys[3] == 'b':\n            module_name = 'conv2'\n        elif split_keys[3] == 'c':\n            module_name = 'conv3'\n        new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}'\n    else:\n        raise ValueError(f'Unsupported conversion of key {model_key}')\n    print(f'Convert {model_key} to {new_key}')\n    state_dict[new_key] = model_weight\n    converted_names.add(model_key)\n\n\ndef convert(src, dst):\n    \"\"\"Convert keys in pycls pretrained RegNet models to mmdet style.\"\"\"\n    # load caffe model\n    regnet_model = torch.load(src)\n    blobs = regnet_model['model_state']\n    # convert to pytorch style\n    state_dict = OrderedDict()\n    converted_names = set()\n    for key, weight in blobs.items():\n        if 'stem' in key:\n            convert_stem(key, weight, state_dict, converted_names)\n        elif 'head' in key:\n            convert_head(key, weight, state_dict, converted_names)\n        elif key.startswith('s'):\n            convert_reslayer(key, weight, state_dict, converted_names)\n\n    # check if all layers are converted\n    for key in blobs:\n        if key not in converted_names:\n            print(f'not converted: {key}')\n    # save checkpoint\n    checkpoint = dict()\n    checkpoint['state_dict'] = state_dict\n    torch.save(checkpoint, dst)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description='Convert model keys')\n    parser.add_argument('src', help='src detectron model path')\n    parser.add_argument('dst', help='save path')\n    args = parser.parse_args()\n    convert(args.src, args.dst)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/slurm_test.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nCHECKPOINT=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_NODE:-8}\nCPUS_PER_TASK=${CPUS_PER_TASK:-5}\nPY_ARGS=${@:5}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --cpus-per-task=${CPUS_PER_TASK} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher=\"slurm\" ${PY_ARGS}\n"
  },
  {
    "path": "tools/slurm_train.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nWORK_DIR=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_NODE:-8}\nCPUS_PER_TASK=${CPUS_PER_TASK:-5}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\nPY_ARGS=${@:5}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --cpus-per-task=${CPUS_PER_TASK} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher=\"slurm\" ${PY_ARGS}\n"
  },
  {
    "path": "tools/test.py",
    "content": "import argparse\nimport mmcv\nimport os\nimport torch\nimport warnings\nfrom mmcv import Config, DictAction\nfrom mmcv.cnn import fuse_conv_bn\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\nfrom mmcv.runner import (get_dist_info, init_dist, load_checkpoint,\n                         wrap_fp16_model)\n\nfrom mmdet3d.apis import single_gpu_test\nfrom mmdet3d.datasets import build_dataloader, build_dataset\nfrom mmdet3d.models import build_detector\nfrom mmdet.apis import multi_gpu_test, set_random_seed\nfrom mmdet.datasets import replace_ImageToTensor\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet test (and eval) a model')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--out', help='output result file in pickle format')\n    parser.add_argument(\n        '--fuse-conv-bn',\n        action='store_true',\n        help='Whether to fuse conv and bn. This will slightly increase '\n        'the inference speed')\n    parser.add_argument(\n        '--format-only',\n        action='store_true',\n        help='Format the output results without performing evaluation. It is '\n        'useful when you want to format the result to a specific format and '\n        'submit it to the test server')\n    parser.add_argument(\n        '--eval',\n        type=str,\n        nargs='+',\n        help='evaluation metrics, which depend on the dataset, e.g., \"bbox\",'\n        ' \"segm\", \"proposal\" for COCO, and \"mAP\", \"recall\" for PASCAL VOC')\n    parser.add_argument('--show', action='store_true', help='show results')\n    parser.add_argument(\n        '--show-dir', help='directory where results will be saved')\n    parser.add_argument(\n        '--gpu-collect',\n        action='store_true',\n        help='whether to use gpu to collect results.')\n    parser.add_argument(\n        '--tmpdir',\n        help='tmp directory used for collecting results from multiple '\n        'workers, available when gpu-collect is not specified')\n    parser.add_argument('--seed', type=int, default=0, help='random seed')\n    parser.add_argument(\n        '--deterministic',\n        action='store_true',\n        help='whether to set deterministic options for CUDNN backend.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--options',\n        nargs='+',\n        action=DictAction,\n        help='custom options for evaluation, the key-value pair in xxx=yyy '\n        'format will be kwargs for dataset.evaluate() function (deprecate), '\n        'change to --eval-options instead.')\n    parser.add_argument(\n        '--eval-options',\n        nargs='+',\n        action=DictAction,\n        help='custom options for evaluation, the key-value pair in xxx=yyy '\n        'format will be kwargs for dataset.evaluate() function')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n\n    if args.options and args.eval_options:\n        raise ValueError(\n            '--options and --eval-options cannot be both specified, '\n            '--options is deprecated in favor of --eval-options')\n    if args.options:\n        warnings.warn('--options is deprecated in favor of --eval-options')\n        args.eval_options = args.options\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    assert args.out or args.eval or args.format_only or args.show \\\n        or args.show_dir, \\\n        ('Please specify at least one operation (save/eval/format/show the '\n         'results / save the results) with the argument \"--out\", \"--eval\"'\n         ', \"--format-only\", \"--show\" or \"--show-dir\"')\n\n    if args.eval and args.format_only:\n        raise ValueError('--eval and --format_only cannot be both specified')\n\n    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):\n        raise ValueError('The output file must be a pkl file.')\n\n    cfg = Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n    # import modules from string list.\n    if cfg.get('custom_imports', None):\n        from mmcv.utils import import_modules_from_strings\n        import_modules_from_strings(**cfg['custom_imports'])\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n\n    cfg.model.pretrained = None\n    # in case the test dataset is concatenated\n    samples_per_gpu = 1\n    if isinstance(cfg.data.test, dict):\n        cfg.data.test.test_mode = True\n        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)\n        if samples_per_gpu > 1:\n            # Replace 'ImageToTensor' to 'DefaultFormatBundle'\n            cfg.data.test.pipeline = replace_ImageToTensor(\n                cfg.data.test.pipeline)\n    elif isinstance(cfg.data.test, list):\n        for ds_cfg in cfg.data.test:\n            ds_cfg.test_mode = True\n        samples_per_gpu = max(\n            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])\n        if samples_per_gpu > 1:\n            for ds_cfg in cfg.data.test:\n                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n\n    # set random 
seeds\n    if args.seed is not None:\n        set_random_seed(args.seed, deterministic=args.deterministic)\n\n    # build the dataloader\n    dataset = build_dataset(cfg.data.test)\n    data_loader = build_dataloader(\n        dataset,\n        samples_per_gpu=samples_per_gpu,\n        workers_per_gpu=cfg.data.workers_per_gpu,\n        dist=distributed,\n        shuffle=False)\n\n    # build the model and load checkpoint\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        wrap_fp16_model(model)\n    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')\n    if args.fuse_conv_bn:\n        model = fuse_conv_bn(model)\n    # old versions did not save class info in checkpoints, this workaround is\n    # for backward compatibility\n    if 'CLASSES' in checkpoint.get('meta', {}):\n        model.CLASSES = checkpoint['meta']['CLASSES']\n    else:\n        model.CLASSES = dataset.CLASSES\n\n    if not distributed:\n        model = MMDataParallel(model, device_ids=[0])\n        outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)\n    else:\n        model = MMDistributedDataParallel(\n            model.cuda(),\n            device_ids=[torch.cuda.current_device()],\n            broadcast_buffers=False)\n        outputs = multi_gpu_test(model, data_loader, args.tmpdir,\n                                 args.gpu_collect)\n\n    rank, _ = get_dist_info()\n    if rank == 0:\n        if args.out:\n            print(f'\\nwriting results to {args.out}')\n            mmcv.dump(outputs, args.out)\n        kwargs = {} if args.eval_options is None else args.eval_options\n        if args.format_only:\n            dataset.format_results(outputs, **kwargs)\n        if args.eval:\n            eval_kwargs = cfg.get('evaluation', {}).copy()\n            # hard-coded way to remove EvalHook args\n            for key in [\n                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',\n                    'rule'\n            ]:\n                eval_kwargs.pop(key, None)\n            eval_kwargs.update(dict(metric=args.eval, **kwargs))\n            print(dataset.evaluate(outputs, **eval_kwargs))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/train.py",
    "content": "from __future__ import division\n\nimport argparse\nimport copy\nimport logging\nimport mmcv\nimport os\nimport time\nimport torch\nimport warnings\nfrom mmcv import Config, DictAction\nfrom mmcv.runner import get_dist_info, init_dist\nfrom os import path as osp\n\nfrom mmdet3d import __version__\nfrom mmdet3d.datasets import build_dataset\nfrom mmdet3d.models import build_detector\nfrom mmdet3d.utils import collect_env, get_root_logger\nfrom mmdet.apis import set_random_seed, train_detector\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Train a detector')\n    parser.add_argument('config', help='train config file path')\n    parser.add_argument('--work-dir', help='the dir to save logs and models')\n    parser.add_argument(\n        '--resume-from', help='the checkpoint file to resume from')\n    parser.add_argument(\n        '--no-validate',\n        action='store_true',\n        help='whether not to evaluate the checkpoint during training')\n    group_gpus = parser.add_mutually_exclusive_group()\n    group_gpus.add_argument(\n        '--gpus',\n        type=int,\n        help='number of gpus to use '\n        '(only applicable to non-distributed training)')\n    group_gpus.add_argument(\n        '--gpu-ids',\n        type=int,\n        nargs='+',\n        help='ids of gpus to use '\n        '(only applicable to non-distributed training)')\n    parser.add_argument('--seed', type=int, default=0, help='random seed')\n    parser.add_argument(\n        '--deterministic',\n        action='store_true',\n        help='whether to set deterministic options for CUDNN backend.')\n    parser.add_argument(\n        '--options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file (deprecate), '\n        'change to --cfg-options instead.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    parser.add_argument(\n        '--autoscale-lr',\n        action='store_true',\n        help='automatically scale lr with the number of gpus')\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n\n    if args.options and args.cfg_options:\n        raise ValueError(\n            '--options and --cfg-options cannot be both specified, '\n            '--options is deprecated in favor of --cfg-options')\n    if args.options:\n        warnings.warn('--options is deprecated in favor of --cfg-options')\n        args.cfg_options = args.options\n\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n    # import modules from string list.\n    if cfg.get('custom_imports', None):\n        from mmcv.utils import import_modules_from_strings\n        import_modules_from_strings(**cfg['custom_imports'])\n\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n\n    # work_dir is determined in this priority: CLI > segment in file > filename\n    if args.work_dir is not None:\n        # update configs according to CLI args if args.work_dir is not None\n        cfg.work_dir = args.work_dir\n    elif cfg.get('work_dir', None) is None:\n        # use config filename as default work_dir if cfg.work_dir is None\n        cfg.work_dir = osp.join('./work_dirs',\n                                osp.splitext(osp.basename(args.config))[0])\n    if args.resume_from is not None:\n        cfg.resume_from = args.resume_from\n    if args.gpu_ids is not None:\n        cfg.gpu_ids = args.gpu_ids\n    else:\n        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)\n\n    if args.autoscale_lr:\n        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)\n        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n        # re-set gpu_ids with distributed training mode\n        _, world_size = get_dist_info()\n        cfg.gpu_ids = range(world_size)\n\n    # create work_dir\n    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))\n    # dump config\n    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))\n    # init the logger before other steps\n    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())\n    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')\n    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)\n\n    # add a logging filter\n    logging_filter = logging.Filter('mmdet')\n    logging_filter.filter = lambda record: record.find('mmdet') != -1\n\n    # init the meta dict to record some important information such as\n    # environment info and seed, which will be logged\n    meta = dict()\n    # log env info\n    env_info_dict = collect_env()\n    env_info = '\\n'.join([(f'{k}: {v}') for k, v in 
env_info_dict.items()])\n    dash_line = '-' * 60 + '\\n'\n    logger.info('Environment info:\\n' + dash_line + env_info + '\\n' +\n                dash_line)\n    meta['env_info'] = env_info\n    meta['config'] = cfg.pretty_text\n\n    # log some basic info\n    logger.info(f'Distributed training: {distributed}')\n    logger.info(f'Config:\\n{cfg.pretty_text}')\n\n    # set random seeds\n    if args.seed is not None:\n        logger.info(f'Set random seed to {args.seed}, '\n                    f'deterministic: {args.deterministic}')\n        set_random_seed(args.seed, deterministic=args.deterministic)\n    cfg.seed = args.seed\n    meta['seed'] = args.seed\n    meta['exp_name'] = osp.basename(args.config)\n\n    model = build_detector(\n        cfg.model,\n        train_cfg=cfg.get('train_cfg'),\n        test_cfg=cfg.get('test_cfg'))\n\n    if 'freeze_lidar_components' in cfg and cfg['freeze_lidar_components'] is True:\n        logger.info('Parameters that will be updated:')\n        param_grad = []\n        param_nograd = []\n\n        for name, param in model.named_parameters():\n            if 'pts' in name and 'pts_bbox_head' not in name:\n                param.requires_grad = False\n\n            if 'freeze_lidar_detector' not in cfg or cfg['freeze_lidar_detector'] is True:\n                if 'pts_bbox_head.shared_conv' in name and 'pts_bbox_head.shared_conv_img' not in name:\n                    param.requires_grad = False\n                if 'pts_bbox_head.heatmap_head' in name and 'pts_bbox_head.heatmap_head_img' not in name:\n                    param.requires_grad = False\n                if 'pts_bbox_head.point_transformer' in name:\n                    param.requires_grad = False\n                if 'pts_bbox_head.class_encoding' in name:\n                    param.requires_grad = False\n\n        from torch import nn\n\n        def fix_bn(m):\n            if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.LayerNorm)):\n                m.track_running_stats = False\n\n        model.pts_voxel_layer.apply(fix_bn)\n        model.pts_voxel_encoder.apply(fix_bn)\n        model.pts_middle_encoder.apply(fix_bn)\n        model.pts_backbone.apply(fix_bn)\n        model.pts_neck.apply(fix_bn)\n\n        if 'freeze_lidar_detector' not in cfg or cfg['freeze_lidar_detector'] is True:\n            model.pts_bbox_head.heatmap_head.apply(fix_bn)\n            model.pts_bbox_head.shared_conv.apply(fix_bn)\n            model.pts_bbox_head.class_encoding.apply(fix_bn)\n            model.pts_bbox_head.point_transformer.apply(fix_bn)\n\n        for name, param in model.named_parameters():\n            if param.requires_grad is True:\n                logger.info(name)\n                param_grad.append(name)\n            else:\n                param_nograd.append(name)\n\n    logger.info(f'Model:\\n{model}')\n    datasets = [build_dataset(cfg.data.train)]\n    if len(cfg.workflow) == 2:\n        val_dataset = copy.deepcopy(cfg.data.val)\n        # in case we use a dataset wrapper\n        if 'dataset' in cfg.data.train:\n            val_dataset.pipeline = cfg.data.train.dataset.pipeline\n        else:\n            val_dataset.pipeline = cfg.data.train.pipeline\n        # set test_mode=False here in deep copied config\n        # which does not affect the AP/AR calculation later\n        # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow  # noqa\n        val_dataset.test_mode = False\n        datasets.append(build_dataset(val_dataset))\n    if cfg.checkpoint_config is not None:\n        # save mmdet version, config file content and class names in\n        # checkpoints as meta data\n        cfg.checkpoint_config.meta = dict(\n            mmdet_version=__version__,\n            config=cfg.pretty_text,\n            CLASSES=datasets[0].CLASSES)\n    # add an attribute for visualization convenience\n    model.CLASSES = datasets[0].CLASSES\n    train_detector(\n        model,\n        datasets,\n        cfg,\n        distributed=distributed,\n        validate=(not args.no_validate),\n        timestamp=timestamp,\n        meta=meta\n    )\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "train.sh",
    "content": "sleep 4h;\r\nbash tools/dist_train.sh configs/impfusion_nusc_voxel_LC_2d_3d_Cross_full.py 4 --work-dir work_dirs/impfusion_nusc_voxel_LC_2d_3d_Cross_cameraSE_focal_fuseProj_catImage_proj2D_ColAttnHeatmapW0.5_fuseSelf_imgHeatmap2_PointAug_ImgAug_maskrcnnCOCO_regLayer2woBN_fullset --resume-from /media/msc-auto/HDD/yichen/TransFusion/work_dirs/impfusion_nusc_voxel_LC_2d_3d_Cross_cameraSE_focal_fuseProj_catImage_proj2D_ColAttnHeatmapW0.5_fuseSelf_imgHeatmap2_PointAug_ImgAug_maskrcnnCOCO_regLayer2woBN_fullset/epoch_1.pth;"
  }
]