Repository: hailanyi/TED Branch: master Commit: c2e4847e3acf Files: 182 Total size: 1.4 MB Directory structure: gitextract_kkhpxbxf/ ├── LICENSE ├── README.md ├── data/ │ └── kitti/ │ └── ImageSets/ │ ├── test.txt │ ├── train.txt │ └── val.txt ├── pcdet/ │ ├── __init__.py │ ├── config.py │ ├── datasets/ │ │ ├── __init__.py │ │ ├── augmentor/ │ │ │ ├── X_transform.py │ │ │ ├── augmentor_utils.py │ │ │ ├── data_augmentor.py │ │ │ └── database_sampler.py │ │ ├── dataset.py │ │ ├── kitti/ │ │ │ ├── kitti_dataset.py │ │ │ ├── kitti_dataset_mm.py │ │ │ └── kitti_object_eval_python/ │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── eval.py │ │ │ ├── evaluate.py │ │ │ ├── kitti_common.py │ │ │ └── rotate_iou.py │ │ └── processor/ │ │ ├── data_processor.py │ │ └── point_feature_encoder.py │ ├── models/ │ │ ├── __init__.py │ │ ├── backbones_2d/ │ │ │ ├── __init__.py │ │ │ ├── base_bev_backbone.py │ │ │ └── map_to_bev/ │ │ │ ├── __init__.py │ │ │ ├── height_compression.py │ │ │ └── pointpillar_scatter.py │ │ ├── backbones_3d/ │ │ │ ├── __init__.py │ │ │ ├── pfe/ │ │ │ │ ├── __init__.py │ │ │ │ ├── bev_features_interpolation.py │ │ │ │ └── voxel_set_abstraction.py │ │ │ ├── pointnet2_backbone.py │ │ │ ├── spconv_backbone.py │ │ │ ├── spconv_unet.py │ │ │ └── vfe/ │ │ │ ├── __init__.py │ │ │ ├── mean_vfe.py │ │ │ ├── pillar_vfe.py │ │ │ └── vfe_template.py │ │ ├── dense_heads/ │ │ │ ├── __init__.py │ │ │ ├── anchor_head_multi.py │ │ │ ├── anchor_head_single.py │ │ │ ├── anchor_head_template.py │ │ │ ├── center_head.py │ │ │ ├── point_head_box.py │ │ │ ├── point_head_simple.py │ │ │ ├── point_head_template.py │ │ │ ├── point_intra_part_head.py │ │ │ └── target_assigner/ │ │ │ ├── anchor_generator.py │ │ │ ├── atss_target_assigner.py │ │ │ └── axis_aligned_target_assigner.py │ │ ├── detectors/ │ │ │ ├── __init__.py │ │ │ ├── detector3d_template.py │ │ │ └── voxel_rcnn.py │ │ ├── model_utils/ │ │ │ ├── centernet_utils.py │ │ │ ├── ctrans.py │ │ │ └── model_nms_utils.py │ │ └── 
roi_heads/ │ │ ├── __init__.py │ │ ├── roi_head_template.py │ │ ├── target_assigner/ │ │ │ ├── proposal_target_layer.py │ │ │ └── proposal_target_layer3.py │ │ └── ted_head.py │ ├── ops/ │ │ ├── dcn/ │ │ │ ├── __init__.py │ │ │ ├── deform_conv.py │ │ │ ├── setup.py │ │ │ └── src/ │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ │ ├── iou3d_nms/ │ │ │ ├── iou3d_nms_utils.py │ │ │ └── src/ │ │ │ ├── iou3d_cpu.cpp │ │ │ ├── iou3d_cpu.h │ │ │ ├── iou3d_nms.cpp │ │ │ ├── iou3d_nms.h │ │ │ ├── iou3d_nms_api.cpp │ │ │ └── iou3d_nms_kernel.cu │ │ ├── pointnet2/ │ │ │ ├── pointnet2_batch/ │ │ │ │ ├── pointnet2_modules.py │ │ │ │ ├── pointnet2_utils.py │ │ │ │ └── src/ │ │ │ │ ├── ball_query.cpp │ │ │ │ ├── ball_query_gpu.cu │ │ │ │ ├── ball_query_gpu.h │ │ │ │ ├── cuda_utils.h │ │ │ │ ├── group_points.cpp │ │ │ │ ├── group_points_gpu.cu │ │ │ │ ├── group_points_gpu.h │ │ │ │ ├── interpolate.cpp │ │ │ │ ├── interpolate_gpu.cu │ │ │ │ ├── interpolate_gpu.h │ │ │ │ ├── pointnet2_api.cpp │ │ │ │ ├── sampling.cpp │ │ │ │ ├── sampling_gpu.cu │ │ │ │ └── sampling_gpu.h │ │ │ └── pointnet2_stack/ │ │ │ ├── pointnet2_modules.py │ │ │ ├── pointnet2_utils.py │ │ │ ├── src/ │ │ │ │ ├── ball_query.cpp │ │ │ │ ├── ball_query_deform.cpp │ │ │ │ ├── ball_query_deform_gpu.cu │ │ │ │ ├── ball_query_deform_gpu.h │ │ │ │ ├── ball_query_gpu.cu │ │ │ │ ├── ball_query_gpu.h │ │ │ │ ├── cuda_utils.h │ │ │ │ ├── group_points.cpp │ │ │ │ ├── group_points_gpu.cu │ │ │ │ ├── group_points_gpu.h │ │ │ │ ├── interpolate.cpp │ │ │ │ ├── interpolate_gpu.cu │ │ │ │ ├── interpolate_gpu.h │ │ │ │ ├── pointnet2_api.cpp │ │ │ │ ├── sampling.cpp │ │ │ │ ├── sampling_gpu.cu │ │ │ │ ├── sampling_gpu.h │ │ │ │ ├── vector_pool.cpp │ │ │ │ ├── vector_pool_gpu.cu │ │ │ │ ├── vector_pool_gpu.h │ │ │ │ ├── voxel_query.cpp │ │ │ │ ├── voxel_query_gpu.cu │ │ │ │ └── voxel_query_gpu.h │ │ │ ├── voxel_pool_modules.py │ │ │ └── 
voxel_query_utils.py │ │ ├── roiaware_pool3d/ │ │ │ ├── roiaware_pool3d_utils.py │ │ │ └── src/ │ │ │ ├── roiaware_pool3d.cpp │ │ │ └── roiaware_pool3d_kernel.cu │ │ ├── roipoint_pool3d/ │ │ │ ├── roipoint_pool3d_utils.py │ │ │ └── src/ │ │ │ ├── roipoint_pool3d.cpp │ │ │ └── roipoint_pool3d_kernel.cu │ │ └── votr_ops/ │ │ ├── src/ │ │ │ ├── build_attention_indices.cpp │ │ │ ├── build_attention_indices_gpu.cu │ │ │ ├── build_attention_indices_gpu.h │ │ │ ├── build_mapping.cpp │ │ │ ├── build_mapping_gpu.cu │ │ │ ├── build_mapping_gpu.h │ │ │ ├── group_features.cpp │ │ │ ├── group_features_gpu.cu │ │ │ ├── group_features_gpu.h │ │ │ ├── votr_api.cpp │ │ │ └── votr_cuda_utils.h │ │ └── votr_utils.py │ ├── utils/ │ │ ├── __init__.py │ │ ├── bbloss.py │ │ ├── box_coder_utils.py │ │ ├── box_np_ops.py │ │ ├── box_utils.py │ │ ├── calibration_kitti.py │ │ ├── common_utils.py │ │ ├── commu_utils.py │ │ ├── loss_utils.py │ │ ├── object3d_kitti.py │ │ ├── odiou_loss.py │ │ ├── spconv_utils.py │ │ └── transform_utils.py │ └── version.py ├── requirements.txt ├── setup.py └── tools/ ├── PENet/ │ ├── CoordConv.py │ ├── LICENSE │ ├── basic.py │ ├── criteria.py │ ├── dataloaders/ │ │ ├── calib_cam_to_cam.txt │ │ ├── calibration_kitti.py │ │ ├── kitti_loader.py │ │ ├── my_loader.py │ │ ├── spconv_utils.py │ │ └── transforms.py │ ├── helper.py │ ├── main.py │ ├── metrics.py │ ├── model.py │ └── vis_utils.py ├── cfgs/ │ ├── dataset_configs/ │ │ └── kitti_dataset.yaml │ └── models/ │ └── kitti/ │ ├── TED-M.yaml │ └── TED-S.yaml ├── dist_test.sh ├── dist_train.sh ├── eval_utils/ │ └── eval_utils.py ├── test.py ├── train.py ├── train_utils/ │ ├── optimization/ │ │ ├── __init__.py │ │ ├── fastai_optim.py │ │ └── learning_schedules_fastai.py │ └── train_utils.py └── visual_utils/ └── visualize_utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE 
================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Transformation-Equivariant 3D Object Detection for Autonomous Driving This is an improved version of [TED](https://arxiv.org/abs/2211.11962) with a multiple-refinement design. This code is mainly based on [OpenPCDet](https://github.com/open-mmlab/OpenPCDet) and [CasA](https://github.com/hailanyi/CasA); some code is from [PENet](https://github.com/JUGGHM/PENet_ICRA2021) and [SFD](https://github.com/LittlePey/SFD). ## Detection Framework The overall detection framework is shown below. (1) Transformation-equivariant Sparse Convolution (TeSpConv) backbone; (2) Transformation-equivariant Bird Eye View (TeBEV) pooling; (3) Multi-grid pooling and multi-refinement. TeSpConv applies shared weights on multiple transformed point clouds to record the transformation-equivariant voxel features. 
TeBEV pooling aligns and aggregates the scene-level equivariant features into lightweight representations for proposal generation. Multi-grid pooling and multi-refinement align and aggregate the instance-level invariant features for proposal refinement. ![](./tools/images/framework.png) ## Model Zoo We release two models, which are based on LiDAR-only and multi-modal data, respectively. We denote the two models as TED-S and TED-M, respectively. * All models are trained with 8 V100 GPUs and are available for download. * The models are trained on the train split (3712 samples) of the KITTI dataset. * The results are the 3D AP(R40) of Car on the *val* set of the KITTI dataset. * These models are not suitable for directly reporting results on the KITTI test set; please use a slightly lower score threshold and train the models on all or 80% of the training data to achieve desirable performance on the KITTI test set. | |Modality|GPU memory of training| Easy | Mod. | Hard | download | |---------------------------------------------|----------:|----------:|:-------:|:-------:|:-------:|:---------:| | [TED-S](tools/cfgs/models/kitti/TED-S.yaml)|LiDAR only|~12 GB |93.25 |87.99| 86.28| [google](https://drive.google.com/file/d/1hqoj-lV4Cr3m7U3EphdCSjHmhBlekRm8/view?usp=sharing) / [baidu(p91t)](https://pan.baidu.com/s/1ecobwO673ScrGYOHbooGIw) / 36M | | [TED-M](tools/cfgs/models/kitti/TED-M.yaml)|LiDAR+RGB |~15 GB| 95.62 |89.24 |86.77 | [google](https://drive.google.com/file/d/1hXe1at-LKogTfWorALmq6djjYqhKX7nD/view?usp=sharing) / [baidu(nkr5)](https://pan.baidu.com/s/1FP80452dfM09YtE8DBaicQ) / 65M| ## Getting Started ``` conda create -n spconv2 python=3.9 conda activate spconv2 pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html pip install numpy==1.19.5 protobuf==3.19.4 scikit-image==0.19.2 waymo-open-dataset-tf-2-5-0 nuscenes-devkit==1.0.5 spconv-cu111 numba scipy pyyaml easydict fire tqdm shapely matplotlib opencv-python addict 
pyquaternion awscli open3d pandas future pybind11 tensorboardX tensorboard Cython prefetch-generator ``` ### Dependency Our released implementation is tested on: + Ubuntu 18.04 + Python 3.6.9 + PyTorch 1.8.1 + Spconv 1.2.1 + NVIDIA CUDA 11.1 + 8x Tesla V100 GPUs We also tested on: + Ubuntu 18.04 + Python 3.9.13 + PyTorch 1.8.1 + Spconv 2.1.22 # pip install spconv-cu111 + NVIDIA CUDA 11.1 + 2x 3090 GPUs ### Prepare dataset Please download the official [KITTI 3D object detection](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d) dataset and organize the downloaded files as follows (the road planes can be downloaded from [[road plane]](https://drive.google.com/file/d/1d5mq0RXRnvHPVeKx6Q612z0YRO1t2wAp/view?usp=sharing); they are optional and are used for data augmentation during training): ``` TED ├── data │ ├── kitti │ │ │── ImageSets │ │ │── training │ │ │ ├──calib & velodyne & label_2 & image_2 & (optional: planes) │ │ │── testing │ │ │ ├──calib & velodyne & image_2 ├── pcdet ├── tools ``` You need to create a 'velodyne_depth' dataset to run our multimodal detector. You can download our preprocessed data from [google (13GB)](https://drive.google.com/file/d/1xki9v_zsQMM8vMVNo0ENi1Mh_GNMjHUg/view?usp=sharing), [baidu (a20o)](https://pan.baidu.com/s/1OH4KIVoSSH7ea3-3CqkZRQ), or generate the data yourself: * [Install this project](#installation). * Download the PENet depth completion model [here (500M)](https://drive.google.com/file/d/1RDdKlKJcas-G5OA49x8OoqcUDiYYZgeM/view?usp=sharing) and put it into ```tools/PENet```. * Then run the following code to generate RGB pseudo points. ``` cd tools/PENet python3 main.py --detpath [your path like: ../../data/kitti/training] ``` After generating 'velodyne_depth', run the following commands to create the dataset infos: ``` cd ../.. 
python3 -m pcdet.datasets.kitti.kitti_dataset create_kitti_infos tools/cfgs/dataset_configs/kitti_dataset.yaml python3 -m pcdet.datasets.kitti.kitti_dataset_mm create_kitti_infos tools/cfgs/dataset_configs/kitti_dataset.yaml ``` In either case, the final data structure should be: ``` TED ├── data │ ├── kitti │ │ │── ImageSets │ │ │── training │ │ │ ├──calib & velodyne & label_2 & image_2 & (optional: planes) & velodyne_depth │ │ │── testing │ │ │ ├──calib & velodyne & image_2 & velodyne_depth │ │ │── gt_database │ │ │── gt_database_mm │ │ │── kitti_dbinfos_train_mm.pkl │ │ │── kitti_dbinfos_train.pkl │ │ │── kitti_infos_test.pkl │ │ │── kitti_infos_train.pkl │ │ │── kitti_infos_trainval.pkl │ │ │── kitti_infos_val.pkl ├── pcdet ├── tools ``` ### Installation ``` git clone https://github.com/hailanyi/TED.git cd TED python3 setup.py develop ``` ### Training Single-GPU training: ``` cd tools python3 train.py --cfg_file ${CONFIG_FILE} ``` For example, to train the TED-S model: ``` cd tools python3 train.py --cfg_file cfgs/models/kitti/TED-S.yaml ``` Multi-GPU training: modify the GPU number in dist_train.sh and run: ``` cd tools sh dist_train.sh ``` The training log is saved to log.txt. You can run ```cat log.txt``` to view the training process. ### Evaluation ``` cd tools python3 test.py --cfg_file ${CONFIG_FILE} --batch_size ${BATCH_SIZE} --ckpt ${CKPT} ``` For example, to test the TED-S model: ``` cd tools python3 test.py --cfg_file cfgs/models/kitti/TED-S.yaml --ckpt TED-S.pth ``` Multi-GPU testing: you need to modify the GPU number in dist_test.sh and run: ``` sh dist_test.sh ``` The test log is saved to log-test.txt. You can run ```cat log-test.txt``` to view the test results. ## License This code is released under the [Apache 2.0 license](LICENSE). 
## Acknowledgement [CasA](https://github.com/hailanyi/CasA) [OpenPCDet](https://github.com/open-mmlab/OpenPCDet) [PENet](https://github.com/JUGGHM/PENet_ICRA2021) [SFD](https://github.com/LittlePey/SFD) ## Citation @inproceedings{TED, title={Transformation-Equivariant 3D Object Detection for Autonomous Driving}, author={Wu, Hai and Wen, Chenglu and Li, Wei and Yang, Ruigang and Wang, Cheng}, year={2023}, booktitle={AAAI} } ================================================ FILE: data/kitti/ImageSets/test.txt ================================================ 000000 000001 000002 000003 000004 000005 000006 000007 000008 000009 000010 000011 000012 000013 000014 000015 000016 000017 000018 000019 000020 000021 000022 000023 000024 000025 000026 000027 000028 000029 000030 000031 000032 000033 000034 000035 000036 000037 000038 000039 000040 000041 000042 000043 000044 000045 000046 000047 000048 000049 000050 000051 000052 000053 000054 000055 000056 000057 000058 000059 000060 000061 000062 000063 000064 000065 000066 000067 000068 000069 000070 000071 000072 000073 000074 000075 000076 000077 000078 000079 000080 000081 000082 000083 000084 000085 000086 000087 000088 000089 000090 000091 000092 000093 000094 000095 000096 000097 000098 000099 000100 000101 000102 000103 000104 000105 000106 000107 000108 000109 000110 000111 000112 000113 000114 000115 000116 000117 000118 000119 000120 000121 000122 000123 000124 000125 000126 000127 000128 000129 000130 000131 000132 000133 000134 000135 000136 000137 000138 000139 000140 000141 000142 000143 000144 000145 000146 000147 000148 000149 000150 000151 000152 000153 000154 000155 000156 000157 000158 000159 000160 000161 000162 000163 000164 000165 000166 000167 000168 000169 000170 000171 000172 000173 000174 000175 000176 000177 000178 000179 000180 000181 000182 000183 000184 000185 000186 000187 000188 000189 000190 000191 000192 000193 000194 000195 000196 000197 000198 000199 000200 000201 000202 000203 000204 
000205 000206 000207 000208 000209 000210 000211 000212 000213 000214 000215 000216 000217 000218 000219 000220 000221 000222 000223 000224 000225 000226 000227 000228 000229 000230 000231 000232 000233 000234 000235 000236 000237 000238 000239 000240 000241 000242 000243 000244 000245 000246 000247 000248 000249 000250 000251 000252 000253 000254 000255 000256 000257 000258 000259 000260 000261 000262 000263 000264 000265 000266 000267 000268 000269 000270 000271 000272 000273 000274 000275 000276 000277 000278 000279 000280 000281 000282 000283 000284 000285 000286 000287 000288 000289 000290 000291 000292 000293 000294 000295 000296 000297 000298 000299 000300 000301 000302 000303 000304 000305 000306 000307 000308 000309 000310 000311 000312 000313 000314 000315 000316 000317 000318 000319 000320 000321 000322 000323 000324 000325 000326 000327 000328 000329 000330 000331 000332 000333 000334 000335 000336 000337 000338 000339 000340 000341 000342 000343 000344 000345 000346 000347 000348 000349 000350 000351 000352 000353 000354 000355 000356 000357 000358 000359 000360 000361 000362 000363 000364 000365 000366 000367 000368 000369 000370 000371 000372 000373 000374 000375 000376 000377 000378 000379 000380 000381 000382 000383 000384 000385 000386 000387 000388 000389 000390 000391 000392 000393 000394 000395 000396 000397 000398 000399 000400 000401 000402 000403 000404 000405 000406 000407 000408 000409 000410 000411 000412 000413 000414 000415 000416 000417 000418 000419 000420 000421 000422 000423 000424 000425 000426 000427 000428 000429 000430 000431 000432 000433 000434 000435 000436 000437 000438 000439 000440 000441 000442 000443 000444 000445 000446 000447 000448 000449 000450 000451 000452 000453 000454 000455 000456 000457 000458 000459 000460 000461 000462 000463 000464 000465 000466 000467 000468 000469 000470 000471 000472 000473 000474 000475 000476 000477 000478 000479 000480 000481 000482 000483 000484 000485 000486 000487 000488 000489 
000490 000491 000492 000493 000494 000495 000496 000497 000498 000499 000500 000501 000502 000503 000504 000505 000506 000507 000508 000509 000510 000511 000512 000513 000514 000515 000516 000517 000518 000519 000520 000521 000522 000523 000524 000525 000526 000527 000528 000529 000530 000531 000532 000533 000534 000535 000536 000537 000538 000539 000540 000541 000542 000543 000544 000545 000546 000547 000548 000549 000550 000551 000552 000553 000554 000555 000556 000557 000558 000559 000560 000561 000562 000563 000564 000565 000566 000567 000568 000569 000570 000571 000572 000573 000574 000575 000576 000577 000578 000579 000580 000581 000582 000583 000584 000585 000586 000587 000588 000589 000590 000591 000592 000593 000594 000595 000596 000597 000598 000599 000600 000601 000602 000603 000604 000605 000606 000607 000608 000609 000610 000611 000612 000613 000614 000615 000616 000617 000618 000619 000620 000621 000622 000623 000624 000625 000626 000627 000628 000629 000630 000631 000632 000633 000634 000635 000636 000637 000638 000639 000640 000641 000642 000643 000644 000645 000646 000647 000648 000649 000650 000651 000652 000653 000654 000655 000656 000657 000658 000659 000660 000661 000662 000663 000664 000665 000666 000667 000668 000669 000670 000671 000672 000673 000674 000675 000676 000677 000678 000679 000680 000681 000682 000683 000684 000685 000686 000687 000688 000689 000690 000691 000692 000693 000694 000695 000696 000697 000698 000699 000700 000701 000702 000703 000704 000705 000706 000707 000708 000709 000710 000711 000712 000713 000714 000715 000716 000717 000718 000719 000720 000721 000722 000723 000724 000725 000726 000727 000728 000729 000730 000731 000732 000733 000734 000735 000736 000737 000738 000739 000740 000741 000742 000743 000744 000745 000746 000747 000748 000749 000750 000751 000752 000753 000754 000755 000756 000757 000758 000759 000760 000761 000762 000763 000764 000765 000766 000767 000768 000769 000770 000771 000772 000773 000774 
000775 000776 000777 000778 000779 000780 000781 000782 000783 000784 000785 000786 000787 000788 000789 000790 000791 000792 000793 000794 000795 000796 000797 000798 000799 000800 000801 000802 000803 000804 000805 000806 000807 000808 000809 000810 000811 000812 000813 000814 000815 000816 000817 000818 000819 000820 000821 000822 000823 000824 000825 000826 000827 000828 000829 000830 000831 000832 000833 000834 000835 000836 000837 000838 000839 000840 000841 000842 000843 000844 000845 000846 000847 000848 000849 000850 000851 000852 000853 000854 000855 000856 000857 000858 000859 000860 000861 000862 000863 000864 000865 000866 000867 000868 000869 000870 000871 000872 000873 000874 000875 000876 000877 000878 000879 000880 000881 000882 000883 000884 000885 000886 000887 000888 000889 000890 000891 000892 000893 000894 000895 000896 000897 000898 000899 000900 000901 000902 000903 000904 000905 000906 000907 000908 000909 000910 000911 000912 000913 000914 000915 000916 000917 000918 000919 000920 000921 000922 000923 000924 000925 000926 000927 000928 000929 000930 000931 000932 000933 000934 000935 000936 000937 000938 000939 000940 000941 000942 000943 000944 000945 000946 000947 000948 000949 000950 000951 000952 000953 000954 000955 000956 000957 000958 000959 000960 000961 000962 000963 000964 000965 000966 000967 000968 000969 000970 000971 000972 000973 000974 000975 000976 000977 000978 000979 000980 000981 000982 000983 000984 000985 000986 000987 000988 000989 000990 000991 000992 000993 000994 000995 000996 000997 000998 000999 001000 001001 001002 001003 001004 001005 001006 001007 001008 001009 001010 001011 001012 001013 001014 001015 001016 001017 001018 001019 001020 001021 001022 001023 001024 001025 001026 001027 001028 001029 001030 001031 001032 001033 001034 001035 001036 001037 001038 001039 001040 001041 001042 001043 001044 001045 001046 001047 001048 001049 001050 001051 001052 001053 001054 001055 001056 001057 001058 001059 
001060 001061 001062 001063 001064 001065 001066 001067 001068 001069 001070 001071 001072 001073 001074 001075 001076 001077 001078 001079 001080 001081 001082 001083 001084 001085 001086 001087 001088 001089 001090 001091 001092 001093 001094 001095 001096 001097 001098 001099 001100 001101 001102 001103 001104 001105 001106 001107 001108 001109 001110 001111 001112 001113 001114 001115 001116 001117 001118 001119 001120 001121 001122 001123 001124 001125 001126 001127 001128 001129 001130 001131 001132 001133 001134 001135 001136 001137 001138 001139 001140 001141 001142 001143 001144 001145 001146 001147 001148 001149 001150 001151 001152 001153 001154 001155 001156 001157 001158 001159 001160 001161 001162 001163 001164 001165 001166 001167 001168 001169 001170 001171 001172 001173 001174 001175 001176 001177 001178 001179 001180 001181 001182 001183 001184 001185 001186 001187 001188 001189 001190 001191 001192 001193 001194 001195 001196 001197 001198 001199 001200 001201 001202 001203 001204 001205 001206 001207 001208 001209 001210 001211 001212 001213 001214 001215 001216 001217 001218 001219 001220 001221 001222 001223 001224 001225 001226 001227 001228 001229 001230 001231 001232 001233 001234 001235 001236 001237 001238 001239 001240 001241 001242 001243 001244 001245 001246 001247 001248 001249 001250 001251 001252 001253 001254 001255 001256 001257 001258 001259 001260 001261 001262 001263 001264 001265 001266 001267 001268 001269 001270 001271 001272 001273 001274 001275 001276 001277 001278 001279 001280 001281 001282 001283 001284 001285 001286 001287 001288 001289 001290 001291 001292 001293 001294 001295 001296 001297 001298 001299 001300 001301 001302 001303 001304 001305 001306 001307 001308 001309 001310 001311 001312 001313 001314 001315 001316 001317 001318 001319 001320 001321 001322 001323 001324 001325 001326 001327 001328 001329 001330 001331 001332 001333 001334 001335 001336 001337 001338 001339 001340 001341 001342 001343 001344 
001345 001346 001347 001348 001349 001350 001351 001352 001353 001354 001355 001356 001357 001358 001359 001360 001361 001362 001363 001364 001365 001366 001367 001368 001369 001370 001371 001372 001373 001374 001375 001376 001377 001378 001379 001380 001381 001382 001383 001384 001385 001386 001387 001388 001389 001390 001391 001392 001393 001394 001395 001396 001397 001398 001399 001400 001401 001402 001403 001404 001405 001406 001407 001408 001409 001410 001411 001412 001413 001414 001415 001416 001417 001418 001419 001420 001421 001422 001423 001424 001425 001426 001427 001428 001429 001430 001431 001432 001433 001434 001435 001436 001437 001438 001439 001440 001441 001442 001443 001444 001445 001446 001447 001448 001449 001450 001451 001452 001453 001454 001455 001456 001457 001458 001459 001460 001461 001462 001463 001464 001465 001466 001467 001468 001469 001470 001471 001472 001473 001474 001475 001476 001477 001478 001479 001480 001481 001482 001483 001484 001485 001486 001487 001488 001489 001490 001491 001492 001493 001494 001495 001496 001497 001498 001499 001500 001501 001502 001503 001504 001505 001506 001507 001508 001509 001510 001511 001512 001513 001514 001515 001516 001517 001518 001519 001520 001521 001522 001523 001524 001525 001526 001527 001528 001529 001530 001531 001532 001533 001534 001535 001536 001537 001538 001539 001540 001541 001542 001543 001544 001545 001546 001547 001548 001549 001550 001551 001552 001553 001554 001555 001556 001557 001558 001559 001560 001561 001562 001563 001564 001565 001566 001567 001568 001569 001570 001571 001572 001573 001574 001575 001576 001577 001578 001579 001580 001581 001582 001583 001584 001585 001586 001587 001588 001589 001590 001591 001592 001593 001594 001595 001596 001597 001598 001599 001600 001601 001602 001603 001604 001605 001606 001607 001608 001609 001610 001611 001612 001613 001614 001615 001616 001617 001618 001619 001620 001621 001622 001623 001624 001625 001626 001627 001628 001629 
001630 001631 001632 001633 001634 001635 001636 001637 001638 001639 001640 001641 001642 001643 001644 001645 001646 001647 001648 001649 001650 001651 001652 001653 001654 001655 001656 001657 001658 001659 001660 001661 001662 001663 001664 001665 001666 001667 001668 001669 001670 001671 001672 001673 001674 001675 001676 001677 001678 001679 001680 001681 001682 001683 001684 001685 001686 001687 001688 001689 001690 001691 001692 001693 001694 001695 001696 001697 001698 001699 001700 001701 001702 001703 001704 001705 001706 001707 001708 001709 001710 001711 001712 001713 001714 001715 001716 001717 001718 001719 001720 001721 001722 001723 001724 001725 001726 001727 001728 001729 001730 001731 001732 001733 001734 001735 001736 001737 001738 001739 001740 001741 001742 001743 001744 001745 001746 001747 001748 001749 001750 001751 001752 001753 001754 001755 001756 001757 001758 001759 001760 001761 001762 001763 001764 001765 001766 001767 001768 001769 001770 001771 001772 001773 001774 001775 001776 001777 001778 001779 001780 001781 001782 001783 001784 001785 001786 001787 001788 001789 001790 001791 001792 001793 001794 001795 001796 001797 001798 001799 001800 001801 001802 001803 001804 001805 001806 001807 001808 001809 001810 001811 001812 001813 001814 001815 001816 001817 001818 001819 001820 001821 001822 001823 001824 001825 001826 001827 001828 001829 001830 001831 001832 001833 001834 001835 001836 001837 001838 001839 001840 001841 001842 001843 001844 001845 001846 001847 001848 001849 001850 001851 001852 001853 001854 001855 001856 001857 001858 001859 001860 001861 001862 001863 001864 001865 001866 001867 001868 001869 001870 001871 001872 001873 001874 001875 001876 001877 001878 001879 001880 001881 001882 001883 001884 001885 001886 001887 001888 001889 001890 001891 001892 001893 001894 001895 001896 001897 001898 001899 001900 001901 001902 001903 001904 001905 001906 001907 001908 001909 001910 001911 001912 001913 001914 
001915 001916 001917 001918 001919 001920 001921 001922 001923 001924 001925 001926 001927 001928 001929 001930 001931 001932 001933 001934 001935 001936 001937 001938 001939 001940 001941 001942 001943 001944 001945 001946 001947 001948 001949 001950 001951 001952 001953 001954 001955 001956 001957 001958 001959 001960 001961 001962 001963 001964 001965 001966 001967 001968 001969 001970 001971 001972 001973 001974 001975 001976 001977 001978 001979 001980 001981 001982 001983 001984 001985 001986 001987 001988 001989 001990 001991 001992 001993 001994 001995 001996 001997 001998 001999 002000 002001 002002 002003 002004 002005 002006 002007 002008 002009 002010 002011 002012 002013 002014 002015 002016 002017 002018 002019 002020 002021 002022 002023 002024 002025 002026 002027 002028 002029 002030 002031 002032 002033 002034 002035 002036 002037 002038 002039 002040 002041 002042 002043 002044 002045 002046 002047 002048 002049 002050 002051 002052 002053 002054 002055 002056 002057 002058 002059 002060 002061 002062 002063 002064 002065 002066 002067 002068 002069 002070 002071 002072 002073 002074 002075 002076 002077 002078 002079 002080 002081 002082 002083 002084 002085 002086 002087 002088 002089 002090 002091 002092 002093 002094 002095 002096 002097 002098 002099 002100 002101 002102 002103 002104 002105 002106 002107 002108 002109 002110 002111 002112 002113 002114 002115 002116 002117 002118 002119 002120 002121 002122 002123 002124 002125 002126 002127 002128 002129 002130 002131 002132 002133 002134 002135 002136 002137 002138 002139 002140 002141 002142 002143 002144 002145 002146 002147 002148 002149 002150 002151 002152 002153 002154 002155 002156 002157 002158 002159 002160 002161 002162 002163 002164 002165 002166 002167 002168 002169 002170 002171 002172 002173 002174 002175 002176 002177 002178 002179 002180 002181 002182 002183 002184 002185 002186 002187 002188 002189 002190 002191 002192 002193 002194 002195 002196 002197 002198 002199 
002200 002201 002202 002203 002204 002205 002206 002207 002208 002209 002210 002211 002212 002213 002214 002215 002216 002217 002218 002219 002220 002221 002222 002223 002224 002225 002226 002227 002228 002229 002230 002231 002232 002233 002234 002235 002236 002237 002238 002239 002240 002241 002242 002243 002244 002245 002246 002247 002248 002249 002250 002251 002252 002253 002254 002255 002256 002257 002258 002259 002260 002261 002262 002263 002264 002265 002266 002267 002268 002269 002270 002271 002272 002273 002274 002275 002276 002277 002278 002279 002280 002281 002282 002283 002284 002285 002286 002287 002288 002289 002290 002291 002292 002293 002294 002295 002296 002297 002298 002299 002300 002301 002302 002303 002304 002305 002306 002307 002308 002309 002310 002311 002312 002313 002314 002315 002316 002317 002318 002319 002320 002321 002322 002323 002324 002325 002326 002327 002328 002329 002330 002331 002332 002333 002334 002335 002336 002337 002338 002339 002340 002341 002342 002343 002344 002345 002346 002347 002348 002349 002350 002351 002352 002353 002354 002355 002356 002357 002358 002359 002360 002361 002362 002363 002364 002365 002366 002367 002368 002369 002370 002371 002372 002373 002374 002375 002376 002377 002378 002379 002380 002381 002382 002383 002384 002385 002386 002387 002388 002389 002390 002391 002392 002393 002394 002395 002396 002397 002398 002399 002400 002401 002402 002403 002404 002405 002406 002407 002408 002409 002410 002411 002412 002413 002414 002415 002416 002417 002418 002419 002420 002421 002422 002423 002424 002425 002426 002427 002428 002429 002430 002431 002432 002433 002434 002435 002436 002437 002438 002439 002440 002441 002442 002443 002444 002445 002446 002447 002448 002449 002450 002451 002452 002453 002454 002455 002456 002457 002458 002459 002460 002461 002462 002463 002464 002465 002466 002467 002468 002469 002470 002471 002472 002473 002474 002475 002476 002477 002478 002479 002480 002481 002482 002483 002484 
002485 002486 002487 002488 002489 002490 002491 002492 002493 002494 002495 002496 002497 002498 002499 002500 002501 002502 002503 002504 002505 002506 002507 002508 002509 002510 002511 002512 002513 002514 002515 002516 002517 002518 002519 002520 002521 002522 002523 002524 002525 002526 002527 002528 002529 002530 002531 002532 002533 002534 002535 002536 002537 002538 002539 002540 002541 002542 002543 002544 002545 002546 002547 002548 002549 002550 002551 002552 002553 002554 002555 002556 002557 002558 002559 002560 002561 002562 002563 002564 002565 002566 002567 002568 002569 002570 002571 002572 002573 002574 002575 002576 002577 002578 002579 002580 002581 002582 002583 002584 002585 002586 002587 002588 002589 002590 002591 002592 002593 002594 002595 002596 002597 002598 002599 002600 002601 002602 002603 002604 002605 002606 002607 002608 002609 002610 002611 002612 002613 002614 002615 002616 002617 002618 002619 002620 002621 002622 002623 002624 002625 002626 002627 002628 002629 002630 002631 002632 002633 002634 002635 002636 002637 002638 002639 002640 002641 002642 002643 002644 002645 002646 002647 002648 002649 002650 002651 002652 002653 002654 002655 002656 002657 002658 002659 002660 002661 002662 002663 002664 002665 002666 002667 002668 002669 002670 002671 002672 002673 002674 002675 002676 002677 002678 002679 002680 002681 002682 002683 002684 002685 002686 002687 002688 002689 002690 002691 002692 002693 002694 002695 002696 002697 002698 002699 002700 002701 002702 002703 002704 002705 002706 002707 002708 002709 002710 002711 002712 002713 002714 002715 002716 002717 002718 002719 002720 002721 002722 002723 002724 002725 002726 002727 002728 002729 002730 002731 002732 002733 002734 002735 002736 002737 002738 002739 002740 002741 002742 002743 002744 002745 002746 002747 002748 002749 002750 002751 002752 002753 002754 002755 002756 002757 002758 002759 002760 002761 002762 002763 002764 002765 002766 002767 002768 002769 
002770 002771 002772 002773 002774 002775 002776 002777 002778 002779 002780 002781 002782 002783 002784 002785 002786 002787 002788 002789 002790 002791 002792 002793 002794 002795 002796 002797 002798 002799 002800 002801 002802 002803 002804 002805 002806 002807 002808 002809 002810 002811 002812 002813 002814 002815 002816 002817 002818 002819 002820 002821 002822 002823 002824 002825 002826 002827 002828 002829 002830 002831 002832 002833 002834 002835 002836 002837 002838 002839 002840 002841 002842 002843 002844 002845 002846 002847 002848 002849 002850 002851 002852 002853 002854 002855 002856 002857 002858 002859 002860 002861 002862 002863 002864 002865 002866 002867 002868 002869 002870 002871 002872 002873 002874 002875 002876 002877 002878 002879 002880 002881 002882 002883 002884 002885 002886 002887 002888 002889 002890 002891 002892 002893 002894 002895 002896 002897 002898 002899 002900 002901 002902 002903 002904 002905 002906 002907 002908 002909 002910 002911 002912 002913 002914 002915 002916 002917 002918 002919 002920 002921 002922 002923 002924 002925 002926 002927 002928 002929 002930 002931 002932 002933 002934 002935 002936 002937 002938 002939 002940 002941 002942 002943 002944 002945 002946 002947 002948 002949 002950 002951 002952 002953 002954 002955 002956 002957 002958 002959 002960 002961 002962 002963 002964 002965 002966 002967 002968 002969 002970 002971 002972 002973 002974 002975 002976 002977 002978 002979 002980 002981 002982 002983 002984 002985 002986 002987 002988 002989 002990 002991 002992 002993 002994 002995 002996 002997 002998 002999 003000 003001 003002 003003 003004 003005 003006 003007 003008 003009 003010 003011 003012 003013 003014 003015 003016 003017 003018 003019 003020 003021 003022 003023 003024 003025 003026 003027 003028 003029 003030 003031 003032 003033 003034 003035 003036 003037 003038 003039 003040 003041 003042 003043 003044 003045 003046 003047 003048 003049 003050 003051 003052 003053 003054 
003055 003056 003057 003058 003059 003060 003061 003062 003063 003064 003065 003066 003067 003068 003069 003070 003071 003072 003073 003074 003075 003076 003077 003078 003079 003080 003081 003082 003083 003084 003085 003086 003087 003088 003089 003090 003091 003092 003093 003094 003095 003096 003097 003098 003099 003100 003101 003102 003103 003104 003105 003106 003107 003108 003109 003110 003111 003112 003113 003114 003115 003116 003117 003118 003119 003120 003121 003122 003123 003124 003125 003126 003127 003128 003129 003130 003131 003132 003133 003134 003135 003136 003137 003138 003139 003140 003141 003142 003143 003144 003145 003146 003147 003148 003149 003150 003151 003152 003153 003154 003155 003156 003157 003158 003159 003160 003161 003162 003163 003164 003165 003166 003167 003168 003169 003170 003171 003172 003173 003174 003175 003176 003177 003178 003179 003180 003181 003182 003183 003184 003185 003186 003187 003188 003189 003190 003191 003192 003193 003194 003195 003196 003197 003198 003199 003200 003201 003202 003203 003204 003205 003206 003207 003208 003209 003210 003211 003212 003213 003214 003215 003216 003217 003218 003219 003220 003221 003222 003223 003224 003225 003226 003227 003228 003229 003230 003231 003232 003233 003234 003235 003236 003237 003238 003239 003240 003241 003242 003243 003244 003245 003246 003247 003248 003249 003250 003251 003252 003253 003254 003255 003256 003257 003258 003259 003260 003261 003262 003263 003264 003265 003266 003267 003268 003269 003270 003271 003272 003273 003274 003275 003276 003277 003278 003279 003280 003281 003282 003283 003284 003285 003286 003287 003288 003289 003290 003291 003292 003293 003294 003295 003296 003297 003298 003299 003300 003301 003302 003303 003304 003305 003306 003307 003308 003309 003310 003311 003312 003313 003314 003315 003316 003317 003318 003319 003320 003321 003322 003323 003324 003325 003326 003327 003328 003329 003330 003331 003332 003333 003334 003335 003336 003337 003338 003339 
003340 003341 003342 003343 003344 003345 003346 003347 003348 003349 003350 003351 003352 003353 003354 003355 003356 003357 003358 003359 003360 003361 003362 003363 003364 003365 003366 003367 003368 003369 003370 003371 003372 003373 003374 003375 003376 003377 003378 003379 003380 003381 003382 003383 003384 003385 003386 003387 003388 003389 003390 003391 003392 003393 003394 003395 003396 003397 003398 003399 003400 003401 003402 003403 003404 003405 003406 003407 003408 003409 003410 003411 003412 003413 003414 003415 003416 003417 003418 003419 003420 003421 003422 003423 003424 003425 003426 003427 003428 003429 003430 003431 003432 003433 003434 003435 003436 003437 003438 003439 003440 003441 003442 003443 003444 003445 003446 003447 003448 003449 003450 003451 003452 003453 003454 003455 003456 003457 003458 003459 003460 003461 003462 003463 003464 003465 003466 003467 003468 003469 003470 003471 003472 003473 003474 003475 003476 003477 003478 003479 003480 003481 003482 003483 003484 003485 003486 003487 003488 003489 003490 003491 003492 003493 003494 003495 003496 003497 003498 003499 003500 003501 003502 003503 003504 003505 003506 003507 003508 003509 003510 003511 003512 003513 003514 003515 003516 003517 003518 003519 003520 003521 003522 003523 003524 003525 003526 003527 003528 003529 003530 003531 003532 003533 003534 003535 003536 003537 003538 003539 003540 003541 003542 003543 003544 003545 003546 003547 003548 003549 003550 003551 003552 003553 003554 003555 003556 003557 003558 003559 003560 003561 003562 003563 003564 003565 003566 003567 003568 003569 003570 003571 003572 003573 003574 003575 003576 003577 003578 003579 003580 003581 003582 003583 003584 003585 003586 003587 003588 003589 003590 003591 003592 003593 003594 003595 003596 003597 003598 003599 003600 003601 003602 003603 003604 003605 003606 003607 003608 003609 003610 003611 003612 003613 003614 003615 003616 003617 003618 003619 003620 003621 003622 003623 003624 
003625 003626 003627 003628 003629 003630 003631 003632 003633 003634 003635 003636 003637 003638 003639 003640 003641 003642 003643 003644 003645 003646 003647 003648 003649 003650 003651 003652 003653 003654 003655 003656 003657 003658 003659 003660 003661 003662 003663 003664 003665 003666 003667 003668 003669 003670 003671 003672 003673 003674 003675 003676 003677 003678 003679 003680 003681 003682 003683 003684 003685 003686 003687 003688 003689 003690 003691 003692 003693 003694 003695 003696 003697 003698 003699 003700 003701 003702 003703 003704 003705 003706 003707 003708 003709 003710 003711 003712 003713 003714 003715 003716 003717 003718 003719 003720 003721 003722 003723 003724 003725 003726 003727 003728 003729 003730 003731 003732 003733 003734 003735 003736 003737 003738 003739 003740 003741 003742 003743 003744 003745 003746 003747 003748 003749 003750 003751 003752 003753 003754 003755 003756 003757 003758 003759 003760 003761 003762 003763 003764 003765 003766 003767 003768 003769 003770 003771 003772 003773 003774 003775 003776 003777 003778 003779 003780 003781 003782 003783 003784 003785 003786 003787 003788 003789 003790 003791 003792 003793 003794 003795 003796 003797 003798 003799 003800 003801 003802 003803 003804 003805 003806 003807 003808 003809 003810 003811 003812 003813 003814 003815 003816 003817 003818 003819 003820 003821 003822 003823 003824 003825 003826 003827 003828 003829 003830 003831 003832 003833 003834 003835 003836 003837 003838 003839 003840 003841 003842 003843 003844 003845 003846 003847 003848 003849 003850 003851 003852 003853 003854 003855 003856 003857 003858 003859 003860 003861 003862 003863 003864 003865 003866 003867 003868 003869 003870 003871 003872 003873 003874 003875 003876 003877 003878 003879 003880 003881 003882 003883 003884 003885 003886 003887 003888 003889 003890 003891 003892 003893 003894 003895 003896 003897 003898 003899 003900 003901 003902 003903 003904 003905 003906 003907 003908 003909 
003910 003911 003912 003913 003914 003915 003916 003917 003918 003919 003920 003921 003922 003923 003924 003925 003926 003927 003928 003929 003930 003931 003932 003933 003934 003935 003936 003937 003938 003939 003940 003941 003942 003943 003944 003945 003946 003947 003948 003949 003950 003951 003952 003953 003954 003955 003956 003957 003958 003959 003960 003961 003962 003963 003964 003965 003966 003967 003968 003969 003970 003971 003972 003973 003974 003975 003976 003977 003978 003979 003980 003981 003982 003983 003984 003985 003986 003987 003988 003989 003990 003991 003992 003993 003994 003995 003996 003997 003998 003999 004000 004001 004002 004003 004004 004005 004006 004007 004008 004009 004010 004011 004012 004013 004014 004015 004016 004017 004018 004019 004020 004021 004022 004023 004024 004025 004026 004027 004028 004029 004030 004031 004032 004033 004034 004035 004036 004037 004038 004039 004040 004041 004042 004043 004044 004045 004046 004047 004048 004049 004050 004051 004052 004053 004054 004055 004056 004057 004058 004059 004060 004061 004062 004063 004064 004065 004066 004067 004068 004069 004070 004071 004072 004073 004074 004075 004076 004077 004078 004079 004080 004081 004082 004083 004084 004085 004086 004087 004088 004089 004090 004091 004092 004093 004094 004095 004096 004097 004098 004099 004100 004101 004102 004103 004104 004105 004106 004107 004108 004109 004110 004111 004112 004113 004114 004115 004116 004117 004118 004119 004120 004121 004122 004123 004124 004125 004126 004127 004128 004129 004130 004131 004132 004133 004134 004135 004136 004137 004138 004139 004140 004141 004142 004143 004144 004145 004146 004147 004148 004149 004150 004151 004152 004153 004154 004155 004156 004157 004158 004159 004160 004161 004162 004163 004164 004165 004166 004167 004168 004169 004170 004171 004172 004173 004174 004175 004176 004177 004178 004179 004180 004181 004182 004183 004184 004185 004186 004187 004188 004189 004190 004191 004192 004193 004194 
004195 004196 004197 004198 004199 004200 004201 004202 004203 004204 004205 004206 004207 004208 004209 004210 004211 004212 004213 004214 004215 004216 004217 004218 004219 004220 004221 004222 004223 004224 004225 004226 004227 004228 004229 004230 004231 004232 004233 004234 004235 004236 004237 004238 004239 004240 004241 004242 004243 004244 004245 004246 004247 004248 004249 004250 004251 004252 004253 004254 004255 004256 004257 004258 004259 004260 004261 004262 004263 004264 004265 004266 004267 004268 004269 004270 004271 004272 004273 004274 004275 004276 004277 004278 004279 004280 004281 004282 004283 004284 004285 004286 004287 004288 004289 004290 004291 004292 004293 004294 004295 004296 004297 004298 004299 004300 004301 004302 004303 004304 004305 004306 004307 004308 004309 004310 004311 004312 004313 004314 004315 004316 004317 004318 004319 004320 004321 004322 004323 004324 004325 004326 004327 004328 004329 004330 004331 004332 004333 004334 004335 004336 004337 004338 004339 004340 004341 004342 004343 004344 004345 004346 004347 004348 004349 004350 004351 004352 004353 004354 004355 004356 004357 004358 004359 004360 004361 004362 004363 004364 004365 004366 004367 004368 004369 004370 004371 004372 004373 004374 004375 004376 004377 004378 004379 004380 004381 004382 004383 004384 004385 004386 004387 004388 004389 004390 004391 004392 004393 004394 004395 004396 004397 004398 004399 004400 004401 004402 004403 004404 004405 004406 004407 004408 004409 004410 004411 004412 004413 004414 004415 004416 004417 004418 004419 004420 004421 004422 004423 004424 004425 004426 004427 004428 004429 004430 004431 004432 004433 004434 004435 004436 004437 004438 004439 004440 004441 004442 004443 004444 004445 004446 004447 004448 004449 004450 004451 004452 004453 004454 004455 004456 004457 004458 004459 004460 004461 004462 004463 004464 004465 004466 004467 004468 004469 004470 004471 004472 004473 004474 004475 004476 004477 004478 004479 
004480 004481 004482 004483 004484 004485 004486 004487 004488 004489 004490 004491 004492 004493 004494 004495 004496 004497 004498 004499 004500 004501 004502 004503 004504 004505 004506 004507 004508 004509 004510 004511 004512 004513 004514 004515 004516 004517 004518 004519 004520 004521 004522 004523 004524 004525 004526 004527 004528 004529 004530 004531 004532 004533 004534 004535 004536 004537 004538 004539 004540 004541 004542 004543 004544 004545 004546 004547 004548 004549 004550 004551 004552 004553 004554 004555 004556 004557 004558 004559 004560 004561 004562 004563 004564 004565 004566 004567 004568 004569 004570 004571 004572 004573 004574 004575 004576 004577 004578 004579 004580 004581 004582 004583 004584 004585 004586 004587 004588 004589 004590 004591 004592 004593 004594 004595 004596 004597 004598 004599 004600 004601 004602 004603 004604 004605 004606 004607 004608 004609 004610 004611 004612 004613 004614 004615 004616 004617 004618 004619 004620 004621 004622 004623 004624 004625 004626 004627 004628 004629 004630 004631 004632 004633 004634 004635 004636 004637 004638 004639 004640 004641 004642 004643 004644 004645 004646 004647 004648 004649 004650 004651 004652 004653 004654 004655 004656 004657 004658 004659 004660 004661 004662 004663 004664 004665 004666 004667 004668 004669 004670 004671 004672 004673 004674 004675 004676 004677 004678 004679 004680 004681 004682 004683 004684 004685 004686 004687 004688 004689 004690 004691 004692 004693 004694 004695 004696 004697 004698 004699 004700 004701 004702 004703 004704 004705 004706 004707 004708 004709 004710 004711 004712 004713 004714 004715 004716 004717 004718 004719 004720 004721 004722 004723 004724 004725 004726 004727 004728 004729 004730 004731 004732 004733 004734 004735 004736 004737 004738 004739 004740 004741 004742 004743 004744 004745 004746 004747 004748 004749 004750 004751 004752 004753 004754 004755 004756 004757 004758 004759 004760 004761 004762 004763 004764 
004765 004766 004767 004768 004769 004770 004771 004772 004773 004774 004775 004776 004777 004778 004779 004780 004781 004782 004783 004784 004785 004786 004787 004788 004789 004790 004791 004792 004793 004794 004795 004796 004797 004798 004799 004800 004801 004802 004803 004804 004805 004806 004807 004808 004809 004810 004811 004812 004813 004814 004815 004816 004817 004818 004819 004820 004821 004822 004823 004824 004825 004826 004827 004828 004829 004830 004831 004832 004833 004834 004835 004836 004837 004838 004839 004840 004841 004842 004843 004844 004845 004846 004847 004848 004849 004850 004851 004852 004853 004854 004855 004856 004857 004858 004859 004860 004861 004862 004863 004864 004865 004866 004867 004868 004869 004870 004871 004872 004873 004874 004875 004876 004877 004878 004879 004880 004881 004882 004883 004884 004885 004886 004887 004888 004889 004890 004891 004892 004893 004894 004895 004896 004897 004898 004899 004900 004901 004902 004903 004904 004905 004906 004907 004908 004909 004910 004911 004912 004913 004914 004915 004916 004917 004918 004919 004920 004921 004922 004923 004924 004925 004926 004927 004928 004929 004930 004931 004932 004933 004934 004935 004936 004937 004938 004939 004940 004941 004942 004943 004944 004945 004946 004947 004948 004949 004950 004951 004952 004953 004954 004955 004956 004957 004958 004959 004960 004961 004962 004963 004964 004965 004966 004967 004968 004969 004970 004971 004972 004973 004974 004975 004976 004977 004978 004979 004980 004981 004982 004983 004984 004985 004986 004987 004988 004989 004990 004991 004992 004993 004994 004995 004996 004997 004998 004999 005000 005001 005002 005003 005004 005005 005006 005007 005008 005009 005010 005011 005012 005013 005014 005015 005016 005017 005018 005019 005020 005021 005022 005023 005024 005025 005026 005027 005028 005029 005030 005031 005032 005033 005034 005035 005036 005037 005038 005039 005040 005041 005042 005043 005044 005045 005046 005047 005048 005049 
005050 005051 005052 005053 005054 005055 005056 005057 005058 005059 005060 005061 005062 005063 005064 005065 005066 005067 005068 005069 005070 005071 005072 005073 005074 005075 005076 005077 005078 005079 005080 005081 005082 005083 005084 005085 005086 005087 005088 005089 005090 005091 005092 005093 005094 005095 005096 005097 005098 005099 005100 005101 005102 005103 005104 005105 005106 005107 005108 005109 005110 005111 005112 005113 005114 005115 005116 005117 005118 005119 005120 005121 005122 005123 005124 005125 005126 005127 005128 005129 005130 005131 005132 005133 005134 005135 005136 005137 005138 005139 005140 005141 005142 005143 005144 005145 005146 005147 005148 005149 005150 005151 005152 005153 005154 005155 005156 005157 005158 005159 005160 005161 005162 005163 005164 005165 005166 005167 005168 005169 005170 005171 005172 005173 005174 005175 005176 005177 005178 005179 005180 005181 005182 005183 005184 005185 005186 005187 005188 005189 005190 005191 005192 005193 005194 005195 005196 005197 005198 005199 005200 005201 005202 005203 005204 005205 005206 005207 005208 005209 005210 005211 005212 005213 005214 005215 005216 005217 005218 005219 005220 005221 005222 005223 005224 005225 005226 005227 005228 005229 005230 005231 005232 005233 005234 005235 005236 005237 005238 005239 005240 005241 005242 005243 005244 005245 005246 005247 005248 005249 005250 005251 005252 005253 005254 005255 005256 005257 005258 005259 005260 005261 005262 005263 005264 005265 005266 005267 005268 005269 005270 005271 005272 005273 005274 005275 005276 005277 005278 005279 005280 005281 005282 005283 005284 005285 005286 005287 005288 005289 005290 005291 005292 005293 005294 005295 005296 005297 005298 005299 005300 005301 005302 005303 005304 005305 005306 005307 005308 005309 005310 005311 005312 005313 005314 005315 005316 005317 005318 005319 005320 005321 005322 005323 005324 005325 005326 005327 005328 005329 005330 005331 005332 005333 005334 
005335 005336 005337 005338 005339 005340 005341 005342 005343 005344 005345 005346 005347 005348 005349 005350 005351 005352 005353 005354 005355 005356 005357 005358 005359 005360 005361 005362 005363 005364 005365 005366 005367 005368 005369 005370 005371 005372 005373 005374 005375 005376 005377 005378 005379 005380 005381 005382 005383 005384 005385 005386 005387 005388 005389 005390 005391 005392 005393 005394 005395 005396 005397 005398 005399 005400 005401 005402 005403 005404 005405 005406 005407 005408 005409 005410 005411 005412 005413 005414 005415 005416 005417 005418 005419 005420 005421 005422 005423 005424 005425 005426 005427 005428 005429 005430 005431 005432 005433 005434 005435 005436 005437 005438 005439 005440 005441 005442 005443 005444 005445 005446 005447 005448 005449 005450 005451 005452 005453 005454 005455 005456 005457 005458 005459 005460 005461 005462 005463 005464 005465 005466 005467 005468 005469 005470 005471 005472 005473 005474 005475 005476 005477 005478 005479 005480 005481 005482 005483 005484 005485 005486 005487 005488 005489 005490 005491 005492 005493 005494 005495 005496 005497 005498 005499 005500 005501 005502 005503 005504 005505 005506 005507 005508 005509 005510 005511 005512 005513 005514 005515 005516 005517 005518 005519 005520 005521 005522 005523 005524 005525 005526 005527 005528 005529 005530 005531 005532 005533 005534 005535 005536 005537 005538 005539 005540 005541 005542 005543 005544 005545 005546 005547 005548 005549 005550 005551 005552 005553 005554 005555 005556 005557 005558 005559 005560 005561 005562 005563 005564 005565 005566 005567 005568 005569 005570 005571 005572 005573 005574 005575 005576 005577 005578 005579 005580 005581 005582 005583 005584 005585 005586 005587 005588 005589 005590 005591 005592 005593 005594 005595 005596 005597 005598 005599 005600 005601 005602 005603 005604 005605 005606 005607 005608 005609 005610 005611 005612 005613 005614 005615 005616 005617 005618 005619 
005620 005621 005622 005623 005624 005625 005626 005627 005628 005629 005630 005631 005632 005633 005634 005635 005636 005637 005638 005639 005640 005641 005642 005643 005644 005645 005646 005647 005648 005649 005650 005651 005652 005653 005654 005655 005656 005657 005658 005659 005660 005661 005662 005663 005664 005665 005666 005667 005668 005669 005670 005671 005672 005673 005674 005675 005676 005677 005678 005679 005680 005681 005682 005683 005684 005685 005686 005687 005688 005689 005690 005691 005692 005693 005694 005695 005696 005697 005698 005699 005700 005701 005702 005703 005704 005705 005706 005707 005708 005709 005710 005711 005712 005713 005714 005715 005716 005717 005718 005719 005720 005721 005722 005723 005724 005725 005726 005727 005728 005729 005730 005731 005732 005733 005734 005735 005736 005737 005738 005739 005740 005741 005742 005743 005744 005745 005746 005747 005748 005749 005750 005751 005752 005753 005754 005755 005756 005757 005758 005759 005760 005761 005762 005763 005764 005765 005766 005767 005768 005769 005770 005771 005772 005773 005774 005775 005776 005777 005778 005779 005780 005781 005782 005783 005784 005785 005786 005787 005788 005789 005790 005791 005792 005793 005794 005795 005796 005797 005798 005799 005800 005801 005802 005803 005804 005805 005806 005807 005808 005809 005810 005811 005812 005813 005814 005815 005816 005817 005818 005819 005820 005821 005822 005823 005824 005825 005826 005827 005828 005829 005830 005831 005832 005833 005834 005835 005836 005837 005838 005839 005840 005841 005842 005843 005844 005845 005846 005847 005848 005849 005850 005851 005852 005853 005854 005855 005856 005857 005858 005859 005860 005861 005862 005863 005864 005865 005866 005867 005868 005869 005870 005871 005872 005873 005874 005875 005876 005877 005878 005879 005880 005881 005882 005883 005884 005885 005886 005887 005888 005889 005890 005891 005892 005893 005894 005895 005896 005897 005898 005899 005900 005901 005902 005903 005904 
005905 005906 005907 005908 005909 005910 005911 005912 005913 005914 005915 005916 005917 005918 005919 005920 005921 005922 005923 005924 005925 005926 005927 005928 005929 005930 005931 005932 005933 005934 005935 005936 005937 005938 005939 005940 005941 005942 005943 005944 005945 005946 005947 005948 005949 005950 005951 005952 005953 005954 005955 005956 005957 005958 005959 005960 005961 005962 005963 005964 005965 005966 005967 005968 005969 005970 005971 005972 005973 005974 005975 005976 005977 005978 005979 005980 005981 005982 005983 005984 005985 005986 005987 005988 005989 005990 005991 005992 005993 005994 005995 005996 005997 005998 005999 006000 006001 006002 006003 006004 006005 006006 006007 006008 006009 006010 006011 006012 006013 006014 006015 006016 006017 006018 006019 006020 006021 006022 006023 006024 006025 006026 006027 006028 006029 006030 006031 006032 006033 006034 006035 006036 006037 006038 006039 006040 006041 006042 006043 006044 006045 006046 006047 006048 006049 006050 006051 006052 006053 006054 006055 006056 006057 006058 006059 006060 006061 006062 006063 006064 006065 006066 006067 006068 006069 006070 006071 006072 006073 006074 006075 006076 006077 006078 006079 006080 006081 006082 006083 006084 006085 006086 006087 006088 006089 006090 006091 006092 006093 006094 006095 006096 006097 006098 006099 006100 006101 006102 006103 006104 006105 006106 006107 006108 006109 006110 006111 006112 006113 006114 006115 006116 006117 006118 006119 006120 006121 006122 006123 006124 006125 006126 006127 006128 006129 006130 006131 006132 006133 006134 006135 006136 006137 006138 006139 006140 006141 006142 006143 006144 006145 006146 006147 006148 006149 006150 006151 006152 006153 006154 006155 006156 006157 006158 006159 006160 006161 006162 006163 006164 006165 006166 006167 006168 006169 006170 006171 006172 006173 006174 006175 006176 006177 006178 006179 006180 006181 006182 006183 006184 006185 006186 006187 006188 006189 
006190 006191 006192 006193 006194 006195 006196 006197 006198 006199 006200 006201 006202 006203 006204 006205 006206 006207 006208 006209 006210 006211 006212 006213 006214 006215 006216 006217 006218 006219 006220 006221 006222 006223 006224 006225 006226 006227 006228 006229 006230 006231 006232 006233 006234 006235 006236 006237 006238 006239 006240 006241 006242 006243 006244 006245 006246 006247 006248 006249 006250 006251 006252 006253 006254 006255 006256 006257 006258 006259 006260 006261 006262 006263 006264 006265 006266 006267 006268 006269 006270 006271 006272 006273 006274 006275 006276 006277 006278 006279 006280 006281 006282 006283 006284 006285 006286 006287 006288 006289 006290 006291 006292 006293 006294 006295 006296 006297 006298 006299 006300 006301 006302 006303 006304 006305 006306 006307 006308 006309 006310 006311 006312 006313 006314 006315 006316 006317 006318 006319 006320 006321 006322 006323 006324 006325 006326 006327 006328 006329 006330 006331 006332 006333 006334 006335 006336 006337 006338 006339 006340 006341 006342 006343 006344 006345 006346 006347 006348 006349 006350 006351 006352 006353 006354 006355 006356 006357 006358 006359 006360 006361 006362 006363 006364 006365 006366 006367 006368 006369 006370 006371 006372 006373 006374 006375 006376 006377 006378 006379 006380 006381 006382 006383 006384 006385 006386 006387 006388 006389 006390 006391 006392 006393 006394 006395 006396 006397 006398 006399 006400 006401 006402 006403 006404 006405 006406 006407 006408 006409 006410 006411 006412 006413 006414 006415 006416 006417 006418 006419 006420 006421 006422 006423 006424 006425 006426 006427 006428 006429 006430 006431 006432 006433 006434 006435 006436 006437 006438 006439 006440 006441 006442 006443 006444 006445 006446 006447 006448 006449 006450 006451 006452 006453 006454 006455 006456 006457 006458 006459 006460 006461 006462 006463 006464 006465 006466 006467 006468 006469 006470 006471 006472 006473 006474 
006475 006476 006477 006478 006479 006480 006481 006482 006483 006484 006485 006486 006487 006488 006489 006490 006491 006492 006493 006494 006495 006496 006497 006498 006499 006500 006501 006502 006503 006504 006505 006506 006507 006508 006509 006510 006511 006512 006513 006514 006515 006516 006517 006518 006519 006520 006521 006522 006523 006524 006525 006526 006527 006528 006529 006530 006531 006532 006533 006534 006535 006536 006537 006538 006539 006540 006541 006542 006543 006544 006545 006546 006547 006548 006549 006550 006551 006552 006553 006554 006555 006556 006557 006558 006559 006560 006561 006562 006563 006564 006565 006566 006567 006568 006569 006570 006571 006572 006573 006574 006575 006576 006577 006578 006579 006580 006581 006582 006583 006584 006585 006586 006587 006588 006589 006590 006591 006592 006593 006594 006595 006596 006597 006598 006599 006600 006601 006602 006603 006604 006605 006606 006607 006608 006609 006610 006611 006612 006613 006614 006615 006616 006617 006618 006619 006620 006621 006622 006623 006624 006625 006626 006627 006628 006629 006630 006631 006632 006633 006634 006635 006636 006637 006638 006639 006640 006641 006642 006643 006644 006645 006646 006647 006648 006649 006650 006651 006652 006653 006654 006655 006656 006657 006658 006659 006660 006661 006662 006663 006664 006665 006666 006667 006668 006669 006670 006671 006672 006673 006674 006675 006676 006677 006678 006679 006680 006681 006682 006683 006684 006685 006686 006687 006688 006689 006690 006691 006692 006693 006694 006695 006696 006697 006698 006699 006700 006701 006702 006703 006704 006705 006706 006707 006708 006709 006710 006711 006712 006713 006714 006715 006716 006717 006718 006719 006720 006721 006722 006723 006724 006725 006726 006727 006728 006729 006730 006731 006732 006733 006734 006735 006736 006737 006738 006739 006740 006741 006742 006743 006744 006745 006746 006747 006748 006749 006750 006751 006752 006753 006754 006755 006756 006757 006758 006759 
006760 006761 006762 006763 006764 006765 006766 006767 006768 006769 006770 006771 006772 006773 006774 006775 006776 006777 006778 006779 006780 006781 006782 006783 006784 006785 006786 006787 006788 006789 006790 006791 006792 006793 006794 006795 006796 006797 006798 006799 006800 006801 006802 006803 006804 006805 006806 006807 006808 006809 006810 006811 006812 006813 006814 006815 006816 006817 006818 006819 006820 006821 006822 006823 006824 006825 006826 006827 006828 006829 006830 006831 006832 006833 006834 006835 006836 006837 006838 006839 006840 006841 006842 006843 006844 006845 006846 006847 006848 006849 006850 006851 006852 006853 006854 006855 006856 006857 006858 006859 006860 006861 006862 006863 006864 006865 006866 006867 006868 006869 006870 006871 006872 006873 006874 006875 006876 006877 006878 006879 006880 006881 006882 006883 006884 006885 006886 006887 006888 006889 006890 006891 006892 006893 006894 006895 006896 006897 006898 006899 006900 006901 006902 006903 006904 006905 006906 006907 006908 006909 006910 006911 006912 006913 006914 006915 006916 006917 006918 006919 006920 006921 006922 006923 006924 006925 006926 006927 006928 006929 006930 006931 006932 006933 006934 006935 006936 006937 006938 006939 006940 006941 006942 006943 006944 006945 006946 006947 006948 006949 006950 006951 006952 006953 006954 006955 006956 006957 006958 006959 006960 006961 006962 006963 006964 006965 006966 006967 006968 006969 006970 006971 006972 006973 006974 006975 006976 006977 006978 006979 006980 006981 006982 006983 006984 006985 006986 006987 006988 006989 006990 006991 006992 006993 006994 006995 006996 006997 006998 006999 007000 007001 007002 007003 007004 007005 007006 007007 007008 007009 007010 007011 007012 007013 007014 007015 007016 007017 007018 007019 007020 007021 007022 007023 007024 007025 007026 007027 007028 007029 007030 007031 007032 007033 007034 007035 007036 007037 007038 007039 007040 007041 007042 007043 007044 
007045 007046 007047 007048 007049 007050 007051 007052 007053 007054 007055 007056 007057 007058 007059 007060 007061 007062 007063 007064 007065 007066 007067 007068 007069 007070 007071 007072 007073 007074 007075 007076 007077 007078 007079 007080 007081 007082 007083 007084 007085 007086 007087 007088 007089 007090 007091 007092 007093 007094 007095 007096 007097 007098 007099 007100 007101 007102 007103 007104 007105 007106 007107 007108 007109 007110 007111 007112 007113 007114 007115 007116 007117 007118 007119 007120 007121 007122 007123 007124 007125 007126 007127 007128 007129 007130 007131 007132 007133 007134 007135 007136 007137 007138 007139 007140 007141 007142 007143 007144 007145 007146 007147 007148 007149 007150 007151 007152 007153 007154 007155 007156 007157 007158 007159 007160 007161 007162 007163 007164 007165 007166 007167 007168 007169 007170 007171 007172 007173 007174 007175 007176 007177 007178 007179 007180 007181 007182 007183 007184 007185 007186 007187 007188 007189 007190 007191 007192 007193 007194 007195 007196 007197 007198 007199 007200 007201 007202 007203 007204 007205 007206 007207 007208 007209 007210 007211 007212 007213 007214 007215 007216 007217 007218 007219 007220 007221 007222 007223 007224 007225 007226 007227 007228 007229 007230 007231 007232 007233 007234 007235 007236 007237 007238 007239 007240 007241 007242 007243 007244 007245 007246 007247 007248 007249 007250 007251 007252 007253 007254 007255 007256 007257 007258 007259 007260 007261 007262 007263 007264 007265 007266 007267 007268 007269 007270 007271 007272 007273 007274 007275 007276 007277 007278 007279 007280 007281 007282 007283 007284 007285 007286 007287 007288 007289 007290 007291 007292 007293 007294 007295 007296 007297 007298 007299 007300 007301 007302 007303 007304 007305 007306 007307 007308 007309 007310 007311 007312 007313 007314 007315 007316 007317 007318 007319 007320 007321 007322 007323 007324 007325 007326 007327 007328 007329 
007330 007331 007332 007333 007334 007335 007336 007337 007338 007339 007340 007341 007342 007343 007344 007345 007346 007347 007348 007349 007350 007351 007352 007353 007354 007355 007356 007357 007358 007359 007360 007361 007362 007363 007364 007365 007366 007367 007368 007369 007370 007371 007372 007373 007374 007375 007376 007377 007378 007379 007380 007381 007382 007383 007384 007385 007386 007387 007388 007389 007390 007391 007392 007393 007394 007395 007396 007397 007398 007399 007400 007401 007402 007403 007404 007405 007406 007407 007408 007409 007410 007411 007412 007413 007414 007415 007416 007417 007418 007419 007420 007421 007422 007423 007424 007425 007426 007427 007428 007429 007430 007431 007432 007433 007434 007435 007436 007437 007438 007439 007440 007441 007442 007443 007444 007445 007446 007447 007448 007449 007450 007451 007452 007453 007454 007455 007456 007457 007458 007459 007460 007461 007462 007463 007464 007465 007466 007467 007468 007469 007470 007471 007472 007473 007474 007475 007476 007477 007478 007479 007480 007481 007482 007483 007484 007485 007486 007487 007488 007489 007490 007491 007492 007493 007494 007495 007496 007497 007498 007499 007500 007501 007502 007503 007504 007505 007506 007507 007508 007509 007510 007511 007512 007513 007514 007515 007516 007517 ================================================ FILE: data/kitti/ImageSets/train.txt ================================================ 000000 000003 000007 000009 000010 000011 000012 000013 000014 000016 000017 000018 000022 000026 000029 000030 000032 000034 000036 000038 000041 000043 000044 000045 000046 000049 000051 000054 000055 000056 000057 000060 000064 000067 000068 000069 000070 000071 000072 000073 000074 000075 000079 000080 000082 000083 000084 000085 000086 000087 000088 000091 000092 000095 000096 000097 000099 000100 000101 000103 000105 000109 000110 000111 000112 000113 000114 000115 000119 000120 000121 000123 000125 000127 000129 000130 000131 000133 
000136 000138 000141 000142 000144 000145 000146 000148 000149 000150 000154 000155 000157 000158 000160 000162 000163 000164 000165 000166 000171 000172 000176 000177 000178 000179 000180 000184 000185 000189 000193 000198 000200 000202 000205 000206 000208 000209 000210 000214 000215 000217 000219 000220 000221 000222 000225 000227 000228 000232 000233 000238 000240 000241 000243 000244 000245 000253 000254 000255 000256 000257 000258 000259 000261 000264 000267 000271 000274 000275 000276 000277 000280 000282 000285 000286 000287 000288 000292 000294 000295 000296 000298 000299 000300 000303 000304 000306 000310 000313 000316 000317 000318 000322 000325 000326 000330 000331 000334 000337 000338 000339 000342 000344 000348 000349 000353 000358 000363 000364 000367 000368 000371 000374 000375 000380 000384 000387 000389 000390 000400 000405 000406 000410 000411 000412 000416 000417 000418 000421 000423 000424 000425 000426 000431 000432 000433 000434 000435 000438 000439 000441 000442 000444 000445 000447 000449 000456 000458 000460 000461 000462 000464 000465 000466 000467 000470 000471 000474 000482 000483 000484 000487 000488 000490 000497 000500 000501 000502 000505 000507 000511 000513 000514 000516 000518 000520 000522 000523 000525 000526 000529 000531 000532 000534 000535 000537 000538 000539 000540 000544 000547 000549 000550 000552 000553 000556 000557 000562 000563 000565 000570 000573 000574 000575 000576 000577 000578 000579 000580 000582 000584 000585 000586 000587 000592 000593 000594 000596 000597 000598 000599 000602 000603 000605 000606 000607 000608 000609 000616 000617 000621 000622 000623 000627 000629 000631 000632 000633 000637 000638 000640 000641 000643 000646 000649 000651 000652 000653 000654 000656 000661 000662 000663 000664 000665 000666 000668 000671 000672 000673 000675 000676 000678 000680 000681 000685 000686 000687 000688 000689 000690 000693 000695 000697 000701 000703 000705 000707 000709 000710 000711 000712 000713 000714 
000715 000719 000720 000723 000724 000726 000730 000732 000733 000735 000738 000739 000742 000743 000744 000747 000749 000753 000755 000757 000758 000759 000760 000762 000763 000764 000770 000775 000776 000777 000780 000781 000783 000784 000785 000786 000787 000788 000789 000791 000793 000794 000796 000797 000799 000808 000813 000814 000815 000817 000818 000820 000821 000822 000824 000825 000827 000828 000829 000830 000832 000833 000834 000835 000836 000839 000842 000845 000846 000851 000853 000855 000856 000857 000858 000860 000861 000864 000865 000866 000867 000868 000870 000871 000872 000880 000882 000883 000886 000887 000888 000890 000891 000892 000895 000896 000898 000900 000901 000902 000903 000905 000906 000908 000910 000913 000914 000918 000919 000921 000924 000925 000927 000929 000933 000934 000935 000936 000937 000941 000945 000946 000947 000950 000951 000954 000955 000957 000959 000960 000962 000965 000968 000972 000975 000977 000978 000980 000982 000987 000989 000990 000992 000993 000994 000995 000996 000997 000998 001000 001001 001003 001004 001005 001009 001016 001017 001020 001023 001024 001028 001029 001030 001031 001032 001033 001034 001036 001038 001040 001041 001044 001045 001047 001048 001049 001052 001056 001057 001059 001060 001061 001062 001064 001072 001073 001074 001079 001080 001081 001082 001085 001087 001090 001091 001092 001093 001098 001100 001103 001105 001109 001110 001112 001117 001119 001121 001122 001124 001126 001128 001130 001137 001142 001146 001151 001156 001157 001159 001160 001161 001164 001165 001166 001168 001169 001170 001171 001174 001175 001181 001184 001185 001186 001190 001196 001197 001200 001201 001202 001204 001205 001208 001209 001210 001211 001212 001215 001219 001220 001223 001227 001229 001231 001233 001238 001240 001247 001248 001250 001256 001258 001262 001264 001276 001277 001278 001279 001280 001282 001283 001285 001288 001290 001293 001297 001298 001299 001300 001301 001302 001309 001310 001311 001312 
001313 001315 001316 001319 001320 001321 001322 001323 001324 001325 001326 001327 001328 001335 001338 001340 001341 001343 001348 001349 001351 001354 001357 001358 001360 001361 001362 001364 001366 001367 001368 001369 001370 001371 001373 001378 001379 001383 001385 001390 001392 001393 001394 001396 001399 001400 001401 001402 001403 001404 001405 001406 001408 001409 001413 001414 001417 001418 001420 001422 001423 001425 001426 001428 001429 001430 001433 001434 001436 001440 001444 001447 001449 001452 001453 001454 001455 001456 001457 001459 001460 001462 001464 001465 001467 001468 001470 001472 001473 001474 001475 001476 001479 001482 001483 001484 001486 001490 001491 001492 001493 001494 001496 001498 001499 001500 001503 001504 001505 001506 001509 001510 001512 001515 001518 001519 001520 001523 001529 001530 001531 001532 001534 001539 001540 001541 001543 001544 001548 001550 001551 001553 001554 001556 001558 001559 001561 001563 001566 001568 001570 001571 001572 001575 001578 001580 001581 001584 001593 001595 001598 001599 001601 001604 001607 001608 001609 001611 001612 001614 001618 001620 001622 001623 001624 001626 001628 001630 001632 001636 001637 001638 001639 001641 001642 001644 001646 001648 001649 001651 001652 001653 001655 001657 001659 001661 001663 001668 001669 001671 001672 001673 001674 001676 001677 001678 001679 001681 001685 001686 001687 001688 001690 001691 001692 001695 001696 001698 001700 001703 001708 001715 001716 001720 001723 001724 001725 001728 001730 001731 001734 001735 001736 001737 001738 001739 001743 001744 001747 001748 001753 001754 001756 001757 001759 001760 001761 001763 001766 001767 001769 001770 001773 001775 001777 001779 001784 001785 001788 001789 001790 001791 001792 001793 001796 001798 001799 001803 001805 001806 001809 001810 001811 001812 001815 001816 001819 001821 001826 001827 001829 001830 001832 001833 001834 001836 001837 001838 001839 001841 001842 001843 001845 001847 001849 
001850 001857 001860 001864 001865 001866 001870 001871 001873 001874 001876 001879 001882 001883 001889 001891 001894 001895 001896 001899 001901 001902 001903 001906 001907 001908 001910 001911 001912 001913 001914 001915 001916 001917 001918 001921 001922 001930 001935 001938 001939 001944 001947 001948 001949 001950 001951 001953 001955 001956 001957 001958 001961 001962 001963 001964 001965 001968 001970 001971 001973 001974 001975 001976 001981 001987 001988 001990 001992 001993 001994 001998 002003 002005 002006 002007 002009 002015 002016 002018 002020 002023 002024 002026 002030 002031 002032 002033 002039 002040 002041 002047 002051 002053 002055 002059 002060 002061 002063 002064 002065 002066 002067 002069 002070 002072 002077 002080 002083 002084 002088 002090 002092 002095 002096 002097 002098 002099 002104 002105 002106 002109 002110 002114 002116 002117 002119 002122 002125 002126 002129 002132 002133 002134 002141 002143 002144 002145 002146 002147 002148 002149 002150 002154 002155 002156 002157 002162 002164 002167 002171 002172 002174 002175 002176 002178 002180 002181 002184 002186 002189 002190 002191 002192 002194 002195 002197 002198 002199 002203 002204 002205 002208 002210 002211 002212 002213 002214 002217 002221 002222 002223 002226 002227 002230 002231 002235 002236 002237 002238 002240 002241 002242 002244 002247 002249 002252 002253 002256 002259 002261 002263 002264 002265 002267 002268 002269 002270 002271 002273 002274 002275 002278 002281 002285 002288 002289 002296 002297 002301 002302 002305 002309 002311 002312 002313 002316 002317 002318 002321 002322 002323 002324 002326 002328 002331 002333 002335 002339 002342 002343 002349 002350 002351 002352 002354 002355 002358 002360 002361 002363 002364 002368 002371 002373 002374 002375 002377 002379 002381 002388 002389 002390 002394 002395 002396 002400 002401 002402 002403 002406 002407 002408 002409 002410 002412 002413 002416 002417 002421 002426 002427 002430 002431 002435 
002436 002437 002438 002441 002443 002444 002445 002447 002448 002449 002451 002452 002453 002456 002459 002464 002465 002466 002467 002468 002469 002470 002471 002472 002475 002480 002481 002482 002484 002485 002487 002489 002491 002493 002494 002496 002498 002501 002507 002508 002510 002512 002513 002514 002515 002517 002518 002522 002523 002524 002527 002533 002535 002536 002537 002542 002544 002545 002547 002549 002550 002551 002553 002554 002555 002559 002560 002561 002566 002567 002571 002573 002576 002578 002579 002582 002587 002588 002589 002591 002592 002593 002595 002596 002597 002605 002607 002608 002609 002610 002611 002614 002616 002617 002618 002620 002622 002623 002624 002627 002629 002632 002634 002637 002639 002642 002643 002647 002648 002649 002650 002652 002654 002655 002658 002659 002660 002662 002664 002665 002667 002668 002670 002671 002672 002676 002678 002679 002682 002683 002684 002687 002688 002689 002691 002697 002698 002700 002701 002703 002704 002705 002708 002714 002716 002718 002719 002723 002731 002732 002733 002734 002736 002738 002739 002741 002743 002750 002751 002754 002756 002759 002762 002766 002768 002769 002770 002771 002774 002776 002777 002778 002779 002780 002781 002782 002784 002785 002788 002790 002791 002792 002795 002798 002799 002802 002803 002807 002808 002813 002816 002817 002819 002821 002822 002823 002824 002825 002829 002832 002834 002835 002837 002838 002842 002843 002849 002850 002851 002852 002854 002855 002857 002859 002860 002862 002864 002865 002868 002869 002870 002871 002872 002873 002874 002882 002884 002886 002887 002888 002897 002898 002899 002904 002906 002907 002909 002910 002912 002913 002915 002918 002920 002921 002922 002923 002926 002927 002929 002931 002932 002933 002936 002938 002939 002940 002941 002943 002946 002949 002950 002952 002954 002956 002965 002967 002968 002969 002970 002972 002973 002975 002980 002981 002983 002986 002987 002989 002990 002992 002996 002998 003002 003008 003009 
003012 003013 003014 003015 003016 003017 003018 003020 003021 003023 003026 003028 003036 003037 003039 003040 003041 003044 003045 003049 003051 003057 003059 003060 003063 003064 003068 003069 003070 003072 003075 003077 003078 003079 003081 003083 003084 003085 003086 003089 003091 003092 003093 003095 003097 003098 003100 003104 003105 003108 003111 003113 003115 003117 003119 003120 003121 003122 003123 003125 003128 003130 003132 003138 003139 003140 003143 003147 003149 003151 003152 003154 003155 003157 003158 003160 003163 003164 003166 003168 003169 003171 003173 003176 003178 003184 003185 003186 003188 003189 003191 003193 003195 003196 003198 003200 003201 003205 003206 003208 003209 003212 003213 003215 003218 003220 003223 003227 003230 003234 003235 003237 003238 003241 003243 003244 003245 003246 003248 003249 003253 003256 003258 003260 003261 003262 003263 003264 003267 003268 003270 003271 003273 003274 003277 003278 003279 003282 003284 003285 003286 003287 003289 003290 003291 003293 003294 003297 003299 003303 003307 003309 003311 003314 003317 003320 003321 003326 003327 003328 003329 003332 003333 003334 003335 003336 003339 003340 003342 003344 003345 003348 003349 003354 003356 003359 003360 003361 003362 003363 003369 003371 003372 003374 003376 003377 003378 003380 003381 003382 003383 003384 003387 003388 003389 003390 003391 003392 003398 003400 003413 003414 003415 003416 003418 003420 003423 003424 003427 003431 003433 003436 003437 003438 003439 003440 003441 003442 003444 003445 003446 003451 003452 003454 003455 003457 003458 003459 003460 003462 003463 003468 003472 003473 003475 003476 003477 003479 003485 003486 003493 003494 003498 003499 003500 003501 003505 003507 003508 003509 003510 003512 003513 003514 003516 003518 003522 003523 003525 003526 003532 003533 003534 003536 003537 003538 003540 003541 003542 003545 003546 003548 003549 003551 003555 003556 003560 003561 003564 003565 003566 003567 003569 003570 003572 
003575 003576 003577 003578 003579 003581 003585 003586 003587 003589 003590 003591 003592 003593 003594 003595 003596 003597 003598 003599 003602 003603 003606 003610 003612 003613 003615 003617 003619 003625 003626 003628 003636 003637 003638 003639 003640 003641 003642 003644 003646 003648 003650 003651 003654 003656 003657 003660 003663 003664 003665 003666 003670 003672 003673 003674 003675 003680 003681 003685 003686 003687 003693 003694 003695 003696 003697 003698 003699 003700 003701 003704 003706 003709 003710 003713 003714 003717 003720 003721 003722 003724 003725 003727 003729 003730 003731 003732 003733 003734 003740 003741 003742 003743 003744 003745 003749 003752 003754 003757 003758 003759 003760 003761 003765 003766 003767 003768 003770 003772 003773 003774 003776 003780 003783 003784 003785 003786 003789 003790 003791 003792 003795 003796 003797 003799 003801 003803 003806 003810 003813 003815 003816 003817 003818 003819 003821 003823 003824 003825 003829 003831 003832 003833 003836 003838 003839 003840 003842 003843 003844 003845 003846 003848 003849 003850 003851 003853 003855 003857 003858 003861 003862 003863 003865 003867 003868 003871 003875 003876 003877 003882 003884 003887 003888 003889 003893 003895 003896 003900 003903 003904 003906 003908 003910 003911 003912 003913 003917 003918 003919 003921 003922 003925 003927 003928 003929 003930 003933 003935 003936 003939 003940 003941 003942 003944 003947 003949 003951 003952 003953 003954 003955 003957 003959 003960 003963 003966 003967 003968 003971 003973 003974 003976 003978 003979 003983 003985 003987 003988 003989 003990 003991 003993 003994 003995 003997 003999 004005 004006 004012 004013 004014 004015 004017 004018 004019 004020 004022 004023 004024 004025 004029 004030 004031 004035 004037 004039 004043 004044 004046 004047 004050 004052 004053 004054 004056 004057 004058 004060 004062 004066 004067 004069 004070 004071 004073 004075 004076 004078 004080 004084 004086 004088 004090 
004093 004094 004097 004099 004102 004103 004106 004112 004114 004115 004123 004127 004133 004134 004135 004139 004141 004144 004145 004146 004147 004151 004159 004165 004166 004167 004169 004170 004176 004177 004178 004179 004180 004181 004182 004183 004184 004186 004192 004193 004194 004197 004198 004199 004200 004201 004203 004204 004208 004211 004212 004216 004217 004218 004219 004225 004227 004229 004230 004231 004233 004234 004235 004236 004238 004240 004244 004245 004247 004252 004253 004257 004258 004261 004262 004264 004265 004266 004267 004268 004269 004272 004273 004274 004276 004279 004283 004286 004287 004292 004296 004297 004302 004304 004308 004310 004313 004315 004316 004317 004320 004322 004325 004328 004331 004332 004333 004334 004339 004341 004344 004346 004347 004351 004354 004355 004356 004357 004358 004359 004361 004365 004366 004371 004372 004375 004376 004378 004379 004380 004381 004382 004386 004387 004389 004390 004394 004395 004399 004400 004405 004408 004409 004410 004411 004412 004413 004416 004417 004427 004428 004431 004432 004436 004441 004442 004445 004446 004448 004449 004451 004453 004455 004457 004459 004461 004463 004464 004466 004467 004468 004471 004473 004476 004477 004478 004479 004484 004488 004492 004495 004497 004498 004499 004500 004503 004504 004505 004506 004507 004509 004510 004512 004514 004515 004518 004522 004523 004524 004525 004533 004535 004536 004537 004538 004539 004543 004544 004545 004546 004550 004552 004554 004555 004558 004559 004560 004561 004563 004564 004565 004571 004572 004575 004577 004579 004580 004583 004584 004586 004590 004592 004593 004594 004595 004597 004600 004601 004602 004604 004605 004606 004607 004613 004614 004616 004617 004619 004621 004623 004625 004627 004628 004631 004635 004637 004639 004641 004642 004643 004645 004646 004653 004654 004656 004659 004661 004662 004663 004664 004670 004671 004674 004675 004676 004677 004678 004681 004684 004690 004696 004701 004702 004703 004704 
004707 004712 004719 004723 004727 004728 004729 004731 004733 004736 004741 004747 004749 004750 004751 004754 004755 004757 004758 004760 004761 004765 004767 004771 004772 004774 004775 004778 004779 004780 004781 004784 004785 004786 004789 004793 004794 004795 004796 004798 004801 004802 004803 004805 004808 004809 004812 004818 004819 004820 004823 004824 004826 004827 004828 004833 004834 004836 004837 004838 004840 004841 004842 004844 004845 004847 004853 004854 004855 004856 004857 004865 004866 004869 004870 004872 004876 004877 004878 004879 004880 004882 004883 004884 004886 004889 004890 004894 004897 004899 004900 004901 004906 004908 004910 004911 004912 004913 004915 004916 004919 004922 004923 004925 004930 004933 004936 004937 004939 004940 004945 004950 004951 004952 004955 004957 004961 004964 004965 004967 004968 004969 004970 004971 004972 004973 004975 004977 004978 004980 004982 004984 004987 004991 004992 004997 005000 005003 005005 005006 005007 005009 005011 005012 005016 005018 005020 005022 005023 005025 005027 005029 005030 005031 005033 005035 005039 005042 005043 005044 005046 005047 005048 005051 005059 005060 005061 005066 005069 005071 005076 005083 005084 005085 005087 005088 005089 005091 005092 005096 005097 005098 005099 005100 005102 005104 005106 005107 005111 005114 005115 005116 005117 005118 005119 005123 005126 005129 005130 005131 005132 005134 005137 005142 005146 005148 005150 005151 005152 005154 005159 005160 005165 005169 005171 005173 005177 005178 005183 005186 005187 005192 005193 005195 005196 005200 005202 005203 005204 005205 005207 005208 005209 005210 005211 005212 005215 005216 005220 005223 005224 005225 005228 005231 005232 005235 005238 005239 005243 005245 005247 005248 005250 005252 005253 005254 005257 005258 005259 005261 005263 005264 005265 005266 005269 005270 005272 005277 005278 005281 005283 005285 005286 005288 005290 005291 005293 005294 005295 005300 005301 005302 005303 005305 005306 
005310 005314 005317 005320 005324 005326 005327 005331 005332 005339 005340 005344 005346 005348 005351 005352 005353 005354 005355 005356 005357 005358 005361 005362 005364 005367 005370 005373 005374 005376 005380 005382 005383 005384 005387 005388 005392 005393 005394 005395 005396 005397 005398 005399 005400 005401 005402 005403 005406 005407 005408 005409 005410 005411 005412 005414 005416 005417 005418 005419 005420 005421 005424 005425 005428 005432 005433 005435 005436 005438 005439 005440 005442 005446 005451 005454 005455 005456 005457 005462 005463 005464 005468 005469 005470 005475 005478 005480 005483 005485 005488 005490 005491 005492 005493 005496 005497 005499 005500 005501 005502 005503 005504 005506 005507 005508 005509 005512 005513 005516 005517 005518 005519 005520 005521 005522 005524 005526 005527 005529 005530 005533 005535 005537 005539 005541 005543 005547 005548 005549 005550 005553 005554 005561 005562 005563 005564 005567 005568 005569 005574 005575 005578 005579 005583 005585 005591 005592 005593 005594 005597 005598 005599 005604 005605 005606 005607 005608 005609 005611 005612 005614 005615 005620 005621 005622 005624 005626 005627 005628 005629 005632 005636 005637 005641 005644 005645 005646 005647 005648 005651 005654 005655 005657 005661 005663 005665 005666 005667 005670 005671 005674 005675 005678 005679 005681 005682 005684 005686 005688 005690 005691 005692 005693 005694 005696 005697 005701 005702 005705 005710 005711 005715 005716 005718 005719 005720 005721 005722 005723 005726 005730 005732 005733 005734 005737 005738 005742 005748 005749 005750 005752 005753 005755 005756 005758 005759 005761 005764 005766 005767 005768 005769 005770 005771 005772 005773 005774 005775 005776 005778 005779 005780 005781 005788 005789 005791 005792 005795 005797 005798 005799 005802 005804 005808 005809 005810 005813 005814 005815 005816 005817 005823 005824 005825 005828 005830 005831 005832 005833 005835 005836 005837 005838 005842 
005844 005845 005846 005847 005848 005849 005850 005851 005853 005858 005860 005861 005862 005863 005865 005866 005867 005868 005870 005871 005872 005874 005875 005877 005880 005884 005886 005888 005890 005891 005895 005896 005897 005898 005902 005904 005908 005915 005920 005924 005928 005929 005930 005932 005934 005936 005937 005940 005941 005942 005943 005945 005946 005950 005951 005953 005954 005956 005957 005959 005960 005964 005966 005967 005968 005971 005973 005974 005976 005977 005979 005980 005983 005987 005989 005990 005991 005992 005993 005995 005998 006000 006004 006006 006007 006011 006015 006017 006018 006019 006020 006021 006022 006025 006032 006035 006037 006040 006049 006051 006053 006055 006056 006059 006064 006065 006069 006072 006073 006076 006079 006080 006081 006082 006084 006089 006090 006091 006092 006094 006099 006101 006104 006105 006108 006109 006111 006112 006113 006119 006120 006124 006128 006129 006131 006132 006134 006135 006137 006138 006140 006141 006142 006143 006145 006147 006149 006150 006153 006155 006157 006158 006159 006160 006162 006164 006166 006170 006171 006172 006174 006175 006178 006179 006180 006181 006183 006184 006188 006189 006191 006192 006193 006197 006199 006200 006201 006203 006205 006206 006207 006209 006211 006212 006214 006216 006217 006218 006220 006221 006223 006224 006225 006226 006230 006231 006234 006235 006236 006237 006239 006241 006242 006243 006245 006248 006251 006252 006253 006254 006255 006256 006257 006259 006260 006261 006262 006264 006268 006271 006277 006279 006281 006283 006284 006285 006289 006290 006291 006292 006293 006294 006295 006296 006298 006299 006303 006304 006307 006308 006309 006310 006311 006313 006318 006319 006320 006323 006325 006326 006327 006328 006329 006330 006335 006336 006337 006341 006346 006347 006350 006352 006358 006359 006361 006362 006363 006365 006367 006373 006374 006375 006376 006378 006382 006383 006384 006387 006389 006390 006392 006397 006398 006399 006400 
006401 006402 006404 006408 006412 006413 006414 006418 006419 006421 006422 006428 006429 006430 006431 006432 006438 006443 006447 006448 006449 006450 006455 006456 006457 006458 006459 006460 006461 006463 006466 006467 006471 006476 006479 006480 006485 006487 006489 006490 006492 006494 006495 006499 006500 006501 006502 006504 006509 006510 006511 006513 006518 006522 006523 006526 006527 006528 006536 006538 006539 006541 006543 006544 006545 006546 006547 006550 006552 006554 006557 006559 006562 006564 006566 006567 006571 006572 006573 006575 006579 006580 006584 006585 006587 006589 006591 006594 006598 006599 006600 006601 006605 006606 006607 006608 006609 006610 006615 006616 006617 006619 006620 006621 006622 006627 006630 006631 006635 006639 006640 006642 006644 006645 006646 006648 006652 006653 006654 006657 006661 006662 006663 006665 006668 006671 006672 006673 006675 006680 006681 006683 006684 006687 006688 006689 006690 006691 006697 006699 006700 006702 006704 006705 006706 006707 006708 006716 006717 006718 006721 006722 006724 006727 006728 006730 006735 006736 006739 006740 006742 006743 006746 006748 006749 006750 006757 006763 006766 006769 006774 006775 006776 006779 006784 006787 006788 006790 006793 006795 006799 006801 006802 006805 006809 006810 006814 006817 006820 006821 006823 006824 006825 006826 006827 006830 006831 006834 006835 006838 006839 006840 006842 006845 006846 006848 006851 006857 006859 006861 006864 006865 006867 006869 006871 006875 006877 006878 006880 006883 006886 006888 006890 006892 006893 006894 006896 006902 006904 006905 006909 006911 006912 006915 006916 006918 006919 006920 006921 006923 006924 006926 006927 006929 006931 006932 006933 006934 006935 006939 006940 006941 006946 006947 006949 006951 006952 006957 006958 006961 006963 006965 006966 006967 006969 006970 006972 006974 006975 006976 006979 006983 006984 006985 006986 006988 006991 006993 006995 006996 006998 007001 007002 007004 007007 
007009 007013 007017 007018 007020 007021 007024 007025 007035 007036 007039 007040 007041 007044 007045 007046 007050 007051 007054 007057 007058 007060 007062 007064 007066 007070 007073 007075 007077 007086 007090 007092 007093 007094 007096 007097 007099 007101 007102 007104 007105 007106 007107 007108 007111 007113 007114 007116 007118 007121 007123 007124 007126 007127 007128 007129 007134 007137 007140 007141 007142 007143 007147 007148 007150 007151 007152 007153 007155 007156 007159 007160 007167 007170 007171 007173 007175 007179 007181 007184 007185 007186 007188 007189 007190 007191 007192 007193 007195 007196 007197 007203 007206 007209 007211 007213 007216 007218 007220 007222 007223 007224 007226 007228 007231 007234 007236 007237 007239 007241 007243 007245 007248 007249 007250 007251 007254 007257 007259 007263 007264 007268 007269 007270 007276 007281 007282 007285 007286 007293 007295 007296 007297 007298 007301 007305 007306 007307 007308 007312 007313 007314 007316 007317 007320 007321 007324 007328 007332 007333 007334 007335 007338 007340 007341 007346 007348 007354 007355 007356 007357 007358 007361 007362 007363 007365 007366 007367 007368 007370 007372 007373 007378 007379 007386 007387 007388 007390 007392 007393 007394 007399 007400 007404 007406 007408 007414 007417 007418 007425 007427 007428 007429 007431 007432 007438 007441 007443 007444 007446 007451 007452 007454 007455 007457 007459 007460 007461 007465 007471 007472 007474 007476 007479 ================================================ FILE: data/kitti/ImageSets/val.txt ================================================ 000001 000002 000004 000005 000006 000008 000015 000019 000020 000021 000023 000024 000025 000027 000028 000031 000033 000035 000037 000039 000040 000042 000047 000048 000050 000052 000053 000058 000059 000061 000062 000063 000065 000066 000076 000077 000078 000081 000089 000090 000093 000094 000098 000102 000104 000106 000107 000108 000116 000117 000118 000122 
000124 000126 000128 000132 000134 000135 000137 000139 000140 000143 000147 000151 000152 000153 000156 000159 000161 000167 000168 000169 000170 000173 000174 000175 000181 000182 000183 000186 000187 000188 000190 000191 000192 000194 000195 000196 000197 000199 000201 000203 000204 000207 000211 000212 000213 000216 000218 000223 000224 000226 000229 000230 000231 000234 000235 000236 000237 000239 000242 000246 000247 000248 000249 000250 000251 000252 000260 000262 000263 000265 000266 000268 000269 000270 000272 000273 000278 000279 000281 000283 000284 000289 000290 000291 000293 000297 000301 000302 000305 000307 000308 000309 000311 000312 000314 000315 000319 000320 000321 000323 000324 000327 000328 000329 000332 000333 000335 000336 000340 000341 000343 000345 000346 000347 000350 000351 000352 000354 000355 000356 000357 000359 000360 000361 000362 000365 000366 000369 000370 000372 000373 000376 000377 000378 000379 000381 000382 000383 000385 000386 000388 000391 000392 000393 000394 000395 000396 000397 000398 000399 000401 000402 000403 000404 000407 000408 000409 000413 000414 000415 000419 000420 000422 000427 000428 000429 000430 000436 000437 000440 000443 000446 000448 000450 000451 000452 000453 000454 000455 000457 000459 000463 000468 000469 000472 000473 000475 000476 000477 000478 000479 000480 000481 000485 000486 000489 000491 000492 000493 000494 000495 000496 000498 000499 000503 000504 000506 000508 000509 000510 000512 000515 000517 000519 000521 000524 000527 000528 000530 000533 000536 000541 000542 000543 000545 000546 000548 000551 000554 000555 000558 000559 000560 000561 000564 000566 000567 000568 000569 000571 000572 000581 000583 000588 000589 000590 000591 000595 000600 000601 000604 000610 000611 000612 000613 000614 000615 000618 000619 000620 000624 000625 000626 000628 000630 000634 000635 000636 000639 000642 000644 000645 000647 000648 000650 000655 000657 000658 000659 000660 000667 000669 000670 000674 000677 
000679 000682 000683 000684 000691 000692 000694 000696 000698 000699 000700 000702 000704 000706 000708 000716 000717 000718 000721 000722 000725 000727 000728 000729 000731 000734 000736 000737 000740 000741 000745 000746 000748 000750 000751 000752 000754 000756 000761 000765 000766 000767 000768 000769 000771 000772 000773 000774 000778 000779 000782 000790 000792 000795 000798 000800 000801 000802 000803 000804 000805 000806 000807 000809 000810 000811 000812 000816 000819 000823 000826 000831 000837 000838 000840 000841 000843 000844 000847 000848 000849 000850 000852 000854 000859 000862 000863 000869 000873 000874 000875 000876 000877 000878 000879 000881 000884 000885 000889 000893 000894 000897 000899 000904 000907 000909 000911 000912 000915 000916 000917 000920 000922 000923 000926 000928 000930 000931 000932 000938 000939 000940 000942 000943 000944 000948 000949 000952 000953 000956 000958 000961 000963 000964 000966 000967 000969 000970 000971 000973 000974 000976 000979 000981 000983 000984 000985 000986 000988 000991 000999 001002 001006 001007 001008 001010 001011 001012 001013 001014 001015 001018 001019 001021 001022 001025 001026 001027 001035 001037 001039 001042 001043 001046 001050 001051 001053 001054 001055 001058 001063 001065 001066 001067 001068 001069 001070 001071 001075 001076 001077 001078 001083 001084 001086 001088 001089 001094 001095 001096 001097 001099 001101 001102 001104 001106 001107 001108 001111 001113 001114 001115 001116 001118 001120 001123 001125 001127 001129 001131 001132 001133 001134 001135 001136 001138 001139 001140 001141 001143 001144 001145 001147 001148 001149 001150 001152 001153 001154 001155 001158 001162 001163 001167 001172 001173 001176 001177 001178 001179 001180 001182 001183 001187 001188 001189 001191 001192 001193 001194 001195 001198 001199 001203 001206 001207 001213 001214 001216 001217 001218 001221 001222 001224 001225 001226 001228 001230 001232 001234 001235 001236 001237 001239 001241 
001242 001243 001244 001245 001246 001249 001251 001252 001253 001254 001255 001257 001259 001260 001261 001263 001265 001266 001267 001268 001269 001270 001271 001272 001273 001274 001275 001281 001284 001286 001287 001289 001291 001292 001294 001295 001296 001303 001304 001305 001306 001307 001308 001314 001317 001318 001329 001330 001331 001332 001333 001334 001336 001337 001339 001342 001344 001345 001346 001347 001350 001352 001353 001355 001356 001359 001363 001365 001372 001374 001375 001376 001377 001380 001381 001382 001384 001386 001387 001388 001389 001391 001395 001397 001398 001407 001410 001411 001412 001415 001416 001419 001421 001424 001427 001431 001432 001435 001437 001438 001439 001441 001442 001443 001445 001446 001448 001450 001451 001458 001461 001463 001466 001469 001471 001477 001478 001480 001481 001485 001487 001488 001489 001495 001497 001501 001502 001507 001508 001511 001513 001514 001516 001517 001521 001522 001524 001525 001526 001527 001528 001533 001535 001536 001537 001538 001542 001545 001546 001547 001549 001552 001555 001557 001560 001562 001564 001565 001567 001569 001573 001574 001576 001577 001579 001582 001583 001585 001586 001587 001588 001589 001590 001591 001592 001594 001596 001597 001600 001602 001603 001605 001606 001610 001613 001615 001616 001617 001619 001621 001625 001627 001629 001631 001633 001634 001635 001640 001643 001645 001647 001650 001654 001656 001658 001660 001662 001664 001665 001666 001667 001670 001675 001680 001682 001683 001684 001689 001693 001694 001697 001699 001701 001702 001704 001705 001706 001707 001709 001710 001711 001712 001713 001714 001717 001718 001719 001721 001722 001726 001727 001729 001732 001733 001740 001741 001742 001745 001746 001749 001750 001751 001752 001755 001758 001762 001764 001765 001768 001771 001772 001774 001776 001778 001780 001781 001782 001783 001786 001787 001794 001795 001797 001800 001801 001802 001804 001807 001808 001813 001814 001817 001818 001820 001822 
001823 001824 001825 001828 001831 001835 001840 001844 001846 001848 001851 001852 001853 001854 001855 001856 001858 001859 001861 001862 001863 001867 001868 001869 001872 001875 001877 001878 001880 001881 001884 001885 001886 001887 001888 001890 001892 001893 001897 001898 001900 001904 001905 001909 001919 001920 001923 001924 001925 001926 001927 001928 001929 001931 001932 001933 001934 001936 001937 001940 001941 001942 001943 001945 001946 001952 001954 001959 001960 001966 001967 001969 001972 001977 001978 001979 001980 001982 001983 001984 001985 001986 001989 001991 001995 001996 001997 001999 002000 002001 002002 002004 002008 002010 002011 002012 002013 002014 002017 002019 002021 002022 002025 002027 002028 002029 002034 002035 002036 002037 002038 002042 002043 002044 002045 002046 002048 002049 002050 002052 002054 002056 002057 002058 002062 002068 002071 002073 002074 002075 002076 002078 002079 002081 002082 002085 002086 002087 002089 002091 002093 002094 002100 002101 002102 002103 002107 002108 002111 002112 002113 002115 002118 002120 002121 002123 002124 002127 002128 002130 002131 002135 002136 002137 002138 002139 002140 002142 002151 002152 002153 002158 002159 002160 002161 002163 002165 002166 002168 002169 002170 002173 002177 002179 002182 002183 002185 002187 002188 002193 002196 002200 002201 002202 002206 002207 002209 002215 002216 002218 002219 002220 002224 002225 002228 002229 002232 002233 002234 002239 002243 002245 002246 002248 002250 002251 002254 002255 002257 002258 002260 002262 002266 002272 002276 002277 002279 002280 002282 002283 002284 002286 002287 002290 002291 002292 002293 002294 002295 002298 002299 002300 002303 002304 002306 002307 002308 002310 002314 002315 002319 002320 002325 002327 002329 002330 002332 002334 002336 002337 002338 002340 002341 002344 002345 002346 002347 002348 002353 002356 002357 002359 002362 002365 002366 002367 002369 002370 002372 002376 002378 002380 002382 002383 002384 
002385 002386 002387 002391 002392 002393 002397 002398 002399 002404 002405 002411 002414 002415 002418 002419 002420 002422 002423 002424 002425 002428 002429 002432 002433 002434 002439 002440 002442 002446 002450 002454 002455 002457 002458 002460 002461 002462 002463 002473 002474 002476 002477 002478 002479 002483 002486 002488 002490 002492 002495 002497 002499 002500 002502 002503 002504 002505 002506 002509 002511 002516 002519 002520 002521 002525 002526 002528 002529 002530 002531 002532 002534 002538 002539 002540 002541 002543 002546 002548 002552 002556 002557 002558 002562 002563 002564 002565 002568 002569 002570 002572 002574 002575 002577 002580 002581 002583 002584 002585 002586 002590 002594 002598 002599 002600 002601 002602 002603 002604 002606 002612 002613 002615 002619 002621 002625 002626 002628 002630 002631 002633 002635 002636 002638 002640 002641 002644 002645 002646 002651 002653 002656 002657 002661 002663 002666 002669 002673 002674 002675 002677 002680 002681 002685 002686 002690 002692 002693 002694 002695 002696 002699 002702 002706 002707 002709 002710 002711 002712 002713 002715 002717 002720 002721 002722 002724 002725 002726 002727 002728 002729 002730 002735 002737 002740 002742 002744 002745 002746 002747 002748 002749 002752 002753 002755 002757 002758 002760 002761 002763 002764 002765 002767 002772 002773 002775 002783 002786 002787 002789 002793 002794 002796 002797 002800 002801 002804 002805 002806 002809 002810 002811 002812 002814 002815 002818 002820 002826 002827 002828 002830 002831 002833 002836 002839 002840 002841 002844 002845 002846 002847 002848 002853 002856 002858 002861 002863 002866 002867 002875 002876 002877 002878 002879 002880 002881 002883 002885 002889 002890 002891 002892 002893 002894 002895 002896 002900 002901 002902 002903 002905 002908 002911 002914 002916 002917 002919 002924 002925 002928 002930 002934 002935 002937 002942 002944 002945 002947 002948 002951 002953 002955 002957 002958 
002959 002960 002961 002962 002963 002964 002966 002971 002974 002976 002977 002978 002979 002982 002984 002985 002988 002991 002993 002994 002995 002997 002999 003000 003001 003003 003004 003005 003006 003007 003010 003011 003019 003022 003024 003025 003027 003029 003030 003031 003032 003033 003034 003035 003038 003042 003043 003046 003047 003048 003050 003052 003053 003054 003055 003056 003058 003061 003062 003065 003066 003067 003071 003073 003074 003076 003080 003082 003087 003088 003090 003094 003096 003099 003101 003102 003103 003106 003107 003109 003110 003112 003114 003116 003118 003124 003126 003127 003129 003131 003133 003134 003135 003136 003137 003141 003142 003144 003145 003146 003148 003150 003153 003156 003159 003161 003162 003165 003167 003170 003172 003174 003175 003177 003179 003180 003181 003182 003183 003187 003190 003192 003194 003197 003199 003202 003203 003204 003207 003210 003211 003214 003216 003217 003219 003221 003222 003224 003225 003226 003228 003229 003231 003232 003233 003236 003239 003240 003242 003247 003250 003251 003252 003254 003255 003257 003259 003265 003266 003269 003272 003275 003276 003280 003281 003283 003288 003292 003295 003296 003298 003300 003301 003302 003304 003305 003306 003308 003310 003312 003313 003315 003316 003318 003319 003322 003323 003324 003325 003330 003331 003337 003338 003341 003343 003346 003347 003350 003351 003352 003353 003355 003357 003358 003364 003365 003366 003367 003368 003370 003373 003375 003379 003385 003386 003393 003394 003395 003396 003397 003399 003401 003402 003403 003404 003405 003406 003407 003408 003409 003410 003411 003412 003417 003419 003421 003422 003425 003426 003428 003429 003430 003432 003434 003435 003443 003447 003448 003449 003450 003453 003456 003461 003464 003465 003466 003467 003469 003470 003471 003474 003478 003480 003481 003482 003483 003484 003487 003488 003489 003490 003491 003492 003495 003496 003497 003502 003503 003504 003506 003511 003515 003517 003519 003520 
003521 003524 003527 003528 003529 003530 003531 003535 003539 003543 003544 003547 003550 003552 003553 003554 003557 003558 003559 003562 003563 003568 003571 003573 003574 003580 003582 003583 003584 003588 003600 003601 003604 003605 003607 003608 003609 003611 003614 003616 003618 003620 003621 003622 003623 003624 003627 003629 003630 003631 003632 003633 003634 003635 003643 003645 003647 003649 003652 003653 003655 003658 003659 003661 003662 003667 003668 003669 003671 003676 003677 003678 003679 003682 003683 003684 003688 003689 003690 003691 003692 003702 003703 003705 003707 003708 003711 003712 003715 003716 003718 003719 003723 003726 003728 003735 003736 003737 003738 003739 003746 003747 003748 003750 003751 003753 003755 003756 003762 003763 003764 003769 003771 003775 003777 003778 003779 003781 003782 003787 003788 003793 003794 003798 003800 003802 003804 003805 003807 003808 003809 003811 003812 003814 003820 003822 003826 003827 003828 003830 003834 003835 003837 003841 003847 003852 003854 003856 003859 003860 003864 003866 003869 003870 003872 003873 003874 003878 003879 003880 003881 003883 003885 003886 003890 003891 003892 003894 003897 003898 003899 003901 003902 003905 003907 003909 003914 003915 003916 003920 003923 003924 003926 003931 003932 003934 003937 003938 003943 003945 003946 003948 003950 003956 003958 003961 003962 003964 003965 003969 003970 003972 003975 003977 003980 003981 003982 003984 003986 003992 003996 003998 004000 004001 004002 004003 004004 004007 004008 004009 004010 004011 004016 004021 004026 004027 004028 004032 004033 004034 004036 004038 004040 004041 004042 004045 004048 004049 004051 004055 004059 004061 004063 004064 004065 004068 004072 004074 004077 004079 004081 004082 004083 004085 004087 004089 004091 004092 004095 004096 004098 004100 004101 004104 004105 004107 004108 004109 004110 004111 004113 004116 004117 004118 004119 004120 004121 004122 004124 004125 004126 004128 004129 004130 004131 
004132 004136 004137 004138 004140 004142 004143 004148 004149 004150 004152 004153 004154 004155 004156 004157 004158 004160 004161 004162 004163 004164 004168 004171 004172 004173 004174 004175 004185 004187 004188 004189 004190 004191 004195 004196 004202 004205 004206 004207 004209 004210 004213 004214 004215 004220 004221 004222 004223 004224 004226 004228 004232 004237 004239 004241 004242 004243 004246 004248 004249 004250 004251 004254 004255 004256 004259 004260 004263 004270 004271 004275 004277 004278 004280 004281 004282 004284 004285 004288 004289 004290 004291 004293 004294 004295 004298 004299 004300 004301 004303 004305 004306 004307 004309 004311 004312 004314 004318 004319 004321 004323 004324 004326 004327 004329 004330 004335 004336 004337 004338 004340 004342 004343 004345 004348 004349 004350 004352 004353 004360 004362 004363 004364 004367 004368 004369 004370 004373 004374 004377 004383 004384 004385 004388 004391 004392 004393 004396 004397 004398 004401 004402 004403 004404 004406 004407 004414 004415 004418 004419 004420 004421 004422 004423 004424 004425 004426 004429 004430 004433 004434 004435 004437 004438 004439 004440 004443 004444 004447 004450 004452 004454 004456 004458 004460 004462 004465 004469 004470 004472 004474 004475 004480 004481 004482 004483 004485 004486 004487 004489 004490 004491 004493 004494 004496 004501 004502 004508 004511 004513 004516 004517 004519 004520 004521 004526 004527 004528 004529 004530 004531 004532 004534 004540 004541 004542 004547 004548 004549 004551 004553 004556 004557 004562 004566 004567 004568 004569 004570 004573 004574 004576 004578 004581 004582 004585 004587 004588 004589 004591 004596 004598 004599 004603 004608 004609 004610 004611 004612 004615 004618 004620 004622 004624 004626 004629 004630 004632 004633 004634 004636 004638 004640 004644 004647 004648 004649 004650 004651 004652 004655 004657 004658 004660 004665 004666 004667 004668 004669 004672 004673 004679 004680 004682 
004683 004685 004686 004687 004688 004689 004691 004692 004693 004694 004695 004697 004698 004699 004700 004705 004706 004708 004709 004710 004711 004713 004714 004715 004716 004717 004718 004720 004721 004722 004724 004725 004726 004730 004732 004734 004735 004737 004738 004739 004740 004742 004743 004744 004745 004746 004748 004752 004753 004756 004759 004762 004763 004764 004766 004768 004769 004770 004773 004776 004777 004782 004783 004787 004788 004790 004791 004792 004797 004799 004800 004804 004806 004807 004810 004811 004813 004814 004815 004816 004817 004821 004822 004825 004829 004830 004831 004832 004835 004839 004843 004846 004848 004849 004850 004851 004852 004858 004859 004860 004861 004862 004863 004864 004867 004868 004871 004873 004874 004875 004881 004885 004887 004888 004891 004892 004893 004895 004896 004898 004902 004903 004904 004905 004907 004909 004914 004917 004918 004920 004921 004924 004926 004927 004928 004929 004931 004932 004934 004935 004938 004941 004942 004943 004944 004946 004947 004948 004949 004953 004954 004956 004958 004959 004960 004962 004963 004966 004974 004976 004979 004981 004983 004985 004986 004988 004989 004990 004993 004994 004995 004996 004998 004999 005001 005002 005004 005008 005010 005013 005014 005015 005017 005019 005021 005024 005026 005028 005032 005034 005036 005037 005038 005040 005041 005045 005049 005050 005052 005053 005054 005055 005056 005057 005058 005062 005063 005064 005065 005067 005068 005070 005072 005073 005074 005075 005077 005078 005079 005080 005081 005082 005086 005090 005093 005094 005095 005101 005103 005105 005108 005109 005110 005112 005113 005120 005121 005122 005124 005125 005127 005128 005133 005135 005136 005138 005139 005140 005141 005143 005144 005145 005147 005149 005153 005155 005156 005157 005158 005161 005162 005163 005164 005166 005167 005168 005170 005172 005174 005175 005176 005179 005180 005181 005182 005184 005185 005188 005189 005190 005191 005194 005197 005198 005199 
005201 005206 005213 005214 005217 005218 005219 005221 005222 005226 005227 005229 005230 005233 005234 005236 005237 005240 005241 005242 005244 005246 005249 005251 005255 005256 005260 005262 005267 005268 005271 005273 005274 005275 005276 005279 005280 005282 005284 005287 005289 005292 005296 005297 005298 005299 005304 005307 005308 005309 005311 005312 005313 005315 005316 005318 005319 005321 005322 005323 005325 005328 005329 005330 005333 005334 005335 005336 005337 005338 005341 005342 005343 005345 005347 005349 005350 005359 005360 005363 005365 005366 005368 005369 005371 005372 005375 005377 005378 005379 005381 005385 005386 005389 005390 005391 005404 005405 005413 005415 005422 005423 005426 005427 005429 005430 005431 005434 005437 005441 005443 005444 005445 005447 005448 005449 005450 005452 005453 005458 005459 005460 005461 005465 005466 005467 005471 005472 005473 005474 005476 005477 005479 005481 005482 005484 005486 005487 005489 005494 005495 005498 005505 005510 005511 005514 005515 005523 005525 005528 005531 005532 005534 005536 005538 005540 005542 005544 005545 005546 005551 005552 005555 005556 005557 005558 005559 005560 005565 005566 005570 005571 005572 005573 005576 005577 005580 005581 005582 005584 005586 005587 005588 005589 005590 005595 005596 005600 005601 005602 005603 005610 005613 005616 005617 005618 005619 005623 005625 005630 005631 005633 005634 005635 005638 005639 005640 005642 005643 005649 005650 005652 005653 005656 005658 005659 005660 005662 005664 005668 005669 005672 005673 005676 005677 005680 005683 005685 005687 005689 005695 005698 005699 005700 005703 005704 005706 005707 005708 005709 005712 005713 005714 005717 005724 005725 005727 005728 005729 005731 005735 005736 005739 005740 005741 005743 005744 005745 005746 005747 005751 005754 005757 005760 005762 005763 005765 005777 005782 005783 005784 005785 005786 005787 005790 005793 005794 005796 005800 005801 005803 005805 005806 005807 005811 
005812 005818 005819 005820 005821 005822 005826 005827 005829 005834 005839 005840 005841 005843 005852 005854 005855 005856 005857 005859 005864 005869 005873 005876 005878 005879 005881 005882 005883 005885 005887 005889 005892 005893 005894 005899 005900 005901 005903 005905 005906 005907 005909 005910 005911 005912 005913 005914 005916 005917 005918 005919 005921 005922 005923 005925 005926 005927 005931 005933 005935 005938 005939 005944 005947 005948 005949 005952 005955 005958 005961 005962 005963 005965 005969 005970 005972 005975 005978 005981 005982 005984 005985 005986 005988 005994 005996 005997 005999 006001 006002 006003 006005 006008 006009 006010 006012 006013 006014 006016 006023 006024 006026 006027 006028 006029 006030 006031 006033 006034 006036 006038 006039 006041 006042 006043 006044 006045 006046 006047 006048 006050 006052 006054 006057 006058 006060 006061 006062 006063 006066 006067 006068 006070 006071 006074 006075 006077 006078 006083 006085 006086 006087 006088 006093 006095 006096 006097 006098 006100 006102 006103 006106 006107 006110 006114 006115 006116 006117 006118 006121 006122 006123 006125 006126 006127 006130 006133 006136 006139 006144 006146 006148 006151 006152 006154 006156 006161 006163 006165 006167 006168 006169 006173 006176 006177 006182 006185 006186 006187 006190 006194 006195 006196 006198 006202 006204 006208 006210 006213 006215 006219 006222 006227 006228 006229 006232 006233 006238 006240 006244 006246 006247 006249 006250 006258 006263 006265 006266 006267 006269 006270 006272 006273 006274 006275 006276 006278 006280 006282 006286 006287 006288 006297 006300 006301 006302 006305 006306 006312 006314 006315 006316 006317 006321 006322 006324 006331 006332 006333 006334 006338 006339 006340 006342 006343 006344 006345 006348 006349 006351 006353 006354 006355 006356 006357 006360 006364 006366 006368 006369 006370 006371 006372 006377 006379 006380 006381 006385 006386 006388 006391 006393 006394 006395 
006396 006403 006405 006406 006407 006409 006410 006411 006415 006416 006417 006420 006423 006424 006425 006426 006427 006433 006434 006435 006436 006437 006439 006440 006441 006442 006444 006445 006446 006451 006452 006453 006454 006462 006464 006465 006468 006469 006470 006472 006473 006474 006475 006477 006478 006481 006482 006483 006484 006486 006488 006491 006493 006496 006497 006498 006503 006505 006506 006507 006508 006512 006514 006515 006516 006517 006519 006520 006521 006524 006525 006529 006530 006531 006532 006533 006534 006535 006537 006540 006542 006548 006549 006551 006553 006555 006556 006558 006560 006561 006563 006565 006568 006569 006570 006574 006576 006577 006578 006581 006582 006583 006586 006588 006590 006592 006593 006595 006596 006597 006602 006603 006604 006611 006612 006613 006614 006618 006623 006624 006625 006626 006628 006629 006632 006633 006634 006636 006637 006638 006641 006643 006647 006649 006650 006651 006655 006656 006658 006659 006660 006664 006666 006667 006669 006670 006674 006676 006677 006678 006679 006682 006685 006686 006692 006693 006694 006695 006696 006698 006701 006703 006709 006710 006711 006712 006713 006714 006715 006719 006720 006723 006725 006726 006729 006731 006732 006733 006734 006737 006738 006741 006744 006745 006747 006751 006752 006753 006754 006755 006756 006758 006759 006760 006761 006762 006764 006765 006767 006768 006770 006771 006772 006773 006777 006778 006780 006781 006782 006783 006785 006786 006789 006791 006792 006794 006796 006797 006798 006800 006803 006804 006806 006807 006808 006811 006812 006813 006815 006816 006818 006819 006822 006828 006829 006832 006833 006836 006837 006841 006843 006844 006847 006849 006850 006852 006853 006854 006855 006856 006858 006860 006862 006863 006866 006868 006870 006872 006873 006874 006876 006879 006881 006882 006884 006885 006887 006889 006891 006895 006897 006898 006899 006900 006901 006903 006906 006907 006908 006910 006913 006914 006917 006922 006925 
006928 006930 006936 006937 006938 006942 006943 006944 006945 006948 006950 006953 006954 006955 006956 006959 006960 006962 006964 006968 006971 006973 006977 006978 006980 006981 006982 006987 006989 006990 006992 006994 006997 006999 007000 007003 007005 007006 007008 007010 007011 007012 007014 007015 007016 007019 007022 007023 007026 007027 007028 007029 007030 007031 007032 007033 007034 007037 007038 007042 007043 007047 007048 007049 007052 007053 007055 007056 007059 007061 007063 007065 007067 007068 007069 007071 007072 007074 007076 007078 007079 007080 007081 007082 007083 007084 007085 007087 007088 007089 007091 007095 007098 007100 007103 007109 007110 007112 007115 007117 007119 007120 007122 007125 007130 007131 007132 007133 007135 007136 007138 007139 007144 007145 007146 007149 007154 007157 007158 007161 007162 007163 007164 007165 007166 007168 007169 007172 007174 007176 007177 007178 007180 007182 007183 007187 007194 007198 007199 007200 007201 007202 007204 007205 007207 007208 007210 007212 007214 007215 007217 007219 007221 007225 007227 007229 007230 007232 007233 007235 007238 007240 007242 007244 007246 007247 007252 007253 007255 007256 007258 007260 007261 007262 007265 007266 007267 007271 007272 007273 007274 007275 007277 007278 007279 007280 007283 007284 007287 007288 007289 007290 007291 007292 007294 007299 007300 007302 007303 007304 007309 007310 007311 007315 007318 007319 007322 007323 007325 007326 007327 007329 007330 007331 007336 007337 007339 007342 007343 007344 007345 007347 007349 007350 007351 007352 007353 007359 007360 007364 007369 007371 007374 007375 007376 007377 007380 007381 007382 007383 007384 007385 007389 007391 007395 007396 007397 007398 007401 007402 007403 007405 007407 007409 007410 007411 007412 007413 007415 007416 007419 007420 007421 007422 007423 007424 007426 007430 007433 007434 007435 007436 007437 007439 007440 007442 007445 007447 007448 007449 007450 007453 007456 007458 007462 
def get_git_commit_number():
    """Return the first seven characters of the checkout's HEAD commit hash.

    The lookup is attempted only when a ``.git`` entry exists one directory
    above this package. ``'0000000'`` is returned when that entry is missing,
    when the ``git`` executable cannot be started, or when ``git rev-parse``
    fails or prints fewer than seven characters.

    Returns:
        str: a 7-character abbreviated commit hash, or ``'0000000'``.
    """
    fallback = '0000000'
    repo_root = Path(__file__).parent / '..'
    if not (repo_root / '.git').exists():
        return fallback

    try:
        # Run git inside the checkout that contains this package; without
        # ``cwd`` git inspects whatever directory the process was started in
        # and may report the hash of an unrelated repository.
        cmd_out = subprocess.run(
            ['git', 'rev-parse', 'HEAD'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            cwd=str(repo_root),
        )
    except OSError:
        # git is not installed or not executable; importing the package
        # should not fail because of that.
        return fallback

    git_commit_number = cmd_out.stdout.decode('utf-8').strip()[:7]
    if cmd_out.returncode != 0 or len(git_commit_number) < 7:
        return fallback
    return git_commit_number
def cfg_from_list(cfg_list, config):
    """Set config keys via list (e.g., from command line).

    Args:
        cfg_list: flat sequence ``[key1, value1, key2, value2, ...]``. Keys
            use dots to address nested entries (``'A.B.C'``); values are
            strings that are first parsed with ``ast.literal_eval`` and kept
            as raw strings when that parsing fails.
        config: nested mapping that is updated in place. Every addressed key
            must already exist.

    Raises:
        AssertionError: if ``cfg_list`` has odd length, a key is missing, or
            the parsed value's type differs from the existing value's type
            and the existing value is neither a list nor an ``EasyDict``.

    Overrides for list-valued entries are given as comma-separated items
    (``'0.5,0.7'``); each item is converted with the type of the existing
    first element. Overrides for ``EasyDict``-valued entries are given as
    ``'key:value,key:value'``; each value is converted with the type of the
    existing entry under that key.
    """
    from ast import literal_eval

    assert len(cfg_list) % 2 == 0
    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
        key_list = k.split('.')
        d = config
        for subkey in key_list[:-1]:
            assert subkey in d, 'NotFoundKey: %s' % subkey
            d = d[subkey]
        subkey = key_list[-1]
        assert subkey in d, 'NotFoundKey: %s' % subkey

        try:
            value = literal_eval(v)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Not a Python literal (e.g. a bare word): keep the raw string.
            value = v

        current = d[subkey]
        # literal_eval turns '0.5,0.7' into a tuple and '0.7' into a float,
        # neither of which can be split; fall back to the raw text then.
        raw = value if isinstance(value, str) else v

        if type(value) == type(current):
            d[subkey] = value
        elif isinstance(current, list):
            # Without an existing element the item type is unknown, so the
            # items are kept as strings.
            elem_type = type(current[0]) if current else str
            d[subkey] = [elem_type(x) for x in raw.split(',')]
        elif isinstance(current, EasyDict):
            for src in raw.split(','):
                cur_key, cur_val = src.split(':')
                val_type = type(current[cur_key])
                current[cur_key] = val_type(cur_val)
        else:
            assert type(value) == type(current), \
                'type {} does not match original type {}'.format(type(value), type(current))
            d[subkey] = value


def _load_yaml(stream):
    """Parse an open YAML stream, using ``yaml.FullLoader`` when available."""
    loader = getattr(yaml, 'FullLoader', None)
    if loader is None:
        # Legacy PyYAML without FullLoader: load() takes no mandatory Loader.
        # NOTE: that legacy default loader is unsafe on untrusted files.
        return yaml.load(stream)
    return yaml.load(stream, Loader=loader)


def merge_new_config(config, new_config):
    """Recursively merge ``new_config`` into ``config`` and return ``config``.

    When ``new_config`` contains ``'_BASE_CONFIG_'``, the YAML file at that
    path is loaded first and applied to ``config`` with ``update``. Then each
    entry of ``new_config`` is applied: non-dict values overwrite the entry in
    ``config``; dict values are merged recursively, creating an empty
    ``EasyDict`` for keys that ``config`` does not have yet.

    Args:
        config: mapping that is modified in place.
        new_config: mapping holding the overriding values.

    Returns:
        The same ``config`` object that was passed in.
    """
    if '_BASE_CONFIG_' in new_config:
        with open(new_config['_BASE_CONFIG_'], 'r') as f:
            # Parse errors propagate as raised; they are no longer swallowed
            # and retried on an already-consumed stream.
            yaml_config = _load_yaml(f)
        config.update(EasyDict(yaml_config))

    for key, val in new_config.items():
        if not isinstance(val, dict):
            config[key] = val
            continue
        if key not in config:
            config[key] = EasyDict()
        merge_new_config(config[key], val)

    return config


def cfg_from_yaml_file(cfg_file, config):
    """Load the YAML file ``cfg_file`` and merge its content into ``config``.

    Args:
        cfg_file: path of the YAML file to read.
        config: mapping that is modified in place via ``merge_new_config``.

    Returns:
        The same ``config`` object that was passed in.
    """
    with open(cfg_file, 'r') as f:
        new_config = _load_yaml(f)

    merge_new_config(config=config, new_config=new_config)

    return config
class DataLoaderX(DataLoader):
    """``DataLoader`` whose iterator is wrapped in a ``BackgroundGenerator``."""

    def __iter__(self):
        return BackgroundGenerator(super().__iter__())


class DistributedSampler(_DistributedSampler):
    """Distributed sampler with a switchable shuffle.

    Every rank builds the same index list (a permutation seeded with
    ``self.epoch`` when shuffling, otherwise ``0 .. len(dataset) - 1``), pads
    it to ``self.total_size`` by repeating indices from the front, and keeps
    every ``num_replicas``-th index starting at its own ``rank``.

    Args:
        dataset: dataset to sample from; only ``len(dataset)`` is used here.
        num_replicas: forwarded to the parent sampler.
        rank: forwarded to the parent sampler.
        shuffle: whether ``__iter__`` permutes the indices.
    """

    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
        super().__init__(dataset, num_replicas=num_replicas, rank=rank)
        self.shuffle = shuffle

    def __iter__(self):
        if self.shuffle:
            g = torch.Generator()
            g.manual_seed(self.epoch)
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = torch.arange(len(self.dataset)).tolist()

        padding = self.total_size - len(indices)
        if padding > 0 and indices:
            # A single slice can supply at most len(indices) extra items, so
            # repeat the list when the dataset is smaller than the padding
            # (e.g. one sample shared by four replicas).
            repeats = -(-padding // len(indices))
            indices += (indices * repeats)[:padding]
        assert len(indices) == self.total_size

        indices = indices[self.rank:self.total_size:self.num_replicas]
        assert len(indices) == self.num_samples

        return iter(indices)
class X_TRANS(object):
    """Apply parameterised world transforms (rotation, flip, scaling) and undo them.

    One callable per config entry is queued in ``data_augmentor_queue``; the
    same callables in reverse config order are queued in ``test_back_queue``
    for :meth:`backward_with_param`.

    Args:
        augmentor_configs: list of per-transform configs, or an object with an
            ``AUG_CONFIG_LIST`` attribute. Each entry needs a ``'NAME'`` equal
            to one of this class's transform methods. When None, a built-in
            rotation / flip / scaling list is used.
        rot_num: number of parameter sets produced by :meth:`get_params`.
    """

    def __init__(self, augmentor_configs=None, rot_num=1):
        self.rot_num = rot_num
        self.data_augmentor_queue = []
        self.test_back_queue = []
        if augmentor_configs is None:
            augmentor_configs = [
                {'NAME': 'world_rotation', 'WORLD_ROT_ANGLE': [-0.78539816, 0.78539816]},
                {'NAME': 'world_flip', 'ALONG_AXIS_LIST': [0, 1]},
                {'NAME': 'world_scaling', 'WORLD_SCALE_RANGE': [0.95, 1.05]},
            ]
        self.augmentor_configs = augmentor_configs

        self.aug_config_list = augmentor_configs if isinstance(augmentor_configs, list) \
            else augmentor_configs.AUG_CONFIG_LIST

        for i, cur_cfg in enumerate(self.aug_config_list):
            cur_augmentor = getattr(self, cur_cfg['NAME'])(config=cur_cfg)
            self.data_augmentor_queue.append(cur_augmentor)

            # Mirror entry: walk the config list from the end for the undo queue.
            back_config = self.aug_config_list[-(i + 1)]
            cur_augmentor = getattr(self, back_config['NAME'])(config=back_config)
            self.test_back_queue.append(cur_augmentor)

        self.backward_flag = False

    def get_params(self):
        """Return a ``(rot_num, len(aug_config_list))`` array of transform parameters.

        Row ``s`` holds the ``s``-th value of each config's parameter list.
        Configs are read with item access so that both plain dicts (the
        built-in default) and attribute-style dicts work.
        """
        param_keys = {
            'world_rotation': 'WORLD_ROT_ANGLE',
            'world_flip': 'ALONG_AXIS_LIST',
            'world_scaling': 'WORLD_SCALE_RANGE',
        }
        transform_param = np.zeros(shape=(self.rot_num, len(self.aug_config_list)))
        for s in range(self.rot_num):
            for i, config in enumerate(self.aug_config_list):
                key = param_keys.get(config['NAME'])
                if key is not None:
                    transform_param[s][i] = config[key][s]
        return transform_param

    def world_rotation(self, data_dict=None, config=None):
        """Rotate ``points`` / ``boxes`` about z by ``transform_param[0]`` (negated when undoing).

        Called without ``data_dict`` it returns a partial bound to ``config``.
        """
        if data_dict is None:
            return partial(self.world_rotation, config=config)

        rot_factor = data_dict['transform_param'][0]
        if isinstance(rot_factor, np.float64):
            rot_factor = np.array([rot_factor])
        else:
            # presumably a torch tensor scalar here (has ``unsqueeze``) -- TODO confirm
            rot_factor = rot_factor.unsqueeze(0)
        angle = -rot_factor if self.backward_flag else rot_factor

        if 'points' in data_dict:
            points = data_dict['points']
            points[:, 0:3] = common_utils.rotate_points_along_z(points[np.newaxis, :, 0:3], angle)[0]
            data_dict['points'] = points

        if 'boxes' in data_dict:
            boxes_lidar = data_dict['boxes']
            boxes_lidar[:, 0:3] = common_utils.rotate_points_along_z(
                boxes_lidar[np.newaxis, :, 0:3], angle)[0]
            boxes_lidar[:, 6] += angle
            data_dict['boxes'] = boxes_lidar

        return data_dict

    def world_flip(self, data_dict=None, config=None):
        """Negate column 1 of ``points`` / ``boxes`` (and box column 6) when ``transform_param[1]`` is truthy.

        Called without ``data_dict`` it returns a partial bound to ``config``.
        The same operation is used in both directions.
        """
        if data_dict is None:
            return partial(self.world_flip, config=config)

        enable = data_dict['transform_param'][1]
        if 'points' in data_dict:
            data_dict['points'] = augmentor_utils.random_flip_with_param(
                data_dict['points'], enable, ax=1)
        if 'boxes' in data_dict:
            boxes = augmentor_utils.random_flip_with_param(data_dict['boxes'], enable, ax=1)
            boxes = augmentor_utils.random_flip_with_param(boxes, enable, ax=6)
            data_dict['boxes'] = boxes

        return data_dict

    def world_scaling(self, data_dict=None, config=None):
        """Scale point coordinates and box columns 0:6 by ``transform_param[2]`` (divide when undoing).

        Called without ``data_dict`` it returns a partial bound to ``config``.
        """
        if data_dict is None:
            return partial(self.world_scaling, config=config)

        scale_factor = data_dict['transform_param'][2]

        if 'points' in data_dict:
            points = data_dict['points']
            if self.backward_flag:
                points[:, 0:3] /= scale_factor
            else:
                points[:, 0:3] *= scale_factor
            data_dict['points'] = points

        if 'boxes' in data_dict:
            boxes_lidar = data_dict['boxes']
            if self.backward_flag:
                boxes_lidar[:, 0:6] /= scale_factor
            else:
                boxes_lidar[:, 0:6] *= scale_factor
            data_dict['boxes'] = boxes_lidar

        return data_dict

    def forward_with_param(self, data_dict):
        """
        Args:
            data_dict:
                points: (N, 3 + C_in)
                boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading]
                transform_param: parameters read by the queued transforms
        Returns:
            data_dict after every transform in ``data_augmentor_queue``, in order.
        """
        for cur_augmentor in self.data_augmentor_queue:
            data_dict = cur_augmentor(data_dict=data_dict)
        return data_dict

    def backward_with_param(self, data_dict):
        """
        Args:
            data_dict:
                points: (N, 3 + C_in)
                boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading]
                transform_param: parameters read by the queued transforms
        Returns:
            data_dict after every transform in ``test_back_queue`` ran in undo mode.
        """
        self.backward_flag = True
        try:
            for cur_augmentor in self.test_back_queue:
                data_dict = cur_augmentor(data_dict=data_dict)
        finally:
            # Always leave undo mode, even if a transform raised.
            self.backward_flag = False
        return data_dict

    def input_transform(self, data_dict, trans_boxes=False):
        """Add ``rot_num`` transformed copies of the input to ``data_dict``.

        Copy 0 overwrites ``'points'`` (and ``'gt_boxes'`` when ``trans_boxes``);
        copy ``i > 0`` is stored under ``'points<i>'`` / ``'gt_boxes<i>'``. The
        parameter array from :meth:`get_params` is stored as ``'transform_param'``.

        Args:
            data_dict:
                points: (N, 3 + C_in)
                gt_boxes: (N, 7) [x, y, z, dx, dy, dz, heading], required when ``trans_boxes``
            trans_boxes: also transform ``gt_boxes``.
        Returns:
            The updated ``data_dict``.
        """
        params = self.get_params()

        src_points = copy.deepcopy(data_dict['points'])
        if trans_boxes:
            src_gt_boxes = copy.deepcopy(data_dict['gt_boxes'])

        for i in range(self.rot_num):
            rot_num_id = '' if i == 0 else str(i)

            # Every copy starts from the untouched source data.
            ini_data_dict = {'points': copy.deepcopy(src_points)}
            if trans_boxes:
                ini_data_dict['boxes'] = copy.deepcopy(src_gt_boxes)
            ini_data_dict['transform_param'] = copy.deepcopy(params[i])

            transformed_data = self.forward_with_param(ini_data_dict)

            data_dict['points' + rot_num_id] = transformed_data['points']
            if trans_boxes:
                data_dict['gt_boxes' + rot_num_id] = transformed_data['boxes']

        data_dict['transform_param'] = params

        return data_dict
def random_flip_along_x(gt_boxes, points):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C)
    Returns:
        gt_boxes, points, enable -- ``enable`` tells whether the flip was applied.
    """
    enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5])
    if enable:
        gt_boxes[:, 1] = -gt_boxes[:, 1]
        gt_boxes[:, 6] = -gt_boxes[:, 6]
        points[:, 1] = -points[:, 1]

        # Column 8 only exists with at least 9 columns; an 8-column array used
        # to raise IndexError here.
        if gt_boxes.shape[1] > 8:
            gt_boxes[:, 8] = -gt_boxes[:, 8]

    return gt_boxes, points, enable


def random_flip_with_param(points, enable, ax=1, offset=0):
    """Negate column ``ax`` of ``points`` (after adding ``offset``) when ``enable`` is truthy.

    ``points`` is modified in place and returned; None is passed through.
    """
    if enable and points is not None:
        points[:, ax] = -(points[:, ax] + offset)
    return points


def random_flip_along_y(gt_boxes, points):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C)
    Returns:
        gt_boxes, points, enable -- ``enable`` tells whether the flip was applied.
    """
    enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5])
    if enable:
        gt_boxes[:, 0] = -gt_boxes[:, 0]
        gt_boxes[:, 6] = -(gt_boxes[:, 6] + np.pi)
        points[:, 0] = -points[:, 0]

        if gt_boxes.shape[1] > 7:
            gt_boxes[:, 7] = -gt_boxes[:, 7]

    return gt_boxes, points, enable


def global_rotation(gt_boxes, points, rot_range):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        rot_range: [min, max]
    Returns:
        gt_boxes, points, noise_rotation -- the angle drawn uniformly from ``rot_range``.
    """
    noise_rotation = np.random.uniform(rot_range[0], rot_range[1])
    angle = np.array([noise_rotation])
    points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], angle)[0]
    gt_boxes[:, 0:3] = common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], angle)[0]
    gt_boxes[:, 6] += noise_rotation
    if gt_boxes.shape[1] > 7:
        # Rotate columns 7:9 as a 3-vector with a zero third component.
        gt_boxes[:, 7:9] = common_utils.rotate_points_along_z(
            np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[np.newaxis, :, :],
            angle
        )[0][:, 0:2]

    return gt_boxes, points, noise_rotation


def global_rotation_with_param(batch_dict, noise_rotation=None, num_frames=2):
    """Rotate the current frame and any ``points-<i>`` / ``gt_boxes-<i>`` entries by ``noise_rotation``.

    For frame ``i``, the matching ``gt_tracklets`` columns start at ``7 + (i - 1) * 4``.
    ``batch_dict`` is updated in place and returned.
    """
    angle = np.array([noise_rotation])
    for i in range(num_frames):
        if i == 0:
            batch_dict['points'] = common_utils.rotate_points_along_z(
                batch_dict['points'][np.newaxis, :, :], angle)[0]
            batch_dict['gt_boxes'][:, 0:3] = common_utils.rotate_points_along_z(
                batch_dict['gt_boxes'][np.newaxis, :, 0:3], angle)[0]
            batch_dict['gt_boxes'][:, 6] += noise_rotation
            batch_dict['gt_tracklets'][:, 0:3] = common_utils.rotate_points_along_z(
                batch_dict['gt_tracklets'][np.newaxis, :, 0:3], angle)[0]
            batch_dict['gt_tracklets'][:, 6] += noise_rotation

        points_key = 'points' + str(-i)
        if points_key in batch_dict:
            batch_dict[points_key] = common_utils.rotate_points_along_z(
                batch_dict[points_key][np.newaxis, :, :], angle)[0]
            begin_id = 7 + (i - 1) * 4
            batch_dict['gt_tracklets'][:, begin_id:begin_id + 3] = common_utils.rotate_points_along_z(
                batch_dict['gt_tracklets'][np.newaxis, :, begin_id:begin_id + 3], angle)[0]
            batch_dict['gt_tracklets'][:, begin_id + 3] += noise_rotation

        boxes_key = 'gt_boxes' + str(-i)
        if boxes_key in batch_dict:
            batch_dict[boxes_key][:, :3] = common_utils.rotate_points_along_z(
                batch_dict[boxes_key][np.newaxis, :, :3], angle)[0]
            batch_dict[boxes_key][:, 6] += noise_rotation

    return batch_dict


def boxes_rotation_with_param(boxes, noise_rotation=None):
    """Rotate box centres (columns 0:3) about z and add ``noise_rotation`` to column 6, in place."""
    boxes[:, 0:3] = common_utils.rotate_points_along_z(
        boxes[np.newaxis, :, 0:3], np.array([noise_rotation]))[0]
    boxes[:, 6] += noise_rotation
    return boxes


def global_scaling(gt_boxes, points, scale_range):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading]
        points: (M, 3 + C),
        scale_range: [min, max]
    Returns:
        gt_boxes, points, noise_scale -- always three values. When the range is
        narrower than 1e-3 nothing is scaled and ``noise_scale`` is 1.0.
    """
    if scale_range[1] - scale_range[0] < 1e-3:
        # Keep the return arity fixed so callers can always unpack three values.
        return gt_boxes, points, 1.0
    noise_scale = np.random.uniform(scale_range[0], scale_range[1])
    points[:, :3] *= noise_scale
    gt_boxes[:, :6] *= noise_scale
    return gt_boxes, points, noise_scale


def global_scaling_with_param(batch_dict, noise_scale=None, num_frames=2):
    """Scale the current frame and any ``points-<i>`` / ``gt_boxes-<i>`` entries by ``noise_scale``.

    For frame ``i``, the matching ``gt_tracklets`` columns start at ``7 + (i - 1) * 4``.
    ``batch_dict`` is updated in place and returned.
    """
    for i in range(num_frames):
        if i == 0:
            batch_dict['points'][:, 0:3] *= noise_scale
            batch_dict['gt_boxes'][:, 0:6] *= noise_scale
            batch_dict['gt_tracklets'][:, 0:6] *= noise_scale

        points_key = 'points' + str(-i)
        if points_key in batch_dict:
            begin_id = 7 + (i - 1) * 4
            batch_dict[points_key][:, 0:3] *= noise_scale
            batch_dict['gt_tracklets'][:, begin_id:begin_id + 3] *= noise_scale

        boxes_key = 'gt_boxes' + str(-i)
        if boxes_key in batch_dict:
            batch_dict[boxes_key][:, 0:6] *= noise_scale

    return batch_dict
def get_points_in_box(points, gt_box):
    """Select the points that fall inside one rotated box.

    Args:
        points: array whose first three columns are x, y, z.
        gt_box: [cx, cy, cz, dx, dy, dz, rz].
    Returns:
        (points_inside, mask). The x/y extents are widened by a 0.1 margin;
        the z test uses ``dz / 2`` without a margin.
    """
    x, y, z = points[:, 0], points[:, 1], points[:, 2]
    cx, cy, cz = gt_box[0], gt_box[1], gt_box[2]
    dx, dy, dz, rz = gt_box[3], gt_box[4], gt_box[5], gt_box[6]
    shift_x, shift_y, shift_z = x - cx, y - cy, z - cz

    MARGIN = 1e-1
    # Rotate the offsets by -rz to express them in the box frame.
    cosa, sina = math.cos(-rz), math.sin(-rz)
    local_x = shift_x * cosa + shift_y * (-sina)
    local_y = shift_x * sina + shift_y * cosa

    mask = np.logical_and(abs(shift_z) <= dz / 2.0,
                          np.logical_and(abs(local_x) <= dx / 2.0 + MARGIN,
                                         abs(local_y) <= dy / 2.0 + MARGIN))

    points = points[mask]

    return points, mask


def get_pyramids(boxes):
    """Build, for every box, six pyramids made of the box centre plus the four corners of one face.

    Returns:
        Array of shape [N, 6, 15] (15 = 5 vertices * 3 coordinates).
    """
    pyramid_orders = np.array([
        [0, 1, 5, 4],
        [4, 5, 6, 7],
        [7, 6, 2, 3],
        [3, 2, 1, 0],
        [1, 2, 6, 5],
        [0, 4, 7, 3]
    ])
    boxes_corners = box_utils.boxes_to_corners_3d(boxes).reshape(-1, 24)

    pyramid_list = []
    for order in pyramid_orders:
        # frustum polygon: 5 corners, 5 surfaces
        pyramid = np.concatenate((
            boxes[:, 0:3],
            boxes_corners[:, 3 * order[0]: 3 * order[0] + 3],
            boxes_corners[:, 3 * order[1]: 3 * order[1] + 3],
            boxes_corners[:, 3 * order[2]: 3 * order[2] + 3],
            boxes_corners[:, 3 * order[3]: 3 * order[3] + 3]), axis=1)
        pyramid_list.append(pyramid[:, None, :])
    pyramids = np.concatenate(pyramid_list, axis=1)  # [N, 6, 15], 15=5*3
    return pyramids


def one_hot(x, num_class=1):
    """Return a ``(len(x), num_class)`` float array with a 1 at column ``x[i]`` of row ``i``."""
    if num_class is None:
        num_class = 1
    ohx = np.zeros((len(x), num_class))
    ohx[range(len(x)), x] = 1
    return ohx


def points_in_pyramids_mask(points, pyramids):
    """Return a ``(num_points, num_pyramids)`` boolean array marking points inside each pyramid hull."""
    pyramids = pyramids.reshape(-1, 5, 3)
    # ``np.bool`` was removed from NumPy; the builtin ``bool`` gives the same dtype.
    flags = np.zeros((points.shape[0], pyramids.shape[0]), dtype=bool)
    for i, pyramid in enumerate(pyramids):
        flags[:, i] = np.logical_or(flags[:, i], box_utils.in_hull(points[:, 0:3], pyramid))
    return flags


def local_pyramid_dropout(gt_boxes, points, dropout_prob, pyramids=None):
    """Remove all points inside one randomly chosen pyramid of each selected box.

    A box is selected when a uniform draw is ``<= dropout_prob``.

    Returns:
        gt_boxes (unchanged), the remaining points, and the pyramids of the
        boxes that were NOT selected.
    """
    if pyramids is None:
        pyramids = get_pyramids(gt_boxes).reshape([-1, 6, 5, 3])  # each six surface of boxes: [num_boxes, 6, 15=3*5]
    drop_pyramid_indices = np.random.randint(0, 6, (pyramids.shape[0]))
    drop_pyramid_one_hot = one_hot(drop_pyramid_indices, num_class=6)
    drop_box_mask = np.random.uniform(0, 1, (pyramids.shape[0])) <= dropout_prob
    if np.sum(drop_box_mask) != 0:
        # (box selected) AND (this is the chosen face) -> pyramid to drop
        drop_pyramid_mask = (np.tile(drop_box_mask[:, None], [1, 6]) * drop_pyramid_one_hot) > 0
        drop_pyramids = pyramids[drop_pyramid_mask]
        point_masks = points_in_pyramids_mask(points, drop_pyramids)
        points = points[np.logical_not(point_masks.any(-1))]
    pyramids = pyramids[np.logical_not(drop_box_mask)]
    return gt_boxes, points, pyramids


def local_pyramid_sparsify(gt_boxes, points, prob, max_num_pts, pyramids=None):
    """Down-sample one randomly chosen pyramid of each selected box to ``max_num_pts`` points.

    A box is selected when a uniform draw is ``<= prob``; only pyramids holding
    more than ``max_num_pts`` points are thinned.

    Returns:
        gt_boxes (unchanged), the resulting points, and the pyramids of the
        boxes that were NOT selected.
    """
    if pyramids is None:
        pyramids = get_pyramids(gt_boxes).reshape([-1, 6, 5, 3])  # each six surface of boxes: [num_boxes, 6, 15=3*5]
    if pyramids.shape[0] > 0:
        sparsity_prob, sparsity_num = prob, max_num_pts
        sparsify_pyramid_indices = np.random.randint(0, 6, (pyramids.shape[0]))
        sparsify_pyramid_one_hot = one_hot(sparsify_pyramid_indices, num_class=6)
        sparsify_box_mask = np.random.uniform(0, 1, (pyramids.shape[0])) <= sparsity_prob
        sparsify_pyramid_mask = (np.tile(sparsify_box_mask[:, None], [1, 6]) * sparsify_pyramid_one_hot) > 0

        pyramid_sampled = pyramids[sparsify_pyramid_mask]  # (-1,6,5,3)[(num_sample,6)]
        pyramid_sampled_point_masks = points_in_pyramids_mask(points, pyramid_sampled)
        pyramid_sampled_points_num = pyramid_sampled_point_masks.sum(0)  # the number of points in each surface pyramid
        # only pyramids holding more than sparsity_num points are sparsified
        valid_pyramid_sampled_mask = pyramid_sampled_points_num > sparsity_num

        sparsify_pyramids = pyramid_sampled[valid_pyramid_sampled_mask]
        if sparsify_pyramids.shape[0] > 0:
            point_masks = pyramid_sampled_point_masks[:, valid_pyramid_sampled_mask]
            # points outside every pyramid that is being down-sampled
            remain_points = points[np.logical_not(point_masks.any(-1))]
            to_sparsify_points = [points[point_masks[:, i]] for i in range(point_masks.shape[1])]

            sparsified_points = []
            for sample in to_sparsify_points:
                sampled_indices = np.random.choice(sample.shape[0], size=sparsity_num, replace=False)
                sparsified_points.append(sample[sampled_indices])
            sparsified_points = np.concatenate(sparsified_points, axis=0)
            points = np.concatenate([remain_points, sparsified_points], axis=0)
        pyramids = pyramids[np.logical_not(sparsify_box_mask)]
    return gt_boxes, points, pyramids
[points[point_masks[:, i]] for i in range(point_masks.shape[1])] sparsified_points = [] for sample in to_sparsify_points: sampled_indices = np.random.choice(sample.shape[0], size=sparsity_num, replace=False) sparsified_points.append(sample[sampled_indices]) sparsified_points = np.concatenate(sparsified_points, axis=0) points = np.concatenate([remain_points, sparsified_points], axis=0) pyramids = pyramids[np.logical_not(sparsify_box_mask)] return gt_boxes, points, pyramids def local_pyramid_swap(gt_boxes, points, prob, max_num_pts, pyramids=None): def get_points_ratio(points, pyramid): surface_center = (pyramid[3:6] + pyramid[6:9] + pyramid[9:12] + pyramid[12:]) / 4.0 vector_0, vector_1, vector_2 = pyramid[6:9] - pyramid[3:6], pyramid[12:] - pyramid[3:6], pyramid[ 0:3] - surface_center alphas = ((points[:, 0:3] - pyramid[3:6]) * vector_0).sum(-1) / np.power(vector_0, 2).sum() betas = ((points[:, 0:3] - pyramid[3:6]) * vector_1).sum(-1) / np.power(vector_1, 2).sum() gammas = ((points[:, 0:3] - surface_center) * vector_2).sum(-1) / np.power(vector_2, 2).sum() return [alphas, betas, gammas] def recover_points_by_ratio(points_ratio, pyramid): alphas, betas, gammas = points_ratio surface_center = (pyramid[3:6] + pyramid[6:9] + pyramid[9:12] + pyramid[12:]) / 4.0 vector_0, vector_1, vector_2 = pyramid[6:9] - pyramid[3:6], pyramid[12:] - pyramid[3:6], pyramid[ 0:3] - surface_center points = (alphas[:, None] * vector_0 + betas[:, None] * vector_1) + pyramid[3:6] + gammas[:, None] * vector_2 return points def recover_points_intensity_by_ratio(points_intensity_ratio, max_intensity, min_intensity): return points_intensity_ratio * (max_intensity - min_intensity) + min_intensity # swap partition if pyramids is None: pyramids = get_pyramids(gt_boxes).reshape([-1, 6, 5, 3]) # each six surface of boxes: [num_boxes, 6, 15=3*5] swap_prob, num_thres = prob, max_num_pts swap_pyramid_mask = np.random.uniform(0, 1, (pyramids.shape[0])) <= swap_prob if swap_pyramid_mask.sum() > 0: 
point_masks = points_in_pyramids_mask(points, pyramids) point_nums = point_masks.sum(0).reshape(pyramids.shape[0], -1) # [N, 6] non_zero_pyramids_mask = point_nums > num_thres # ingore dropout pyramids or highly occluded pyramids selected_pyramids = non_zero_pyramids_mask * swap_pyramid_mask[:, None] # selected boxes and all their valid pyramids # print(selected_pyramids) if selected_pyramids.sum() > 0: # get to_swap pyramids index_i, index_j = np.nonzero(selected_pyramids) selected_pyramid_indices = [np.random.choice(index_j[index_i == i]) \ if e and (index_i == i).any() else 0 for i, e in enumerate(swap_pyramid_mask)] selected_pyramids_mask = selected_pyramids * one_hot(selected_pyramid_indices, num_class=6) == 1 to_swap_pyramids = pyramids[selected_pyramids_mask] # get swapped pyramids index_i, index_j = np.nonzero(selected_pyramids_mask) non_zero_pyramids_mask[selected_pyramids_mask] = False swapped_index_i = np.array([np.random.choice(np.where(non_zero_pyramids_mask[:, j])[0]) if \ np.where(non_zero_pyramids_mask[:, j])[0].shape[0] > 0 else index_i[i] for i, j in enumerate(index_j.tolist())]) swapped_indicies = np.concatenate([swapped_index_i[:, None], index_j[:, None]], axis=1) swapped_pyramids = pyramids[ swapped_indicies[:, 0].astype(np.int32), swapped_indicies[:, 1].astype(np.int32)] # concat to_swap&swapped pyramids swap_pyramids = np.concatenate([to_swap_pyramids, swapped_pyramids], axis=0) swap_point_masks = points_in_pyramids_mask(points, swap_pyramids) remain_points = points[np.logical_not(swap_point_masks.any(-1))] # swap pyramids points_res = [] num_swapped_pyramids = swapped_pyramids.shape[0] for i in range(num_swapped_pyramids): to_swap_pyramid = to_swap_pyramids[i] swapped_pyramid = swapped_pyramids[i] to_swap_points = points[swap_point_masks[:, i]] swapped_points = points[swap_point_masks[:, i + num_swapped_pyramids]] # for intensity transform to_swap_points_intensity_ratio = (to_swap_points[:, 3:] - to_swap_points[:, 3:].min()) / \ np.clip( 
(to_swap_points[:, 3:].max() - to_swap_points[:, 3:].min()), 1e-6, 1) swapped_points_intensity_ratio = (swapped_points[:, 3:] - swapped_points[:, 3:].min()) / \ np.clip( (swapped_points[:, 3:].max() - swapped_points[:, 3:].min()), 1e-6, 1) to_swap_points_ratio = get_points_ratio(to_swap_points, to_swap_pyramid.reshape(15)) swapped_points_ratio = get_points_ratio(swapped_points, swapped_pyramid.reshape(15)) new_to_swap_points = recover_points_by_ratio(swapped_points_ratio, to_swap_pyramid.reshape(15)) new_swapped_points = recover_points_by_ratio(to_swap_points_ratio, swapped_pyramid.reshape(15)) # for intensity transform new_to_swap_points_intensity = recover_points_intensity_by_ratio( swapped_points_intensity_ratio, to_swap_points[:, 3:].max(), to_swap_points[:, 3:].min()) new_swapped_points_intensity = recover_points_intensity_by_ratio( to_swap_points_intensity_ratio, swapped_points[:, 3:].max(), swapped_points[:, 3:].min()) # new_to_swap_points = np.concatenate([new_to_swap_points, swapped_points[:, -1:]], axis=1) # new_swapped_points = np.concatenate([new_swapped_points, to_swap_points[:, -1:]], axis=1) new_to_swap_points = np.concatenate([new_to_swap_points, new_to_swap_points_intensity], axis=1) new_swapped_points = np.concatenate([new_swapped_points, new_swapped_points_intensity], axis=1) points_res.append(new_to_swap_points) points_res.append(new_swapped_points) points_res = np.concatenate(points_res, axis=0) points = np.concatenate([remain_points, points_res], axis=0) return gt_boxes, points def noise_per_object_v3_(gt_boxes, points=None, points_pseudo=None, valid_mask=None, rotation_perturb=np.pi / 4, center_noise_std=1.0, global_random_rot_range=np.pi / 4, data_aug_with_context=-1.0, num_try=100): """Random rotate or remove each groundtruth independently. use kitti viewer to test this function points_transform_ Args: gt_boxes (np.ndarray): Ground truth boxes with shape (N, 7). points (np.ndarray | None): Input point cloud with shape (M, 4). Default: None. 
valid_mask (np.ndarray | None): Mask to indicate which boxes are valid. Default: None. rotation_perturb (float): Rotation perturbation. Default: pi / 4. center_noise_std (float): Center noise standard deviation. Default: 1.0. global_random_rot_range (float): Global random rotation range. Default: pi/4. num_try (int): Number of try. Default: 100. """ num_boxes = gt_boxes.shape[0] if not isinstance(rotation_perturb, (list, tuple, np.ndarray)): rotation_perturb = [-rotation_perturb, rotation_perturb] if not isinstance(global_random_rot_range, (list, tuple, np.ndarray)): global_random_rot_range = [ -global_random_rot_range, global_random_rot_range ] enable_grot = np.abs(global_random_rot_range[0] - global_random_rot_range[1]) >= 1e-3 if not isinstance(center_noise_std, (list, tuple, np.ndarray)): center_noise_std = [ center_noise_std, center_noise_std, center_noise_std ] if valid_mask is None: valid_mask = np.ones((num_boxes, ), dtype=np.bool_) center_noise_std = np.array(center_noise_std, dtype=gt_boxes.dtype) loc_noises = np.random.normal( scale=center_noise_std, size=[num_boxes, num_try, 3]) rot_noises = np.random.uniform( rotation_perturb[0], rotation_perturb[1], size=[num_boxes, num_try]) global_rot_noises = np.random.uniform( global_random_rot_range[0], global_random_rot_range[1], size=[num_boxes, num_try]) origin = (0.5, 0.5, 0.5) offset = np.array([0.0, 0.0, 0.0, data_aug_with_context[0], data_aug_with_context[1], data_aug_with_context[2], 0.0]) gt_box_corners = box_np_ops.center_to_corner_box3d( gt_boxes[:, :3], gt_boxes[:, 3:6] + offset[3:6], gt_boxes[:, 6], origin=origin, axis=2) if not enable_grot: selected_noise = noise_per_box(gt_boxes[:, [0, 1, 3, 4, 6]] + offset[[0, 1, 3, 4, 6]], valid_mask, loc_noises, rot_noises) else: selected_noise = noise_per_box_v2_(gt_boxes[:, [0, 1, 3, 4, 6]] + offset[[0, 1, 3, 4, 6]], valid_mask, loc_noises, rot_noises, global_rot_noises) loc_transforms = _select_transform(loc_noises, selected_noise) rot_transforms = 
_select_transform(rot_noises, selected_noise) surfaces = box_np_ops.corner_to_surfaces_3d_jit(gt_box_corners) if points is not None: point_masks = box_np_ops.points_in_convex_polygon_3d_jit( points[:, :3], surfaces) points_transform_(points, gt_boxes[:, :3], point_masks, loc_transforms, rot_transforms, valid_mask) if points_pseudo is not None: point_pseudo_masks = box_np_ops.points_in_convex_polygon_3d_jit( points_pseudo[:, :3], surfaces) points_transform_(points_pseudo, gt_boxes[:, :3], point_pseudo_masks, loc_transforms, rot_transforms, valid_mask) box3d_transform_(gt_boxes, loc_transforms, rot_transforms, valid_mask) @numba.njit def _rotation_box2d_jit_(corners, angle, rot_mat_T): """Rotate 2D boxes. Args: corners (np.ndarray): Corners of boxes. angle (float): Rotation angle. rot_mat_T (np.ndarray): Transposed rotation matrix. """ rot_sin = np.sin(angle) rot_cos = np.cos(angle) rot_mat_T[0, 0] = rot_cos rot_mat_T[0, 1] = -rot_sin rot_mat_T[1, 0] = rot_sin rot_mat_T[1, 1] = rot_cos corners[:] = corners @ rot_mat_T @numba.jit(nopython=True) def box_collision_test(boxes, qboxes, clockwise=True): """Box collision test. Args: boxes (np.ndarray): Corners of current boxes. qboxes (np.ndarray): Boxes to be avoid colliding. clockwise (bool): Whether the corners are in clockwise order. Default: True. 
""" N = boxes.shape[0] K = qboxes.shape[0] ret = np.zeros((N, K), dtype=np.bool_) slices = np.array([1, 2, 3, 0]) lines_boxes = np.stack((boxes, boxes[:, slices, :]), axis=2) # [N, 4, 2(line), 2(xy)] lines_qboxes = np.stack((qboxes, qboxes[:, slices, :]), axis=2) # vec = np.zeros((2,), dtype=boxes.dtype) boxes_standup = box_np_ops.corner_to_standup_nd_jit(boxes) qboxes_standup = box_np_ops.corner_to_standup_nd_jit(qboxes) for i in range(N): for j in range(K): # calculate standup first iw = ( min(boxes_standup[i, 2], qboxes_standup[j, 2]) - max(boxes_standup[i, 0], qboxes_standup[j, 0])) if iw > 0: ih = ( min(boxes_standup[i, 3], qboxes_standup[j, 3]) - max(boxes_standup[i, 1], qboxes_standup[j, 1])) if ih > 0: for k in range(4): for box_l in range(4): A = lines_boxes[i, k, 0] B = lines_boxes[i, k, 1] C = lines_qboxes[j, box_l, 0] D = lines_qboxes[j, box_l, 1] acd = (D[1] - A[1]) * (C[0] - A[0]) > (C[1] - A[1]) * ( D[0] - A[0]) bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * ( D[0] - B[0]) if acd != bcd: abc = (C[1] - A[1]) * (B[0] - A[0]) > ( B[1] - A[1]) * ( C[0] - A[0]) abd = (D[1] - A[1]) * (B[0] - A[0]) > ( B[1] - A[1]) * ( D[0] - A[0]) if abc != abd: ret[i, j] = True # collision. break if ret[i, j] is True: break if ret[i, j] is False: # now check complete overlap. 
# box overlap qbox: box_overlap_qbox = True for box_l in range(4): # point l in qboxes for k in range(4): # corner k in boxes vec = boxes[i, k] - boxes[i, (k + 1) % 4] if clockwise: vec = -vec cross = vec[1] * ( boxes[i, k, 0] - qboxes[j, box_l, 0]) cross -= vec[0] * ( boxes[i, k, 1] - qboxes[j, box_l, 1]) if cross >= 0: box_overlap_qbox = False break if box_overlap_qbox is False: break if box_overlap_qbox is False: qbox_overlap_box = True for box_l in range(4): # point box_l in boxes for k in range(4): # corner k in qboxes vec = qboxes[j, k] - qboxes[j, (k + 1) % 4] if clockwise: vec = -vec cross = vec[1] * ( qboxes[j, k, 0] - boxes[i, box_l, 0]) cross -= vec[0] * ( qboxes[j, k, 1] - boxes[i, box_l, 1]) if cross >= 0: # qbox_overlap_box = False break if qbox_overlap_box is False: break if qbox_overlap_box: ret[i, j] = True # collision. else: ret[i, j] = True # collision. return ret @numba.njit def noise_per_box(boxes, valid_mask, loc_noises, rot_noises): """Add noise to every box (only on the horizontal plane). Args: boxes (np.ndarray): Input boxes with shape (N, 5). valid_mask (np.ndarray): Mask to indicate which boxes are valid with shape (N). loc_noises (np.ndarray): Location noises with shape (N, M, 3). rot_noises (np.ndarray): Rotation noises with shape (N, M). Returns: np.ndarray: Mask to indicate whether the noise is added successfully (pass the collision test). 
""" num_boxes = boxes.shape[0] num_tests = loc_noises.shape[1] box_corners = box_np_ops.box2d_to_corner_jit(boxes) current_corners = np.zeros((4, 2), dtype=boxes.dtype) rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype) success_mask = -np.ones((num_boxes, ), dtype=np.int64) for i in range(num_boxes): if valid_mask[i]: for j in range(num_tests): current_corners[:] = box_corners[i] current_corners -= boxes[i, :2] _rotation_box2d_jit_(current_corners, rot_noises[i, j], rot_mat_T) current_corners += boxes[i, :2] + loc_noises[i, j, :2] coll_mat = box_collision_test( current_corners.reshape(1, 4, 2), box_corners) coll_mat[0, i] = False # print(coll_mat) if not coll_mat.any(): success_mask[i] = j box_corners[i] = current_corners break return success_mask @numba.njit def noise_per_box_v2_(boxes, valid_mask, loc_noises, rot_noises, global_rot_noises): """Add noise to every box (only on the horizontal plane). Version 2 used when enable global rotations. Args: boxes (np.ndarray): Input boxes with shape (N, 5). valid_mask (np.ndarray): Mask to indicate which boxes are valid with shape (N). loc_noises (np.ndarray): Location noises with shape (N, M, 3). rot_noises (np.ndarray): Rotation noises with shape (N, M). Returns: np.ndarray: Mask to indicate whether the noise is added successfully (pass the collision test). 
""" num_boxes = boxes.shape[0] num_tests = loc_noises.shape[1] box_corners = box_np_ops.box2d_to_corner_jit(boxes) current_corners = np.zeros((4, 2), dtype=boxes.dtype) current_box = np.zeros((1, 5), dtype=boxes.dtype) rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype) dst_pos = np.zeros((2, ), dtype=boxes.dtype) success_mask = -np.ones((num_boxes, ), dtype=np.int64) corners_norm = np.zeros((4, 2), dtype=boxes.dtype) corners_norm[1, 1] = 1.0 corners_norm[2] = 1.0 corners_norm[3, 0] = 1.0 corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype) corners_norm = corners_norm.reshape(4, 2) for i in range(num_boxes): if valid_mask[i]: for j in range(num_tests): current_box[0, :] = boxes[i] # current_radius = np.sqrt(boxes[i, 0]**2 + boxes[i, 1]**2) # current_grot = np.arctan2(boxes[i, 0], boxes[i, 1]) # dst_grot = current_grot + global_rot_noises[i, j] # dst_pos[0] = current_radius * np.sin(dst_grot) # dst_pos[1] = current_radius * np.cos(dst_grot) dst_pos[0] = boxes[i, 0] * np.cos(global_rot_noises[i, j]) + boxes[i, 1] * np.sin(global_rot_noises[i, j]) dst_pos[1] = -boxes[i, 0] * np.sin(global_rot_noises[i, j]) + boxes[i, 1] * np.cos(global_rot_noises[i, j]) current_box[0, :2] = dst_pos # current_box[0, -1] += (dst_grot - current_grot) current_box[0, -1] += global_rot_noises[i, j] rot_sin = np.sin(current_box[0, -1]) rot_cos = np.cos(current_box[0, -1]) rot_mat_T[0, 0] = rot_cos rot_mat_T[0, 1] = -rot_sin rot_mat_T[1, 0] = rot_sin rot_mat_T[1, 1] = rot_cos current_corners[:] = current_box[ 0, 2:4] * corners_norm @ rot_mat_T + current_box[0, :2] current_corners -= current_box[0, :2] _rotation_box2d_jit_(current_corners, rot_noises[i, j], rot_mat_T) current_corners += current_box[0, :2] + loc_noises[i, j, :2] coll_mat = box_collision_test( current_corners.reshape(1, 4, 2), box_corners) coll_mat[0, i] = False if not coll_mat.any(): success_mask[i] = j box_corners[i] = current_corners loc_noises[i, j, :2] += (dst_pos - boxes[i, :2]) # rot_noises[i, j] += (dst_grot - 
current_grot) rot_noises[i, j] += global_rot_noises[i, j] break return success_mask def _select_transform(transform, indices): """Select transform. Args: transform (np.ndarray): Transforms to select from. indices (np.ndarray): Mask to indicate which transform to select. Returns: np.ndarray: Selected transforms. """ result = np.zeros((transform.shape[0], *transform.shape[2:]), dtype=transform.dtype) for i in range(transform.shape[0]): if indices[i] != -1: result[i] = transform[i, indices[i]] return result @numba.njit def _rotation_matrix_3d_(rot_mat_T, angle, axis): """Get the 3D rotation matrix. Args: rot_mat_T (np.ndarray): Transposed rotation matrix. angle (float): Rotation angle. axis (int): Rotation axis. """ rot_sin = np.sin(angle) rot_cos = np.cos(angle) rot_mat_T[:] = np.eye(3) if axis == 1: rot_mat_T[0, 0] = rot_cos rot_mat_T[0, 2] = -rot_sin rot_mat_T[2, 0] = rot_sin rot_mat_T[2, 2] = rot_cos elif axis == 2 or axis == -1: rot_mat_T[0, 0] = rot_cos rot_mat_T[0, 1] = -rot_sin rot_mat_T[1, 0] = rot_sin rot_mat_T[1, 1] = rot_cos elif axis == 0: rot_mat_T[1, 1] = rot_cos rot_mat_T[1, 2] = -rot_sin rot_mat_T[2, 1] = rot_sin rot_mat_T[2, 2] = rot_cos @numba.njit def points_transform_(points, centers, point_masks, loc_transform, rot_transform, valid_mask): """Apply transforms to points and box centers. Args: points (np.ndarray): Input points. centers (np.ndarray): Input box centers. point_masks (np.ndarray): Mask to indicate which points need to be transformed. loc_transform (np.ndarray): Location transform to be applied. rot_transform (np.ndarray): Rotation transform to be applied. valid_mask (np.ndarray): Mask to indicate which boxes are valid. 
""" num_box = centers.shape[0] num_points = points.shape[0] rot_mat_T = np.zeros((num_box, 3, 3), dtype=points.dtype) for i in range(num_box): _rotation_matrix_3d_(rot_mat_T[i], rot_transform[i], 2) for i in range(num_points): for j in range(num_box): if valid_mask[j]: if point_masks[i, j] == 1: points[i, :3] -= centers[j, :3] points[i:i + 1, :3] = points[i:i + 1, :3] @ rot_mat_T[j] points[i, :3] += centers[j, :3] points[i, :3] += loc_transform[j] break # only apply first box's transform @numba.njit def box3d_transform_(boxes, loc_transform, rot_transform, valid_mask): """Transform 3D boxes. Args: boxes (np.ndarray): 3D boxes to be transformed. loc_transform (np.ndarray): Location transform to be applied. rot_transform (np.ndarray): Rotation transform to be applied. valid_mask (np.ndarray | None): Mask to indicate which boxes are valid. """ num_box = boxes.shape[0] for i in range(num_box): if valid_mask[i]: boxes[i, :3] += loc_transform[i] boxes[i, 6] += rot_transform[i] ================================================ FILE: pcdet/datasets/augmentor/data_augmentor.py ================================================ from functools import partial import numpy as np from ...utils import common_utils from . 
import augmentor_utils, database_sampler class DataAugmentor(object): def __init__(self, root_path, augmentor_configs, class_names, logger=None): self.root_path = root_path self.class_names = class_names self.logger = logger self.data_augmentor_queue = [] aug_config_list = augmentor_configs if isinstance(augmentor_configs, list) \ else augmentor_configs.AUG_CONFIG_LIST for cur_cfg in aug_config_list: if not isinstance(augmentor_configs, list): if cur_cfg.NAME in augmentor_configs.DISABLE_AUG_LIST: continue cur_augmentor = getattr(self, cur_cfg.NAME)(config=cur_cfg) self.data_augmentor_queue.append(cur_augmentor) def gt_sampling(self, config=None): db_sampler = database_sampler.DataBaseSampler( root_path=self.root_path, sampler_cfg=config, class_names=self.class_names, logger=self.logger, ) return db_sampler def da_sampling(self, config=None): db_sampler = database_sampler.DADataBaseSampler( root_path=self.root_path, sampler_cfg=config, class_names=self.class_names, logger=self.logger, ) return db_sampler def __getstate__(self): d = dict(self.__dict__) del d['logger'] return d def __setstate__(self, d): self.__dict__.update(d) def random_world_rotation(self, data_dict=None, config=None): if data_dict is None: return partial(self.random_world_rotation, config=config) rot_range = config['WORLD_ROT_ANGLE'] if not isinstance(rot_range, list): rot_range = [-rot_range, rot_range] gt_boxes, points, param = augmentor_utils.global_rotation( data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range ) data_dict['gt_boxes'] = gt_boxes data_dict['points'] = points aug_param=[param] data_dict['aug_param'] = aug_param return data_dict def random_world_flip(self, data_dict=None, config=None): if data_dict is None: return partial(self.random_world_flip, config=config) gt_boxes, points = data_dict['gt_boxes'], data_dict['points'] for cur_axis in config['ALONG_AXIS_LIST']: assert cur_axis in ['x', 'y'] gt_boxes, points, param = getattr(augmentor_utils, 'random_flip_along_%s' % 
cur_axis)( gt_boxes, points, ) data_dict['gt_boxes'] = gt_boxes data_dict['points'] = points if 'aug_param' in data_dict: data_dict['aug_param'].append(int(param)) else: data_dict['aug_param'] = [param] return data_dict def random_world_scaling(self, data_dict=None, config=None): if data_dict is None: return partial(self.random_world_scaling, config=config) gt_boxes, points, param = augmentor_utils.global_scaling( data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE'] ) data_dict['gt_boxes'] = gt_boxes data_dict['points'] = points if 'aug_param' in data_dict: data_dict['aug_param'].append(param) else: data_dict['aug_param'] = [param] return data_dict def random_local_noise(self, data_dict=None, config=None): if data_dict is None: return partial(self.random_local_noise, config=config) data_dict['gt_boxes'][:, 6] = -data_dict['gt_boxes'][:, 6] augmentor_utils.noise_per_object_v3_(data_dict['gt_boxes'], data_dict['points'], None, data_dict.get('valid_noise', None), config['LOCAL_ROT_RANGE'], config['TRANSLATION_STD'], config['GLOBAL_ROT_RANGE'], config['EXTRA_WIDTH']) data_dict['gt_boxes'][:, 6] = -data_dict['gt_boxes'][:, 6] if 'valid_noise' in data_dict: data_dict.pop('valid_noise') return data_dict def random_local_pyramid_aug(self, data_dict=None, config=None): """ Refer to the paper: SE-SSD: Self-Ensembling Single-Stage Object Detector From Point Cloud """ if data_dict is None: return partial(self.random_local_pyramid_aug, config=config) gt_boxes, points = data_dict['gt_boxes'], data_dict['points'] gt_boxes, points, pyramids = augmentor_utils.local_pyramid_dropout(gt_boxes, points, config['DROP_PROB']) gt_boxes, points, pyramids = augmentor_utils.local_pyramid_sparsify(gt_boxes, points, config['SPARSIFY_PROB'], config['SPARSIFY_MAX_NUM'], pyramids) gt_boxes, points = augmentor_utils.local_pyramid_swap(gt_boxes, points, config['SWAP_PROB'], config['SWAP_MAX_NUM'], pyramids) data_dict['gt_boxes'] = gt_boxes data_dict['points'] = points return 
data_dict def forward(self, data_dict): """ Args: data_dict: points: (N, 3 + C_in) gt_boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading] gt_names: optional, (N), string ... Returns: """ for cur_augmentor in self.data_augmentor_queue: data_dict = cur_augmentor(data_dict=data_dict) data_dict['gt_boxes'][:, 6] = common_utils.limit_period( data_dict['gt_boxes'][:, 6], offset=0.5, period=2 * np.pi ) if 'aug_param' in data_dict: data_dict['aug_param'] = np.array(data_dict['aug_param']) if 'calib' in data_dict: data_dict.pop('calib') if 'road_plane' in data_dict: data_dict.pop('road_plane') return data_dict ================================================ FILE: pcdet/datasets/augmentor/database_sampler.py ================================================ import pathlib import pickle import numpy as np from ...ops.iou3d_nms import iou3d_nms_utils from ...utils import box_utils import time import copy import random class DataBaseSampler(object): def __init__(self, root_path, sampler_cfg, class_names, logger=None): self.root_path = root_path self.class_names = class_names self.sampler_cfg = sampler_cfg #self.gt_path = pathlib.Path(sampler_cfg.GT_PATH) self.use_van = self.sampler_cfg.get('USE_VAN', None) self.logger = logger self.db_infos = {} for class_name in class_names: self.db_infos[class_name] = [] if self.use_van: self.db_infos['Van'] = [] for db_info_path in sampler_cfg.DB_INFO_PATH: db_info_path = self.root_path.resolve() / db_info_path with open(str(db_info_path), 'rb') as f: infos = pickle.load(f) for cls in class_names: if cls in infos.keys(): self.db_infos[cls].extend(infos[cls]) if self.use_van: if 'Van' in infos.keys(): self.db_infos['Van'].extend(infos['Van']) for func_name, val in sampler_cfg.PREPARE.items(): self.db_infos = getattr(self, func_name)(self.db_infos, val) self.sample_groups = {} self.sample_class_num = {} self.limit_whole_scene = sampler_cfg.get('LIMIT_WHOLE_SCENE', False) for x in sampler_cfg.SAMPLE_GROUPS: class_name, sample_num = 
x.split(':') if class_name not in class_names: if not (self.use_van and class_name == 'Van'): continue self.sample_class_num[class_name] = sample_num self.sample_groups[class_name] = { 'sample_num': sample_num, 'pointer': len(self.db_infos[class_name]), 'indices': np.arange(len(self.db_infos[class_name])) } def __getstate__(self): d = dict(self.__dict__) del d['logger'] return d def __setstate__(self, d): self.__dict__.update(d) def filter_by_difficulty(self, db_infos, removed_difficulty): new_db_infos = {} for key, dinfos in db_infos.items(): pre_len = len(dinfos) this_infos = [] for info in dinfos: if 'difficulty' in info: if info['difficulty'] not in removed_difficulty: this_infos.append(info) else: this_infos.append(info) new_db_infos[key] = this_infos if self.logger is not None: self.logger.info('Database filter by difficulty %s: %d => %d' % (key, pre_len, len(new_db_infos[key]))) return new_db_infos def filter_by_min_points(self, db_infos, min_gt_points_list): for name_num in min_gt_points_list: name, min_num = name_num.split(':') min_num = int(min_num) if min_num > 0 and name in db_infos.keys(): filtered_infos = [] for info in db_infos[name]: if info['num_points_in_gt'] >= min_num: filtered_infos.append(info) if self.logger is not None: self.logger.info('Database filter by min points %s: %d => %d' % (name, len(db_infos[name]), len(filtered_infos))) db_infos[name] = filtered_infos return db_infos def sample_with_fixed_number(self, class_name, sample_group): """ Args: class_name: sample_group: Returns: """ sample_num, pointer, indices = int(sample_group['sample_num']), sample_group['pointer'], sample_group['indices'] if pointer >= len(self.db_infos[class_name]): indices = np.random.permutation(len(self.db_infos[class_name])) pointer = 0 sampled_dict = [self.db_infos[class_name][idx] for idx in indices[pointer: pointer + sample_num]] pointer += sample_num sample_group['pointer'] = pointer sample_group['indices'] = indices return sampled_dict @staticmethod def 
put_boxes_on_road_planes(gt_boxes, road_planes, calib): """ Only validate in KITTIDataset Args: gt_boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...] road_planes: [a, b, c, d] calib: Returns: """ a, b, c, d = road_planes center_cam = calib.lidar_to_rect(gt_boxes[:, 0:3]) cur_height_cam = (-d - a * center_cam[:, 0] - c * center_cam[:, 2]) / b center_cam[:, 1] = cur_height_cam cur_lidar_height = calib.rect_to_lidar(center_cam)[:, 2] mv_height = gt_boxes[:, 2] - gt_boxes[:, 5] / 2 - cur_lidar_height gt_boxes[:, 2] -= mv_height # lidar view return gt_boxes, mv_height def points_rigid_transform(self,cloud,pose): if cloud.shape[0]==0: return cloud mat=np.ones(shape=(cloud.shape[0],4),dtype=np.float32) pose_mat=np.mat(pose) mat[:,0:3]=cloud[:,0:3] mat=np.mat(mat) transformed_mat=pose_mat*mat.T T=np.array(transformed_mat.T,dtype=np.float32) return T[:,0:3] def get_registration_angle(self,mat): cos_theta=mat[0,0] sin_theta=mat[1,0] if cos_theta < -1: cos_theta = -1 if cos_theta > 1: cos_theta = 1 theta_cos = np.arccos(cos_theta) if sin_theta >= 0: return theta_cos else: return 2 * np.pi - theta_cos def registration(self,pose, pre_pose, pre_obj_points, pre_box3d_lidar): inv_pose_of_last_frame = np.linalg.inv(pose) registration_mat = np.matmul(inv_pose_of_last_frame, pre_pose) if len(pre_obj_points)!=0: pre_obj_points[:, 0:3] = self.points_rigid_transform(pre_obj_points, registration_mat)[:,0:3] angle = self.get_registration_angle(registration_mat) pre_box3d_lidar[0:3] = self.points_rigid_transform(np.array([pre_box3d_lidar]), registration_mat)[0, 0:3] pre_box3d_lidar[6]+=angle return pre_obj_points, pre_box3d_lidar def add_sampled_boxes_to_scene(self, data_dict, sampled_gt_boxes, total_valid_sampled_dict): gt_boxes_mask = np.array([n in self.class_names for n in data_dict['gt_names']], dtype=np.bool_) gt_boxes = data_dict['gt_boxes'][gt_boxes_mask] gt_names = data_dict['gt_names'][gt_boxes_mask] if 'gt_tracklets' in data_dict: 
data_dict['gt_tracklets']=data_dict['gt_tracklets'][gt_boxes_mask] points = data_dict['points'] if 'road_plane' in data_dict: sampled_gt_boxes, mv_height = self.put_boxes_on_road_planes( sampled_gt_boxes, data_dict['road_plane'], data_dict['calib'] ) obj_points_list = [] for idx, info in enumerate(total_valid_sampled_dict): file_path = self.root_path / info['path'] #path = pathlib.Path(self.root_path) #file_path = path / info['path'] obj_points = np.fromfile(str(file_path), dtype=np.float32).reshape( [-1, self.sampler_cfg.NUM_POINT_FEATURES]) obj_points[:, :3] += info['box3d_lidar'][:3] if 'road_plane' in data_dict: # mv height obj_points[:, 2] -= mv_height[idx] obj_points_list.append(obj_points) obj_points = np.concatenate(obj_points_list, axis=0) sampled_gt_names = np.array([x['name'] for x in total_valid_sampled_dict]) if self.use_van: sampled_gt_names = np.array(['Car' if sampled_gt_names[i]=='Van' else sampled_gt_names[i] for i in range(len(sampled_gt_names))]) large_sampled_gt_boxes = box_utils.enlarge_box3d( sampled_gt_boxes[:, 0:7], extra_width=self.sampler_cfg.REMOVE_EXTRA_WIDTH ) points = box_utils.remove_points_in_boxes3d(points, large_sampled_gt_boxes) points = np.concatenate([obj_points[:, 0:points.shape[1]], points], axis=0) gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0) gt_boxes = np.concatenate([gt_boxes, sampled_gt_boxes], axis=0) valid_mask = np.ones((len(gt_names),), dtype=np.bool_) valid_mask[:len(gt_names) - len(sampled_gt_names)] = 0 data_dict['valid_noise'] = valid_mask data_dict['gt_boxes'] = gt_boxes data_dict['gt_names'] = gt_names data_dict['points'] = points if 'road_plane' in data_dict: data_dict.pop('road_plane') return data_dict def __call__(self, data_dict): """ Args: data_dict: gt_boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...] 
Returns: """ gt_boxes = data_dict['gt_boxes'] gt_names = data_dict['gt_names'].astype(str) existed_boxes = gt_boxes total_valid_sampled_dict = [] for class_name, sample_group in self.sample_groups.items(): if self.limit_whole_scene: num_gt = np.sum(class_name == gt_names) sample_group['sample_num'] = str(int(self.sample_class_num[class_name]) - num_gt) if int(sample_group['sample_num']) > 0: sampled_dict = self.sample_with_fixed_number(class_name, sample_group) sampled_boxes1 = np.stack([x['box3d_lidar'] for x in sampled_dict], axis=0).astype(np.float32) if self.sampler_cfg.get('DATABASE_WITH_FAKELIDAR', False): sampled_boxes1 = box_utils.boxes3d_kitti_fakelidar_to_lidar(sampled_boxes) sampled_boxes = copy.deepcopy(sampled_boxes1) iou1 = iou3d_nms_utils.boxes_bev_iou_cpu(sampled_boxes[:, 0:7], existed_boxes[:, 0:7]) iou2 = iou3d_nms_utils.boxes_bev_iou_cpu(sampled_boxes[:, 0:7], sampled_boxes[:, 0:7]) iou2[range(sampled_boxes.shape[0]), range(sampled_boxes.shape[0])] = 0 iou1 = iou1 if iou1.shape[1] > 0 else iou2 valid_mask = ((iou1.max(axis=1) + iou2.max(axis=1)) == 0).nonzero()[0] valid_sampled_dict = [sampled_dict[x] for x in valid_mask] valid_sampled_boxes = sampled_boxes[valid_mask] existed_boxes = np.concatenate((existed_boxes, valid_sampled_boxes), axis=0) total_valid_sampled_dict.extend(valid_sampled_dict) sampled_gt_boxes = existed_boxes[gt_boxes.shape[0]:, :] if total_valid_sampled_dict.__len__() > 0: data_dict = self.add_sampled_boxes_to_scene(data_dict, sampled_gt_boxes, total_valid_sampled_dict) return data_dict class DADataBaseSampler(object): def __init__(self, root_path, sampler_cfg, class_names, logger=None): self.root_path = root_path self.class_names = class_names self.sampler_cfg = sampler_cfg # self.gt_path = pathlib.Path(sampler_cfg.GT_PATH) self.use_van = self.sampler_cfg.get('USE_VAN', None) self.min_sampling_dis = sampler_cfg.MIN_SAMPLING_DIS self.max_sampling_dis = sampler_cfg.MIN_SAMPLING_DIS self.occlusion_noise = 
sampler_cfg.OCCLUSION_NOISE self.occlusion_offset = sampler_cfg.OCCLUSION_OFFSET self.sampling_method = sampler_cfg.SAMPLING_METHOD self.vert_res = sampler_cfg.VERT_RES self.hor_res = sampler_cfg.HOR_RES self.logger = logger self.db_infos = {} for class_name in class_names: self.db_infos[class_name] = [] if self.use_van: self.db_infos['Van'] = [] for db_info_path in sampler_cfg.DB_INFO_PATH: db_info_path = self.root_path.resolve() / db_info_path with open(str(db_info_path), 'rb') as f: infos = pickle.load(f) for cls in class_names: if cls in infos.keys(): self.db_infos[cls].extend(infos[cls]) # [self.db_infos[cur_class].extend(infos[cur_class]) for cur_class in class_names] if self.use_van: if 'Van' in infos.keys(): self.db_infos['Van'].extend(infos['Van']) for func_name, val in sampler_cfg.PREPARE.items(): self.db_infos = getattr(self, func_name)(self.db_infos, val) self.sample_groups = {} self.sample_class_num = {} self.limit_whole_scene = sampler_cfg.get('LIMIT_WHOLE_SCENE', False) for x in sampler_cfg.SAMPLE_GROUPS: class_name, sample_num = x.split(':') if class_name not in class_names: if not (self.use_van and class_name == 'Van'): continue self.sample_class_num[class_name] = sample_num self.sample_groups[class_name] = { 'sample_num': sample_num, 'pointer': len(self.db_infos[class_name]), 'indices': np.arange(len(self.db_infos[class_name])) } def __getstate__(self): d = dict(self.__dict__) del d['logger'] return d def __setstate__(self, d): self.__dict__.update(d) def to_sphere_coords(self, points): r = np.linalg.norm(points[:, 0:3], ord=2, axis=-1) theta = np.arccos(points[:, 2] / r) fan = np.arctan(points[:, 1] / points[:, 0]) new_points = copy.deepcopy(points) new_points[:, 0] = r new_points[:, 1] = theta new_points[:, 2] = fan return new_points def la_sampling(self, points, vert_res=0.006, hor_res=0.003): new_points = copy.deepcopy(points) sp_coords = self.to_sphere_coords(new_points) voxel_dict = {} for i, point in enumerate(sp_coords): vert_coord = 
point[1] // vert_res hor_coord = point[2] // hor_res voxel_key = str(vert_coord) + '_' + str(hor_coord) if voxel_key in voxel_dict: voxel_dict[voxel_key]['sp'].append(point) voxel_dict[voxel_key]['pts'].append(new_points[i]) else: voxel_dict[voxel_key] = {'sp': [point], 'pts': [new_points[i]]} sampled_list = [] for voxel_key in voxel_dict: sp = voxel_dict[voxel_key]['sp'] arg_min = np.argmin(np.array(sp)[:, 1]) min_point = voxel_dict[voxel_key]['pts'][arg_min] sampled_list.append(min_point) new_points = np.array(sampled_list) if len(new_points) < 5: return points else: return new_points def random_sampling(self, points, box, dis): new_points = copy.deepcopy(points) new_box = copy.deepcopy(box) x_off = dis y_off = 0 # np.random.randn()*10 new_points[:, 0] -= new_box[0] new_points[:, 1] -= new_box[1] new_box[0] = x_off new_box[1] = y_off new_points[:, 0] += new_box[0] new_points[:, 1] += new_box[1] nn = random.choices(new_points.tolist(), k=int((1 - dis / 100) ** 3 * 300)) return np.array(nn), new_box def random_drop_out(self, points, rand_noise=0.2, offset=0.3): rand = np.random.choice([0, 1, 2, 3]) new_points = [] for i, p in enumerate(points): if rand == 0 and p[1] + np.random.randn() * rand_noise < offset: new_points.append(points[i]) if rand == 1 and p[1] + np.random.randn() * rand_noise >= -offset: new_points.append(points[i]) if rand == 2 and p[2] + np.random.randn() * rand_noise < offset: new_points.append(points[i]) if rand == 3 and p[2] + np.random.randn() * rand_noise >= -offset: new_points.append(points[i]) new_points = np.array(new_points) if len(new_points) < 5: return self.random_drop_out(points, rand_noise, offset) return new_points def filter_by_difficulty(self, db_infos, removed_difficulty): new_db_infos = {} for key, dinfos in db_infos.items(): pre_len = len(dinfos) this_infos = [] for info in dinfos: if 'difficulty' in info: if info['difficulty'] not in removed_difficulty: this_infos.append(info) else: this_infos.append(info) new_db_infos[key] = 
this_infos if self.logger is not None: self.logger.info('Database filter by difficulty %s: %d => %d' % (key, pre_len, len(new_db_infos[key]))) return new_db_infos def filter_by_min_points(self, db_infos, min_gt_points_list): for name_num in min_gt_points_list: name, min_num = name_num.split(':') min_num = int(min_num) if min_num > 0 and name in db_infos.keys(): filtered_infos = [] for info in db_infos[name]: if info['num_points_in_gt'] >= min_num: filtered_infos.append(info) if self.logger is not None: self.logger.info('Database filter by min points %s: %d => %d' % (name, len(db_infos[name]), len(filtered_infos))) db_infos[name] = filtered_infos return db_infos def sample_with_fixed_number(self, class_name, sample_group): """ Args: class_name: sample_group: Returns: """ sample_num, pointer, indices = int(sample_group['sample_num']), sample_group['pointer'], sample_group['indices'] if pointer >= len(self.db_infos[class_name]): indices = np.random.permutation(len(self.db_infos[class_name])) pointer = 0 sampled_dict = [self.db_infos[class_name][idx] for idx in indices[pointer: pointer + sample_num]] pointer += sample_num sample_group['pointer'] = pointer sample_group['indices'] = indices return sampled_dict @staticmethod def put_boxes_on_road_planes(gt_boxes, road_planes, calib): """ Only validate in KITTIDataset Args: gt_boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...] 
road_planes: [a, b, c, d] calib: Returns: """ a, b, c, d = road_planes center_cam = calib.lidar_to_rect(gt_boxes[:, 0:3]) cur_height_cam = (-d - a * center_cam[:, 0] - c * center_cam[:, 2]) / b center_cam[:, 1] = cur_height_cam cur_lidar_height = calib.rect_to_lidar(center_cam)[:, 2] mv_height = gt_boxes[:, 2] - gt_boxes[:, 5] / 2 - cur_lidar_height gt_boxes[:, 2] -= mv_height # lidar view return gt_boxes, mv_height def points_rigid_transform(self, cloud, pose): if cloud.shape[0] == 0: return cloud mat = np.ones(shape=(cloud.shape[0], 4), dtype=np.float32) pose_mat = np.mat(pose) mat[:, 0:3] = cloud[:, 0:3] mat = np.mat(mat) transformed_mat = pose_mat * mat.T T = np.array(transformed_mat.T, dtype=np.float32) return T[:, 0:3] def get_registration_angle(self, mat): cos_theta = mat[0, 0] sin_theta = mat[1, 0] if cos_theta < -1: cos_theta = -1 if cos_theta > 1: cos_theta = 1 theta_cos = np.arccos(cos_theta) if sin_theta >= 0: return theta_cos else: return 2 * np.pi - theta_cos def registration(self, pose, pre_pose, pre_obj_points, pre_box3d_lidar): inv_pose_of_last_frame = np.linalg.inv(pose) registration_mat = np.matmul(inv_pose_of_last_frame, pre_pose) if len(pre_obj_points) != 0: pre_obj_points[:, 0:3] = self.points_rigid_transform(pre_obj_points, registration_mat)[:, 0:3] angle = self.get_registration_angle(registration_mat) pre_box3d_lidar[0:3] = self.points_rigid_transform(np.array([pre_box3d_lidar]), registration_mat)[0, 0:3] pre_box3d_lidar[6] += angle return pre_obj_points, pre_box3d_lidar def add_sampled_boxes_to_scene(self, data_dict, sampled_gt_boxes, total_valid_sampled_dict): gt_boxes_mask = np.array([n in self.class_names for n in data_dict['gt_names']], dtype=np.bool_) gt_boxes = data_dict['gt_boxes'][gt_boxes_mask] gt_names = data_dict['gt_names'][gt_boxes_mask] if 'gt_tracklets' in data_dict: data_dict['gt_tracklets'] = data_dict['gt_tracklets'][gt_boxes_mask] points = data_dict['points'] if 'road_plane' in data_dict: sampled_gt_boxes, mv_height = 
self.put_boxes_on_road_planes( sampled_gt_boxes, data_dict['road_plane'], data_dict['calib'] ) obj_points_list = [] for idx, info in enumerate(total_valid_sampled_dict): file_path = self.root_path / info['path'] # path = pathlib.Path(self.root_path) # file_path = path / info['path'] obj_points = np.fromfile(str(file_path), dtype=np.float32).reshape( [-1, self.sampler_cfg.NUM_POINT_FEATURES]) obj_points[:, :3] += sampled_gt_boxes[idx][:3] ''' if self.sampler_cfg.get('USE_ROAD_PLANE', False): # mv height obj_points[:, 2] -= mv_height[idx] ''' if self.sampling_method == 'LiDAR-aware': obj_points = self.la_sampling(obj_points, vert_res=self.vert_res, hor_res=self.hor_res) obj_points[:, 0:3] -= sampled_gt_boxes[idx][:3] obj_points = self.random_drop_out(obj_points, rand_noise=self.occlusion_noise, offset=self.occlusion_offset) obj_points[:, 0:3] += sampled_gt_boxes[idx][:3] obj_points_list.append(obj_points) obj_points = np.concatenate(obj_points_list, axis=0) sampled_gt_names = np.array([x['name'] for x in total_valid_sampled_dict]) large_sampled_gt_boxes = box_utils.enlarge_box3d( sampled_gt_boxes[:, 0:7], extra_width=self.sampler_cfg.REMOVE_EXTRA_WIDTH ) points = box_utils.remove_points_in_boxes3d(points, large_sampled_gt_boxes) points = np.concatenate([obj_points[:, 0:points.shape[1]], points], axis=0) if self.use_van: sampled_gt_names = np.array( ['Car' if sampled_gt_names[i] == 'Van' else sampled_gt_names[i] for i in range(len(sampled_gt_names))]) gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0) gt_boxes = np.concatenate([gt_boxes, sampled_gt_boxes], axis=0) valid_mask = np.ones((len(gt_names),), dtype=np.bool_) if 'valid_noise' in data_dict: valid_mask[:len(gt_names) - len(sampled_gt_names)] = data_dict['valid_noise'][:] else: valid_mask[:len(gt_names) - len(sampled_gt_names)] = 0 data_dict['valid_noise'] = valid_mask data_dict['gt_boxes'] = gt_boxes data_dict['gt_names'] = gt_names data_dict['points'] = points if 'road_plane' in data_dict: 
data_dict.pop('road_plane') return data_dict def __call__(self, data_dict): """ Args: data_dict: gt_boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...] Returns: """ gt_boxes = data_dict['gt_boxes'] gt_names = data_dict['gt_names'].astype(str) existed_boxes = gt_boxes total_valid_sampled_dict = [] for class_name, sample_group in self.sample_groups.items(): if self.limit_whole_scene: num_gt = np.sum(class_name == gt_names) sample_group['sample_num'] = str(int(self.sample_class_num[class_name]) - num_gt) if int(sample_group['sample_num']) > 0: sampled_dict = self.sample_with_fixed_number(class_name, sample_group) sampled_boxes1 = np.stack([x['box3d_lidar'] for x in sampled_dict], axis=0).astype(np.float32) if self.sampler_cfg.get('DATABASE_WITH_FAKELIDAR', False): sampled_boxes1 = box_utils.boxes3d_kitti_fakelidar_to_lidar(sampled_boxes1) sampled_boxes = copy.deepcopy(sampled_boxes1) sampled_boxes[:, 0] += np.random.random()*(self.max_sampling_dis-self.min_sampling_dis) + self.min_sampling_dis iou1 = iou3d_nms_utils.boxes_bev_iou_cpu(sampled_boxes[:, 0:7], existed_boxes[:, 0:7]) iou2 = iou3d_nms_utils.boxes_bev_iou_cpu(sampled_boxes[:, 0:7], sampled_boxes[:, 0:7]) iou2[range(sampled_boxes.shape[0]), range(sampled_boxes.shape[0])] = 0 iou1 = iou1 if iou1.shape[1] > 0 else iou2 valid_mask = ((iou1.max(axis=1) + iou2.max(axis=1)) == 0).nonzero()[0] valid_sampled_dict = [sampled_dict[x] for x in valid_mask] valid_sampled_boxes = sampled_boxes[valid_mask] existed_boxes = np.concatenate((existed_boxes, valid_sampled_boxes), axis=0) total_valid_sampled_dict.extend(valid_sampled_dict) sampled_gt_boxes = existed_boxes[gt_boxes.shape[0]:, :] if total_valid_sampled_dict.__len__() > 0: data_dict = self.add_sampled_boxes_to_scene(data_dict, sampled_gt_boxes, total_valid_sampled_dict) return data_dict ================================================ FILE: pcdet/datasets/dataset.py ================================================ from collections import defaultdict from pathlib 
import Path import torch import numpy as np import torch.utils.data as torch_data import os from ..utils import common_utils from .augmentor.data_augmentor import DataAugmentor from .augmentor.X_transform import X_TRANS from .processor.data_processor import DataProcessor from .processor.point_feature_encoder import PointFeatureEncoder import copy import time class DatasetTemplate(torch_data.Dataset): def __init__(self, dataset_cfg=None, class_names=None, training=True, is_source=True, root_path=None, logger=None, da_train=False): super().__init__() self.test_flip = False self.dataset_cfg = dataset_cfg self.training = training self.is_source = is_source self.da_train = da_train self.class_names = class_names self.logger = logger self.root_path = root_path if root_path is not None else Path(self.dataset_cfg.DATA_PATH) if self.dataset_cfg is None or class_names is None: return self.rot_num = self.dataset_cfg.get('ROT_NUM', 1) self.point_cloud_range = np.array(self.dataset_cfg.POINT_CLOUD_RANGE, dtype=np.float32) self.point_feature_encoder = PointFeatureEncoder( self.dataset_cfg.POINT_FEATURE_ENCODING, point_cloud_range=self.point_cloud_range, rot_num=self.rot_num ) self.data_augmentor = DataAugmentor( self.root_path, self.dataset_cfg.DATA_AUGMENTOR, self.class_names, logger=self.logger, ) if self.training else None self.data_processor = DataProcessor( self.dataset_cfg.DATA_PROCESSOR, point_cloud_range=self.point_cloud_range, training=self.training, rot_num=self.rot_num, num_point_features=self.point_feature_encoder.num_point_features ) x_trans_cfg = self.dataset_cfg.get('X_TRANS', None) if x_trans_cfg is not None: self.x_trans = X_TRANS(x_trans_cfg, rot_num=self.rot_num) else: raise NotImplementedError self.grid_size = self.data_processor.grid_size self.voxel_size = self.data_processor.voxel_size self.total_epochs = 0 self._merge_all_iters_to_one_epoch = False self.iter =0 @property def mode(self): return 'train' if self.training else 'test' def __getstate__(self): d 
= dict(self.__dict__) del d['logger'] return d def __setstate__(self, d): self.__dict__.update(d) @staticmethod def generate_prediction_dicts(batch_dict, pred_dicts, class_names, output_path=None): """ To support a custom dataset, implement this function to receive the predicted results from the model, and then transform the unified normative coordinate to your required coordinate, and optionally save them to disk. Args: batch_dict: dict of original data from the dataloader pred_dicts: dict of predicted results from the model pred_boxes: (N, 7), Tensor pred_scores: (N), Tensor pred_labels: (N), Tensor class_names: output_path: if it is not None, save the results to this path Returns: """ def merge_all_iters_to_one_epoch(self, merge=True, epochs=None): if merge: self._merge_all_iters_to_one_epoch = True self.total_epochs = epochs else: self._merge_all_iters_to_one_epoch = False def __len__(self): raise NotImplementedError def __getitem__(self, index): """ To support a custom dataset, implement this function to load the raw data (and labels), then transform them to the unified normative coordinate and call the function self.prepare_data() to process the data and send them to the model. Args: index: Returns: """ raise NotImplementedError def prepare_data(self, data_dict): """ Args: data_dict: points: (N, 3 + C_in) gt_boxes: optional, (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...] gt_names: optional, (N), string ... Returns: data_dict: frame_id: string points: (N, 3 + C_in) gt_boxes: optional, (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...] gt_names: optional, (N), string use_lead_xyz: bool voxels: optional (num_voxels, max_points_per_voxel, 3 + C) voxel_coords: optional (num_voxels, 3) voxel_num_points: optional (num_voxels) ... 
""" if self.training: assert 'gt_boxes' in data_dict, 'gt_boxes should be provided for training' data_dict = self.data_augmentor.forward( data_dict={ **data_dict, } ) if 'road_plane' in data_dict: data_dict.pop('road_plane') if self.rot_num>1: data_dict = self.x_trans.input_transform( data_dict={ **data_dict, },trans_boxes=True ) else: data_dict = self.x_trans.input_transform( data_dict={ **data_dict, } ) if data_dict.get('gt_boxes', None) is not None: selected = common_utils.keep_arrays_by_name(data_dict['gt_names'], self.class_names) data_dict['gt_names'] = data_dict['gt_names'][selected] for i in range(self.rot_num): if i == 0: rot_num_id = '' else: rot_num_id = str(i) if 'gt_boxes'+rot_num_id in data_dict: data_dict['gt_boxes'+rot_num_id] = data_dict['gt_boxes'+rot_num_id][selected] gt_classes = np.array([self.class_names.index(n) + 1 for n in data_dict['gt_names']], dtype=np.int32) gt_boxes = np.concatenate((data_dict['gt_boxes'+rot_num_id], gt_classes.reshape(-1, 1).astype(np.float32)), axis=1) data_dict['gt_boxes'+rot_num_id] = gt_boxes for i in range(self.rot_num): if i ==0: rot_num_id = '' else: rot_num_id = str(i) if 'mm' in data_dict: data_dict['points_mm'+rot_num_id] = data_dict['points'+rot_num_id][data_dict['points'+rot_num_id][:, -1] == 1] data_dict['points'+rot_num_id] = data_dict['points'+rot_num_id][data_dict['points'+rot_num_id][:, -1] == 2] data_dict = self.point_feature_encoder.forward(data_dict) self.iter+=1 data_dict = self.data_processor.forward( data_dict=data_dict ) if self.training and len(data_dict['gt_boxes']) == 0: new_index = np.random.randint(self.__len__()) return self.__getitem__(new_index) data_dict.pop('gt_names', None) if 'valid_noise' in data_dict: data_dict.pop('valid_noise') return data_dict def collate_batch(self, batch_list, _unused=False): data_dict = defaultdict(list) for cur_sample in batch_list: for key, val in cur_sample.items(): data_dict[key].append(val) batch_size = len(batch_list) ret = {} point_key_dict=['points', 
'voxel_coords', 'points_mm', 'voxel_coords_mm'] for i in range(1, 10): point_key_dict.append('points'+str(i)) point_key_dict.append('voxel_coords'+str(i)) point_key_dict.append('points_mm'+str(i)) point_key_dict.append('voxel_coords_mm'+str(i)) voxel_key_dict=['voxels', 'voxel_num_points', 'voxels_mm', 'voxel_num_points_mm'] for i in range(1, 10): voxel_key_dict.append('voxels'+str(i)) voxel_key_dict.append('voxel_num_points' + str(i)) voxel_key_dict.append('voxels_mm'+str(i)) voxel_key_dict.append('voxel_num_points_mm' + str(i)) boxes_key = ['gt_boxes'] for i in range(1, 10): boxes_key.append('gt_boxes'+str(i)) for key, val in data_dict.items(): try: if key in voxel_key_dict: ret[key] = np.concatenate(val, axis=0) elif key in point_key_dict: coors = [] for i, coor in enumerate(val): coor_pad = np.pad(coor, ((0, 0), (1, 0)), mode='constant', constant_values=i) coors.append(coor_pad) ret[key] = np.concatenate(coors, axis=0) elif key in boxes_key: max_gt = max([len(x) for x in val]) batch_gt_boxes3d = np.zeros((batch_size, max_gt, val[0].shape[-1]), dtype=np.float32) for k in range(batch_size): batch_gt_boxes3d[k, :val[k].__len__(), :] = val[k] ret[key] = batch_gt_boxes3d else: ret[key] = np.stack(val, axis=0) except: print('Error in collate_batch: key=%s' % key) raise TypeError ret['batch_size'] = batch_size return ret ================================================ FILE: pcdet/datasets/kitti/kitti_dataset.py ================================================ import copy import pickle import numpy as np from skimage import io from pcdet.ops.roiaware_pool3d import roiaware_pool3d_utils from pcdet.utils import box_utils, calibration_kitti, common_utils, object3d_kitti from pcdet.datasets.dataset import DatasetTemplate from pcdet.models.model_utils import model_nms_utils class KittiDataset(DatasetTemplate): def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None): """ Args: root_path: dataset_cfg: class_names: training: logger: """ 
def include_kitti_data(self, mode):
    """
    Load the pickled KITTI info lists configured for ``mode`` and append
    them to ``self.kitti_infos``.

    Args:
        mode: key into ``self.dataset_cfg.INFO_PATH`` selecting the list of
            info files (paths relative to ``self.root_path``)

    Info files that do not exist are skipped without an error. Progress is
    reported through ``self.logger`` when one is set.
    """
    log = self.logger
    if log is not None:
        log.info('Loading KITTI dataset')

    loaded = []
    for rel_path in self.dataset_cfg.INFO_PATH[mode]:
        pkl_path = self.root_path / rel_path
        if pkl_path.exists():
            # NOTE: pickle is only safe for trusted, locally generated info files.
            with open(pkl_path, 'rb') as handle:
                loaded.extend(pickle.load(handle))

    self.kitti_infos.extend(loaded)

    if log is not None:
        log.info('Total samples for KITTI dataset: %d' % (len(loaded)))
def get_infos(self, num_workers=4, has_label=True, count_inside_pts=True, sample_id_list=None):
    """
    Build one info dict per sample of the current split.

    Args:
        num_workers: size of the thread pool used to process samples
        has_label: when True, read the label file and add info['annos']
        count_inside_pts: when True (and has_label is True), also count the
            in-FOV lidar points falling inside every ground-truth box
        sample_id_list: sample ids to process; defaults to self.sample_id_list

    Returns:
        list of info dicts, in the order of the sample ids. Each dict holds
        'point_cloud', 'image', 'calib' and, when has_label is True, 'annos'.
    """
    import concurrent.futures as futures

    def process_single_scene(sample_idx):
        # Assemble the info dict of a single sample.
        print('%s sample_idx: %s' % (self.split, sample_idx))
        info = {}
        pc_info = {'num_features': 4, 'lidar_idx': sample_idx}
        info['point_cloud'] = pc_info

        image_info = {'image_idx': sample_idx, 'image_shape': self.get_image_shape(sample_idx)}
        info['image'] = image_info
        calib = self.get_calib(sample_idx)

        # Extend the calibration matrices to 4x4 by appending a [0, 0, 0, 1] row
        # (R0 is embedded in the top-left 3x3 of an otherwise zero matrix with a 1 at [3, 3]).
        P2 = np.concatenate([calib.P2, np.array([[0., 0., 0., 1.]])], axis=0)
        R0_4x4 = np.zeros([4, 4], dtype=calib.R0.dtype)
        R0_4x4[3, 3] = 1.
        R0_4x4[:3, :3] = calib.R0
        V2C_4x4 = np.concatenate([calib.V2C, np.array([[0., 0., 0., 1.]])], axis=0)
        calib_info = {'P2': P2, 'R0_rect': R0_4x4, 'Tr_velo_to_cam': V2C_4x4}

        info['calib'] = calib_info

        if has_label:
            obj_list = self.get_label(sample_idx)
            annotations = {}
            annotations['name'] = np.array([obj.cls_type for obj in obj_list])
            annotations['truncated'] = np.array([obj.truncation for obj in obj_list])
            annotations['occluded'] = np.array([obj.occlusion for obj in obj_list])
            annotations['alpha'] = np.array([obj.alpha for obj in obj_list])
            annotations['bbox'] = np.concatenate([obj.box2d.reshape(1, 4) for obj in obj_list], axis=0)
            annotations['dimensions'] = np.array([[obj.l, obj.h, obj.w] for obj in obj_list])  # lhw(camera) format
            annotations['location'] = np.concatenate([obj.loc.reshape(1, 3) for obj in obj_list], axis=0)
            annotations['rotation_y'] = np.array([obj.ry for obj in obj_list])
            annotations['score'] = np.array([obj.score for obj in obj_list])
            annotations['difficulty'] = np.array([obj.level for obj in obj_list], np.int32)

            # 'index' numbers the non-DontCare objects 0..num_objects-1 and pads the rest with -1.
            # NOTE(review): this and the [:num_objects] slices below assume the DontCare
            # entries come last in the label file - confirm against the label format.
            num_objects = len([obj.cls_type for obj in obj_list if obj.cls_type != 'DontCare'])
            num_gt = len(annotations['name'])
            index = list(range(num_objects)) + [-1] * (num_gt - num_objects)
            annotations['index'] = np.array(index, dtype=np.int32)

            loc = annotations['location'][:num_objects]
            dims = annotations['dimensions'][:num_objects]
            rots = annotations['rotation_y'][:num_objects]
            loc_lidar = calib.rect_to_lidar(loc)
            l, h, w = dims[:, 0:1], dims[:, 1:2], dims[:, 2:3]
            # Shift z by half the box height (presumably moving the reference point
            # from the box bottom to the box centre - TODO confirm).
            loc_lidar[:, 2] += h[:, 0] / 2
            # Lidar box layout: [x, y, z, l, w, h, heading] with heading = -(pi/2 + rotation_y).
            gt_boxes_lidar = np.concatenate([loc_lidar, l, w, h, -(np.pi / 2 + rots[..., np.newaxis])], axis=1)
            annotations['gt_boxes_lidar'] = gt_boxes_lidar

            info['annos'] = annotations

            if count_inside_pts:
                points = self.get_lidar(sample_idx)
                calib = self.get_calib(sample_idx)
                pts_rect = calib.lidar_to_rect(points[:, 0:3])

                # Only points that project into the image are counted.
                fov_flag = self.get_fov_flag(pts_rect, info['image']['image_shape'], calib)
                pts_fov = points[fov_flag]
                corners_lidar = box_utils.boxes_to_corners_3d(gt_boxes_lidar)
                # Entries beyond num_objects keep the -1 placeholder.
                num_points_in_gt = -np.ones(num_gt, dtype=np.int32)

                for k in range(num_objects):
                    flag = box_utils.in_hull(pts_fov[:, 0:3], corners_lidar[k])
                    num_points_in_gt[k] = flag.sum()
                annotations['num_points_in_gt'] = num_points_in_gt

        return info

    sample_id_list = sample_id_list if sample_id_list is not None else self.sample_id_list
    with futures.ThreadPoolExecutor(num_workers) as executor:
        infos = executor.map(process_single_scene, sample_id_list)
    return list(infos)
#staticmethod
def generate_prediction_dicts(self,batch_dict, pred_dicts, class_names, output_path=None):
    """
    Convert per-sample network predictions into KITTI-style annotation dicts
    and optionally write them as KITTI result text files.

    Args:
        batch_dict: batch data; 'frame_id', 'calib' and 'image_shape' are read
            per sample index
        pred_dicts: list of pred_dicts
            pred_boxes: (N, 7), Tensor
            pred_scores: (N), Tensor
            pred_labels: (N), Tensor
            WBF: optional key; when present the predictions are first passed
                through model_nms_utils.compute_WBF
        class_names: class names indexed by (pred_label - 1)
        output_path: directory (path-like supporting '/') that receives one
            '<frame_id>.txt' per sample; nothing is written when None

    Returns:
        list with one annotation dict per entry of pred_dicts; each dict has
        the keys of the template below plus 'frame_id'

    """
    def get_template_prediction(num_samples):
        # Zero-filled annotation dict for num_samples detections.
        ret_dict = {
            'name': np.zeros(num_samples), 'truncated': np.zeros(num_samples),
            'occluded': np.zeros(num_samples), 'alpha': np.zeros(num_samples),
            'bbox': np.zeros([num_samples, 4]), 'dimensions': np.zeros([num_samples, 3]),
            'location': np.zeros([num_samples, 3]), 'rotation_y': np.zeros(num_samples),
            'score': np.zeros(num_samples), 'boxes_lidar': np.zeros([num_samples, 7])
        }
        return ret_dict

    def generate_single_sample_dict(batch_index, box_dict):
        # Build the annotation dict of one sample from its prediction tensors.
        pred_scores = box_dict['pred_scores'].cpu().numpy()
        pred_boxes = box_dict['pred_boxes'].cpu().numpy()
        pred_labels = box_dict['pred_labels'].cpu().numpy()
        if 'WBF' in box_dict:
            pred_labels,pred_scores,pred_boxes = model_nms_utils.compute_WBF(pred_labels,pred_scores,pred_boxes)
        pred_dict = get_template_prediction(pred_scores.shape[0])
        # No detections: return the empty template as is.
        if pred_scores.shape[0] == 0:
            return pred_dict

        calib = batch_dict['calib'][batch_index]
        image_shape = batch_dict['image_shape'][batch_index]
        pred_boxes_camera = box_utils.boxes3d_lidar_to_kitti_camera(pred_boxes, calib)
        pred_boxes_img = box_utils.boxes3d_kitti_camera_to_imageboxes(
            pred_boxes_camera, calib, image_shape=image_shape
        )

        pred_dict['name'] = np.array(class_names)[pred_labels - 1]
        pred_dict['alpha'] = -np.arctan2(-pred_boxes[:, 1], pred_boxes[:, 0]) + pred_boxes_camera[:, 6]
        pred_dict['bbox'] = pred_boxes_img
        # 2D boxes less than 25 px tall get their column-3 coordinate enlarged by 2 px.
        # NOTE(review): presumably to lift borderline boxes over a 25 px minimum-height
        # filter in the evaluation - confirm the intent.
        height = pred_dict['bbox'][:, 3] - pred_dict['bbox'][:, 1]
        height_mask = height<25
        pred_dict['bbox'][height_mask, 3] +=2
        pred_dict['dimensions'] = pred_boxes_camera[:, 3:6]
        pred_dict['location'] = pred_boxes_camera[:, 0:3]
        pred_dict['rotation_y'] = pred_boxes_camera[:, 6]
        pred_dict['score'] = pred_scores
        pred_dict['boxes_lidar'] = pred_boxes

        return pred_dict

    annos = []
    for index, box_dict in enumerate(pred_dicts):
        frame_id = batch_dict['frame_id'][index]

        single_pred_dict = generate_single_sample_dict(index, box_dict)
        single_pred_dict['frame_id'] = frame_id
        annos.append(single_pred_dict)

        if output_path is not None:
            cur_det_file = output_path / ('%s.txt' % frame_id)
            with open(cur_det_file, 'w') as f:
                bbox = single_pred_dict['bbox']
                loc = single_pred_dict['location']
                dims = single_pred_dict['dimensions']  # lhw -> hwl

                # One line per detection: name, two -1 placeholders, alpha, 2D bbox (4),
                # dimensions reordered to h w l, location (3), rotation_y, score.
                for idx in range(len(bbox)):
                    print('%s -1 -1 %.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f'
                          % (single_pred_dict['name'][idx], single_pred_dict['alpha'][idx],
                             bbox[idx][0], bbox[idx][1], bbox[idx][2], bbox[idx][3],
                             dims[idx][1], dims[idx][2], dims[idx][0], loc[idx][0],
                             loc[idx][1], loc[idx][2], single_pred_dict['rotation_y'][idx],
                             single_pred_dict['score'][idx]), file=f)

    return annos
def create_kitti_infos(dataset_cfg, class_names, data_path, save_path, workers=4):
    """
    Generate the KITTI info pickles (train, val, trainval, test) and the
    ground-truth database of the train split.

    Args:
        dataset_cfg: dataset configuration forwarded to KittiDataset
        class_names: class names forwarded to KittiDataset
        data_path: dataset root directory
        save_path: directory (path-like supporting '/') receiving the
            'kitti_infos_*.pkl' files
        workers: number of worker threads used by get_infos
    """
    dataset = KittiDataset(dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, training=False)
    train_split, val_split = 'train', 'val'

    train_filename = save_path / ('kitti_infos_%s.pkl' % train_split)
    val_filename = save_path / ('kitti_infos_%s.pkl' % val_split)
    trainval_filename = save_path / 'kitti_infos_trainval.pkl'
    test_filename = save_path / 'kitti_infos_test.pkl'

    def collect(split, labelled):
        # Switch the dataset to `split` and gather its infos.
        dataset.set_split(split)
        return dataset.get_infos(num_workers=workers, has_label=labelled, count_inside_pts=labelled)

    def store(infos, target, message):
        # Pickle `infos` to `target`, then report where it went.
        with open(target, 'wb') as out:
            pickle.dump(infos, out)
        print(message % target)

    print('---------------Start to generate data infos---------------')

    kitti_infos_train = collect(train_split, True)
    store(kitti_infos_train, train_filename, 'Kitti info train file is saved to %s')

    kitti_infos_val = collect(val_split, True)
    store(kitti_infos_val, val_filename, 'Kitti info val file is saved to %s')

    store(kitti_infos_train + kitti_infos_val, trainval_filename, 'Kitti info trainval file is saved to %s')

    # The test split is processed without labels and without point counting.
    kitti_infos_test = collect('test', False)
    store(kitti_infos_test, test_filename, 'Kitti info test file is saved to %s')

    print('---------------Start create groundtruth database for data augmentation---------------')
    dataset.set_split(train_split)
    dataset.create_groundtruth_database(train_filename, split=train_split)

    print('---------------Data preparation Done---------------')
def set_split(self, split):
    """
    Re-run the base-class initialisation with the current settings and point
    the dataset at another split.

    Args:
        split: split name; 'test' selects the 'testing' sub-directory, any
            other value selects 'training'

    Sets self.split, self.root_split_path and self.sample_id_list. The latter
    is read from 'ImageSets/<split>.txt' under self.root_path, or set to None
    when that file does not exist.
    """
    super().__init__(
        dataset_cfg=self.dataset_cfg, class_names=self.class_names, training=self.training,
        root_path=self.root_path, logger=self.logger
    )
    self.split = split
    self.root_split_path = self.root_path / ('training' if self.split != 'test' else 'testing')

    split_dir = self.root_path / 'ImageSets' / (self.split + '.txt')
    if split_dir.exists():
        # Read inside a context manager so the file handle is closed deterministically.
        with open(split_dir) as f:
            self.sample_id_list = [x.strip() for x in f.readlines()]
    else:
        self.sample_id_list = None
@staticmethod
def get_fov_flag(pts_rect, img_shape, calib):
    """
    Flag the points that project inside the image and have non-negative depth.

    Args:
        pts_rect: points handed to calib.rect_to_img
        img_shape: image shape; index 0 bounds the second image coordinate,
            index 1 bounds the first one
        calib: object whose rect_to_img(pts_rect) returns
            (image coordinates, depth)

    Returns:
        boolean array with one entry per point
    """
    pts_img, pts_rect_depth = calib.rect_to_img(pts_rect)
    u, v = pts_img[:, 0], pts_img[:, 1]

    inside_u = (u >= 0) & (u < img_shape[1])
    inside_v = (v >= 0) & (v < img_shape[0])
    in_front = pts_rect_depth >= 0

    return inside_u & inside_v & in_front
R0_4x4[:3, :3] = calib.R0 V2C_4x4 = np.concatenate([calib.V2C, np.array([[0., 0., 0., 1.]])], axis=0) calib_info = {'P2': P2, 'R0_rect': R0_4x4, 'Tr_velo_to_cam': V2C_4x4} info['calib'] = calib_info if has_label: obj_list = self.get_label(sample_idx) annotations = {} annotations['name'] = np.array([obj.cls_type for obj in obj_list]) annotations['truncated'] = np.array([obj.truncation for obj in obj_list]) annotations['occluded'] = np.array([obj.occlusion for obj in obj_list]) annotations['alpha'] = np.array([obj.alpha for obj in obj_list]) annotations['bbox'] = np.concatenate([obj.box2d.reshape(1, 4) for obj in obj_list], axis=0) annotations['dimensions'] = np.array([[obj.l, obj.h, obj.w] for obj in obj_list]) # lhw(camera) format annotations['location'] = np.concatenate([obj.loc.reshape(1, 3) for obj in obj_list], axis=0) annotations['rotation_y'] = np.array([obj.ry for obj in obj_list]) annotations['score'] = np.array([obj.score for obj in obj_list]) annotations['difficulty'] = np.array([obj.level for obj in obj_list], np.int32) num_objects = len([obj.cls_type for obj in obj_list if obj.cls_type != 'DontCare']) num_gt = len(annotations['name']) index = list(range(num_objects)) + [-1] * (num_gt - num_objects) annotations['index'] = np.array(index, dtype=np.int32) loc = annotations['location'][:num_objects] dims = annotations['dimensions'][:num_objects] rots = annotations['rotation_y'][:num_objects] loc_lidar = calib.rect_to_lidar(loc) l, h, w = dims[:, 0:1], dims[:, 1:2], dims[:, 2:3] loc_lidar[:, 2] += h[:, 0] / 2 gt_boxes_lidar = np.concatenate([loc_lidar, l, w, h, -(np.pi / 2 + rots[..., np.newaxis])], axis=1) annotations['gt_boxes_lidar'] = gt_boxes_lidar info['annos'] = annotations if count_inside_pts: points = self.get_lidar(sample_idx) calib = self.get_calib(sample_idx) pts_rect = calib.lidar_to_rect(points[:, 0:3]) fov_flag = self.get_fov_flag(pts_rect, info['image']['image_shape'], calib) pts_fov = points[fov_flag] corners_lidar = 
def create_groundtruth_database(self, info_path=None, used_classes=None, split='train'):
    """
    Cut the points of every ground-truth box out of the multi-modal point
    clouds and store them as a sampling database.

    For each box, the contained points are written to
    '<root>/gt_database_mm/<sample>_<name>_<i>.bin' (directory
    'gt_database_<split>_mm' for splits other than 'train') with their xyz
    made relative to the box centre. An index of all stored objects, grouped
    by class name, is pickled to '<root>/kitti_dbinfos_<split>_mm.pkl'.

    Args:
        info_path: path of the pickled info list to read (as produced by get_infos)
        used_classes: if given, only these class names are entered into the
            index (the .bin files are written for every box regardless)
        split: split name, only used to build the output paths

    Returns:
        dict mapping class name -> list of db_info dicts
    """
    # Imported locally: `Path` is otherwise only bound when this file runs as a
    # script, so calling this method from an imported module raised NameError.
    from pathlib import Path

    import torch

    database_save_path = Path(self.root_path) / ('gt_database_mm' if split == 'train' else ('gt_database_%s_mm' % split))
    db_info_save_path = Path(self.root_path) / ('kitti_dbinfos_%s_mm.pkl' % split)

    database_save_path.mkdir(parents=True, exist_ok=True)
    all_db_infos = {}

    with open(info_path, 'rb') as f:
        infos = pickle.load(f)

    for k, info in enumerate(infos):
        print('gt_database sample: %d/%d' % (k + 1, len(infos)))
        sample_idx = info['point_cloud']['lidar_idx']
        points = self.get_lidar_mm(sample_idx)
        annos = info['annos']
        names = annos['name']
        difficulty = annos['difficulty']
        bbox = annos['bbox']
        gt_boxes = annos['gt_boxes_lidar']

        num_obj = gt_boxes.shape[0]
        point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(
            torch.from_numpy(points[:, 0:3]), torch.from_numpy(gt_boxes)
        ).numpy()  # (nboxes, npoints)

        for i in range(num_obj):
            filename = '%s_%s_%d.bin' % (sample_idx, names[i], i)
            filepath = database_save_path / filename
            # Boolean indexing copies, so the in-place shift leaves `points` untouched.
            gt_points = points[point_indices[i] > 0]

            gt_points[:, :3] -= gt_boxes[i, :3]
            # Raw array bytes must go through a binary-mode handle.
            with open(filepath, 'wb') as f:
                gt_points.tofile(f)

            # Only points whose last channel equals 2 are counted
            # (presumably the flag marking real lidar points - TODO confirm).
            shape = gt_points[gt_points[:, -1] == 2].shape[0]

            if (used_classes is None) or names[i] in used_classes:
                db_path = str(filepath.relative_to(self.root_path))  # gt_database_mm/xxxxx.bin
                db_info = {'name': names[i], 'path': db_path, 'image_idx': sample_idx, 'gt_idx': i,
                           'box3d_lidar': gt_boxes[i], 'num_points_in_gt': shape,
                           'difficulty': difficulty[i], 'bbox': bbox[i], 'score': annos['score'][i]}
                all_db_infos.setdefault(names[i], []).append(db_info)

    for k, v in all_db_infos.items():
        print('Database %s: %d' % (k, len(v)))

    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
    return all_db_infos
def evaluation(self, det_annos, class_names, **kwargs):
    """
    Score detections against the ground-truth annotations stored in
    self.kitti_infos using the bundled KITTI evaluation code.

    Args:
        det_annos: list of detection annotation dicts
        class_names: class names to evaluate
        **kwargs: accepted for interface compatibility, not used

    Returns:
        (ap_result_str, ap_dict) from get_official_eval_result, or
        (None, {}) when there is nothing to evaluate against, i.e. no infos
        are loaded or the infos carry no 'annos'.
    """
    # Guard the empty case as well: indexing kitti_infos[0] used to raise
    # IndexError when no info file had been loaded.
    if not self.kitti_infos or 'annos' not in self.kitti_infos[0].keys():
        return None, {}

    from .kitti_object_eval_python import eval as kitti_eval

    # Deep copies keep the evaluation code from touching the caller's data.
    eval_det_annos = copy.deepcopy(det_annos)
    eval_gt_annos = [copy.deepcopy(info['annos']) for info in self.kitti_infos]
    ap_result_str, ap_dict = kitti_eval.get_official_eval_result(eval_gt_annos, eval_det_annos, class_names)

    return ap_result_str, ap_dict
def create_kitti_infos(dataset_cfg, class_names, data_path, save_path, workers=4):
    """
    Build the multi-modal ground-truth database of the train split.

    Unlike its name suggests, this variant does not (re)generate the
    'kitti_infos_*.pkl' files: it only reads 'kitti_infos_train.pkl' from
    save_path and runs KittiDatasetMM.create_groundtruth_database on it.

    Args:
        dataset_cfg: dataset configuration forwarded to KittiDatasetMM
        class_names: class names forwarded to KittiDatasetMM
        data_path: dataset root directory
        save_path: directory (path-like supporting '/') that must already
            contain 'kitti_infos_train.pkl'
        workers: currently unused, kept for interface compatibility

    Raises:
        FileNotFoundError: if the train info file is missing
    """
    dataset = KittiDatasetMM(dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, training=False)
    train_split = 'train'
    train_filename = save_path / ('kitti_infos_%s.pkl' % train_split)

    print('---------------Start to generate data infos---------------')
    # Info generation is intentionally skipped here; the train info file has to
    # exist already. Fail early with a clear message instead of deep inside
    # create_groundtruth_database.
    if not train_filename.exists():
        raise FileNotFoundError(
            'Train info file %s not found; generate the kitti_infos_*.pkl files first' % train_filename
        )

    print('---------------Start create groundtruth database for data augmentation---------------')
    dataset.set_split(train_split)
    dataset.create_groundtruth_database(train_filename, split=train_split)

    print('---------------Data preparation Done---------------')
person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: pcdet/datasets/kitti/kitti_object_eval_python/README.md ================================================ # kitti-object-eval-python **Note**: This is borrowed from [traveller59/kitti-object-eval-python](https://github.com/traveller59/kitti-object-eval-python) Fast KITTI object detection evaluation in Python (an evaluation finishes in less than 10 seconds); supports 2D/BEV/3D/AOS and COCO-style AP. If you use the command-line interface, numba needs some time to compile the JIT functions. ## Dependencies Only Python 3.6+ is supported; requires `numpy`, `skimage`, `numba`, `fire`. If you have Anaconda, just install `cudatoolkit` in Anaconda. Otherwise, please refer to this [page](https://github.com/numba/numba#custom-python-environments) to set up LLVM and CUDA for numba.
* Install by conda: ``` conda install -c numba cudatoolkit=x.x (8.0, 9.0, 9.1, depend on your environment) ``` ## Usage * commandline interface: ``` python evaluate.py evaluate --label_path=/path/to/your_gt_label_folder --result_path=/path/to/your_result_folder --label_split_file=/path/to/val.txt --current_class=0 --coco=False ``` * python interface: ```Python import kitti_common as kitti from eval import get_official_eval_result, get_coco_eval_result def _read_imageset_file(path): with open(path, 'r') as f: lines = f.readlines() return [int(line) for line in lines] det_path = "/path/to/your_result_folder" dt_annos = kitti.get_label_annos(det_path) gt_path = "/path/to/your_gt_label_folder" gt_split_file = "/path/to/val.txt" # from https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz val_image_ids = _read_imageset_file(gt_split_file) gt_annos = kitti.get_label_annos(gt_path, val_image_ids) print(get_official_eval_result(gt_annos, dt_annos, 0)) # 6s in my computer print(get_coco_eval_result(gt_annos, dt_annos, 0)) # 18s in my computer ``` ================================================ FILE: pcdet/datasets/kitti/kitti_object_eval_python/eval.py ================================================ import io as sysio import numba import numpy as np from .rotate_iou import rotate_iou_gpu_eval import pickle @numba.jit def get_thresholds(scores: np.ndarray, num_gt, num_sample_pts=41): scores.sort() scores = scores[::-1] current_recall = 0 thresholds = [] for i, score in enumerate(scores): l_recall = (i + 1) / num_gt if i < (len(scores) - 1): r_recall = (i + 2) / num_gt else: r_recall = l_recall if (((r_recall - current_recall) < (current_recall - l_recall)) and (i < (len(scores) - 1))): continue # recall = l_recall thresholds.append(score) current_recall += 1 / (num_sample_pts - 1.0) return thresholds def clean_data(gt_anno, dt_anno, current_class, difficulty): CLASS_NAMES = ['car', 'pedestrian', 'cyclist', 'van', 'person_sitting', 'truck'] MIN_HEIGHT = [40, 25, 25] 
MAX_OCCLUSION = [0, 1, 2] MAX_TRUNCATION = [0.15, 0.3, 0.5] dc_bboxes, ignored_gt, ignored_dt = [], [], [] current_cls_name = CLASS_NAMES[current_class].lower() num_gt = len(gt_anno["name"]) num_dt = len(dt_anno["name"]) num_valid_gt = 0 for i in range(num_gt): bbox = gt_anno["bbox"][i] gt_name = gt_anno["name"][i].lower() height = bbox[3] - bbox[1] valid_class = -1 if (gt_name == current_cls_name): valid_class = 1 elif (current_cls_name == "Pedestrian".lower() and "Person_sitting".lower() == gt_name): valid_class = 0 elif (current_cls_name == "Car".lower() and "Van".lower() == gt_name): valid_class = 0 else: valid_class = -1 ignore = False if ((gt_anno["occluded"][i] > MAX_OCCLUSION[difficulty]) or (gt_anno["truncated"][i] > MAX_TRUNCATION[difficulty]) or (height <= MIN_HEIGHT[difficulty])): # if gt_anno["difficulty"][i] > difficulty or gt_anno["difficulty"][i] == -1: ignore = True if valid_class == 1 and not ignore: ignored_gt.append(0) num_valid_gt += 1 elif (valid_class == 0 or (ignore and (valid_class == 1))): ignored_gt.append(1) else: ignored_gt.append(-1) # for i in range(num_gt): if gt_anno["name"][i] == "DontCare": dc_bboxes.append(gt_anno["bbox"][i]) for i in range(num_dt): if (dt_anno["name"][i].lower() == current_cls_name): valid_class = 1 else: valid_class = -1 height = abs(dt_anno["bbox"][i, 3] - dt_anno["bbox"][i, 1]) if height < MIN_HEIGHT[difficulty]: ignored_dt.append(1) elif valid_class == 1: ignored_dt.append(0) else: ignored_dt.append(-1) return num_valid_gt, ignored_gt, ignored_dt, dc_bboxes @numba.jit(nopython=True) def image_box_overlap(boxes, query_boxes, criterion=-1): N = boxes.shape[0] K = query_boxes.shape[0] overlaps = np.zeros((N, K), dtype=boxes.dtype) for k in range(K): qbox_area = ((query_boxes[k, 2] - query_boxes[k, 0]) * (query_boxes[k, 3] - query_boxes[k, 1])) for n in range(N): iw = (min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0])) if iw > 0: ih = (min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 
1], query_boxes[k, 1])) if ih > 0: if criterion == -1: ua = ( (boxes[n, 2] - boxes[n, 0]) * (boxes[n, 3] - boxes[n, 1]) + qbox_area - iw * ih) elif criterion == 0: ua = ((boxes[n, 2] - boxes[n, 0]) * (boxes[n, 3] - boxes[n, 1])) elif criterion == 1: ua = qbox_area else: ua = 1.0 overlaps[n, k] = iw * ih / ua return overlaps def bev_box_overlap(boxes, qboxes, criterion=-1): riou = rotate_iou_gpu_eval(boxes, qboxes, criterion) return riou @numba.jit(nopython=True, parallel=True) def d3_box_overlap_kernel(boxes, qboxes, rinc, criterion=-1): # ONLY support overlap in CAMERA, not lider. N, K = boxes.shape[0], qboxes.shape[0] for i in range(N): for j in range(K): if rinc[i, j] > 0: # iw = (min(boxes[i, 1] + boxes[i, 4], qboxes[j, 1] + # qboxes[j, 4]) - max(boxes[i, 1], qboxes[j, 1])) iw = (min(boxes[i, 1], qboxes[j, 1]) - max( boxes[i, 1] - boxes[i, 4], qboxes[j, 1] - qboxes[j, 4])) if iw > 0: area1 = boxes[i, 3] * boxes[i, 4] * boxes[i, 5] area2 = qboxes[j, 3] * qboxes[j, 4] * qboxes[j, 5] inc = iw * rinc[i, j] if criterion == -1: ua = (area1 + area2 - inc) elif criterion == 0: ua = area1 elif criterion == 1: ua = area2 else: ua = inc rinc[i, j] = inc / ua else: rinc[i, j] = 0.0 def d3_box_overlap(boxes, qboxes, criterion=-1): rinc = rotate_iou_gpu_eval(boxes[:, [0, 2, 3, 5, 6]], qboxes[:, [0, 2, 3, 5, 6]], 2) d3_box_overlap_kernel(boxes, qboxes, rinc, criterion) return rinc @numba.jit(nopython=True) def compute_statistics_jit(overlaps, gt_datas, dt_datas, ignored_gt, ignored_det, dc_bboxes, metric, min_overlap, thresh=0, compute_fp=False, compute_aos=False): det_size = dt_datas.shape[0] gt_size = gt_datas.shape[0] dt_scores = dt_datas[:, -1] dt_alphas = dt_datas[:, 4] gt_alphas = gt_datas[:, 4] dt_bboxes = dt_datas[:, :4] gt_bboxes = gt_datas[:, :4] assigned_detection = [False] * det_size ignored_threshold = [False] * det_size if compute_fp: for i in range(det_size): if (dt_scores[i] < thresh): ignored_threshold[i] = True NO_DETECTION = -10000000 tp, fp, fn, similarity 
= 0, 0, 0, 0 # thresholds = [0.0] # delta = [0.0] thresholds = np.zeros((gt_size, )) thresh_idx = 0 delta = np.zeros((gt_size, )) delta_idx = 0 for i in range(gt_size): if ignored_gt[i] == -1: continue det_idx = -1 valid_detection = NO_DETECTION max_overlap = 0 assigned_ignored_det = False for j in range(det_size): if (ignored_det[j] == -1): continue if (assigned_detection[j]): continue if (ignored_threshold[j]): continue overlap = overlaps[j, i] dt_score = dt_scores[j] if (not compute_fp and (overlap > min_overlap) and dt_score > valid_detection): det_idx = j valid_detection = dt_score elif (compute_fp and (overlap > min_overlap) and (overlap > max_overlap or assigned_ignored_det) and ignored_det[j] == 0): max_overlap = overlap det_idx = j valid_detection = 1 assigned_ignored_det = False elif (compute_fp and (overlap > min_overlap) and (valid_detection == NO_DETECTION) and ignored_det[j] == 1): det_idx = j valid_detection = 1 assigned_ignored_det = True if (valid_detection == NO_DETECTION) and ignored_gt[i] == 0: fn += 1 elif ((valid_detection != NO_DETECTION) and (ignored_gt[i] == 1 or ignored_det[det_idx] == 1)): assigned_detection[det_idx] = True elif valid_detection != NO_DETECTION: tp += 1 # thresholds.append(dt_scores[det_idx]) thresholds[thresh_idx] = dt_scores[det_idx] thresh_idx += 1 if compute_aos: # delta.append(gt_alphas[i] - dt_alphas[det_idx]) delta[delta_idx] = gt_alphas[i] - dt_alphas[det_idx] delta_idx += 1 assigned_detection[det_idx] = True if compute_fp: for i in range(det_size): if (not (assigned_detection[i] or ignored_det[i] == -1 or ignored_det[i] == 1 or ignored_threshold[i])): fp += 1 nstuff = 0 if metric == 0: overlaps_dt_dc = image_box_overlap(dt_bboxes, dc_bboxes, 0) for i in range(dc_bboxes.shape[0]): for j in range(det_size): if (assigned_detection[j]): continue if (ignored_det[j] == -1 or ignored_det[j] == 1): continue if (ignored_threshold[j]): continue if overlaps_dt_dc[j, i] > min_overlap: assigned_detection[j] = True nstuff += 1 
fp -= nstuff if compute_aos: tmp = np.zeros((fp + delta_idx, )) # tmp = [0] * fp for i in range(delta_idx): tmp[i + fp] = (1.0 + np.cos(delta[i])) / 2.0 # tmp.append((1.0 + np.cos(delta[i])) / 2.0) # assert len(tmp) == fp + tp # assert len(delta) == tp if tp > 0 or fp > 0: similarity = np.sum(tmp) else: similarity = -1 return tp, fp, fn, similarity, thresholds[:thresh_idx] def get_split_parts(num, num_part): same_part = num // num_part remain_num = num % num_part if same_part == 0: return [num] if remain_num == 0: return [same_part] * num_part else: return [same_part] * num_part + [remain_num] @numba.jit(nopython=True) def fused_compute_statistics(overlaps, pr, gt_nums, dt_nums, dc_nums, gt_datas, dt_datas, dontcares, ignored_gts, ignored_dets, metric, min_overlap, thresholds, compute_aos=False): gt_num = 0 dt_num = 0 dc_num = 0 for i in range(gt_nums.shape[0]): for t, thresh in enumerate(thresholds): overlap = overlaps[dt_num:dt_num + dt_nums[i], gt_num: gt_num + gt_nums[i]] gt_data = gt_datas[gt_num:gt_num + gt_nums[i]] dt_data = dt_datas[dt_num:dt_num + dt_nums[i]] ignored_gt = ignored_gts[gt_num:gt_num + gt_nums[i]] ignored_det = ignored_dets[dt_num:dt_num + dt_nums[i]] dontcare = dontcares[dc_num:dc_num + dc_nums[i]] tp, fp, fn, similarity, _ = compute_statistics_jit( overlap, gt_data, dt_data, ignored_gt, ignored_det, dontcare, metric, min_overlap=min_overlap, thresh=thresh, compute_fp=True, compute_aos=compute_aos) pr[t, 0] += tp pr[t, 1] += fp pr[t, 2] += fn if similarity != -1: pr[t, 3] += similarity gt_num += gt_nums[i] dt_num += dt_nums[i] dc_num += dc_nums[i] def calculate_iou_partly(gt_annos, dt_annos, metric, num_parts=50): """fast iou algorithm. this function can be used independently to do result analysis. Must be used in CAMERA coordinate system. Args: gt_annos: dict, must from get_label_annos() in kitti_common.py dt_annos: dict, must from get_label_annos() in kitti_common.py metric: eval type. 0: bbox, 1: bev, 2: 3d num_parts: int. 
a parameter for fast calculate algorithm """ assert len(gt_annos) == len(dt_annos) total_dt_num = np.stack([len(a["name"]) for a in dt_annos], 0) total_gt_num = np.stack([len(a["name"]) for a in gt_annos], 0) num_examples = len(gt_annos) split_parts = get_split_parts(num_examples, num_parts) parted_overlaps = [] example_idx = 0 for num_part in split_parts: gt_annos_part = gt_annos[example_idx:example_idx + num_part] dt_annos_part = dt_annos[example_idx:example_idx + num_part] if metric == 0: gt_boxes = np.concatenate([a["bbox"] for a in gt_annos_part], 0) dt_boxes = np.concatenate([a["bbox"] for a in dt_annos_part], 0) overlap_part = image_box_overlap(gt_boxes, dt_boxes) elif metric == 1: loc = np.concatenate( [a["location"][:, [0, 2]] for a in gt_annos_part], 0) dims = np.concatenate( [a["dimensions"][:, [0, 2]] for a in gt_annos_part], 0) rots = np.concatenate([a["rotation_y"] for a in gt_annos_part], 0) gt_boxes = np.concatenate( [loc, dims, rots[..., np.newaxis]], axis=1) loc = np.concatenate( [a["location"][:, [0, 2]] for a in dt_annos_part], 0) dims = np.concatenate( [a["dimensions"][:, [0, 2]] for a in dt_annos_part], 0) rots = np.concatenate([a["rotation_y"] for a in dt_annos_part], 0) dt_boxes = np.concatenate( [loc, dims, rots[..., np.newaxis]], axis=1) overlap_part = bev_box_overlap(gt_boxes, dt_boxes).astype( np.float64) elif metric == 2: loc = np.concatenate([a["location"] for a in gt_annos_part], 0) dims = np.concatenate([a["dimensions"] for a in gt_annos_part], 0) rots = np.concatenate([a["rotation_y"] for a in gt_annos_part], 0) gt_boxes = np.concatenate( [loc, dims, rots[..., np.newaxis]], axis=1) loc = np.concatenate([a["location"] for a in dt_annos_part], 0) dims = np.concatenate([a["dimensions"] for a in dt_annos_part], 0) rots = np.concatenate([a["rotation_y"] for a in dt_annos_part], 0) dt_boxes = np.concatenate( [loc, dims, rots[..., np.newaxis]], axis=1) overlap_part = d3_box_overlap(gt_boxes, dt_boxes).astype( np.float64) else: raise 
ValueError("unknown metric") parted_overlaps.append(overlap_part) example_idx += num_part overlaps = [] example_idx = 0 for j, num_part in enumerate(split_parts): gt_annos_part = gt_annos[example_idx:example_idx + num_part] dt_annos_part = dt_annos[example_idx:example_idx + num_part] gt_num_idx, dt_num_idx = 0, 0 for i in range(num_part): gt_box_num = total_gt_num[example_idx + i] dt_box_num = total_dt_num[example_idx + i] overlaps.append( parted_overlaps[j][gt_num_idx:gt_num_idx + gt_box_num, dt_num_idx:dt_num_idx + dt_box_num]) gt_num_idx += gt_box_num dt_num_idx += dt_box_num example_idx += num_part return overlaps, parted_overlaps, total_gt_num, total_dt_num def _prepare_data(gt_annos, dt_annos, current_class, difficulty): gt_datas_list = [] dt_datas_list = [] total_dc_num = [] ignored_gts, ignored_dets, dontcares = [], [], [] total_num_valid_gt = 0 for i in range(len(gt_annos)): rets = clean_data(gt_annos[i], dt_annos[i], current_class, difficulty) num_valid_gt, ignored_gt, ignored_det, dc_bboxes = rets ignored_gts.append(np.array(ignored_gt, dtype=np.int64)) ignored_dets.append(np.array(ignored_det, dtype=np.int64)) if len(dc_bboxes) == 0: dc_bboxes = np.zeros((0, 4)).astype(np.float64) else: dc_bboxes = np.stack(dc_bboxes, 0).astype(np.float64) total_dc_num.append(dc_bboxes.shape[0]) dontcares.append(dc_bboxes) total_num_valid_gt += num_valid_gt gt_datas = np.concatenate( [gt_annos[i]["bbox"], gt_annos[i]["alpha"][..., np.newaxis]], 1) dt_datas = np.concatenate([ dt_annos[i]["bbox"], dt_annos[i]["alpha"][..., np.newaxis], dt_annos[i]["score"][..., np.newaxis] ], 1) gt_datas_list.append(gt_datas) dt_datas_list.append(dt_datas) total_dc_num = np.stack(total_dc_num, axis=0) return (gt_datas_list, dt_datas_list, ignored_gts, ignored_dets, dontcares, total_dc_num, total_num_valid_gt) def eval_class(gt_annos, dt_annos, current_classes, difficultys, metric, min_overlaps, compute_aos=False, num_parts=100): """Kitti eval. support 2d/bev/3d/aos eval. 
support 0.5:0.05:0.95 coco AP. Args: gt_annos: dict, must from get_label_annos() in kitti_common.py dt_annos: dict, must from get_label_annos() in kitti_common.py current_classes: list of int, 0: car, 1: pedestrian, 2: cyclist difficultys: list of int. eval difficulty, 0: easy, 1: normal, 2: hard metric: eval type. 0: bbox, 1: bev, 2: 3d min_overlaps: float, min overlap. format: [num_overlap, metric, class]. num_parts: int. a parameter for fast calculate algorithm Returns: dict of recall, precision and aos """ assert len(gt_annos) == len(dt_annos) num_examples = len(gt_annos) split_parts = get_split_parts(num_examples, num_parts) rets = calculate_iou_partly(dt_annos, gt_annos, metric, num_parts) overlaps, parted_overlaps, total_dt_num, total_gt_num = rets N_SAMPLE_PTS = 41 num_minoverlap = len(min_overlaps) num_class = len(current_classes) num_difficulty = len(difficultys) precision = np.zeros( [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) recall = np.zeros( [num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) aos = np.zeros([num_class, num_difficulty, num_minoverlap, N_SAMPLE_PTS]) for m, current_class in enumerate(current_classes): for l, difficulty in enumerate(difficultys): rets = _prepare_data(gt_annos, dt_annos, current_class, difficulty) (gt_datas_list, dt_datas_list, ignored_gts, ignored_dets, dontcares, total_dc_num, total_num_valid_gt) = rets for k, min_overlap in enumerate(min_overlaps[:, metric, m]): thresholdss = [] all_tp=0 all_fn = 0 for i in range(len(gt_annos)): rets = compute_statistics_jit( overlaps[i], gt_datas_list[i], dt_datas_list[i], ignored_gts[i], ignored_dets[i], dontcares[i], metric, min_overlap=min_overlap, thresh=0.0, compute_fp=False) tp, fp, fn, similarity, thresholds = rets all_tp+=tp all_fn += fn thresholdss += thresholds.tolist() thresholdss = np.array(thresholdss) thresholds = get_thresholds(thresholdss, total_num_valid_gt) thresholds = np.array(thresholds) pr = np.zeros([len(thresholds), 4]) idx = 0 for j, 
num_part in enumerate(split_parts): gt_datas_part = np.concatenate( gt_datas_list[idx:idx + num_part], 0) dt_datas_part = np.concatenate( dt_datas_list[idx:idx + num_part], 0) dc_datas_part = np.concatenate( dontcares[idx:idx + num_part], 0) ignored_dets_part = np.concatenate( ignored_dets[idx:idx + num_part], 0) ignored_gts_part = np.concatenate( ignored_gts[idx:idx + num_part], 0) fused_compute_statistics( parted_overlaps[j], pr, total_gt_num[idx:idx + num_part], total_dt_num[idx:idx + num_part], total_dc_num[idx:idx + num_part], gt_datas_part, dt_datas_part, dc_datas_part, ignored_gts_part, ignored_dets_part, metric, min_overlap=min_overlap, thresholds=thresholds, compute_aos=compute_aos) idx += num_part for i in range(len(thresholds)): recall[m, l, k, i] = pr[i, 0] / (pr[i, 0] + pr[i, 2]) precision[m, l, k, i] = pr[i, 0] / (pr[i, 0] + pr[i, 1]) if compute_aos: aos[m, l, k, i] = pr[i, 3] / (pr[i, 0] + pr[i, 1]) for i in range(len(thresholds)): precision[m, l, k, i] = np.max( precision[m, l, k, i:], axis=-1) #recall[m, l, k, i] = np.max(recall[m, l, k, i:], axis=-1) if compute_aos: aos[m, l, k, i] = np.max(aos[m, l, k, i:], axis=-1) ret_dict = { "recall": recall, "precision": precision, "orientation": aos, } return ret_dict def get_mAP(prec): sums = 0 for i in range(0, prec.shape[-1], 4): sums = sums + prec[..., i] return sums / 11 * 100 def get_mAP_R40(prec): sums = 0 for i in range(1, prec.shape[-1]): sums = sums + prec[..., i] return sums / 40 * 100 def print_str(value, *arg, sstream=None): if sstream is None: sstream = sysio.StringIO() sstream.truncate(0) sstream.seek(0) print(value, *arg, file=sstream) return sstream.getvalue() def do_eval(gt_annos, dt_annos, current_classes, min_overlaps, compute_aos=False, PR_detail_dict=None): # min_overlaps: [num_minoverlap, metric, num_class] difficultys = [0, 1, 2] ret = eval_class(gt_annos, dt_annos, current_classes, difficultys, 0, min_overlaps, compute_aos) # ret: [num_class, num_diff, num_minoverlap, 
num_sample_points] mAP_bbox = get_mAP(ret["precision"]) mAP_bbox_R40 = get_mAP_R40(ret["precision"]) if PR_detail_dict is not None: PR_detail_dict['bbox'] = ret['precision'] mAP_aos = mAP_aos_R40 = None if compute_aos: mAP_aos = get_mAP(ret["orientation"]) mAP_aos_R40 = get_mAP_R40(ret["orientation"]) if PR_detail_dict is not None: PR_detail_dict['aos'] = ret['orientation'] ret = eval_class(gt_annos, dt_annos, current_classes, difficultys, 1, min_overlaps) mAP_bev = get_mAP(ret["precision"]) mAP_bev_R40 = get_mAP_R40(ret["precision"]) if PR_detail_dict is not None: PR_detail_dict['bev'] = ret['precision'] ret = eval_class(gt_annos, dt_annos, current_classes, difficultys, 2, min_overlaps) mAP_3d = get_mAP(ret["precision"]) mAP_3d_R40 = get_mAP_R40(ret["precision"]) if PR_detail_dict is not None: PR_detail_dict['3d'] = ret['precision'] return mAP_bbox, mAP_bev, mAP_3d, mAP_aos, mAP_bbox_R40, mAP_bev_R40, mAP_3d_R40, mAP_aos_R40 def do_coco_style_eval(gt_annos, dt_annos, current_classes, overlap_ranges, compute_aos): # overlap_ranges: [range, metric, num_class] min_overlaps = np.zeros([10, *overlap_ranges.shape[1:]]) for i in range(overlap_ranges.shape[1]): for j in range(overlap_ranges.shape[2]): min_overlaps[:, i, j] = np.linspace(*overlap_ranges[:, i, j]) mAP_bbox, mAP_bev, mAP_3d, mAP_aos = do_eval( gt_annos, dt_annos, current_classes, min_overlaps, compute_aos) # ret: [num_class, num_diff, num_minoverlap] mAP_bbox = mAP_bbox.mean(-1) mAP_bev = mAP_bev.mean(-1) mAP_3d = mAP_3d.mean(-1) if mAP_aos is not None: mAP_aos = mAP_aos.mean(-1) return mAP_bbox, mAP_bev, mAP_3d, mAP_aos def get_official_eval_result(gt_annos, dt_annos, current_classes, PR_detail_dict=None): overlap_0_7 = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.7], [0.7, 0.5, 0.5, 0.7, 0.5, 0.7], [0.7, 0.5, 0.5, 0.7, 0.5, 0.7]]) overlap_0_5 = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.5], [0.5, 0.25, 0.25, 0.5, 0.25, 0.5], [0.5, 0.25, 0.25, 0.5, 0.25, 0.5]]) min_overlaps = np.stack([overlap_0_7, overlap_0_5], 
axis=0) # [2, 3, 5] class_to_name = { 0: 'Car', 1: 'Pedestrian', 2: 'Cyclist', 3: 'Van', 4: 'Person_sitting', 5: 'Truck' } name_to_class = {v: n for n, v in class_to_name.items()} if not isinstance(current_classes, (list, tuple)): current_classes = [current_classes] current_classes_int = [] for curcls in current_classes: if isinstance(curcls, str): current_classes_int.append(name_to_class[curcls]) else: current_classes_int.append(curcls) current_classes = current_classes_int min_overlaps = min_overlaps[:, :, current_classes] result = '' # check whether alpha is valid compute_aos = False for anno in dt_annos: if anno['alpha'].shape[0] != 0: if anno['alpha'][0] != -10: compute_aos = True break mAPbbox, mAPbev, mAP3d, mAPaos, mAPbbox_R40, mAPbev_R40, mAP3d_R40, mAPaos_R40 = do_eval( gt_annos, dt_annos, current_classes, min_overlaps, compute_aos, PR_detail_dict=PR_detail_dict) ret_dict = {} for j, curcls in enumerate(current_classes): # mAP threshold array: [num_minoverlap, metric, class] # mAP result: [num_class, num_diff, num_minoverlap] for i in range(min_overlaps.shape[0]): result += print_str( (f"{class_to_name[curcls]} " "AP@{:.2f}, {:.2f}, {:.2f}:".format(*min_overlaps[i, :, j]))) result += print_str((f"bbox AP:{mAPbbox[j, 0, i]:.4f}, " f"{mAPbbox[j, 1, i]:.4f}, " f"{mAPbbox[j, 2, i]:.4f}")) result += print_str((f"bev AP:{mAPbev[j, 0, i]:.4f}, " f"{mAPbev[j, 1, i]:.4f}, " f"{mAPbev[j, 2, i]:.4f}")) result += print_str((f"3d AP:{mAP3d[j, 0, i]:.4f}, " f"{mAP3d[j, 1, i]:.4f}, " f"{mAP3d[j, 2, i]:.4f}")) if compute_aos: result += print_str((f"aos AP:{mAPaos[j, 0, i]:.2f}, " f"{mAPaos[j, 1, i]:.2f}, " f"{mAPaos[j, 2, i]:.2f}")) # if i == 0: # ret_dict['%s_aos/easy' % class_to_name[curcls]] = mAPaos[j, 0, 0] # ret_dict['%s_aos/moderate' % class_to_name[curcls]] = mAPaos[j, 1, 0] # ret_dict['%s_aos/hard' % class_to_name[curcls]] = mAPaos[j, 2, 0] result += print_str( (f"{class_to_name[curcls]} " "AP_R40@{:.2f}, {:.2f}, {:.2f}:".format(*min_overlaps[i, :, j]))) result 
+= print_str((f"bbox AP:{mAPbbox_R40[j, 0, i]:.4f}, " f"{mAPbbox_R40[j, 1, i]:.4f}, " f"{mAPbbox_R40[j, 2, i]:.4f}")) result += print_str((f"bev AP:{mAPbev_R40[j, 0, i]:.4f}, " f"{mAPbev_R40[j, 1, i]:.4f}, " f"{mAPbev_R40[j, 2, i]:.4f}")) result += print_str((f"3d AP:{mAP3d_R40[j, 0, i]:.4f}, " f"{mAP3d_R40[j, 1, i]:.4f}, " f"{mAP3d_R40[j, 2, i]:.4f}")) if compute_aos: result += print_str((f"aos AP:{mAPaos_R40[j, 0, i]:.2f}, " f"{mAPaos_R40[j, 1, i]:.2f}, " f"{mAPaos_R40[j, 2, i]:.2f}")) if i == 0: ret_dict['%s_aos/easy_R40' % class_to_name[curcls]] = mAPaos_R40[j, 0, 0] ret_dict['%s_aos/moderate_R40' % class_to_name[curcls]] = mAPaos_R40[j, 1, 0] ret_dict['%s_aos/hard_R40' % class_to_name[curcls]] = mAPaos_R40[j, 2, 0] if i == 0: # ret_dict['%s_3d/easy' % class_to_name[curcls]] = mAP3d[j, 0, 0] # ret_dict['%s_3d/moderate' % class_to_name[curcls]] = mAP3d[j, 1, 0] # ret_dict['%s_3d/hard' % class_to_name[curcls]] = mAP3d[j, 2, 0] # ret_dict['%s_bev/easy' % class_to_name[curcls]] = mAPbev[j, 0, 0] # ret_dict['%s_bev/moderate' % class_to_name[curcls]] = mAPbev[j, 1, 0] # ret_dict['%s_bev/hard' % class_to_name[curcls]] = mAPbev[j, 2, 0] # ret_dict['%s_image/easy' % class_to_name[curcls]] = mAPbbox[j, 0, 0] # ret_dict['%s_image/moderate' % class_to_name[curcls]] = mAPbbox[j, 1, 0] # ret_dict['%s_image/hard' % class_to_name[curcls]] = mAPbbox[j, 2, 0] ret_dict['%s_3d/easy_R40' % class_to_name[curcls]] = mAP3d_R40[j, 0, 0] ret_dict['%s_3d/moderate_R40' % class_to_name[curcls]] = mAP3d_R40[j, 1, 0] ret_dict['%s_3d/hard_R40' % class_to_name[curcls]] = mAP3d_R40[j, 2, 0] ret_dict['%s_bev/easy_R40' % class_to_name[curcls]] = mAPbev_R40[j, 0, 0] ret_dict['%s_bev/moderate_R40' % class_to_name[curcls]] = mAPbev_R40[j, 1, 0] ret_dict['%s_bev/hard_R40' % class_to_name[curcls]] = mAPbev_R40[j, 2, 0] ret_dict['%s_image/easy_R40' % class_to_name[curcls]] = mAPbbox_R40[j, 0, 0] ret_dict['%s_image/moderate_R40' % class_to_name[curcls]] = mAPbbox_R40[j, 1, 0] 
ret_dict['%s_image/hard_R40' % class_to_name[curcls]] = mAPbbox_R40[j, 2, 0] return result, ret_dict def get_coco_eval_result(gt_annos, dt_annos, current_classes): class_to_name = { 0: 'Car', 1: 'Pedestrian', 2: 'Cyclist', 3: 'Van', 4: 'Person_sitting', } class_to_range = { 0: [0.5, 0.95, 10], 1: [0.25, 0.7, 10], 2: [0.25, 0.7, 10], 3: [0.5, 0.95, 10], 4: [0.25, 0.7, 10], } name_to_class = {v: n for n, v in class_to_name.items()} if not isinstance(current_classes, (list, tuple)): current_classes = [current_classes] current_classes_int = [] for curcls in current_classes: if isinstance(curcls, str): current_classes_int.append(name_to_class[curcls]) else: current_classes_int.append(curcls) current_classes = current_classes_int overlap_ranges = np.zeros([3, 3, len(current_classes)]) for i, curcls in enumerate(current_classes): overlap_ranges[:, :, i] = np.array( class_to_range[curcls])[:, np.newaxis] result = '' # check whether alpha is valid compute_aos = False for anno in dt_annos: if anno['alpha'].shape[0] != 0: if anno['alpha'][0] != -10: compute_aos = True break mAPbbox, mAPbev, mAP3d, mAPaos = do_coco_style_eval( gt_annos, dt_annos, current_classes, overlap_ranges, compute_aos) for j, curcls in enumerate(current_classes): # mAP threshold array: [num_minoverlap, metric, class] # mAP result: [num_class, num_diff, num_minoverlap] o_range = np.array(class_to_range[curcls])[[0, 2, 1]] o_range[1] = (o_range[2] - o_range[0]) / (o_range[1] - 1) result += print_str((f"{class_to_name[curcls]} " "coco AP@{:.2f}:{:.2f}:{:.2f}:".format(*o_range))) result += print_str((f"bbox AP:{mAPbbox[j, 0]:.2f}, " f"{mAPbbox[j, 1]:.2f}, " f"{mAPbbox[j, 2]:.2f}")) result += print_str((f"bev AP:{mAPbev[j, 0]:.2f}, " f"{mAPbev[j, 1]:.2f}, " f"{mAPbev[j, 2]:.2f}")) result += print_str((f"3d AP:{mAP3d[j, 0]:.2f}, " f"{mAP3d[j, 1]:.2f}, " f"{mAP3d[j, 2]:.2f}")) if compute_aos: result += print_str((f"aos AP:{mAPaos[j, 0]:.2f}, " f"{mAPaos[j, 1]:.2f}, " f"{mAPaos[j, 2]:.2f}")) return result 
================================================ FILE: pcdet/datasets/kitti/kitti_object_eval_python/evaluate.py ================================================ import time import fire import pcdet.datasets.kitti.kitti_object_eval_python.kitti_common as kitti from pcdet.datasets.kitti.kitti_object_eval_python.eval import get_coco_eval_result, get_official_eval_result import pickle def _read_imageset_file(path): with open(path, 'r') as f: lines = f.readlines() return [int(line) for line in lines] def evaluate(label_path, result_path, label_split_file, current_class=[0,1,2], coco=False, score_thresh=-1): dt_annos = pickle.load(open(result_path,'rb'))#kitti.get_label_annos(result_path) if score_thresh > 0: dt_annos = kitti.filter_annos_low_score(dt_annos, score_thresh) val_image_ids = _read_imageset_file(label_split_file) gt_annos = kitti.get_label_annos(label_path, val_image_ids) if coco: return get_coco_eval_result(gt_annos, dt_annos, current_class) else: return get_official_eval_result(gt_annos, dt_annos, current_class) def evaluate_dis(label_path, result_path, label_split_file, current_class=[0,1,2], coco=False, min_dis = 0, max_dis = 100): dt_annos = pickle.load(open(result_path,'rb')) val_image_ids = _read_imageset_file(label_split_file) gt_annos = kitti.get_label_annos(label_path, val_image_ids) gt_annos = kitti.filter_gt_annos_dis(gt_annos,min_dis,max_dis) dt_annos = kitti.filter_det_annos_dis(dt_annos, min_dis, max_dis) if coco: return get_coco_eval_result(gt_annos, dt_annos, current_class) else: return get_official_eval_result(gt_annos, dt_annos, current_class) ================================================ FILE: pcdet/datasets/kitti/kitti_object_eval_python/kitti_common.py ================================================ import concurrent.futures as futures import os import pathlib import re from collections import OrderedDict import numpy as np from skimage import io def get_image_index_str(img_idx): return "{:06d}".format(img_idx) def 
get_kitti_info_path(idx, prefix, info_type='image_2', file_tail='.png', training=True, relative_path=True): img_idx_str = get_image_index_str(idx) img_idx_str += file_tail prefix = pathlib.Path(prefix) if training: file_path = pathlib.Path('training') / info_type / img_idx_str else: file_path = pathlib.Path('testing') / info_type / img_idx_str if not (prefix / file_path).exists(): raise ValueError("file not exist: {}".format(file_path)) if relative_path: return str(file_path) else: return str(prefix / file_path) def get_image_path(idx, prefix, training=True, relative_path=True): return get_kitti_info_path(idx, prefix, 'image_2', '.png', training, relative_path) def get_label_path(idx, prefix, training=True, relative_path=True): return get_kitti_info_path(idx, prefix, 'label_2', '.txt', training, relative_path) def get_velodyne_path(idx, prefix, training=True, relative_path=True): return get_kitti_info_path(idx, prefix, 'velodyne', '.bin', training, relative_path) def get_calib_path(idx, prefix, training=True, relative_path=True): return get_kitti_info_path(idx, prefix, 'calib', '.txt', training, relative_path) def _extend_matrix(mat): mat = np.concatenate([mat, np.array([[0., 0., 0., 1.]])], axis=0) return mat def get_kitti_image_info(path, training=True, label_info=True, velodyne=False, calib=False, image_ids=7481, extend_matrix=True, num_worker=8, relative_path=True, with_imageshape=True): # image_infos = [] root_path = pathlib.Path(path) if not isinstance(image_ids, list): image_ids = list(range(image_ids)) def map_func(idx): image_info = {'image_idx': idx} annotations = None if velodyne: image_info['velodyne_path'] = get_velodyne_path( idx, path, training, relative_path) image_info['img_path'] = get_image_path(idx, path, training, relative_path) if with_imageshape: img_path = image_info['img_path'] if relative_path: img_path = str(root_path / img_path) image_info['img_shape'] = np.array( io.imread(img_path).shape[:2], dtype=np.int32) if label_info: label_path = 
get_label_path(idx, path, training, relative_path) if relative_path: label_path = str(root_path / label_path) annotations = get_label_anno(label_path) if calib: calib_path = get_calib_path( idx, path, training, relative_path=False) with open(calib_path, 'r') as f: lines = f.readlines() P0 = np.array( [float(info) for info in lines[0].split(' ')[1:13]]).reshape( [3, 4]) P1 = np.array( [float(info) for info in lines[1].split(' ')[1:13]]).reshape( [3, 4]) P2 = np.array( [float(info) for info in lines[2].split(' ')[1:13]]).reshape( [3, 4]) P3 = np.array( [float(info) for info in lines[3].split(' ')[1:13]]).reshape( [3, 4]) if extend_matrix: P0 = _extend_matrix(P0) P1 = _extend_matrix(P1) P2 = _extend_matrix(P2) P3 = _extend_matrix(P3) image_info['calib/P0'] = P0 image_info['calib/P1'] = P1 image_info['calib/P2'] = P2 image_info['calib/P3'] = P3 R0_rect = np.array([ float(info) for info in lines[4].split(' ')[1:10] ]).reshape([3, 3]) if extend_matrix: rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype) rect_4x4[3, 3] = 1. 
def filter_kitti_anno(image_anno,
                      used_classes,
                      used_difficulty=None,
                      dontcare_iou=None):
    """Return a copy of one image's annotations restricted to selected objects.

    Args:
        image_anno: dict of per-object numpy arrays; must contain 'name', and
            'difficulty' / 'bbox' when the corresponding filter is requested.
        used_classes: a class name or a list/tuple of class names to keep.
        used_difficulty: optional collection of difficulty values to keep.
        dontcare_iou: optional threshold; only used when 'DontCare' is one of
            ``used_classes``. Objects whose highest 2D IoU with any DontCare
            box exceeds it are dropped.

    Returns:
        A new dict with the same keys as ``image_anno``, each array indexed
        down to the surviving objects.
    """
    if not isinstance(used_classes, (list, tuple)):
        used_classes = [used_classes]

    # Stage 1: class filter.
    keep = [
        pos for pos, label in enumerate(image_anno['name'])
        if label in used_classes
    ]
    filtered = {field: values[keep] for field, values in image_anno.items()}

    # Stage 2: difficulty filter, applied on top of the class filter.
    if used_difficulty is not None:
        keep = [
            pos for pos, level in enumerate(filtered['difficulty'])
            if level in used_difficulty
        ]
        filtered = {field: filtered[field][keep] for field in image_anno}

    # Stage 3 only applies when DontCare regions were kept on purpose.
    if dontcare_iou is None or 'DontCare' not in used_classes:
        return filtered

    dontcare_rows = [
        pos for pos, label in enumerate(filtered['name'])
        if label == 'DontCare'
    ]
    # bounding box format [y_min, x_min, y_max, x_max]
    boxes = filtered['bbox']
    overlaps = iou(boxes, boxes[dontcare_rows])
    # Remove all bounding boxes that overlap with a dontcare region.
    if overlaps.size > 0:
        survivors = np.logical_not(np.amax(overlaps, axis=1) > dontcare_iou)
        filtered = {field: filtered[field][survivors] for field in image_anno}
    return filtered
def intersection(boxes1, boxes2, add1=False):
    """Compute pairwise intersection areas between boxes.

    Args:
        boxes1: a numpy array with shape [N, 4] holding N boxes, each row
            split as [y_min, x_min, y_max, x_max]
        boxes2: a numpy array with shape [M, 4] holding M boxes
        add1: if True, add 1 to every overlap height and width before
            clamping at zero (inclusive pixel-coordinate convention)

    Returns:
        a float numpy array with shape [N, M] representing pairwise
        intersection area
    """
    [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
    [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)

    all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
    all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
    if add1:
        # Out-of-place add: `+= 1.0` on an integer array raises a ufunc
        # casting error, so integer boxes used to crash here.
        all_pairs_min_ymax = all_pairs_min_ymax + 1.0
    intersect_heights = np.maximum(
        np.zeros(all_pairs_max_ymin.shape),
        all_pairs_min_ymax - all_pairs_max_ymin)

    all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
    all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
    if add1:
        all_pairs_min_xmax = all_pairs_min_xmax + 1.0
    intersect_widths = np.maximum(
        np.zeros(all_pairs_max_xmin.shape),
        all_pairs_min_xmax - all_pairs_max_xmin)
    return intersect_heights * intersect_widths
""" intersect = intersection(boxes1, boxes2, add1) area1 = area(boxes1, add1) area2 = area(boxes2, add1) union = np.expand_dims( area1, axis=1) + np.expand_dims( area2, axis=0) - intersect return intersect / union ================================================ FILE: pcdet/datasets/kitti/kitti_object_eval_python/rotate_iou.py ================================================ ##################### # Based on https://github.com/hongzhenwang/RRPN-revise # Licensed under The MIT License # Author: yanyan, scrin@foxmail.com ##################### import math import numba import numpy as np from numba import cuda @numba.jit(nopython=True) def div_up(m, n): return m // n + (m % n > 0) @cuda.jit('(float32[:], float32[:], float32[:])', device=True, inline=True) def trangle_area(a, b, c): return ((a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0])) / 2.0 @cuda.jit('(float32[:], int32)', device=True, inline=True) def area(int_pts, num_of_inter): area_val = 0.0 for i in range(num_of_inter - 2): area_val += abs( trangle_area(int_pts[:2], int_pts[2 * i + 2:2 * i + 4], int_pts[2 * i + 4:2 * i + 6])) return area_val @cuda.jit('(float32[:], int32)', device=True, inline=True) def sort_vertex_in_convex_polygon(int_pts, num_of_inter): if num_of_inter > 0: center = cuda.local.array((2, ), dtype=numba.float32) center[:] = 0.0 for i in range(num_of_inter): center[0] += int_pts[2 * i] center[1] += int_pts[2 * i + 1] center[0] /= num_of_inter center[1] /= num_of_inter v = cuda.local.array((2, ), dtype=numba.float32) vs = cuda.local.array((16, ), dtype=numba.float32) for i in range(num_of_inter): v[0] = int_pts[2 * i] - center[0] v[1] = int_pts[2 * i + 1] - center[1] d = math.sqrt(v[0] * v[0] + v[1] * v[1]) v[0] = v[0] / d v[1] = v[1] / d if v[1] < 0: v[0] = -2 - v[0] vs[i] = v[0] j = 0 temp = 0 for i in range(1, num_of_inter): if vs[i - 1] > vs[i]: temp = vs[i] tx = int_pts[2 * i] ty = int_pts[2 * i + 1] j = i while j > 0 and vs[j - 1] > temp: vs[j] = vs[j - 1] int_pts[j * 2] = 
@cuda.jit(
    '(float32[:], float32[:], int32, int32, float32[:])',
    device=True,
    inline=True)
def line_segment_intersection(pts1, pts2, i, j, temp_pts):
    """Intersect edge ``i`` of quadrilateral ``pts1`` with edge ``j`` of ``pts2``.

    Both quadrilaterals are flat arrays [x0, y0, x1, y1, x2, y2, x3, y3];
    edge ``k`` runs from corner ``k`` to corner ``(k + 1) % 4``.

    Returns True and writes the intersection point into ``temp_pts[0:2]``
    when both strict side tests below pass; otherwise returns False and
    leaves ``temp_pts`` untouched.
    """
    A = cuda.local.array((2, ), dtype=numba.float32)
    B = cuda.local.array((2, ), dtype=numba.float32)
    C = cuda.local.array((2, ), dtype=numba.float32)
    D = cuda.local.array((2, ), dtype=numba.float32)

    # Segment AB is edge i of pts1.
    A[0] = pts1[2 * i]
    A[1] = pts1[2 * i + 1]

    B[0] = pts1[2 * ((i + 1) % 4)]
    B[1] = pts1[2 * ((i + 1) % 4) + 1]

    # Segment CD is edge j of pts2.
    C[0] = pts2[2 * j]
    C[1] = pts2[2 * j + 1]

    D[0] = pts2[2 * ((j + 1) % 4)]
    D[1] = pts2[2 * ((j + 1) % 4) + 1]
    BA0 = B[0] - A[0]
    BA1 = B[1] - A[1]
    DA0 = D[0] - A[0]
    CA0 = C[0] - A[0]
    DA1 = D[1] - A[1]
    CA1 = C[1] - A[1]
    # Sign of the cross product (C - P) x (D - P) for P = A and P = B:
    # different signs mean A and B lie on opposite sides of line CD.
    acd = DA1 * CA0 > CA1 * DA0
    bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0])
    if acd != bcd:
        # Same test with the roles swapped: C and D against line AB.
        abc = CA1 * BA0 > BA1 * CA0
        abd = DA1 * BA0 > BA1 * DA0
        if abc != abd:
            # Both straddle tests passed; solve the two line equations in
            # determinant form. DH is the cross product of the two edge
            # directions.
            DC0 = D[0] - C[0]
            DC1 = D[1] - C[1]
            ABBA = A[0] * B[1] - B[0] * A[1]
            CDDC = C[0] * D[1] - D[0] * C[1]
            DH = BA1 * DC0 - BA0 * DC1
            Dx = ABBA * DC0 - BA0 * CDDC
            Dy = ABBA * DC1 - BA1 * CDDC
            temp_pts[0] = Dx / DH
            temp_pts[1] = Dy / DH
            return True
    return False
@cuda.jit('(float32[:], float32[:], float32[:])', device=True, inline=True)
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Collect the vertices of the overlap polygon of two quadrilaterals.

    ``pts1`` and ``pts2`` are flat [x0, y0, ..., x3, y3] corner arrays. The
    candidate vertices are written consecutively (x, y pairs) into
    ``int_pts`` in discovery order, not sorted; the number of points written
    is returned.
    """
    count = 0

    # Corners of either quadrilateral that lie inside the other one.
    for k in range(4):
        kx = 2 * k
        ky = kx + 1
        if point_in_quadrilateral(pts1[kx], pts1[ky], pts2):
            int_pts[count * 2] = pts1[kx]
            int_pts[count * 2 + 1] = pts1[ky]
            count += 1
        if point_in_quadrilateral(pts2[kx], pts2[ky], pts1):
            int_pts[count * 2] = pts2[kx]
            int_pts[count * 2 + 1] = pts2[ky]
            count += 1

    # Crossing points of every edge pair (4 x 4 combinations).
    crossing = cuda.local.array((2, ), dtype=numba.float32)
    for edge1 in range(4):
        for edge2 in range(4):
            if line_segment_intersection(pts1, pts2, edge1, edge2, crossing):
                int_pts[count * 2] = crossing[0]
                int_pts[count * 2 + 1] = crossing[1]
                count += 1

    return count
@cuda.jit('(float32[:], float32[:], int32)', device=True, inline=True)
def devRotateIoUEval(rbox1, rbox2, criterion=-1):
    """Overlap measure between two rotated boxes, selected by ``criterion``.

    Box areas are taken as element 2 times element 3 of each box.

    criterion == -1: intersection / union (IoU)
    criterion ==  0: intersection / area of ``rbox1``
    criterion ==  1: intersection / area of ``rbox2``
    anything else  : raw intersection area
    """
    overlap = inter(rbox1, rbox2)
    if criterion == -1:
        union = rbox1[2] * rbox1[3] + rbox2[2] * rbox2[3] - overlap
        return overlap / union
    if criterion == 0:
        return overlap / (rbox1[2] * rbox1[3])
    if criterion == 1:
        return overlap / (rbox2[2] * rbox2[3])
    return overlap
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Pairwise rotated-box overlap computed on the GPU with numba.cuda.

    Based on https://github.com/hongzhenwang/RRPN-revise.

    Args:
        boxes (float array: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float array: [K, 5]): rbboxes in the same format.
        criterion (int, optional): overlap measure forwarded to the kernel.
            -1 gives IoU; 0 normalizes the intersection by the query box
            area and 1 by the box area (the kernel passes the query box
            first); any other value gives the raw intersection area.
        device_id (int, optional): CUDA device to run on. Defaults to 0.

    Returns:
        np.ndarray: float32 array of shape [N, K]. The result is always
        float32, whatever the dtype of ``boxes``. When N or K is zero the
        empty result is returned without touching the GPU.
    """
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou
    # Must match the block size hard-coded inside rotate_iou_kernel_eval.
    threadsPerBlock = 8 * 8
    cuda.select_device(device_id)
    blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock))

    stream = cuda.stream()
    with stream.auto_synchronize():
        # The kernel works on flattened 1-D buffers.
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blockspergrid, threadsPerBlock, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
        # reshape() gives a view, so this fills `iou` in place.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    return iou
self.spconv_ver == 1: voxel_output = self._voxel_generator.generate(points) if isinstance(voxel_output, dict): voxels, coordinates, num_points = \ voxel_output['voxels'], voxel_output['coordinates'], voxel_output['num_points_per_voxel'] else: voxels, coordinates, num_points = voxel_output else: assert tv is not None, f"Unexpected error, library: 'cumm' wasn't imported properly." voxel_output = self._voxel_generator.point_to_voxel(tv.from_numpy(points)) tv_voxels, tv_coordinates, tv_num_points = voxel_output # make copy with numpy(), since numpy_view() will disappear as soon as the generator is deleted voxels = tv_voxels.numpy() coordinates = tv_coordinates.numpy() num_points = tv_num_points.numpy() return voxels, coordinates, num_points class DataProcessor(object): def __init__(self, processor_configs, point_cloud_range, training, rot_num, num_point_features): self.rot_num = rot_num self.point_cloud_range = point_cloud_range self.training = training self.num_point_features = num_point_features self.mode = 'train' if training else 'test' self.grid_size = self.voxel_size = None self.data_processor_queue = [] self.voxel_generator = None for cur_cfg in processor_configs: cur_processor = getattr(self, cur_cfg.NAME)(config=cur_cfg) self.data_processor_queue.append(cur_processor) def mask_points_and_boxes_outside_range(self, data_dict=None, config=None): if data_dict is None: return partial(self.mask_points_and_boxes_outside_range, config=config) for rot_num_id in range(self.rot_num): if rot_num_id == 0: rot_num_id_str = '' else: rot_num_id_str = str(rot_num_id) mask = common_utils.mask_points_by_range(data_dict['points'+rot_num_id_str], self.point_cloud_range) data_dict['points'+rot_num_id_str] = data_dict['points'+rot_num_id_str][mask] if 'mm' in data_dict: mask = common_utils.mask_points_by_range(data_dict['points_mm'+rot_num_id_str], self.point_cloud_range) data_dict['points_mm'+rot_num_id_str] = data_dict['points_mm'+rot_num_id_str][mask] if 
data_dict.get('gt_boxes'+rot_num_id_str, None) is not None and config.REMOVE_OUTSIDE_BOXES: mask = box_utils.mask_boxes_outside_range_numpy( data_dict['gt_boxes'+rot_num_id_str], self.point_cloud_range, min_num_corners=config.get('min_num_corners', 1) ) data_dict['gt_boxes'+rot_num_id_str] = data_dict['gt_boxes'+rot_num_id_str][mask] if rot_num_id==0 and 'gt_tracklets'+rot_num_id_str in data_dict: data_dict['gt_tracklets'] = data_dict['gt_tracklets'][mask] data_dict['num_bbs_in_tracklets'] = data_dict['num_bbs_in_tracklets'][mask] return data_dict def shuffle_points(self, data_dict=None, config=None): if data_dict is None: return partial(self.shuffle_points, config=config) if config.SHUFFLE_ENABLED[self.mode]: for rot_num_id in range(self.rot_num): if rot_num_id == 0: rot_num_id_str = '' else: rot_num_id_str = str(rot_num_id) points = data_dict['points'+rot_num_id_str] shuffle_idx = np.random.permutation(points.shape[0]) points = points[shuffle_idx] data_dict['points'+rot_num_id_str] = points if 'mm' in data_dict: points = data_dict['points_mm'+rot_num_id_str] shuffle_idx = np.random.permutation(points.shape[0]) points = points[shuffle_idx] data_dict['points_mm'+rot_num_id_str] = points return data_dict def transform_points_to_voxels(self, data_dict=None, config=None): if data_dict is None: grid_size = (self.point_cloud_range[3:6] - self.point_cloud_range[0:3]) / np.array(config.VOXEL_SIZE) self.grid_size = np.round(grid_size).astype(np.int64) self.voxel_size = config.VOXEL_SIZE # just bind the config, we will create the VoxelGeneratorWrapper later, # to avoid pickling issues in multiprocess spawn return partial(self.transform_points_to_voxels, config=config) if self.voxel_generator is None: self.voxel_generator = VoxelGeneratorWrapper( vsize_xyz=config.VOXEL_SIZE, coors_range_xyz=self.point_cloud_range, num_point_features=self.num_point_features, max_num_points_per_voxel=config.MAX_POINTS_PER_VOXEL, max_num_voxels=config.MAX_NUMBER_OF_VOXELS[self.mode], ) for 
rot_num_id in range(self.rot_num): if rot_num_id == 0: rot_num_id_str = '' else: rot_num_id_str = str(rot_num_id) points = data_dict['points'+rot_num_id_str] voxel_output = self.voxel_generator.generate(points) if isinstance(voxel_output, dict): voxels, coordinates, num_points = \ voxel_output['voxels'], voxel_output['coordinates'], voxel_output['num_points_per_voxel'] else: voxels, coordinates, num_points = voxel_output if not data_dict['use_lead_xyz']: voxels = voxels[..., 3:] # remove xyz in voxels(N, 3) data_dict['voxels'+rot_num_id_str] = voxels data_dict['voxel_coords'+rot_num_id_str] = coordinates data_dict['voxel_num_points'+rot_num_id_str] = num_points if 'mm' in data_dict: points = data_dict['points_mm'+rot_num_id_str] voxel_output = self.voxel_generator.generate(points) if isinstance(voxel_output, dict): voxels, coordinates, num_points = \ voxel_output['voxels'], voxel_output['coordinates'], voxel_output['num_points_per_voxel'] else: voxels, coordinates, num_points = voxel_output if not data_dict['use_lead_xyz']: voxels = voxels[..., 3:] # remove xyz in voxels(N, 3) data_dict['voxels_mm'+rot_num_id_str] = voxels data_dict['voxel_coords_mm'+rot_num_id_str] = coordinates data_dict['voxel_num_points_mm'+rot_num_id_str] = num_points return data_dict def sample_points(self, data_dict=None, config=None): if data_dict is None: return partial(self.sample_points, config=config) num_points = config.NUM_POINTS[self.mode] if num_points == -1: return data_dict points = data_dict['points'] if num_points < len(points): pts_depth = np.linalg.norm(points[:, 0:3], axis=1) pts_near_flag = pts_depth < 40.0 far_idxs_choice = np.where(pts_near_flag == 0)[0] near_idxs = np.where(pts_near_flag == 1)[0] if num_points > len(far_idxs_choice): near_idxs_choice = np.random.choice(near_idxs, num_points - len(far_idxs_choice), replace=False) choice = np.concatenate((near_idxs_choice, far_idxs_choice), axis=0) \ if len(far_idxs_choice) > 0 else near_idxs_choice else: choice = 
np.arange(0, len(points), dtype=np.int32) choice = np.random.choice(choice, num_points, replace=False) np.random.shuffle(choice) else: choice = np.arange(0, len(points), dtype=np.int32) if num_points > len(points): extra_choice = np.random.choice(choice, num_points - len(points), replace=False) choice = np.concatenate((choice, extra_choice), axis=0) np.random.shuffle(choice) data_dict['points'] = points[choice] return data_dict def forward(self, data_dict): """ Args: data_dict: points: (N, 3 + C_in) gt_boxes: optional, (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...] gt_names: optional, (N), string ... Returns: """ for cur_processor in self.data_processor_queue: data_dict = cur_processor(data_dict=data_dict) return data_dict ================================================ FILE: pcdet/datasets/processor/point_feature_encoder.py ================================================ import numpy as np class PointFeatureEncoder(object): def __init__(self, config, point_cloud_range=None, rot_num=1): super().__init__() self.rot_num=rot_num self.point_encoding_config = config assert list(self.point_encoding_config.src_feature_list[0:3]) == ['x', 'y', 'z'] self.used_feature_list = self.point_encoding_config.used_feature_list self.src_feature_list = self.point_encoding_config.src_feature_list self.point_cloud_range = point_cloud_range @property def num_point_features(self): return getattr(self, self.point_encoding_config.encoding_type)(points=None) def forward(self, data_dict): """ Args: data_dict: points: (N, 3 + C_in) ... Returns: data_dict: points: (N, 3 + C_out), use_lead_xyz: whether to use xyz as point-wise features ... 
""" for i in range(self.rot_num): if i == 0: rot_num_id = '' else: rot_num_id = str(i) data_dict['points'+rot_num_id], use_lead_xyz = getattr(self, self.point_encoding_config.encoding_type)( data_dict['points'+rot_num_id] ) if 'mm' in data_dict: data_dict['points_mm'+rot_num_id], use_lead_xyz = getattr(self, self.point_encoding_config.encoding_type)( data_dict['points_mm'+rot_num_id] ) data_dict['use_lead_xyz'] = use_lead_xyz return data_dict def absolute_coordinates_encoding(self, points=None): if points is None: num_output_features = len(self.used_feature_list) return num_output_features point_feature_list = [points[:, 0:3]] for x in self.used_feature_list: if x in ['x', 'y', 'z']: continue idx = self.src_feature_list.index(x) point_feature_list.append(points[:, idx:idx+1]) point_features = np.concatenate(point_feature_list, axis=1) return point_features, True def absolute_coordinates_encoding_mm(self, points=None): if points is None: num_output_features = self.point_encoding_config.num_features return num_output_features point_features = points[:, 0:self.point_encoding_config.num_features] return point_features, True ================================================ FILE: pcdet/models/__init__.py ================================================ from collections import namedtuple import numpy as np import torch from .detectors import build_detector def build_network(model_cfg, num_class, dataset): model = build_detector( model_cfg=model_cfg, num_class=num_class, dataset=dataset ) return model def load_data_to_gpu(batch_dict): for key, val in batch_dict.items(): if not isinstance(val, np.ndarray): continue if key in ['frame_id', 'metadata', 'calib', 'image_shape', 'seq_id']: continue batch_dict[key] = torch.from_numpy(val).float().cuda() def model_fn_decorator(): ModelReturn = namedtuple('ModelReturn', ['loss', 'tb_dict', 'disp_dict']) def model_func(model, batch_dict): load_data_to_gpu(batch_dict) ret_dict, tb_dict, disp_dict = model(batch_dict) loss = 
class BaseBEVBackbone(nn.Module):
    """2D convolutional backbone over BEV feature maps.

    Each level is a strided 3x3 conv block followed by ``LAYER_NUMS[i]``
    further 3x3 convs. When ``UPSAMPLE_STRIDES`` is configured, each level's
    output goes through a "deblock" (transposed conv for strides >= 1, a
    strided conv for fractional strides) before the levels are concatenated
    along the channel axis.

    Attributes:
        num_bev_features_post: sum of ``NUM_UPSAMPLE_FILTERS``, i.e. the
            channel count after concatenating the deblock outputs.
    """

    def __init__(self, model_cfg, input_channels):
        """
        Args:
            model_cfg: config supporting ``.get`` and attribute access, with
                optional LAYER_NUMS / LAYER_STRIDES / NUM_FILTERS and
                UPSAMPLE_STRIDES / NUM_UPSAMPLE_FILTERS lists.
            input_channels: channel count of ``spatial_features``.
        """
        super().__init__()
        self.model_cfg = model_cfg

        if self.model_cfg.get('LAYER_NUMS', None) is not None:
            assert len(self.model_cfg.LAYER_NUMS) == len(self.model_cfg.LAYER_STRIDES) == len(self.model_cfg.NUM_FILTERS)
            layer_nums = self.model_cfg.LAYER_NUMS
            layer_strides = self.model_cfg.LAYER_STRIDES
            num_filters = self.model_cfg.NUM_FILTERS
        else:
            layer_nums = layer_strides = num_filters = []

        if self.model_cfg.get('UPSAMPLE_STRIDES', None) is not None:
            assert len(self.model_cfg.UPSAMPLE_STRIDES) == len(self.model_cfg.NUM_UPSAMPLE_FILTERS)
            num_upsample_filters = self.model_cfg.NUM_UPSAMPLE_FILTERS
            upsample_strides = self.model_cfg.UPSAMPLE_STRIDES
        else:
            upsample_strides = num_upsample_filters = []

        num_levels = len(layer_nums)
        c_in_list = [input_channels, *num_filters[:-1]]
        self.blocks = nn.ModuleList()
        self.deblocks = nn.ModuleList()
        for idx in range(num_levels):
            cur_layers = [
                nn.ZeroPad2d(1),
                nn.Conv2d(
                    c_in_list[idx], num_filters[idx], kernel_size=3,
                    stride=layer_strides[idx], padding=0, bias=False
                ),
                nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01),
                nn.ReLU()
            ]
            for _ in range(layer_nums[idx]):
                cur_layers.extend([
                    nn.Conv2d(num_filters[idx], num_filters[idx], kernel_size=3, padding=1, bias=False),
                    nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01),
                    nn.ReLU()
                ])
            self.blocks.append(nn.Sequential(*cur_layers))
            if len(upsample_strides) > 0:
                stride = upsample_strides[idx]
                if stride >= 1:
                    self.deblocks.append(nn.Sequential(
                        nn.ConvTranspose2d(
                            num_filters[idx], num_upsample_filters[idx],
                            upsample_strides[idx],
                            stride=upsample_strides[idx], bias=False
                        ),
                        nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, momentum=0.01),
                        nn.ReLU()
                    ))
                else:
                    # A fractional stride means downsampling by 1/stride.
                    # Plain int() instead of the `np.int` alias, which was
                    # removed in NumPy 1.24.
                    stride = int(np.round(1 / stride))
                    self.deblocks.append(nn.Sequential(
                        nn.Conv2d(
                            num_filters[idx], num_upsample_filters[idx],
                            stride,
                            stride=stride, bias=False
                        ),
                        nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, momentum=0.01),
                        nn.ReLU()
                    ))

        c_in = sum(num_upsample_filters)
        if len(upsample_strides) > num_levels:
            # One extra stride entry adds a final deblock on the fused map.
            self.deblocks.append(nn.Sequential(
                nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], stride=upsample_strides[-1], bias=False),
                nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01),
                nn.ReLU(),
            ))

        self.num_bev_features_post = c_in

    def forward(self, data_dict):
        """
        Args:
            data_dict:
                spatial_features: input BEV feature map
        Returns:
            data_dict with 'st_features_2d' set to the fused feature map
        """
        spatial_features = data_dict['spatial_features']
        ups = []
        x = spatial_features
        for i in range(len(self.blocks)):
            x = self.blocks[i](x)
            if len(self.deblocks) > 0:
                ups.append(self.deblocks[i](x))
            else:
                ups.append(x)

        if len(ups) > 1:
            x = torch.cat(ups, dim=1)
        elif len(ups) == 1:
            x = ups[0]

        if len(self.deblocks) > len(self.blocks):
            x = self.deblocks[-1](x)

        data_dict['st_features_2d'] = x

        return data_dict
def bilinear_interpolate_torch(im, x, y):
    """Sample ``im`` at fractional pixel positions with bilinear weights.

    Args:
        im: (H, W, C) [y, x]
        x: (N) column coordinates
        y: (N) row coordinates

    Returns:
        (N, C) interpolated values. Neighbour indices are clamped to the
        image bounds first and the weights are computed from the clamped
        indices.
    """
    last_row = im.shape[0] - 1
    last_col = im.shape[1] - 1

    col_lo = torch.floor(x).long()
    col_hi = col_lo + 1
    row_lo = torch.floor(y).long()
    row_hi = row_lo + 1

    col_lo = torch.clamp(col_lo, 0, last_col)
    col_hi = torch.clamp(col_hi, 0, last_col)
    row_lo = torch.clamp(row_lo, 0, last_row)
    row_hi = torch.clamp(row_hi, 0, last_row)

    # Distances to the (clamped) neighbours along each axis.
    to_col_hi = col_hi.type_as(x) - x
    to_col_lo = x - col_lo.type_as(x)
    to_row_hi = row_hi.type_as(y) - y
    to_row_lo = y - row_lo.type_as(y)

    def _weighted(values, weight):
        # Scale every sampled row of `values` by its per-point weight.
        return torch.t(torch.t(values) * weight)

    return (_weighted(im[row_lo, col_lo], to_col_hi * to_row_hi)
            + _weighted(im[row_hi, col_lo], to_col_hi * to_row_lo)
            + _weighted(im[row_lo, col_hi], to_col_lo * to_row_hi)
            + _weighted(im[row_hi, col_hi], to_col_lo * to_row_lo))
def bev_align(self, bev_feat, transform_param, stride, stage_i):
    """Resample a transformed copy's BEV features onto the stage-0 grid.

    For every sample, a grid of pseudo points is pushed through the
    ``stage_i`` transform and then through the inverse of the stage-0
    transform, and ``bev_feat`` is bilinearly sampled at the resulting
    positions.

    Args:
        bev_feat: batched BEV features; the last two dims are the spatial
            dims (as passed from ``forward``: (N, C, H, W)).
        transform_param: per-sample, per-stage transform parameters,
            indexed as ``transform_param[sample][stage]``.
        stride: BEV stride relative to the voxel grid.
        stage_i: index of the stage that produced ``bev_feat``.

    Returns:
        Tensor stacked over the batch, one (C, rows, cols) map per sample.
    """
    rows, cols = bev_feat.shape[-2], bev_feat.shape[-1]
    aligned_maps = []

    for sample_idx, sample_feat in enumerate(bev_feat):
        # Pseudo points are rebuilt per sample, as a flat (num_points, 3) list.
        pseudo_pts = self.get_pseudo_points(
            self.point_cloud_range, self.voxel_size, stride).reshape(-1, 3)

        sample_params = transform_param[sample_idx]
        reference_param = sample_params[0]
        stage_param = sample_params[stage_i]

        forwarded = self.x_trans.forward_with_param(
            {'points': pseudo_pts, 'transform_param': stage_param})
        restored = self.x_trans.backward_with_param(
            {'points': forwarded['points'], 'transform_param': reference_param})

        sampled = self.interpolate_from_bev_features(
            restored['points'], sample_feat, stride)
        # (num_points, C) -> (rows, cols, C) -> (C, rows, cols)
        aligned_maps.append(sampled.reshape(rows, cols, -1).permute(2, 0, 1))

    return torch.stack(aligned_maps)
all_feat.append(spatial_features) elif 'transform_param' in batch_dict and i>0: aligned_bev_feat = self.bev_align(spatial_features.clone(), batch_dict['transform_param'], batch_dict['spatial_features_stride'], i) all_feat.append(aligned_bev_feat) if 'transform_param' in batch_dict: all_feat = torch.stack(all_feat) if self.model_cfg.get('ALIGN_METHOD', 'none') == 'max': final_feat = all_feat.max(0)[0] batch_dict['spatial_features'] = final_feat elif self.model_cfg.get('ALIGN_METHOD', 'none') == 'mean': final_feat = all_feat.mean(0) batch_dict['spatial_features'] = final_feat else: raise NotImplementedError return batch_dict ================================================ FILE: pcdet/models/backbones_2d/map_to_bev/pointpillar_scatter.py ================================================ import torch import torch.nn as nn class PointPillarScatter(nn.Module): def __init__(self, model_cfg, grid_size, **kwargs): super().__init__() self.model_cfg = model_cfg self.num_bev_features = self.model_cfg.NUM_BEV_FEATURES self.nx, self.ny, self.nz = grid_size assert self.nz == 1 def forward(self, batch_dict, **kwargs): pillar_features, coords = batch_dict['pillar_features'], batch_dict['voxel_coords'] batch_spatial_features = [] batch_size = coords[:, 0].max().int().item() + 1 for batch_idx in range(batch_size): spatial_feature = torch.zeros( self.num_bev_features, self.nz * self.nx * self.ny, dtype=pillar_features.dtype, device=pillar_features.device) batch_mask = coords[:, 0] == batch_idx this_coords = coords[batch_mask, :] indices = this_coords[:, 1] + this_coords[:, 2] * self.nx + this_coords[:, 3] indices = indices.type(torch.long) pillars = pillar_features[batch_mask, :] pillars = pillars.t() spatial_feature[:, indices] = pillars batch_spatial_features.append(spatial_feature) batch_spatial_features = torch.stack(batch_spatial_features, 0) batch_spatial_features = batch_spatial_features.view(batch_size, self.num_bev_features * self.nz, self.ny, self.nx) 
def bilinear_interpolate_torch(im, x, y):
    """
    Bilinearly sample a feature map at fractional pixel positions.

    Args:
        im: (H, W, C) feature map indexed as [y, x]
        x: (N) fractional column positions
        y: (N) fractional row positions

    Returns:
        (N, C) interpolated features
    """
    x0 = torch.floor(x).long()
    x1 = x0 + 1

    y0 = torch.floor(y).long()
    y1 = y0 + 1

    # Corner indices are clamped to the map, so queries outside it read border cells.
    x0 = torch.clamp(x0, 0, im.shape[1] - 1)
    x1 = torch.clamp(x1, 0, im.shape[1] - 1)
    y0 = torch.clamp(y0, 0, im.shape[0] - 1)
    y1 = torch.clamp(y1, 0, im.shape[0] - 1)

    Ia = im[y0, x0]
    Ib = im[y1, x0]
    Ic = im[y0, x1]
    Id = im[y1, x1]

    wa = (x1.type_as(x) - x) * (y1.type_as(y) - y)
    wb = (x1.type_as(x) - x) * (y - y0.type_as(y))
    wc = (x - x0.type_as(x)) * (y1.type_as(y) - y)
    wd = (x - x0.type_as(x)) * (y - y0.type_as(y))
    ans = torch.t((torch.t(Ia) * wa)) + torch.t(torch.t(Ib) * wb) + torch.t(torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd)
    return ans


class BEVFeaturesInterpolation(nn.Module):
    """
    Attach BEV features to raw points by bilinear interpolation.

    For every frame the configured BEV maps ('temporal_features' and/or
    'spatial_features') are sampled at the point locations, concatenated and
    projected to NUM_OUTPUT_FEATURES channels with Linear + BatchNorm1d + ReLU.
    """

    def __init__(self, model_cfg, voxel_size, point_cloud_range, num_frames=1, num_bev_features=None,
                 num_rawpoint_features=None, **kwargs):
        super().__init__()
        self.num_frames = num_frames
        self.model_cfg = model_cfg
        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range

        # Not used by this module; kept so the attribute set stays unchanged.
        self.SA_layers = nn.ModuleList()
        self.SA_layer_names = []
        self.downsample_times_map = {}

        # Each enabled source contributes num_bev_features input channels.
        c_in = 0
        if 'temporal_features' in self.model_cfg.FEATURES_SOURCE:
            c_in += num_bev_features
        if 'spatial_features' in self.model_cfg.FEATURES_SOURCE:
            c_in += num_bev_features

        self.output_bev_features = nn.Sequential(
            nn.Linear(c_in, self.model_cfg.NUM_OUTPUT_FEATURES, bias=False),
            nn.BatchNorm1d(self.model_cfg.NUM_OUTPUT_FEATURES),
            nn.ReLU(),
        )
        self.num_point_features = self.model_cfg.NUM_OUTPUT_FEATURES
        self.num_point_features_before_fusion = c_in

    def interpolate_from_bev_features(self, points, bev_features, batch_size, bev_stride):
        """
        Args:
            points: (N1 + N2 + ..., 4+) rows of [bs_idx, x, y, ...]
            bev_features: (B, C, H, W)
            batch_size: number of samples in the batch
            bev_stride: voxels per BEV cell

        Returns:
            point_bev_features: (N1 + N2 + ..., C), grouped sample by sample
        """
        batch_column = points[:, 0]  # loop invariant
        point_bev_features_list = []
        for k in range(batch_size):
            cur_batch_points = points[batch_column == k]

            x_idxs = (cur_batch_points[:, 1] - self.point_cloud_range[0]) / self.voxel_size[0]
            y_idxs = (cur_batch_points[:, 2] - self.point_cloud_range[1]) / self.voxel_size[1]

            cur_x_idxs = x_idxs / bev_stride
            cur_y_idxs = y_idxs / bev_stride

            cur_bev_features = bev_features[k].permute(1, 2, 0)  # (H, W, C)
            point_bev_features_list.append(
                bilinear_interpolate_torch(cur_bev_features, cur_x_idxs, cur_y_idxs)
            )

        return torch.cat(point_bev_features_list, dim=0)

    def _fused_point_features(self, points, batch_dict, key_suffix):
        """Sample every configured BEV source of one frame at `points` and fuse the result."""
        point_features_list = []
        # Order matters: it fixes the channel layout seen by the fusion layer.
        for source in ('temporal_features', 'spatial_features'):
            if source in self.model_cfg.FEATURES_SOURCE:
                point_features_list.append(
                    self.interpolate_from_bev_features(
                        points, batch_dict[source + key_suffix], batch_dict['batch_size'],
                        bev_stride=batch_dict['spatial_features_stride']
                    )
                )

        point_features = torch.cat(point_features_list, dim=-1)
        return self.output_bev_features(point_features.view(-1, point_features.shape[-1]))

    def forward(self, batch_dict):
        """
        Args:
            batch_dict:
                batch_size: int
                points: (N, 1 + 3 + C) [bs_idx, x, y, z, ...] of the current frame
                points-<i>: optional points of frame i (i >= 1)
                temporal_features / spatial_features[-<i>]: (B, C, H, W) BEV maps
                spatial_features_stride: voxels per BEV cell
        Returns:
            batch_dict:
                point_features[-<i>]: (N, NUM_OUTPUT_FEATURES)
                point_coords[-<i>]: (N, 4)
        """
        for i in range(self.num_frames):
            # The current frame uses plain keys, frame i > 0 uses keys suffixed with '-i'.
            key_suffix = '' if i == 0 else str(-i)
            points_key = 'points' + key_suffix
            if i > 0 and points_key not in batch_dict:
                continue

            points = batch_dict[points_key]
            batch_dict['point_features' + key_suffix] = self._fused_point_features(points, batch_dict, key_suffix)
            batch_dict['point_coords' + key_suffix] = points[:, 0:4]

        return batch_dict
def bilinear_interpolate_torch(im, x, y):
    """
    Bilinearly sample a feature map at fractional pixel positions.

    Args:
        im: (H, W, C) feature map indexed as [y, x]
        x: (N) fractional column positions
        y: (N) fractional row positions

    Returns:
        (N, C) interpolated features
    """
    x0 = torch.floor(x).long()
    x1 = x0 + 1

    y0 = torch.floor(y).long()
    y1 = y0 + 1

    # Corner indices are clamped to the map, so queries outside it read border cells.
    x0 = torch.clamp(x0, 0, im.shape[1] - 1)
    x1 = torch.clamp(x1, 0, im.shape[1] - 1)
    y0 = torch.clamp(y0, 0, im.shape[0] - 1)
    y1 = torch.clamp(y1, 0, im.shape[0] - 1)

    Ia = im[y0, x0]
    Ib = im[y1, x0]
    Ic = im[y0, x1]
    Id = im[y1, x1]

    wa = (x1.type_as(x) - x) * (y1.type_as(y) - y)
    wb = (x1.type_as(x) - x) * (y - y0.type_as(y))
    wc = (x - x0.type_as(x)) * (y1.type_as(y) - y)
    wd = (x - x0.type_as(x)) * (y - y0.type_as(y))
    ans = torch.t((torch.t(Ia) * wa)) + torch.t(torch.t(Ib) * wb) + torch.t(torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd)
    return ans


def _points_near_rois_mask(rois, points, sample_radius_with_roi):
    """Return a bool mask of the points lying close enough to their nearest RoI centre."""
    distance = (points[:, None, :] - rois[None, :, 0:3]).norm(dim=-1)
    min_dis, min_dis_roi_idx = distance.min(dim=-1)
    # Half of the box diagonal of the nearest RoI, enlarged by the sampling radius.
    roi_max_dim = (rois[min_dis_roi_idx, 3:6] / 2).norm(dim=-1)
    return min_dis < roi_max_dim + sample_radius_with_roi


def sample_points_with_roi(rois, points, sample_radius_with_roi, num_max_points_of_part=200000):
    """
    Keep the points that lie near at least one RoI.

    Args:
        rois: (M, 7 + C)
        points: (N, 3)
        sample_radius_with_roi: extra margin added to half of the RoI diagonal
        num_max_points_of_part: points are processed in chunks of this size so
            the (N, M) distance matrix stays bounded

    Returns:
        sampled_points: (N_out, 3); the first point alone when nothing is selected
        point_mask: (N) bool mask of the selected points
    """
    if points.shape[0] < num_max_points_of_part:
        point_mask = _points_near_rois_mask(rois, points, sample_radius_with_roi)
    else:
        point_mask_list = []
        for start_idx in range(0, points.shape[0], num_max_points_of_part):
            chunk = points[start_idx:start_idx + num_max_points_of_part]
            point_mask_list.append(_points_near_rois_mask(rois, chunk, sample_radius_with_roi))
        point_mask = torch.cat(point_mask_list, dim=0)

    # Never return an empty set: fall back to the first point.
    sampled_points = points[:1] if point_mask.sum() == 0 else points[point_mask, :]

    return sampled_points, point_mask


def sector_fps(points, num_sampled_points, num_sectors):
    """
    Farthest point sampling performed independently inside angular sectors.

    Args:
        points: (N, 3)
        num_sampled_points: int, total sampling budget shared between sectors in
            proportion to their point counts
        num_sectors: int

    Returns:
        sampled_points: (N_out, 3)
    """
    sector_size = np.pi * 2 / num_sectors
    point_angles = torch.atan2(points[:, 1], points[:, 0]) + np.pi
    # atan2 can return exactly pi, which maps to index num_sectors. Clamp to the
    # last valid sector so those points are not silently dropped.
    sector_idx = (point_angles / sector_size).floor().clamp(min=0, max=num_sectors - 1)

    xyz_points_list = []
    xyz_batch_cnt = []
    num_sampled_points_list = []
    for k in range(num_sectors):
        mask = (sector_idx == k)
        cur_num_points = mask.sum().item()
        if cur_num_points > 0:
            xyz_points_list.append(points[mask])
            xyz_batch_cnt.append(cur_num_points)
            ratio = cur_num_points / points.shape[0]
            num_sampled_points_list.append(
                min(cur_num_points, math.ceil(ratio * num_sampled_points))
            )

    if len(xyz_batch_cnt) == 0:
        # No sector received a point: treat the whole input as a single sector.
        xyz_points_list.append(points)
        xyz_batch_cnt.append(len(points))
        num_sampled_points_list.append(num_sampled_points)
        print(f'Warning: empty sector points detected in SectorFPS: points.shape={points.shape}')

    xyz = torch.cat(xyz_points_list, dim=0)
    xyz_batch_cnt = torch.tensor(xyz_batch_cnt, device=points.device).int()
    sampled_points_batch_cnt = torch.tensor(num_sampled_points_list, device=points.device).int()

    sampled_pt_idxs = pointnet2_stack_utils.stack_farthest_point_sample(
        xyz.contiguous(), xyz_batch_cnt, sampled_points_batch_cnt
    ).long()

    sampled_points = xyz[sampled_pt_idxs]

    return sampled_points
def interpolate_from_bev_features(self, keypoints, bev_features, batch_size, bev_stride):
    """
    Sample the BEV map of each sample at the x/y location of its keypoints.

    Args:
        keypoints: (N1 + N2 + ..., 4) rows of [bs_idx, x, y, z]
        bev_features: (B, C, H, W)
        batch_size: number of samples in the batch
        bev_stride: voxels per BEV cell

    Returns:
        point_bev_features: (N1 + N2 + ..., C), grouped sample by sample
    """
    # Metric coordinates -> voxel indices -> BEV cell indices.
    bev_x = (keypoints[:, 1] - self.point_cloud_range[0]) / self.voxel_size[0] / bev_stride
    bev_y = (keypoints[:, 2] - self.point_cloud_range[1]) / self.voxel_size[1] / bev_stride

    per_sample_features = []
    for sample_idx in range(batch_size):
        in_sample = (keypoints[:, 0] == sample_idx)
        feature_map = bev_features[sample_idx].permute(1, 2, 0)  # (H, W, C)
        per_sample_features.append(
            bilinear_interpolate_torch(feature_map, bev_x[in_sample], bev_y[in_sample])
        )

    return torch.cat(per_sample_features, dim=0)  # (N1 + N2 + ..., C)
sample_points_with_roi( rois=roi_boxes, points=points, sample_radius_with_roi=self.model_cfg.SPC_SAMPLING.SAMPLE_RADIUS_WITH_ROI, num_max_points_of_part=self.model_cfg.SPC_SAMPLING.get('NUM_POINTS_OF_EACH_SAMPLE_PART', 200000) ) sampled_points = sector_fps( points=sampled_points, num_sampled_points=self.model_cfg.NUM_KEYPOINTS, num_sectors=self.model_cfg.SPC_SAMPLING.NUM_SECTORS ) return sampled_points def get_sampled_points(self, batch_dict): """ Args: batch_dict: Returns: keypoints: (N1 + N2 + ..., 4), where 4 indicates [bs_idx, x, y, z] """ batch_size = batch_dict['batch_size'] if self.model_cfg.POINT_SOURCE == 'raw_points': src_points = batch_dict['points'][:, 1:4] batch_indices = batch_dict['points'][:, 0].long() elif self.model_cfg.POINT_SOURCE == 'voxel_centers': src_points = common_utils.get_voxel_centers( batch_dict['voxel_coords'][:, 1:4], downsample_times=1, voxel_size=self.voxel_size, point_cloud_range=self.point_cloud_range ) batch_indices = batch_dict['voxel_coords'][:, 0].long() else: raise NotImplementedError keypoints_list = [] for bs_idx in range(batch_size): bs_mask = (batch_indices == bs_idx) sampled_points = src_points[bs_mask].unsqueeze(dim=0) # (1, N, 3) if self.model_cfg.SAMPLE_METHOD == 'FPS': cur_pt_idxs = pointnet2_stack_utils.farthest_point_sample( sampled_points[:, :, 0:3].contiguous(), self.model_cfg.NUM_KEYPOINTS ).long() if sampled_points.shape[1] < self.model_cfg.NUM_KEYPOINTS: times = int(self.model_cfg.NUM_KEYPOINTS / sampled_points.shape[1]) + 1 non_empty = cur_pt_idxs[0, :sampled_points.shape[1]] cur_pt_idxs[0] = non_empty.repeat(times)[:self.model_cfg.NUM_KEYPOINTS] keypoints = sampled_points[0][cur_pt_idxs[0]].unsqueeze(dim=0) elif self.model_cfg.SAMPLE_METHOD == 'SPC': cur_keypoints = self.sectorized_proposal_centric_sampling( roi_boxes=batch_dict['rois'][bs_idx], points=sampled_points[0] ) bs_idxs = cur_keypoints.new_ones(cur_keypoints.shape[0]) * bs_idx keypoints = torch.cat((bs_idxs[:, None], cur_keypoints), dim=1) else: 
@staticmethod
def aggregate_keypoint_features_from_one_source(
        batch_size, aggregate_func, xyz, xyz_features, xyz_bs_idxs, new_xyz, new_xyz_batch_cnt,
        filter_neighbors_with_roi=False, radius_of_neighbor=None, num_max_points_of_part=200000, rois=None
):
    """
    Pool the features of one point/voxel source around the keypoints.

    Args:
        batch_size: number of samples in the batch
        aggregate_func: callable taking (xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt,
            features) as keywords and returning (pooled_points, pooled_features)
        xyz: (N, 3)
        xyz_features: (N, C), or None when the source carries no features
        xyz_bs_idxs: (N)
        new_xyz: (M, 3)
        new_xyz_batch_cnt: (batch_size), [N1, N2, ...]

        filter_neighbors_with_roi: True/False, drop source points far from every RoI
        radius_of_neighbor: float, margin used by the RoI filter
        num_max_points_of_part: int, chunk size used by the RoI filter
        rois: (batch_size, num_rois, 7 + C)
    Returns:
        pooled_features: second element of the aggregate_func result
    """
    xyz_batch_cnt = xyz.new_zeros(batch_size).int()
    if filter_neighbors_with_roi:
        # Filter coordinates and features together so they stay aligned.
        point_features = torch.cat((xyz, xyz_features), dim=-1) if xyz_features is not None else xyz
        point_features_list = []
        for bs_idx in range(batch_size):
            bs_mask = (xyz_bs_idxs == bs_idx)
            _, valid_mask = sample_points_with_roi(
                rois=rois[bs_idx], points=xyz[bs_mask],
                sample_radius_with_roi=radius_of_neighbor, num_max_points_of_part=num_max_points_of_part,
            )
            point_features_list.append(point_features[bs_mask][valid_mask])
            xyz_batch_cnt[bs_idx] = valid_mask.sum()

        valid_point_features = torch.cat(point_features_list, dim=0)
        xyz = valid_point_features[:, 0:3]
        xyz_features = valid_point_features[:, 3:] if xyz_features is not None else None
    else:
        for bs_idx in range(batch_size):
            xyz_batch_cnt[bs_idx] = (xyz_bs_idxs == bs_idx).sum()

    # xyz_features may legitimately be None (e.g. raw points with xyz only);
    # forward None instead of crashing on `.contiguous()`.
    pooled_points, pooled_features = aggregate_func(
        xyz=xyz.contiguous(),
        xyz_batch_cnt=xyz_batch_cnt,
        new_xyz=new_xyz,
        new_xyz_batch_cnt=new_xyz_batch_cnt,
        features=xyz_features.contiguous() if xyz_features is not None else None,
    )
    return pooled_features
keypoints: (B, num_keypoints, 3) multi_scale_3d_features: { 'x_conv4': ... } points: optional (N, 1 + 3 + C) [bs_idx, x, y, z, ...] spatial_features: optional spatial_features_stride: optional Returns: point_features: (N, C) point_coords: (N, 4) """ keypoints = self.get_sampled_points(batch_dict) point_features_list = [] if 'bev' in self.model_cfg.FEATURES_SOURCE: point_bev_features = self.interpolate_from_bev_features( keypoints, batch_dict['spatial_features'], batch_dict['batch_size'], bev_stride=batch_dict['spatial_features_stride'] ) point_features_list.append(point_bev_features) batch_size = batch_dict['batch_size'] new_xyz = keypoints[:, 1:4].contiguous() new_xyz_batch_cnt = new_xyz.new_zeros(batch_size).int() for k in range(batch_size): new_xyz_batch_cnt[k] = (keypoints[:, 0] == k).sum() if 'raw_points' in self.model_cfg.FEATURES_SOURCE: raw_points = batch_dict['points'] pooled_features = self.aggregate_keypoint_features_from_one_source( batch_size=batch_size, aggregate_func=self.SA_rawpoints, xyz=raw_points[:, 1:4], xyz_features=raw_points[:, 4:].contiguous() if raw_points.shape[1] > 4 else None, xyz_bs_idxs=raw_points[:, 0], new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt, filter_neighbors_with_roi=self.model_cfg.SA_LAYER['raw_points'].get('FILTER_NEIGHBOR_WITH_ROI', False), radius_of_neighbor=self.model_cfg.SA_LAYER['raw_points'].get('RADIUS_OF_NEIGHBOR_WITH_ROI', None), rois=batch_dict.get('rois', None) ) point_features_list.append(pooled_features) for k, src_name in enumerate(self.SA_layer_names): cur_coords = batch_dict['multi_scale_3d_features'][src_name].indices cur_features = batch_dict['multi_scale_3d_features'][src_name].features.contiguous() xyz = common_utils.get_voxel_centers( cur_coords[:, 1:4], downsample_times=self.downsample_times_map[src_name], voxel_size=self.voxel_size, point_cloud_range=self.point_cloud_range ) pooled_features = self.aggregate_keypoint_features_from_one_source( batch_size=batch_size, 
class PointNet2MSG(nn.Module):
    """
    PointNet++ backbone built from multi-scale-grouping set-abstraction layers
    followed by feature-propagation layers, operating on equally sized batches.
    """

    def __init__(self, model_cfg, input_channels, **kwargs):
        super().__init__()
        self.model_cfg = model_cfg
        sa_cfg = self.model_cfg.SA_CONFIG

        self.SA_modules = nn.ModuleList()
        # xyz is handled separately, so only the remaining channels count as features.
        channel_in = input_channels - 3

        self.num_points_each_layer = []
        skip_channel_list = [input_channels - 3]
        for k in range(len(sa_cfg.NPOINTS)):
            # Prepend the input width to the MLP spec of every grouping scale.
            mlps = [[channel_in] + scale_mlp for scale_mlp in sa_cfg.MLPS[k]]
            channel_out = 0
            for scale_mlp in mlps:
                channel_out += scale_mlp[-1]

            self.SA_modules.append(
                pointnet2_modules.PointnetSAModuleMSG(
                    npoint=sa_cfg.NPOINTS[k],
                    radii=sa_cfg.RADIUS[k],
                    nsamples=sa_cfg.NSAMPLE[k],
                    mlps=mlps,
                    use_xyz=sa_cfg.get('USE_XYZ', True),
                )
            )
            skip_channel_list.append(channel_out)
            channel_in = channel_out

        fp_mlps = self.model_cfg.FP_MLPS
        self.FP_modules = nn.ModuleList()
        for k in range(len(fp_mlps)):
            # Input of FP layer k: output of the next deeper FP layer (or of the last
            # SA layer for the deepest one) concatenated with the skip connection.
            if k + 1 < len(fp_mlps):
                pre_channel = fp_mlps[k + 1][-1]
            else:
                pre_channel = channel_out
            self.FP_modules.append(
                pointnet2_modules.PointnetFPModule(
                    mlp=[pre_channel + skip_channel_list[k]] + fp_mlps[k]
                )
            )

        self.num_point_features = fp_mlps[0][-1]

    def break_up_pc(self, pc):
        """Split (N, 4 + C) points into batch index (N), xyz (N, 3) and features (N, C) or None."""
        has_features = pc.size(-1) > 4
        batch_idx = pc[:, 0]
        xyz = pc[:, 1:4].contiguous()
        features = pc[:, 4:].contiguous() if has_features else None
        return batch_idx, xyz, features

    def forward(self, batch_dict):
        """
        Args:
            batch_dict:
                batch_size: int
                points: (num_points, 4 + C), [batch_idx, x, y, z, ...]
        Returns:
            batch_dict:
                point_features: (N, C)
                point_coords: (N, 4), [batch_idx, x, y, z]
        """
        batch_size = batch_dict['batch_size']
        batch_idx, xyz, features = self.break_up_pc(batch_dict['points'])

        # The batched ops need the same number of points in every sample.
        xyz_batch_cnt = xyz.new_zeros(batch_size).int()
        for sample_idx in range(batch_size):
            xyz_batch_cnt[sample_idx] = (batch_idx == sample_idx).sum()
        assert xyz_batch_cnt.min() == xyz_batch_cnt.max()

        xyz = xyz.view(batch_size, -1, 3)
        if features is not None:
            features = features.view(batch_size, -1, features.shape[-1]).permute(0, 2, 1)

        l_xyz, l_features = [xyz], [features]
        for level, sa_module in enumerate(self.SA_modules):
            li_xyz, li_features = sa_module(l_xyz[level], l_features[level])
            l_xyz.append(li_xyz)
            l_features.append(li_features)

        # Propagate from the coarsest level back to the input resolution.
        for i in range(-1, -(len(self.FP_modules) + 1), -1):
            l_features[i - 1] = self.FP_modules[i](
                l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i]
            )  # (B, C, N)

        point_features = l_features[0].permute(0, 2, 1).contiguous()  # (B, N, C)
        batch_dict['point_features'] = point_features.view(-1, point_features.shape[-1])
        batch_dict['point_coords'] = torch.cat((batch_idx[:, None].float(), l_xyz[0].view(-1, 3)), dim=1)
        return batch_dict
def forward(self, batch_dict):
    """
    Args:
        batch_dict:
            batch_size: int
            points: (num_points, 4 + C), [batch_idx, x, y, z, ...]
    Returns:
        batch_dict:
            point_features: features propagated back to the input points
            point_coords: (N, 4), [batch_idx, x, y, z]
    """
    batch_size = batch_dict['batch_size']
    points = batch_dict['points']
    batch_idx, xyz, features = self.break_up_pc(points)

    xyz_batch_cnt = xyz.new_zeros(batch_size).int()
    for bs_idx in range(batch_size):
        xyz_batch_cnt[bs_idx] = (batch_idx == bs_idx).sum()

    l_xyz, l_features, l_batch_cnt = [xyz], [features], [xyz_batch_cnt]
    for i in range(len(self.SA_modules)):
        num_keypoints = self.num_points_each_layer[i]
        new_xyz_list = []
        for k in range(batch_size):
            if len(l_xyz) == 1:
                cur_xyz = l_xyz[0][batch_idx == k]
            else:
                # Deeper levels hold a fixed number of points per sample, stored back to back.
                last_num_points = self.num_points_each_layer[i - 1]
                cur_xyz = l_xyz[-1][k * last_num_points: (k + 1) * last_num_points]
            cur_pt_idxs = pointnet2_utils_stack.furthest_point_sample(
                cur_xyz[None, :, :].contiguous(), num_keypoints
            ).long()[0]  # (num_keypoints,)
            num_available = cur_xyz.shape[0]
            if num_available < num_keypoints:
                # Fewer points than requested: pad by cycling through the valid
                # indices, the same scheme VoxelSetAbstraction.get_sampled_points uses.
                # NOTE(review): assumes the first `num_available` sampled indices are
                # the valid ones — confirm against the sampling kernel.
                times = num_keypoints // num_available + 1
                cur_pt_idxs = cur_pt_idxs[:num_available].repeat(times)[:num_keypoints]
            new_xyz_list.append(cur_xyz[cur_pt_idxs])
        new_xyz = torch.cat(new_xyz_list, dim=0)

        new_xyz_batch_cnt = xyz.new_zeros(batch_size).int().fill_(num_keypoints)
        li_xyz, li_features = self.SA_modules[i](
            xyz=l_xyz[i], features=l_features[i], xyz_batch_cnt=l_batch_cnt[i],
            new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt
        )

        l_xyz.append(li_xyz)
        l_features.append(li_features)
        l_batch_cnt.append(new_xyz_batch_cnt)

    # The finest level propagates onto the raw point columns (xyz + features).
    l_features[0] = points[:, 1:]
    for i in range(-1, -(len(self.FP_modules) + 1), -1):
        l_features[i - 1] = self.FP_modules[i](
            unknown=l_xyz[i - 1], unknown_batch_cnt=l_batch_cnt[i - 1],
            known=l_xyz[i], known_batch_cnt=l_batch_cnt[i],
            unknown_feats=l_features[i - 1], known_feats=l_features[i]
        )

    batch_dict['point_features'] = l_features[0]
    batch_dict['point_coords'] = torch.cat((batch_idx[:, None].float(), l_xyz[0]), dim=1)
    return batch_dict
def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0,
                   conv_type='subm', norm_fn=None):
    """
    Build a sparse "convolution -> normalisation -> ReLU" block.

    Args:
        in_channels: input feature width
        out_channels: output feature width
        kernel_size: convolution kernel size
        indice_key: spconv indice key forwarded to the convolution
        stride: stride, used by the 'spconv' type only
        padding: padding, used by the 'spconv' type only
        conv_type: 'subm', 'spconv' or 'inverseconv'
        norm_fn: callable mapping a channel count to a normalisation layer

    Returns:
        spconv.SparseSequential holding the three layers

    Raises:
        NotImplementedError: conv_type is not one of the supported kinds
    """
    if conv_type not in ('subm', 'spconv', 'inverseconv'):
        raise NotImplementedError

    if conv_type == 'subm':
        conv_layer = spconv.SubMConv3d(in_channels, out_channels, kernel_size, bias=False, indice_key=indice_key)
        activation = nn.ReLU()
    elif conv_type == 'spconv':
        conv_layer = spconv.SparseConv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding,
                                         bias=False, indice_key=indice_key)
        # Only the strided convolution uses an in-place activation.
        activation = nn.ReLU(inplace=True)
    else:
        conv_layer = spconv.SparseInverseConv3d(in_channels, out_channels, kernel_size,
                                                indice_key=indice_key, bias=False)
        activation = nn.ReLU()

    return spconv.SparseSequential(
        conv_layer,
        norm_fn(out_channels),
        activation,
    )
stride=2, padding=1, indice_key=None): super(BasicBlock, self).__init__() assert norm_fn is not None block = post_act_block self.stride = stride if stride >1: self.down_conv = block(inplanes, planes, 3, norm_fn=norm_fn, stride=2, padding=padding, indice_key=('sp' + indice_key), conv_type='spconv') if stride >1: conv_in = planes else: conv_in = inplanes self.conv1 = block(conv_in, planes // 2, 3, norm_fn=norm_fn, padding=1, indice_key=('subm1' + indice_key)) self.conv2 = block(planes//2, planes // 2, 3, norm_fn=norm_fn, padding=1, indice_key=('subm2' + indice_key)) self.conv3 = block(planes//2, planes // 2, 3, norm_fn=norm_fn, padding=1, indice_key=('subm3' + indice_key)) self.conv4 = block(planes//2, planes // 2, 3, norm_fn=norm_fn, padding=1, indice_key=('subm4' + indice_key)) def forward(self, x): if self.stride>1: x = self.down_conv(x) x1 = self.conv1(x) x2 = self.conv2(x1) x3 = self.conv3(x2) x4 = self.conv4(x3) out = replace_feature(x2, torch.cat([x1.features, x4.features],-1)) return out class TeMMVoxelBackBone8x(nn.Module): def __init__(self, model_cfg, input_channels, grid_size, **kwargs): super().__init__() self.model_cfg = model_cfg self.return_num_features_as_dict = model_cfg.RETURN_NUM_FEATURES_AS_DICT self.out_features=model_cfg.OUT_FEATURES num_filters = model_cfg.NUM_FILTERS norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) self.sparse_shape = grid_size[::-1] + [1, 0, 0] self.conv_input = spconv.SparseSequential( spconv.SubMConv3d(input_channels, num_filters[0], 3, padding=1, bias=False, indice_key='subm1'), norm_fn(num_filters[0]), nn.ReLU(), ) block = post_act_block self.conv1 = spconv.SparseSequential( block(num_filters[0], num_filters[0], 3, norm_fn=norm_fn, padding=1, indice_key='subm1'), ) self.conv2 = spconv.SparseSequential( # [1600, 1408, 41] <- [800, 704, 21] block(num_filters[0], num_filters[1], 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'), block(num_filters[1], num_filters[1], 3, 
norm_fn=norm_fn, padding=1, indice_key='subm2'), block(num_filters[1], num_filters[1], 3, norm_fn=norm_fn, padding=1, indice_key='subm2'), ) self.conv3 = spconv.SparseSequential( # [800, 704, 21] <- [400, 352, 11] block(num_filters[1], num_filters[2], 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'), block(num_filters[2], num_filters[2], 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), block(num_filters[2], num_filters[2], 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), ) self.conv4 = spconv.SparseSequential( # [400, 352, 11] <- [200, 176, 5] block(num_filters[2], num_filters[3], 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'), block(num_filters[3], num_filters[3], 3, norm_fn=norm_fn, padding=1, indice_key='subm4'), block(num_filters[3], num_filters[3], 3, norm_fn=norm_fn, padding=1, indice_key='subm4'), ) last_pad = 0 last_pad = self.model_cfg.get('last_pad', last_pad) self.conv_out = spconv.SparseSequential( # [200, 150, 5] -> [200, 150, 2] spconv.SparseConv3d(num_filters[3], self.out_features, (3, 1, 1), stride=(2, 1, 1), padding=last_pad, bias=False, indice_key='spconv_down2'), norm_fn(self.out_features), nn.ReLU(), ) if self.model_cfg.get('MM', False): self.conv_input_2 = spconv.SparseSequential( spconv.SubMConv3d(input_channels, num_filters[0], 3, padding=1, bias=False, indice_key='subm1_2'), norm_fn(num_filters[0]), nn.ReLU(), ) block = post_act_block self.conv1_2 = spconv.SparseSequential( block(num_filters[0], num_filters[0], 3, norm_fn=norm_fn, padding=1, indice_key='subm1_2'), ) self.conv2_2 = spconv.SparseSequential( # [1600, 1408, 41] <- [800, 704, 21] block(num_filters[0], num_filters[1], 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2_2', conv_type='spconv'), block(num_filters[1], num_filters[1], 3, norm_fn=norm_fn, padding=1, indice_key='subm2_2'), block(num_filters[1], num_filters[1], 3, norm_fn=norm_fn, padding=1, indice_key='subm2_2'), ) self.conv3_2 
def decompose_tensor(self, tensor, i, batch_size):
    """
    Cut the i-th quarter (along the last spatial axis) out of a sparse tensor.

    forward_test places the voxel grids of the individual transformed inputs
    side by side along the x axis (coordinate column 3) of one grid that is four
    times as wide. This method undoes that packing for one slice and shifts its
    x indices back so they start at 0.

    Args:
        tensor: sparse tensor exposing ``features``, ``indices`` and ``spatial_shape``
        i: index of the slice to extract (0-based)
        batch_size: batch size forwarded to the new SparseConvTensor
    Returns:
        spconv.SparseConvTensor holding only the voxels of slice ``i``; its last
        spatial dimension is ``tensor.spatial_shape[2] // 4``
    """
    slice_width = tensor.spatial_shape[2] // 4
    begin_shape_ids = i * slice_width
    end_shape_ids = (i + 1) * slice_width

    coords = tensor.indices
    x_idx = coords[:, 3]
    # Slice i owns the half-open x range [begin, end). The lower bound must be
    # inclusive, otherwise every voxel in the first column of the slice
    # (x == begin, i.e. local x == 0) is silently discarded.
    mask = (x_idx >= begin_shape_ids) & (x_idx < end_shape_ids)

    this_feat = tensor.features[mask]
    # Boolean-mask indexing returns a copy, so the in-place shift below does not
    # modify tensor.indices.
    this_coords = coords[mask]
    this_coords[:, 3] -= begin_shape_ids

    this_shape = [tensor.spatial_shape[0], tensor.spatial_shape[1], slice_width]
    return spconv.SparseConvTensor(
        features=this_feat,
        indices=this_coords.int(),
        spatial_shape=this_shape,
        batch_size=batch_size
    )
encoded_spconv_tensor: sparse tensor """ if 'transform_param' in batch_dict: trans_param = batch_dict['transform_param'] rot_num = trans_param.shape[1] else: rot_num = 1 all_lidar_feat = [] all_lidar_coords = [] new_shape = [self.sparse_shape[0], self.sparse_shape[1], self.sparse_shape[2] * 4] for i in range(rot_num): if i==0: rot_num_id = '' else: rot_num_id = str(i) voxel_features, voxel_coords = batch_dict['voxel_features'+rot_num_id], batch_dict['voxel_coords'+rot_num_id] all_lidar_feat.append(voxel_features) new_coord = voxel_coords.clone() new_coord[:, 3] += i*self.sparse_shape[2] all_lidar_coords.append(new_coord) batch_size = batch_dict['batch_size'] all_lidar_feat = torch.cat(all_lidar_feat, 0) all_lidar_coords = torch.cat(all_lidar_coords) input_sp_tensor = spconv.SparseConvTensor( features=all_lidar_feat, indices=all_lidar_coords.int(), spatial_shape=new_shape, batch_size=batch_size ) x = self.conv_input(input_sp_tensor) x_conv1 = self.conv1(x) x_conv2 = self.conv2(x_conv1) x_conv3 = self.conv3(x_conv2) x_conv4 = self.conv4(x_conv3) out = self.conv_out(x_conv4) for i in range(rot_num): if i==0: rot_num_id = '' else: rot_num_id = str(i) this_conv3 = self.decompose_tensor(x_conv3, i, batch_size) this_conv4 = self.decompose_tensor(x_conv4, i, batch_size) this_out = self.decompose_tensor(out, i, batch_size) batch_dict.update({ 'encoded_spconv_tensor'+rot_num_id: this_out, 'encoded_spconv_tensor_stride'+rot_num_id: 8, }) batch_dict.update({ 'multi_scale_3d_features'+rot_num_id: { 'x_conv1': None, 'x_conv2': None, 'x_conv3': this_conv3, 'x_conv4': this_conv4, }, 'multi_scale_3d_strides'+rot_num_id: { 'x_conv1': 1, 'x_conv2': 2, 'x_conv3': 4, 'x_conv4': 8, } }) if self.model_cfg.get('MM', False): all_mm_feat = [] all_mm_coords = [] for i in range(rot_num): if i == 0: rot_num_id = '' else: rot_num_id = str(i) newvoxel_features, newvoxel_coords = batch_dict['voxel_features_mm'+rot_num_id], batch_dict['voxel_coords_mm'+rot_num_id] 
all_mm_feat.append(newvoxel_features) new_mm_coord = newvoxel_coords.clone() new_mm_coord[:, 3] += i * self.sparse_shape[2] all_mm_coords.append(new_mm_coord) all_mm_feat = torch.cat(all_mm_feat, 0) all_mm_coords = torch.cat(all_mm_coords) newinput_sp_tensor = spconv.SparseConvTensor( features=all_mm_feat, indices=all_mm_coords.int(), spatial_shape=new_shape, batch_size=batch_size ) newx = self.conv_input_2(newinput_sp_tensor) newx_conv1 = self.conv1_2(newx) newx_conv2 = self.conv2_2(newx_conv1) newx_conv3 = self.conv3_2(newx_conv2) newx_conv4 = self.conv4_2(newx_conv3) for i in range(rot_num): if i == 0: rot_num_id = '' else: rot_num_id = str(i) this_conv3 = self.decompose_tensor(newx_conv3, i, batch_size) this_conv4 = self.decompose_tensor(newx_conv4, i, batch_size) batch_dict.update({ 'encoded_spconv_tensor_stride_mm'+rot_num_id: 8 }) batch_dict.update({ 'multi_scale_3d_features_mm'+rot_num_id: { 'x_conv1': None, 'x_conv2': None, 'x_conv3': this_conv3, 'x_conv4': this_conv4, }, 'multi_scale_3d_strides'+rot_num_id: { 'x_conv1': 1, 'x_conv2': 2, 'x_conv3': 4, 'x_conv4': 8, } }) return batch_dict def forward_train(self, batch_dict): """ Args: batch_dict: batch_size: int vfe_features: (num_voxels, C) voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx] Returns: batch_dict: encoded_spconv_tensor: sparse tensor """ if 'transform_param' in batch_dict: trans_param = batch_dict['transform_param'] rot_num = trans_param.shape[1] else: rot_num = 1 for i in range(rot_num): if i==0: rot_num_id = '' else: rot_num_id = str(i) voxel_features, voxel_coords = batch_dict['voxel_features'+rot_num_id], batch_dict['voxel_coords'+rot_num_id] batch_size = batch_dict['batch_size'] input_sp_tensor = spconv.SparseConvTensor( features=voxel_features, indices=voxel_coords.int(), spatial_shape=self.sparse_shape, batch_size=batch_size ) x = self.conv_input(input_sp_tensor) x_conv1 = self.conv1(x) x_conv2 = self.conv2(x_conv1) x_conv3 = self.conv3(x_conv2) x_conv4 = 
def forward(self, batch_dict):
    """
    Run the backbone on ``batch_dict``.

    Delegates to forward_train while the module is in training mode
    (``self.training`` is truthy) and to forward_test otherwise, returning
    whatever the selected method returns.
    """
    run = self.forward_train if self.training else self.forward_test
    return run(batch_dict)
def decompose_tensor(self, tensor, i, batch_size):
    """
    Extract slice ``i`` of four from a sparse tensor packed along the x axis.

    forward_test concatenates the per-transform voxel grids along the last
    spatial axis (coordinate column 3) by offsetting their x indices; this
    method selects the voxels that belong to one of those grids and re-bases
    their x indices to start from 0.

    Args:
        tensor: sparse tensor exposing ``features``, ``indices`` and ``spatial_shape``
        i: index of the slice to extract (0-based)
        batch_size: batch size forwarded to the new SparseConvTensor
    Returns:
        spconv.SparseConvTensor with the features/indices of slice ``i`` and a
        last spatial dimension of ``tensor.spatial_shape[2] // 4``
    """
    slice_width = tensor.spatial_shape[2] // 4
    begin_shape_ids = i * slice_width
    end_shape_ids = (i + 1) * slice_width

    all_coords = tensor.indices
    x_idx = all_coords[:, 3]
    # Half-open range [begin, end): the lower bound is inclusive so that voxels
    # sitting in the first column of the slice (local x == 0) are kept rather
    # than dropped.
    mask = (x_idx >= begin_shape_ids) & (x_idx < end_shape_ids)

    slice_feat = tensor.features[mask]
    # Indexing with a boolean mask copies, so shifting in place leaves
    # tensor.indices untouched.
    slice_coords = all_coords[mask]
    slice_coords[:, 3] -= begin_shape_ids

    slice_shape = [tensor.spatial_shape[0], tensor.spatial_shape[1], slice_width]
    return spconv.SparseConvTensor(
        features=slice_feat,
        indices=slice_coords.int(),
        spatial_shape=slice_shape,
        batch_size=batch_size
    )
self.decompose_tensor(x_conv3, i, batch_size) this_conv4 = self.decompose_tensor(x_conv4, i, batch_size) this_out = self.decompose_tensor(out, i, batch_size) batch_dict.update({ 'encoded_spconv_tensor'+rot_num_id: this_out, 'encoded_spconv_tensor_stride'+rot_num_id: 8, }) batch_dict.update({ 'multi_scale_3d_features'+rot_num_id: { 'x_conv1': None, 'x_conv2': None, 'x_conv3': this_conv3, 'x_conv4': this_conv4, }, 'multi_scale_3d_strides'+rot_num_id: { 'x_conv1': 1, 'x_conv2': 2, 'x_conv3': 4, 'x_conv4': 8, } }) if self.model_cfg.get('MM', False): all_mm_feat = [] all_mm_coords = [] for i in range(rot_num): if i == 0: rot_num_id = '' else: rot_num_id = str(i) newvoxel_features, newvoxel_coords = batch_dict['voxel_features_mm'+rot_num_id], batch_dict['voxel_coords_mm'+rot_num_id] all_mm_feat.append(newvoxel_features) new_mm_coord = newvoxel_coords.clone() new_mm_coord[:, 3] += i * self.sparse_shape[2] all_mm_coords.append(new_mm_coord) all_mm_feat = torch.cat(all_mm_feat, 0) all_mm_coords = torch.cat(all_mm_coords) newinput_sp_tensor = spconv.SparseConvTensor( features=all_mm_feat, indices=all_mm_coords.int(), spatial_shape=new_shape, batch_size=batch_size ) newx = self.conv_input_2(newinput_sp_tensor) newx_conv1 = self.conv1_2(newx) newx_conv2 = self.conv2_2(newx_conv1) newx_conv3 = self.conv3_2(newx_conv2) newx_conv4 = self.conv4_2(newx_conv3) for i in range(rot_num): if i == 0: rot_num_id = '' else: rot_num_id = str(i) this_conv3 = self.decompose_tensor(newx_conv3, i, batch_size) this_conv4 = self.decompose_tensor(newx_conv4, i, batch_size) batch_dict.update({ 'encoded_spconv_tensor_stride_mm'+rot_num_id: 8 }) batch_dict.update({ 'multi_scale_3d_features_mm'+rot_num_id: { 'x_conv1': None, 'x_conv2': None, 'x_conv3': this_conv3, 'x_conv4': this_conv4, }, 'multi_scale_3d_strides'+rot_num_id: { 'x_conv1': 1, 'x_conv2': 2, 'x_conv3': 4, 'x_conv4': 8, } }) return batch_dict def forward_train(self, batch_dict): """ Args: batch_dict: batch_size: int vfe_features: 
(num_voxels, C) voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx] Returns: batch_dict: encoded_spconv_tensor: sparse tensor """ if 'transform_param' in batch_dict: trans_param = batch_dict['transform_param'] rot_num = trans_param.shape[1] else: rot_num = 1 for i in range(rot_num): if i==0: rot_num_id = '' else: rot_num_id = str(i) voxel_features, voxel_coords = batch_dict['voxel_features'+rot_num_id], batch_dict['voxel_coords'+rot_num_id] batch_size = batch_dict['batch_size'] input_sp_tensor = spconv.SparseConvTensor( features=voxel_features, indices=voxel_coords.int(), spatial_shape=self.sparse_shape, batch_size=batch_size ) x = self.conv_input(input_sp_tensor) x_conv1 = self.conv1(x) x_conv2 = self.conv2(x_conv1) x_conv3 = self.conv3(x_conv2) x_conv4 = self.conv4(x_conv3) # for detection head # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(x_conv4) batch_dict.update({ 'encoded_spconv_tensor'+rot_num_id: out, 'encoded_spconv_tensor_stride'+rot_num_id: 8, }) batch_dict.update({ 'multi_scale_3d_features'+rot_num_id: { 'x_conv1': x_conv1, 'x_conv2': x_conv2, 'x_conv3': x_conv3, 'x_conv4': x_conv4, }, 'multi_scale_3d_strides'+rot_num_id: { 'x_conv1': 1, 'x_conv2': 2, 'x_conv3': 4, 'x_conv4': 8, } }) if self.model_cfg.get('MM', False): newvoxel_features, newvoxel_coords = batch_dict['voxel_features_mm'+rot_num_id], batch_dict['voxel_coords_mm'+rot_num_id] newinput_sp_tensor = spconv.SparseConvTensor( features=newvoxel_features, indices=newvoxel_coords.int(), spatial_shape=self.sparse_shape, batch_size=batch_size ) newx = self.conv_input_2(newinput_sp_tensor) newx_conv1 = self.conv1_2(newx) newx_conv2 = self.conv2_2(newx_conv1) newx_conv3 = self.conv3_2(newx_conv2) newx_conv4 = self.conv4_2(newx_conv3) # for detection head # [200, 176, 5] -> [200, 176, 2] #newout = self.conv_out(newx_conv4) batch_dict.update({ #'encoded_spconv_tensor_mm': newout, 'encoded_spconv_tensor_stride_mm'+rot_num_id: 8 }) batch_dict.update({ 
class SparseBasicBlock(spconv.SparseModule):
    """
    Residual block made of two 3x3x3 submanifold sparse convolutions.

    Each convolution is followed by ``norm_fn``; the block input features are
    added to the second normalised output before the final ReLU. Both
    convolutions share ``indice_key``.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, indice_key=None, norm_fn=None):
        super().__init__()
        # Settings common to both convolutions.
        shared = dict(kernel_size=3, padding=1, bias=False, indice_key=indice_key)
        self.conv1 = spconv.SubMConv3d(inplanes, planes, stride=stride, **shared)
        self.bn1 = norm_fn(planes)
        self.relu = nn.ReLU()
        self.conv2 = spconv.SubMConv3d(planes, planes, stride=1, **shared)
        self.bn2 = norm_fn(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """
        Args:
            x: sparse tensor whose ``features`` is 2-D
        Returns:
            sparse tensor produced by the second convolution, with the residual
            sum passed through ReLU as its features
        """
        shortcut = x.features
        assert x.features.dim() == 2, 'x.features.dim()=%d' % x.features.dim()

        out = self.conv1(x)
        out.features = self.relu(self.bn1(out.features))

        out = self.conv2(out)
        out.features = self.bn2(out.features)

        # NOTE(review): when a downsample module is supplied its return value is
        # added to out.features as-is; confirm it yields a feature tensor.
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out.features += shortcut
        out.features = self.relu(out.features)
        return out
al) From Points to Parts: 3D Object Detection from Point Cloud with Part-aware and Part-aggregation Network """ def __init__(self, model_cfg, input_channels, grid_size, voxel_size, point_cloud_range, **kwargs): super().__init__() self.model_cfg = model_cfg self.sparse_shape = grid_size[::-1] + [1, 0, 0] self.voxel_size = voxel_size self.point_cloud_range = point_cloud_range norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) self.conv_input = spconv.SparseSequential( spconv.SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'), norm_fn(16), nn.ReLU(), ) block = post_act_block self.conv1 = spconv.SparseSequential( block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1'), ) self.conv2 = spconv.SparseSequential( # [1600, 1408, 41] <- [800, 704, 21] block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'), block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'), block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'), ) self.conv3 = spconv.SparseSequential( # [800, 704, 21] <- [400, 352, 11] block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'), block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), ) self.conv4 = spconv.SparseSequential( # [400, 352, 11] <- [200, 176, 5] block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'), block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'), block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'), ) if self.model_cfg.get('RETURN_ENCODED_TENSOR', True): last_pad = self.model_cfg.get('last_pad', 0) self.conv_out = spconv.SparseSequential( # [200, 150, 5] -> [200, 150, 2] spconv.SparseConv3d(64, 128, (3, 1, 1), stride=(2, 1, 1), padding=last_pad, bias=False, indice_key='spconv_down2'), norm_fn(128), nn.ReLU(), ) else: self.conv_out = None # decoder # 
@staticmethod
def channel_reduction(x, out_channels):
    """
    Shrink the channel axis of ``x.features`` by summing groups of adjacent channels.

    Args:
        x: object with a 2-D ``features`` tensor (N, C1); it is updated in place
        out_channels: C2, the number of channels to keep; C1 must be a multiple
            of C2 and at least as large
    Returns:
        x itself, with ``x.features`` replaced by a (N, C2) tensor where output
        channel k is the sum of input channels k * (C1 // C2) ... (k + 1) * (C1 // C2) - 1
    """
    feats = x.features
    num_rows, num_in = feats.shape
    assert (num_in % out_channels == 0) and (num_in >= out_channels)

    # (N, C1) -> (N, C2, C1 // C2), then collapse the trailing group axis.
    grouped = feats.view(num_rows, out_channels, -1)
    x.features = grouped.sum(dim=2)
    return x
Returns: batch_dict: encoded_spconv_tensor: sparse tensor point_features: (N, C) """ voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords'] batch_size = batch_dict['batch_size'] input_sp_tensor = spconv.SparseConvTensor( features=voxel_features, indices=voxel_coords.int(), spatial_shape=self.sparse_shape, batch_size=batch_size ) x = self.conv_input(input_sp_tensor) x_conv1 = self.conv1(x) x_conv2 = self.conv2(x_conv1) x_conv3 = self.conv3(x_conv2) x_conv4 = self.conv4(x_conv3) if self.conv_out is not None: # for detection head # [200, 176, 5] -> [200, 176, 2] out = self.conv_out(x_conv4) batch_dict['encoded_spconv_tensor'] = out batch_dict['encoded_spconv_tensor_stride'] = 8 # for segmentation head # [400, 352, 11] <- [200, 176, 5] x_up4 = self.UR_block_forward(x_conv4, x_conv4, self.conv_up_t4, self.conv_up_m4, self.inv_conv4) # [800, 704, 21] <- [400, 352, 11] x_up3 = self.UR_block_forward(x_conv3, x_up4, self.conv_up_t3, self.conv_up_m3, self.inv_conv3) # [1600, 1408, 41] <- [800, 704, 21] x_up2 = self.UR_block_forward(x_conv2, x_up3, self.conv_up_t2, self.conv_up_m2, self.inv_conv2) # [1600, 1408, 41] <- [1600, 1408, 41] x_up1 = self.UR_block_forward(x_conv1, x_up2, self.conv_up_t1, self.conv_up_m1, self.conv5) batch_dict['point_features'] = x_up1.features point_coords = common_utils.get_voxel_centers( x_up1.indices[:, 1:], downsample_times=1, voxel_size=self.voxel_size, point_cloud_range=self.point_cloud_range ) batch_dict['point_coords'] = torch.cat((x_up1.indices[:, 0:1].float(), point_coords), dim=1) return batch_dict ================================================ FILE: pcdet/models/backbones_3d/vfe/__init__.py ================================================ from .mean_vfe import MeanVFE from .pillar_vfe import PillarVFE from .vfe_template import VFETemplate __all__ = { 'VFETemplate': VFETemplate, 'MeanVFE': MeanVFE, 'PillarVFE': PillarVFE } ================================================ FILE: 
class MeanVFE(VFETemplate):
    """
    Voxel feature encoder that averages the points stored in every voxel.

    Handles the un-suffixed voxel set plus one suffixed set ('1', '2', ...) per
    extra transform listed in ``batch_dict['transform_param']``, and the
    matching ``*_mm`` sets when the key ``'mm'`` is present in the batch.
    """

    def __init__(self, model_cfg, num_point_features, **kwargs):
        super().__init__(model_cfg=model_cfg)
        self.num_point_features = num_point_features
        # Optional pooling variant; forward() only reacts to the value 'max'.
        self.model = self.model_cfg.get('MODEL',None)

    def get_output_feature_dim(self):
        """Return the width of the produced voxel features."""
        return self.num_point_features

    @staticmethod
    def _mean_over_points(voxels, num_points):
        """Sum ``voxels`` over the point axis and divide by max(num_points, 1)."""
        summed = voxels.sum(dim=1, keepdim=False)
        denom = torch.clamp_min(num_points.view(-1, 1), min=1.0).type_as(voxels)
        return summed / denom

    def forward(self, batch_dict, **kwargs):
        """
        Args:
            batch_dict:
                voxels: (num_voxels, max_points_per_voxel, C)
                voxel_num_points: optional (num_voxels)
            **kwargs:

        Returns:
            batch_dict with ``voxel_features`` (num_voxels, C) added for every
            voxel set that was processed
        """
        rot_num = 1
        if 'transform_param' in batch_dict:
            rot_num = batch_dict['transform_param'].shape[1]

        for i in range(rot_num):
            # The first set carries no suffix; later ones are numbered.
            suffix = '' if i == 0 else str(i)

            voxels, num_points = batch_dict['voxels'+suffix], batch_dict['voxel_num_points'+suffix]
            pooled = self._mean_over_points(voxels, num_points)

            if self.model == 'max':
                # Replace the averaged last channel by its per-voxel maximum.
                channel_max, _ = voxels.max(dim=1, keepdim=False)
                pooled[:, -1] = channel_max[:, -1]

            batch_dict['voxel_features'+suffix] = pooled.contiguous()

            if 'mm' in batch_dict:
                voxels_mm, num_points_mm = batch_dict['voxels_mm'+suffix], batch_dict[
                    'voxel_num_points_mm'+suffix]
                pooled_mm = self._mean_over_points(voxels_mm, num_points_mm)
                batch_dict['voxel_features_mm'+suffix] = pooled_mm.contiguous()

        return batch_dict
class PFNLayer(nn.Module):
    """
    One pillar-feature-network layer: Linear -> (BatchNorm1d) -> ReLU -> max over points.

    Args:
        in_channels: width of the incoming per-point features
        out_channels: width of the layer output; for a non-final layer the
            linear map produces ``out_channels // 2`` channels and the pooled
            maximum is concatenated back to reach the full width
        use_norm: apply BatchNorm1d after the (then bias-free) linear map
        last_layer: if True, return only the pooled maximum
    """

    def __init__(self, in_channels, out_channels, use_norm=True, last_layer=False):
        super().__init__()

        self.last_vfe = last_layer
        self.use_norm = use_norm
        if not self.last_vfe:
            out_channels = out_channels // 2

        if self.use_norm:
            self.linear = nn.Linear(in_channels, out_channels, bias=False)
            self.norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01)
        else:
            self.linear = nn.Linear(in_channels, out_channels, bias=True)

        # Maximum number of rows pushed through the linear layer in one call.
        self.part = 50000

    def forward(self, inputs):
        """
        Args:
            inputs: (N, P, in_channels) per-point features
        Returns:
            (N, 1, C) pooled maximum for the last layer, otherwise (N, P, 2 * C)
            per-point features concatenated with the repeated maximum, where C
            is the linear layer's output width
        """
        if inputs.shape[0] > self.part:
            # nn.Linear performs randomly when batch size is too large
            num_parts = inputs.shape[0] // self.part
            part_linear_out = [
                self.linear(inputs[idx * self.part:(idx + 1) * self.part])
                for idx in range(num_parts + 1)
            ]
            x = torch.cat(part_linear_out, dim=0)
        else:
            x = self.linear(inputs)

        if self.use_norm:
            # Batch norm runs with cuDNN switched off (presumably a workaround
            # for a cuDNN batch-norm limitation - TODO confirm). The flag is
            # process-global, so put back whatever the caller had configured
            # instead of forcing it to True, and do so even if the norm raises.
            cudnn_was_enabled = torch.backends.cudnn.enabled
            torch.backends.cudnn.enabled = False
            try:
                x = self.norm(x.permute(0, 2, 1)).permute(0, 2, 1)
            finally:
                torch.backends.cudnn.enabled = cudnn_was_enabled

        x = F.relu(x)
        x_max = torch.max(x, dim=1, keepdim=True)[0]

        if self.last_vfe:
            return x_max

        x_repeat = x_max.repeat(1, inputs.shape[1], 1)
        return torch.cat([x, x_repeat], dim=2)
def forward(self, batch_dict, **kwargs):
    """
    Encode every pillar into a single feature vector.

    Args:
        batch_dict:
            voxels: (num_pillars, max_points, C) point features; the first
                three channels are used as x, y, z
            voxel_num_points: (num_pillars,) number of valid points per pillar
            voxel_coords: (num_pillars, 4); columns 3, 2 and 1 are combined with
                the x, y and z voxel sizes respectively
        **kwargs: unused
    Returns:
        batch_dict with ``pillar_features`` of shape (num_pillars, C_out) added
    """
    voxel_features, voxel_num_points, coords = batch_dict['voxels'], batch_dict['voxel_num_points'], batch_dict['voxel_coords']
    xyz = voxel_features[:, :, :3]
    dtype = voxel_features.dtype

    # Offset of each point from the mean of the points in its pillar.
    points_mean = xyz.sum(dim=1, keepdim=True) / voxel_num_points.type_as(voxel_features).view(-1, 1, 1)
    f_cluster = xyz - points_mean

    # Offset of each point from the geometric centre of its voxel.
    f_center = torch.zeros_like(xyz)
    f_center[:, :, 0] = voxel_features[:, :, 0] - (coords[:, 3].to(dtype).unsqueeze(1) * self.voxel_x + self.x_offset)
    f_center[:, :, 1] = voxel_features[:, :, 1] - (coords[:, 2].to(dtype).unsqueeze(1) * self.voxel_y + self.y_offset)
    f_center[:, :, 2] = voxel_features[:, :, 2] - (coords[:, 1].to(dtype).unsqueeze(1) * self.voxel_z + self.z_offset)

    if self.use_absolute_xyz:
        features = [voxel_features, f_cluster, f_center]
    else:
        features = [voxel_features[..., 3:], f_cluster, f_center]

    if self.with_distance:
        points_dist = torch.norm(xyz, 2, 2, keepdim=True)
        features.append(points_dist)
    features = torch.cat(features, dim=-1)

    # Zero the rows that are padding rather than real points.
    voxel_count = features.shape[1]
    mask = self.get_paddings_indicator(voxel_num_points, voxel_count, axis=0)
    mask = torch.unsqueeze(mask, -1).type_as(voxel_features)
    features *= mask

    for pfn in self.pfn_layers:
        features = pfn(features)

    # The final PFN layer keeps a singleton point axis. Remove exactly that
    # axis: a bare squeeze() would also drop the pillar axis when the batch
    # contains a single pillar and hand a 1-D tensor to downstream modules.
    features = features.squeeze(dim=1)
    batch_dict['pillar_features'] = features
    return batch_dict
code_size self.model_cfg = model_cfg self.separate_reg_config = separate_reg_config self.register_buffer('head_label_indices', head_label_indices) if self.separate_reg_config is not None: code_size_cnt = 0 self.conv_box = nn.ModuleDict() self.conv_box_names = [] num_middle_conv = self.separate_reg_config.NUM_MIDDLE_CONV num_middle_filter = self.separate_reg_config.NUM_MIDDLE_FILTER conv_cls_list = [] c_in = input_channels for k in range(num_middle_conv): conv_cls_list.extend([ nn.Conv2d( c_in, num_middle_filter, kernel_size=3, stride=1, padding=1, bias=False ), nn.BatchNorm2d(num_middle_filter), nn.ReLU() ]) c_in = num_middle_filter conv_cls_list.append(nn.Conv2d( c_in, self.num_anchors_per_location * self.num_class, kernel_size=3, stride=1, padding=1 )) self.conv_cls = nn.Sequential(*conv_cls_list) for reg_config in self.separate_reg_config.REG_LIST: reg_name, reg_channel = reg_config.split(':') reg_channel = int(reg_channel) cur_conv_list = [] c_in = input_channels for k in range(num_middle_conv): cur_conv_list.extend([ nn.Conv2d( c_in, num_middle_filter, kernel_size=3, stride=1, padding=1, bias=False ), nn.BatchNorm2d(num_middle_filter), nn.ReLU() ]) c_in = num_middle_filter cur_conv_list.append(nn.Conv2d( c_in, self.num_anchors_per_location * int(reg_channel), kernel_size=3, stride=1, padding=1, bias=True )) code_size_cnt += reg_channel self.conv_box[f'conv_{reg_name}'] = nn.Sequential(*cur_conv_list) self.conv_box_names.append(f'conv_{reg_name}') for m in self.conv_box.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0) assert code_size_cnt == code_size, f'Code size does not match: {code_size_cnt}:{code_size}' else: self.conv_cls = nn.Conv2d( input_channels, self.num_anchors_per_location * self.num_class, kernel_size=1 ) self.conv_box = nn.Conv2d( input_channels, self.num_anchors_per_location * self.code_size, kernel_size=1 ) if 
def init_weights(self):
    """Initialise the bias of the final classification convolution.

    The bias is set to ``-log((1 - p) / p)`` with ``p = 0.01``, i.e. the logit
    of a 1% prior, so freshly initialised classification outputs start at a
    low foreground score.  ``self.conv_cls`` is either a bare ``nn.Conv2d`` or
    an indexable container whose last entry is the output convolution.
    """
    prior_prob = 0.01
    bias_value = -np.log((1 - prior_prob) / prior_prob)
    if isinstance(self.conv_cls, nn.Conv2d):
        output_conv = self.conv_cls
    else:
        output_conv = self.conv_cls[-1]
    nn.init.constant_(output_conv.bias, bias_value)
def make_multihead(self, input_channels):
    """Build one ``SingleHead`` per entry of ``RPN_HEAD_CFGS`` and store them in ``self.rpn_heads``.

    Args:
        input_channels: number of channels of the feature map fed to every head.

    For each head the number of anchors per location is the sum of the
    per-class anchor counts of the classes listed in its ``HEAD_CLS_NAME``,
    and ``head_label_indices`` holds the 1-based positions of those classes
    in ``self.class_names``.
    """
    head_cfgs = self.model_cfg.RPN_HEAD_CFGS

    # Flat list of every class name in the order the heads declare them.
    # NOTE(review): self.num_anchors_per_location is indexed by position in
    # this list, which presumably matches the anchor-generator config order.
    declared_names = [name for cfg in head_cfgs for name in cfg['HEAD_CLS_NAME']]

    heads = []
    for cfg in head_cfgs:
        head_names = cfg['HEAD_CLS_NAME']
        anchors_per_loc = sum(
            self.num_anchors_per_location[declared_names.index(name)] for name in head_names
        )
        label_indices = torch.from_numpy(
            np.array([self.class_names.index(name) + 1 for name in head_names])
        )
        # A separate multi-head only predicts its own classes; otherwise every
        # head predicts scores for all classes.
        head_num_class = len(head_names) if self.separate_multihead else self.num_class
        heads.append(
            SingleHead(
                self.model_cfg,
                input_channels,
                head_num_class,
                anchors_per_loc,
                self.box_coder.code_size,
                cfg,
                head_label_indices=label_indices,
                separate_reg_config=self.model_cfg.get('SEPARATE_REG_CONFIG', None),
            )
        )
    self.rpn_heads = nn.ModuleList(heads)
def get_cls_layer_loss(self):
    """Compute the classification loss summed over all RPN heads.

    Reads ``cls_preds`` (a tensor, or a list with one tensor per head) and
    ``box_cls_labels`` from ``self.forward_ret_dict``.  Labels ``> 0`` are
    positives, ``== 0`` negatives and ``< 0`` are ignored.  Per-anchor weights
    are normalised by the number of positives of each sample (at least 1).

    Returns:
        (loss, tb_dict): the weighted loss and ``{'rpn_loss_cls': float}``.
    """
    weight_cfg = self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS
    if 'pos_cls_weight' in weight_cfg:
        pos_w = weight_cfg['pos_cls_weight']
        neg_w = weight_cfg['neg_cls_weight']
    else:
        pos_w = neg_w = 1.0

    head_preds = self.forward_ret_dict['cls_preds']
    labels = self.forward_ret_dict['box_cls_labels']
    if not isinstance(head_preds, list):
        head_preds = [head_preds]
    batch_size = int(head_preds[0].shape[0])

    is_cared = labels >= 0  # [N, num_anchors]
    is_pos = labels > 0
    is_neg = labels == 0
    anchor_weights = (is_neg * 1.0 * neg_w + pos_w * is_pos).float()

    if self.num_class == 1:
        # class agnostic: collapse every positive label to 1 (in place)
        labels[is_pos] = 1

    num_pos = is_pos.sum(1, keepdim=True).float()
    anchor_weights /= torch.clamp(num_pos, min=1.0)

    # Ignored anchors (label < 0) are mapped to class 0, whose column is dropped below.
    targets = labels * is_cared.type_as(labels)
    one_hot = torch.zeros(
        *list(targets.shape), self.num_class + 1,
        dtype=head_preds[0].dtype, device=targets.device
    )
    one_hot.scatter_(-1, targets.unsqueeze(dim=-1).long(), 1.0)
    one_hot = one_hot[..., 1:]

    anchor_offset = 0
    class_offset = 0
    total_loss = 0
    for head_idx, pred in enumerate(head_preds):
        head_num_class = self.rpn_heads[head_idx].num_class
        pred = pred.view(batch_size, -1, head_num_class)
        anchor_end = anchor_offset + pred.shape[1]

        if self.separate_multihead:
            # Each head only scores its own slice of the class axis.
            head_target = one_hot[:, anchor_offset:anchor_end,
                                  class_offset:class_offset + head_num_class]
            class_offset += head_num_class
        else:
            head_target = one_hot[:, anchor_offset:anchor_end]

        head_weight = anchor_weights[:, anchor_offset:anchor_end]
        per_anchor = self.cls_loss_func(pred, head_target, weights=head_weight)  # [N, M]
        head_loss = per_anchor.sum() / batch_size
        total_loss += head_loss * weight_cfg['cls_weight']
        anchor_offset = anchor_end

    # Every anchor must have been consumed by exactly one head.
    assert anchor_offset == one_hot.shape[1]
    return total_loss, {'rpn_loss_cls': total_loss.item()}
reg_weights = positives.float() pos_normalizer = positives.sum(1, keepdim=True).float() reg_weights /= torch.clamp(pos_normalizer, min=1.0) if not isinstance(box_preds, list): box_preds = [box_preds] batch_size = int(box_preds[0].shape[0]) if isinstance(self.anchors, list): if self.use_multihead: anchors = torch.cat( [anchor.permute(3, 4, 0, 1, 2, 5).contiguous().view(-1, anchor.shape[-1]) for anchor in self.anchors], dim=0 ) else: anchors = torch.cat(self.anchors, dim=-3) else: anchors = self.anchors anchors = anchors.view(1, -1, anchors.shape[-1]).repeat(batch_size, 1, 1) start_idx = 0 box_losses = 0 tb_dict = {} for idx, box_pred in enumerate(box_preds): box_pred = box_pred.view( batch_size, -1, box_pred.shape[-1] // self.num_anchors_per_location if not self.use_multihead else box_pred.shape[-1] ) box_reg_target = box_reg_targets[:, start_idx:start_idx + box_pred.shape[1]] reg_weight = reg_weights[:, start_idx:start_idx + box_pred.shape[1]] # sin(a - b) = sinacosb-cosasinb if box_dir_cls_preds is not None: box_pred_sin, reg_target_sin = self.add_sin_difference(box_pred, box_reg_target) loc_loss_src = self.reg_loss_func(box_pred_sin, reg_target_sin, weights=reg_weight) # [N, M] else: loc_loss_src = self.reg_loss_func(box_pred, box_reg_target, weights=reg_weight) # [N, M] loc_loss = loc_loss_src.sum() / batch_size loc_loss = loc_loss * self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['loc_weight'] box_losses += loc_loss tb_dict['rpn_loss_loc'] = tb_dict.get('rpn_loss_loc', 0) + loc_loss.item() if box_dir_cls_preds is not None: if not isinstance(box_dir_cls_preds, list): box_dir_cls_preds = [box_dir_cls_preds] dir_targets = self.get_direction_target( anchors, box_reg_targets, dir_offset=self.model_cfg.DIR_OFFSET, num_bins=self.model_cfg.NUM_DIR_BINS ) box_dir_cls_pred = box_dir_cls_preds[idx] dir_logit = box_dir_cls_pred.view(batch_size, -1, self.model_cfg.NUM_DIR_BINS) weights = positives.type_as(dir_logit) weights /= torch.clamp(weights.sum(-1, keepdim=True), min=1.0) 
def get_layer(dim, out_dim, init=None):
    """Build a small conv head: 3x3 conv -> BatchNorm -> ReLU -> 1x1 conv.

    Args:
        dim: number of input channels (also the width of the hidden 3x3 conv).
        out_dim: number of output channels of the final 1x1 conv.
        init: when ``None`` the final conv's weights are drawn from
            N(0, 0.001^2); otherwise its bias is filled with this constant
            and its weights keep PyTorch's default initialisation.

    Returns:
        nn.Sequential with the four layers in the order listed above.
    """
    hidden_conv = nn.Conv2d(dim, dim, kernel_size=3, padding=1, bias=True)
    nn.init.normal_(hidden_conv.weight, mean=0, std=0.001)
    norm = nn.BatchNorm2d(dim)
    activation = nn.ReLU()

    output_conv = nn.Conv2d(dim, out_dim, kernel_size=1, bias=True)
    if init is None:
        nn.init.normal_(output_conv.weight, mean=0, std=0.001)
    else:
        nn.init.constant_(output_conv.bias, init)

    return nn.Sequential(hidden_conv, norm, activation, output_conv)
def get_anchor_mask(self, data_dict, shape):
    """Return a boolean BEV mask of the cells that lie near at least one point.

    The feature map is divided into coarse cells of 10x10 fine cells.  Every
    coarse cell containing a point marks the 20x20 block of fine cells that
    starts 10 cells before the coarse cell's origin on both axes.

    Args:
        data_dict: must contain ``"points"``; column 1 is read as x and
            column 2 as y (column 0 is presumably the batch index — confirm
            against the data loader).
        shape: shape of the 2D feature map; only ``shape[-2]`` (rows, y) and
            ``shape[-1]`` (columns, x) are used.

    Returns:
        CPU ``torch.bool`` tensor of shape ``(shape[-2], shape[-1])``.

    All index tensors are moved to the CPU before use: the masks are
    allocated on the CPU, and indexing a CPU tensor with indices that live on
    another device is rejected by recent PyTorch releases (the previous code
    also required CUDA unconditionally).

    NOTE(review): negative fine-cell indices (rows/columns -10..-1 produced by
    coarse cells on the first row/column) wrap around to the far edge of the
    mask, exactly as before.  This is kept on purpose because it changes
    which anchors are selected; confirm whether it is intended.
    """
    cells_per_coarse = 10
    num_rows, num_cols = int(shape[-2]), int(shape[-1])
    coarse_rows = num_rows // cells_per_coarse
    coarse_cols = num_cols // cells_per_coarse

    # Metric size of one coarse cell; the factor 8 is presumably the
    # feature-map stride of the backbone — TODO confirm.
    stride = float(np.round(self.voxel_size * 8. * 10.))
    min_x = self.range[0]
    min_y = self.range[1]

    points = data_dict["points"].detach()
    col = ((points[:, 1] - min_x) / stride).long().clamp(max=coarse_cols - 1).cpu()
    row = ((points[:, 2] - min_y) / stride).long().clamp(max=coarse_rows - 1).cpu()

    occupied = torch.zeros(coarse_rows, coarse_cols, dtype=torch.bool)
    occupied[row, col] = True

    # (K, 2) fine-cell origins [row, col] of the occupied coarse cells.
    origins = occupied.nonzero() * cells_per_coarse
    offsets = torch.arange(-cells_per_coarse, cells_per_coarse)

    # Index tensors broadcast to (K, 20, 20): every (row, col) offset pair.
    rows = origins[:, 0].view(-1, 1, 1) + offsets.view(1, -1, 1)
    cols = origins[:, 1].view(-1, 1, 1) + offsets.view(1, 1, -1)

    mask = torch.zeros(num_rows, num_cols, dtype=torch.bool)
    mask[rows, cols] = True
    return mask
self.forward_ret_dict['cls_preds'] = cls_preds self.forward_ret_dict['box_preds'] = box_preds if self.conv_dir_cls is not None: dir_cls_preds = self.conv_dir_cls(st_features_2d) dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()[:,anchor_mask,:] self.forward_ret_dict['dir_cls_preds'] = dir_cls_preds else: dir_cls_preds = None if self.training: targets_dict = self.assign_targets( gt_boxes=data_dict['gt_boxes'] ) self.forward_ret_dict.update(targets_dict) data_dict['gt_ious'] = targets_dict['gt_ious'] if not self.training or self.predict_boxes_when_training: batch_cls_preds, batch_box_preds = self.generate_predicted_boxes( batch_size=data_dict['batch_size'], cls_preds=cls_preds, box_preds=box_preds, dir_cls_preds=dir_cls_preds ) data_dict['batch_cls_preds'] = batch_cls_preds data_dict['batch_box_preds'] = batch_box_preds data_dict['cls_preds_normalized'] = False if self.model_cfg.get('NMS_CONFIG', None) is not None: self.proposal_layer( data_dict, nms_config=self.model_cfg.NMS_CONFIG['TRAIN' if self.training else 'TEST'] ) return data_dict ================================================ FILE: pcdet/models/dense_heads/anchor_head_template.py ================================================ import numpy as np import torch import torch.nn as nn from ...utils import box_coder_utils, common_utils, loss_utils from .target_assigner.anchor_generator import AnchorGenerator from .target_assigner.atss_target_assigner import ATSSTargetAssigner from .target_assigner.axis_aligned_target_assigner import AxisAlignedTargetAssigner from ...utils.odiou_loss import odiou_3D from ..model_utils.model_nms_utils import class_agnostic_nms import copy class AnchorHeadTemplate(nn.Module): def __init__(self, model_cfg, num_class, class_names, grid_size, point_cloud_range, predict_boxes_when_training): super().__init__() self.model_cfg = model_cfg self.num_class = num_class self.class_names = class_names self.predict_boxes_when_training = predict_boxes_when_training 
@staticmethod
def generate_anchors(anchor_generator_cfg, grid_size, point_cloud_range, anchor_ndim=7):
    """Generate the anchors for every entry of the anchor-generator config.

    Args:
        anchor_generator_cfg: iterable of per-class configs; each must provide
            ``'feature_map_stride'``.
        grid_size: voxel grid size; its first two entries are floor-divided by
            each config's stride (so it presumably is a numpy array — confirm).
        point_cloud_range: forwarded to ``AnchorGenerator`` as ``anchor_range``.
        anchor_ndim: box code size.  When it differs from 7 the anchors are
            padded with ``anchor_ndim - 7`` zero columns on the last axis.

    Returns:
        (anchors_list, num_anchors_per_location_list) as produced by
        ``AnchorGenerator.generate_anchors`` (anchors padded if required).
    """
    generator = AnchorGenerator(
        anchor_range=point_cloud_range,
        anchor_generator_config=anchor_generator_cfg,
    )
    bev_grid = grid_size[:2]
    feature_map_sizes = [bev_grid // cfg['feature_map_stride'] for cfg in anchor_generator_cfg]
    anchors_list, num_anchors_per_location_list = generator.generate_anchors(feature_map_sizes)

    if anchor_ndim == 7:
        return anchors_list, num_anchors_per_location_list

    # Box coders with extra regression targets need zero-filled anchor
    # columns so that anchors and predictions have the same width.
    extra_dims = anchor_ndim - 7
    for pos in range(len(anchors_list)):
        base = anchors_list[pos]
        padding = base.new_zeros([*base.shape[0:-1], extra_dims])
        anchors_list[pos] = torch.cat((base, padding), dim=-1)

    return anchors_list, num_anchors_per_location_list
def proposal_layer(self, batch_dict, nms_config):
    """Turn dense box predictions into a fixed-size set of RoIs via class-agnostic NMS.

    Args:
        batch_dict:
            batch_size:
            batch_cls_preds: (B, num_boxes, num_classes | 1) or (N1+N2+..., num_classes | 1)
            batch_box_preds: (B, num_boxes, 7+C) or (N1+N2+..., 7+C)
            cls_preds_normalized: indicate whether batch_cls_preds is normalized
            batch_index: optional (N1+N2+...)
        nms_config: provides ``NMS_POST_MAXSIZE`` and ``MULTI_CLASSES_NMS`` and
            is forwarded to ``class_agnostic_nms``.

    Returns:
        batch_dict:
            rois: (B, num_rois, 7+C)
            roi_scores: (B, num_rois)
            roi_labels: (B, num_rois), 1-based
            has_class_labels: whether more than one class score was predicted

    If ``batch_dict`` already holds non-None ``rois`` it is returned untouched.
    Slots not filled by NMS stay zero (label 1 after the +1 shift).

    Raises:
        NotImplementedError: when ``nms_config.MULTI_CLASSES_NMS`` is set.
    """
    if batch_dict.get('rois', None) is not None:
        return batch_dict

    batch_size = batch_dict['batch_size']
    all_boxes = batch_dict['batch_box_preds']
    all_scores = batch_dict['batch_cls_preds']

    rois = all_boxes.new_zeros((batch_size, nms_config.NMS_POST_MAXSIZE, all_boxes.shape[-1]))
    roi_scores = all_boxes.new_zeros((batch_size, nms_config.NMS_POST_MAXSIZE))
    roi_labels = all_boxes.new_zeros((batch_size, nms_config.NMS_POST_MAXSIZE), dtype=torch.long)

    for sample_idx in range(batch_size):
        if batch_dict.get('batch_index', None) is None:
            # Batched layout: the leading axis is the sample index.
            assert len(batch_dict['batch_cls_preds'].shape) == 3
            selector = sample_idx
        else:
            # Stacked layout: pick the rows belonging to this sample.
            assert len(all_scores.shape) == 2
            selector = (batch_dict['batch_index'] == sample_idx)

        sample_boxes = all_boxes[selector]
        sample_cls = all_scores[selector]
        best_scores, best_labels = torch.max(sample_cls, dim=1)

        if nms_config.MULTI_CLASSES_NMS:
            raise NotImplementedError
        keep, _ = class_agnostic_nms(
            box_scores=best_scores, box_preds=sample_boxes, nms_config=nms_config
        )

        num_keep = len(keep)
        rois[sample_idx, :num_keep, :] = sample_boxes[keep]
        roi_scores[sample_idx, :num_keep] = best_scores[keep]
        roi_labels[sample_idx, :num_keep] = best_labels[keep]

    batch_dict['rois'] = rois
    batch_dict['roi_scores'] = roi_scores
    batch_dict['roi_labels'] = roi_labels + 1
    batch_dict['has_class_labels'] = bool(all_scores.shape[-1] > 1)
    batch_dict.pop('batch_index', None)
    return batch_dict
@staticmethod
def add_sin_difference(boxes1, boxes2, dim=6):
    """Re-encode one angle column so that its difference equals ``sin(a - b)``.

    Uses ``sin(a - b) = sin(a)cos(b) - cos(a)sin(b)``: column ``dim`` of
    ``boxes1`` becomes ``sin(a) * cos(b)`` and column ``dim`` of ``boxes2``
    becomes ``cos(a) * sin(b)``, where ``a`` / ``b`` are the original values
    of that column in ``boxes1`` / ``boxes2``.  All other columns are copied.

    Args:
        boxes1: tensor whose last axis holds the box code (e.g. predictions).
        boxes2: tensor of matching layout (e.g. regression targets).
        dim: index of the angle column on the last axis.  Negative values
            count from the end; previously ``-1`` was rejected by an
            ``assert`` (and silently dropped the column under ``python -O``).

    Returns:
        (boxes1, boxes2): new tensors with the angle column replaced.
    """
    if dim < 0:
        # Slices such as [dim:dim + 1] break for dim == -1, so work with the
        # equivalent non-negative index instead.
        dim += boxes1.shape[-1]

    angle1 = boxes1[..., dim:dim + 1]
    angle2 = boxes2[..., dim:dim + 1]
    rad_pred_encoding = torch.sin(angle1) * torch.cos(angle2)
    rad_tg_encoding = torch.cos(angle1) * torch.sin(angle2)

    boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, boxes1[..., dim + 1:]], dim=-1)
    boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, boxes2[..., dim + 1:]], dim=-1)
    return boxes1, boxes2
def get_od_loss(self):
    """Compute the auxiliary box loss between decoded positive predictions and their GT boxes.

    Reads ``box_preds``, ``gt_bbs`` and ``box_cls_labels`` from
    ``self.forward_ret_dict``.  Predictions are decoded against
    ``self.anchors`` (a list of tensors, concatenated along axis -3) with
    ``self.box_coder.decode_torch``; only anchors with label ``> 0`` are
    passed to ``self.od_loss_func``.

    Returns:
        ``2 * loss / (num_positives + 1)``.

    The anchors are no longer deep-copied on every call: ``torch.cat`` already
    allocates a fresh tensor and nothing below writes into ``self.anchors``.
    """
    box_preds = self.forward_ret_dict['box_preds']
    gt_bbs = self.forward_ret_dict['gt_bbs']
    box_cls_labels = self.forward_ret_dict['box_cls_labels']
    batch_size = len(box_preds)

    anchors = torch.cat(self.anchors, dim=-3)
    code_size = anchors.shape[-1]
    flat_anchors = anchors.view(-1, code_size)
    num_anchors = flat_anchors.shape[0]
    batch_anchors = anchors.view(1, -1, code_size).repeat(batch_size, 1, 1)

    batch_box_preds = box_preds.view(batch_size, num_anchors, -1)
    batch_box_preds = self.box_coder.decode_torch(batch_box_preds, batch_anchors)

    # Flatten batch and anchor axes so one boolean mask selects the positives.
    positives = (box_cls_labels > 0).view(-1,)
    gt_bbs = gt_bbs.view(-1, code_size)
    batch_box_preds = batch_box_preds.view(-1, code_size)

    loss = self.od_loss_func(gt_bbs[positives], batch_box_preds[positives], 1, batch_size)
    # The +1 keeps the normaliser non-zero when a batch has no positives.
    loss = 2 * loss / (positives.sum() + 1)
    return loss
class SeparateHead(nn.Module):
    """A bundle of independent conv branches, one per entry of ``sep_head_dict``.

    Every branch is ``num_conv - 1`` blocks of (3x3 conv, BatchNorm, ReLU)
    followed by a 3x3 output conv, and is registered as a sub-module under the
    branch name.  ``forward`` applies every branch to the same input.
    """

    def __init__(self, input_channels, sep_head_dict, init_bias=-2.19, use_bias=False):
        """
        Args:
            input_channels: channels of the shared input feature map.
            sep_head_dict: mapping ``name -> {'out_channels': int, 'num_conv': int}``.
            init_bias: bias constant for the output conv of branches whose
                name contains ``'hm'``.
            use_bias: whether the intermediate convs (those followed by
                BatchNorm) have a bias term.
        """
        super().__init__()
        self.sep_head_dict = sep_head_dict

        for branch_name in self.sep_head_dict:
            branch_cfg = self.sep_head_dict[branch_name]
            out_channels = branch_cfg['out_channels']
            hidden_blocks = branch_cfg['num_conv'] - 1

            stages = [
                nn.Sequential(
                    nn.Conv2d(input_channels, input_channels, kernel_size=3,
                              stride=1, padding=1, bias=use_bias),
                    nn.BatchNorm2d(input_channels),
                    nn.ReLU(),
                )
                for _ in range(hidden_blocks)
            ]
            stages.append(
                nn.Conv2d(input_channels, out_channels, kernel_size=3,
                          stride=1, padding=1, bias=True)
            )
            branch = nn.Sequential(*stages)

            if 'hm' in branch_name:
                # Heat-map branch: only the output bias is overridden.
                branch[-1].bias.data.fill_(init_bias)
            else:
                # Regression branches: Kaiming weights and zero bias on every conv.
                for module in branch.modules():
                    if not isinstance(module, nn.Conv2d):
                        continue
                    kaiming_normal_(module.weight.data)
                    if hasattr(module, "bias") and module.bias is not None:
                        nn.init.constant_(module.bias, 0)

            setattr(self, branch_name, branch)

    def forward(self, x):
        """Return ``{branch_name: branch(x)}`` for every configured branch."""
        return {name: getattr(self, name)(x) for name in self.sep_head_dict}
def build_losses(self):
    """Register the heat-map and box-regression loss modules on this head.

    After the call ``self.hm_loss_func`` is a ``FocalLossCenterNet`` and
    ``self.reg_loss_func`` a ``RegLossCenterNet`` instance.
    """
    heatmap_loss = loss_utils.FocalLossCenterNet()
    self.add_module('hm_loss_func', heatmap_loss)

    regression_loss = loss_utils.RegLossCenterNet()
    self.add_module('reg_loss_func', regression_loss)
assign_target_of_single_head( self, num_classes, gt_boxes, feature_map_size, feature_map_stride, num_max_objs=500, gaussian_overlap=0.1, min_radius=2 ): """ Args: gt_boxes: (N, 8) feature_map_size: (2), [x, y] Returns: """ heatmap = gt_boxes.new_zeros(num_classes, feature_map_size[1], feature_map_size[0]) ret_boxes = gt_boxes.new_zeros((num_max_objs, gt_boxes.shape[-1] - 1 + 1)) inds = gt_boxes.new_zeros(num_max_objs).long() mask = gt_boxes.new_zeros(num_max_objs).long() x, y, z = gt_boxes[:, 0], gt_boxes[:, 1], gt_boxes[:, 2] coord_x = (x - self.point_cloud_range[0]) / self.voxel_size[0] / feature_map_stride coord_y = (y - self.point_cloud_range[1]) / self.voxel_size[1] / feature_map_stride coord_x = torch.clamp(coord_x, min=0, max=feature_map_size[0] - 0.5) # bugfixed: 1e-6 does not work for center.int() coord_y = torch.clamp(coord_y, min=0, max=feature_map_size[1] - 0.5) # center = torch.cat((coord_x[:, None], coord_y[:, None]), dim=-1) center_int = center.int() center_int_float = center_int.float() dx, dy, dz = gt_boxes[:, 3], gt_boxes[:, 4], gt_boxes[:, 5] dx = dx / self.voxel_size[0] / feature_map_stride dy = dy / self.voxel_size[1] / feature_map_stride radius = centernet_utils.gaussian_radius(dx, dy, min_overlap=gaussian_overlap) radius = torch.clamp_min(radius.int(), min=min_radius) for k in range(min(num_max_objs, gt_boxes.shape[0])): if dx[k] <= 0 or dy[k] <= 0: continue if not (0 <= center_int[k][0] <= feature_map_size[0] and 0 <= center_int[k][1] <= feature_map_size[1]): continue cur_class_id = (gt_boxes[k, -1] - 1).long() centernet_utils.draw_gaussian_to_heatmap(heatmap[cur_class_id], center[k], radius[k].item()) inds[k] = center_int[k, 1] * feature_map_size[0] + center_int[k, 0] mask[k] = 1 ret_boxes[k, 0:2] = center[k] - center_int_float[k].float() ret_boxes[k, 2] = z[k] ret_boxes[k, 3:6] = gt_boxes[k, 3:6].log() ret_boxes[k, 6] = torch.cos(gt_boxes[k, 6]) ret_boxes[k, 7] = torch.sin(gt_boxes[k, 6]) if gt_boxes.shape[1] > 8: ret_boxes[k, 8:] = 
def assign_targets(self, gt_boxes, feature_map_size=None, **kwargs):
    """
    Build the training targets of every head for a batch of ground-truth boxes.

    Args:
        gt_boxes: (B, M, C) tensor whose last channel is the class index into
            ``['bg', *self.class_names]`` (index 0 therefore selects 'bg').
            The tensor is NOT modified.
        feature_map_size: (2) [H, W]; reversed to [x, y] before use.
        **kwargs: accepted for call-site compatibility and ignored here.

    Returns:
        dict with the keys 'heatmaps', 'target_boxes', 'inds' and 'masks'; each holds
        one entry per head, stacked over the batch dimension. The key 'heatmap_masks'
        is present but left as an empty list.
    """
    feature_map_size = feature_map_size[::-1]  # [H, W] ==> [x, y]
    target_assigner_cfg = self.model_cfg.TARGET_ASSIGNER_CONFIG

    batch_size = gt_boxes.shape[0]
    ret_dict = {
        'heatmaps': [],
        'target_boxes': [],
        'inds': [],
        'masks': [],
        'heatmap_masks': []
    }

    all_names = np.array(['bg', *self.class_names])
    for cur_class_names in self.class_names_each_head:
        heatmap_list, target_boxes_list, inds_list, masks_list = [], [], [], []
        for bs_idx in range(batch_size):
            cur_gt_boxes = gt_boxes[bs_idx]
            gt_class_names = all_names[cur_gt_boxes[:, -1].cpu().long().numpy()]

            gt_boxes_single_head = []
            for box_idx, name in enumerate(gt_class_names):
                if name not in cur_class_names:
                    continue
                # Indexing a row yields a view of gt_boxes. Clone it before rewriting
                # the label to the head-local index, otherwise the global class ids of
                # the input are overwritten and later heads (and any later consumer
                # of gt_boxes) see wrong classes.
                temp_box = cur_gt_boxes[box_idx].clone()
                temp_box[-1] = cur_class_names.index(name) + 1
                gt_boxes_single_head.append(temp_box[None, :])

            if len(gt_boxes_single_head) == 0:
                gt_boxes_single_head = cur_gt_boxes[:0, :]
            else:
                gt_boxes_single_head = torch.cat(gt_boxes_single_head, dim=0)

            # Targets are computed on CPU and moved back to the boxes' device afterwards.
            heatmap, ret_boxes, inds, mask = self.assign_target_of_single_head(
                num_classes=len(cur_class_names), gt_boxes=gt_boxes_single_head.cpu(),
                feature_map_size=feature_map_size,
                feature_map_stride=target_assigner_cfg.FEATURE_MAP_STRIDE,
                num_max_objs=target_assigner_cfg.NUM_MAX_OBJS,
                gaussian_overlap=target_assigner_cfg.GAUSSIAN_OVERLAP,
                min_radius=target_assigner_cfg.MIN_RADIUS,
            )
            heatmap_list.append(heatmap.to(gt_boxes_single_head.device))
            target_boxes_list.append(ret_boxes.to(gt_boxes_single_head.device))
            inds_list.append(inds.to(gt_boxes_single_head.device))
            masks_list.append(mask.to(gt_boxes_single_head.device))

        ret_dict['heatmaps'].append(torch.stack(heatmap_list, dim=0))
        ret_dict['target_boxes'].append(torch.stack(target_boxes_list, dim=0))
        ret_dict['inds'].append(torch.stack(inds_list, dim=0))
        ret_dict['masks'].append(torch.stack(masks_list, dim=0))
    return ret_dict
@staticmethod
def reorder_rois_for_refining(batch_size, pred_dicts):
    """
    Pack per-sample predictions into zero-padded batch tensors.

    Args:
        batch_size: number of samples to pack.
        pred_dicts: list of dicts with 'pred_boxes', 'pred_scores' and 'pred_labels'.

    Returns:
        rois: (batch_size, R, box_dim) boxes, zero-padded
        roi_scores: (batch_size, R) scores, zero-padded
        roi_labels: (batch_size, R) long labels, zero-padded
        where R is the largest box count over ``pred_dicts`` but never less than 1.
    """
    box_counts = [len(sample['pred_boxes']) for sample in pred_dicts]
    # Keep at least one all-zero slot so the returned tensors are never empty.
    padded_len = max(1, max(box_counts))

    template = pred_dicts[0]['pred_boxes']
    rois = template.new_zeros((batch_size, padded_len, template.shape[-1]))
    roi_scores = template.new_zeros((batch_size, padded_len))
    roi_labels = template.new_zeros((batch_size, padded_len)).long()

    for bs_idx in range(batch_size):
        sample = pred_dicts[bs_idx]
        count = len(sample['pred_boxes'])
        rois[bs_idx, :count, :] = sample['pred_boxes']
        roi_scores[bs_idx, :count] = sample['pred_scores']
        roi_labels[bs_idx, :count] = sample['pred_labels']
    return rois, roi_scores, roi_labels
def assign_targets(self, input_dict):
    """
    Compute point-wise classification and box-regression targets.

    Args:
        input_dict:
            point_coords: 2-D tensor of point coordinates, [bs_idx, x, y, z] per row
            gt_boxes: 3-D tensor of ground-truth boxes, (B, M, C)

    Returns:
        The dict produced by ``self.assign_stack_targets`` when called with
        ``ret_box_labels=True`` and ``ret_part_labels=False``.
    """
    point_coords = input_dict['point_coords']
    gt_boxes = input_dict['gt_boxes']
    assert len(gt_boxes.shape) == 3, 'gt_boxes.shape=%s' % str(gt_boxes.shape)
    assert len(point_coords.shape) == 2, 'points.shape=%s' % str(point_coords.shape)

    num_samples = gt_boxes.shape[0]
    box_dim = gt_boxes.shape[-1]
    # Enlarged copies of the boxes are passed alongside the originals; with
    # set_ignore_flag=True they are used to derive the ignore region.
    enlarged = box_utils.enlarge_box3d(
        gt_boxes.view(-1, box_dim),
        extra_width=self.model_cfg.TARGET_CONFIG.GT_EXTRA_WIDTH
    )
    extend_gt_boxes = enlarged.view(num_samples, -1, box_dim)

    return self.assign_stack_targets(
        points=point_coords,
        gt_boxes=gt_boxes,
        extend_gt_boxes=extend_gt_boxes,
        set_ignore_flag=True,
        use_ball_constraint=False,
        ret_part_labels=False,
        ret_box_labels=True
    )
def forward(self, batch_dict):
    """
    Predict per-point class scores and boxes; during training also build the targets.

    Args:
        batch_dict:
            point_features: per-point features (used by default)
            point_features_before_fusion: used instead when the config flag
                USE_POINT_FEATURES_BEFORE_FUSION is set
            point_coords: rows of [bs_idx, x, y, z]
            gt_boxes (training only): (B, M, C). When C > 8 the entry is replaced
                by a new (B, M, 8) tensor made of the first 7 channels plus the last
                channel; the tensor passed in is left untouched.
    Returns:
        batch_dict with 'point_cls_scores' added, plus 'batch_cls_preds',
        'batch_box_preds', 'batch_index' and 'cls_preds_normalized' when boxes
        are generated. The raw predictions (and training labels) are stored in
        ``self.forward_ret_dict``.
    """
    if self.training and batch_dict['gt_boxes'].shape[-1] > 8:
        gt_boxes = batch_dict['gt_boxes']
        # Build a fresh tensor instead of writing through a slice view: the old
        # in-place assignment overwrote channel 7 of the caller's original tensor.
        batch_dict['gt_boxes'] = torch.cat((gt_boxes[..., 0:7], gt_boxes[..., -1:]), dim=-1)

    if self.model_cfg.get('USE_POINT_FEATURES_BEFORE_FUSION', False):
        point_features = batch_dict['point_features_before_fusion']
    else:
        point_features = batch_dict['point_features']
    point_cls_preds = self.cls_layers(point_features)  # (total_points, num_class)
    point_box_preds = self.box_layers(point_features)  # (total_points, box_code_size)

    # The per-point score is the sigmoid of the largest class logit.
    point_cls_preds_max, _ = point_cls_preds.max(dim=-1)
    batch_dict['point_cls_scores'] = torch.sigmoid(point_cls_preds_max)

    ret_dict = {'point_cls_preds': point_cls_preds,
                'point_box_preds': point_box_preds}
    if self.training:
        targets_dict = self.assign_targets(batch_dict)
        ret_dict['point_cls_labels'] = targets_dict['point_cls_labels']
        ret_dict['point_box_labels'] = targets_dict['point_box_labels']

    if not self.training or self.predict_boxes_when_training:
        point_cls_preds, point_box_preds = self.generate_predicted_boxes(
            points=batch_dict['point_coords'][:, 1:4],
            point_cls_preds=point_cls_preds, point_box_preds=point_box_preds
        )
        batch_dict['batch_cls_preds'] = point_cls_preds
        batch_dict['batch_box_preds'] = point_box_preds
        batch_dict['batch_index'] = batch_dict['point_coords'][:, 0]
        batch_dict['cls_preds_normalized'] = False

    self.forward_ret_dict = ret_dict
    return batch_dict
def forward(self, batch_dict):
    """
    Score every point and, in training mode, attach the classification labels.

    Args:
        batch_dict:
            point_features: per-point features (used by default)
            point_features_before_fusion: used instead when the config flag
                USE_POINT_FEATURES_BEFORE_FUSION is set
            remaining entries are whatever ``self.assign_targets`` needs (training only)
    Returns:
        batch_dict with 'point_cls_scores' added: the per-point maximum over
        classes of the sigmoid of the classification logits. The logits (and the
        training labels) are stored in ``self.forward_ret_dict``.
    """
    use_pre_fusion = self.model_cfg.get('USE_POINT_FEATURES_BEFORE_FUSION', False)
    feature_key = 'point_features_before_fusion' if use_pre_fusion else 'point_features'
    cls_logits = self.cls_layers(batch_dict[feature_key])  # (total_points, num_class)

    batch_dict['point_cls_scores'] = torch.sigmoid(cls_logits).max(dim=-1)[0]

    outputs = {'point_cls_preds': cls_logits}
    if self.training:
        outputs['point_cls_labels'] = self.assign_targets(batch_dict)['point_cls_labels']
    self.forward_ret_dict = outputs

    return batch_dict
@staticmethod
def make_fc_layers(fc_cfg, input_channels, output_channels):
    """
    Build a point-wise MLP as an ``nn.Sequential``.

    Every width in ``fc_cfg`` contributes a bias-free ``Linear``, a ``BatchNorm1d``
    and a ``ReLU``; a final ``Linear`` with bias maps to ``output_channels``.

    Args:
        fc_cfg: sequence of hidden widths (may be empty)
        input_channels: feature size fed to the first layer
        output_channels: feature size produced by the last layer
    """
    modules = []
    in_width = input_channels
    for hidden_width in fc_cfg:
        modules.append(nn.Linear(in_width, hidden_width, bias=False))
        modules.append(nn.BatchNorm1d(hidden_width))
        modules.append(nn.ReLU())
        in_width = hidden_width
    modules.append(nn.Linear(in_width, output_channels, bias=True))
    return nn.Sequential(*modules)
batch_size = gt_boxes.shape[0] bs_idx = points[:, 0] point_cls_labels = points.new_zeros(points.shape[0]).long() point_box_labels = gt_boxes.new_zeros((points.shape[0], 8)) if ret_box_labels else None point_part_labels = gt_boxes.new_zeros((points.shape[0], 3)) if ret_part_labels else None for k in range(batch_size): bs_mask = (bs_idx == k) points_single = points[bs_mask][:, 1:4] point_cls_labels_single = point_cls_labels.new_zeros(bs_mask.sum()) box_idxs_of_pts = roiaware_pool3d_utils.points_in_boxes_gpu( points_single.unsqueeze(dim=0), gt_boxes[k:k + 1, :, 0:7].contiguous() ).long().squeeze(dim=0) box_fg_flag = (box_idxs_of_pts >= 0) if set_ignore_flag: extend_box_idxs_of_pts = roiaware_pool3d_utils.points_in_boxes_gpu( points_single.unsqueeze(dim=0), extend_gt_boxes[k:k+1, :, 0:7].contiguous() ).long().squeeze(dim=0) fg_flag = box_fg_flag ignore_flag = fg_flag ^ (extend_box_idxs_of_pts >= 0) point_cls_labels_single[ignore_flag] = -1 elif use_ball_constraint: box_centers = gt_boxes[k][box_idxs_of_pts][:, 0:3].clone() box_centers[:, 2] += gt_boxes[k][box_idxs_of_pts][:, 5] / 2 ball_flag = ((box_centers - points_single).norm(dim=1) < central_radius) fg_flag = box_fg_flag & ball_flag else: raise NotImplementedError gt_box_of_fg_points = gt_boxes[k][box_idxs_of_pts[fg_flag]] point_cls_labels_single[fg_flag] = 1 if self.num_class == 1 else gt_box_of_fg_points[:, -1].long() point_cls_labels[bs_mask] = point_cls_labels_single if ret_box_labels: point_box_labels_single = point_box_labels.new_zeros((bs_mask.sum(), 8)) fg_point_box_labels = self.box_coder.encode_torch( gt_boxes=gt_box_of_fg_points[:, :-1], points=points_single[fg_flag], gt_classes=gt_box_of_fg_points[:, -1].long() ) point_box_labels_single[fg_flag] = fg_point_box_labels point_box_labels[bs_mask] = point_box_labels_single if ret_part_labels: point_part_labels_single = point_part_labels.new_zeros((bs_mask.sum(), 3)) transformed_points = points_single[fg_flag] - gt_box_of_fg_points[:, 0:3] transformed_points 
def get_cls_layer_loss(self, tb_dict=None):
    """
    Compute the weighted point classification loss from ``self.forward_ret_dict``.

    Labels > 0 are positives, 0 is background and negative labels get weight 0.
    All non-ignored points share the weight 1 / max(number of positives, 1).

    Args:
        tb_dict: optional dict to receive the logged scalars (updated in place)
    Returns:
        point_loss_cls: summed loss scaled by LOSS_WEIGHTS['point_cls_weight']
        tb_dict: the dict with 'point_loss_cls' and 'point_pos_num' set
    """
    labels = self.forward_ret_dict['point_cls_labels'].view(-1)
    logits = self.forward_ret_dict['point_cls_preds'].view(-1, self.num_class)

    is_positive = labels > 0
    is_background = labels == 0
    num_positive = is_positive.sum(dim=0).float()
    weights = (is_background * 1.0 + 1.0 * is_positive).float()
    weights = weights / torch.clamp(num_positive, min=1.0)

    # Negative (ignored) labels are mapped to column 0, which is dropped below
    # together with the background column.
    scatter_index = (labels * (labels >= 0).long()).unsqueeze(dim=-1).long()
    one_hot = logits.new_zeros(*list(labels.shape), self.num_class + 1)
    one_hot.scatter_(-1, scatter_index, 1.0)
    targets = one_hot[..., 1:]

    loss = self.cls_loss_func(logits, targets, weights=weights).sum()
    loss = loss * self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['point_cls_weight']

    summary = {} if tb_dict is None else tb_dict
    summary['point_loss_cls'] = loss.item()
    summary['point_pos_num'] = num_positive.item()
    return loss, summary
def get_box_layer_loss(self, tb_dict=None):
    """
    Compute the point box-regression loss from ``self.forward_ret_dict``.

    Only points with a class label > 0 contribute; each gets the weight
    1 / max(number of positives, 1).

    Args:
        tb_dict: optional dict to receive the logged scalar (updated in place)
    Returns:
        point_loss_box: summed loss scaled by LOSS_WEIGHTS['point_box_weight']
        tb_dict: the dict with 'point_loss_box' set
    """
    is_positive = self.forward_ret_dict['point_cls_labels'] > 0
    box_targets = self.forward_ret_dict['point_box_labels']
    box_preds = self.forward_ret_dict['point_box_preds']

    num_positive = is_positive.sum().float()
    weights = is_positive.float() / torch.clamp(num_positive, min=1.0)

    # The loss function is called with a leading dimension of size 1 on every input.
    raw_loss = self.reg_loss_func(
        box_preds.unsqueeze(0), box_targets.unsqueeze(0), weights=weights.unsqueeze(0)
    )
    loss = raw_loss.sum() * self.model_cfg.LOSS_CONFIG.LOSS_WEIGHTS['point_box_weight']

    summary = {} if tb_dict is None else tb_dict
    summary['point_loss_box'] = loss.item()
    return loss, summary
def __init__(self, num_class, input_channels, model_cfg, predict_boxes_when_training=False, **kwargs):
    """
    Build the classification and part-regression MLPs and, if the target config
    names a BOX_CODER, the box coder together with a box-regression MLP.

    Args:
        num_class: output width of the classification MLP
        input_channels: feature size of the incoming point features
        model_cfg: head config providing CLS_FC, PART_FC, TARGET_CONFIG and, when a
            box coder is configured, REG_FC
        predict_boxes_when_training: stored as-is on the instance
        **kwargs: ignored
    """
    super().__init__(model_cfg=model_cfg, num_class=num_class)
    self.predict_boxes_when_training = predict_boxes_when_training

    cfg = self.model_cfg
    self.cls_layers = self.make_fc_layers(
        fc_cfg=cfg.CLS_FC, input_channels=input_channels, output_channels=num_class
    )
    # Three outputs per point for the part-location branch.
    self.part_reg_layers = self.make_fc_layers(
        fc_cfg=cfg.PART_FC, input_channels=input_channels, output_channels=3
    )

    target_cfg = cfg.TARGET_CONFIG
    if target_cfg.get('BOX_CODER', None) is None:
        # No coder configured: the head has no box branch.
        self.box_layers = None
        return

    coder_cls = getattr(box_coder_utils, target_cfg.BOX_CODER)
    self.box_coder = coder_cls(**target_cfg.BOX_CODER_CONFIG)
    self.box_layers = self.make_fc_layers(
        fc_cfg=cfg.REG_FC,
        input_channels=input_channels,
        output_channels=self.box_coder.code_size
    )
def forward(self, batch_dict):
    """
    Predict per-point class scores and part offsets, plus boxes when a box branch exists.

    Args:
        batch_dict:
            point_features: per-point features
            point_coords: rows of [bs_idx, x, y, z] (read only when boxes are generated)
            remaining entries are whatever ``self.assign_targets`` needs (training only)
    Returns:
        batch_dict with 'point_cls_scores' (max over classes of the sigmoid logits)
        and 'point_part_offset' (sigmoid of the part logits) added, plus
        'batch_cls_preds', 'batch_box_preds', 'batch_index' and
        'cls_preds_normalized' when boxes are generated. Raw predictions and
        training labels are stored in ``self.forward_ret_dict``.
    """
    features = batch_dict['point_features']
    cls_logits = self.cls_layers(features)  # (total_points, num_class)
    part_logits = self.part_reg_layers(features)
    has_box_branch = self.box_layers is not None

    outputs = {'point_cls_preds': cls_logits, 'point_part_preds': part_logits}
    if has_box_branch:
        outputs['point_box_preds'] = self.box_layers(features)

    batch_dict['point_cls_scores'] = torch.sigmoid(cls_logits).max(dim=-1)[0]
    batch_dict['point_part_offset'] = torch.sigmoid(part_logits)

    if self.training:
        targets = self.assign_targets(batch_dict)
        outputs['point_cls_labels'] = targets['point_cls_labels']
        # These two may be missing from the targets; they are then stored as None.
        for optional_key in ('point_part_labels', 'point_box_labels'):
            outputs[optional_key] = targets.get(optional_key)

    if has_box_branch and (not self.training or self.predict_boxes_when_training):
        decoded_cls, decoded_boxes = self.generate_predicted_boxes(
            points=batch_dict['point_coords'][:, 1:4],
            point_cls_preds=cls_logits,
            point_box_preds=outputs['point_box_preds']
        )
        batch_dict['batch_cls_preds'] = decoded_cls
        batch_dict['batch_box_preds'] = decoded_boxes
        batch_dict['batch_index'] = batch_dict['point_coords'][:, 0]
        batch_dict['cls_preds_normalized'] = False

    self.forward_ret_dict = outputs
    return batch_dict
if __name__ == '__main__':
    # Manual smoke test: build one anchor set and generate its anchors.
    # NOTE: generate_anchors moves its tensors to CUDA, so this needs a GPU.
    from easydict import EasyDict
    config = [
        EasyDict({
            'anchor_sizes': [[2.1, 4.7, 1.7], [0.86, 0.91, 1.73], [0.84, 1.78, 1.78]],
            'anchor_rotations': [0, 1.57],
            # AnchorGenerator.__init__ reads 'anchor_bottom_heights'; the former key
            # 'anchor_heights' raised KeyError before any anchor was generated.
            'anchor_bottom_heights': [0, 0.5]
        })
    ]

    A = AnchorGenerator(
        anchor_range=[-75.2, -75.2, -2, 75.2, 75.2, 4],
        anchor_generator_config=config
    )
    # The leftover pdb breakpoint was removed so the script also runs
    # non-interactively; the result is printed instead.
    all_anchors, num_anchors_per_location = A.generate_anchors([[188, 188]])
    print('anchor shapes:', [tuple(anchors.shape) for anchors in all_anchors])
    print('anchors per location:', num_anchors_per_location)
def assign_targets(self, anchors_list, gt_boxes_with_classes, use_multihead=False):
    """
    Assign targets for every anchor set and every sample of the batch.

    Args:
        anchors_list: one anchor tensor or a list of them; each is flattened to
            (-1, box_dim). With ``use_multihead`` the tensor is first permuted
            with (3, 4, 0, 1, 2, 5).
        gt_boxes_with_classes: (B, M, C) boxes whose last channel is the class
        use_multihead: whether to apply the multi-head anchor permutation
    Returns:
        dict with 'box_cls_labels', 'box_reg_targets' and 'reg_weights'; results
        are stacked over the batch and, when several anchor sets are given,
        concatenated along dim 1.
    """
    if isinstance(anchors_list, list):
        single_set_of_anchor = len(anchors_list) == 1
    else:
        anchors_list = [anchors_list]
        single_set_of_anchor = True

    per_set = {'box_cls_labels': [], 'box_reg_targets': [], 'reg_weights': []}
    for anchors in anchors_list:
        batch_size = gt_boxes_with_classes.shape[0]
        gt_classes = gt_boxes_with_classes[:, :, -1]
        gt_boxes = gt_boxes_with_classes[:, :, :-1]

        box_dim = anchors.shape[-1]
        if use_multihead:
            anchors = anchors.permute(3, 4, 0, 1, 2, 5).contiguous()
        flat_anchors = anchors.view(-1, box_dim)

        labels_per_sample, targets_per_sample, weights_per_sample = [], [], []
        for k in range(batch_size):
            sample_boxes = gt_boxes[k]
            # Strip trailing rows that sum to zero (padding); row 0 is always kept.
            last = len(sample_boxes) - 1
            while last > 0 and sample_boxes[last].sum() == 0:
                last -= 1
            result = self.assign_targets_single(
                flat_anchors, sample_boxes[:last + 1], gt_classes[k][:last + 1]
            )
            labels_per_sample.append(result[0])
            targets_per_sample.append(result[1])
            weights_per_sample.append(result[2])

        per_set['box_cls_labels'].append(torch.stack(labels_per_sample, dim=0))
        per_set['box_reg_targets'].append(torch.stack(targets_per_sample, dim=0))
        per_set['reg_weights'].append(torch.stack(weights_per_sample, dim=0))

    if single_set_of_anchor:
        return {key: values[0] for key, values in per_set.items()}
    return {key: torch.cat(values, dim=1) for key, values in per_set.items()}
assign_targets_single(self, anchors, gt_boxes, gt_classes): """ Args: anchors: (N, 7) [x, y, z, dx, dy, dz, heading] gt_boxes: (M, 7) [x, y, z, dx, dy, dz, heading] gt_classes: (M) Returns: """ num_anchor = anchors.shape[0] num_gt = gt_boxes.shape[0] # select topk anchors for each gt_boxes if self.match_height: ious = iou3d_nms_utils.boxes_iou3d_gpu(anchors[:, 0:7], gt_boxes[:, 0:7]) # (N, M) else: ious = iou3d_nms_utils.boxes_iou_bev(anchors[:, 0:7], gt_boxes[:, 0:7]) distance = (anchors[:, None, 0:3] - gt_boxes[None, :, 0:3]).norm(dim=-1) # (N, M) _, topk_idxs = distance.topk(self.topk, dim=0, largest=False) # (K, M) candidate_ious = ious[topk_idxs, torch.arange(num_gt)] # (K, M) iou_mean_per_gt = candidate_ious.mean(dim=0) iou_std_per_gt = candidate_ious.std(dim=0) iou_thresh_per_gt = iou_mean_per_gt + iou_std_per_gt + 1e-6 is_pos = candidate_ious >= iou_thresh_per_gt[None, :] # (K, M) # check whether anchor_center in gt_boxes, only check BEV x-y axes candidate_anchors = anchors[topk_idxs.view(-1)] # (KxM, 7) gt_boxes_of_each_anchor = gt_boxes[:, :].repeat(self.topk, 1) # (KxM, 7) xyz_local = candidate_anchors[:, 0:3] - gt_boxes_of_each_anchor[:, 0:3] xyz_local = common_utils.rotate_points_along_z( xyz_local[:, None, :], -gt_boxes_of_each_anchor[:, 6] ).squeeze(dim=1) xy_local = xyz_local[:, 0:2] lw = gt_boxes_of_each_anchor[:, 3:5][:, [1, 0]] # bugfixed: w ==> y, l ==> x in local coords is_in_gt = ((xy_local <= lw / 2) & (xy_local >= -lw / 2)).all(dim=-1).view(-1, num_gt) # (K, M) is_pos = is_pos & is_in_gt # (K, M) for ng in range(num_gt): topk_idxs[:, ng] += ng * num_anchor # select the highest IoU if an anchor box is assigned with multiple gt_boxes INF = -0x7FFFFFFF ious_inf = torch.full_like(ious, INF).t().contiguous().view(-1) # (MxN) index = topk_idxs.view(-1)[is_pos.view(-1)] ious_inf[index] = ious.t().contiguous().view(-1)[index] ious_inf = ious_inf.view(num_gt, -1).t() # (N, M) anchors_to_gt_values, anchors_to_gt_indexs = ious_inf.max(dim=1) # match 
the gt_boxes to the anchors which have maximum iou with them max_iou_of_each_gt, argmax_iou_of_each_gt = ious.max(dim=0) anchors_to_gt_indexs[argmax_iou_of_each_gt] = torch.arange(0, num_gt, device=ious.device) anchors_to_gt_values[argmax_iou_of_each_gt] = max_iou_of_each_gt cls_labels = gt_classes[anchors_to_gt_indexs] cls_labels[anchors_to_gt_values == INF] = 0 matched_gts = gt_boxes[anchors_to_gt_indexs] pos_mask = cls_labels > 0 reg_targets = matched_gts.new_zeros((num_anchor, self.box_coder.code_size)) reg_weights = matched_gts.new_zeros(num_anchor) if pos_mask.sum() > 0: reg_targets[pos_mask > 0] = self.box_coder.encode_torch(matched_gts[pos_mask > 0], anchors[pos_mask > 0]) reg_weights[pos_mask] = 1.0 return cls_labels, reg_targets, reg_weights ================================================ FILE: pcdet/models/dense_heads/target_assigner/axis_aligned_target_assigner.py ================================================ import numpy as np import torch from ....ops.iou3d_nms import iou3d_nms_utils from ....utils import box_utils import time class AxisAlignedTargetAssigner(object): def __init__(self, model_cfg, class_names, box_coder, grid_size, point_cloud_range, match_height=False,): super().__init__() anchor_generator_cfg = model_cfg.ANCHOR_GENERATOR_CONFIG anchor_target_cfg = model_cfg.TARGET_ASSIGNER_CONFIG self.grid_size = grid_size # [1408 1600 40] self.point_cloud_range = point_cloud_range # [ 0. -40. -3. 70.4 40. 1. 
] self.voxel_size = (point_cloud_range[3]-point_cloud_range[0])/grid_size[0] self.feature_map_stride = [config['feature_map_stride'] for config in anchor_generator_cfg] self.box_coder = box_coder self.match_height = match_height self.class_names = np.array(class_names) self.anchor_class_names = [config['class_name'] for config in anchor_generator_cfg] self.pos_fraction = anchor_target_cfg.POS_FRACTION if anchor_target_cfg.POS_FRACTION >= 0 else None self.sample_size = anchor_target_cfg.SAMPLE_SIZE self.norm_by_num_examples = anchor_target_cfg.NORM_BY_NUM_EXAMPLES self.matched_thresholds = {} self.unmatched_thresholds = {} for config in anchor_generator_cfg: self.matched_thresholds[config['class_name']] = config['matched_threshold'] self.unmatched_thresholds[config['class_name']] = config['unmatched_threshold'] self.use_multihead = model_cfg.get('USE_MULTIHEAD', False) self.seperate_multihead = model_cfg.get('SEPERATE_MULTIHEAD', False) if self.seperate_multihead: rpn_head_cfgs = model_cfg.RPN_HEAD_CFGS self.gt_remapping = {} for rpn_head_cfg in rpn_head_cfgs: for idx, name in enumerate(rpn_head_cfg['HEAD_CLS_NAME']): self.gt_remapping[name] = idx + 1 def assign_targets(self, all_anchors, gt_boxes_with_classes): """ Args: all_anchors: [(N, 7), ...] 
gt_boxes: (B, M, 8) Returns: """ bbox_targets = [] cls_labels = [] reg_weights = [] gt_ious = [] batch_size = gt_boxes_with_classes.shape[0] gt_classes = gt_boxes_with_classes[:, :, -1] gt_boxes = gt_boxes_with_classes[:, :, :-1] for k in range(batch_size): cur_gt = gt_boxes[k] cnt = cur_gt.__len__() - 1 while cnt > 0 and cur_gt[cnt].sum() == 0: cnt -= 1 cur_gt = cur_gt[:cnt + 1] cur_gt_classes = gt_classes[k][:cnt + 1].int() target_list = [] for anchor_class_name, anchors in zip(self.anchor_class_names, all_anchors): if cur_gt_classes.shape[0] > 1: mask = torch.from_numpy(self.class_names[cur_gt_classes.cpu() - 1] == anchor_class_name) else: mask = torch.tensor([self.class_names[c - 1] == anchor_class_name for c in cur_gt_classes], dtype=torch.bool) this_gt = cur_gt[mask] if self.use_multihead: anchors = anchors.permute(3, 4, 0, 1, 2, 5).contiguous().view(-1, anchors.shape[-1]) if self.seperate_multihead: selected_classes = cur_gt_classes[mask].clone() if len(selected_classes) > 0: new_cls_id = self.gt_remapping[anchor_class_name] selected_classes[:] = new_cls_id else: selected_classes = cur_gt_classes[mask] else: feature_map_size = anchors.shape[:2] #1,376,376 (y,x) anchors = anchors.view(-1, anchors.shape[-1]) selected_classes = cur_gt_classes[mask] single_target = self.assign_targets_single( anchors, this_gt, gt_classes=selected_classes, matched_threshold=self.matched_thresholds[anchor_class_name], unmatched_threshold=self.unmatched_thresholds[anchor_class_name] ) target_list.append(single_target) if self.use_multihead: target_dict = { 'box_cls_labels': [t['box_cls_labels'].view(-1) for t in target_list], 'box_reg_targets': [t['box_reg_targets'].view(-1, self.box_coder.code_size) for t in target_list], 'reg_weights': [t['reg_weights'].view(-1) for t in target_list], 'gt_ious': [t['gt_ious'].view(-1) for t in target_list], } target_dict['box_reg_targets'] = torch.cat(target_dict['box_reg_targets'], dim=0) target_dict['box_cls_labels'] = 
def assign_targets_single(self, anchors, gt_boxes, gt_classes, matched_threshold=0.6, unmatched_threshold=0.45):
    """
    Match one set of flattened anchors against the GT boxes of one sample.

    Args:
        anchors: (N, C) anchors, the first 7 channels are used for IoU
        gt_boxes: (M, C) ground-truth boxes for the same class group
        gt_classes: (M) class ids, written into the int32 label tensor
        matched_threshold: anchors with max IoU >= this become foreground
        unmatched_threshold: anchors with max IoU < this become background

    Returns:
        dict with
            'box_cls_labels': (N) int32, >0 foreground class, 0 background, -1 ignored
            'box_reg_targets': (N, code_size) encoded targets, zero for non-foreground
            'reg_weights': (N) regression weights (non-zero only for foreground)
            'gt_ious': (N) max IoU of every anchor with any GT (zeros if nothing to match)
    """
    num_anchors = anchors.shape[0]
    num_gt = gt_boxes.shape[0]
    device = anchors.device
    has_pairs = num_gt > 0 and num_anchors > 0

    labels = torch.full((num_anchors,), -1, dtype=torch.int32, device=device)
    ious = torch.zeros((num_anchors,), dtype=torch.float, device=device)

    if has_pairs:
        if self.match_height:
            anchor_by_gt_overlap = iou3d_nms_utils.boxes_iou3d_gpu(anchors[:, 0:7], gt_boxes[:, 0:7])
        else:
            anchor_by_gt_overlap = box_utils.boxes3d_nearest_bev_iou(anchors[:, 0:7], gt_boxes[:, 0:7])

        # Best GT for every anchor. Computed on the tensor's own device instead of
        # a NumPy round trip followed by a hard-coded .cuda().
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(dim=1)
        anchor_to_gt_max = anchor_by_gt_overlap[
            torch.arange(num_anchors, device=device), anchor_to_gt_argmax
        ]
        ious = anchor_to_gt_max

        # Best anchor for every GT; a GT that overlaps nothing (max IoU == 0) is
        # set to -1 so the equality test below cannot match it.
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(dim=0)
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax, torch.arange(num_gt, device=device)
        ]
        gt_to_anchor_max[gt_to_anchor_max == 0] = -1

        # Force-match: anchors that reach some GT's maximum IoU are foreground
        # regardless of the threshold.
        anchors_with_max_overlap = (anchor_by_gt_overlap == gt_to_anchor_max).nonzero()[:, 0]
        gt_inds_force = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds_force]

        pos_inds = anchor_to_gt_max >= matched_threshold
        labels[pos_inds] = gt_classes[anchor_to_gt_argmax[pos_inds]]

        bg_inds = (anchor_to_gt_max < unmatched_threshold).nonzero()[:, 0]
    else:
        bg_inds = torch.arange(num_anchors, device=device)

    fg_inds = (labels > 0).nonzero()[:, 0]

    if self.pos_fraction is not None:
        num_fg = int(self.pos_fraction * self.sample_size)
        if len(fg_inds) > num_fg:
            num_disabled = len(fg_inds) - num_fg
            # randperm yields positions inside fg_inds; map them back to anchor
            # indices before writing. Indexing labels with the raw permutation
            # (as before) disabled the first anchors instead of foreground ones.
            drop = torch.randperm(len(fg_inds))[:num_disabled].to(fg_inds.device)
            labels[fg_inds[drop]] = -1
            fg_inds = (labels > 0).nonzero()[:, 0]

        num_bg = int(self.sample_size - (labels > 0).sum())
        if len(bg_inds) > num_bg:
            picked = torch.randint(0, len(bg_inds), size=(num_bg,)).to(bg_inds.device)
            labels[bg_inds[picked]] = 0
    elif not has_pairs:
        labels[:] = 0
    else:
        labels[bg_inds] = 0
        # Background assignment may have overwritten force-matched anchors; restore them.
        labels[anchors_with_max_overlap] = gt_classes[gt_inds_force]

    bbox_targets = anchors.new_zeros((num_anchors, self.box_coder.code_size))
    if has_pairs:
        fg_gt_boxes = gt_boxes[anchor_to_gt_argmax[fg_inds], :]
        fg_anchors = anchors[fg_inds, :]
        bbox_targets[fg_inds, :] = self.box_coder.encode_torch(fg_gt_boxes, fg_anchors)

    reg_weights = anchors.new_zeros((num_anchors,))
    if self.norm_by_num_examples:
        # Normalise by the number of non-ignored anchors (at least 1).
        num_examples = max(int((labels >= 0).sum()), 1)
        reg_weights[labels > 0] = 1.0 / num_examples
    else:
        reg_weights[labels > 0] = 1.0

    ret_dict = {
        'box_cls_labels': labels,
        'box_reg_targets': bbox_targets,
        'reg_weights': reg_weights,
        'gt_ious': ious
    }
    return ret_dict
class Detector3DTemplate(nn.Module):
    """
    Base class for detectors assembled from configurable sub-modules.

    `build_networks` walks `module_topology` and calls the matching
    `build_<name>` method for each entry. Every builder returns
    `(module, model_info_dict)`; the module is None when the corresponding
    section is missing from `model_cfg`.
    """

    def __init__(self, model_cfg, num_class, dataset):
        super().__init__()
        self.model_cfg = model_cfg
        self.num_class = num_class
        self.dataset = dataset
        self.class_names = dataset.class_names
        self.register_buffer('global_step', torch.LongTensor(1).zero_())

        # Build order of the sub-modules; note that 'dense_head' precedes 'pfe'.
        self.module_topology = [
            'vfe', 'backbone_3d', 'map_to_bev_module',
            'backbone_2d', 'dense_head', 'pfe', 'point_head', 'roi_head'
        ]
        # NOTE(review): attribute spelling ("filp") kept as-is because code
        # outside this class may read it under this name.
        self.test_filp = dataset.test_flip

    @property
    def mode(self):
        """Return 'TRAIN' while the module is in training mode, else 'TEST'."""
        return 'TRAIN' if self.training else 'TEST'

    def update_global_step(self):
        """Increment the `global_step` buffer by one."""
        self.global_step += 1

    def build_networks(self):
        """
        Build all sub-modules listed in `module_topology` and register them.

        Returns:
            list of the modules that were actually built, in build order
        """
        model_info_dict = {
            'module_list': [],
            'num_rawpoint_features': self.dataset.point_feature_encoder.num_point_features,
            'num_point_features': self.dataset.point_feature_encoder.num_point_features,
            'grid_size': self.dataset.grid_size,
            'point_cloud_range': self.dataset.point_cloud_range,
            'voxel_size': self.dataset.voxel_size
        }
        for module_name in self.module_topology:
            module, model_info_dict = getattr(self, 'build_%s' % module_name)(
                model_info_dict=model_info_dict
            )
            # A None module is registered too, so the attribute always exists.
            self.add_module(module_name, module)
        return model_info_dict['module_list']

    def build_vfe(self, model_info_dict):
        """Build the module configured under `VFE`; updates 'num_point_features'."""
        if self.model_cfg.get('VFE', None) is None:
            return None, model_info_dict

        vfe_module = vfe.__all__[self.model_cfg.VFE.NAME](
            model_cfg=self.model_cfg.VFE,
            num_point_features=model_info_dict['num_rawpoint_features'],
            point_cloud_range=model_info_dict['point_cloud_range'],
            voxel_size=model_info_dict['voxel_size'],
        )
        model_info_dict['num_point_features'] = vfe_module.get_output_feature_dim()
        model_info_dict['module_list'].append(vfe_module)
        return vfe_module, model_info_dict

    def build_backbone_3d(self, model_info_dict):
        """Build the module configured under `BACKBONE_3D`; updates 'num_point_features'."""
        if self.model_cfg.get('BACKBONE_3D', None) is None:
            return None, model_info_dict

        backbone_3d_module = backbones_3d.__all__[self.model_cfg.BACKBONE_3D.NAME](
            model_cfg=self.model_cfg.BACKBONE_3D,
            input_channels=model_info_dict['num_point_features'],
            grid_size=model_info_dict['grid_size'],
            voxel_size=model_info_dict['voxel_size'],
            point_cloud_range=model_info_dict['point_cloud_range']
        )
        model_info_dict['module_list'].append(backbone_3d_module)
        model_info_dict['num_point_features'] = backbone_3d_module.num_point_features
        return backbone_3d_module, model_info_dict

    def build_map_to_bev_module(self, model_info_dict):
        """Build the module configured under `MAP_TO_BEV`; sets 'num_bev_features'."""
        if self.model_cfg.get('MAP_TO_BEV', None) is None:
            return None, model_info_dict

        map_to_bev_module = map_to_bev.__all__[self.model_cfg.MAP_TO_BEV.NAME](
            model_cfg=self.model_cfg.MAP_TO_BEV,
            voxel_size=model_info_dict['voxel_size'],
            point_cloud_range=model_info_dict['point_cloud_range']
        )
        model_info_dict['module_list'].append(map_to_bev_module)
        model_info_dict['num_bev_features'] = map_to_bev_module.num_bev_features
        return map_to_bev_module, model_info_dict

    def build_backbone_2d(self, model_info_dict):
        """Build the module configured under `BACKBONE_2D`; sets 'num_bev_features_post'."""
        if self.model_cfg.get('BACKBONE_2D', None) is None:
            return None, model_info_dict

        backbone_2d_module = backbones_2d.__all__[self.model_cfg.BACKBONE_2D.NAME](
            model_cfg=self.model_cfg.BACKBONE_2D,
            input_channels=model_info_dict['num_bev_features'],
        )
        model_info_dict['module_list'].append(backbone_2d_module)
        model_info_dict['num_bev_features_post'] = backbone_2d_module.num_bev_features_post
        return backbone_2d_module, model_info_dict

    def build_pfe(self, model_info_dict):
        """Build the module configured under `PFE`; updates the point-feature entries."""
        if self.model_cfg.get('PFE', None) is None:
            return None, model_info_dict

        pfe_module = pfe.__all__[self.model_cfg.PFE.NAME](
            model_cfg=self.model_cfg.PFE,
            voxel_size=model_info_dict['voxel_size'],
            point_cloud_range=model_info_dict['point_cloud_range'],
            num_bev_features=model_info_dict['num_bev_features'],
            num_rawpoint_features=model_info_dict['num_rawpoint_features']
        )
        model_info_dict['module_list'].append(pfe_module)
        # 'num_point_features' may already be a dict; in that case add an entry
        # instead of replacing the whole value.
        if type(model_info_dict['num_point_features']) is dict:
            model_info_dict['num_point_features'].update({"points_bev": pfe_module.num_point_features})
        else:
            model_info_dict['num_point_features'] = pfe_module.num_point_features
        model_info_dict['num_point_features_before_fusion'] = pfe_module.num_point_features_before_fusion
        return pfe_module, model_info_dict

    def build_dense_head(self, model_info_dict):
        """Build the module configured under `DENSE_HEAD`."""
        if self.model_cfg.get('DENSE_HEAD', None) is None:
            return None, model_info_dict

        dense_head_module = dense_heads.__all__[self.model_cfg.DENSE_HEAD.NAME](
            model_cfg=self.model_cfg.DENSE_HEAD,
            input_channels=model_info_dict['num_bev_features_post'],
            num_class=self.num_class if not self.model_cfg.DENSE_HEAD.CLASS_AGNOSTIC else 1,
            class_names=self.class_names,
            grid_size=model_info_dict['grid_size'],
            point_cloud_range=model_info_dict['point_cloud_range'],
            predict_boxes_when_training=self.model_cfg.get('ROI_HEAD', False),
            voxel_size=model_info_dict.get('voxel_size', False)
        )
        model_info_dict['module_list'].append(dense_head_module)
        return dense_head_module, model_info_dict

    def build_point_head(self, model_info_dict):
        """Build the module configured under `POINT_HEAD`."""
        if self.model_cfg.get('POINT_HEAD', None) is None:
            return None, model_info_dict

        if self.model_cfg.POINT_HEAD.get('USE_POINT_FEATURES_BEFORE_FUSION', False):
            num_point_features = model_info_dict['num_point_features_before_fusion']
        else:
            num_point_features = model_info_dict['num_point_features']

        point_head_module = dense_heads.__all__[self.model_cfg.POINT_HEAD.NAME](
            model_cfg=self.model_cfg.POINT_HEAD,
            input_channels=num_point_features,
            num_class=self.num_class if not self.model_cfg.POINT_HEAD.CLASS_AGNOSTIC else 1,
            predict_boxes_when_training=self.model_cfg.get('ROI_HEAD', False)
        )
        model_info_dict['module_list'].append(point_head_module)
        return point_head_module, model_info_dict

    def build_roi_head(self, model_info_dict):
        """Build the module configured under `ROI_HEAD`."""
        if self.model_cfg.get('ROI_HEAD', None) is None:
            return None, model_info_dict

        roi_head_module = roi_heads.__all__[self.model_cfg.ROI_HEAD.NAME](
            model_cfg=self.model_cfg.ROI_HEAD,
            input_channels=model_info_dict['num_point_features'],
            point_cloud_range=model_info_dict['point_cloud_range'],
            voxel_size=model_info_dict['voxel_size'],
            num_class=self.num_class if not self.model_cfg.ROI_HEAD.CLASS_AGNOSTIC else 1,
        )
        model_info_dict['module_list'].append(roi_head_module)
        return roi_head_module, model_info_dict

    def forward(self, **kwargs):
        raise NotImplementedError

    def post_processing(self, batch_dict):
        """
        Turn raw network outputs into per-sample prediction dicts.

        Args:
            batch_dict:
                batch_size:
                batch_cls_preds: (B, num_boxes, num_classes | 1) or (N1+N2+..., num_classes | 1)
                                or [(B, num_boxes, num_class1), (B, num_boxes, num_class2) ...]
                multihead_label_mapping: [(num_class1), (num_class2), ...]
                batch_box_preds: (B, num_boxes, 7+C) or (N1+N2+..., 7+C)
                cls_preds_normalized: indicate whether batch_cls_preds is normalized
                batch_index: optional (N1+N2+...)
                has_class_labels: True/False
                roi_labels: (B, num_rois)  1 .. num_classes
                batch_pred_labels: (B, num_boxes, 1)
        Returns:
            pred_dicts: one dict per sample with 'pred_boxes', 'pred_scores',
                'pred_labels' (and 'WBF': True when the WBF option is on)
            recall_dict: recall counters accumulated over the batch
        """
        post_process_cfg = self.model_cfg.POST_PROCESSING
        batch_size = batch_dict['batch_size']
        use_wbf = post_process_cfg.get('WBF', True)
        recall_dict = {}
        pred_dicts = []
        for index in range(batch_size):
            if batch_dict.get('batch_index', None) is not None:
                assert batch_dict['batch_box_preds'].shape.__len__() == 2
                batch_mask = (batch_dict['batch_index'] == index)
            else:
                assert batch_dict['batch_box_preds'].shape.__len__() == 3
                batch_mask = index

            box_preds = batch_dict['batch_box_preds'][batch_mask]
            src_box_preds = box_preds

            if not isinstance(batch_dict['batch_cls_preds'], list):
                cls_preds = batch_dict['batch_cls_preds'][batch_mask]
                src_cls_preds = cls_preds
                assert cls_preds.shape[1] in [1, self.num_class]
                if not batch_dict['cls_preds_normalized']:
                    cls_preds = torch.sigmoid(cls_preds)
            else:
                cls_preds = [x[batch_mask] for x in batch_dict['batch_cls_preds']]
                src_cls_preds = cls_preds
                if not batch_dict['cls_preds_normalized']:
                    cls_preds = [torch.sigmoid(x) for x in cls_preds]

            if post_process_cfg.NMS_CONFIG.MULTI_CLASSES_NMS:
                if not isinstance(cls_preds, list):
                    cls_preds = [cls_preds]
                    # One 1-based label per score column. The former
                    # arange(1, num_class) was one entry short and failed the
                    # length assertion below for a num_class-wide head.
                    multihead_label_mapping = [
                        torch.arange(1, cls_preds[0].shape[1] + 1, device=cls_preds[0].device)
                    ]
                else:
                    multihead_label_mapping = batch_dict['multihead_label_mapping']

                cur_start_idx = 0
                pred_scores, pred_labels, pred_boxes = [], [], []
                for cur_cls_preds, cur_label_mapping in zip(cls_preds, multihead_label_mapping):
                    assert cur_cls_preds.shape[1] == len(cur_label_mapping)
                    cur_box_preds = box_preds[cur_start_idx: cur_start_idx + cur_cls_preds.shape[0]]
                    cur_pred_scores, cur_pred_labels, cur_pred_boxes = model_nms_utils.multi_classes_nms(
                        cls_scores=cur_cls_preds, box_preds=cur_box_preds,
                        nms_config=post_process_cfg.NMS_CONFIG,
                        score_thresh=post_process_cfg.SCORE_THRESH
                    )
                    cur_pred_labels = cur_label_mapping[cur_pred_labels]
                    pred_scores.append(cur_pred_scores)
                    pred_labels.append(cur_pred_labels)
                    pred_boxes.append(cur_pred_boxes)
                    cur_start_idx += cur_cls_preds.shape[0]

                final_scores = torch.cat(pred_scores, dim=0)
                final_labels = torch.cat(pred_labels, dim=0)
                final_boxes = torch.cat(pred_boxes, dim=0)
            else:
                cls_preds, label_preds = torch.max(cls_preds, dim=-1)
                if batch_dict.get('has_class_labels', False):
                    label_key = 'roi_labels' if 'roi_labels' in batch_dict else 'batch_pred_labels'
                    label_preds = batch_dict[label_key][index]
                else:
                    label_preds = label_preds + 1

                if use_wbf:
                    # With WBF enabled only a score threshold is applied here (no NMS).
                    score_mask = cls_preds > post_process_cfg.SCORE_THRESH
                    final_scores = cls_preds[score_mask]
                    final_labels = label_preds[score_mask]
                    final_boxes = box_preds[score_mask]
                else:
                    selected, selected_scores = model_nms_utils.class_agnostic_nms(
                        box_scores=cls_preds, box_preds=box_preds,
                        nms_config=post_process_cfg.NMS_CONFIG,
                        score_thresh=post_process_cfg.SCORE_THRESH
                    )

                    if post_process_cfg.OUTPUT_RAW_SCORE:
                        max_cls_preds, _ = torch.max(src_cls_preds, dim=-1)
                        selected_scores = max_cls_preds[selected]

                    final_scores = selected_scores
                    final_labels = label_preds[selected]
                    final_boxes = box_preds[selected]

            recall_dict = self.generate_recall_record(
                box_preds=final_boxes if 'rois' not in batch_dict else src_box_preds,
                recall_dict=recall_dict, batch_index=index, data_dict=batch_dict,
                thresh_list=post_process_cfg.RECALL_THRESH_LIST
            )

            record_dict = {
                'pred_boxes': final_boxes,
                'pred_scores': final_scores,
                'pred_labels': final_labels
            }
            if use_wbf:
                record_dict.update({'WBF': True})
            pred_dicts.append(record_dict)

        return pred_dicts, recall_dict

    @staticmethod
    def generate_recall_record(box_preds, recall_dict, batch_index, data_dict=None, thresh_list=None):
        """
        Accumulate recall counters for one sample.

        Args:
            box_preds: predicted boxes of the sample, first 7 channels are used
            recall_dict: running counters; when empty, a new dict with 'gt',
                'roi_<t>' and 'rcnn_<t>' keys is created
            batch_index: index of the sample inside data_dict
            data_dict: batch dict; nothing is recorded unless it has 'gt_boxes'
            thresh_list: IoU thresholds to count recall at
        Returns:
            the updated recall dict
        """
        # data_dict defaults to None, so guard before the membership test.
        if data_dict is None or 'gt_boxes' not in data_dict:
            return recall_dict

        rois = data_dict['rois'][batch_index] if 'rois' in data_dict else None
        gt_boxes = data_dict['gt_boxes'][batch_index]

        if recall_dict.__len__() == 0:
            recall_dict = {'gt': 0}
            for cur_thresh in thresh_list:
                recall_dict['roi_%s' % (str(cur_thresh))] = 0
                recall_dict['rcnn_%s' % (str(cur_thresh))] = 0

        # Drop trailing all-zero (padding) GT rows; at least one row is kept.
        cur_gt = gt_boxes
        k = cur_gt.__len__() - 1
        while k > 0 and cur_gt[k].sum() == 0:
            k -= 1
        cur_gt = cur_gt[:k + 1]

        if cur_gt.shape[0] > 0:
            has_preds = box_preds.shape[0] > 0
            if has_preds:
                iou3d_rcnn = iou3d_nms_utils.boxes_iou3d_gpu(box_preds[:, 0:7], cur_gt[:, 0:7])
            if rois is not None:
                iou3d_roi = iou3d_nms_utils.boxes_iou3d_gpu(rois[:, 0:7], cur_gt[:, 0:7])

            for cur_thresh in thresh_list:
                # Without predictions nothing can be recalled at this threshold.
                if has_preds:
                    rcnn_recalled = (iou3d_rcnn.max(dim=0)[0] > cur_thresh).sum().item()
                    recall_dict['rcnn_%s' % str(cur_thresh)] += rcnn_recalled
                if rois is not None:
                    roi_recalled = (iou3d_roi.max(dim=0)[0] > cur_thresh).sum().item()
                    recall_dict['roi_%s' % str(cur_thresh)] += roi_recalled

            recall_dict['gt'] += cur_gt.shape[0]
        return recall_dict

    def _load_state_dict(self, model_state_disk, *, strict=True):
        """
        Load the entries of `model_state_disk` whose key and shape match this model.

        Returns:
            (state_dict, update_model_state): the model's state dict (merged
            with the loaded values when strict is False) and the subset of
            entries taken from disk.
        """
        state_dict = self.state_dict()  # local cache of state_dict

        spconv_keys = find_all_spconv_keys(self)

        update_model_state = {}
        for key, val in model_state_disk.items():
            if key in spconv_keys and key in state_dict and state_dict[key].shape != val.shape:
                # with different spconv versions, we need to adapt weight shapes for spconv blocks
                # adapt spconv weights from version 1.x to version 2.x if you used weights from spconv 1.x

                val_native = val.transpose(-1, -2)  # (k1, k2, k3, c_in, c_out) to (k1, k2, k3, c_out, c_in)
                if val_native.shape == state_dict[key].shape:
                    val = val_native.contiguous()
                else:
                    assert val.shape.__len__() == 5, 'currently only spconv 3D is supported'
                    val_implicit = val.permute(4, 0, 1, 2, 3)  # (k1, k2, k3, c_in, c_out) to (c_out, k1, k2, k3, c_in)
                    if val_implicit.shape == state_dict[key].shape:
                        val = val_implicit.contiguous()

            if key in state_dict and state_dict[key].shape == val.shape:
                update_model_state[key] = val

        if strict:
            self.load_state_dict(update_model_state)
        else:
            state_dict.update(update_model_state)
            self.load_state_dict(state_dict)
        return state_dict, update_model_state

    def load_params_from_file(self, filename, logger, to_cpu=False):
        """
        Load model weights from a checkpoint file, skipping non-matching entries.

        Raises:
            FileNotFoundError: if `filename` is not an existing file
        """
        if not os.path.isfile(filename):
            raise FileNotFoundError(filename)

        logger.info('==> Loading parameters from checkpoint %s to %s' % (filename, 'CPU' if to_cpu else 'GPU'))
        loc_type = torch.device('cpu') if to_cpu else None
        checkpoint = torch.load(filename, map_location=loc_type)
        model_state_disk = checkpoint['model_state']

        version = checkpoint.get("version", None)
        if version is not None:
            logger.info('==> Checkpoint trained from version: %s' % version)

        state_dict, update_model_state = self._load_state_dict(model_state_disk, strict=False)

        for key in state_dict:
            if key not in update_model_state:
                logger.info('Not updated weight %s: %s' % (key, str(state_dict[key].shape)))

        logger.info('==> Done (loaded %d/%d)' % (len(update_model_state), len(state_dict)))

    def load_params_with_optimizer(self, filename, to_cpu=False, optimizer=None, logger=None):
        """
        Load model weights and, if an optimizer is given, its state.

        The optimizer state is read from the checkpoint itself when present,
        otherwise from a sibling file named '<name>_optim<ext>' if it exists.

        Returns:
            (it, epoch) as stored in the checkpoint (defaults 0.0 and -1)
        Raises:
            FileNotFoundError: if `filename` is not an existing file
        """
        if not os.path.isfile(filename):
            raise FileNotFoundError(filename)

        # logger is optional in the signature, so fall back to print when it
        # is left at None instead of failing on `None.info`.
        log = logger.info if logger is not None else print

        log('==> Loading parameters from checkpoint %s to %s' % (filename, 'CPU' if to_cpu else 'GPU'))
        loc_type = torch.device('cpu') if to_cpu else None
        checkpoint = torch.load(filename, map_location=loc_type)
        epoch = checkpoint.get('epoch', -1)
        it = checkpoint.get('it', 0.0)

        self._load_state_dict(checkpoint['model_state'], strict=True)

        if optimizer is not None:
            if 'optimizer_state' in checkpoint and checkpoint['optimizer_state'] is not None:
                log('==> Loading optimizer parameters from checkpoint %s to %s'
                    % (filename, 'CPU' if to_cpu else 'GPU'))
                optimizer.load_state_dict(checkpoint['optimizer_state'])
            else:
                # '<name>.pth' -> '<name>_optim.pth'; splitext copes with any extension length.
                src_file, ext = os.path.splitext(filename)
                optimizer_filename = '%s_optim%s' % (src_file, ext)
                if os.path.exists(optimizer_filename):
                    optimizer_ckpt = torch.load(optimizer_filename, map_location=loc_type)
                    optimizer.load_state_dict(optimizer_ckpt['optimizer_state'])

        if 'version' in checkpoint:
            print('==> Checkpoint trained from version: %s' % checkpoint['version'])
        log('==> Done')

        return it, epoch
def gaussian_radius(height, width, min_overlap=0.5):
    """
    Compute a per-box gaussian radius from box height and width.

    Three quadratic expressions in the box size are evaluated and the
    element-wise minimum of their results is returned.

    Args:
        height: (N) tensor
        width: (N) tensor
        min_overlap: overlap value the three expressions are parameterised with

    Returns:
        (N) tensor of radii
    """
    def _root(a, b, c):
        # (b + sqrt(b^2 - 4ac)) / 2 -- the divisor is 2, not 2a, as in the source formula
        return (b + (b ** 2 - 4 * a * c).sqrt()) / 2

    r1 = _root(
        1,
        (height + width),
        width * height * (1 - min_overlap) / (1 + min_overlap),
    )
    r2 = _root(
        4,
        2 * (height + width),
        (1 - min_overlap) * width * height,
    )
    r3 = _root(
        4 * min_overlap,
        -2 * min_overlap * (height + width),
        (min_overlap - 1) * width * height,
    )
    return torch.min(torch.min(r1, r2), r3)
out=masked_heatmap) return heatmap def _nms(heat, kernel=3): pad = (kernel - 1) // 2 hmax = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) keep = (hmax == heat).float() return heat * keep @numba.jit(nopython=True) def circle_nms(dets, thresh): x1 = dets[:, 0] y1 = dets[:, 1] scores = dets[:, 2] order = scores.argsort()[::-1].astype(np.int32) # highest->lowest ndets = dets.shape[0] suppressed = np.zeros((ndets), dtype=np.int32) keep = [] for _i in range(ndets): i = order[_i] # start with highest score box if suppressed[i] == 1: # if any box have enough iou with this, remove it continue keep.append(i) for _j in range(_i + 1, ndets): j = order[_j] if suppressed[j] == 1: continue # calculate center distance between i and j box dist = (x1[i] - x1[j]) ** 2 + (y1[i] - y1[j]) ** 2 # ovr = inter / areas[j] if dist <= thresh: suppressed[j] = 1 return keep def _circle_nms(boxes, min_radius, post_max_size=83): """ NMS according to center distance """ keep = np.array(circle_nms(boxes.cpu().numpy(), thresh=min_radius))[:post_max_size] keep = torch.from_numpy(keep).long().to(boxes.device) return keep def _gather_feat(feat, ind, mask=None): dim = feat.size(2) ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) feat = feat.gather(1, ind) if mask is not None: mask = mask.unsqueeze(2).expand_as(feat) feat = feat[mask] feat = feat.view(-1, dim) return feat def _transpose_and_gather_feat(feat, ind): feat = feat.permute(0, 2, 3, 1).contiguous() feat = feat.view(feat.size(0), -1, feat.size(3)) feat = _gather_feat(feat, ind) return feat def _topk(scores, K=40): batch, num_class, height, width = scores.size() topk_scores, topk_inds = torch.topk(scores.flatten(2, 3), K) topk_inds = topk_inds % (height * width) topk_ys = (topk_inds // width).float() topk_xs = (topk_inds % width).int().float() topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) topk_classes = (topk_ind // K).int() topk_inds = _gather_feat(topk_inds.view(batch, -1, 1), topk_ind).view(batch, 
K) topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) return topk_score, topk_inds, topk_classes, topk_ys, topk_xs def decode_bbox_from_heatmap(heatmap, rot_cos, rot_sin, center, center_z, dim, point_cloud_range=None, voxel_size=None, feature_map_stride=None, vel=None, K=100, circle_nms=False, score_thresh=None, post_center_limit_range=None): batch_size, num_class, _, _ = heatmap.size() if circle_nms: # TODO: not checked yet assert False, 'not checked yet' heatmap = _nms(heatmap) scores, inds, class_ids, ys, xs = _topk(heatmap, K=K) center = _transpose_and_gather_feat(center, inds).view(batch_size, K, 2) rot_sin = _transpose_and_gather_feat(rot_sin, inds).view(batch_size, K, 1) rot_cos = _transpose_and_gather_feat(rot_cos, inds).view(batch_size, K, 1) center_z = _transpose_and_gather_feat(center_z, inds).view(batch_size, K, 1) dim = _transpose_and_gather_feat(dim, inds).view(batch_size, K, 3) angle = torch.atan2(rot_sin, rot_cos) xs = xs.view(batch_size, K, 1) + center[:, :, 0:1] ys = ys.view(batch_size, K, 1) + center[:, :, 1:2] xs = xs * feature_map_stride * voxel_size[0] + point_cloud_range[0] ys = ys * feature_map_stride * voxel_size[1] + point_cloud_range[1] box_part_list = [xs, ys, center_z, dim, angle] if vel is not None: vel = _transpose_and_gather_feat(vel, inds).view(batch_size, K, 2) box_part_list.append(vel) final_box_preds = torch.cat((box_part_list), dim=-1) final_scores = scores.view(batch_size, K) final_class_ids = class_ids.view(batch_size, K) assert post_center_limit_range is not None mask = (final_box_preds[..., :3] >= post_center_limit_range[:3]).all(2) mask &= (final_box_preds[..., :3] <= post_center_limit_range[3:]).all(2) if score_thresh is not None: mask &= (final_scores > score_thresh) ret_pred_dicts = [] for k in range(batch_size): cur_mask = mask[k] cur_boxes = final_box_preds[k, cur_mask] cur_scores = final_scores[k, cur_mask] cur_labels 
class PositionalEmbedding(nn.Module):
    """Sinusoidal positional embedding.

    A bank of inverse frequencies ``1 / 10000 ** (2i / demb)`` is stored as
    the buffer ``inv_freq``. Each position is multiplied by every frequency,
    and the sines and cosines of the products are concatenated on the last
    axis.

    Args:
        demb: Embedding width used to build the frequency bank.
    """

    def __init__(self, demb=256):
        super().__init__()
        self.demb = demb
        exponents = torch.arange(0.0, demb, 2.0) / demb
        self.register_buffer('inv_freq', 1 / (10000 ** exponents))

    def forward(self, pos_seq, batch_size=2):
        """Embed a 1-D sequence of positions.

        Args:
            pos_seq: 1-D tensor of positions (outer product with ``inv_freq``).
            batch_size: Size the middle axis is expanded to; ``None`` keeps
                a singleton middle axis.

        Returns:
            Tensor of shape ``(len(pos_seq), batch_size or 1, 2 * len(inv_freq))``.
        """
        angles = torch.ger(pos_seq, self.inv_freq)
        embedding = torch.cat([angles.sin(), angles.cos()], dim=-1)[:, None, :]
        if batch_size is None:
            return embedding
        return embedding.expand(-1, batch_size, -1)
class Attention_Layer(nn.Module):
    """Single-head dot-product self-attention followed by mean pooling.

    Queries, keys and values are bias-free linear projections of the same
    input. The attention logits are not scaled before the softmax.

    Args:
        hidden_dim: Size of the last input axis and of every projection.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.Q_linear = nn.Linear(hidden_dim, hidden_dim, bias=False)
        self.K_linear = nn.Linear(hidden_dim, hidden_dim, bias=False)
        self.V_linear = nn.Linear(hidden_dim, hidden_dim, bias=False)

    def forward(self, inputs):
        """Attend over the second-to-last axis and average the result.

        Args:
            inputs: 3-D tensor whose last axis has size ``hidden_dim``.

        Returns:
            Tensor with the second-to-last axis averaged away.
        """
        queries = self.Q_linear(inputs)
        keys_t = self.K_linear(inputs).permute(0, 2, 1)
        values = self.V_linear(inputs)

        # Row-wise softmax over the key axis.
        weights = F.softmax(torch.matmul(queries, keys_t), dim=2)
        attended = torch.matmul(weights, values)
        return torch.mean(attended, -2)
class MLP(nn.Module):
    """Plain feed-forward network (FFN) made of ``nn.Linear`` layers.

    ReLU follows every layer except the last. Weights are initialised with
    Xavier-normal and biases with zero.

    Args:
        input_dim: Width of the input features.
        hidden_dim: Width of every intermediate layer.
        output_dim: Width of the final layer.
        num_layers: Number of linear layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        widths = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
        self.layers = nn.ModuleList(
            nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(widths[:-1], widths[1:])
        )
        self.init_weights()

    def forward(self, x):
        """Run ``x`` through the stack; the last layer has no activation."""
        last = self.num_layers - 1
        for idx, layer in enumerate(self.layers):
            x = layer(x)
            if idx < last:
                x = F.relu(x)
        return x

    def init_weights(self):
        """Xavier-normal weights and zero biases for every linear layer."""
        for layer in self.layers:
            nn.init.xavier_normal_(layer.weight)
            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0)
class Transformer(nn.Module):
    """Encoder-decoder transformer assembled from the layer classes in this file.

    The encoder gets a final ``LayerNorm`` only when ``normalize_before`` is
    set; the decoder always gets one. After construction, every parameter
    with more than one dimension is re-initialised with Xavier-uniform.
    """

    def __init__(self, d_model=512, nhead=8, num_encoder_layers=6,
                 num_decoder_layers=6, dim_feedforward=2048, dropout=0.1,
                 activation="relu", normalize_before=False,
                 return_intermediate_dec=False):
        super().__init__()

        # Both layer types share the same positional constructor arguments.
        layer_args = (d_model, nhead, dim_feedforward, dropout, activation, normalize_before)

        self.encoder = TransformerEncoder(
            TransformerEncoderLayer(*layer_args),
            num_encoder_layers,
            nn.LayerNorm(d_model) if normalize_before else None,
        )
        self.decoder = TransformerDecoder(
            TransformerDecoderLayer(*layer_args),
            num_decoder_layers,
            nn.LayerNorm(d_model),
            return_intermediate=return_intermediate_dec,
        )

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead

    def _reset_parameters(self):
        """Xavier-uniform init for every parameter with more than one dimension."""
        for param in self.parameters():
            if param.dim() > 1:
                nn.init.xavier_uniform_(param)

    def forward(self, src, query_embed, pos_embed):
        """Encode ``src`` and decode a set of queries against it.

        Args:
            src: ``(bs, n, c)`` input features.
            query_embed: 2-D query embeddings, repeated across the batch.
            pos_embed: Positional embedding, permuted the same way as ``src``.

        Returns:
            Tuple of the decoder output with axes 1 and 2 swapped, and the
            encoder memory reshaped to ``(bs, c, n)``.
        """
        bs, n, c = src.shape

        # Batch-first -> sequence-first for the attention layers.
        src_seq = src.permute(1, 0, 2)
        pos_seq = pos_embed.permute(1, 0, 2)
        queries = query_embed.unsqueeze(1).repeat(1, bs, 1)

        memory = self.encoder(src_seq, src_key_padding_mask=None, pos=pos_seq)
        # The decoder starts from an all-zero target; queries enter as query_pos.
        hs = self.decoder(torch.zeros_like(queries), memory,
                          memory_key_padding_mask=None,
                          pos=pos_seq, query_pos=queries)
        return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, n)
class TransformerDecoder(nn.Module):
    """Stack of identical decoder layers with an optional final normalisation.

    Args:
        decoder_layer: Layer module that is cloned ``num_layers`` times.
        num_layers: Number of clones in the stack.
        norm: Optional module applied to the final output, and to every
            intermediate output when ``return_intermediate`` is set.
        return_intermediate: When true, ``forward`` returns the stacked
            outputs of all layers instead of only the last one.
    """

    def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False):
        super().__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm
        self.return_intermediate = return_intermediate

    def forward(self, tgt, memory,
                tgt_mask: Optional[Tensor] = None,
                memory_mask: Optional[Tensor] = None,
                tgt_key_padding_mask: Optional[Tensor] = None,
                memory_key_padding_mask: Optional[Tensor] = None,
                pos: Optional[Tensor] = None,
                query_pos: Optional[Tensor] = None):
        """Run ``tgt`` through every layer against ``memory``.

        Returns:
            ``torch.stack`` of the per-layer outputs when
            ``return_intermediate`` is set, otherwise the last output with a
            leading axis of size 1.
        """
        output = tgt
        intermediate = []

        for layer in self.layers:
            output = layer(output, memory, tgt_mask=tgt_mask,
                           memory_mask=memory_mask,
                           tgt_key_padding_mask=tgt_key_padding_mask,
                           memory_key_padding_mask=memory_key_padding_mask,
                           pos=pos, query_pos=query_pos)
            if self.return_intermediate:
                # ``norm`` is optional: calling it unconditionally crashed
                # with "'NoneType' object is not callable" when norm=None.
                intermediate.append(output if self.norm is None else self.norm(output))

        if self.norm is not None:
            output = self.norm(output)
            if self.return_intermediate:
                # Replace the last entry with the final normalised output.
                intermediate.pop()
                intermediate.append(output)

        if self.return_intermediate:
            return torch.stack(intermediate)

        return output.unsqueeze(0)
def attention(query, key, value):
    """Attention variant used by ``MultiHeadedAttention``.

    Shapes follow the einsum subscripts: ``query`` is ``(b, d, h, n)`` and
    ``key`` / ``value`` are ``(b, d, h, m)``.

    1. Scaled query-key scores ``(b, h, n, m)`` are computed, with scale
       ``sqrt(query.shape[1])``.
    2. The scores are summed over ``n`` and multiplied element-wise into
       ``key``, giving a ``(b, d, h, m)`` tensor that is soft-maxed over ``m``.
    3. The result is contracted with ``value`` over ``m``. The channel axis
       of ``prob`` becomes the last output axis.

    Returns:
        ``(output, prob)`` where ``prob`` has the shape of ``key``.
    """
    channels = query.shape[1]
    pair_scores = torch.einsum('bdhn,bdhm->bhnm', query, key) / channels ** .5
    gated_keys = torch.einsum('abcd, aced->abcd', key, pair_scores)
    prob = gated_keys.softmax(dim=-1)
    return torch.einsum('bnhm,bdhm->bdhn', prob, value), prob
class TransformerDecoderLayer(nn.Module):
    """Decoder layer: self-attention, custom cross-attention, feed-forward.

    Self-attention uses ``nn.MultiheadAttention``. Cross-attention uses this
    file's ``MultiHeadedAttention``, whose ``forward`` accepts only
    ``query, key, value`` in channel-first ``(B, C, N)`` layout and has no
    masking support. ``memory_mask`` and ``memory_key_padding_mask`` are
    therefore accepted for interface compatibility but not used.

    Args:
        d_model: Feature width.
        nhead: Number of attention heads.
        dim_feedforward: Hidden width of the feed-forward block.
        dropout: Dropout probability used throughout the layer.
        activation: Name passed to ``_get_activation_fn``.
        normalize_before: Selects pre-norm (``forward_pre``) instead of
            post-norm (``forward_post``).
    """

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
                 activation="relu", normalize_before=False):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        self.multihead_attn = MultiHeadedAttention(nhead, d_model)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)
        self.normalize_before = normalize_before

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        """Add ``pos`` to ``tensor`` unless ``pos`` is ``None``."""
        return tensor if pos is None else tensor + pos

    def _cross_attend(self, tgt, memory, pos: Optional[Tensor], query_pos: Optional[Tensor]):
        """Cross-attend ``tgt`` to ``memory`` with the custom attention module.

        Inputs are sequence-first ``(N, B, C)``. They are permuted to the
        channel-first layout the module expects, and the result is permuted
        back to sequence-first.
        """
        attended = self.multihead_attn(
            query=self.with_pos_embed(tgt, query_pos).permute(1, 2, 0),
            key=self.with_pos_embed(memory, pos).permute(1, 2, 0),
            value=memory.permute(1, 2, 0),
        )
        return attended.permute(2, 0, 1)

    def forward_post(self, tgt, memory,
                     tgt_mask: Optional[Tensor] = None,
                     memory_mask: Optional[Tensor] = None,
                     tgt_key_padding_mask: Optional[Tensor] = None,
                     memory_key_padding_mask: Optional[Tensor] = None,
                     pos: Optional[Tensor] = None,
                     query_pos: Optional[Tensor] = None):
        """Post-norm variant: each sub-block is followed by its LayerNorm."""
        q = k = self.with_pos_embed(tgt, query_pos)
        tgt2 = self.self_attn(q, k, value=tgt, attn_mask=tgt_mask,
                              key_padding_mask=tgt_key_padding_mask)[0]
        tgt = tgt + self.dropout1(tgt2)
        tgt = self.norm1(tgt)

        tgt2 = self._cross_attend(tgt, memory, pos, query_pos)
        tgt = tgt + self.dropout2(tgt2)
        tgt = self.norm2(tgt)

        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = tgt + self.dropout3(tgt2)
        tgt = self.norm3(tgt)
        return tgt

    def forward_pre(self, tgt, memory,
                    tgt_mask: Optional[Tensor] = None,
                    memory_mask: Optional[Tensor] = None,
                    tgt_key_padding_mask: Optional[Tensor] = None,
                    memory_key_padding_mask: Optional[Tensor] = None,
                    pos: Optional[Tensor] = None,
                    query_pos: Optional[Tensor] = None):
        """Pre-norm variant: LayerNorm is applied before each sub-block."""
        tgt2 = self.norm1(tgt)
        q = k = self.with_pos_embed(tgt2, query_pos)
        tgt2 = self.self_attn(q, k, value=tgt2, attn_mask=tgt_mask,
                              key_padding_mask=tgt_key_padding_mask)[0]
        tgt = tgt + self.dropout1(tgt2)

        tgt2 = self.norm2(tgt)
        # Use the same call convention as forward_post. The previous call
        # passed attn_mask/key_padding_mask and indexed [0], which the custom
        # attention module does not support.
        tgt2 = self._cross_attend(tgt2, memory, pos, query_pos)
        tgt = tgt + self.dropout2(tgt2)

        tgt2 = self.norm3(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
        tgt = tgt + self.dropout3(tgt2)
        return tgt

    def forward(self, tgt, memory,
                tgt_mask: Optional[Tensor] = None,
                memory_mask: Optional[Tensor] = None,
                tgt_key_padding_mask: Optional[Tensor] = None,
                memory_key_padding_mask: Optional[Tensor] = None,
                pos: Optional[Tensor] = None,
                query_pos: Optional[Tensor] = None):
        """Dispatch to the pre-norm or post-norm implementation."""
        if self.normalize_before:
            return self.forward_pre(tgt, memory, tgt_mask, memory_mask,
                                    tgt_key_padding_mask, memory_key_padding_mask,
                                    pos, query_pos)
        return self.forward_post(tgt, memory, tgt_mask, memory_mask,
                                 tgt_key_padding_mask, memory_key_padding_mask,
                                 pos, query_pos)
def limit(ang):
    """Wrap angles (radians) into the range ``(-pi, pi]``.

    The input is not modified: the modulo creates a new array, which is then
    adjusted in place and returned.

    Args:
        ang: Array of angles that supports boolean-mask assignment.

    Returns:
        New array of wrapped angles.
    """
    two_pi = 2 * np.pi
    wrapped = ang % two_pi

    too_high = wrapped > np.pi
    wrapped[too_high] = wrapped[too_high] - two_pi

    too_low = wrapped < -np.pi
    wrapped[too_low] = wrapped[too_low] + two_pi
    return wrapped
def multi_classes_nms(cls_scores, box_preds, nms_config, score_thresh=None):
    """Run NMS independently for every class column.

    Args:
        cls_scores: (N, num_class)
        box_preds: (N, 7 + C)
        nms_config: Config providing ``NMS_PRE_MAXSIZE``, ``NMS_TYPE``,
            ``NMS_THRESH`` and ``NMS_POST_MAXSIZE``. It is also unpacked as
            keyword arguments for the NMS function.
        score_thresh: Optional minimum score; boxes below it are dropped
            before NMS. ``None`` keeps every box.

    Returns:
        Tuple ``(pred_scores, pred_labels, pred_boxes)``, concatenated over
        classes. Labels are the zero-based class column index.
    """
    pred_scores, pred_labels, pred_boxes = [], [], []
    for k in range(cls_scores.shape[1]):
        if score_thresh is not None:
            scores_mask = (cls_scores[:, k] >= score_thresh)
            box_scores = cls_scores[scores_mask, k]
            cur_box_preds = box_preds[scores_mask]
        else:
            box_scores = cls_scores[:, k]
            # Previously left unassigned on this branch, which raised
            # NameError whenever no score threshold was given.
            cur_box_preds = box_preds

        selected = []
        if box_scores.shape[0] > 0:
            box_scores_nms, indices = torch.topk(
                box_scores, k=min(nms_config.NMS_PRE_MAXSIZE, box_scores.shape[0])
            )
            boxes_for_nms = cur_box_preds[indices]
            keep_idx, selected_scores = getattr(iou3d_nms_utils, nms_config.NMS_TYPE)(
                boxes_for_nms[:, 0:7], box_scores_nms, nms_config.NMS_THRESH, **nms_config
            )
            selected = indices[keep_idx[:nms_config.NMS_POST_MAXSIZE]]

        pred_scores.append(box_scores[selected])
        pred_labels.append(box_scores.new_ones(len(selected)).long() * k)
        pred_boxes.append(cur_box_preds[selected])

    pred_scores = torch.cat(pred_scores, dim=0)
    pred_labels = torch.cat(pred_labels, dim=0)
    pred_boxes = torch.cat(pred_boxes, dim=0)

    return pred_scores, pred_labels, pred_boxes
def make_fc_layers(self, input_channels, output_channels, fc_list):
    """Build a 1x1-``Conv1d`` head as an ``nn.Sequential``.

    Every entry of ``fc_list`` adds a bias-free ``Conv1d`` + ``BatchNorm1d``
    + ``ReLU`` triple. After the first triple a ``Dropout`` with
    ``self.model_cfg.DP_RATIO`` is inserted when that ratio is ``>= 0``.
    A final ``Conv1d`` with bias maps to ``output_channels``.

    Args:
        input_channels: Channel count of the incoming features.
        output_channels: Channel count produced by the last convolution.
        fc_list: Channel widths of the hidden layers, in order.

    Returns:
        The assembled ``nn.Sequential``.
    """
    modules = []
    in_ch = input_channels
    for idx, out_ch in enumerate(fc_list):
        modules += [
            nn.Conv1d(in_ch, out_ch, kernel_size=1, bias=False),
            nn.BatchNorm1d(out_ch),
            nn.ReLU(),
        ]
        in_ch = out_ch
        # Dropout is only ever placed after the first hidden block.
        if idx == 0 and self.model_cfg.DP_RATIO >= 0:
            modules.append(nn.Dropout(self.model_cfg.DP_RATIO))
    modules.append(nn.Conv1d(in_ch, output_channels, kernel_size=1, bias=True))
    return nn.Sequential(*modules)
def assign_targets(self, batch_dict, rot_num_id, enable_dif = False):
    """Sample RoI targets and express the matched GT boxes in each RoI's local frame.

    Args:
        batch_dict: Batch dictionary; ``batch_size`` is read here and the
            whole dict is forwarded to the proposal target layer.
        rot_num_id: Index into ``self.proposal_target_layers``. When
            ``enable_dif`` is set and the index is non-zero, its string form
            is also passed to the target layer as the suffix argument.
        enable_dif: When false the target layer is always called with an
            empty suffix.

    Returns:
        The target layer's dict, with ``gt_of_rois`` replaced by the
        RoI-local version and ``gt_of_rois_src`` holding a detached copy of
        the untransformed boxes.
    """
    batch_size = batch_dict['batch_size']
    with torch.no_grad():
        if rot_num_id == 0:
            s_str = ''
        else:
            s_str = str(rot_num_id)
        if enable_dif:
            targets_dict = self.proposal_target_layers[rot_num_id].forward(batch_dict, s_str)
        else:
            targets_dict = self.proposal_target_layers[rot_num_id].forward(batch_dict, '')

    rois = targets_dict['rois']  # (B, N, 7 + C)
    gt_of_rois = targets_dict['gt_of_rois']  # (B, N, 7 + C + 1)
    # Keep an untouched copy: the statements below modify gt_of_rois in place.
    targets_dict['gt_of_rois_src'] = gt_of_rois.clone().detach()

    # canonical transformation
    roi_center = rois[:, :, 0:3]
    roi_ry = rois[:, :, 6] % (2 * np.pi)
    gt_of_rois[:, :, 0:3] = gt_of_rois[:, :, 0:3] - roi_center
    gt_of_rois[:, :, 6] = gt_of_rois[:, :, 6] - roi_ry

    # transfer LiDAR coords to local coords
    # NOTE(review): presumably rotates each GT box about z by -roi_ry;
    # confirm against common_utils.rotate_points_along_z.
    gt_of_rois = common_utils.rotate_points_along_z(
        points=gt_of_rois.view(-1, 1, gt_of_rois.shape[-1]), angle=-roi_ry.view(-1)
    ).view(batch_size, -1, gt_of_rois.shape[-1])

    # flip orientation if rois have opposite orientation
    heading_label = gt_of_rois[:, :, 6] % (2 * np.pi)  # 0 ~ 2pi
    opposite_flag = (heading_label > np.pi * 0.5) & (heading_label < np.pi * 1.5)
    heading_label[opposite_flag] = (heading_label[opposite_flag] + np.pi) % (2 * np.pi)  # (0 ~ pi/2, 3pi/2 ~ 2pi)
    flag = heading_label > np.pi
    heading_label[flag] = heading_label[flag] - np.pi * 2  # (-pi/2, pi/2)
    heading_label = torch.clamp(heading_label, min=-np.pi / 2, max=np.pi / 2)

    gt_of_rois[:, :, 6] = heading_label
    targets_dict['gt_of_rois'] = gt_of_rois
    return targets_dict
gt_boxes3d_ct.view(rcnn_batch_size, code_size), rois_anchor ) rcnn_loss_reg = self.reg_loss_func( rcnn_reg.view(rcnn_batch_size, -1).unsqueeze(dim=0), reg_targets.unsqueeze(dim=0), ) # [B, M, 7] rcnn_loss_reg = (rcnn_loss_reg.view(rcnn_batch_size, -1) * fg_mask.unsqueeze(dim=-1).float()).sum() / max(fg_sum, 1) rcnn_loss_reg = rcnn_loss_reg * loss_cfgs.LOSS_WEIGHTS['rcnn_reg_weight'] tb_dict['rcnn_loss_reg'] = rcnn_loss_reg.item() if loss_cfgs.CORNER_LOSS_REGULARIZATION and fg_sum > 0: # TODO: NEED to BE CHECK fg_rcnn_reg = rcnn_reg.view(rcnn_batch_size, -1)[fg_mask] fg_roi_boxes3d = roi_boxes3d.view(-1, code_size)[fg_mask] fg_roi_boxes3d = fg_roi_boxes3d.view(1, -1, code_size) batch_anchors = fg_roi_boxes3d.clone().detach() roi_ry = fg_roi_boxes3d[:, :, 6].view(-1) roi_xyz = fg_roi_boxes3d[:, :, 0:3].view(-1, 3) batch_anchors[:, :, 0:3] = 0 rcnn_boxes3d = self.box_coder.decode_torch( fg_rcnn_reg.view(batch_anchors.shape[0], -1, code_size), batch_anchors ).view(-1, code_size) rcnn_boxes3d = common_utils.rotate_points_along_z( rcnn_boxes3d.unsqueeze(dim=1), roi_ry ).squeeze(dim=1) rcnn_boxes3d[:, 0:3] += roi_xyz loss_corner = loss_utils.get_corner_loss_lidar( rcnn_boxes3d[:, 0:7], gt_of_rois_src[fg_mask][:, 0:7] ) loss_corner = loss_corner.mean() loss_corner = loss_corner * loss_cfgs.LOSS_WEIGHTS['rcnn_corner_weight'] rcnn_loss_reg += loss_corner tb_dict['rcnn_loss_corner'] = loss_corner.item() else: raise NotImplementedError reg_valid_mask = forward_ret_dict['reg_valid_mask'].view(-1) code_size = self.box_coder.code_size shape = forward_ret_dict['gt_of_rois'].shape gt_boxes3d_ct = forward_ret_dict['gt_of_rois'].clone().view(shape[0] * shape[1], -1)[:, 0:7] rcnn_reg = forward_ret_dict['rcnn_reg'] # (rcnn_batch_size, C) rois = forward_ret_dict['rois'].clone().view(-1, code_size)[:, 0:7] rois[:, 0:3] = 0 rois[:, 6] = 0 batch_box_preds = self.box_coder.decode_torch(rcnn_reg, rois).view(-1, code_size) fg_mask = (reg_valid_mask > 0) if len(gt_boxes3d_ct[fg_mask]) == 0: 
def get_box_cls_layer_loss(self, forward_ret_dict):
    """Compute the weighted RCNN classification loss.

    ``LOSS_CONFIG.CLS_LOSS`` selects the criterion. ``'BinaryCrossEntropy'``
    applies BCE to sigmoid scores of the flattened logits.
    ``'CrossEntropy'`` applies cross-entropy with ``ignore_index=-1``.
    In both cases only entries with label ``>= 0`` contribute. The sum is
    divided by the number of valid entries (at least 1) and scaled by
    ``LOSS_WEIGHTS['rcnn_cls_weight']``.

    Args:
        forward_ret_dict: Dict providing ``rcnn_cls`` and ``rcnn_cls_labels``.

    Returns:
        ``(loss, tb_dict)`` where ``tb_dict['rcnn_loss_cls']`` is the loss
        as a Python float.

    Raises:
        NotImplementedError: For any other ``CLS_LOSS`` value.
    """
    loss_cfgs = self.model_cfg.LOSS_CONFIG
    logits = forward_ret_dict['rcnn_cls']
    labels = forward_ret_dict['rcnn_cls_labels'].view(-1)

    if loss_cfgs.CLS_LOSS == 'BinaryCrossEntropy':
        per_roi_loss = F.binary_cross_entropy(
            torch.sigmoid(logits.view(-1)), labels.float(), reduction='none'
        )
    elif loss_cfgs.CLS_LOSS == 'CrossEntropy':
        per_roi_loss = F.cross_entropy(logits, labels, reduction='none', ignore_index=-1)
    else:
        raise NotImplementedError

    # Negative labels mark ignored RoIs.
    valid = (labels >= 0).float()
    rcnn_loss_cls = (per_roi_loss * valid).sum() / torch.clamp(valid.sum(), min=1.0)
    rcnn_loss_cls = rcnn_loss_cls * loss_cfgs.LOSS_WEIGHTS['rcnn_cls_weight']

    return rcnn_loss_cls, {'rcnn_loss_cls': rcnn_loss_cls.item()}
def ang_weight(self, pred, gt):
    """Score angular agreement between ``pred`` and ``gt``.

    The angular distance is the smaller of ``|pred - gt|`` and
    ``2*pi - |pred - gt|``. It is mapped linearly so that distance 0 gives
    1 and distance ``pi`` gives 0.

    Args:
        pred: Tensor of predicted angles in radians.
        gt: Tensor of reference angles in radians, broadcastable with ``pred``.

    Returns:
        Tensor of weights, ``1 - distance / pi``.
    """
    direct = torch.abs(pred - gt)
    around = 2 * np.pi - direct
    shortest = torch.stack([direct, around]).min(dim=0)[0]
    return 1 - shortest / np.pi
num_rois, 7 + C) roi_scores: (B, num_rois) gt_boxes: (B, N, 7 + C + 1) roi_labels: (B, num_rois) Returns: batch_dict: rois: (B, M, 7 + C) gt_of_rois: (B, M, 7 + C) gt_iou_of_rois: (B, M) roi_scores: (B, M) roi_labels: (B, M) reg_valid_mask: (B, M) rcnn_cls_labels: (B, M) """ batch_rois, batch_gt_of_rois, batch_roi_ious, batch_roi_scores, batch_roi_labels = self.sample_rois_for_rcnn( batch_dict=batch_dict, ind=ind, ) # regression valid mask if self.roi_sampler_cfg.CLS_SCORE_TYPE in ['roi_iou_x', 'roi_ioud_x']: reg_valid_mask = batch_roi_ious.new_zeros(batch_roi_ious.shape).long() for cls_i in range(len(self.roi_sampler_cfg.REG_FG_THRESH)): reg_fg_thresh = self.roi_sampler_cfg.REG_FG_THRESH[cls_i] cls_mask = batch_gt_of_rois[...,-1] == (cls_i+1) if self.roi_sampler_cfg.get('ENABLE_HARD_SAMPLING', False): mask_hard = (batch_roi_ious < reg_fg_thresh) & (batch_roi_ious > self.roi_sampler_cfg.HARD_SAMPLING_THRESH[cls_i]) & cls_mask mask_prob = mask_hard.new_zeros(mask_hard.size()).bool() teval = int(1/self.roi_sampler_cfg.HARD_SAMPLING_RATIO[cls_i]) ints = range(np.random.randint(0, teval), mask_prob.shape[0], teval) mask_prob[ints] = 1 mask_hard2 = mask_hard * mask_prob this_fg_inds1 = ((batch_roi_ious > reg_fg_thresh) & cls_mask).long() this_reg_valid_mask = this_fg_inds1 + mask_hard2.long() else: this_reg_valid_mask = ((batch_roi_ious > reg_fg_thresh) & cls_mask).long() reg_valid_mask += this_reg_valid_mask else: reg_valid_mask = (batch_roi_ious > self.roi_sampler_cfg.REG_FG_THRESH).long() # classification label if self.roi_sampler_cfg.CLS_SCORE_TYPE == 'cls': batch_cls_labels = (batch_roi_ious > self.roi_sampler_cfg.CLS_FG_THRESH).long() ignore_mask = (batch_roi_ious > self.roi_sampler_cfg.CLS_BG_THRESH) & \ (batch_roi_ious < self.roi_sampler_cfg.CLS_FG_THRESH) batch_cls_labels[ignore_mask > 0] = -1 elif self.roi_sampler_cfg.CLS_SCORE_TYPE == 'roi_iou': iou_bg_thresh = self.roi_sampler_cfg.CLS_BG_THRESH iou_fg_thresh = self.roi_sampler_cfg.CLS_FG_THRESH fg_mask = 
batch_roi_ious > iou_fg_thresh bg_mask = batch_roi_ious < iou_bg_thresh interval_mask = (fg_mask == 0) & (bg_mask == 0) batch_cls_labels = (fg_mask > 0).float() batch_cls_labels[interval_mask] = \ (batch_roi_ious[interval_mask] - iou_bg_thresh) / (iou_fg_thresh - iou_bg_thresh) elif self.roi_sampler_cfg.CLS_SCORE_TYPE == 'roi_ioud': iou_bg_thresh = self.roi_sampler_cfg.CLS_BG_THRESH iou_fg_thresh = self.roi_sampler_cfg.CLS_FG_THRESH fg_mask = batch_roi_ious > iou_fg_thresh bg_mask = batch_roi_ious < iou_bg_thresh interval_mask = (fg_mask == 0) & (bg_mask == 0) batch_cls_labels = (fg_mask > 0).float() batch_cls_labels[interval_mask] = \ (batch_roi_ious[interval_mask] - iou_bg_thresh) / (iou_fg_thresh - iou_bg_thresh) ang_roi = batch_rois[...,6] ang_gt = batch_gt_of_rois[...,6] ang_roi = self.limit(ang_roi) ang_gt = self.limit(ang_gt) ang_target = self.ang_weight(ang_roi,ang_gt) direction_constraint = self.roi_sampler_cfg.DIRECTION_MIN direction_constraint2 = self.roi_sampler_cfg.DIRECTION_MAX ang_target = (torch.clamp(ang_target, direction_constraint, direction_constraint2) - direction_constraint) / ( direction_constraint2 - direction_constraint) batch_cls_labels *= ang_target elif self.roi_sampler_cfg.CLS_SCORE_TYPE == 'roi_ioud_x': all_iou_bg_thresh = self.roi_sampler_cfg.CLS_BG_THRESH all_iou_fg_thresh = self.roi_sampler_cfg.CLS_FG_THRESH batch_cls_labels = batch_roi_ious.new_zeros(size = batch_roi_ious.shape) for cls_id in range(len(all_iou_bg_thresh)): gt_cls = batch_gt_of_rois[..., -1] iou_fg_thresh = all_iou_fg_thresh[cls_id] iou_bg_thresh = all_iou_bg_thresh[cls_id] cls_mask = gt_cls == (cls_id+1) fg_mask = batch_roi_ious > iou_fg_thresh bg_mask = batch_roi_ious < iou_bg_thresh interval_mask = (fg_mask == 0) & (bg_mask == 0) cls_labels = (fg_mask > 0).float() cls_labels[interval_mask] = \ (batch_roi_ious[interval_mask] - iou_bg_thresh) / (iou_fg_thresh - iou_bg_thresh) ang_roi = batch_rois[...,6] ang_gt = batch_gt_of_rois[...,6] ang_roi = self.limit(ang_roi) 
ang_gt = self.limit(ang_gt) ang_target = self.ang_weight(ang_roi,ang_gt) direction_constraint = self.roi_sampler_cfg.DIRECTION_MIN direction_constraint2 = self.roi_sampler_cfg.DIRECTION_MAX ang_target = (torch.clamp(ang_target, direction_constraint, direction_constraint2 ) - direction_constraint) / ( direction_constraint2 - direction_constraint) cls_labels*=ang_target batch_cls_labels[cls_mask] = cls_labels[cls_mask] elif self.roi_sampler_cfg.CLS_SCORE_TYPE == 'roi_iou_x': all_iou_bg_thresh = self.roi_sampler_cfg.CLS_BG_THRESH all_iou_fg_thresh = self.roi_sampler_cfg.CLS_FG_THRESH batch_cls_labels = batch_roi_ious.new_zeros(batch_roi_ious.shape) for cls_id in range(len(all_iou_bg_thresh)): gt_cls = batch_gt_of_rois[..., -1] iou_fg_thresh = all_iou_fg_thresh[cls_id] iou_bg_thresh = all_iou_bg_thresh[cls_id] cls_mask = gt_cls == (cls_id+1) fg_mask = batch_roi_ious > iou_fg_thresh bg_mask = batch_roi_ious < iou_bg_thresh interval_mask = (fg_mask == 0) & (bg_mask == 0) cls_labels = (fg_mask > 0).float() cls_labels[interval_mask] = \ (batch_roi_ious[interval_mask] - iou_bg_thresh) / (iou_fg_thresh - iou_bg_thresh) batch_cls_labels[cls_mask] = cls_labels[cls_mask] else: raise NotImplementedError targets_dict = {'rois': batch_rois, 'gt_of_rois': batch_gt_of_rois, 'gt_iou_of_rois': batch_roi_ious, 'roi_scores': batch_roi_scores, 'roi_labels': batch_roi_labels, 'reg_valid_mask': reg_valid_mask, 'rcnn_cls_labels': batch_cls_labels} return targets_dict def sample_rois_for_rcnn(self, batch_dict, ind=''): """ Args: batch_dict: batch_size: rois: (B, num_rois, 7 + C) roi_scores: (B, num_rois) gt_boxes: (B, N, 7 + C + 1) roi_labels: (B, num_rois) Returns: """ batch_size = batch_dict['batch_size'] rois = batch_dict['rois'] roi_scores = batch_dict['roi_scores'] roi_labels = batch_dict['roi_labels'] gt_boxes = batch_dict['gt_boxes'+ind] gt_code_size = gt_boxes.shape[-1] roi_code_size = rois.shape[-1] batch_rois = rois.new_zeros(batch_size, self.roi_sampler_cfg.ROI_PER_IMAGE, 
def subsample_rois(self, max_overlaps, gts=None):
    """
    Pick ``ROI_PER_IMAGE`` ROI indices for one sample, mixing foreground, hard
    background and easy background ROIs.

    Args:
        max_overlaps: per-ROI best IoU against the assigned GT box.
        gts: optional tensor holding the GT box assigned to every ROI. When given,
            its last column is compared with ``i + 1`` and ``REG_FG_THRESH`` /
            ``CLS_FG_THRESH`` are indexed per class; when ``None`` they are used
            as scalars.

    Returns:
        1-D long tensor of sampled ROI indices, foreground first, then background.

    Raises:
        NotImplementedError: if no ROI qualifies as foreground or background.
    """
    cfg = self.roi_sampler_cfg
    # sample fg, easy_bg, hard_bg
    fg_rois_per_image = int(np.round(cfg.FG_RATIO * cfg.ROI_PER_IMAGE))

    if gts is None:
        fg_thresh = min(cfg.REG_FG_THRESH, cfg.CLS_FG_THRESH)
        fg_inds = (max_overlaps >= fg_thresh).nonzero().view(-1)
    else:
        # Accumulate per-class foreground hits, then turn the counts into indices.
        fg_inds = max_overlaps.new_zeros(max_overlaps.shape).long()
        for i in range(len(cfg.CLS_FG_THRESH)):
            cls_mask = gts[..., -1] == (i + 1)
            this_fg_thresh = min(cfg.REG_FG_THRESH[i], cfg.CLS_FG_THRESH[i])
            fg_inds += (max_overlaps >= this_fg_thresh) & cls_mask
        fg_inds = fg_inds.nonzero().view(-1)

    easy_bg_inds = (max_overlaps < cfg.CLS_BG_THRESH_LO).nonzero().view(-1)

    if gts is None:
        hard_bg_inds = ((max_overlaps < cfg.REG_FG_THRESH) &
                        (max_overlaps >= cfg.CLS_BG_THRESH_LO)).nonzero().view(-1)
    else:
        hard_bg_inds = max_overlaps.new_zeros(max_overlaps.shape).long()
        for i in range(len(cfg.REG_FG_THRESH)):
            cls_mask = gts[..., -1] == (i + 1)
            hard_bg_inds += ((max_overlaps < cfg.REG_FG_THRESH[i]) &
                             (max_overlaps >= cfg.CLS_BG_THRESH_LO) & cls_mask)
        hard_bg_inds = hard_bg_inds.nonzero().view(-1)

    fg_num_rois = fg_inds.numel()
    bg_num_rois = hard_bg_inds.numel() + easy_bg_inds.numel()

    if fg_num_rois > 0 and bg_num_rois > 0:
        # sampling fg
        fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)
        rand_num = torch.from_numpy(np.random.permutation(fg_num_rois)).type_as(max_overlaps).long()
        fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

        # sampling bg
        bg_rois_per_this_image = cfg.ROI_PER_IMAGE - fg_rois_per_this_image
        bg_inds = self.sample_bg_inds(
            hard_bg_inds, easy_bg_inds, bg_rois_per_this_image, cfg.HARD_BG_RATIO
        )
    elif fg_num_rois > 0 and bg_num_rois == 0:
        # sampling fg (with replacement, to fill the whole quota)
        rand_num = np.floor(np.random.rand(cfg.ROI_PER_IMAGE) * fg_num_rois)
        rand_num = torch.from_numpy(rand_num).type_as(max_overlaps).long()
        fg_inds = fg_inds[rand_num]
        # An empty index tensor, not a Python list: torch.cat below only accepts tensors.
        bg_inds = fg_inds.new_zeros(0)
    elif bg_num_rois > 0 and fg_num_rois == 0:
        # sampling bg
        bg_rois_per_this_image = cfg.ROI_PER_IMAGE
        bg_inds = self.sample_bg_inds(
            hard_bg_inds, easy_bg_inds, bg_rois_per_this_image, cfg.HARD_BG_RATIO
        )
    else:
        print('maxoverlaps:(min=%f, max=%f)' % (max_overlaps.min().item(), max_overlaps.max().item()))
        print('ERROR: FG=%d, BG=%d' % (fg_num_rois, bg_num_rois))
        raise NotImplementedError

    sampled_inds = torch.cat((fg_inds, bg_inds), dim=0)
    return sampled_inds
def forward(self, batch_dict):
    """
    Sample ROIs through ``self.sample_rois_for_rcnn`` and derive the
    regression-valid mask and the classification targets.

    Args:
        batch_dict: handed unchanged to ``self.sample_rois_for_rcnn``.

    Returns:
        dict with keys ``rois``, ``gt_of_rois``, ``gt_iou_of_rois``, ``roi_scores``,
        ``roi_labels``, ``reg_valid_mask``, ``gt_bbs_mask`` and ``rcnn_cls_labels``.

    Raises:
        NotImplementedError: for a ``CLS_SCORE_TYPE`` other than 'cls', 'mcls',
            'roi_ious', 'roi_mious' or 'roi_ioud'.
    """
    (sampled_rois, sampled_gts, roi_ious, roi_mious,
     sampled_scores, sampled_labels, sampled_bbs_mask) = self.sample_rois_for_rcnn(batch_dict=batch_dict)

    cfg = self.roi_sampler_cfg
    score_type = cfg.CLS_SCORE_TYPE

    # The "m" score types are driven by the second IoU tensor returned by the sampler.
    if score_type in ('roi_mious', 'mcls'):
        roi_ious = roi_mious

    # regression valid mask
    reg_valid_mask = (roi_ious > cfg.REG_FG_THRESH).long()

    # classification label
    if score_type in ('cls', 'mcls'):
        cls_targets = (roi_ious > cfg.CLS_FG_THRESH).long()
        undecided = (roi_ious > cfg.CLS_BG_THRESH) & (roi_ious < cfg.CLS_FG_THRESH)
        cls_targets[undecided > 0] = -1
    elif score_type in ('roi_ious', 'roi_mious', 'roi_ioud'):
        low = cfg.CLS_BG_THRESH
        high = cfg.CLS_FG_THRESH
        above = roi_ious > high
        below = roi_ious < low
        between = (above | below) == 0
        # 1 for foreground, 0 for background, linear ramp in between.
        cls_targets = above.float()
        cls_targets[between] = (roi_ious[between] - low) / (high - low)
        if score_type == 'roi_ioud':
            # Scale by heading agreement, saturating at a weight of 0.8.
            heading_roi = self.limit(sampled_rois[..., 6])
            heading_gt = self.limit(sampled_gts[..., 6])
            heading_weight = self.ang_weight(heading_roi, heading_gt)
            cls_targets *= torch.clamp(heading_weight, 0.0, 0.8) / 0.8
    else:
        raise NotImplementedError

    return {
        'rois': sampled_rois,
        'gt_of_rois': sampled_gts,
        'gt_iou_of_rois': roi_ious,
        'roi_scores': sampled_scores,
        'roi_labels': sampled_labels,
        'reg_valid_mask': reg_valid_mask,
        'gt_bbs_mask': sampled_bbs_mask,
        'rcnn_cls_labels': cls_targets,
    }
batch_dict['gt_bbs_mask'] gt_code_size = gt_tracklets.shape[-1] roi_code_size = rois.shape[-1] batch_rois = rois.new_zeros(batch_size, self.roi_sampler_cfg.ROI_PER_IMAGE, roi_code_size) batch_gt_of_rois = rois.new_zeros(batch_size, self.roi_sampler_cfg.ROI_PER_IMAGE, gt_code_size ) batch_roi_scores = rois.new_zeros(batch_size, self.roi_sampler_cfg.ROI_PER_IMAGE) batch_roi_labels = rois.new_zeros((batch_size, self.roi_sampler_cfg.ROI_PER_IMAGE), dtype=torch.long) batch_all_roi_ious = rois.new_zeros(batch_size, self.roi_sampler_cfg.ROI_PER_IMAGE,num_frame) batch_gt_bbs_mask = rois.new_zeros(batch_size, self.roi_sampler_cfg.ROI_PER_IMAGE, num_frame) for index in range(batch_size): cur_roi, cur_gt, cur_roi_labels, cur_roi_scores,cur_bbs_mask = \ rois[index], gt_tracklets[index], roi_labels[index], roi_scores[index],gt_bbs_mask[index] k = cur_gt.__len__() - 1 while k > 0 and cur_gt[k].sum() == 0: k -= 1 cur_gt = cur_gt[:k + 1] cur_gt = cur_gt.new_zeros((1, cur_gt.shape[1])) if len(cur_gt) == 0 else cur_gt cur_bbs_mask = cur_bbs_mask.new_zeros((1, cur_bbs_mask.shape[1])) if len(cur_bbs_mask) == 0 else cur_bbs_mask if self.roi_sampler_cfg.get('SAMPLE_ROI_BY_EACH_CLASS', False): max_overlaps, gt_assignment = self.get_max_iou_with_same_class( rois=cur_roi[:,0:7], roi_labels=cur_roi_labels, gt_boxes=cur_gt[:, 0:7], gt_labels=cur_gt[:, -1].long() ) else: iou3d = iou3d_nms_utils.boxes_iou3d_gpu(cur_roi[:, 0:7], cur_gt[:, 0:7]) # (M, N) max_overlaps, gt_assignment = torch.max(iou3d, dim=1) sampled_inds = self.subsample_rois(max_overlaps=max_overlaps) batch_rois[index] = cur_roi[sampled_inds] batch_roi_labels[index] = cur_roi_labels[sampled_inds] batch_roi_scores[index] = cur_roi_scores[sampled_inds] batch_gt_of_rois[index] = cur_gt[gt_assignment[sampled_inds]] batch_all_roi_ious[index,:,0] = max_overlaps[sampled_inds] batch_gt_bbs_mask[index] = cur_bbs_mask[gt_assignment[sampled_inds]] for i in range(1, num_frame): for j in range(batch_size): this_roi = 
def subsample_rois(self, max_overlaps):
    """
    Pick ``ROI_PER_IMAGE`` ROI indices for one sample, mixing foreground, hard
    background and easy background ROIs.

    Args:
        max_overlaps: per-ROI best IoU against the assigned GT.

    Returns:
        1-D long tensor of sampled ROI indices, foreground first, then background.

    Raises:
        NotImplementedError: if no ROI qualifies as foreground or background.
    """
    cfg = self.roi_sampler_cfg
    # sample fg, easy_bg, hard_bg
    fg_rois_per_image = int(np.round(cfg.FG_RATIO * cfg.ROI_PER_IMAGE))
    fg_thresh = min(cfg.REG_FG_THRESH, cfg.CLS_FG_THRESH)

    fg_inds = (max_overlaps >= fg_thresh).nonzero().view(-1)
    easy_bg_inds = (max_overlaps < cfg.CLS_BG_THRESH_LO).nonzero().view(-1)
    hard_bg_inds = ((max_overlaps < cfg.REG_FG_THRESH) &
                    (max_overlaps >= cfg.CLS_BG_THRESH_LO)).nonzero().view(-1)

    fg_num_rois = fg_inds.numel()
    bg_num_rois = hard_bg_inds.numel() + easy_bg_inds.numel()

    if fg_num_rois > 0 and bg_num_rois > 0:
        # sampling fg
        fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)
        rand_num = torch.from_numpy(np.random.permutation(fg_num_rois)).type_as(max_overlaps).long()
        fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

        # sampling bg
        bg_rois_per_this_image = cfg.ROI_PER_IMAGE - fg_rois_per_this_image
        bg_inds = self.sample_bg_inds(
            hard_bg_inds, easy_bg_inds, bg_rois_per_this_image, cfg.HARD_BG_RATIO
        )
    elif fg_num_rois > 0 and bg_num_rois == 0:
        # sampling fg (with replacement, to fill the whole quota)
        rand_num = np.floor(np.random.rand(cfg.ROI_PER_IMAGE) * fg_num_rois)
        rand_num = torch.from_numpy(rand_num).type_as(max_overlaps).long()
        fg_inds = fg_inds[rand_num]
        # An empty index tensor, not a Python list: torch.cat below only accepts tensors.
        bg_inds = fg_inds.new_zeros(0)
    elif bg_num_rois > 0 and fg_num_rois == 0:
        # sampling bg
        bg_rois_per_this_image = cfg.ROI_PER_IMAGE
        bg_inds = self.sample_bg_inds(
            hard_bg_inds, easy_bg_inds, bg_rois_per_this_image, cfg.HARD_BG_RATIO
        )
    else:
        print('maxoverlaps:(min=%f, max=%f)' % (max_overlaps.min().item(), max_overlaps.max().item()))
        print('ERROR: FG=%d, BG=%d' % (fg_num_rois, bg_num_rois))
        raise NotImplementedError

    sampled_inds = torch.cat((fg_inds, bg_inds), dim=0)
    return sampled_inds
def forward(self, batch_dict,ind=''):
    """
    Sample ROIs through ``self.sample_rois_for_rcnn`` and derive the
    regression-valid mask and the classification targets.

    Args:
        batch_dict: handed unchanged to ``self.sample_rois_for_rcnn``.
        ind: suffix passed to the sampler and appended to every key of the result.

    Returns:
        dict keyed by ``'rois' + ind``, ``'gt_of_rois' + ind``, ``'gt_iou_of_rois' + ind``,
        ``'roi_scores' + ind``, ``'roi_labels' + ind``, ``'reg_valid_mask' + ind`` and
        ``'rcnn_cls_labels' + ind``.

    Raises:
        NotImplementedError: for a ``CLS_SCORE_TYPE`` other than 'cls', 'roi_iou',
            'roi_ioud', 'roi_iou_x' or 'roi_ioud_x'.
    """
    sampled_rois, sampled_gts, ious, sampled_scores, sampled_labels = self.sample_rois_for_rcnn(
        batch_dict=batch_dict, ind=ind
    )

    cfg = self.roi_sampler_cfg
    score_type = cfg.CLS_SCORE_TYPE
    per_class = score_type in ('roi_iou_x', 'roi_ioud_x')

    def ramp_labels(low, high):
        # 1 above `high`, 0 below `low`, linear interpolation in between.
        above = ious > high
        below = ious < low
        between = (above | below) == 0
        ramp = above.float()
        ramp[between] = (ious[between] - low) / (high - low)
        return ramp

    if score_type in ('roi_ioud_x', 'roi_ioud'):
        # The "d" score types down-weight the IoU in place by heading agreement
        # before any threshold is applied.
        heading_roi = self.limit(sampled_rois[..., 6].clone())
        heading_gt = self.limit(sampled_gts[..., 6].clone())
        heading_weight = self.ang_weight(heading_roi, heading_gt)
        floor = cfg.DIRECTION_C
        heading_weight = (torch.clamp(heading_weight, floor, 1) - floor) / (1 - floor)
        ious *= heading_weight

    # regression valid mask
    if per_class:
        reg_valid_mask = ious.new_zeros(ious.shape).long()
        for cls_idx in range(len(cfg.REG_FG_THRESH)):
            threshold = cfg.REG_FG_THRESH[cls_idx]
            of_class = sampled_gts[..., -1] == (cls_idx + 1)
            reg_valid_mask += ((ious > threshold) & of_class).long()
    else:
        reg_valid_mask = (ious > cfg.REG_FG_THRESH).long()

    # classification label
    if score_type == 'cls':
        cls_targets = (ious > cfg.CLS_FG_THRESH).long()
        undecided = (ious > cfg.CLS_BG_THRESH) & (ious < cfg.CLS_FG_THRESH)
        cls_targets[undecided > 0] = -1
    elif score_type in ('roi_iou', 'roi_ioud'):
        cls_targets = ramp_labels(cfg.CLS_BG_THRESH, cfg.CLS_FG_THRESH)
    elif per_class:
        bg_by_class = cfg.CLS_BG_THRESH
        fg_by_class = cfg.CLS_FG_THRESH
        cls_targets = ious.new_zeros(ious.shape)
        for cls_idx in range(len(bg_by_class)):
            high = fg_by_class[cls_idx]
            low = bg_by_class[cls_idx]
            of_class = sampled_gts[..., -1] == (cls_idx + 1)
            # Only ROIs assigned to this class receive this class's ramp.
            cls_targets[of_class] = ramp_labels(low, high)[of_class]
    else:
        raise NotImplementedError

    return {
        'rois'+ind: sampled_rois,
        'gt_of_rois'+ind: sampled_gts,
        'gt_iou_of_rois'+ind: ious,
        'roi_scores'+ind: sampled_scores,
        'roi_labels'+ind: sampled_labels,
        'reg_valid_mask'+ind: reg_valid_mask,
        'rcnn_cls_labels'+ind: cls_targets,
    }
def subsample_rois(self, max_overlaps, gts=None):
    """
    Pick ``ROI_PER_IMAGE`` ROI indices for one sample, mixing foreground, hard
    background and easy background ROIs.

    Args:
        max_overlaps: per-ROI best IoU against the assigned GT box.
        gts: optional tensor holding the GT box assigned to every ROI. When given,
            its last column is compared with ``i + 1`` and ``REG_FG_THRESH`` /
            ``CLS_FG_THRESH`` are indexed per class; when ``None`` they are used
            as scalars.

    Returns:
        1-D long tensor of sampled ROI indices, foreground first, then background.

    Raises:
        NotImplementedError: if no ROI qualifies as foreground or background.
    """
    cfg = self.roi_sampler_cfg
    # sample fg, easy_bg, hard_bg
    fg_rois_per_image = int(np.round(cfg.FG_RATIO * cfg.ROI_PER_IMAGE))

    if gts is None:
        fg_thresh = min(cfg.REG_FG_THRESH, cfg.CLS_FG_THRESH)
        fg_inds = (max_overlaps >= fg_thresh).nonzero().view(-1)
    else:
        # Accumulate per-class foreground hits, then turn the counts into indices.
        fg_inds = max_overlaps.new_zeros(max_overlaps.shape).long()
        for i in range(len(cfg.CLS_FG_THRESH)):
            cls_mask = gts[..., -1] == (i + 1)
            this_fg_thresh = min(cfg.REG_FG_THRESH[i], cfg.CLS_FG_THRESH[i])
            fg_inds += (max_overlaps >= this_fg_thresh) & cls_mask
        fg_inds = fg_inds.nonzero().view(-1)

    easy_bg_inds = (max_overlaps < cfg.CLS_BG_THRESH_LO).nonzero().view(-1)

    if gts is None:
        hard_bg_inds = ((max_overlaps < cfg.REG_FG_THRESH) &
                        (max_overlaps >= cfg.CLS_BG_THRESH_LO)).nonzero().view(-1)
    else:
        hard_bg_inds = max_overlaps.new_zeros(max_overlaps.shape).long()
        for i in range(len(cfg.REG_FG_THRESH)):
            cls_mask = gts[..., -1] == (i + 1)
            hard_bg_inds += ((max_overlaps < cfg.REG_FG_THRESH[i]) &
                             (max_overlaps >= cfg.CLS_BG_THRESH_LO) & cls_mask)
        hard_bg_inds = hard_bg_inds.nonzero().view(-1)

    fg_num_rois = fg_inds.numel()
    bg_num_rois = hard_bg_inds.numel() + easy_bg_inds.numel()

    if fg_num_rois > 0 and bg_num_rois > 0:
        # sampling fg
        fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)
        rand_num = torch.from_numpy(np.random.permutation(fg_num_rois)).type_as(max_overlaps).long()
        fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

        # sampling bg
        bg_rois_per_this_image = cfg.ROI_PER_IMAGE - fg_rois_per_this_image
        bg_inds = self.sample_bg_inds(
            hard_bg_inds, easy_bg_inds, bg_rois_per_this_image, cfg.HARD_BG_RATIO
        )
    elif fg_num_rois > 0 and bg_num_rois == 0:
        # sampling fg (with replacement, to fill the whole quota)
        rand_num = np.floor(np.random.rand(cfg.ROI_PER_IMAGE) * fg_num_rois)
        rand_num = torch.from_numpy(rand_num).type_as(max_overlaps).long()
        fg_inds = fg_inds[rand_num]
        # An empty index tensor, not a Python list: torch.cat below only accepts tensors.
        bg_inds = fg_inds.new_zeros(0)
    elif bg_num_rois > 0 and fg_num_rois == 0:
        # sampling bg
        bg_rois_per_this_image = cfg.ROI_PER_IMAGE
        bg_inds = self.sample_bg_inds(
            hard_bg_inds, easy_bg_inds, bg_rois_per_this_image, cfg.HARD_BG_RATIO
        )
    else:
        print('maxoverlaps:(min=%f, max=%f)' % (max_overlaps.min().item(), max_overlaps.max().item()))
        print('ERROR: FG=%d, BG=%d' % (fg_num_rois, bg_num_rois))
        raise NotImplementedError

    sampled_inds = torch.cat((fg_inds, bg_inds), dim=0)
    return sampled_inds
size=(easy_bg_rois_num,)).long() easy_bg_inds = easy_bg_inds[rand_idx] bg_inds = torch.cat([hard_bg_inds, easy_bg_inds], dim=0) elif hard_bg_inds.numel() > 0 and easy_bg_inds.numel() == 0: hard_bg_rois_num = bg_rois_per_this_image # sampling hard bg rand_idx = torch.randint(low=0, high=hard_bg_inds.numel(), size=(hard_bg_rois_num,)).long() bg_inds = hard_bg_inds[rand_idx] elif hard_bg_inds.numel() == 0 and easy_bg_inds.numel() > 0: easy_bg_rois_num = bg_rois_per_this_image # sampling easy bg rand_idx = torch.randint(low=0, high=easy_bg_inds.numel(), size=(easy_bg_rois_num,)).long() bg_inds = easy_bg_inds[rand_idx] else: raise NotImplementedError return bg_inds @staticmethod def get_max_iou_with_same_class(rois, roi_labels, gt_boxes, gt_labels): """ Args: rois: (N, 7) roi_labels: (N) gt_boxes: (N, ) gt_labels: Returns: """ """ :param rois: (N, 7) :param roi_labels: (N) :param gt_boxes: (N, 8) :return: """ max_overlaps = rois.new_zeros(rois.shape[0]) gt_assignment = roi_labels.new_zeros(roi_labels.shape[0]) for k in range(gt_labels.min().item(), gt_labels.max().item() + 1): roi_mask = (roi_labels == k) gt_mask = (gt_labels == k) if roi_mask.sum() > 0 and gt_mask.sum() > 0: cur_roi = rois[roi_mask] cur_gt = gt_boxes[gt_mask] original_gt_assignment = gt_mask.nonzero().view(-1) iou3d = iou3d_nms_utils.boxes_iou3d_gpu(cur_roi[:,0:7], cur_gt[:,0:7]) # (M, N) cur_max_overlaps, cur_gt_assignment = torch.max(iou3d, dim=1) max_overlaps[roi_mask] = cur_max_overlaps gt_assignment[roi_mask] = original_gt_assignment[cur_gt_assignment] return max_overlaps, gt_assignment class ProposalTargetLayerT(nn.Module): def __init__(self, roi_sampler_cfg): super().__init__() self.roi_sampler_cfg = roi_sampler_cfg def limit(self,ang): ang = ang % (2 * np.pi) ang[ang > np.pi] = ang[ang > np.pi] - 2 * np.pi ang[ang < -np.pi] = ang[ang < -np.pi] + 2 * np.pi return ang def ang_weight(self,pred, gt): a = torch.abs(pred - gt) b = 2 * np.pi - torch.abs(pred - gt) res = torch.stack([a, b]) res = 
def forward(self, batch_dict):
    """
    Sample ROIs through ``self.sample_rois_for_rcnn`` and derive the
    regression-valid mask and the classification targets.

    Args:
        batch_dict: handed unchanged to ``self.sample_rois_for_rcnn``.

    Returns:
        dict with keys ``rois``, ``gt_of_rois``, ``gt_iou_of_rois``, ``roi_scores``,
        ``roi_labels``, ``reg_valid_mask``, ``gt_bbs_mask`` and ``rcnn_cls_labels``.

    Raises:
        NotImplementedError: for a ``CLS_SCORE_TYPE`` other than 'cls', 'mcls',
            'roi_ious', 'roi_mious' or 'roi_ioud'.
    """
    (sampled_rois, sampled_gts, roi_ious, roi_mious,
     sampled_scores, sampled_labels, sampled_bbs_mask) = self.sample_rois_for_rcnn(batch_dict=batch_dict)

    cfg = self.roi_sampler_cfg
    score_type = cfg.CLS_SCORE_TYPE

    # The "m" score types are driven by the second IoU tensor returned by the sampler.
    if score_type in ('roi_mious', 'mcls'):
        roi_ious = roi_mious

    # regression valid mask
    reg_valid_mask = (roi_ious > cfg.REG_FG_THRESH).long()

    # classification label
    if score_type in ('cls', 'mcls'):
        cls_targets = (roi_ious > cfg.CLS_FG_THRESH).long()
        undecided = (roi_ious > cfg.CLS_BG_THRESH) & (roi_ious < cfg.CLS_FG_THRESH)
        cls_targets[undecided > 0] = -1
    elif score_type in ('roi_ious', 'roi_mious', 'roi_ioud'):
        low = cfg.CLS_BG_THRESH
        high = cfg.CLS_FG_THRESH
        above = roi_ious > high
        below = roi_ious < low
        between = (above | below) == 0
        # 1 for foreground, 0 for background, linear ramp in between.
        cls_targets = above.float()
        cls_targets[between] = (roi_ious[between] - low) / (high - low)
        if score_type == 'roi_ioud':
            # Scale by heading agreement, saturating at a weight of 0.8.
            heading_roi = self.limit(sampled_rois[..., 6])
            heading_gt = self.limit(sampled_gts[..., 6])
            heading_weight = self.ang_weight(heading_roi, heading_gt)
            cls_targets *= torch.clamp(heading_weight, 0.0, 0.8) / 0.8
    else:
        raise NotImplementedError

    return {
        'rois': sampled_rois,
        'gt_of_rois': sampled_gts,
        'gt_iou_of_rois': roi_ious,
        'roi_scores': sampled_scores,
        'roi_labels': sampled_labels,
        'reg_valid_mask': reg_valid_mask,
        'gt_bbs_mask': sampled_bbs_mask,
        'rcnn_cls_labels': cls_targets,
    }
k > 0 and cur_gt[k].sum() == 0: k -= 1 cur_gt = cur_gt[:k + 1] cur_gt = cur_gt.new_zeros((1, cur_gt.shape[1])) if len(cur_gt) == 0 else cur_gt cur_bbs_mask = cur_bbs_mask.new_zeros((1, cur_bbs_mask.shape[1])) if len(cur_bbs_mask) == 0 else cur_bbs_mask if self.roi_sampler_cfg.get('SAMPLE_ROI_BY_EACH_CLASS', False): max_overlaps, gt_assignment = self.get_max_iou_with_same_class( rois=cur_roi[:,0:7], roi_labels=cur_roi_labels, gt_boxes=cur_gt[:, 0:7], gt_labels=cur_gt[:, -1].long() ) else: iou3d = iou3d_nms_utils.boxes_iou3d_gpu(cur_roi[:, 0:7], cur_gt[:, 0:7]) # (M, N) max_overlaps, gt_assignment = torch.max(iou3d, dim=1) sampled_inds = self.subsample_rois(max_overlaps=max_overlaps) batch_rois[index] = cur_roi[sampled_inds] batch_roi_labels[index] = cur_roi_labels[sampled_inds] batch_roi_scores[index] = cur_roi_scores[sampled_inds] batch_gt_of_rois[index] = cur_gt[gt_assignment[sampled_inds]] batch_all_roi_ious[index,:,0] = max_overlaps[sampled_inds] batch_gt_bbs_mask[index] = cur_bbs_mask[gt_assignment[sampled_inds]] for i in range(1, num_frame): for j in range(batch_size): this_roi = batch_rois[j,:,i*7:i*7+7] this_gt_of_roi = batch_gt_of_rois[j,:,i*7:i*7+7] all_ious = iou3d_nms_utils.boxes_iou3d_gpu(this_roi[:, 0:7], this_gt_of_roi[:, 0:7]) box_num = this_roi.shape[0] ious = all_ious[range(box_num),range(box_num)] batch_all_roi_ious[j,:,i] = ious tracks_mean_ious = batch_all_roi_ious.sum(-1)/(batch_gt_bbs_mask.sum(-1)+0.00001) return batch_rois, batch_gt_of_rois,batch_all_roi_ious[...,0], tracks_mean_ious, batch_roi_scores, batch_roi_labels, batch_gt_bbs_mask def subsample_rois(self, max_overlaps): # sample fg, easy_bg, hard_bg fg_rois_per_image = int(np.round(self.roi_sampler_cfg.FG_RATIO * self.roi_sampler_cfg.ROI_PER_IMAGE)) fg_thresh = min(self.roi_sampler_cfg.REG_FG_THRESH, self.roi_sampler_cfg.CLS_FG_THRESH) fg_inds = ((max_overlaps >= fg_thresh)).nonzero().view(-1) easy_bg_inds = ((max_overlaps < self.roi_sampler_cfg.CLS_BG_THRESH_LO)).nonzero().view(-1) 
hard_bg_inds = ((max_overlaps < self.roi_sampler_cfg.REG_FG_THRESH) & (max_overlaps >= self.roi_sampler_cfg.CLS_BG_THRESH_LO)).nonzero().view(-1) fg_num_rois = fg_inds.numel() bg_num_rois = hard_bg_inds.numel() + easy_bg_inds.numel() if fg_num_rois > 0 and bg_num_rois > 0: # sampling fg fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois) rand_num = torch.from_numpy(np.random.permutation(fg_num_rois)).type_as(max_overlaps).long() fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]] # sampling bg bg_rois_per_this_image = self.roi_sampler_cfg.ROI_PER_IMAGE - fg_rois_per_this_image bg_inds = self.sample_bg_inds( hard_bg_inds, easy_bg_inds, bg_rois_per_this_image, self.roi_sampler_cfg.HARD_BG_RATIO ) elif fg_num_rois > 0 and bg_num_rois == 0: # sampling fg rand_num = np.floor(np.random.rand(self.roi_sampler_cfg.ROI_PER_IMAGE) * fg_num_rois) rand_num = torch.from_numpy(rand_num).type_as(max_overlaps).long() fg_inds = fg_inds[rand_num] bg_inds = [] elif bg_num_rois > 0 and fg_num_rois == 0: # sampling bg bg_rois_per_this_image = self.roi_sampler_cfg.ROI_PER_IMAGE bg_inds = self.sample_bg_inds( hard_bg_inds, easy_bg_inds, bg_rois_per_this_image, self.roi_sampler_cfg.HARD_BG_RATIO ) else: print('maxoverlaps:(min=%f, max=%f)' % (max_overlaps.min().item(), max_overlaps.max().item())) print('ERROR: FG=%d, BG=%d' % (fg_num_rois, bg_num_rois)) raise NotImplementedError sampled_inds = torch.cat((fg_inds, bg_inds), dim=0) return sampled_inds @staticmethod def sample_bg_inds(hard_bg_inds, easy_bg_inds, bg_rois_per_this_image, hard_bg_ratio): if hard_bg_inds.numel() > 0 and easy_bg_inds.numel() > 0: hard_bg_rois_num = min(int(bg_rois_per_this_image * hard_bg_ratio), len(hard_bg_inds)) easy_bg_rois_num = bg_rois_per_this_image - hard_bg_rois_num # sampling hard bg rand_idx = torch.randint(low=0, high=hard_bg_inds.numel(), size=(hard_bg_rois_num,)).long() hard_bg_inds = hard_bg_inds[rand_idx] # sampling easy bg rand_idx = torch.randint(low=0, high=easy_bg_inds.numel(), 
size=(easy_bg_rois_num,)).long() easy_bg_inds = easy_bg_inds[rand_idx] bg_inds = torch.cat([hard_bg_inds, easy_bg_inds], dim=0) elif hard_bg_inds.numel() > 0 and easy_bg_inds.numel() == 0: hard_bg_rois_num = bg_rois_per_this_image # sampling hard bg rand_idx = torch.randint(low=0, high=hard_bg_inds.numel(), size=(hard_bg_rois_num,)).long() bg_inds = hard_bg_inds[rand_idx] elif hard_bg_inds.numel() == 0 and easy_bg_inds.numel() > 0: easy_bg_rois_num = bg_rois_per_this_image # sampling easy bg rand_idx = torch.randint(low=0, high=easy_bg_inds.numel(), size=(easy_bg_rois_num,)).long() bg_inds = easy_bg_inds[rand_idx] else: raise NotImplementedError return bg_inds @staticmethod def get_max_iou_with_same_class(rois, roi_labels, gt_boxes, gt_labels): """ Args: rois: (N, 7) roi_labels: (N) gt_boxes: (N, ) gt_labels: Returns: """ """ :param rois: (N, 7) :param roi_labels: (N) :param gt_boxes: (N, 8) :return: """ max_overlaps = rois.new_zeros(rois.shape[0]) gt_assignment = roi_labels.new_zeros(roi_labels.shape[0]) for k in range(gt_labels.min().item(), gt_labels.max().item() + 1): roi_mask = (roi_labels == k) gt_mask = (gt_labels == k) if roi_mask.sum() > 0 and gt_mask.sum() > 0: cur_roi = rois[roi_mask] cur_gt = gt_boxes[gt_mask] original_gt_assignment = gt_mask.nonzero().view(-1) iou3d = iou3d_nms_utils.boxes_iou3d_gpu(cur_roi[:,0:7], cur_gt[:,0:7]) # (M, N) cur_max_overlaps, cur_gt_assignment = torch.max(iou3d, dim=1) max_overlaps[roi_mask] = cur_max_overlaps gt_assignment[roi_mask] = original_gt_assignment[cur_gt_assignment] return max_overlaps, gt_assignment ================================================ FILE: pcdet/models/roi_heads/ted_head.py ================================================ import torch import torch.nn as nn from .roi_head_template import RoIHeadTemplate from ...utils import common_utils, spconv_utils from ...ops.pointnet2.pointnet2_stack import voxel_pool_modules as voxelpool_stack_modules from torch.autograd import Variable import 
class PositionalEmbedding(nn.Module):
    """Sinusoidal positional embedding of width ``demb``.

    The first half of the last dimension holds sines and the second half
    cosines of ``pos * inv_freq`` with ``inv_freq[k] = 10000 ** (-2k / demb)``.
    """

    def __init__(self, demb=256):
        super(PositionalEmbedding, self).__init__()
        self.demb = demb
        exponents = torch.arange(0.0, demb, 2.0) / demb
        # Registered as a buffer so it follows the module across devices.
        self.register_buffer('inv_freq', 1 / (10000 ** exponents))

    def forward(self, pos_seq, batch_size=2):
        """Embed a 1-D position tensor.

        Args:
            pos_seq: (L) positions.
            batch_size: size of the expanded middle axis, or ``None`` to keep
                that axis at length 1.
        Returns:
            (L, batch_size, 2 * len(inv_freq)) tensor, or (L, 1, ...) when
            ``batch_size`` is ``None``.
        """
        angles = torch.ger(pos_seq, self.inv_freq)
        embedding = torch.cat([angles.sin(), angles.cos()], dim=-1).unsqueeze(1)
        if batch_size is None:
            return embedding
        return embedding.expand(-1, batch_size, -1)
class Attention_Layer(nn.Module):
    """Single-head self-attention over the middle axis, mean-pooled to one vector.

    Queries, keys and values are bias-free linear projections of the input;
    the attention scores are the raw dot products (no scaling) passed through
    a softmax over the key axis.
    """

    def __init__(self, hidden_dim):
        super(Attention_Layer, self).__init__()
        self.hidden_dim = hidden_dim

        def make_projection():
            return nn.Linear(hidden_dim, hidden_dim, bias=False)

        self.Q_linear = make_projection()
        self.K_linear = make_projection()
        self.V_linear = make_projection()

    def forward(self, inputs):
        """Attend over axis 1 of a 3-D input and average the attended tokens.

        Args:
            inputs: (B, K, hidden_dim)
        Returns:
            (B, hidden_dim)
        """
        queries = self.Q_linear(inputs)
        keys_t = self.K_linear(inputs).permute(0, 2, 1)
        values = self.V_linear(inputs)

        weights = F.softmax(torch.matmul(queries, keys_t), dim=2)
        attended = torch.matmul(weights, values)
        return torch.mean(attended, -2)
class TEDSHead(RoIHeadTemplate):
    """RoI head that refines proposals over ``rot_num`` successive stages.

    Each stage pools voxel features on a dense grid inside every RoI, runs the
    shared FC stack, fuses the shared features of all stages seen so far with
    :class:`Attention_Layer`, and predicts class scores and box residuals.
    The boxes predicted by one stage become the RoIs of the next; the final
    output is the mean over all stages.
    """

    def __init__(self, input_channels, model_cfg, point_cloud_range=None, voxel_size=None, num_class=1,
                 **kwargs):
        super().__init__(num_class=num_class, model_cfg=model_cfg)
        self.model_cfg = model_cfg
        self.pool_cfg = model_cfg.ROI_GRID_POOL
        layer_cfg = self.pool_cfg.POOL_LAYERS
        self.point_cloud_range = point_cloud_range
        self.voxel_size = voxel_size
        self.rot_num = 3  # overwritten in forward() when 'transform_param' is given

        self.x_trans_train = X_TRANS()

        c_out = 0
        self.roi_grid_pool_layers = nn.ModuleList()
        for src_name in self.pool_cfg.FEATURES_SOURCE:
            src_cfg = layer_cfg[src_name]
            # Build a fresh list instead of prepending in place: the config
            # object is shared, and mutating it would prepend the input channel
            # again every time a head is constructed from the same config.
            mlps = [[input_channels[src_name]] + list(mlp) for mlp in src_cfg.MLPS]
            pool_layer = voxelpool_stack_modules.NeighborVoxelSAModuleMSG(
                query_ranges=src_cfg.QUERY_RANGES,
                nsamples=src_cfg.NSAMPLE,
                radii=src_cfg.POOL_RADIUS,
                mlps=mlps,
                pool_method=src_cfg.POOL_METHOD,
            )
            self.roi_grid_pool_layers.append(pool_layer)
            c_out += sum(x[-1] for x in mlps)

        dp_ratio = self.model_cfg.DP_RATIO

        grid_size = self.model_cfg.ROI_GRID_POOL.GRID_SIZE
        shared_fc_list, pre_channel = self._make_fc_stack(
            grid_size * grid_size * grid_size * c_out, self.model_cfg.SHARED_FC, dp_ratio, relu_inplace=True
        )
        self.shared_fc_layers = nn.Sequential(*shared_fc_list)
        self.shared_channel = pre_channel

        # Both heads consume [attention-fused feature, current-stage feature].
        head_in_channel = self.model_cfg.SHARED_FC[-1] * 2

        cls_fc_list, pre_channel = self._make_fc_stack(head_in_channel, self.model_cfg.CLS_FC, dp_ratio)
        cls_fc_list.append(nn.Linear(pre_channel, self.num_class, bias=True))
        self.cls_layers = nn.Sequential(*cls_fc_list)

        reg_fc_list, pre_channel = self._make_fc_stack(head_in_channel, self.model_cfg.REG_FC, dp_ratio)
        reg_fc_list.append(nn.Linear(pre_channel, self.box_coder.code_size * self.num_class, bias=True))
        self.reg_layers = nn.Sequential(*reg_fc_list)

        self.cross_attention_layers = Attention_Layer(self.shared_channel)

        self.init_weights()
        self.ious = {0: [], 1: [], 2: [], 3: []}

    @staticmethod
    def _make_fc_stack(in_channel, fc_dims, dp_ratio, relu_inplace=False):
        """Return ([Linear, BatchNorm1d, ReLU, (Dropout)] * len(fc_dims), out_channel).

        Dropout is inserted after every block except the last one and only
        when ``dp_ratio > 0``.
        """
        layers = []
        last = len(fc_dims) - 1
        for k, out_channel in enumerate(fc_dims):
            layers.extend([
                nn.Linear(in_channel, out_channel, bias=False),
                nn.BatchNorm1d(out_channel),
                nn.ReLU(inplace=relu_inplace)
            ])
            in_channel = out_channel
            if k != last and dp_ratio > 0:
                layers.append(nn.Dropout(dp_ratio))
        return layers, in_channel

    def init_weights(self):
        """Xavier-init all Linear layers; small-normal init for the two output layers."""
        init_func = nn.init.xavier_normal_

        for trans_module in [self.cls_layers, self.reg_layers]:
            for m in trans_module.modules():
                if isinstance(m, nn.Linear):
                    init_func(m.weight)
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)

        # The final prediction layers start close to zero.
        for trans_module in [self.cls_layers, self.reg_layers]:
            nn.init.normal_(trans_module[-1].weight, 0, 0.01)
            nn.init.constant_(trans_module[-1].bias, 0)

        for m in self.shared_fc_layers.modules():
            if isinstance(m, nn.Linear):
                init_func(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def roi_grid_pool(self, batch_dict, i):
        """
        Args:
            batch_dict:
                batch_size:
                rois: (B, num_rois, 7 + C)
                multi_scale_3d_strides: {src_name: stride}
                multi_scale_3d_features[<id>]: {src_name: sparse tensor}
                with_voxel_feature_transform: optional bool
            i: stage index; selects 'multi_scale_3d_features' (i == 0) or
                'multi_scale_3d_features<i>', falling back to lower indices
                when that key is absent.
        Returns:
            ms_pooled_features: (BxN, GRID_SIZE^3, C)
        Raises:
            KeyError: if no 'multi_scale_3d_features*' entry exists for stage
                ``i`` or any earlier stage.
        """
        rot_num_id = '' if i == 0 else str(i)

        rois = batch_dict['rois'].clone()
        batch_size = batch_dict['batch_size']
        with_vf_transform = batch_dict.get('with_voxel_feature_transform', False)

        roi_grid_xyz, _ = self.get_global_grid_points_of_roi(
            rois, grid_size=self.pool_cfg.GRID_SIZE
        )  # (BxN, 6x6x6, 3)
        # roi_grid_xyz: (B, Nx6x6x6, 3)
        roi_grid_xyz = roi_grid_xyz.view(batch_size, -1, 3)

        # compute the voxel coordinates of grid points
        roi_grid_coords_x = (roi_grid_xyz[:, :, 0:1] - self.point_cloud_range[0]) // self.voxel_size[0]
        roi_grid_coords_y = (roi_grid_xyz[:, :, 1:2] - self.point_cloud_range[1]) // self.voxel_size[1]
        roi_grid_coords_z = (roi_grid_xyz[:, :, 2:3] - self.point_cloud_range[2]) // self.voxel_size[2]
        # roi_grid_coords: (B, Nx6x6x6, 3)
        roi_grid_coords = torch.cat([roi_grid_coords_x, roi_grid_coords_y, roi_grid_coords_z], dim=-1)

        batch_idx = rois.new_zeros(batch_size, roi_grid_coords.shape[1], 1)
        for bs_idx in range(batch_size):
            batch_idx[bs_idx, :, 0] = bs_idx

        roi_grid_batch_cnt = rois.new_zeros(batch_size).int().fill_(roi_grid_coords.shape[1])

        pooled_features_list = []
        for k, src_name in enumerate(self.pool_cfg.FEATURES_SOURCE):
            pool_layer = self.roi_grid_pool_layers[k]
            if src_name in ['x_conv1', 'x_conv2', 'x_conv3', 'x_conv4']:

                cur_stride = batch_dict['multi_scale_3d_strides'][src_name]

                # Fall back to the features of an earlier stage when the ones
                # for this stage are absent. Stage 0 is stored without suffix,
                # so the search must end at '' (and fail loudly afterwards)
                # rather than loop forever over '0', '-1', ...
                j = i
                while 'multi_scale_3d_features' + rot_num_id not in batch_dict:
                    j -= 1
                    if j < 0:
                        raise KeyError(
                            "no 'multi_scale_3d_features' entry found in batch_dict for stage %d" % i
                        )
                    rot_num_id = str(j) if j > 0 else ''

                if with_vf_transform:
                    cur_sp_tensors = batch_dict['multi_scale_3d_features_post'][src_name]
                else:
                    cur_sp_tensors = batch_dict['multi_scale_3d_features' + rot_num_id][src_name]

                # compute voxel center xyz and batch_cnt
                cur_coords = cur_sp_tensors.indices
                cur_voxel_xyz = common_utils.get_voxel_centers(
                    cur_coords[:, 1:4],
                    downsample_times=cur_stride,
                    voxel_size=self.voxel_size,
                    point_cloud_range=self.point_cloud_range
                )
                cur_voxel_xyz_batch_cnt = cur_voxel_xyz.new_zeros(batch_size).int()
                for bs_idx in range(batch_size):
                    cur_voxel_xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum()
                # get voxel2point tensor
                v2p_ind_tensor = spconv_utils.generate_voxel2pinds(cur_sp_tensors)

                # compute the grid coordinates in this scale, in [batch_idx, x y z] order
                cur_roi_grid_coords = roi_grid_coords // cur_stride
                cur_roi_grid_coords = torch.cat([batch_idx, cur_roi_grid_coords], dim=-1)
                cur_roi_grid_coords = cur_roi_grid_coords.int()
                # voxel neighbor aggregation
                pooled_features = pool_layer(
                    xyz=cur_voxel_xyz.contiguous(),
                    xyz_batch_cnt=cur_voxel_xyz_batch_cnt,
                    new_xyz=roi_grid_xyz.contiguous().view(-1, 3),
                    new_xyz_batch_cnt=roi_grid_batch_cnt,
                    new_coords=cur_roi_grid_coords.contiguous().view(-1, 4),
                    features=cur_sp_tensors.features.contiguous(),
                    voxel2point_indices=v2p_ind_tensor
                )

                pooled_features = pooled_features.view(
                    -1, self.pool_cfg.GRID_SIZE ** 3,
                    pooled_features.shape[-1]
                )  # (BxN, 6x6x6, C)
                pooled_features_list.append(pooled_features)

        ms_pooled_features = torch.cat(pooled_features_list, dim=-1)

        return ms_pooled_features

    def get_global_grid_points_of_roi(self, rois, grid_size):
        """Return (global, local) dense grid points of every RoI.

        The local grid is rotated by the RoI heading (index 6) and shifted by
        the RoI center (indices 0:3) to obtain the global points.
        """
        rois = rois.view(-1, rois.shape[-1])
        batch_size_rcnn = rois.shape[0]

        local_roi_grid_points = self.get_dense_grid_points(rois, batch_size_rcnn, grid_size)  # (B, 6x6x6, 3)
        global_roi_grid_points = common_utils.rotate_points_along_z(
            local_roi_grid_points.clone(), rois[:, 6]
        ).squeeze(dim=1)
        global_center = rois[:, 0:3].clone()
        global_roi_grid_points += global_center.unsqueeze(dim=1)
        return global_roi_grid_points, local_roi_grid_points

    @staticmethod
    def get_dense_grid_points(rois, batch_size_rcnn, grid_size):
        """Cell-center points of a ``grid_size``^3 grid spanning each RoI, in the RoI frame."""
        faked_features = rois.new_ones((grid_size, grid_size, grid_size))
        dense_idx = faked_features.nonzero()  # (N, 3) [x_idx, y_idx, z_idx]
        dense_idx = dense_idx.repeat(batch_size_rcnn, 1, 1).float()  # (B, 6x6x6, 3)

        local_roi_size = rois.view(batch_size_rcnn, -1)[:, 3:6]
        roi_grid_points = (dense_idx + 0.5) / grid_size * local_roi_size.unsqueeze(dim=1) \
                          - (local_roi_size.unsqueeze(dim=1) / 2)  # (B, 6x6x6, 3)
        return roi_grid_points

    def roi_x_trans(self, rois, rot_num_i, transform_param):
        """Move RoIs from the frame of transform ``rot_num_i - 1`` into that of ``rot_num_i``.

        Each sample is first mapped back with the previous transform parameters
        and then forward with the current ones.
        """
        batch_size = len(rois)
        rois = rois.clone()

        x_transformed_roi = []

        for bt_i in range(batch_size):
            cur_roi = rois[bt_i]
            bt_transform_param = transform_param[bt_i]
            previous_trans_param = bt_transform_param[rot_num_i - 1]
            current_trans_param = bt_transform_param[rot_num_i]

            transed_roi = self.x_trans_train.backward_with_param({'boxes': cur_roi,
                                                                  'transform_param': previous_trans_param})
            transed_roi = self.x_trans_train.forward_with_param({'boxes': transed_roi['boxes'],
                                                                 'transform_param': current_trans_param})

            x_transformed_roi.append(transed_roi['boxes'])

        return torch.stack(x_transformed_roi)

    def pred_x_trans(self, preds, rot_num_i, transform_param):
        """Map predicted boxes of stage ``rot_num_i`` back with that stage's transform parameters."""
        batch_size = len(preds)
        preds = preds.clone()

        x_transformed_roi = []

        for bt_i in range(batch_size):
            cur_roi = preds[bt_i]
            bt_transform_param = transform_param[bt_i]
            current_trans_param = bt_transform_param[rot_num_i]

            transed_roi = self.x_trans_train.backward_with_param({'boxes': cur_roi,
                                                                  'transform_param': current_trans_param})

            x_transformed_roi.append(transed_roi['boxes'])

        return torch.stack(x_transformed_roi)

    def multi_grid_pool_aggregation(self, batch_dict, targets_dict):
        """Run all ``rot_num`` refinement stages.

        Side effects: ``batch_dict['rois']`` / ``['roi_scores']`` (and
        ``['roi_labels']`` in training) are overwritten stage by stage, and in
        training ``self.forward_ret_dict['targets_dict<i>']`` is filled.

        Returns:
            (mean box predictions, mean class predictions) over all stages.
        """
        all_preds = []
        all_scores = []

        all_shared_features = []

        for i in range(self.rot_num):
            rot_num_id = str(i)

            if i >= 1 and 'transform_param' in batch_dict:
                batch_dict['rois'] = self.roi_x_trans(batch_dict['rois'], i, batch_dict['transform_param'])

            if self.training:
                targets_dict = self.assign_targets(batch_dict, i, enable_dif=True)
                batch_dict['rois'] = targets_dict['rois']
                batch_dict['roi_labels'] = targets_dict['roi_labels']

            # Without transforms there is only one feature set (stage 0).
            if 'transform_param' in batch_dict:
                pooled_features = self.roi_grid_pool(batch_dict, i)
            else:
                pooled_features = self.roi_grid_pool(batch_dict, 0)

            pooled_features = pooled_features.view(pooled_features.size(0), -1)

            shared_features = self.shared_fc_layers(pooled_features)
            shared_features = shared_features.unsqueeze(0)  # 1,B,C
            all_shared_features.append(shared_features)
            pre_feat = torch.cat(all_shared_features, 0)

            # Fuse the features of all stages so far and append the current one.
            attentive_cur_feat = self.cross_attention_layers(pre_feat.permute(1, 0, 2)).unsqueeze(0)
            attentive_cur_feat = torch.cat([attentive_cur_feat, shared_features], -1)
            attentive_cur_feat = attentive_cur_feat.squeeze(0)  # B, C*2

            rcnn_cls = self.cls_layers(attentive_cur_feat)
            rcnn_reg = self.reg_layers(attentive_cur_feat)

            batch_cls_preds, batch_box_preds = self.generate_predicted_boxes(
                batch_size=batch_dict['batch_size'], rois=batch_dict['rois'], cls_preds=rcnn_cls, box_preds=rcnn_reg
            )

            if self.training:
                targets_dict['rcnn_cls'] = rcnn_cls
                targets_dict['rcnn_reg'] = rcnn_reg

                self.forward_ret_dict['targets_dict' + rot_num_id] = targets_dict

            # The refined boxes are the proposals of the next stage.
            batch_dict['rois'] = batch_box_preds
            batch_dict['roi_scores'] = batch_cls_preds.squeeze(-1)

            outs = batch_box_preds.clone()
            if 'transform_param' in batch_dict:
                outs = self.pred_x_trans(outs, i, batch_dict['transform_param'])
            all_preds.append(outs)
            all_scores.append(batch_cls_preds)

        return torch.mean(torch.stack(all_preds), 0), torch.mean(torch.stack(all_scores), 0)

    def forward(self, batch_dict):
        """Generate proposals, refine them over all stages and, at inference,
        store the averaged result in ``batch_box_preds`` / ``batch_cls_preds``.
        """
        if 'transform_param' in batch_dict:
            trans_param = batch_dict['transform_param']
            # One refinement stage per transform.
            self.rot_num = trans_param.shape[1]

        targets_dict = self.proposal_layer(
            batch_dict, nms_config=self.model_cfg.NMS_CONFIG['TRAIN' if self.training else 'TEST']
        )

        boxes, scores = self.multi_grid_pool_aggregation(batch_dict, targets_dict)

        if not self.training:
            batch_dict['batch_box_preds'] = boxes
            batch_dict['batch_cls_preds'] = scores

        return batch_dict
self.pool_cfg_mm = model_cfg.ROI_GRID_POOL_MM LAYER_cfg = self.pool_cfg.POOL_LAYERS LAYER_cfg_mm = self.pool_cfg_mm.POOL_LAYERS self.point_cloud_range = point_cloud_range self.voxel_size = voxel_size self.rot_num = 3 self.x_trans_train = X_TRANS() c_out = 0 self.roi_grid_pool_layers = nn.ModuleList() for src_name in self.pool_cfg.FEATURES_SOURCE: mlps = LAYER_cfg[src_name].MLPS for k in range(len(mlps)): mlps[k] = [input_channels[src_name]] + mlps[k] pool_layer = voxelpool_stack_modules.NeighborVoxelSAModuleMSG( query_ranges=LAYER_cfg[src_name].QUERY_RANGES, nsamples=LAYER_cfg[src_name].NSAMPLE, radii=LAYER_cfg[src_name].POOL_RADIUS, mlps=mlps, pool_method=LAYER_cfg[src_name].POOL_METHOD, ) self.roi_grid_pool_layers.append(pool_layer) c_out += sum([x[-1] for x in mlps]) c_out_mm = 0 self.roi_grid_pool_layers_mm = nn.ModuleList() feat = self.pool_cfg_mm.get('FEAT_NUM', 1) for src_name in self.pool_cfg_mm.FEATURES_SOURCE: mlps = LAYER_cfg_mm[src_name].MLPS for k in range(len(mlps)): mlps[k] = [input_channels[src_name]*feat] + mlps[k] pool_layer = voxelpool_stack_modules.NeighborVoxelSAModuleMSG( query_ranges=LAYER_cfg_mm[src_name].QUERY_RANGES, nsamples=LAYER_cfg_mm[src_name].NSAMPLE, radii=LAYER_cfg_mm[src_name].POOL_RADIUS, mlps=mlps, pool_method=LAYER_cfg_mm[src_name].POOL_METHOD, ) self.roi_grid_pool_layers_mm.append(pool_layer) c_out_mm += sum([x[-1] for x in mlps]) self.shared_fc_layers = nn.ModuleList() for i in range(self.rot_num): GRID_SIZE = self.model_cfg.ROI_GRID_POOL.GRID_SIZE pre_channel = GRID_SIZE * GRID_SIZE * GRID_SIZE * c_out shared_fc_list = [] for k in range(0, self.model_cfg.SHARED_FC.__len__()): shared_fc_list.extend([ nn.Linear(pre_channel, self.model_cfg.SHARED_FC[k], bias=False), nn.BatchNorm1d(self.model_cfg.SHARED_FC[k]), nn.ReLU(inplace=True) ]) pre_channel = self.model_cfg.SHARED_FC[k] if k != self.model_cfg.SHARED_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: shared_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) 
self.shared_fc_layers.append(nn.Sequential(*shared_fc_list)) break self.shared_fc_layers_mm = nn.ModuleList() for i in range(self.rot_num): GRID_SIZE = self.model_cfg.ROI_GRID_POOL_MM.GRID_SIZE pre_channel = GRID_SIZE * GRID_SIZE * GRID_SIZE * c_out_mm shared_fc_list = [] for k in range(0, self.model_cfg.SHARED_FC.__len__()): shared_fc_list.extend([ nn.Linear(pre_channel, self.model_cfg.SHARED_FC[k], bias=False), nn.BatchNorm1d(self.model_cfg.SHARED_FC[k]), nn.ReLU(inplace=True) ]) pre_channel = self.model_cfg.SHARED_FC[k] if k != self.model_cfg.SHARED_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: shared_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) self.shared_fc_layers_mm.append(nn.Sequential(*shared_fc_list)) break self.shared_channel = pre_channel self.cls_layers = nn.ModuleList() self.reg_layers = nn.ModuleList() for i in range(self.rot_num): pre_channel = self.model_cfg.SHARED_FC[-1] * 2 * 2 cls_fc_list = [] for k in range(0, self.model_cfg.CLS_FC.__len__()): cls_fc_list.extend([ nn.Linear(pre_channel, self.model_cfg.CLS_FC[k], bias=False), nn.BatchNorm1d(self.model_cfg.CLS_FC[k]), nn.ReLU() ]) pre_channel = self.model_cfg.CLS_FC[k] if k != self.model_cfg.CLS_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: cls_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) cls_fc_list.append(nn.Linear(pre_channel, self.num_class, bias=True)) cls_fc_layers = nn.Sequential(*cls_fc_list) self.cls_layers.append(cls_fc_layers) pre_channel = self.model_cfg.SHARED_FC[-1] * 2 * 2 reg_fc_list = [] for k in range(0, self.model_cfg.REG_FC.__len__()): reg_fc_list.extend([ nn.Linear(pre_channel, self.model_cfg.REG_FC[k], bias=False), nn.BatchNorm1d(self.model_cfg.REG_FC[k]), nn.ReLU() ]) pre_channel = self.model_cfg.REG_FC[k] if k != self.model_cfg.REG_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: reg_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) reg_fc_list.append(nn.Linear(pre_channel, self.box_coder.code_size * self.num_class, bias=True)) reg_fc_layers = 
nn.Sequential(*reg_fc_list) self.reg_layers.append(reg_fc_layers) break self.cls_layers_P = nn.ModuleList() self.reg_layers_P = nn.ModuleList() for i in range(self.rot_num): pre_channel = self.model_cfg.SHARED_FC[-1] * 2 cls_fc_list = [] for k in range(0, self.model_cfg.CLS_FC.__len__()): cls_fc_list.extend([ nn.Linear(pre_channel, self.model_cfg.CLS_FC[k], bias=False), nn.BatchNorm1d(self.model_cfg.CLS_FC[k]), nn.ReLU() ]) pre_channel = self.model_cfg.CLS_FC[k] if k != self.model_cfg.CLS_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: cls_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) cls_fc_list.append(nn.Linear(pre_channel, self.num_class, bias=True)) cls_fc_layers = nn.Sequential(*cls_fc_list) self.cls_layers_P.append(cls_fc_layers) pre_channel = self.model_cfg.SHARED_FC[-1] * 2 reg_fc_list = [] for k in range(0, self.model_cfg.REG_FC.__len__()): reg_fc_list.extend([ nn.Linear(pre_channel, self.model_cfg.REG_FC[k], bias=False), nn.BatchNorm1d(self.model_cfg.REG_FC[k]), nn.ReLU() ]) pre_channel = self.model_cfg.REG_FC[k] if k != self.model_cfg.REG_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: reg_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) reg_fc_list.append(nn.Linear(pre_channel, self.box_coder.code_size * self.num_class, bias=True)) reg_fc_layers = nn.Sequential(*reg_fc_list) self.reg_layers_P.append(reg_fc_layers) break self.cls_layers_PI = nn.ModuleList() self.reg_layers_PI = nn.ModuleList() for i in range(self.rot_num): pre_channel = self.model_cfg.SHARED_FC[-1] * 2 cls_fc_list = [] for k in range(0, self.model_cfg.CLS_FC.__len__()): cls_fc_list.extend([ nn.Linear(pre_channel, self.model_cfg.CLS_FC[k], bias=False), nn.BatchNorm1d(self.model_cfg.CLS_FC[k]), nn.ReLU() ]) pre_channel = self.model_cfg.CLS_FC[k] if k != self.model_cfg.CLS_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: cls_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) cls_fc_list.append(nn.Linear(pre_channel, self.num_class, bias=True)) cls_fc_layers = 
nn.Sequential(*cls_fc_list) self.cls_layers_PI.append(cls_fc_layers) pre_channel = self.model_cfg.SHARED_FC[-1] * 2 reg_fc_list = [] for k in range(0, self.model_cfg.REG_FC.__len__()): reg_fc_list.extend([ nn.Linear(pre_channel, self.model_cfg.REG_FC[k], bias=False), nn.BatchNorm1d(self.model_cfg.REG_FC[k]), nn.ReLU() ]) pre_channel = self.model_cfg.REG_FC[k] if k != self.model_cfg.REG_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: reg_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) reg_fc_list.append(nn.Linear(pre_channel, self.box_coder.code_size * self.num_class, bias=True)) reg_fc_layers = nn.Sequential(*reg_fc_list) self.reg_layers_PI.append(reg_fc_layers) break if self.model_cfg.get('PART', False): self.grid_offsets = self.model_cfg.PART.GRID_OFFSETS self.featmap_stride = self.model_cfg.PART.FEATMAP_STRIDE part_inchannel = self.model_cfg.PART.IN_CHANNEL self.num_parts = self.model_cfg.PART.SIZE ** 2 self.conv_part = nn.Sequential( nn.Conv2d(part_inchannel, part_inchannel, 3, 1, padding=1, bias=False), nn.BatchNorm2d(part_inchannel, eps=1e-3, momentum=0.01), nn.ReLU(inplace=True), nn.Conv2d(part_inchannel, self.num_parts, 1, 1, padding=0, bias=False), ) self.gen_grid_fn = partial(gen_sample_grid, grid_offsets=self.grid_offsets, spatial_scale=1 / self.featmap_stride) self.cross_attention_layers = nn.ModuleList() for i in range(self.rot_num): this_mo = CrossAttention(self.shared_channel) # print(count_parameters(this_mo)) # input() self.cross_attention_layers.append(this_mo) self.cross_attention_layers_mm = nn.ModuleList() for i in range(self.rot_num): this_mo = CrossAttention(self.shared_channel) # print(count_parameters(this_mo)) # input() self.cross_attention_layers_mm.append(this_mo) self.init_weights() self.ious = {0: [], 1: [], 2: [], 3: []} def init_weights(self): init_func = nn.init.xavier_normal_ for module_list in [self.cls_layers, self.reg_layers]: for trans_module in module_list: for m in trans_module.modules(): if isinstance(m, nn.Linear): 
def roi_grid_pool(self, batch_dict, i, pool_cfg=None, pool_layers=None,
                  feature_key='multi_scale_3d_features'):
    """
    Pool multi-scale sparse voxel features onto a dense grid inside every RoI.

    Args:
        batch_dict:
            batch_size: number of samples in the batch
            rois: RoI boxes; commented in the original as (B, num_rois, 7 + C)
            multi_scale_3d_strides: dict src_name -> down-sampling stride
            <feature_key><suffix>: dict src_name -> sparse tensor exposing
                ``.indices`` and ``.features``; ``suffix`` is '' for index 0
                and ``str(index)`` otherwise
            with_voxel_feature_transform (optional): when truthy, features are
                read from ``multi_scale_3d_features_post`` instead
        i: index of the feature set to pool from. If that set is missing, the
            nearest lower index that exists is used (down to the un-suffixed
            base key).
        pool_cfg: pooling config providing ``GRID_SIZE`` and
            ``FEATURES_SOURCE``; defaults to ``self.pool_cfg``
        pool_layers: one pooling layer per entry of ``FEATURES_SOURCE``;
            defaults to ``self.roi_grid_pool_layers``
        feature_key: key prefix of the feature dicts inside ``batch_dict``

    Returns:
        ms_pooled_features: (-1, GRID_SIZE ** 3, sum of per-source channels)

    Raises:
        KeyError: if no feature set with index <= i exists in ``batch_dict``.
    """
    if pool_cfg is None:
        pool_cfg = self.pool_cfg
    if pool_layers is None:
        pool_layers = self.roi_grid_pool_layers

    rois = batch_dict['rois'].clone()
    batch_size = batch_dict['batch_size']
    with_vf_transform = batch_dict.get('with_voxel_feature_transform', False)

    roi_grid_xyz, _ = self.get_global_grid_points_of_roi(
        rois, grid_size=pool_cfg.GRID_SIZE
    )
    # regroup the grid points per sample: (batch_size, -1, 3)
    roi_grid_xyz = roi_grid_xyz.view(batch_size, -1, 3)

    # compute the voxel coordinates of grid points
    roi_grid_coords_x = (roi_grid_xyz[:, :, 0:1] - self.point_cloud_range[0]) // self.voxel_size[0]
    roi_grid_coords_y = (roi_grid_xyz[:, :, 1:2] - self.point_cloud_range[1]) // self.voxel_size[1]
    roi_grid_coords_z = (roi_grid_xyz[:, :, 2:3] - self.point_cloud_range[2]) // self.voxel_size[2]
    roi_grid_coords = torch.cat([roi_grid_coords_x, roi_grid_coords_y, roi_grid_coords_z], dim=-1)

    # per-grid-point sample index, prepended to the coordinates further below
    batch_idx = rois.new_zeros(batch_size, roi_grid_coords.shape[1], 1)
    for bs_idx in range(batch_size):
        batch_idx[bs_idx, :, 0] = bs_idx

    # every sample contributes the same number of grid points
    roi_grid_batch_cnt = rois.new_zeros(batch_size).int().fill_(roi_grid_coords.shape[1])

    resolved_key = None
    pooled_features_list = []
    for k, src_name in enumerate(pool_cfg.FEATURES_SOURCE):
        pool_layer = pool_layers[k]
        if src_name in ['x_conv1', 'x_conv2', 'x_conv3', 'x_conv4']:
            cur_stride = batch_dict['multi_scale_3d_strides'][src_name]

            if resolved_key is None:
                # Walk down from index i to 0. Index 0 is stored WITHOUT a
                # numeric suffix; the previous loop looked for a '0' suffix
                # and therefore never terminated when it had to fall back to
                # the base feature set.
                for rot in range(i, -1, -1):
                    candidate = feature_key + (str(rot) if rot > 0 else '')
                    if candidate in batch_dict:
                        resolved_key = candidate
                        break
                else:
                    raise KeyError(
                        'no "{}" entry with index <= {} found in batch_dict'.format(feature_key, i)
                    )

            if with_vf_transform:
                cur_sp_tensors = batch_dict['multi_scale_3d_features_post'][src_name]
            else:
                cur_sp_tensors = batch_dict[resolved_key][src_name]

            # compute voxel center xyz and batch_cnt
            cur_coords = cur_sp_tensors.indices
            cur_voxel_xyz = common_utils.get_voxel_centers(
                cur_coords[:, 1:4],
                downsample_times=cur_stride,
                voxel_size=self.voxel_size,
                point_cloud_range=self.point_cloud_range
            )
            cur_voxel_xyz_batch_cnt = cur_voxel_xyz.new_zeros(batch_size).int()
            for bs_idx in range(batch_size):
                cur_voxel_xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum()

            # get voxel2point tensor
            v2p_ind_tensor = spconv_utils.generate_voxel2pinds(cur_sp_tensors)

            # compute the grid coordinates in this scale, in [batch_idx, x y z] order
            cur_roi_grid_coords = roi_grid_coords // cur_stride
            cur_roi_grid_coords = torch.cat([batch_idx, cur_roi_grid_coords], dim=-1)
            cur_roi_grid_coords = cur_roi_grid_coords.int()

            # voxel neighbor aggregation
            pooled_features = pool_layer(
                xyz=cur_voxel_xyz.contiguous(),
                xyz_batch_cnt=cur_voxel_xyz_batch_cnt,
                new_xyz=roi_grid_xyz.contiguous().view(-1, 3),
                new_xyz_batch_cnt=roi_grid_batch_cnt,
                new_coords=cur_roi_grid_coords.contiguous().view(-1, 4),
                features=cur_sp_tensors.features.contiguous(),
                voxel2point_indices=v2p_ind_tensor
            )

            pooled_features = pooled_features.view(
                -1, pool_cfg.GRID_SIZE ** 3,
                pooled_features.shape[-1]
            )
            pooled_features_list.append(pooled_features)

    ms_pooled_features = torch.cat(pooled_features_list, dim=-1)

    return ms_pooled_features


def roi_grid_pool_mm(self, batch_dict, i):
    """
    Same pooling as ``roi_grid_pool`` but for the second ("_mm") feature
    branch: it uses ``self.pool_cfg_mm``, ``self.roi_grid_pool_layers_mm`` and
    the ``multi_scale_3d_features_mm`` feature dicts of ``batch_dict``.

    Args:
        batch_dict: see ``roi_grid_pool``
        i: index of the feature set to pool from (with the same fallback to
            lower indices)

    Returns:
        ms_pooled_features: (-1, GRID_SIZE ** 3, sum of per-source channels)
    """
    return self.roi_grid_pool(
        batch_dict, i,
        pool_cfg=self.pool_cfg_mm,
        pool_layers=self.roi_grid_pool_layers_mm,
        feature_key='multi_scale_3d_features_mm',
    )
@staticmethod
def get_dense_grid_points(rois, batch_size_rcnn, grid_size):
    """
    Build the centres of a regular ``grid_size``^3 lattice inside every box,
    expressed relative to the box centre (no rotation or translation applied).

    Args:
        rois: boxes whose columns 3:6 (after flattening to
            ``batch_size_rcnn`` rows) are read as the box size
        batch_size_rcnn: number of boxes
        grid_size: number of cells per axis

    Returns:
        (batch_size_rcnn, grid_size ** 3, 3) tensor of cell-centre offsets
    """
    # nonzero() on an all-ones cube enumerates every (x_idx, y_idx, z_idx)
    cell_idx = rois.new_ones((grid_size, grid_size, grid_size)).nonzero()
    cell_idx = cell_idx.repeat(batch_size_rcnn, 1, 1).float()

    box_dims = rois.view(batch_size_rcnn, -1)[:, 3:6].unsqueeze(dim=1)

    # cell centre as a fraction of the box extent, then shifted so that the
    # box centre becomes the origin
    cell_centers = (cell_idx + 0.5) / grid_size * box_dims
    return cell_centers - box_dims / 2
def pred_x_trans(self, preds, trans_i, transform_param):
    """
    Run every sample's predicted boxes through
    ``self.x_trans_train.backward_with_param`` using that sample's
    ``trans_i``-th transform parameters.

    Args:
        preds: per-sample boxes, indexed along dim 0
        trans_i: transform index; clamped to the last available index of
            ``transform_param[0]``
        transform_param: per-sample sequence of transform parameters

    Returns:
        the transformed boxes of all samples stacked along a new dim 0
    """
    # never index past the transforms that are actually available
    trans_i = min(trans_i, len(transform_param[0]) - 1)

    boxes_per_sample = preds.clone()
    restored = []
    for sample_idx, sample_boxes in enumerate(boxes_per_sample):
        sample_param = transform_param[sample_idx][trans_i]
        result = self.x_trans_train.backward_with_param(
            {'boxes': sample_boxes, 'transform_param': sample_param}
        )
        restored.append(result['boxes'])

    return torch.stack(restored)
pooled_features = self.roi_grid_pool(batch_dict, i) pooled_features_mm = self.roi_grid_pool_mm(batch_dict, i) else: pooled_features = self.roi_grid_pool(batch_dict, 0) pooled_features_mm = self.roi_grid_pool_mm(batch_dict, 0) pooled_features = pooled_features.view(pooled_features.size(0), -1) shared_features = self.shared_fc_layers[0](pooled_features) shared_features = shared_features.unsqueeze(0) # 1,B,C all_shared_features.append(shared_features) pre_feat = torch.cat(all_shared_features, 0) cur_feat = self.cross_attention_layers[i](pre_feat, shared_features) cur_feat = torch.cat([cur_feat, shared_features], -1) cur_feat = cur_feat.squeeze(0) # B, C*2 pooled_features_mm = pooled_features_mm.view(pooled_features_mm.size(0), -1) shared_features_mm = self.shared_fc_layers_mm[0](pooled_features_mm) shared_features_mm = shared_features_mm.unsqueeze(0) # 1,B,C all_shared_features_mm.append(shared_features_mm) pre_feat_mm = torch.cat(all_shared_features_mm, 0) cur_feat_mm = self.cross_attention_layers_mm[i](pre_feat_mm, shared_features_mm) cur_feat_mm = torch.cat([cur_feat_mm, shared_features_mm], -1) cur_feat_mm = cur_feat_mm.squeeze(0) # B, C*2 final_feat = torch.cat([cur_feat_mm, cur_feat],-1) rcnn_cls = self.cls_layers[0](final_feat) rcnn_reg = self.reg_layers[0](final_feat) rcnn_cls_pi = self.cls_layers_PI[0](cur_feat_mm) rcnn_reg_pi = self.reg_layers_PI[0](cur_feat_mm) rcnn_cls_p = self.cls_layers_P[0](cur_feat) rcnn_reg_p = self.reg_layers_P[0](cur_feat) if self.model_cfg.get('PART', False): rcnn_cls = rcnn_cls+part_scores rcnn_cls_pi = rcnn_cls_pi+part_scores rcnn_cls_p = rcnn_cls_p+part_scores batch_cls_preds, batch_box_preds = self.generate_predicted_boxes( batch_size=batch_dict['batch_size'], rois=batch_dict['rois'], cls_preds=rcnn_cls, box_preds=rcnn_reg ) outs = batch_box_preds.clone() if 'transform_param' in batch_dict: outs = self.pred_x_trans(outs, i, batch_dict['transform_param']) all_preds.append(outs) all_scores.append(batch_cls_preds) if 
def forward(self, batch_dict):
    """
    Generate proposals, refine them with ``multi_grid_pool_aggregation`` and,
    outside of training, publish the refined boxes and scores.

    Args:
        batch_dict: running dictionary of the detector. When it contains
            ``transform_param``, ``self.rot_num`` is set to that entry's
            ``shape[1]`` before anything else runs.

    Returns:
        batch_dict; in eval mode it additionally holds ``batch_box_preds``
        and ``batch_cls_preds``.
    """
    if 'transform_param' in batch_dict:
        self.rot_num = batch_dict['transform_param'].shape[1]

    # proposal NMS settings differ between training and testing
    nms_mode = 'TRAIN' if self.training else 'TEST'
    targets_dict = self.proposal_layer(
        batch_dict, nms_config=self.model_cfg.NMS_CONFIG[nms_mode]
    )

    boxes, scores = self.multi_grid_pool_aggregation(batch_dict, targets_dict)

    if self.training:
        return batch_dict

    batch_dict['batch_box_preds'] = boxes
    batch_dict['batch_cls_preds'] = scores
    return batch_dict
class DeformConvFunction(Function):
    """Autograd function backed by the ``deform_conv_cuda`` extension.

    Only CUDA tensors are supported; CPU tensors raise NotImplementedError.
    """

    @staticmethod
    def forward(ctx,
                input,
                offset,
                weight,
                stride=1,
                padding=0,
                dilation=1,
                groups=1,
                deformable_groups=1,
                im2col_step=64):
        """Run the deformable convolution.

        Args:
            input: 4D input tensor.
            offset: sampling offsets passed through to the CUDA kernel.
            weight: convolution weight.
            stride, padding, dilation: int or pair, normalised with ``_pair``.
            groups, deformable_groups: forwarded to the CUDA kernel.
            im2col_step: upper bound on the number of samples processed per
                kernel call; the effective step must divide the batch size.

        Returns:
            Output tensor allocated with the size given by ``_output_size``.

        Raises:
            ValueError: if ``input`` is not 4D or the output would be empty.
            NotImplementedError: if ``input`` is not a CUDA tensor.
        """
        if input is not None and input.dim() != 4:
            raise ValueError(
                'Expected 4D tensor as input, got {}D tensor instead.'.format(
                    input.dim()))
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.groups = groups
        ctx.deformable_groups = deformable_groups
        ctx.im2col_step = im2col_step

        ctx.save_for_backward(input, offset, weight)

        output = input.new_empty(
            DeformConvFunction._output_size(input, weight, ctx.padding,
                                            ctx.dilation, ctx.stride))

        ctx.bufs_ = [input.new_empty(0), input.new_empty(0)]  # columns, ones

        if not input.is_cuda:
            raise NotImplementedError
        else:
            cur_im2col_step = min(ctx.im2col_step, input.shape[0])
            assert (input.shape[0] %
                    cur_im2col_step) == 0, 'im2col step must divide batchsize'
            # the extension expects (W, H) ordering for kernel/stride/pad/dilation
            deform_conv_cuda.deform_conv_forward_cuda(
                input, weight, offset, output, ctx.bufs_[0], ctx.bufs_[1],
                weight.size(3), weight.size(2), ctx.stride[1], ctx.stride[0],
                ctx.padding[1], ctx.padding[0], ctx.dilation[1],
                ctx.dilation[0], ctx.groups, ctx.deformable_groups,
                cur_im2col_step)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Compute gradients w.r.t. ``input``, ``offset`` and ``weight``.

        Returns one entry per ``forward`` argument (nine in total); the
        non-tensor arguments receive ``None``.
        """
        input, offset, weight = ctx.saved_tensors

        grad_input = grad_offset = grad_weight = None

        if not grad_output.is_cuda:
            raise NotImplementedError
        else:
            cur_im2col_step = min(ctx.im2col_step, input.shape[0])
            assert (input.shape[0] %
                    cur_im2col_step) == 0, 'im2col step must divide batchsize'

            if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
                grad_input = torch.zeros_like(input)
                grad_offset = torch.zeros_like(offset)
                deform_conv_cuda.deform_conv_backward_input_cuda(
                    input, offset, grad_output, grad_input,
                    grad_offset, weight, ctx.bufs_[0], weight.size(3),
                    weight.size(2), ctx.stride[1], ctx.stride[0],
                    ctx.padding[1], ctx.padding[0], ctx.dilation[1],
                    ctx.dilation[0], ctx.groups, ctx.deformable_groups,
                    cur_im2col_step)

            if ctx.needs_input_grad[2]:
                grad_weight = torch.zeros_like(weight)
                deform_conv_cuda.deform_conv_backward_parameters_cuda(
                    input, offset, grad_output,
                    grad_weight, ctx.bufs_[0], ctx.bufs_[1], weight.size(3),
                    weight.size(2), ctx.stride[1], ctx.stride[0],
                    ctx.padding[1], ctx.padding[0], ctx.dilation[1],
                    ctx.dilation[0], ctx.groups, ctx.deformable_groups, 1,
                    cur_im2col_step)

        # forward() takes nine arguments, so nine gradients are returned.
        # The previous eight-element tuple made autograd fail whenever
        # im2col_step was passed explicitly; surplus trailing None entries are
        # accepted when it is left at its default.
        return (grad_input, grad_offset, grad_weight, None, None, None, None,
                None, None)

    @staticmethod
    def _output_size(input, weight, padding, dilation, stride):
        """Return the output size (N, C_out, *spatial) of the convolution.

        Raises:
            ValueError: if any output dimension would be non-positive.
        """
        channels = weight.size(0)
        output_size = (input.size(0), channels)
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = padding[d]
            kernel = dilation[d] * (weight.size(d + 2) - 1) + 1
            stride_ = stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(s > 0 for s in output_size):
            raise ValueError(
                'convolution input is too small (output would be {})'.format(
                    'x'.join(map(str, output_size))))
        return output_size
ctx.dilation, ctx.groups, ctx.deformable_groups, ctx.with_bias) return output @staticmethod @once_differentiable def backward(ctx, grad_output): if not grad_output.is_cuda: raise NotImplementedError input, offset, mask, weight, bias = ctx.saved_tensors grad_input = torch.zeros_like(input) grad_offset = torch.zeros_like(offset) grad_mask = torch.zeros_like(mask) grad_weight = torch.zeros_like(weight) grad_bias = torch.zeros_like(bias) deform_conv_cuda.modulated_deform_conv_cuda_backward( input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1], grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, weight.shape[2], weight.shape[3], ctx.stride, ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation, ctx.groups, ctx.deformable_groups, ctx.with_bias) if not ctx.with_bias: grad_bias = None return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, None, None, None, None, None) @staticmethod def _infer_shape(ctx, input, weight): n = input.size(0) channels_out = weight.size(0) height, width = input.shape[2:4] kernel_h, kernel_w = weight.shape[2:4] height_out = (height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1 width_out = (width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1 return n, channels_out, height_out, width_out deform_conv = DeformConvFunction.apply modulated_deform_conv = ModulatedDeformConvFunction.apply class DeformConv(nn.Module): def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, deformable_groups=1, bias=False): super(DeformConv, self).__init__() assert not bias assert in_channels % groups == 0, \ 'in_channels {} cannot be divisible by groups {}'.format( in_channels, groups) assert out_channels % groups == 0, \ 'out_channels {} cannot be divisible by groups {}'.format( out_channels, groups) self.in_channels = in_channels self.out_channels = out_channels self.kernel_size = _pair(kernel_size) self.stride = 
class DeformConvPack(DeformConv):
    """A Deformable Conv Encapsulation that acts as normal Conv layers.

    The sampling offsets are predicted from the input by an internal
    ``conv_offset`` convolution (initialised to zero), so ``forward`` only
    takes the feature map.

    Args:
        in_channels (int): Same as nn.Conv2d.
        out_channels (int): Same as nn.Conv2d.
        kernel_size (int or tuple[int]): Same as nn.Conv2d.
        stride (int or tuple[int]): Same as nn.Conv2d.
        padding (int or tuple[int]): Same as nn.Conv2d.
        dilation (int or tuple[int]): Same as nn.Conv2d.
        groups (int): Same as nn.Conv2d.
        deformable_groups (int): Number of offset groups; determines the
            channel count of ``conv_offset``.
        bias (bool): Must be False; the parent ``DeformConv`` asserts this.
    """

    _version = 2

    def __init__(self, *args, **kwargs):
        super(DeformConvPack, self).__init__(*args, **kwargs)

        # two offset channels per kernel tap and deformable group
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            self.deformable_groups * 2 * self.kernel_size[0] *
            self.kernel_size[1],
            kernel_size=self.kernel_size,
            stride=_pair(self.stride),
            padding=_pair(self.padding),
            bias=True)
        self.init_offset()

    def init_offset(self):
        """Zero the offset predictor so that all predicted offsets start at 0."""
        self.conv_offset.weight.data.zero_()
        self.conv_offset.bias.data.zero_()

    def forward(self, x):
        """Predict offsets from ``x`` and apply the deformable convolution."""
        offset = self.conv_offset(x)
        return deform_conv(x, offset, self.weight, self.stride, self.padding,
                           self.dilation, self.groups, self.deformable_groups)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Load parameters, renaming the offset-conv keys of old checkpoints."""
        version = local_metadata.get('version', None)

        if version is None or version < 2:
            # the key is different in early versions
            # In version < 2, DeformConvPack loads previous benchmark models.
            if (prefix + 'conv_offset.weight' not in state_dict
                    and prefix[:-1] + '_offset.weight' in state_dict):
                state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
                    prefix[:-1] + '_offset.weight')
            if (prefix + 'conv_offset.bias' not in state_dict
                    and prefix[:-1] + '_offset.bias' in state_dict):
                state_dict[prefix +
                           'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
                                                                '_offset.bias')

        if version is not None and version > 1:
            # `print_log` is not imported in this file (its mmdet import is
            # commented out), so calling it raised NameError for every
            # version-2 checkpoint; use the standard logging module instead.
            import logging
            logging.getLogger(__name__).info(
                'DeformConvPack {} is upgraded to version 2.'.format(
                    prefix.rstrip('.')))

        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)
class ModulatedDeformConvPack(ModulatedDeformConv):
    """A ModulatedDeformable Conv Encapsulation that acts as normal Conv layers.

    Offsets and modulation masks are predicted from the input by an internal
    ``conv_offset`` convolution (initialised to zero), so ``forward`` only
    takes the feature map.

    Args:
        in_channels (int): Same as nn.Conv2d.
        out_channels (int): Same as nn.Conv2d.
        kernel_size (int or tuple[int]): Same as nn.Conv2d.
        stride (int or tuple[int]): Same as nn.Conv2d.
        padding (int or tuple[int]): Same as nn.Conv2d.
        dilation (int or tuple[int]): Same as nn.Conv2d.
        groups (int): Same as nn.Conv2d.
        deformable_groups (int): Number of offset groups; determines the
            channel count of ``conv_offset``.
        bias (bool): Whether the parent ``ModulatedDeformConv`` creates a
            learnable bias.
    """

    _version = 2

    def __init__(self, *args, **kwargs):
        super(ModulatedDeformConvPack, self).__init__(*args, **kwargs)

        # three channels per kernel tap and deformable group:
        # two offset components plus one mask value
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            self.deformable_groups * 3 * self.kernel_size[0] *
            self.kernel_size[1],
            kernel_size=self.kernel_size,
            stride=_pair(self.stride),
            padding=_pair(self.padding),
            bias=True)
        self.init_offset()

    def init_offset(self):
        """Zero the offset/mask predictor."""
        self.conv_offset.weight.data.zero_()
        self.conv_offset.bias.data.zero_()

    def forward(self, x):
        """Predict offsets and masks from ``x`` and apply the modulated conv."""
        out = self.conv_offset(x)
        # first two thirds of the channels are offsets, the last third the mask
        o1, o2, mask = torch.chunk(out, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        mask = torch.sigmoid(mask)
        return modulated_deform_conv(x, offset, mask, self.weight, self.bias,
                                     self.stride, self.padding, self.dilation,
                                     self.groups, self.deformable_groups)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """Load parameters, renaming the offset-conv keys of old checkpoints."""
        version = local_metadata.get('version', None)

        if version is None or version < 2:
            # the key is different in early versions
            # In version < 2, ModulatedDeformConvPack
            # loads previous benchmark models.
            if (prefix + 'conv_offset.weight' not in state_dict
                    and prefix[:-1] + '_offset.weight' in state_dict):
                state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
                    prefix[:-1] + '_offset.weight')
            if (prefix + 'conv_offset.bias' not in state_dict
                    and prefix[:-1] + '_offset.bias' in state_dict):
                state_dict[prefix +
                           'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
                                                                '_offset.bias')

        if version is not None and version > 1:
            # `print_log` is not imported in this file (its mmdet import is
            # commented out), so calling it raised NameError for every
            # version-2 checkpoint; use the standard logging module instead.
            import logging
            logging.getLogger(__name__).info(
                'ModulatedDeformConvPack {} is upgraded to version 2.'.format(
                    prefix.rstrip('.')))

        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)
const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, at::Tensor grad_im); void deformable_col2im_coord( const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, at::Tensor grad_offset); void modulated_deformable_im2col_cuda( const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, at::Tensor data_col); void modulated_deformable_col2im_cuda( const at::Tensor data_col, const at::Tensor data_offset, const at::Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, at::Tensor grad_im); void modulated_deformable_col2im_coord_cuda( const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, 
const int dilation_h, const int dilation_w, const int deformable_group, at::Tensor grad_offset, at::Tensor grad_mask); void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, at::Tensor weight, int kH, int kW, int dH, int dW, int padH, int padW, int dilationH, int dilationW, int group, int deformable_group) { TORCH_CHECK(weight.ndimension() == 4, "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " "but got: %s", weight.ndimension()); TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); TORCH_CHECK(kW > 0 && kH > 0, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), "kernel size should be consistent with weight, ", "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH, kW, weight.size(2), weight.size(3)); TORCH_CHECK(dW > 0 && dH > 0, "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); TORCH_CHECK( dilationW > 0 && dilationH > 0, "dilation should be greater than 0, but got dilationH: %d dilationW: %d", dilationH, dilationW); int ndim = input.ndimension(); int dimf = 0; int dimh = 1; int dimw = 2; if (ndim == 4) { dimf++; dimh++; dimw++; } TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", ndim); long nInputPlane = weight.size(1) * group; long inputHeight = input.size(dimh); long inputWidth = input.size(dimw); long nOutputPlane = weight.size(0); long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; TORCH_CHECK(nInputPlane % deformable_group == 0, "input channels must divide deformable group size"); if (outputWidth < 1 || outputHeight < 1) AT_ERROR( "Given input size: (%ld x %ld x %ld). " "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, outputWidth); TORCH_CHECK(input.size(1) == nInputPlane, "invalid number of input planes, expected: %d, but got: %d", nInputPlane, input.size(1)); TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), "input image is smaller than kernel"); TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth), "invalid spatial size of offset, expected height: %d width: %d, but " "got height: %d width: %d", outputHeight, outputWidth, offset.size(2), offset.size(3)); TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), "invalid number of channels of offset"); if (gradOutput != NULL) { TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane, "invalid number of gradOutput planes, expected: %d, but got: %d", nOutputPlane, gradOutput->size(dimf)); TORCH_CHECK((gradOutput->size(dimh) == outputHeight && gradOutput->size(dimw) == outputWidth), "invalid size of gradOutput, expected height: %d width: %d , but " "got height: %d width: %d", outputHeight, outputWidth, gradOutput->size(dimh), gradOutput->size(dimw)); } } int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, at::Tensor offset, at::Tensor output, at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step) { // todo: resize columns to include im2col: done // todo: add im2col_step as input // todo: add new output buffer and transpose it to output (or directly // transpose output) todo: possibly change data indexing because of // parallel_imgs shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, padW, dilationH, dilationW, group, deformable_group); at::DeviceGuard guard(input.device()); input = input.contiguous(); offset = offset.contiguous(); weight = weight.contiguous(); int batch = 1; if (input.ndimension() == 3) { // Force batch batch = 0; input.unsqueeze_(0); offset.unsqueeze_(0); } 
// todo: assert batchsize dividable by im2col_step long batchSize = input.size(0); long nInputPlane = input.size(1); long inputHeight = input.size(2); long inputWidth = input.size(3); long nOutputPlane = weight.size(0); long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.options()); if (ones.ndimension() != 2 || ones.size(0) * ones.size(1) < outputHeight * outputWidth) { ones = at::ones({outputHeight, outputWidth}, input.options()); } input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); offset = offset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); at::Tensor output_buffer = at::zeros({batchSize / im2col_step, nOutputPlane, im2col_step * outputHeight, outputWidth}, output.options()); output_buffer = output_buffer.view( {output_buffer.size(0), group, output_buffer.size(1) / group, output_buffer.size(2), output_buffer.size(3)}); for (int elt = 0; elt < batchSize / im2col_step; elt++) { deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, columns); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); for (int g = 0; g < group; g++) { output_buffer[elt][g] = output_buffer[elt][g] .flatten(1) .addmm_(weight[g].flatten(1), columns[g]) .view_as(output_buffer[elt][g]); } } output_buffer = output_buffer.view( {output_buffer.size(0), output_buffer.size(1) * 
output_buffer.size(2), output_buffer.size(3), output_buffer.size(4)}); output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth}); output_buffer.transpose_(1, 2); output.copy_(output_buffer); output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); offset = offset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); if (batch == 0) { output = output.view({nOutputPlane, outputHeight, outputWidth}); input = input.view({nInputPlane, inputHeight, inputWidth}); offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); } return 1; } int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, at::Tensor gradOutput, at::Tensor gradInput, at::Tensor gradOffset, at::Tensor weight, at::Tensor columns, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step) { shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, padH, padW, dilationH, dilationW, group, deformable_group); at::DeviceGuard guard(input.device()); input = input.contiguous(); offset = offset.contiguous(); gradOutput = gradOutput.contiguous(); weight = weight.contiguous(); int batch = 1; if (input.ndimension() == 3) { // Force batch batch = 0; input = input.view({1, input.size(0), input.size(1), input.size(2)}); offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); gradOutput = gradOutput.view( {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); } long batchSize = input.size(0); long nInputPlane = input.size(1); long inputHeight = input.size(2); long inputWidth = input.size(3); long nOutputPlane = weight.size(0); long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; 
TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.options()); // change order of grad output gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); gradOutput.transpose_(1, 2); gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); offset = offset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); for (int elt = 0; elt < batchSize / im2col_step; elt++) { // divide into groups columns = columns.view({group, columns.size(0) / group, columns.size(1)}); weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); gradOutput = gradOutput.view( {gradOutput.size(0), group, gradOutput.size(1) / group, gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)}); for (int g = 0; g < group; g++) { columns[g] = columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), gradOutput[elt][g].flatten(1), 0.0f, 1.0f); } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); gradOutput = gradOutput.view( {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2), gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)}); deformable_col2im_coord(columns, input[elt], offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, gradOffset[elt]); deformable_col2im(columns, offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, 
im2col_step, deformable_group, gradInput[elt]); } gradOutput.transpose_(1, 2); gradOutput = gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); gradOffset = gradOffset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); offset = offset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); if (batch == 0) { gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); input = input.view({nInputPlane, inputHeight, inputWidth}); gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); gradOffset = gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); } return 1; } int deform_conv_backward_parameters_cuda( at::Tensor input, at::Tensor offset, at::Tensor gradOutput, at::Tensor gradWeight, // at::Tensor gradBias, at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, float scale, int im2col_step) { // todo: transpose and reshape outGrad // todo: reshape columns // todo: add im2col_step as input shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, dW, padH, padW, dilationH, dilationW, group, deformable_group); at::DeviceGuard guard(input.device()); input = input.contiguous(); offset = offset.contiguous(); gradOutput = gradOutput.contiguous(); int batch = 1; if (input.ndimension() == 3) { // Force batch batch = 0; input = input.view( at::IntList({1, input.size(0), input.size(1), input.size(2)})); gradOutput = gradOutput.view( {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); } long batchSize = input.size(0); long nInputPlane = input.size(1); long inputHeight = input.size(2); long inputWidth = input.size(3); long nOutputPlane = 
gradWeight.size(0); long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, input.options()); gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); gradOutput.transpose_(1, 2); at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); gradOutputBuffer = gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth}); gradOutputBuffer.copy_(gradOutput); gradOutputBuffer = gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step * outputHeight, outputWidth}); gradOutput.transpose_(1, 2); gradOutput = gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth}); offset = offset.view({batchSize / im2col_step, im2col_step, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); for (int elt = 0; elt < batchSize / im2col_step; elt++) { deformable_im2col(input[elt], offset[elt], nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, deformable_group, columns); // divide into group gradOutputBuffer = gradOutputBuffer.view( {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group, gradOutputBuffer.size(2), gradOutputBuffer.size(3)}); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); gradWeight = gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1), gradWeight.size(2), gradWeight.size(3)}); for (int g = 0; g < group; g++) { gradWeight[g] = gradWeight[g] .flatten(1) .addmm_(gradOutputBuffer[elt][g].flatten(1), columns[g].transpose(1, 0), 1.0, scale) .view_as(gradWeight[g]); } 
gradOutputBuffer = gradOutputBuffer.view( {gradOutputBuffer.size(0), gradOutputBuffer.size(1) * gradOutputBuffer.size(2), gradOutputBuffer.size(3), gradOutputBuffer.size(4)}); columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), gradWeight.size(2), gradWeight.size(3), gradWeight.size(4)}); } input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); offset = offset.view( {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); if (batch == 0) { gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); input = input.view({nInputPlane, inputHeight, inputWidth}); } return 1; } void modulated_deform_conv_cuda_forward( at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, int kernel_h, int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const int group, const int deformable_group, const bool with_bias) { TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); const int channels = input.size(1); const int height = input.size(2); const int width = input.size(3); const int channels_out = weight.size(0); const int channels_kernel = weight.size(1); const int kernel_h_ = weight.size(2); const int kernel_w_ = weight.size(3); if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) AT_ERROR("Input shape and kernel shape wont match: (%d x %d vs %d x %d).", kernel_h_, kernel_w, kernel_h_, kernel_w_); if (channels != channels_kernel * group) AT_ERROR("Input shape and kernel channels wont match: (%d vs %d).", channels, channels_kernel * group); const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 
1)) / stride_h + 1; const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; if (ones.ndimension() != 2 || ones.size(0) * ones.size(1) < height_out * width_out) { // Resize plane and fill with ones... ones = at::ones({height_out, width_out}, input.options()); } // resize output output = output.view({batch, channels_out, height_out, width_out}).zero_(); // resize temporary columns columns = at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options()); output = output.view({output.size(0), group, output.size(1) / group, output.size(2), output.size(3)}); for (int b = 0; b < batch; b++) { modulated_deformable_im2col_cuda( input[b], offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, columns); // divide into group weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); for (int g = 0; g < group; g++) { output[b][g] = output[b][g] .flatten(1) .addmm_(weight[g].flatten(1), columns[g]) .view_as(output[b][g]); } weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), weight.size(3), weight.size(4)}); columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); } output = output.view({output.size(0), output.size(1) * output.size(2), output.size(3), output.size(4)}); if (with_bias) { output += bias.view({1, bias.size(0), 1, 1}); } } void modulated_deform_conv_cuda_backward( at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, at::Tensor offset, at::Tensor mask, at::Tensor columns, at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, int 
group, int deformable_group, const bool with_bias) {
  // --- modulated_deform_conv_cuda_backward: argument validation ---
  // `input` and `weight` must be contiguous; the kernel size arguments must
  // match weight.size(2) / weight.size(3) and input.size(1) must equal
  // weight.size(1) * group.
  TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
  TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
  at::DeviceGuard guard(input.device());

  const int batch = input.size(0);
  const int channels = input.size(1);
  const int height = input.size(2);
  const int width = input.size(3);

  const int channels_kernel = weight.size(1);
  const int kernel_h_ = weight.size(2);
  const int kernel_w_ = weight.size(3);

  // AT_ERROR concatenates its arguments rather than interpreting printf
  // placeholders, so the message is assembled piecewise. The first pair is
  // the requested kernel size, the second the size stored in `weight`.
  if (kernel_h_ != kernel_h || kernel_w_ != kernel_w)
    AT_ERROR("Input shape and kernel shape wont match: (", kernel_h, " x ",
             kernel_w, " vs ", kernel_h_, " x ", kernel_w_, ").");
  if (channels != channels_kernel * group)
    AT_ERROR("Input shape and kernel channels wont match: (", channels,
             " vs ", channels_kernel * group, ").");

  const int height_out =
      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
  const int width_out =
      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

  if (ones.ndimension() != 2 ||
      ones.size(0) * ones.size(1) < height_out * width_out) {
    // Resize plane and fill with ones...
ones = at::ones({height_out, width_out}, input.options()); } grad_input = grad_input.view({batch, channels, height, width}); columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out}, input.options()); grad_output = grad_output.view({grad_output.size(0), group, grad_output.size(1) / group, grad_output.size(2), grad_output.size(3)}); for (int b = 0; b < batch; b++) { // divide int group columns = columns.view({group, columns.size(0) / group, columns.size(1)}); weight = weight.view({group, weight.size(0) / group, weight.size(1), weight.size(2), weight.size(3)}); for (int g = 0; g < group; g++) { columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), grad_output[b][g].flatten(1), 0.0f, 1.0f); } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), weight.size(3), weight.size(4)}); // gradient w.r.t. input coordinate data modulated_deformable_col2im_coord_cuda( columns, input[b], offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], grad_mask[b]); // gradient w.r.t. input data modulated_deformable_col2im_cuda( columns, offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, grad_input[b]); // gradient w.r.t. 
weight, dWeight should accumulate across the batch and // group modulated_deformable_im2col_cuda( input[b], offset[b], mask[b], 1, channels, height, width, height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, deformable_group, columns); columns = columns.view({group, columns.size(0) / group, columns.size(1)}); grad_weight = grad_weight.view({group, grad_weight.size(0) / group, grad_weight.size(1), grad_weight.size(2), grad_weight.size(3)}); if (with_bias) grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); for (int g = 0; g < group; g++) { grad_weight[g] = grad_weight[g] .flatten(1) .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) .view_as(grad_weight[g]); if (with_bias) { grad_bias[g] = grad_bias[g] .view({-1, 1}) .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) .view(-1); } } columns = columns.view({columns.size(0) * columns.size(1), columns.size(2)}); grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), grad_weight.size(2), grad_weight.size(3), grad_weight.size(4)}); if (with_bias) grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); } grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), grad_output.size(2), grad_output.size(3), grad_output.size(4)}); } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("deform_conv_forward_cuda", &deform_conv_forward_cuda, "deform forward (CUDA)"); m.def("deform_conv_backward_input_cuda", &deform_conv_backward_input_cuda, "deform_conv_backward_input (CUDA)"); m.def("deform_conv_backward_parameters_cuda", &deform_conv_backward_parameters_cuda, "deform_conv_backward_parameters (CUDA)"); m.def("modulated_deform_conv_cuda_forward", &modulated_deform_conv_cuda_forward, "modulated deform conv forward (CUDA)"); m.def("modulated_deform_conv_cuda_backward", &modulated_deform_conv_cuda_backward, "modulated deform conv backward (CUDA)"); } ================================================ FILE: 
pcdet/ops/dcn/src/deform_conv_cuda_kernel.cu ================================================ /*! ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** * * COPYRIGHT * * All contributions by the University of California: * Copyright (c) 2014-2017 The Regents of the University of California (Regents) * All rights reserved. * * All other contributions: * Copyright (c) 2014-2017, the respective contributors * All rights reserved. * * Caffe uses a shared copyright model: each contributor holds copyright over * their contributions to Caffe. The project versioning records all such * contribution and copyright details. If a contributor wants to further mark * their specific copyright on a particular contribution, they should indicate * their copyright solely in the commit message of the change when it is * committed. * * LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * CONTRIBUTION AGREEMENT * * By contributing to the BVLC/caffe repository through pull-request, comment, * or otherwise, the contributor releases their content to the * license and copyright terms herein. * ***************** END Caffe Copyright Notice and Disclaimer ******************** * * Copyright (c) 2018 Microsoft * Licensed under The MIT License [see LICENSE for details] * \file modulated_deformable_im2col.cuh * \brief Function definitions of converting an image to * column matrix based on kernel, padding, dilation, and offset. * These functions are mainly used in deformable convolution operators. 
* \ref: https://arxiv.org/abs/1703.06211 * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng */ // modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu #include #include #include #include #include #include using namespace at; #define CUDA_KERNEL_LOOP(i, n) \ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ i += blockDim.x * gridDim.x) const int CUDA_NUM_THREADS = 1024; const int kMaxGridNum = 65535; inline int GET_BLOCKS(const int N) { return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS); } template __device__ scalar_t deformable_im2col_bilinear(const scalar_t *bottom_data, const int data_width, const int height, const int width, scalar_t h, scalar_t w) { int h_low = floor(h); int w_low = floor(w); int h_high = h_low + 1; int w_high = w_low + 1; scalar_t lh = h - h_low; scalar_t lw = w - w_low; scalar_t hh = 1 - lh, hw = 1 - lw; scalar_t v1 = 0; if (h_low >= 0 && w_low >= 0) v1 = bottom_data[h_low * data_width + w_low]; scalar_t v2 = 0; if (h_low >= 0 && w_high <= width - 1) v2 = bottom_data[h_low * data_width + w_high]; scalar_t v3 = 0; if (h_high <= height - 1 && w_low >= 0) v3 = bottom_data[h_high * data_width + w_low]; scalar_t v4 = 0; if (h_high <= height - 1 && w_high <= width - 1) v4 = bottom_data[h_high * data_width + w_high]; scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); return val; } template __device__ scalar_t get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w, const int h, const int w, const int height, const int width) { if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) { //empty return 0; } int argmax_h_low = floor(argmax_h); int argmax_w_low = floor(argmax_w); int argmax_h_high = argmax_h_low + 1; int argmax_w_high = argmax_w_low + 1; scalar_t weight = 0; if (h == argmax_h_low && w == 
argmax_w_low) weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); if (h == argmax_h_low && w == argmax_w_high) weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); if (h == argmax_h_high && w == argmax_w_low) weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); if (h == argmax_h_high && w == argmax_w_high) weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); return weight; } template __device__ scalar_t get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w, const int height, const int width, const scalar_t *im_data, const int data_width, const int bp_dir) { if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) { //empty return 0; } int argmax_h_low = floor(argmax_h); int argmax_w_low = floor(argmax_w); int argmax_h_high = argmax_h_low + 1; int argmax_w_high = argmax_w_low + 1; scalar_t weight = 0; if (bp_dir == 0) { if (argmax_h_low >= 0 && argmax_w_low >= 0) weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low]; if (argmax_h_low >= 0 && argmax_w_high <= width - 1) weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high]; if (argmax_h_high <= height - 1 && argmax_w_low >= 0) weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low]; if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high]; } else if (bp_dir == 1) { if (argmax_h_low >= 0 && argmax_w_low >= 0) weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low]; if (argmax_h_low >= 0 && argmax_w_high <= width - 1) weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high]; if (argmax_h_high <= height - 1 && argmax_w_low >= 0) weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low]; if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) weight += (argmax_h - 
argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high]; } return weight; } template __global__ void deformable_im2col_gpu_kernel(const int n, const scalar_t *data_im, const scalar_t *data_offset, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int channel_per_deformable_group, const int batch_size, const int num_channels, const int deformable_group, const int height_col, const int width_col, scalar_t *data_col) { CUDA_KERNEL_LOOP(index, n) { // index index of output matrix const int w_col = index % width_col; const int h_col = (index / width_col) % height_col; const int b_col = (index / width_col / height_col) % batch_size; const int c_im = (index / width_col / height_col) / batch_size; const int c_col = c_im * kernel_h * kernel_w; // compute deformable group index const int deformable_group_index = c_im / channel_per_deformable_group; const int h_in = h_col * stride_h - pad_h; const int w_in = w_col * stride_w - pad_w; scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; //const scalar_t* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in; const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; for (int i = 0; i < kernel_h; ++i) { for (int j = 0; j < kernel_w; ++j) { const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; scalar_t val = static_cast(0); const scalar_t 
h_im = h_in + i * dilation_h + offset_h; const scalar_t w_im = w_in + j * dilation_w + offset_w; if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { //const scalar_t map_h = i * dilation_h + offset_h; //const scalar_t map_w = j * dilation_w + offset_w; //const int cur_height = height - h_in; //const int cur_width = width - w_in; //val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w); val = deformable_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im); } *data_col_ptr = val; data_col_ptr += batch_size * height_col * width_col; } } } } void deformable_im2col( const at::Tensor data_im, const at::Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, at::Tensor data_col) { // num_axes should be smaller than block size // todo: check parallel_imgs is correctly passed in int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; int num_kernels = channels * height_col * width_col * parallel_imgs; int channel_per_deformable_group = channels / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_im.scalar_type(), "deformable_im2col_gpu", ([&] { const scalar_t *data_im_ = data_im.data(); const scalar_t *data_offset_ = data_offset.data(); scalar_t *data_col_ = data_col.data(); deformable_im2col_gpu_kernel<<>>( num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, parallel_imgs, channels, deformable_group, height_col, width_col, data_col_); })); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("error in deformable_im2col: %s\n", 
cudaGetErrorString(err)); } } template __global__ void deformable_col2im_gpu_kernel( const int n, const scalar_t *data_col, const scalar_t *data_offset, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int channel_per_deformable_group, const int batch_size, const int deformable_group, const int height_col, const int width_col, scalar_t *grad_im) { CUDA_KERNEL_LOOP(index, n) { const int j = (index / width_col / height_col / batch_size) % kernel_w; const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h; const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h; // compute the start and end of the output const int deformable_group_index = c / channel_per_deformable_group; int w_out = index % width_col; int h_out = (index / width_col) % height_col; int b = (index / width_col / height_col) % batch_size; int w_in = w_out * stride_w - pad_w; int h_in = h_out * stride_h - pad_h; const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; const scalar_t cur_top_grad = data_col[index]; const int cur_h = (int)cur_inv_h_data; const int cur_w = (int)cur_inv_w_data; for (int dy = -2; dy <= 2; dy++) { for (int dx = -2; dx <= 2; dx++) { if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && cur_w + dx < width && 
abs(cur_inv_h_data - (cur_h + dy)) < 1 && abs(cur_inv_w_data - (cur_w + dx)) < 1) { int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; scalar_t weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width); atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); } } } } } void deformable_col2im( const at::Tensor data_col, const at::Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, at::Tensor grad_im) { // todo: make sure parallel_imgs is passed in correctly int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; int num_kernels = channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; int channel_per_deformable_group = channels / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "deformable_col2im_gpu", ([&] { const scalar_t *data_col_ = data_col.data(); const scalar_t *data_offset_ = data_offset.data(); scalar_t *grad_im_ = grad_im.data(); deformable_col2im_gpu_kernel<<>>( num_kernels, data_col_, data_offset_, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, parallel_imgs, deformable_group, height_col, width_col, grad_im_); })); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); } } template __global__ void deformable_col2im_coord_gpu_kernel(const int n, const scalar_t *data_col, const scalar_t *data_im, const scalar_t *data_offset, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, 
const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int channel_per_deformable_group, const int batch_size, const int offset_channels, const int deformable_group, const int height_col, const int width_col, scalar_t *grad_offset) { CUDA_KERNEL_LOOP(index, n) { scalar_t val = 0; int w = index % width_col; int h = (index / width_col) % height_col; int c = (index / width_col / height_col) % offset_channels; int b = (index / width_col / height_col) / offset_channels; // compute the start and end of the output const int deformable_group_index = c / (2 * kernel_h * kernel_w); const int col_step = kernel_h * kernel_w; int cnt = 0; const scalar_t *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col; const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width; const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) { const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; const int bp_dir = offset_c % 2; int j = (col_pos / width_col / height_col / batch_size) % kernel_w; int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; int w_out = col_pos % width_col; int h_out = (col_pos / width_col) % height_col; int w_in = w_out * stride_w - pad_w; int h_in = h_out * stride_h - pad_h; const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; 
const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; scalar_t inv_h = h_in + i * dilation_h + offset_h; scalar_t inv_w = w_in + j * dilation_w + offset_w; if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { inv_h = inv_w = -2; } const scalar_t weight = get_coordinate_weight( inv_h, inv_w, height, width, data_im_ptr + cnt * height * width, width, bp_dir); val += weight * data_col_ptr[col_pos]; cnt += 1; } grad_offset[index] = val; } } void deformable_col2im_coord( const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const int channels, const int height, const int width, const int ksize_h, const int ksize_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int parallel_imgs, const int deformable_group, at::Tensor grad_offset) { int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs; int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { const scalar_t *data_col_ = data_col.data(); const scalar_t *data_im_ = data_im.data(); const scalar_t *data_offset_ = data_offset.data(); scalar_t *grad_offset_ = grad_offset.data(); deformable_col2im_coord_gpu_kernel<<>>( num_kernels, data_col_, data_im_, data_offset_, channels, height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group, height_col, width_col, grad_offset_); })); } template __device__ scalar_t dmcn_im2col_bilinear(const scalar_t *bottom_data, const int data_width, const int height, const int width, scalar_t h, 
scalar_t w) { int h_low = floor(h); int w_low = floor(w); int h_high = h_low + 1; int w_high = w_low + 1; scalar_t lh = h - h_low; scalar_t lw = w - w_low; scalar_t hh = 1 - lh, hw = 1 - lw; scalar_t v1 = 0; if (h_low >= 0 && w_low >= 0) v1 = bottom_data[h_low * data_width + w_low]; scalar_t v2 = 0; if (h_low >= 0 && w_high <= width - 1) v2 = bottom_data[h_low * data_width + w_high]; scalar_t v3 = 0; if (h_high <= height - 1 && w_low >= 0) v3 = bottom_data[h_high * data_width + w_low]; scalar_t v4 = 0; if (h_high <= height - 1 && w_high <= width - 1) v4 = bottom_data[h_high * data_width + w_high]; scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); return val; } template __device__ scalar_t dmcn_get_gradient_weight(scalar_t argmax_h, scalar_t argmax_w, const int h, const int w, const int height, const int width) { if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) { //empty return 0; } int argmax_h_low = floor(argmax_h); int argmax_w_low = floor(argmax_w); int argmax_h_high = argmax_h_low + 1; int argmax_w_high = argmax_w_low + 1; scalar_t weight = 0; if (h == argmax_h_low && w == argmax_w_low) weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); if (h == argmax_h_low && w == argmax_w_high) weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); if (h == argmax_h_high && w == argmax_w_low) weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); if (h == argmax_h_high && w == argmax_w_high) weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); return weight; } template __device__ scalar_t dmcn_get_coordinate_weight(scalar_t argmax_h, scalar_t argmax_w, const int height, const int width, const scalar_t *im_data, const int data_width, const int bp_dir) { if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) { //empty return 0; } int argmax_h_low = floor(argmax_h); int argmax_w_low = floor(argmax_w); int argmax_h_high = argmax_h_low + 1; int argmax_w_high = 
argmax_w_low + 1; scalar_t weight = 0; if (bp_dir == 0) { if (argmax_h_low >= 0 && argmax_w_low >= 0) weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low]; if (argmax_h_low >= 0 && argmax_w_high <= width - 1) weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high]; if (argmax_h_high <= height - 1 && argmax_w_low >= 0) weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low]; if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high]; } else if (bp_dir == 1) { if (argmax_h_low >= 0 && argmax_w_low >= 0) weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low]; if (argmax_h_low >= 0 && argmax_w_high <= width - 1) weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high]; if (argmax_h_high <= height - 1 && argmax_w_low >= 0) weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low]; if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high]; } return weight; } template __global__ void modulated_deformable_im2col_gpu_kernel(const int n, const scalar_t *data_im, const scalar_t *data_offset, const scalar_t *data_mask, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int channel_per_deformable_group, const int batch_size, const int num_channels, const int deformable_group, const int height_col, const int width_col, scalar_t *data_col) { CUDA_KERNEL_LOOP(index, n) { // index index of output matrix const int w_col = index % width_col; const int h_col = (index / width_col) % height_col; const int b_col = (index / 
width_col / height_col) % batch_size; const int c_im = (index / width_col / height_col) / batch_size; const int c_col = c_im * kernel_h * kernel_w; // compute deformable group index const int deformable_group_index = c_im / channel_per_deformable_group; const int h_in = h_col * stride_h - pad_h; const int w_in = w_col * stride_w - pad_w; scalar_t *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in; const scalar_t *data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; const scalar_t *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; const scalar_t *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; for (int i = 0; i < kernel_h; ++i) { for (int j = 0; j < kernel_w; ++j) { const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col; const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col; const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; scalar_t val = static_cast(0); const scalar_t h_im = h_in + i * dilation_h + offset_h; const scalar_t w_im = w_in + j * dilation_w + offset_w; //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { //const float map_h = i * dilation_h + offset_h; //const float map_w = j * dilation_w + offset_w; //const int cur_height = height - h_in; //const int cur_width = width - w_in; //val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, 
cur_width, map_h, map_w); val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im); } *data_col_ptr = val * mask; data_col_ptr += batch_size * height_col * width_col; //data_col_ptr += height_col * width_col; } } } } template __global__ void modulated_deformable_col2im_gpu_kernel(const int n, const scalar_t *data_col, const scalar_t *data_offset, const scalar_t *data_mask, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int channel_per_deformable_group, const int batch_size, const int deformable_group, const int height_col, const int width_col, scalar_t *grad_im) { CUDA_KERNEL_LOOP(index, n) { const int j = (index / width_col / height_col / batch_size) % kernel_w; const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h; const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h; // compute the start and end of the output const int deformable_group_index = c / channel_per_deformable_group; int w_out = index % width_col; int h_out = (index / width_col) % height_col; int b = (index / width_col / height_col) % batch_size; int w_in = w_out * stride_w - pad_w; int h_in = h_out * stride_h - pad_h; const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; const 
scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; const scalar_t cur_top_grad = data_col[index] * mask; const int cur_h = (int)cur_inv_h_data; const int cur_w = (int)cur_inv_w_data; for (int dy = -2; dy <= 2; dy++) { for (int dx = -2; dx <= 2; dx++) { if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && abs(cur_inv_w_data - (cur_w + dx)) < 1) { int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; scalar_t weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width); atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); } } } } } template __global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n, const scalar_t *data_col, const scalar_t *data_im, const scalar_t *data_offset, const scalar_t *data_mask, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int channel_per_deformable_group, const int batch_size, const int offset_channels, const int deformable_group, const int height_col, const int width_col, scalar_t *grad_offset, scalar_t *grad_mask) { CUDA_KERNEL_LOOP(index, n) { scalar_t val = 0, mval = 0; int w = index % width_col; int h = (index / width_col) % height_col; int c = (index / width_col / height_col) % offset_channels; int b = (index / width_col / height_col) / offset_channels; // compute the start and end of the output const int deformable_group_index = c / (2 * kernel_h * kernel_w); const int col_step = kernel_h * kernel_w; int cnt = 0; const scalar_t *data_col_ptr = data_col + deformable_group_index * 
channel_per_deformable_group * batch_size * width_col * height_col; const scalar_t *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width; const scalar_t *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; const scalar_t *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) { const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; const int bp_dir = offset_c % 2; int j = (col_pos / width_col / height_col / batch_size) % kernel_w; int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; int w_out = col_pos % width_col; int h_out = (col_pos / width_col) % height_col; int w_in = w_out * stride_w - pad_w; int h_in = h_out * stride_h - pad_h; const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; scalar_t inv_h = h_in + i * dilation_h + offset_h; scalar_t inv_w = w_in + j * dilation_w + offset_w; if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { inv_h = inv_w = -2; } else { mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w); } const scalar_t weight = dmcn_get_coordinate_weight( inv_h, inv_w, height, width, 
data_im_ptr + cnt * height * width, width, bp_dir); val += weight * data_col_ptr[col_pos] * mask; cnt += 1; } // KERNEL_ASSIGN(grad_offset[index], offset_req, val); grad_offset[index] = val; if (offset_c % 2 == 0) // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval); grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval; } } void modulated_deformable_im2col_cuda( const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, at::Tensor data_col) { // num_axes should be smaller than block size const int channel_per_deformable_group = channels / deformable_group; const int num_kernels = channels * batch_size * height_col * width_col; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { const scalar_t *data_im_ = data_im.data(); const scalar_t *data_offset_ = data_offset.data(); const scalar_t *data_mask_ = data_mask.data(); scalar_t *data_col_ = data_col.data(); modulated_deformable_im2col_gpu_kernel<<>>( num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, batch_size, channels, deformable_group, height_col, width_col, data_col_); })); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err)); } } void modulated_deformable_col2im_cuda( const at::Tensor data_col, const at::Tensor 
data_offset, const at::Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, at::Tensor grad_im) { const int channel_per_deformable_group = channels / deformable_group; const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col; AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { const scalar_t *data_col_ = data_col.data(); const scalar_t *data_offset_ = data_offset.data(); const scalar_t *data_mask_ = data_mask.data(); scalar_t *grad_im_ = grad_im.data(); modulated_deformable_col2im_gpu_kernel<<>>( num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, batch_size, deformable_group, height_col, width_col, grad_im_); })); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err)); } } void modulated_deformable_col2im_coord_cuda( const at::Tensor data_col, const at::Tensor data_im, const at::Tensor data_offset, const at::Tensor data_mask, const int batch_size, const int channels, const int height_im, const int width_im, const int height_col, const int width_col, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const int deformable_group, at::Tensor grad_offset, at::Tensor grad_mask) { const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group; const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group; 
AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { const scalar_t *data_col_ = data_col.data(); const scalar_t *data_im_ = data_im.data(); const scalar_t *data_offset_ = data_offset.data(); const scalar_t *data_mask_ = data_mask.data(); scalar_t *grad_offset_ = grad_offset.data(); scalar_t *grad_mask_ = grad_mask.data(); modulated_deformable_col2im_coord_gpu_kernel<<>>( num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group, batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col, grad_offset_, grad_mask_); })); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err)); } } ================================================ FILE: pcdet/ops/dcn/src/deform_pool_cuda.cpp ================================================ // modify from // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c // based on // author: Charles Shang // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu #include #include #include #include void DeformablePSROIPoolForward( const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, at::Tensor out, at::Tensor top_count, const int batch, const int channels, const int height, const int width, const int num_bbox, const int channels_trans, const int no_trans, const float spatial_scale, const int output_dim, const int group_size, const int pooled_size, const int part_size, const int sample_per_part, const float trans_std); void DeformablePSROIPoolBackwardAcc( const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, at::Tensor 
trans_grad, const int batch, const int channels, const int height, const int width, const int num_bbox, const int channels_trans, const int no_trans, const float spatial_scale, const int output_dim, const int group_size, const int pooled_size, const int part_size, const int sample_per_part, const float trans_std); void deform_psroi_pooling_cuda_forward( at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, at::Tensor top_count, const int no_trans, const float spatial_scale, const int output_dim, const int group_size, const int pooled_size, const int part_size, const int sample_per_part, const float trans_std) { TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); const int channels = input.size(1); const int height = input.size(2); const int width = input.size(3); const int channels_trans = no_trans ? 2 : trans.size(1); const int num_bbox = bbox.size(0); if (num_bbox != out.size(0)) AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", out.size(0), num_bbox); DeformablePSROIPoolForward( input, bbox, trans, out, top_count, batch, channels, height, width, num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, pooled_size, part_size, sample_per_part, trans_std); } void deform_psroi_pooling_cuda_backward( at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, const int no_trans, const float spatial_scale, const int output_dim, const int group_size, const int pooled_size, const int part_size, const int sample_per_part, const float trans_std) { TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); const int channels = input.size(1); const int height = input.size(2); const int width = 
input.size(3); const int channels_trans = no_trans ? 2 : trans.size(1); const int num_bbox = bbox.size(0); if (num_bbox != out_grad.size(0)) AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", out_grad.size(0), num_bbox); DeformablePSROIPoolBackwardAcc( out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, channels, height, width, num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, pooled_size, part_size, sample_per_part, trans_std); } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward, "deform psroi pooling forward(CUDA)"); m.def("deform_psroi_pooling_cuda_backward", &deform_psroi_pooling_cuda_backward, "deform psroi pooling backward(CUDA)"); } ================================================ FILE: pcdet/ops/dcn/src/deform_pool_cuda_kernel.cu ================================================ /*! * Copyright (c) 2017 Microsoft * Licensed under The MIT License [see LICENSE for details] * \file deformable_psroi_pooling.cu * \brief * \author Yi Li, Guodong Zhang, Jifeng Dai */ /***************** Adapted by Charles Shang *********************/ // modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/deform_psroi_pooling_cuda.cu #include #include #include #include #include using namespace at; #define CUDA_KERNEL_LOOP(i, n) \ for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ i < (n); \ i += blockDim.x * gridDim.x) const int CUDA_NUM_THREADS = 1024; inline int GET_BLOCKS(const int N) { return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; } template __device__ scalar_t bilinear_interp( const scalar_t *data, const scalar_t x, const scalar_t y, const int width, const int height) { int x1 = floor(x); int x2 = ceil(x); int y1 = floor(y); int y2 = ceil(y); scalar_t dist_x = (scalar_t)(x - x1); scalar_t dist_y = (scalar_t)(y - y1); scalar_t value11 = data[y1 * width + x1]; scalar_t value12 = 
data[y2 * width + x1]; scalar_t value21 = data[y1 * width + x2]; scalar_t value22 = data[y2 * width + x2]; scalar_t value = (1 - dist_x) * (1 - dist_y) * value11 + (1 - dist_x) * dist_y * value12 + dist_x * (1 - dist_y) * value21 + dist_x * dist_y * value22; return value; } template __global__ void DeformablePSROIPoolForwardKernel( const int count, const scalar_t *bottom_data, const scalar_t spatial_scale, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const scalar_t *bottom_rois, const scalar_t *bottom_trans, const int no_trans, const scalar_t trans_std, const int sample_per_part, const int output_dim, const int group_size, const int part_size, const int num_classes, const int channels_each_class, scalar_t *top_data, scalar_t *top_count) { CUDA_KERNEL_LOOP(index, count) { // The output is in order (n, ctop, ph, pw) int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; int ctop = (index / pooled_width / pooled_height) % output_dim; int n = index / pooled_width / pooled_height / output_dim; // [start, end) interval for spatial sampling const scalar_t *offset_bottom_rois = bottom_rois + n * 5; int roi_batch_ind = offset_bottom_rois[0]; scalar_t roi_start_w = (scalar_t)(round(offset_bottom_rois[1])) * spatial_scale - 0.5; scalar_t roi_start_h = (scalar_t)(round(offset_bottom_rois[2])) * spatial_scale - 0.5; scalar_t roi_end_w = (scalar_t)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5; scalar_t roi_end_h = (scalar_t)(round(offset_bottom_rois[4]) + 1.) 
* spatial_scale - 0.5; // Force too small ROIs to be 1x1 scalar_t roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0 scalar_t roi_height = max(roi_end_h - roi_start_h, 0.1); // Compute w and h at bottom scalar_t bin_size_h = roi_height / (scalar_t)(pooled_height); scalar_t bin_size_w = roi_width / (scalar_t)(pooled_width); scalar_t sub_bin_size_h = bin_size_h / (scalar_t)(sample_per_part); scalar_t sub_bin_size_w = bin_size_w / (scalar_t)(sample_per_part); int part_h = floor((scalar_t)(ph) / pooled_height * part_size); int part_w = floor((scalar_t)(pw) / pooled_width * part_size); int class_id = ctop / channels_each_class; scalar_t trans_x = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std; scalar_t trans_y = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std; scalar_t wstart = (scalar_t)(pw)*bin_size_w + roi_start_w; wstart += trans_x * roi_width; scalar_t hstart = (scalar_t)(ph)*bin_size_h + roi_start_h; hstart += trans_y * roi_height; scalar_t sum = 0; int count = 0; int gw = floor((scalar_t)(pw)*group_size / pooled_width); int gh = floor((scalar_t)(ph)*group_size / pooled_height); gw = min(max(gw, 0), group_size - 1); gh = min(max(gh, 0), group_size - 1); const scalar_t *offset_bottom_data = bottom_data + (roi_batch_ind * channels) * height * width; for (int ih = 0; ih < sample_per_part; ih++) { for (int iw = 0; iw < sample_per_part; iw++) { scalar_t w = wstart + iw * sub_bin_size_w; scalar_t h = hstart + ih * sub_bin_size_h; // bilinear interpolation if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5) { continue; } w = min(max(w, 0.), width - 1.); h = min(max(h, 0.), height - 1.); int c = (ctop * group_size + gh) * group_size + gw; scalar_t val = bilinear_interp(offset_bottom_data + c * height * width, w, h, width, height); sum += val; count++; } } 
top_data[index] = count == 0 ? (scalar_t)(0) : sum / count; top_count[index] = count; } } template __global__ void DeformablePSROIPoolBackwardAccKernel( const int count, const scalar_t *top_diff, const scalar_t *top_count, const int num_rois, const scalar_t spatial_scale, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int output_dim, scalar_t *bottom_data_diff, scalar_t *bottom_trans_diff, const scalar_t *bottom_data, const scalar_t *bottom_rois, const scalar_t *bottom_trans, const int no_trans, const scalar_t trans_std, const int sample_per_part, const int group_size, const int part_size, const int num_classes, const int channels_each_class) { CUDA_KERNEL_LOOP(index, count) { // The output is in order (n, ctop, ph, pw) int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; int ctop = (index / pooled_width / pooled_height) % output_dim; int n = index / pooled_width / pooled_height / output_dim; // [start, end) interval for spatial sampling const scalar_t *offset_bottom_rois = bottom_rois + n * 5; int roi_batch_ind = offset_bottom_rois[0]; scalar_t roi_start_w = (scalar_t)(round(offset_bottom_rois[1])) * spatial_scale - 0.5; scalar_t roi_start_h = (scalar_t)(round(offset_bottom_rois[2])) * spatial_scale - 0.5; scalar_t roi_end_w = (scalar_t)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5; scalar_t roi_end_h = (scalar_t)(round(offset_bottom_rois[4]) + 1.) 
* spatial_scale - 0.5; // Force too small ROIs to be 1x1 scalar_t roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0 scalar_t roi_height = max(roi_end_h - roi_start_h, 0.1); // Compute w and h at bottom scalar_t bin_size_h = roi_height / (scalar_t)(pooled_height); scalar_t bin_size_w = roi_width / (scalar_t)(pooled_width); scalar_t sub_bin_size_h = bin_size_h / (scalar_t)(sample_per_part); scalar_t sub_bin_size_w = bin_size_w / (scalar_t)(sample_per_part); int part_h = floor((scalar_t)(ph) / pooled_height * part_size); int part_w = floor((scalar_t)(pw) / pooled_width * part_size); int class_id = ctop / channels_each_class; scalar_t trans_x = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std; scalar_t trans_y = no_trans ? (scalar_t)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * (scalar_t)trans_std; scalar_t wstart = (scalar_t)(pw)*bin_size_w + roi_start_w; wstart += trans_x * roi_width; scalar_t hstart = (scalar_t)(ph)*bin_size_h + roi_start_h; hstart += trans_y * roi_height; if (top_count[index] <= 0) { continue; } scalar_t diff_val = top_diff[index] / top_count[index]; const scalar_t *offset_bottom_data = bottom_data + roi_batch_ind * channels * height * width; scalar_t *offset_bottom_data_diff = bottom_data_diff + roi_batch_ind * channels * height * width; int gw = floor((scalar_t)(pw)*group_size / pooled_width); int gh = floor((scalar_t)(ph)*group_size / pooled_height); gw = min(max(gw, 0), group_size - 1); gh = min(max(gh, 0), group_size - 1); for (int ih = 0; ih < sample_per_part; ih++) { for (int iw = 0; iw < sample_per_part; iw++) { scalar_t w = wstart + iw * sub_bin_size_w; scalar_t h = hstart + ih * sub_bin_size_h; // bilinear interpolation if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5) { continue; } w = min(max(w, 0.), width - 1.); h = min(max(h, 0.), height - 1.); int c = (ctop * 
group_size + gh) * group_size + gw;

        // backward on feature
        int x0 = floor(w);
        int x1 = ceil(w);
        int y0 = floor(h);
        int y1 = ceil(h);
        scalar_t dist_x = w - x0, dist_y = h - y0;
        // Bilinear weights of the four neighbouring pixels.
        scalar_t q00 = (1 - dist_x) * (1 - dist_y);
        scalar_t q01 = (1 - dist_x) * dist_y;
        scalar_t q10 = dist_x * (1 - dist_y);
        scalar_t q11 = dist_x * dist_y;
        int bottom_index_base = c * height * width;
        // atomicAdd: several loop indices may accumulate into the same pixel.
        atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x0, q00 * diff_val);
        atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x0, q01 * diff_val);
        atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x1, q10 * diff_val);
        atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x1, q11 * diff_val);

        if (no_trans)
        {
          continue;
        }
        // Values at the four neighbours, used for the gradient w.r.t. the offsets.
        scalar_t U00 = offset_bottom_data[bottom_index_base + y0 * width + x0];
        scalar_t U01 = offset_bottom_data[bottom_index_base + y1 * width + x0];
        scalar_t U10 = offset_bottom_data[bottom_index_base + y0 * width + x1];
        scalar_t U11 = offset_bottom_data[bottom_index_base + y1 * width + x1];
        scalar_t diff_x = (U11 * dist_y + U10 * (1 - dist_y) - U01 * dist_y - U00 * (1 - dist_y)) * trans_std * diff_val;
        diff_x *= roi_width;
        scalar_t diff_y = (U11 * dist_x + U01 * (1 - dist_x) - U10 * dist_x - U00 * (1 - dist_x)) * trans_std * diff_val;
        diff_y *= roi_height;

        atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w, diff_x);
        atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w, diff_y);
      }
    }
  }
}

/**
 * Host launcher for DeformablePSROIPoolForwardKernel.
 *
 * Launches over `count = num_bbox * output_dim * pooled_size * pooled_size`
 * indices (the pooled output is square: pooled_height == pooled_width ==
 * pooled_size) and writes the results into `out` and `top_count`.
 *
 * When `no_trans` is set the kernel receives a NULL offset pointer and a
 * single class is assumed; otherwise `num_classes = channels_trans / 2`.
 *
 * A failed launch is only reported on stdout; nothing is thrown.
 */
void DeformablePSROIPoolForward(
    const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
    at::Tensor out, at::Tensor top_count,
    const int batch, const int channels, const int height, const int width,
    const int num_bbox, const int channels_trans, const int no_trans,
    const float spatial_scale, const int output_dim, const int group_size,
    const int pooled_size, const int part_size, const int sample_per_part,
    const float trans_std)
{
  const int pooled_height = pooled_size;
  const int pooled_width = pooled_size;
  const int count = num_bbox * output_dim * pooled_height * pooled_width;
  const int num_classes = no_trans ? 1 : channels_trans / 2;
  const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      data.scalar_type(), "deformable_psroi_pool_forward", ([&] {
        const scalar_t *bottom_data = data.data<scalar_t>();
        const scalar_t *bottom_rois = bbox.data<scalar_t>();
        const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
        scalar_t *top_data = out.data<scalar_t>();
        scalar_t *top_count_data = top_count.data<scalar_t>();

        DeformablePSROIPoolForwardKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
            count, bottom_data, (scalar_t)spatial_scale, channels, height, width,
            pooled_height, pooled_width, bottom_rois, bottom_trans, no_trans,
            (scalar_t)trans_std, sample_per_part, output_dim, group_size,
            part_size, num_classes, channels_each_class, top_data, top_count_data);
      }));

  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess)
  {
    printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err));
  }
}

/**
 * Host launcher for DeformablePSROIPoolBackwardAccKernel.
 *
 * Uses the same index count and class bookkeeping as the forward launcher.
 * The kernel adds into `in_grad` and `trans_grad` with atomicAdd rather than
 * overwriting them -- presumably the caller passes zero-filled tensors;
 * verify against the Python wrapper.
 *
 * When `no_trans` is set both offset pointers handed to the kernel are NULL.
 *
 * A failed launch is only reported on stdout; nothing is thrown.
 */
void DeformablePSROIPoolBackwardAcc(
    const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
    const at::Tensor trans, const at::Tensor top_count,
    at::Tensor in_grad, at::Tensor trans_grad,
    const int batch, const int channels, const int height, const int width,
    const int num_bbox, const int channels_trans, const int no_trans,
    const float spatial_scale, const int output_dim, const int group_size,
    const int pooled_size, const int part_size, const int sample_per_part,
    const float trans_std)
{
  // LOG(INFO) << "DeformablePSROIPoolBackward";
  const int num_rois = num_bbox;
  const int pooled_height = pooled_size;
  const int pooled_width = pooled_size;
  const int count = num_bbox * output_dim * pooled_height * pooled_width;
  const int num_classes = no_trans ? 1 : channels_trans / 2;
  const int channels_each_class = no_trans ? output_dim : output_dim / num_classes;

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      out_grad.scalar_type(), "deformable_psroi_pool_backward_acc", ([&] {
        const scalar_t *top_diff = out_grad.data<scalar_t>();
        const scalar_t *bottom_data = data.data<scalar_t>();
        const scalar_t *bottom_rois = bbox.data<scalar_t>();
        const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
        scalar_t *bottom_data_diff = in_grad.data<scalar_t>();
        scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data<scalar_t>();
        const scalar_t *top_count_data = top_count.data<scalar_t>();

        DeformablePSROIPoolBackwardAccKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
            count, top_diff, top_count_data, num_rois, (scalar_t)spatial_scale,
            channels, height, width, pooled_height, pooled_width, output_dim,
            bottom_data_diff, bottom_trans_diff, bottom_data, bottom_rois,
            bottom_trans, no_trans, (scalar_t)trans_std, sample_per_part,
            group_size, part_size, num_classes, channels_each_class);
      }));

  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess)
  {
    // Name this launcher, not the forward one, so failures are attributed correctly.
    printf("error in DeformablePSROIPoolBackwardAcc: %s\n", cudaGetErrorString(err));
  }
}

================================================
FILE: pcdet/ops/iou3d_nms/iou3d_nms_utils.py
================================================
""" 3D IoU Calculation and Rotated NMS Written by Shaoshuai Shi All Rights Reserved 2019-2020. """
import torch

from ...utils import common_utils
from .
import iou3d_nms_cuda def boxes_bev_iou_cpu(boxes_a, boxes_b): """ Args: boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] boxes_b: (N, 7) [x, y, z, dx, dy, dz, heading] Returns: """ boxes_a, is_numpy = common_utils.check_numpy_to_torch(boxes_a) boxes_b, is_numpy = common_utils.check_numpy_to_torch(boxes_b) assert not (boxes_a.is_cuda or boxes_b.is_cuda), 'Only support CPU tensors' assert boxes_a.shape[1] == 7 and boxes_b.shape[1] == 7 ans_iou = boxes_a.new_zeros(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) iou3d_nms_cuda.boxes_iou_bev_cpu(boxes_a.contiguous(), boxes_b.contiguous(), ans_iou) return ans_iou.numpy() if is_numpy else ans_iou def boxes_iou_bev(boxes_a, boxes_b): """ Args: boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] boxes_b: (N, 7) [x, y, z, dx, dy, dz, heading] Returns: ans_iou: (N, M) """ assert boxes_a.shape[1] == boxes_b.shape[1] == 7 ans_iou = torch.cuda.FloatTensor(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() iou3d_nms_cuda.boxes_iou_bev_gpu(boxes_a.contiguous(), boxes_b.contiguous(), ans_iou) return ans_iou def boxes_dis(boxes_a, boxes_b): """ Args: boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] boxes_b: (N, 7) [x, y, z, dx, dy, dz, heading] Returns: dis: (N, M) """ n,k = boxes_a.shape m,k2 = boxes_b.shape new_boxes_a = boxes_a.unsqueeze(1).expand(n, m, k) new_boxes_b = boxes_b.unsqueeze(0).expand(n, m, k2) dis = (new_boxes_a[..., 0:2] - new_boxes_b[..., 0:2])**2 dis = torch.sqrt(torch.sum(dis, dim=-1)) return dis def boxes_iou3d_gpu(boxes_a, boxes_b): """ Args: boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] boxes_b: (N, 7) [x, y, z, dx, dy, dz, heading] Returns: ans_iou: (N, M) """ assert boxes_a.shape[1] == boxes_b.shape[1] == 7 # height overlap boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5] / 2).view(-1, 1) boxes_a_height_min = (boxes_a[:, 2] - boxes_a[:, 5] / 2).view(-1, 1) boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5] / 2).view(1, -1) boxes_b_height_min = (boxes_b[:, 2] - boxes_b[:, 5] / 2).view(1, -1) # bev 
overlap overlaps_bev = torch.cuda.FloatTensor(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() # (N, M) iou3d_nms_cuda.boxes_overlap_bev_gpu(boxes_a.contiguous(), boxes_b.contiguous(), overlaps_bev) max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min) min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max) overlaps_h = torch.clamp(min_of_max - max_of_min, min=0) # 3d iou overlaps_3d = overlaps_bev * overlaps_h vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1) vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1) iou3d = overlaps_3d / torch.clamp(vol_a + vol_b - overlaps_3d, min=1e-6) return iou3d def nms_gpu(boxes, scores, thresh, pre_maxsize=None, **kwargs): """ :param boxes: (N, 7) [x, y, z, dx, dy, dz, heading] :param scores: (N) :param thresh: :return: """ assert boxes.shape[1] == 7 order = scores.sort(0, descending=True)[1] if pre_maxsize is not None: order = order[:pre_maxsize] boxes = boxes[order].contiguous() keep = torch.LongTensor(boxes.size(0)) num_out = iou3d_nms_cuda.nms_gpu(boxes, keep, thresh) return order[keep[:num_out].cuda()].contiguous(), None def nms_normal_gpu(boxes, scores, thresh, **kwargs): """ :param boxes: (N, 7) [x, y, z, dx, dy, dz, heading] :param scores: (N) :param thresh: :return: """ assert boxes.shape[1] == 7 order = scores.sort(0, descending=True)[1] boxes = boxes[order].contiguous() keep = torch.LongTensor(boxes.size(0)) num_out = iou3d_nms_cuda.nms_normal_gpu(boxes, keep, thresh) return order[keep[:num_out].cuda()].contiguous(), None ================================================ FILE: pcdet/ops/iou3d_nms/src/iou3d_cpu.cpp ================================================ /* 3D Rotated IoU Calculation (CPU) Written by Shaoshuai Shi All Rights Reserved 2020. 
*/ #include #include #include #include #include #include #include #include "iou3d_cpu.h" #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) inline float min(float a, float b){ return a > b ? b : a; } inline float max(float a, float b){ return a > b ? a : b; } const float EPS = 1e-8; struct Point { float x, y; __device__ Point() {} __device__ Point(double _x, double _y){ x = _x, y = _y; } __device__ void set(float _x, float _y){ x = _x; y = _y; } __device__ Point operator +(const Point &b)const{ return Point(x + b.x, y + b.y); } __device__ Point operator -(const Point &b)const{ return Point(x - b.x, y - b.y); } }; inline float cross(const Point &a, const Point &b){ return a.x * b.y - a.y * b.x; } inline float cross(const Point &p1, const Point &p2, const Point &p0){ return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y); } inline int check_rect_cross(const Point &p1, const Point &p2, const Point &q1, const Point &q2){ int ret = min(p1.x,p2.x) <= max(q1.x,q2.x) && min(q1.x,q2.x) <= max(p1.x,p2.x) && min(p1.y,p2.y) <= max(q1.y,q2.y) && min(q1.y,q2.y) <= max(p1.y,p2.y); return ret; } inline int check_in_box2d(const float *box, const Point &p){ //params: (7) [x, y, z, dx, dy, dz, heading] const float MARGIN = 1e-2; float center_x = box[0], center_y = box[1]; float angle_cos = cos(-box[6]), angle_sin = sin(-box[6]); // rotate the point in the opposite direction of box float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin); float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos; return (fabs(rot_x) < box[3] / 2 + MARGIN && fabs(rot_y) < box[4] / 2 + MARGIN); } inline int 
intersection(const Point &p1, const Point &p0, const Point &q1, const Point &q0, Point &ans){ // fast exclusion if (check_rect_cross(p0, p1, q0, q1) == 0) return 0; // check cross standing float s1 = cross(q0, p1, p0); float s2 = cross(p1, q1, p0); float s3 = cross(p0, q1, q0); float s4 = cross(q1, p1, q0); if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0; // calculate intersection of two lines float s5 = cross(q1, p1, p0); if(fabs(s5 - s1) > EPS){ ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1); ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1); } else{ float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y; float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y; float D = a0 * b1 - a1 * b0; ans.x = (b0 * c1 - b1 * c0) / D; ans.y = (a1 * c0 - a0 * c1) / D; } return 1; } inline void rotate_around_center(const Point ¢er, const float angle_cos, const float angle_sin, Point &p){ float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) * (-angle_sin) + center.x; float new_y = (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y; p.set(new_x, new_y); } inline int point_cmp(const Point &a, const Point &b, const Point ¢er){ return atan2(a.y - center.y, a.x - center.x) > atan2(b.y - center.y, b.x - center.x); } inline float box_overlap(const float *box_a, const float *box_b){ // params: box_a (7) [x, y, z, dx, dy, dz, heading] // params: box_b (7) [x, y, z, dx, dy, dz, heading] // float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = box_a[3], a_angle = box_a[4]; // float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = box_b[3], b_angle = box_b[4]; float a_angle = box_a[6], b_angle = box_b[6]; float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2, a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2; float a_x1 = box_a[0] - a_dx_half, a_y1 = box_a[1] - a_dy_half; float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half; float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half; float b_x2 = 
box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half; Point center_a(box_a[0], box_a[1]); Point center_b(box_b[0], box_b[1]); Point box_a_corners[5]; box_a_corners[0].set(a_x1, a_y1); box_a_corners[1].set(a_x2, a_y1); box_a_corners[2].set(a_x2, a_y2); box_a_corners[3].set(a_x1, a_y2); Point box_b_corners[5]; box_b_corners[0].set(b_x1, b_y1); box_b_corners[1].set(b_x2, b_y1); box_b_corners[2].set(b_x2, b_y2); box_b_corners[3].set(b_x1, b_y2); // get oriented corners float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle); float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle); for (int k = 0; k < 4; k++){ rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]); rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]); } box_a_corners[4] = box_a_corners[0]; box_b_corners[4] = box_b_corners[0]; // get intersection of lines Point cross_points[16]; Point poly_center; int cnt = 0, flag = 0; poly_center.set(0, 0); for (int i = 0; i < 4; i++){ for (int j = 0; j < 4; j++){ flag = intersection(box_a_corners[i + 1], box_a_corners[i], box_b_corners[j + 1], box_b_corners[j], cross_points[cnt]); if (flag){ poly_center = poly_center + cross_points[cnt]; cnt++; } } } // check corners for (int k = 0; k < 4; k++){ if (check_in_box2d(box_a, box_b_corners[k])){ poly_center = poly_center + box_b_corners[k]; cross_points[cnt] = box_b_corners[k]; cnt++; } if (check_in_box2d(box_b, box_a_corners[k])){ poly_center = poly_center + box_a_corners[k]; cross_points[cnt] = box_a_corners[k]; cnt++; } } poly_center.x /= cnt; poly_center.y /= cnt; // sort the points of polygon Point temp; for (int j = 0; j < cnt - 1; j++){ for (int i = 0; i < cnt - j - 1; i++){ if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)){ temp = cross_points[i]; cross_points[i] = cross_points[i + 1]; cross_points[i + 1] = temp; } } } // get the overlap areas float area = 0; for (int k = 0; k < cnt - 1; k++){ area += cross(cross_points[k] - cross_points[0], 
cross_points[k + 1] - cross_points[0]); } return fabs(area) / 2.0; } inline float iou_bev(const float *box_a, const float *box_b){ // params: box_a (7) [x, y, z, dx, dy, dz, heading] // params: box_b (7) [x, y, z, dx, dy, dz, heading] float sa = box_a[3] * box_a[4]; float sb = box_b[3] * box_b[4]; float s_overlap = box_overlap(box_a, box_b); return s_overlap / fmaxf(sa + sb - s_overlap, EPS); } int boxes_iou_bev_cpu(at::Tensor boxes_a_tensor, at::Tensor boxes_b_tensor, at::Tensor ans_iou_tensor){ // params boxes_a_tensor: (N, 7) [x, y, z, dx, dy, dz, heading] // params boxes_b_tensor: (M, 7) [x, y, z, dx, dy, dz, heading] // params ans_iou_tensor: (N, M) CHECK_CONTIGUOUS(boxes_a_tensor); CHECK_CONTIGUOUS(boxes_b_tensor); int num_boxes_a = boxes_a_tensor.size(0); int num_boxes_b = boxes_b_tensor.size(0); const float *boxes_a = boxes_a_tensor.data(); const float *boxes_b = boxes_b_tensor.data(); float *ans_iou = ans_iou_tensor.data(); for (int i = 0; i < num_boxes_a; i++){ for (int j = 0; j < num_boxes_b; j++){ ans_iou[i * num_boxes_b + j] = iou_bev(boxes_a + i * 7, boxes_b + j * 7); } } return 1; } ================================================ FILE: pcdet/ops/iou3d_nms/src/iou3d_cpu.h ================================================ #ifndef IOU3D_CPU_H #define IOU3D_CPU_H #include #include #include #include int boxes_iou_bev_cpu(at::Tensor boxes_a_tensor, at::Tensor boxes_b_tensor, at::Tensor ans_iou_tensor); #endif ================================================ FILE: pcdet/ops/iou3d_nms/src/iou3d_nms.cpp ================================================ /* 3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) Written by Shaoshuai Shi All Rights Reserved 2019-2020. 
*/ #include #include #include #include #include #include "iou3d_nms.h" #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) #define CHECK_ERROR(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) { if (code != cudaSuccess) { fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); if (abort) exit(code); } } const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; void boxesoverlapLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap); void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_iou); void nmsLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh); void nmsNormalLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh); int boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_overlap){ // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] // params ans_overlap: (N, M) CHECK_INPUT(boxes_a); CHECK_INPUT(boxes_b); CHECK_INPUT(ans_overlap); int num_a = boxes_a.size(0); int num_b = boxes_b.size(0); const float * boxes_a_data = boxes_a.data(); const float * boxes_b_data = boxes_b.data(); float * ans_overlap_data = ans_overlap.data(); boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_overlap_data); return 1; } int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, 
at::Tensor ans_iou){ // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] // params ans_overlap: (N, M) CHECK_INPUT(boxes_a); CHECK_INPUT(boxes_b); CHECK_INPUT(ans_iou); int num_a = boxes_a.size(0); int num_b = boxes_b.size(0); const float * boxes_a_data = boxes_a.data(); const float * boxes_b_data = boxes_b.data(); float * ans_iou_data = ans_iou.data(); boxesioubevLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_iou_data); return 1; } int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){ // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading] // params keep: (N) CHECK_INPUT(boxes); CHECK_CONTIGUOUS(keep); int boxes_num = boxes.size(0); const float * boxes_data = boxes.data(); long * keep_data = keep.data(); const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); unsigned long long *mask_data = NULL; CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long))); nmsLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh); // unsigned long long mask_cpu[boxes_num * col_blocks]; // unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks]; std::vector mask_cpu(boxes_num * col_blocks); // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks); CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long), cudaMemcpyDeviceToHost)); cudaFree(mask_data); unsigned long long remv_cpu[col_blocks]; memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long)); int num_to_keep = 0; for (int i = 0; i < boxes_num; i++){ int nblock = i / THREADS_PER_BLOCK_NMS; int inblock = i % THREADS_PER_BLOCK_NMS; if (!(remv_cpu[nblock] & (1ULL << inblock))){ keep_data[num_to_keep++] = i; unsigned long long *p = &mask_cpu[0] + i * col_blocks; for (int j = nblock; j < col_blocks; j++){ remv_cpu[j] |= p[j]; } } } if ( cudaSuccess != cudaGetLastError() ) printf( "Error!\n" ); return num_to_keep; } 
int nms_normal_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){ // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading] // params keep: (N) CHECK_INPUT(boxes); CHECK_CONTIGUOUS(keep); int boxes_num = boxes.size(0); const float * boxes_data = boxes.data(); long * keep_data = keep.data(); const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); unsigned long long *mask_data = NULL; CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long))); nmsNormalLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh); // unsigned long long mask_cpu[boxes_num * col_blocks]; // unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks]; std::vector mask_cpu(boxes_num * col_blocks); // printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks); CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long), cudaMemcpyDeviceToHost)); cudaFree(mask_data); unsigned long long remv_cpu[col_blocks]; memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long)); int num_to_keep = 0; for (int i = 0; i < boxes_num; i++){ int nblock = i / THREADS_PER_BLOCK_NMS; int inblock = i % THREADS_PER_BLOCK_NMS; if (!(remv_cpu[nblock] & (1ULL << inblock))){ keep_data[num_to_keep++] = i; unsigned long long *p = &mask_cpu[0] + i * col_blocks; for (int j = nblock; j < col_blocks; j++){ remv_cpu[j] |= p[j]; } } } if ( cudaSuccess != cudaGetLastError() ) printf( "Error!\n" ); return num_to_keep; } ================================================ FILE: pcdet/ops/iou3d_nms/src/iou3d_nms.h ================================================ #ifndef IOU3D_NMS_H #define IOU3D_NMS_H #include #include #include #include int boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_overlap); int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_iou); int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh); int nms_normal_gpu(at::Tensor 
boxes, at::Tensor keep, float nms_overlap_thresh); #endif ================================================ FILE: pcdet/ops/iou3d_nms/src/iou3d_nms_api.cpp ================================================ #include #include #include #include #include #include "iou3d_cpu.h" #include "iou3d_nms.h" PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("boxes_overlap_bev_gpu", &boxes_overlap_bev_gpu, "oriented boxes overlap"); m.def("boxes_iou_bev_gpu", &boxes_iou_bev_gpu, "oriented boxes iou"); m.def("nms_gpu", &nms_gpu, "oriented nms gpu"); m.def("nms_normal_gpu", &nms_normal_gpu, "nms gpu"); m.def("boxes_iou_bev_cpu", &boxes_iou_bev_cpu, "oriented boxes iou"); } ================================================ FILE: pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu ================================================ /* 3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) Written by Shaoshuai Shi All Rights Reserved 2019-2020. */ #include #define THREADS_PER_BLOCK 16 #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) // #define DEBUG const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; const float EPS = 1e-8; struct Point { float x, y; __device__ Point() {} __device__ Point(double _x, double _y){ x = _x, y = _y; } __device__ void set(float _x, float _y){ x = _x; y = _y; } __device__ Point operator +(const Point &b)const{ return Point(x + b.x, y + b.y); } __device__ Point operator -(const Point &b)const{ return Point(x - b.x, y - b.y); } }; __device__ inline float cross(const Point &a, const Point &b){ return a.x * b.y - a.y * b.x; } __device__ inline float cross(const Point &p1, const Point &p2, const Point &p0){ return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y); } __device__ int check_rect_cross(const Point &p1, const Point &p2, const Point &q1, const Point &q2){ int ret = min(p1.x,p2.x) <= max(q1.x,q2.x) && min(q1.x,q2.x) <= max(p1.x,p2.x) && min(p1.y,p2.y) <= max(q1.y,q2.y) && min(q1.y,q2.y) <= max(p1.y,p2.y); return ret; } 
// Returns non-zero when p lies inside the rotated box footprint, with a small margin.
__device__ inline int check_in_box2d(const float *box, const Point &p){
    //params: (7) [x, y, z, dx, dy, dz, heading]
    const float MARGIN = 1e-2;

    float center_x = box[0], center_y = box[1];
    float angle_cos = cos(-box[6]), angle_sin = sin(-box[6]);  // rotate the point in the opposite direction of box
    float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin);
    float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos;

    return (fabs(rot_x) < box[3] / 2 + MARGIN && fabs(rot_y) < box[4] / 2 + MARGIN);
}

// Intersects segment p0p1 with segment q0q1. Returns 1 and writes the point to `ans`
// when they properly cross, 0 otherwise (`ans` is then left untouched).
__device__ inline int intersection(const Point &p1, const Point &p0, const Point &q1, const Point &q0, Point &ans){
    // fast exclusion
    if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;

    // check cross standing
    float s1 = cross(q0, p1, p0);
    float s2 = cross(p1, q1, p0);
    float s3 = cross(p0, q1, q0);
    float s4 = cross(q1, p1, q0);

    if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0;

    // calculate intersection of two lines
    float s5 = cross(q1, p1, p0);
    if(fabs(s5 - s1) > EPS){
        ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);
        ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);
    }
    else{
        // Fallback: solve the two line equations a*x + b*y + c = 0 directly.
        float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y;
        float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y;
        float D = a0 * b1 - a1 * b0;

        ans.x = (b0 * c1 - b1 * c0) / D;
        ans.y = (a1 * c0 - a0 * c1) / D;
    }

    return 1;
}

// Rotates p in place around `center` by the angle whose cosine/sine are given.
__device__ inline void rotate_around_center(const Point &center, const float angle_cos, const float angle_sin, Point &p){
    float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) * (-angle_sin) + center.x;
    float new_y = (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
    p.set(new_x, new_y);
}

// Orders points by polar angle around `center`; non-zero when a comes after b.
__device__ inline int point_cmp(const Point &a, const Point &b, const Point &center){
    return atan2(a.y - center.y, a.x - center.x) > atan2(b.y - center.y, b.x - center.x);
}

// Area of the intersection of two rotated box footprints.
__device__ inline float box_overlap(const float *box_a, const float *box_b){
    // params box_a: [x, y, z, dx, dy, dz, heading]
    // params box_b: [x, y, z, dx, dy, dz, heading]

    float a_angle = box_a[6], b_angle = box_b[6];
    float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2, a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2;
    float a_x1 = box_a[0] - a_dx_half, a_y1 = box_a[1] - a_dy_half;
    float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half;
    float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half;
    float b_x2 = box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half;

    Point center_a(box_a[0], box_a[1]);
    Point center_b(box_b[0], box_b[1]);

#ifdef DEBUG
    printf("a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\n", a_x1, a_y1, a_x2, a_y2, a_angle,
           b_x1, b_y1, b_x2, b_y2, b_angle);
    printf("center a: (%.3f, %.3f), b: (%.3f, %.3f)\n", center_a.x, center_a.y, center_b.x, center_b.y);
#endif

    // Five slots: the first corner is repeated at index 4 so edge k is (k, k + 1).
    Point box_a_corners[5];
    box_a_corners[0].set(a_x1, a_y1);
    box_a_corners[1].set(a_x2, a_y1);
    box_a_corners[2].set(a_x2, a_y2);
    box_a_corners[3].set(a_x1, a_y2);

    Point box_b_corners[5];
    box_b_corners[0].set(b_x1, b_y1);
    box_b_corners[1].set(b_x2, b_y1);
    box_b_corners[2].set(b_x2, b_y2);
    box_b_corners[3].set(b_x1, b_y2);

    // get oriented corners
    float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle);
    float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);

    for (int k = 0; k < 4; k++){
#ifdef DEBUG
        printf("before corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
#endif
        rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
        rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
#ifdef DEBUG
        printf("corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
#endif
    }

    box_a_corners[4] = box_a_corners[0];
    box_b_corners[4] = box_b_corners[0];

    // get intersection of lines
    Point cross_points[16];
    Point poly_center;
    int cnt = 0, flag = 0;

    poly_center.set(0, 0);
    for (int i = 0; i < 4; i++){
        for (int j = 0; j < 4; j++){
            flag = intersection(box_a_corners[i + 1], box_a_corners[i], box_b_corners[j + 1], box_b_corners[j], cross_points[cnt]);
            if (flag){
                poly_center = poly_center + cross_points[cnt];
                cnt++;
#ifdef DEBUG
                // The b-edge is indexed by j (the original printed it with i).
                printf("Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, %.3f)->(%.3f, %.3f) \n",
                    cross_points[cnt - 1].x, cross_points[cnt - 1].y,
                    box_a_corners[i].x, box_a_corners[i].y, box_a_corners[i + 1].x, box_a_corners[i + 1].y,
                    box_b_corners[j].x, box_b_corners[j].y, box_b_corners[j + 1].x, box_b_corners[j + 1].y);
#endif
            }
        }
    }

    // check corners
    for (int k = 0; k < 4; k++){
        if (check_in_box2d(box_a, box_b_corners[k])){
            poly_center = poly_center + box_b_corners[k];
            cross_points[cnt] = box_b_corners[k];
            cnt++;
#ifdef DEBUG
            printf("b corners in a: corner_b(%.3f, %.3f)", cross_points[cnt - 1].x, cross_points[cnt - 1].y);
#endif
        }
        if (check_in_box2d(box_b, box_a_corners[k])){
            poly_center = poly_center + box_a_corners[k];
            cross_points[cnt] = box_a_corners[k];
            cnt++;
#ifdef DEBUG
            printf("a corners in b: corner_a(%.3f, %.3f)", cross_points[cnt - 1].x, cross_points[cnt - 1].y);
#endif
        }
    }

    // Fewer than three vertices enclose no area; returning here also avoids the
    // 0 / 0 centroid division when the boxes do not touch at all.
    if (cnt < 3) return 0.0f;

    poly_center.x /= cnt;
    poly_center.y /= cnt;

    // sort the points of polygon (bubble sort by polar angle around the centroid)
    Point temp;
    for (int j = 0; j < cnt - 1; j++){
        for (int i = 0; i < cnt - j - 1; i++){
            if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)){
                temp = cross_points[i];
                cross_points[i] = cross_points[i + 1];
                cross_points[i + 1] = temp;
            }
        }
    }

#ifdef DEBUG
    printf("cnt=%d\n", cnt);
    for (int i = 0; i < cnt; i++){
        printf("All cross point %d: (%.3f, %.3f)\n", i, cross_points[i].x, cross_points[i].y);
    }
#endif

    // get the overlap areas (triangle fan anchored at the first vertex)
    float area = 0;
    for (int k = 0; k < cnt - 1; k++){
        area += cross(cross_points[k] - cross_points[0], cross_points[k + 1] - cross_points[0]);
    }

    return fabs(area) / 2.0;
}

// Rotated bird's-eye-view IoU of two boxes.
__device__ inline float iou_bev(const float *box_a, const float *box_b){
    // params box_a: [x, y, z, dx, dy, dz, heading]
    // params box_b: [x, y, z, dx, dy, dz, heading]
    float sa = box_a[3] * box_a[4];
    float sb = box_b[3] * box_b[4];
    float s_overlap = box_overlap(box_a, box_b);
    // The union is clamped to EPS so zero-area boxes do not divide by zero.
    return s_overlap / fmaxf(sa + sb - s_overlap, EPS);
}

// One thread per (a, b) pair: writes the rotated BEV overlap area to ans_overlap[a, b].
__global__ void boxes_overlap_kernel(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap){
    // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
    // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
    const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
    const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;

    // Threads past the end of either box list have nothing to do.
    if (a_idx >= num_a || b_idx >= num_b){
        return;
    }
    const float * cur_box_a = boxes_a + a_idx * 7;
    const float * cur_box_b = boxes_b + b_idx * 7;
    float s_overlap = box_overlap(cur_box_a, cur_box_b);
    ans_overlap[a_idx * num_b + b_idx] = s_overlap;
}

// One thread per (a, b) pair: writes the rotated BEV IoU to ans_iou[a, b].
__global__ void boxes_iou_bev_kernel(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_iou){
    // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
    // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
    const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
    const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;

    // Threads past the end of either box list have nothing to do.
    if (a_idx >= num_a || b_idx >= num_b){
        return;
    }

    const float * cur_box_a = boxes_a + a_idx * 7;
    const float * cur_box_b = boxes_b + b_idx * 7;
    float cur_iou_bev = iou_bev(cur_box_a, cur_box_b);
    ans_iou[a_idx * num_b + b_idx] = cur_iou_bev;
}

// Builds the suppression bitmask for rotated NMS. Each CUDA block compares one block of
// "row" boxes against one block of "column" boxes; each thread owns one row box and
// writes one mask word whose bit i is set when that box overlaps column box i above
// the threshold.
__global__ void nms_kernel(const int boxes_num, const float nms_overlap_thresh,
                           const float *boxes, unsigned long long *mask){
    //params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
    //params: mask (N, N/THREADS_PER_BLOCK_NMS)

    const int row_start = blockIdx.y;
    const int col_start = blockIdx.x;

    // if (row_start > col_start) return;

    // The last block along each axis may be only partially filled.
    const int row_size = min(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
    const int col_size = min(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);

    // Column boxes are staged in shared memory so every thread in the block can read them.
    __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7];

    if (threadIdx.x < col_size) {
        const float *src = boxes + (THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7;
        for (int k = 0; k < 7; k++) {
            block_boxes[threadIdx.x * 7 + k] = src[k];
        }
    }
    __syncthreads();

    if (threadIdx.x < row_size) {
        const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
        const float *cur_box = boxes + cur_box_idx * 7;

        int i = 0;
        unsigned long long t = 0;
        int start = 0;
        // On the diagonal block, only compare against boxes that come after this one.
        if (row_start == col_start) {
          start = threadIdx.x + 1;
        }
        for (i = start; i < col_size; i++) {
            if (iou_bev(cur_box, block_boxes + i * 7) > nms_overlap_thresh){
                t |= 1ULL << i;
            }
        }
        const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
        mask[cur_box_idx * col_blocks + col_start] = t;
    }
}


// IoU of the two box footprints treated as axis-aligned rectangles (heading is not used).
__device__ inline float iou_normal(float const * const a, float const * const b) {
    //params: a: [x, y, z, dx, dy, dz, heading]
    //params: b: [x, y, z, dx, dy, dz, heading]

    float left = fmaxf(a[0] - a[3] / 2, b[0] - b[3] / 2), right = fminf(a[0] + a[3] / 2, b[0] + b[3] / 2);
    float top = fmaxf(a[1] - a[4] / 2, b[1] - b[4] / 2), bottom = fminf(a[1] + a[4] / 2, b[1] + b[4] / 2);
    float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);
    float interS = width * height;
    float Sa = a[3] * a[4];
    float Sb = b[3] * b[4];
    // The union is clamped to EPS so zero-area boxes do not divide by zero.
    return interS / fmaxf(Sa + Sb - interS, EPS);
}


// Same bitmask construction as nms_kernel, but using iou_normal as the overlap measure.
__global__ void nms_normal_kernel(const int boxes_num, const float nms_overlap_thresh,
                           const float *boxes, unsigned long long *mask){
    //params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
    //params: mask (N, N/THREADS_PER_BLOCK_NMS)

    const int row_start = blockIdx.y;
    const int col_start = blockIdx.x;

    // if (row_start > col_start) return;

    // The last block along each axis may be only partially filled.
    const int row_size = min(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
    const int col_size = min(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);

    // Column boxes are staged in shared memory so every thread in the block can read them.
    __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7];

    if (threadIdx.x < col_size) {
        const float *src = boxes + (THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7;
        for (int k = 0; k < 7; k++) {
            block_boxes[threadIdx.x * 7 + k] = src[k];
        }
    }
    __syncthreads();

    if (threadIdx.x < row_size) {
        const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
        const float *cur_box = boxes + cur_box_idx * 7;

        int i = 0;
        unsigned long long t = 0;
        int start = 0;
        // On the diagonal block, only compare against boxes that come after this one.
        if (row_start == col_start) {
          start = threadIdx.x + 1;
        }
        for (i = start; i < col_size; i++) {
            if (iou_normal(cur_box, block_boxes + i * 7) > nms_overlap_thresh){
                t |= 1ULL << i;
            }
        }
        const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
        mask[cur_box_idx * col_blocks + col_start] = t;
    }
}


// Launches boxes_overlap_kernel over the full (num_a, num_b) grid of box pairs.
void boxesoverlapLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap){
    // A zero-sized grid is an invalid launch configuration; there is nothing to compute anyway.
    if (num_a <= 0 || num_b <= 0) return;

    dim3 blocks(DIVUP(num_b, THREADS_PER_BLOCK), DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);

    boxes_overlap_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b, ans_overlap);
#ifdef DEBUG
    cudaDeviceSynchronize();  // for using printf in kernel function
#endif
}

// Launches boxes_iou_bev_kernel over the full (num_a, num_b) grid of box pairs.
void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_iou){
    // A zero-sized grid is an invalid launch configuration; there is nothing to compute anyway.
    if (num_a <= 0 || num_b <= 0) return;

    dim3 blocks(DIVUP(num_b, THREADS_PER_BLOCK), DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);

    boxes_iou_bev_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b, ans_iou);
#ifdef DEBUG
    cudaDeviceSynchronize();  // for using printf in kernel function
#endif
}


// Launches nms_kernel with one CUDA block per (row block, column block) pair.
void nmsLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh){
    // A zero-sized grid is an invalid launch configuration; there is nothing to compute anyway.
    if (boxes_num <= 0) return;

    dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
                DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
    dim3 threads(THREADS_PER_BLOCK_NMS);
    nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
}


// Launches nms_normal_kernel with one CUDA block per (row block, column block) pair.
void nmsNormalLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh){
    // A zero-sized grid is an invalid launch configuration; there is nothing to compute anyway.
    if (boxes_num <= 0) return;

    dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
                DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
    dim3 threads(THREADS_PER_BLOCK_NMS);
    nms_normal_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
}

================================================
FILE: pcdet/ops/pointnet2/pointnet2_batch/pointnet2_modules.py
================================================
from typing import List

import torch
import torch.nn as nn
import torch.nn.functional as F

from . 
def _build_shared_mlp(channels: List[int]) -> nn.Sequential:
    """Stack 1x1 Conv2d -> BatchNorm2d -> ReLU for each consecutive channel pair."""
    layers = []
    for c_in, c_out in zip(channels[:-1], channels[1:]):
        layers.extend([
            nn.Conv2d(c_in, c_out, kernel_size=1, bias=False),
            nn.BatchNorm2d(c_out),
            nn.ReLU()
        ])
    return nn.Sequential(*layers)


class _PointnetSAModuleBase(nn.Module):
    """Forward pass shared by the set-abstraction layers below.

    Subclasses fill in ``npoint``, ``groupers`` and ``mlps`` (one grouper/MLP
    pair per scale) and may change ``pool_method``.
    """

    def __init__(self):
        super().__init__()
        self.npoint = None
        self.groupers = None
        self.mlps = None
        self.pool_method = 'max_pool'

    def forward(self, xyz: torch.Tensor, features: torch.Tensor = None, new_xyz=None) -> (torch.Tensor, torch.Tensor):
        r"""
        :param xyz: (B, N, 3) tensor of the xyz coordinates of the features
        :param features: (B, C, N) tensor of the descriptors of the features
        :param new_xyz: optional (B, npoint, 3) centroids; when None they are chosen by
            furthest point sampling (or left as None if ``self.npoint`` is None)
        :return:
            new_xyz: (B, npoint, 3) tensor of the new features' xyz
            new_features: (B, \sum_k(mlps[k][-1]), npoint) tensor of the new_features descriptors
        :raises NotImplementedError: if ``self.pool_method`` is neither 'max_pool' nor 'avg_pool'
        """
        new_features_list = []

        if new_xyz is None and self.npoint is not None:
            # gather_operation expects channel-first (B, 3, N) coordinates.
            xyz_flipped = xyz.transpose(1, 2).contiguous()
            new_xyz = pointnet2_utils.gather_operation(
                xyz_flipped,
                pointnet2_utils.furthest_point_sample(xyz, self.npoint)
            ).transpose(1, 2).contiguous()

        for grouper, mlp in zip(self.groupers, self.mlps):
            new_features = grouper(xyz, new_xyz, features)  # (B, C, npoint, nsample)
            new_features = mlp(new_features)  # (B, mlp[-1], npoint, nsample)

            # Pool over the whole nsample axis so it collapses to size 1.
            kernel_size = [1, new_features.size(3)]
            if self.pool_method == 'max_pool':
                new_features = F.max_pool2d(new_features, kernel_size=kernel_size)  # (B, mlp[-1], npoint, 1)
            elif self.pool_method == 'avg_pool':
                new_features = F.avg_pool2d(new_features, kernel_size=kernel_size)  # (B, mlp[-1], npoint, 1)
            else:
                raise NotImplementedError

            new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)
            new_features_list.append(new_features)

        return new_xyz, torch.cat(new_features_list, dim=1)


class PointnetSAModuleMSG(_PointnetSAModuleBase):
    """Pointnet set abstraction layer with multiscale grouping"""

    def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True,
                 use_xyz: bool = True, pool_method='max_pool'):
        """
        :param npoint: int, number of centroids; None groups all points into a single set
        :param radii: list of float, list of radii to group with
        :param nsamples: list of int, number of samples in each ball query
        :param mlps: list of list of int, spec of the pointnet before the global pooling for each scale
            (not modified by this constructor)
        :param bn: accepted for interface compatibility; BatchNorm2d is always inserted
        :param use_xyz: if True the grouped xyz offsets are prepended, adding 3 input channels
        :param pool_method: max_pool / avg_pool
        """
        super().__init__()

        assert len(radii) == len(nsamples) == len(mlps)

        self.npoint = npoint
        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for radius, nsample, channels in zip(radii, nsamples, mlps):
            self.groupers.append(
                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz)
                if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
            )
            # Work on a copy: bumping the caller's list in place would add 3
            # again every time the same config is reused to build a layer.
            mlp_spec = list(channels)
            if use_xyz:
                mlp_spec[0] += 3

            self.mlps.append(_build_shared_mlp(mlp_spec))

        self.pool_method = pool_method


class PointnetSAModule(PointnetSAModuleMSG):
    """Pointnet set abstraction layer"""

    def __init__(self, *, mlp: List[int], npoint: int = None, radius: float = None, nsample: int = None,
                 bn: bool = True, use_xyz: bool = True, pool_method='max_pool'):
        """
        :param mlp: list of int, spec of the pointnet before the global max_pool
        :param npoint: int, number of features
        :param radius: float, radius of ball
        :param nsample: int, number of samples in the ball query
        :param bn: whether to use batchnorm (forwarded to PointnetSAModuleMSG)
        :param use_xyz:
        :param pool_method: max_pool / avg_pool
        """
        # Single-scale case: wrap every per-scale argument in a one-element list.
        super().__init__(
            mlps=[mlp], npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz,
            pool_method=pool_method
        )


class PointnetFPModule(nn.Module):
    r"""Propagates the features of one set to another"""

    def __init__(self, *, mlp: List[int], bn: bool = True):
        """
        :param mlp: list of int
        :param bn: accepted for interface compatibility; BatchNorm2d is always inserted
        """
        super().__init__()
        self.mlp = _build_shared_mlp(mlp)

    def forward(
            self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor
    ) -> torch.Tensor:
        """
        :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features
        :param known: (B, m, 3) tensor of the xyz positions of the known features,
            or None to broadcast ``known_feats`` to all n points
        :param unknow_feats: (B, C1, n) tensor of the features to be propagated to, or None
        :param known_feats: (B, C2, m) tensor of features to be propagated
        :return:
            new_features: (B, mlp[-1], n) tensor of the features of the unknown features
        """
        if known is not None:
            # Inverse-distance weighting over the three nearest known points.
            dist, idx = pointnet2_utils.three_nn(unknown, known)
            dist_recip = 1.0 / (dist + 1e-8)  # epsilon avoids division by zero for coincident points
            norm = torch.sum(dist_recip, dim=2, keepdim=True)
            weight = dist_recip / norm

            interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight)
        else:
            interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1))

        if unknow_feats is not None:
            new_features = torch.cat([interpolated_feats, unknow_feats], dim=1)  # (B, C2 + C1, n)
        else:
            new_features = interpolated_feats

        # The shared MLP is built from Conv2d layers, so add a trailing unit axis.
        new_features = new_features.unsqueeze(-1)
        new_features = self.mlp(new_features)

        return new_features.squeeze(-1)


if __name__ == "__main__":
    pass
class FurthestPointSampling(Function):
    @staticmethod
    def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:
        """
        Uses iterative furthest point sampling to select a set of npoint features that have the largest
        minimum distance
        :param ctx:
        :param xyz: (B, N, 3) where N > npoint
        :param npoint: int, number of features in the sampled set
        :return:
             output: (B, npoint) tensor containing the set
        """
        assert xyz.is_contiguous()

        B, N, _ = xyz.size()
        # Allocate on the input's device rather than the current default CUDA device.
        output = torch.empty((B, npoint), dtype=torch.int32, device=xyz.device)
        # Scratch buffer handed to the kernel, pre-filled with a large value.
        temp = torch.full((B, N), 1e10, dtype=torch.float32, device=xyz.device)

        pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output)
        return output

    @staticmethod
    def backward(ctx, a=None):
        # Sampling indices carry no gradient for either forward input.
        return None, None


furthest_point_sample = FurthestPointSampling.apply


class GatherOperation(Function):

    @staticmethod
    def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
        """
        :param ctx:
        :param features: (B, C, N)
        :param idx: (B, npoint) index tensor of the features to gather
        :return:
            output: (B, C, npoint)
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()

        B, npoint = idx.size()
        _, C, N = features.size()
        output = torch.empty((B, C, npoint), dtype=torch.float32, device=features.device)

        pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output)

        ctx.for_backwards = (idx, C, N)
        return output

    @staticmethod
    def backward(ctx, grad_out):
        idx, C, N = ctx.for_backwards
        B, npoint = idx.size()

        # Zero-initialised buffer that the CUDA wrapper fills in.
        grad_features = torch.zeros((B, C, N), dtype=torch.float32, device=grad_out.device)
        grad_out_data = grad_out.detach().contiguous()
        pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features)
        return grad_features, None


gather_operation = GatherOperation.apply


class ThreeNN(Function):

    @staticmethod
    def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Find the three nearest neighbors of unknown in known
        :param ctx:
        :param unknown: (B, N, 3)
        :param known: (B, M, 3)
        :return:
            dist: (B, N, 3) l2 distance to the three nearest neighbors
            idx: (B, N, 3) index of 3 nearest neighbors
        """
        assert unknown.is_contiguous()
        assert known.is_contiguous()

        B, N, _ = unknown.size()
        m = known.size(1)
        dist2 = torch.empty((B, N, 3), dtype=torch.float32, device=unknown.device)
        idx = torch.empty((B, N, 3), dtype=torch.int32, device=unknown.device)

        pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx)
        # The kernel writes squared distances; callers get plain l2 distances.
        return torch.sqrt(dist2), idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        return None, None


three_nn = ThreeNN.apply


class ThreeInterpolate(Function):

    @staticmethod
    def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
        """
        Performs weight linear interpolation on 3 features
        :param ctx:
        :param features: (B, C, M) Features descriptors to be interpolated from
        :param idx: (B, n, 3) three nearest neighbors of the target features in features
        :param weight: (B, n, 3) weights
        :return:
            output: (B, C, N) tensor of the interpolated features
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()
        assert weight.is_contiguous()

        B, c, m = features.size()
        n = idx.size(1)
        ctx.three_interpolate_for_backward = (idx, weight, m)
        output = torch.empty((B, c, n), dtype=torch.float32, device=features.device)

        pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight, output)
        return output

    @staticmethod
    def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        :param ctx:
        :param grad_out: (B, C, N) tensor with gradients of outputs
        :return:
            grad_features: (B, C, M) tensor with gradients of features
            None:
            None:
        """
        idx, weight, m = ctx.three_interpolate_for_backward
        B, c, n = grad_out.size()

        # Zero-initialised because the kernel accumulates into it.
        grad_features = torch.zeros((B, c, m), dtype=torch.float32, device=grad_out.device)
        grad_out_data = grad_out.detach().contiguous()

        pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data, idx, weight, grad_features)
        return grad_features, None, None


three_interpolate = ThreeInterpolate.apply


class GroupingOperation(Function):

    @staticmethod
    def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
        """
        :param ctx:
        :param features: (B, C, N) tensor of features to group
        :param idx: (B, npoint, nsample) tensor containing the indicies of features to group with
        :return:
            output: (B, C, npoint, nsample) tensor
        """
        assert features.is_contiguous()
        assert idx.is_contiguous()

        B, nfeatures, nsample = idx.size()
        _, C, N = features.size()
        output = torch.empty((B, C, nfeatures, nsample), dtype=torch.float32, device=features.device)

        pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output)

        ctx.for_backwards = (idx, N)
        return output

    @staticmethod
    def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        :param ctx:
        :param grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward
        :return:
            grad_features: (B, C, N) gradient of the features
        """
        idx, N = ctx.for_backwards

        B, C, npoint, nsample = grad_out.size()
        # Zero-initialised because the kernel accumulates into it.
        grad_features = torch.zeros((B, C, N), dtype=torch.float32, device=grad_out.device)

        grad_out_data = grad_out.detach().contiguous()
        pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features)
        return grad_features, None


grouping_operation = GroupingOperation.apply


class BallQuery(Function):

    @staticmethod
    def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor:
        """
        :param ctx:
        :param radius: float, radius of the balls
        :param nsample: int, maximum number of features in the balls
        :param xyz: (B, N, 3) xyz coordinates of the features
        :param new_xyz: (B, npoint, 3) centers of the ball query
        :return:
            idx: (B, npoint, nsample) tensor with the indicies of the features that form the query balls
        """
        assert new_xyz.is_contiguous()
        assert xyz.is_contiguous()

        B, N, _ = xyz.size()
        npoint = new_xyz.size(1)
        # Zero-filled so every entry holds a valid index (0) before the kernel runs.
        idx = torch.zeros((B, npoint, nsample), dtype=torch.int32, device=xyz.device)

        pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz, xyz, idx)
        return idx

    @staticmethod
    def backward(ctx, a=None):
        return None, None, None, None


ball_query = BallQuery.apply


class QueryAndGroup(nn.Module):
    def __init__(self, radius: float, nsample: int, use_xyz: bool = True):
        """
        :param radius: float, radius of ball
        :param nsample: int, maximum number of features to gather in the ball
        :param use_xyz: if True, concatenate the centred xyz offsets in front of the grouped features
        """
        super().__init__()
        self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz

    def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None) -> torch.Tensor:
        """
        :param xyz: (B, N, 3) xyz coordinates of the features
        :param new_xyz: (B, npoint, 3) centroids
        :param features: (B, C, N) descriptors of the features
        :return:
            new_features: (B, 3 + C, npoint, nsample)
        """
        idx = ball_query(self.radius, self.nsample, xyz, new_xyz)
        xyz_trans = xyz.transpose(1, 2).contiguous()
        grouped_xyz = grouping_operation(xyz_trans, idx)  # (B, 3, npoint, nsample)
        # Express neighbour coordinates relative to their ball centre.
        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)

        if features is not None:
            grouped_features = grouping_operation(features, idx)
            if self.use_xyz:
                new_features = torch.cat([grouped_xyz, grouped_features], dim=1)  # (B, C + 3, npoint, nsample)
            else:
                new_features = grouped_features
        else:
            assert self.use_xyz, "Cannot have not features and not use xyz as a feature!"
            new_features = grouped_xyz

        return new_features


class GroupAll(nn.Module):
    def __init__(self, use_xyz: bool = True):
        super().__init__()
        self.use_xyz = use_xyz

    def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None):
        """
        :param xyz: (B, N, 3) xyz coordinates of the features
        :param new_xyz: ignored
        :param features: (B, C, N) descriptors of the features
        :return:
            new_features: (B, C + 3, 1, N)
        """
        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
        if features is not None:
            grouped_features = features.unsqueeze(2)
            if self.use_xyz:
                new_features = torch.cat([grouped_xyz, grouped_features], dim=1)  # (B, 3 + C, 1, N)
            else:
                new_features = grouped_features
        else:
            # Without descriptors the coordinates are returned regardless of use_xyz.
            new_features = grouped_xyz

        return new_features
*/ #include #include #include #include #include #include "ball_query_gpu.h" extern THCState *state; #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) { CHECK_INPUT(new_xyz_tensor); CHECK_INPUT(xyz_tensor); const float *new_xyz = new_xyz_tensor.data(); const float *xyz = xyz_tensor.data(); int *idx = idx_tensor.data(); ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx); return 1; } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_batch/src/ball_query_gpu.cu ================================================ /* batch version of ball query, modified from the original implementation of official PointNet++ codes. Written by Shaoshuai Shi All Rights Reserved 2018. 
// For every query centre, collects the indices of up to nsample points of the
// same batch element whose squared distance to the centre is below radius^2.
// One thread per (batch element, query centre).
__global__ void ball_query_kernel_fast(int b, int n, int m, float radius, int nsample,
    const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) {
    // new_xyz: (B, M, 3)
    // xyz: (B, N, 3)
    // output:
    //      idx: (B, M, nsample)
    int bs_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (bs_idx >= b || pt_idx >= m) return;

    // Advance the pointers to this thread's centre / batch / output row.
    new_xyz += bs_idx * m * 3 + pt_idx * 3;
    xyz += bs_idx * n * 3;
    idx += bs_idx * m * nsample + pt_idx * nsample;

    float radius2 = radius * radius;
    float new_x = new_xyz[0];
    float new_y = new_xyz[1];
    float new_z = new_xyz[2];

    int cnt = 0;
    for (int k = 0; k < n; ++k) {
        float x = xyz[k * 3 + 0];
        float y = xyz[k * 3 + 1];
        float z = xyz[k * 3 + 2];
        float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
        if (d2 < radius2){
            if (cnt == 0){
                // Pre-fill the whole row with the first hit so slots that are
                // never reached still hold a point inside the ball.
                for (int l = 0; l < nsample; ++l) {
                    idx[l] = k;
                }
            }
            idx[cnt] = k;
            ++cnt;
            if (cnt >= nsample) break;
        }
    }
    // When no point is inside the ball the row is left untouched.
}


// Host-side launcher: one thread per query centre, one grid row per batch
// element. Prints the CUDA error and exits the process if the launch fails.
void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, \
    const float *new_xyz, const float *xyz, int *idx) {
    // new_xyz: (B, M, 3)
    // xyz: (B, N, 3)
    // output:
    //      idx: (B, M, nsample)

    cudaError_t err;

    dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    ball_query_kernel_fast<<<blocks, threads>>>(b, n, m, radius, nsample, new_xyz, xyz, idx);
    // cudaDeviceSynchronize();  // for using printf in kernel function
    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
#define TOTAL_THREADS 1024
#define THREADS_PER_BLOCK 256
// Integer ceiling division for non-negative m and positive n.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))

// Returns the largest power of two that is <= work_size, clamped to the range
// [1, TOTAL_THREADS]. Non-positive work sizes yield 1.
// Computed with integer arithmetic so the result does not depend on
// floating-point rounding of log() and is defined for work_size <= 0.
inline int opt_n_threads(int work_size) {
    int n_threads = 1;
    while (n_threads * 2 <= work_size && n_threads * 2 <= TOTAL_THREADS) {
        n_threads *= 2;
    }
    return n_threads;
}
*/ #include #include #include #include #include #include "group_points_gpu.h" extern THCState *state; int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { float *grad_points = grad_points_tensor.data(); const int *idx = idx_tensor.data(); const float *grad_out = grad_out_tensor.data(); group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points); return 1; } int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) { const float *points = points_tensor.data(); const int *idx = idx_tensor.data(); float *out = out_tensor.data(); group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out); return 1; } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_batch/src/group_points_gpu.cu ================================================ /* batch version of point grouping, modified from the original implementation of official PointNet++ codes. Written by Shaoshuai Shi All Rights Reserved 2018. 
// Backward of point grouping: scatters each grad_out element back onto the
// source point it was gathered from. One thread per
// (batch, channel, point, sample); atomicAdd is used because several grouped
// samples can refer to the same source point.
__global__ void group_points_grad_kernel_fast(int b, int c, int n, int npoints, int nsample,
    const float *__restrict__ grad_out, const int *__restrict__ idx, float *__restrict__ grad_points) {
    // grad_out: (B, C, npoints, nsample)
    // idx: (B, npoints, nsample)
    // output:
    //      grad_points: (B, C, N)
    int bs_idx = blockIdx.z;
    int c_idx = blockIdx.y;
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    int pt_idx = index / nsample;
    if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;

    int sample_idx = index % nsample;
    grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;
    idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;

    atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0] , grad_out[0]);
}

// Host-side launcher for group_points_grad_kernel_fast. Prints the CUDA error
// and exits the process if the launch fails.
void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample,
    const float *grad_out, const int *idx, float *grad_points) {
    // grad_out: (B, C, npoints, nsample)
    // idx: (B, npoints, nsample)
    // output:
    //      grad_points: (B, C, N)
    cudaError_t err;
    dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    group_points_grad_kernel_fast<<<blocks, threads>>>(b, c, n, npoints, nsample, grad_out, idx, grad_points);

    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}


// Forward of point grouping: out[b, c, p, s] = points[b, c, idx[b, p, s]].
// One thread per (batch, channel, point, sample).
__global__ void group_points_kernel_fast(int b, int c, int n, int npoints, int nsample,
    const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {
    // points: (B, C, N)
    // idx: (B, npoints, nsample)
    // output:
    //      out: (B, C, npoints, nsample)
    int bs_idx = blockIdx.z;
    int c_idx = blockIdx.y;
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    int pt_idx = index / nsample;
    if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;

    int sample_idx = index % nsample;

    idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
    int in_idx = bs_idx * c * n + c_idx * n + idx[0];
    int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;

    out[out_idx] = points[in_idx];
}


// Host-side launcher for group_points_kernel_fast. Prints the CUDA error and
// exits the process if the launch fails.
void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample,
    const float *points, const int *idx, float *out) {
    // points: (B, C, N)
    // idx: (B, npoints, nsample)
    // output:
    //      out: (B, C, npoints, nsample)
    cudaError_t err;
    dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    group_points_kernel_fast<<<blocks, threads>>>(b, c, n, npoints, nsample, points, idx, out);
    // cudaDeviceSynchronize();  // for using printf in kernel function
    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
implementation of official PointNet++ codes. Written by Shaoshuai Shi All Rights Reserved 2018. */ #include #include #include #include #include #include #include #include #include "interpolate_gpu.h" extern THCState *state; void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) { const float *unknown = unknown_tensor.data(); const float *known = known_tensor.data(); float *dist2 = dist2_tensor.data(); int *idx = idx_tensor.data(); three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx); } void three_interpolate_wrapper_fast(int b, int c, int m, int n, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor) { const float *points = points_tensor.data(); const float *weight = weight_tensor.data(); float *out = out_tensor.data(); const int *idx = idx_tensor.data(); three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out); } void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor) { const float *grad_out = grad_out_tensor.data(); const float *weight = weight_tensor.data(); float *grad_points = grad_points_tensor.data(); const int *idx = idx_tensor.data(); three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points); } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_batch/src/interpolate_gpu.cu ================================================ /* batch version of point interpolation, modified from the original implementation of official PointNet++ codes. Written by Shaoshuai Shi All Rights Reserved 2018. 
// For every "unknown" point, finds the three closest "known" points of the
// same batch element by a linear scan and writes their squared distances and
// indices. One thread per (batch element, unknown point).
__global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown,
    const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {
    // unknown: (B, N, 3)
    // known: (B, M, 3)
    // output:
    //      dist2: (B, N, 3)
    //      idx: (B, N, 3)

    int bs_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (bs_idx >= b || pt_idx >= n) return;

    // Advance the pointers to this thread's point / batch / output row.
    unknown += bs_idx * n * 3 + pt_idx * 3;
    known += bs_idx * m * 3;
    dist2 += bs_idx * n * 3 + pt_idx * 3;
    idx += bs_idx * n * 3 + pt_idx * 3;

    float ux = unknown[0];
    float uy = unknown[1];
    float uz = unknown[2];

    // Running top-3 (smallest first). With fewer than three known points the
    // unused slots keep the 1e40 sentinel and index 0.
    double best1 = 1e40, best2 = 1e40, best3 = 1e40;
    int besti1 = 0, besti2 = 0, besti3 = 0;
    for (int k = 0; k < m; ++k) {
        float x = known[k * 3 + 0];
        float y = known[k * 3 + 1];
        float z = known[k * 3 + 2];
        float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
        if (d < best1) {
            best3 = best2; besti3 = besti2;
            best2 = best1; besti2 = besti1;
            best1 = d; besti1 = k;
        }
        else if (d < best2) {
            best3 = best2; besti3 = besti2;
            best2 = d; besti2 = k;
        }
        else if (d < best3) {
            best3 = d; besti3 = k;
        }
    }
    dist2[0] = best1; dist2[1] = best2; dist2[2] = best3;
    idx[0] = besti1; idx[1] = besti2; idx[2] = besti3;
}


// Host-side launcher for three_nn_kernel_fast. Prints the CUDA error and exits
// the process if the launch fails.
void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown,
    const float *known, float *dist2, int *idx) {
    // unknown: (B, N, 3)
    // known: (B, M, 3)
    // output:
    //      dist2: (B, N, 3)
    //      idx: (B, N, 3)

    cudaError_t err;
    dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    three_nn_kernel_fast<<<blocks, threads>>>(b, n, m, unknown, known, dist2, idx);

    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}


// Forward interpolation: out[b, c, p] is the weighted sum of the three source
// features selected by idx[b, p, :]. One thread per (batch, channel, point).
__global__ void three_interpolate_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points,
    const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) {
    // points: (B, C, M)
    // idx: (B, N, 3)
    // weight: (B, N, 3)
    // output:
    //      out: (B, C, N)

    int bs_idx = blockIdx.z;
    int c_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;

    weight += bs_idx * n * 3 + pt_idx * 3;
    points += bs_idx * c * m + c_idx * m;
    idx += bs_idx * n * 3 + pt_idx * 3;
    out += bs_idx * c * n + c_idx * n;

    out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]];
}

// Host-side launcher for three_interpolate_kernel_fast. Prints the CUDA error
// and exits the process if the launch fails.
void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n,
    const float *points, const int *idx, const float *weight, float *out) {
    // points: (B, C, M)
    // idx: (B, N, 3)
    // weight: (B, N, 3)
    // output:
    //      out: (B, C, N)

    cudaError_t err;
    dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);
    three_interpolate_kernel_fast<<<blocks, threads>>>(b, c, m, n, points, idx, weight, out);

    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}


// Backward interpolation: distributes each output gradient onto its three
// source points, scaled by the interpolation weights. atomicAdd is used
// because several output points can share a source point.
__global__ void three_interpolate_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out,
    const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) {
    // grad_out: (B, C, N)
    // weight: (B, N, 3)
    // output:
    //      grad_points: (B, C, M)

    int bs_idx = blockIdx.z;
    int c_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;

    grad_out += bs_idx * c * n + c_idx * n + pt_idx;
    weight += bs_idx * n * 3 + pt_idx * 3;
    grad_points += bs_idx * c * m + c_idx * m;
    idx += bs_idx * n * 3 + pt_idx * 3;

    atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]);
    atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]);
    atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]);
}

// Host-side launcher for three_interpolate_grad_kernel_fast. Prints the CUDA
// error and exits the process if the launch fails.
void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out,
    const int *idx, const float *weight, float *grad_points) {
    // grad_out: (B, C, N)
    // weight: (B, N, 3)
    // output:
    //      grad_points: (B, C, M)

    cudaError_t err;
    dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);
    three_interpolate_grad_kernel_fast<<<blocks, threads>>>(b, c, n, m, grad_out, idx, weight, grad_points);

    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
// Python bindings of the batch-layout PointNet++ extension. Each m.def maps
// a Python-visible name (first argument) to a C++ wrapper function (second
// argument); the third argument is the docstring shown on the Python side.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    // ball query / grouping
    m.def("ball_query_wrapper", &ball_query_wrapper_fast, "ball_query_wrapper_fast");

    m.def("group_points_wrapper", &group_points_wrapper_fast, "group_points_wrapper_fast");
    m.def("group_points_grad_wrapper", &group_points_grad_wrapper_fast, "group_points_grad_wrapper_fast");

    // gathering and furthest point sampling
    m.def("gather_points_wrapper", &gather_points_wrapper_fast, "gather_points_wrapper_fast");
    m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper_fast, "gather_points_grad_wrapper_fast");

    m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper");

    // three-nearest-neighbour search and interpolation (forward / backward)
    m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast");
    m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast");
    m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast");
}
*/ #include #include #include #include #include "sampling_gpu.h" extern THCState *state; int gather_points_wrapper_fast(int b, int c, int n, int npoints, at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){ const float *points = points_tensor.data(); const int *idx = idx_tensor.data(); float *out = out_tensor.data(); gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out); return 1; } int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { const float *grad_out = grad_out_tensor.data(); const int *idx = idx_tensor.data(); float *grad_points = grad_points_tensor.data(); gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points); return 1; } int furthest_point_sampling_wrapper(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) { const float *points = points_tensor.data(); float *temp = temp_tensor.data(); int *idx = idx_tensor.data(); furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx); return 1; } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_batch/src/sampling_gpu.cu ================================================ /* batch version of point sampling and gathering, modified from the original implementation of official PointNet++ codes. Written by Shaoshuai Shi All Rights Reserved 2018. 
#include <stdio.h>
#include <stdlib.h>

#include "cuda_utils.h"
#include "sampling_gpu.h"


// out[b, c, j] = points[b, c, idx[b, j]]
__global__ void gather_points_kernel_fast(int b, int c, int n, int m,
    const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {
    // points: (B, C, N)
    // idx: (B, M)
    // output:
    //      out: (B, C, M)

    int bs_idx = blockIdx.z;
    int c_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;

    out += bs_idx * c * m + c_idx * m + pt_idx;
    idx += bs_idx * m + pt_idx;
    points += bs_idx * c * n + c_idx * n;
    out[0] = points[idx[0]];
}


// Host launcher for gather_points_kernel_fast; aborts the process on a launch error.
void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints,
    const float *points, const int *idx, float *out) {
    // points: (B, C, N)
    // idx: (B, npoints)
    // output:
    //      out: (B, C, npoints)

    cudaError_t err;
    dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    gather_points_kernel_fast<<<blocks, threads>>>(b, c, n, npoints, points, idx, out);

    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}


// Backward of gather_points: accumulate grad_out[b, c, j] into
// grad_points[b, c, idx[b, j]]. atomicAdd is used because several threads
// may write the same element.
__global__ void gather_points_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out,
    const int *__restrict__ idx, float *__restrict__ grad_points) {
    // grad_out: (B, C, M)
    // idx: (B, M)
    // output:
    //      grad_points: (B, C, N)

    int bs_idx = blockIdx.z;
    int c_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;

    grad_out += bs_idx * c * m + c_idx * m + pt_idx;
    idx += bs_idx * m + pt_idx;
    grad_points += bs_idx * c * n + c_idx * n;

    atomicAdd(grad_points + idx[0], grad_out[0]);
}


// Host launcher for gather_points_grad_kernel_fast; aborts the process on a launch error.
void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints,
    const float *grad_out, const int *idx, float *grad_points) {
    // grad_out: (B, C, npoints)
    // idx: (B, npoints)
    // output:
    //      grad_points: (B, C, N)

    cudaError_t err;
    dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    gather_points_grad_kernel_fast<<<blocks, threads>>>(b, c, n, npoints, grad_out, idx, grad_points);

    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}


// One step of the arg-max reduction: slot idx1 keeps the larger of the two
// distances and the index that belongs to it (ties keep idx1's index).
__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){
    const float v1 = dists[idx1], v2 = dists[idx2];
    const int i1 = dists_i[idx1], i2 = dists_i[idx2];
    dists[idx1] = max(v1, v2);
    dists_i[idx1] = v2 > v1 ? i2 : i1;
}


// Furthest point sampling, one thread block per batch element.
// Starting from point 0, each iteration updates temp[k] (minimum squared
// distance from point k to the already selected set) and selects the point
// with the largest such distance via a block-wide arg-max reduction.
// block_size must equal the launch block dimension and be a power of two.
template <unsigned int block_size>
__global__ void furthest_point_sampling_kernel(int b, int n, int m,
    const float *__restrict__ dataset, float *__restrict__ temp, int *__restrict__ idxs) {
    // dataset: (B, N, 3)
    // tmp: (B, N)
    // output:
    //      idx: (B, M)

    if (m <= 0) return;
    __shared__ float dists[block_size];
    __shared__ int dists_i[block_size];

    int batch_index = blockIdx.x;
    dataset += batch_index * n * 3;
    temp += batch_index * n;
    idxs += batch_index * m;

    int tid = threadIdx.x;
    const int stride = block_size;

    int old = 0;
    if (threadIdx.x == 0)
        idxs[0] = old;

    __syncthreads();
    for (int j = 1; j < m; j++) {
        int besti = 0;
        float best = -1;
        float x1 = dataset[old * 3 + 0];
        float y1 = dataset[old * 3 + 1];
        float z1 = dataset[old * 3 + 2];
        // Each thread handles points tid, tid + stride, ... and tracks its own best.
        for (int k = tid; k < n; k += stride) {
            float x2, y2, z2;
            x2 = dataset[k * 3 + 0];
            y2 = dataset[k * 3 + 1];
            z2 = dataset[k * 3 + 2];

            float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
            float d2 = min(d, temp[k]);
            temp[k] = d2;
            besti = d2 > best ? k : besti;
            best = d2 > best ? d2 : best;
        }
        dists[tid] = best;
        dists_i[tid] = besti;
        __syncthreads();

        // Tree reduction over shared memory; same sequence of steps as the
        // hand-unrolled 512/256/.../1 ladder for power-of-two block sizes.
        #pragma unroll
        for (int s = block_size / 2; s > 0; s >>= 1) {
            if (tid < s) {
                __update(dists, dists_i, tid, tid + s);
            }
            __syncthreads();
        }

        old = dists_i[0];
        if (tid == 0)
            idxs[j] = old;
        // Every thread must have read dists_i[0] before thread 0 overwrites
        // it with its partial result in the next iteration.
        __syncthreads();
    }
}


// Host launcher: picks the block size from opt_n_threads(n) and dispatches to
// the matching template instantiation (one block per batch element).
// Aborts the process on a launch error.
void furthest_point_sampling_kernel_launcher(int b, int n, int m,
    const float *dataset, float *temp, int *idxs) {
    // dataset: (B, N, 3)
    // tmp: (B, N)
    // output:
    //      idx: (B, M)

    cudaError_t err;
    unsigned int n_threads = opt_n_threads(n);

    switch (n_threads) {
        case 1024:
            furthest_point_sampling_kernel<1024><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        case 512:
            furthest_point_sampling_kernel<512><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        case 256:
            furthest_point_sampling_kernel<256><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        case 128:
            furthest_point_sampling_kernel<128><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        case 64:
            furthest_point_sampling_kernel<64><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        case 32:
            furthest_point_sampling_kernel<32><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        case 16:
            furthest_point_sampling_kernel<16><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        case 8:
            furthest_point_sampling_kernel<8><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        case 4:
            furthest_point_sampling_kernel<4><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        case 2:
            furthest_point_sampling_kernel<2><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        case 1:
            furthest_point_sampling_kernel<1><<<b, n_threads>>>(b, n, m, dataset, temp, idxs); break;
        default:
            furthest_point_sampling_kernel<512><<<b, n_threads>>>(b, n, m, dataset, temp, idxs);
    }

    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
def build_local_aggregation_module(input_channels, config):
    """Build a local aggregation layer from ``config``.

    Args:
        input_channels: number of channels of the incoming point features
        config: dict-like config; ``NAME`` selects the layer type
            ('StackSAModuleMSG' by default, or 'VectorPoolAggregationModuleMSG')

    Returns:
        cur_layer: the constructed module
        num_c_out: number of output channels of ``cur_layer``

    Raises:
        NotImplementedError: if ``NAME`` is not one of the supported types
    """
    local_aggregation_name = config.get('NAME', 'StackSAModuleMSG')

    if local_aggregation_name == 'StackSAModuleMSG':
        # Build fresh lists instead of editing config.MLPS in place, so that
        # building twice from the same config does not prepend the input
        # channels twice.
        mlps = [[input_channels] + list(mlp_spec) for mlp_spec in config.MLPS]
        cur_layer = StackSAModuleMSG(
            radii=config.POOL_RADIUS, nsamples=config.NSAMPLE, mlps=mlps, use_xyz=True, pool_method='max_pool',
        )
        num_c_out = sum(mlp_spec[-1] for mlp_spec in mlps)
    elif local_aggregation_name == 'VectorPoolAggregationModuleMSG':
        cur_layer = VectorPoolAggregationModuleMSG(input_channels=input_channels, config=config)
        num_c_out = config.MSG_POST_MLPS[-1]
    else:
        raise NotImplementedError

    return cur_layer, num_c_out


class StackSAModuleMSG(nn.Module):
    """Multi-scale set abstraction on stacked (batch-concatenated) points."""

    def __init__(self, *, radii: List[float], nsamples: List[int], mlps: List[List[int]],
                 use_xyz: bool = True, pool_method='max_pool'):
        """
        Args:
            radii: list of float, list of radii to group with
            nsamples: list of int, number of samples in each ball query
            mlps: list of list of int, spec of the pointnet before the global pooling for each scale
            use_xyz: if True, 3 extra input channels are added to the first layer of every scale
            pool_method: max_pool / avg_pool
        """
        super().__init__()

        assert len(radii) == len(nsamples) == len(mlps)

        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for radius, nsample, mlp in zip(radii, nsamples, mlps):
            self.groupers.append(pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz))
            # Work on a copy: the caller's spec list must not be modified.
            mlp_spec = list(mlp)
            if use_xyz:
                mlp_spec[0] += 3

            shared_mlps = []
            for k in range(len(mlp_spec) - 1):
                shared_mlps.extend([
                    nn.Conv2d(mlp_spec[k], mlp_spec[k + 1], kernel_size=1, bias=False),
                    nn.BatchNorm2d(mlp_spec[k + 1]),
                    nn.ReLU()
                ])
            self.mlps.append(nn.Sequential(*shared_mlps))
        self.pool_method = pool_method

        self.init_weights()

    def init_weights(self):
        """Kaiming-normal init for conv weights, constant init for biases and batch norm."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            if isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1.0)
                nn.init.constant_(m.bias, 0)

    def forward(self, xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, features=None, empty_voxel_set_zeros=True):
        r"""
        :param xyz: (N1 + N2 ..., 3) tensor of the xyz coordinates of the features
        :param xyz_batch_cnt: (batch_size), [N1, N2, ...]
        :param new_xyz: (M1 + M2 ..., 3)
        :param new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
        :param features: (N1 + N2 ..., C) tensor of the descriptors of the the features
        :param empty_voxel_set_zeros: accepted for interface compatibility; not used here
        :return:
            new_xyz: (M1 + M2 ..., 3) tensor of the new features' xyz
            new_features: (M1 + M2 ..., \sum_k(mlps[k][-1])) tensor of the new_features descriptors
        """
        new_features_list = []
        for k in range(len(self.groupers)):
            new_features, ball_idxs = self.groupers[k](
                xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, features
            )  # (M1 + M2, C, nsample)
            new_features = new_features.permute(1, 0, 2).unsqueeze(dim=0)  # (1, C, M1 + M2 ..., nsample)
            new_features = self.mlps[k](new_features)  # (1, C, M1 + M2 ..., nsample)

            # Pool over the nsample axis.
            if self.pool_method == 'max_pool':
                new_features = F.max_pool2d(
                    new_features, kernel_size=[1, new_features.size(3)]
                ).squeeze(dim=-1)  # (1, C, M1 + M2 ...)
            elif self.pool_method == 'avg_pool':
                new_features = F.avg_pool2d(
                    new_features, kernel_size=[1, new_features.size(3)]
                ).squeeze(dim=-1)  # (1, C, M1 + M2 ...)
            else:
                raise NotImplementedError
            new_features = new_features.squeeze(dim=0).permute(1, 0)  # (M1 + M2 ..., C)
            new_features_list.append(new_features)

        new_features = torch.cat(new_features_list, dim=1)  # (M1 + M2 ..., C)

        return new_xyz, new_features


class StackPointnetFPModule(nn.Module):
    """Feature propagation: interpolate known features onto unknown points, then apply an MLP."""

    def __init__(self, *, mlp: List[int]):
        """
        Args:
            mlp: list of int, channel sizes of the shared MLP (input first)
        """
        super().__init__()
        shared_mlps = []
        for k in range(len(mlp) - 1):
            shared_mlps.extend([
                nn.Conv2d(mlp[k], mlp[k + 1], kernel_size=1, bias=False),
                nn.BatchNorm2d(mlp[k + 1]),
                nn.ReLU()
            ])
        self.mlp = nn.Sequential(*shared_mlps)

    def forward(self, unknown, unknown_batch_cnt, known, known_batch_cnt, unknown_feats=None, known_feats=None):
        """
        Args:
            unknown: (N1 + N2 ..., 3)
            known: (M1 + M2 ..., 3)
            unknow_feats: (N1 + N2 ..., C1)
            known_feats: (M1 + M2 ..., C2)

        Returns:
            new_features: (N1 + N2 ..., C_out)
        """
        dist, idx = pointnet2_utils.three_nn(unknown, unknown_batch_cnt, known, known_batch_cnt)
        # Inverse-distance weights, normalised over the three neighbours.
        dist_recip = 1.0 / (dist + 1e-8)
        norm = torch.sum(dist_recip, dim=-1, keepdim=True)
        weight = dist_recip / norm

        interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight)

        if unknown_feats is not None:
            new_features = torch.cat([interpolated_feats, unknown_feats], dim=1)  # (N1 + N2 ..., C2 + C1)
        else:
            new_features = interpolated_feats
        new_features = new_features.permute(1, 0)[None, :, :, None]  # (1, C, N1 + N2 ..., 1)
        new_features = self.mlp(new_features)

        new_features = new_features.squeeze(dim=0).squeeze(dim=-1).permute(1, 0)  # (N1 + N2 ..., C)
        return new_features


class VectorPoolLocalInterpolateModule(nn.Module):
    """Interpolate support features onto the local grid centres around each new point."""

    def __init__(self, mlp, num_voxels, max_neighbour_distance, nsample, neighbor_type, use_xyz=True,
                 neighbour_distance_multiplier=1.0, xyz_encoding_type='concat'):
        """
        Args:
            mlp: list of channel sizes of an optional shared MLP, or None for no MLP
            num_voxels: [num_grid_x, num_grid_y, num_grid_z]
            max_neighbour_distance:
            neighbor_type: 1: ball, others: cube
            nsample: find all (-1), find limited number(>0)
            use_xyz: if True, 9 relative-coordinate channels are appended ('concat' encoding)
            neighbour_distance_multiplier:
            xyz_encoding_type: only 'concat' is implemented
        """
        super().__init__()
        self.num_voxels = num_voxels  # [num_grid_x, num_grid_y, num_grid_z]: number of grids in each local area centered at new_xyz
        self.num_total_grids = self.num_voxels[0] * self.num_voxels[1] * self.num_voxels[2]
        self.max_neighbour_distance = max_neighbour_distance
        self.neighbor_distance_multiplier = neighbour_distance_multiplier
        self.nsample = nsample
        self.neighbor_type = neighbor_type
        self.use_xyz = use_xyz
        self.xyz_encoding_type = xyz_encoding_type

        if mlp is not None:
            # Copy so that the caller's list is left untouched.
            mlp = list(mlp)
            if self.use_xyz:
                mlp[0] += 9 if self.xyz_encoding_type == 'concat' else 0
            shared_mlps = []
            for k in range(len(mlp) - 1):
                shared_mlps.extend([
                    nn.Conv2d(mlp[k], mlp[k + 1], kernel_size=1, bias=False),
                    nn.BatchNorm2d(mlp[k + 1]),
                    nn.ReLU()
                ])
            self.mlp = nn.Sequential(*shared_mlps)
        else:
            self.mlp = None

        # Running maximum of the value reported by the neighbour search op; fed
        # back into the op on the next call.
        self.num_avg_length_of_neighbor_idxs = 1000

    def forward(self, support_xyz, support_features, xyz_batch_cnt, new_xyz, new_xyz_grid_centers, new_xyz_batch_cnt):
        """
        Args:
            support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
            support_features: (N1 + N2 ..., C) point-wise features
            xyz_batch_cnt: (batch_size), [N1, N2, ...]
            new_xyz: (M1 + M2 ..., 3) centers of the ball query
            new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid
            new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
        Returns:
            new_features: ((M1 + M2 ...) * num_total_grids, C_out)
        """
        with torch.no_grad():
            dist, idx, num_avg_length_of_neighbor_idxs = pointnet2_utils.three_nn_for_vector_pool_by_two_step(
                support_xyz, xyz_batch_cnt, new_xyz, new_xyz_grid_centers, new_xyz_batch_cnt,
                self.max_neighbour_distance, self.nsample, self.neighbor_type,
                self.num_avg_length_of_neighbor_idxs, self.num_total_grids, self.neighbor_distance_multiplier
            )
        self.num_avg_length_of_neighbor_idxs = max(self.num_avg_length_of_neighbor_idxs, num_avg_length_of_neighbor_idxs.item())

        dist_recip = 1.0 / (dist + 1e-8)
        norm = torch.sum(dist_recip, dim=-1, keepdim=True)
        weight = dist_recip / torch.clamp_min(norm, min=1e-8)

        # Grids whose first neighbour index is -1 have no neighbours: point
        # them at index 0 for the gather and zero their features afterwards.
        empty_mask = (idx.view(-1, 3)[:, 0] == -1)
        idx.view(-1, 3)[empty_mask] = 0

        interpolated_feats = pointnet2_utils.three_interpolate(support_features, idx.view(-1, 3), weight.view(-1, 3))
        interpolated_feats = interpolated_feats.view(idx.shape[0], idx.shape[1], -1)  # (M1 + M2 ..., num_total_grids, C)
        if self.use_xyz:
            near_known_xyz = support_xyz[idx.view(-1, 3).long()].view(-1, 3, 3)  # ( (M1 + M2 ...)*num_total_grids, 3)
            local_xyz = (new_xyz_grid_centers.view(-1, 1, 3) - near_known_xyz).view(-1, idx.shape[1], 9)
            if self.xyz_encoding_type == 'concat':
                interpolated_feats = torch.cat((interpolated_feats, local_xyz), dim=-1)  # ( M1 + M2 ..., num_total_grids, 9+C)
            else:
                raise NotImplementedError

        new_features = interpolated_feats.view(-1, interpolated_feats.shape[-1])  # ((M1 + M2 ...) * num_total_grids, C)
        new_features[empty_mask, :] = 0
        if self.mlp is not None:
            new_features = new_features.permute(1, 0)[None, :, :, None]  # (1, C, N1 + N2 ..., 1)
            new_features = self.mlp(new_features)

            new_features = new_features.squeeze(dim=0).squeeze(dim=-1).permute(1, 0)  # (N1 + N2 ..., C)
        return new_features


class VectorPoolAggregationModule(nn.Module):
    """Vector-pool aggregation: per-grid local features around each new point, then grouped conv + MLPs."""

    def __init__(
            self, input_channels, num_local_voxel=(3, 3, 3), local_aggregation_type='local_interpolation',
            num_reduced_channels=30, num_channels_of_local_aggregation=32, post_mlps=(128,),
            max_neighbor_distance=None, neighbor_nsample=-1, neighbor_type=0, neighbor_distance_multiplier=2.0):
        """
        Args:
            input_channels: number of input feature channels
            num_local_voxel: (num_x, num_y, num_z) local grids around each new point
            local_aggregation_type: 'local_interpolation', 'voxel_avg_pool' or 'voxel_random_choice'
            num_reduced_channels: channels after the summation-based reduction (None: keep input_channels)
            num_channels_of_local_aggregation: output channels per local grid
            post_mlps: channel sizes of the MLP applied after the grouped convolution
            max_neighbor_distance:
            neighbor_nsample:
            neighbor_type: 1: ball, others: cube
            neighbor_distance_multiplier:
        """
        super().__init__()
        self.num_local_voxel = num_local_voxel
        self.total_voxels = self.num_local_voxel[0] * self.num_local_voxel[1] * self.num_local_voxel[2]
        self.local_aggregation_type = local_aggregation_type
        assert self.local_aggregation_type in ['local_interpolation', 'voxel_avg_pool', 'voxel_random_choice']
        self.input_channels = input_channels
        self.num_reduced_channels = input_channels if num_reduced_channels is None else num_reduced_channels
        self.num_channels_of_local_aggregation = num_channels_of_local_aggregation
        self.max_neighbour_distance = max_neighbor_distance
        self.neighbor_nsample = neighbor_nsample
        self.neighbor_type = neighbor_type  # 1: ball, others: cube

        if self.local_aggregation_type == 'local_interpolation':
            self.local_interpolate_module = VectorPoolLocalInterpolateModule(
                mlp=None, num_voxels=self.num_local_voxel,
                max_neighbour_distance=self.max_neighbour_distance,
                nsample=self.neighbor_nsample,
                neighbor_type=self.neighbor_type,
                neighbour_distance_multiplier=neighbor_distance_multiplier,
            )
            num_c_in = (self.num_reduced_channels + 9) * self.total_voxels
        else:
            self.local_interpolate_module = None
            num_c_in = (self.num_reduced_channels + 3) * self.total_voxels

        num_c_out = self.total_voxels * self.num_channels_of_local_aggregation

        # groups=total_voxels: every local grid gets its own set of weights.
        self.separate_local_aggregation_layer = nn.Sequential(
            nn.Conv1d(num_c_in, num_c_out, kernel_size=1, groups=self.total_voxels, bias=False),
            nn.BatchNorm1d(num_c_out),
            nn.ReLU()
        )

        post_mlp_list = []
        c_in = num_c_out
        for cur_num_c in post_mlps:
            post_mlp_list.extend([
                nn.Conv1d(c_in, cur_num_c, kernel_size=1, bias=False),
                nn.BatchNorm1d(cur_num_c),
                nn.ReLU()
            ])
            c_in = cur_num_c
        self.post_mlps = nn.Sequential(*post_mlp_list)

        # Running maximum of the value reported by the voxel query op; fed
        # back into the op on the next call.
        self.num_mean_points_per_grid = 20
        self.init_weights()

    def init_weights(self):
        """Kaiming-normal init for conv weights, constant init for biases and batch norm."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Conv1d)):
                nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            if isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.constant_(m.weight, 1.0)
                nn.init.constant_(m.bias, 0)

    def extra_repr(self) -> str:
        ret = f'radius={self.max_neighbour_distance}, local_voxels=({self.num_local_voxel}, ' \
              f'local_aggregation_type={self.local_aggregation_type}, ' \
              f'num_c_reduction={self.input_channels}->{self.num_reduced_channels}, ' \
              f'num_c_local_aggregation={self.num_channels_of_local_aggregation}'
        return ret

    def vector_pool_with_voxel_query(self, xyz, xyz_batch_cnt, features, new_xyz, new_xyz_batch_cnt):
        """Pool features per local grid with the voxel-query op.

        Returns:
            new_features: (num_new_pts, num_voxel * (3 + C)) local xyz concatenated with pooled features
            point_cnt_of_grid: returned unchanged from the op
        """
        use_xyz = 1
        pooling_type = 0 if self.local_aggregation_type == 'voxel_avg_pool' else 1

        new_features, new_local_xyz, num_mean_points_per_grid, point_cnt_of_grid = pointnet2_utils.vector_pool_with_voxel_query_op(
            xyz, xyz_batch_cnt, features, new_xyz, new_xyz_batch_cnt,
            self.num_local_voxel[0], self.num_local_voxel[1], self.num_local_voxel[2],
            self.max_neighbour_distance, self.num_reduced_channels, use_xyz,
            self.num_mean_points_per_grid, self.neighbor_nsample, self.neighbor_type,
            pooling_type
        )
        self.num_mean_points_per_grid = max(self.num_mean_points_per_grid, num_mean_points_per_grid.item())

        num_new_pts = new_features.shape[0]
        new_local_xyz = new_local_xyz.view(num_new_pts, -1, 3)  # (N, num_voxel, 3)
        new_features = new_features.view(num_new_pts, -1, self.num_reduced_channels)  # (N, num_voxel, C)
        new_features = torch.cat((new_local_xyz, new_features), dim=-1).view(num_new_pts, -1)

        return new_features, point_cnt_of_grid

    @staticmethod
    def get_dense_voxels_by_center(point_centers, max_neighbour_distance, num_voxels):
        """
        Args:
            point_centers: (N, 3)
            max_neighbour_distance: float
            num_voxels: [num_x, num_y, num_z]

        Returns:
            voxel_centers: (N, total_voxels, 3)
        """
        R = max_neighbour_distance
        device = point_centers.device
        # Centres of num_voxels[i] equal cells that tile [-R, R] along each axis.
        x_grids = torch.arange(-R + R / num_voxels[0], R - R / num_voxels[0] + 1e-5, 2 * R / num_voxels[0], device=device)
        y_grids = torch.arange(-R + R / num_voxels[1], R - R / num_voxels[1] + 1e-5, 2 * R / num_voxels[1], device=device)
        z_grids = torch.arange(-R + R / num_voxels[2], R - R / num_voxels[2] + 1e-5, 2 * R / num_voxels[2], device=device)
        # Rows are ordered with x slowest and z fastest, the same order as
        # flattening an 'ij'-indexed meshgrid.
        xyz_offset = torch.cartesian_prod(x_grids, y_grids, z_grids)  # (total_voxels, 3)
        voxel_centers = point_centers[:, None, :] + xyz_offset[None, :, :]
        return voxel_centers

    def vector_pool_with_local_interpolate(self, xyz, xyz_batch_cnt, features, new_xyz, new_xyz_batch_cnt):
        """
        Args:
            xyz: (N, 3)
            xyz_batch_cnt: (batch_size)
            features: (N, C)
            new_xyz: (M, 3)
            new_xyz_batch_cnt: (batch_size)
        Returns:
            new_features: (M, total_voxels * C)
        """
        voxel_centers = self.get_dense_voxels_by_center(
            point_centers=new_xyz, max_neighbour_distance=self.max_neighbour_distance, num_voxels=self.num_local_voxel
        )  # (M1 + M2 + ..., total_voxels, 3)
        voxel_features = self.local_interpolate_module.forward(
            support_xyz=xyz, support_features=features, xyz_batch_cnt=xyz_batch_cnt,
            new_xyz=new_xyz, new_xyz_grid_centers=voxel_centers, new_xyz_batch_cnt=new_xyz_batch_cnt
        )  # ((M1 + M2 ...) * total_voxels, C)

        voxel_features = voxel_features.contiguous().view(-1, self.total_voxels * voxel_features.shape[-1])
        return voxel_features

    def forward(self, xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, features, **kwargs):
        r"""
        :param xyz: (N1 + N2 ..., 3) tensor of the xyz coordinates of the features
        :param xyz_batch_cnt: (batch_size), [N1, N2, ...]
        :param new_xyz: (M1 + M2 ..., 3)
        :param new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
        :param features: (N1 + N2 ..., C) tensor of the descriptors of the the features
        :return:
            new_xyz: (M1 + M2 ..., 3) tensor of the new features' xyz
            new_features: (M1 + M2 ..., \sum_k(mlps[k][-1])) tensor of the new_features descriptors
        """
        N, C = features.shape

        assert C % self.num_reduced_channels == 0, \
            f'the input channels ({C}) should be an integral multiple of num_reduced_channels({self.num_reduced_channels})'

        # Channel reduction by summing groups of num_reduced_channels channels.
        features = features.view(N, -1, self.num_reduced_channels).sum(dim=1)

        if self.local_aggregation_type in ['voxel_avg_pool', 'voxel_random_choice']:
            vector_features, point_cnt_of_grid = self.vector_pool_with_voxel_query(
                xyz=xyz, xyz_batch_cnt=xyz_batch_cnt, features=features,
                new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt
            )
        elif self.local_aggregation_type == 'local_interpolation':
            vector_features = self.vector_pool_with_local_interpolate(
                xyz=xyz, xyz_batch_cnt=xyz_batch_cnt, features=features,
                new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt
            )  # (M1 + M2 + ..., total_voxels * C)
        else:
            raise NotImplementedError

        vector_features = vector_features.permute(1, 0)[None, :, :]  # (1, num_voxels * C, M1 + M2 ...)

        new_features = self.separate_local_aggregation_layer(vector_features)

        new_features = self.post_mlps(new_features)
        new_features = new_features.squeeze(dim=0).permute(1, 0)
        return new_xyz, new_features


class VectorPoolAggregationModuleMSG(nn.Module):
    """Runs several VectorPoolAggregationModule groups and fuses their outputs with an MLP."""

    def __init__(self, input_channels, config):
        """
        Args:
            input_channels: number of input feature channels
            config: dict-like config providing NUM_GROUPS, GROUP_CFG_{k},
                LOCAL_AGGREGATION_TYPE, NUM_CHANNELS_OF_LOCAL_AGGREGATION, MSG_POST_MLPS
                and optionally NUM_REDUCED_CHANNELS
        """
        super().__init__()
        self.model_cfg = config
        self.num_groups = self.model_cfg.NUM_GROUPS

        self.layers = []
        c_in = 0
        for k in range(self.num_groups):
            cur_config = self.model_cfg[f'GROUP_CFG_{k}']
            cur_vector_pool_module = VectorPoolAggregationModule(
                input_channels=input_channels, num_local_voxel=cur_config.NUM_LOCAL_VOXEL,
                post_mlps=cur_config.POST_MLPS,
                max_neighbor_distance=cur_config.MAX_NEIGHBOR_DISTANCE,
                neighbor_nsample=cur_config.NEIGHBOR_NSAMPLE,
                local_aggregation_type=self.model_cfg.LOCAL_AGGREGATION_TYPE,
                num_reduced_channels=self.model_cfg.get('NUM_REDUCED_CHANNELS', None),
                num_channels_of_local_aggregation=self.model_cfg.NUM_CHANNELS_OF_LOCAL_AGGREGATION,
                neighbor_distance_multiplier=2.0
            )
            # Registered as attribute layer_{k} so it is tracked as a submodule.
            setattr(self, f'layer_{k}', cur_vector_pool_module)
            c_in += cur_config.POST_MLPS[-1]

        c_in += 3  # use_xyz

        shared_mlps = []
        for cur_num_c in self.model_cfg.MSG_POST_MLPS:
            shared_mlps.extend([
                nn.Conv1d(c_in, cur_num_c, kernel_size=1, bias=False),
                nn.BatchNorm1d(cur_num_c),
                nn.ReLU()
            ])
            c_in = cur_num_c
        self.msg_post_mlps = nn.Sequential(*shared_mlps)

    def forward(self, **kwargs):
        """Forward the same keyword arguments through every group and fuse the results.

        Returns:
            cur_xyz: the xyz returned by the last group
            new_features: (N, C) fused features
        """
        features_list = []
        for k in range(self.num_groups):
            cur_xyz, cur_features = getattr(self, f'layer_{k}')(**kwargs)
            features_list.append(cur_features)

        features = torch.cat(features_list, dim=-1)
        features = torch.cat((cur_xyz, features), dim=-1)
        features = features.permute(1, 0)[None, :, :]  # (1, C, N)
        new_features = self.msg_post_mlps(features)
        new_features = new_features.squeeze(dim=0).permute(1, 0)  # (N, C)

        return cur_xyz, new_features
class BallQuery(Function):
    """Autograd wrapper around the stacked-batch CUDA ball query.

    The op only produces neighbour indices and a mask, so no input receives a
    gradient.
    """

    @staticmethod
    def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, xyz_batch_cnt: torch.Tensor,
                new_xyz: torch.Tensor, new_xyz_batch_cnt):
        """
        Args:
            ctx:
            radius: float, radius of the balls
            nsample: int, maximum number of features in the balls
            xyz: (N1 + N2 ..., 3) xyz coordinates of the features
            xyz_batch_cnt: (batch_size), [N1, N2, ...]
            new_xyz: (M1 + M2 ..., 3) centers of the ball query
            new_xyz_batch_cnt: (batch_size), [M1, M2, ...]

        Returns:
            idx: (M1 + M2, nsample) tensor with the indices of the features that form the query balls;
                rows flagged in empty_ball_mask are reset to 0 so they stay valid gather indices
            empty_ball_mask: (M1 + M2) mask of the rows whose first index came back as -1
        """
        assert new_xyz.is_contiguous()
        assert new_xyz_batch_cnt.is_contiguous()
        assert xyz.is_contiguous()
        assert xyz_batch_cnt.is_contiguous()

        B = xyz_batch_cnt.shape[0]
        M = new_xyz.shape[0]
        idx = torch.cuda.IntTensor(M, nsample).zero_()

        pointnet2.ball_query_wrapper(B, M, radius, nsample, new_xyz, new_xyz_batch_cnt, xyz, xyz_batch_cnt, idx)
        # -1 in the first slot marks a query with no neighbour; replace the row with index 0
        # so that later grouping never reads out of range, and report it via the mask.
        empty_ball_mask = (idx[:, 0] == -1)
        idx[empty_ball_mask] = 0
        return idx, empty_ball_mask

    @staticmethod
    def backward(ctx, a=None, b=None):
        """Return no gradient for any of the six forward inputs.

        ``forward`` returns two outputs, so autograd passes two incoming
        gradients (``a`` for idx, ``b`` for empty_ball_mask), and it expects one
        returned value per forward input (radius, nsample, xyz, xyz_batch_cnt,
        new_xyz, new_xyz_batch_cnt).
        """
        return None, None, None, None, None, None
class QueryAndGroup(nn.Module):
    """Ball-query the support points around each new point and group their features."""

    def __init__(self, radius: float, nsample: int, use_xyz: bool = True):
        """
        Args:
            radius: float, radius of ball
            nsample: int, maximum number of features to gather in the ball
            use_xyz: whether the centred neighbour coordinates are prepended to the grouped features
        """
        super().__init__()
        self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz

    def forward(self, xyz: torch.Tensor, xyz_batch_cnt: torch.Tensor,
                new_xyz: torch.Tensor, new_xyz_batch_cnt: torch.Tensor,
                features: torch.Tensor = None):
        """
        Args:
            xyz: (N1 + N2 ..., 3) xyz coordinates of the features
            xyz_batch_cnt: (batch_size), [N1, N2, ...]
            new_xyz: (M1 + M2 ..., 3) centers of the ball query
            new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
            features: (N1 + N2 ..., C) tensor of features to group

        Returns:
            new_features: (M1 + M2, C, nsample) tensor; (M1 + M2, C + 3, nsample) when use_xyz is set,
                or (M1 + M2, 3, nsample) when features is None
            idx: (M1 + M2, nsample) neighbour indices returned by the ball query

        Raises:
            AssertionError: if a point tensor and its batch counts disagree, or if
                features is None while use_xyz is False
        """
        # Report the counts that belong to the tensor being checked (xyz_batch_cnt, not new_xyz_batch_cnt).
        assert xyz.shape[0] == xyz_batch_cnt.sum(), 'xyz: %s, xyz_batch_cnt: %s' % (str(xyz.shape), str(xyz_batch_cnt))
        assert new_xyz.shape[0] == new_xyz_batch_cnt.sum(), \
            'new_xyz: %s, new_xyz_batch_cnt: %s' % (str(new_xyz.shape), str(new_xyz_batch_cnt))

        # idx: (M1 + M2 ..., nsample), empty_ball_mask: (M1 + M2 ...)
        idx, empty_ball_mask = ball_query(self.radius, self.nsample, xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt)
        grouped_xyz = grouping_operation(xyz, xyz_batch_cnt, idx, new_xyz_batch_cnt)  # (M1 + M2, 3, nsample)
        # Express neighbour coordinates relative to their query centre.
        grouped_xyz -= new_xyz.unsqueeze(-1)

        # Queries without any neighbour contribute all-zero rows.
        grouped_xyz[empty_ball_mask] = 0

        if features is not None:
            grouped_features = grouping_operation(features, xyz_batch_cnt, idx, new_xyz_batch_cnt)  # (M1 + M2, C, nsample)
            grouped_features[empty_ball_mask] = 0
            if self.use_xyz:
                new_features = torch.cat([grouped_xyz, grouped_features], dim=1)  # (M1 + M2 ..., C + 3, nsample)
            else:
                new_features = grouped_features
        else:
            assert self.use_xyz, "Cannot have not features and not use xyz as a feature!"
            new_features = grouped_xyz

        return new_features, idx
class ThreeInterpolate(Function):
    """Autograd wrapper around the stacked three-neighbour interpolation CUDA op."""

    @staticmethod
    def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor):
        """
        Args:
            ctx:
            features: (M1 + M2 ..., C)
            idx: [N1 + N2 ..., 3]
            weight: [N1 + N2 ..., 3]

        Returns:
            out_tensor: (N1 + N2 ..., C)
        """
        same_rows = idx.shape[0] == weight.shape[0]
        assert same_rows and idx.shape[1] == weight.shape[1] == 3

        num_known, num_channels = features.shape[0], features.shape[1]
        # Keep what backward needs: the neighbour table, its weights and the source row count.
        ctx.three_interpolate_for_backward = (idx, weight, num_known)

        output = features.new_zeros((idx.shape[0], num_channels))
        pointnet2.three_interpolate_wrapper(
            features.contiguous(), idx.contiguous(), weight.contiguous(), output
        )
        return output

    @staticmethod
    def backward(ctx, grad_out: torch.Tensor):
        """
        Args:
            ctx:
            grad_out: (N1 + N2 ..., C)

        Returns:
            grad_features: (M1 + M2 ..., C); idx and weight receive no gradient
        """
        idx, weight, num_known = ctx.three_interpolate_for_backward

        grad_features = grad_out.new_zeros((num_known, grad_out.shape[1]))
        pointnet2.three_interpolate_grad_wrapper(
            grad_out.contiguous(), idx.contiguous(), weight.contiguous(), grad_features
        )
        return grad_features, None, None
num_total_grids, neighbor_distance_multiplier): """ Args: ctx: // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features // xyz_batch_cnt: (batch_size), [N1, N2, ...] // new_xyz: (M1 + M2 ..., 3) centers of the ball query // new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid // new_xyz_batch_cnt: (batch_size), [M1, M2, ...] // nsample: find all (-1), find limited number(>0) // neighbor_type: 1: ball, others: cube // neighbor_distance_multiplier: query_distance = neighbor_distance_multiplier * max_neighbour_distance Returns: // new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn // new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn """ num_new_xyz = new_xyz.shape[0] new_xyz_grid_dist2 = new_xyz_grid_centers.new_zeros(new_xyz_grid_centers.shape) new_xyz_grid_idxs = new_xyz_grid_centers.new_zeros(new_xyz_grid_centers.shape).int().fill_(-1) while True: num_max_sum_points = avg_length_of_neighbor_idxs * num_new_xyz stack_neighbor_idxs = new_xyz_grid_idxs.new_zeros(num_max_sum_points) start_len = new_xyz_grid_idxs.new_zeros(num_new_xyz, 2).int() cumsum = new_xyz_grid_idxs.new_zeros(1) pointnet2.query_stacked_local_neighbor_idxs_wrapper_stack( support_xyz.contiguous(), xyz_batch_cnt.contiguous(), new_xyz.contiguous(), new_xyz_batch_cnt.contiguous(), stack_neighbor_idxs.contiguous(), start_len.contiguous(), cumsum, avg_length_of_neighbor_idxs, max_neighbour_distance * neighbor_distance_multiplier, nsample, neighbor_type ) avg_length_of_neighbor_idxs = cumsum[0].item() // num_new_xyz + int(cumsum[0].item() % num_new_xyz > 0) if cumsum[0] <= num_max_sum_points: break stack_neighbor_idxs = stack_neighbor_idxs[:cumsum[0]] pointnet2.query_three_nn_by_stacked_local_idxs_wrapper_stack( support_xyz, new_xyz, new_xyz_grid_centers, new_xyz_grid_idxs, new_xyz_grid_dist2, stack_neighbor_idxs, start_len, num_new_xyz, num_total_grids ) return torch.sqrt(new_xyz_grid_dist2), new_xyz_grid_idxs, 
class VectorPoolWithVoxelQuery(Function):
    # Autograd wrapper around the `vector_pool_wrapper` / `vector_pool_grad_wrapper` CUDA ops.
    # Only `support_features` receives a gradient; see backward().
    @staticmethod
    def forward(ctx, support_xyz: torch.Tensor, xyz_batch_cnt: torch.Tensor, support_features: torch.Tensor,
                new_xyz: torch.Tensor, new_xyz_batch_cnt: torch.Tensor, num_grid_x, num_grid_y, num_grid_z,
                max_neighbour_distance, num_c_out_each_grid, use_xyz,
                num_mean_points_per_grid=100, nsample=-1, neighbor_type=0, pooling_type=0):
        """
        Pool support features into a local grid around every new point.

        Args:
            ctx:
            support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
            xyz_batch_cnt: (batch_size), [N1, N2, ...]
            support_features: (N1 + N2 ..., C)
            new_xyz: (M1 + M2 ..., 3) centers of new positions
            new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
            num_grid_x: number of grids in each local area centered at new_xyz
            num_grid_y: number of grids along y (same meaning as num_grid_x)
            num_grid_z: number of grids along z (same meaning as num_grid_x)
            max_neighbour_distance: passed through to the CUDA op unchanged
            num_c_out_each_grid: output channels per grid; C must be a multiple of it
            use_xyz: when truthy, new_local_xyz is also normalised by the per-grid point count
            num_mean_points_per_grid: initial guess used to size the grouped-index buffer
                (buffer rows = num_mean_points_per_grid * M); grown automatically if too small
            nsample: passed through to the CUDA op unchanged
            neighbor_type: 1: ball, others: cube:
            pooling_type: 0: avg_pool, 1: random choice
        Returns:
            new_features: (M1 + M2 ..., num_c_out), num_c_out = num_c_out_each_grid * num_total_grids
            new_local_xyz: (M1 + M2 ..., 3 * num_total_grids), non-differentiable
            num_mean_points_per_grid: int tensor of shape (1,) holding the last buffer-size estimate,
                non-differentiable
            point_cnt_of_grid: (M1 + M2 ..., num_total_grids), non-differentiable
        """
        assert support_xyz.is_contiguous()
        assert support_features.is_contiguous()
        assert xyz_batch_cnt.is_contiguous()
        assert new_xyz.is_contiguous()
        assert new_xyz_batch_cnt.is_contiguous()
        num_total_grids = num_grid_x * num_grid_y * num_grid_z
        num_c_out = num_c_out_each_grid * num_total_grids
        N, num_c_in = support_features.shape
        M = new_xyz.shape[0]

        assert num_c_in % num_c_out_each_grid == 0, \
            f'the input channels ({num_c_in}) should be an integral multiple of num_c_out_each_grid({num_c_out_each_grid})'

        # The number of grouped points is not known in advance: run the op with a guessed
        # buffer size and, if it reports more points than the buffer holds, re-allocate every
        # output from scratch and run it again with the corrected size.
        while True:
            new_features = support_features.new_zeros((M, num_c_out))
            new_local_xyz = support_features.new_zeros((M, 3 * num_total_grids))
            point_cnt_of_grid = xyz_batch_cnt.new_zeros((M, num_total_grids))

            num_max_sum_points = num_mean_points_per_grid * M
            grouped_idxs = xyz_batch_cnt.new_zeros((num_max_sum_points, 3))

            # NOTE(review): the op presumably fills the four buffers above in place and returns
            # the total number of grouped points -- confirm against vector_pool.cpp.
            num_cum_sum = pointnet2.vector_pool_wrapper(
                support_xyz, xyz_batch_cnt, support_features, new_xyz, new_xyz_batch_cnt,
                new_features, new_local_xyz, point_cnt_of_grid, grouped_idxs,
                num_grid_x, num_grid_y, num_grid_z, max_neighbour_distance, use_xyz,
                num_max_sum_points, nsample, neighbor_type, pooling_type
            )
            # ceil(num_cum_sum / M): the per-point capacity that would have been sufficient
            num_mean_points_per_grid = num_cum_sum // M + int(num_cum_sum % M > 0)
            if num_cum_sum <= num_max_sum_points:
                break

        # keep only the rows that were reported as used
        grouped_idxs = grouped_idxs[:num_cum_sum]

        # Divide by the per-grid point count; the clamp avoids dividing by zero for empty grids.
        normalizer = torch.clamp_min(point_cnt_of_grid[:, :, None].float(), min=1e-6)
        new_features = (new_features.view(-1, num_total_grids, num_c_out_each_grid) / normalizer).view(-1, num_c_out)

        if use_xyz:
            new_local_xyz = (new_local_xyz.view(-1, num_total_grids, 3) / normalizer).view(-1, num_total_grids * 3)

        # Wrapped in (CPU) tensors; num_mean_points_per_grid is returned so a caller can reuse
        # the estimate. nsample is wrapped and marked below but is not part of the return value.
        num_mean_points_per_grid = torch.Tensor([num_mean_points_per_grid]).int()
        nsample = torch.Tensor([nsample]).int()
        ctx.vector_pool_for_backward = (point_cnt_of_grid, grouped_idxs, N, num_c_in)
        ctx.mark_non_differentiable(new_local_xyz, num_mean_points_per_grid, nsample, point_cnt_of_grid)
        return new_features, new_local_xyz, num_mean_points_per_grid, point_cnt_of_grid

    @staticmethod
    def backward(ctx, grad_new_features: torch.Tensor, grad_local_xyz: torch.Tensor, grad_num_cum_sum, grad_point_cnt_of_grid):
        """
        Args:
            ctx:
            grad_new_features: (M1 + M2 ..., num_c_out), num_c_out = num_c_out_each_grid * num_total_grids
            grad_local_xyz: unused (the output is marked non-differentiable)
            grad_num_cum_sum: unused (the output is marked non-differentiable)
            grad_point_cnt_of_grid: unused (the output is marked non-differentiable)

        Returns:
            grad_support_features: (N1 + N2 ..., C_in), returned in the third slot;
                the other 14 forward inputs get None
        """
        point_cnt_of_grid, grouped_idxs, N, num_c_in = ctx.vector_pool_for_backward
        grad_support_features = grad_new_features.new_zeros((N, num_c_in))

        pointnet2.vector_pool_grad_wrapper(
            grad_new_features.contiguous(), point_cnt_of_grid, grouped_idxs,
            grad_support_features
        )

        # one entry per forward input (15 in total); only support_features is differentiable
        return None, None, grad_support_features, None, None, None, None, None, None, None, None, None, None, None, None
modified from the original implementation of official PointNet++ codes. Written by Shaoshuai Shi All Rights Reserved 2019-2020. */ #include #include #include #include #include #include "ball_query_gpu.h" extern THCState *state; #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) int ball_query_wrapper_stack(int B, int M, float radius, int nsample, at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor, at::Tensor xyz_tensor, at::Tensor xyz_batch_cnt_tensor, at::Tensor idx_tensor) { CHECK_INPUT(new_xyz_tensor); CHECK_INPUT(xyz_tensor); CHECK_INPUT(new_xyz_batch_cnt_tensor); CHECK_INPUT(xyz_batch_cnt_tensor); const float *new_xyz = new_xyz_tensor.data(); const float *xyz = xyz_tensor.data(); const int *new_xyz_batch_cnt = new_xyz_batch_cnt_tensor.data(); const int *xyz_batch_cnt = xyz_batch_cnt_tensor.data(); int *idx = idx_tensor.data(); ball_query_kernel_launcher_stack(B, M, radius, nsample, new_xyz, new_xyz_batch_cnt, xyz, xyz_batch_cnt, idx); return 1; } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/ball_query_deform.cpp ================================================ #include #include #include #include #include #include "ball_query_deform_gpu.h" extern THCState *state; #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 
int ball_query_deform_wrapper_stack(int B, int M, int nsample, at::Tensor new_xyz_tensor, at::Tensor new_xyz_r_tensor, at::Tensor new_xyz_batch_cnt_tensor, at::Tensor xyz_tensor, at::Tensor xyz_batch_cnt_tensor, at::Tensor idx_tensor) { CHECK_INPUT(new_xyz_tensor); CHECK_INPUT(new_xyz_r_tensor); CHECK_INPUT(xyz_tensor); CHECK_INPUT(new_xyz_batch_cnt_tensor); CHECK_INPUT(xyz_batch_cnt_tensor); const float *new_xyz = new_xyz_tensor.data(); const float *new_xyz_r = new_xyz_r_tensor.data(); const float *xyz = xyz_tensor.data(); const int *new_xyz_batch_cnt = new_xyz_batch_cnt_tensor.data(); const int *xyz_batch_cnt = xyz_batch_cnt_tensor.data(); int *idx = idx_tensor.data(); ball_query_deform_kernel_launcher_stack(B, M, nsample, new_xyz, new_xyz_r, new_xyz_batch_cnt, xyz, xyz_batch_cnt, idx); return 1; } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/ball_query_deform_gpu.cu ================================================ #include #include #include #include "ball_query_deform_gpu.h" #include "cuda_utils.h" __global__ void ball_query_deform_kernel_stack(int B, int M, int nsample, \ const float *new_xyz, const float *new_xyz_r, const int *new_xyz_batch_cnt, const float *xyz, const int *xyz_batch_cnt, int *idx) { // :param xyz: (N1 + N2 ..., 3) xyz coordinates of the features // :param xyz_batch_cnt: (batch_size), [N1, N2, ...] // :param new_xyz: (M1 + M2 ..., 3) centers of the ball query // :param new_xyz_r: (M1 + M2 ..., 1) radius for each new point // :param new_xyz_batch_cnt: (batch_size), [M1, M2, ...] 
// Launches ball_query_deform_kernel_stack with one thread per query point.
//
// :param xyz: (N1 + N2 ..., 3) xyz coordinates of the features
// :param xyz_batch_cnt: (batch_size), [N1, N2, ...]
// :param new_xyz: (M1 + M2 ..., 3) centers of the ball query
// :param new_xyz_r: (M1 + M2 ..., 1) radius for each new point
// :param new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
// output:
//      idx: (M, nsample)
// Exits the process if the launch reports a CUDA error.
void ball_query_deform_kernel_launcher_stack(int B, int M, int nsample,
    const float *new_xyz, const float *new_xyz_r, const int *new_xyz_batch_cnt,
    const float *xyz, const int *xyz_batch_cnt, int *idx){
    // Nothing to query: a grid with zero blocks is an invalid launch configuration
    // and would otherwise take the exit(-1) path below.
    if (M <= 0) return;

    cudaError_t err;

    dim3 blocks(DIVUP(M, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    ball_query_deform_kernel_stack<<<blocks, threads>>>(B, M, nsample, new_xyz, new_xyz_r, new_xyz_batch_cnt, xyz, xyz_batch_cnt, idx);
    // cudaDeviceSynchronize();  // for using printf in kernel function
    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
// Launches ball_query_kernel_stack with one thread per query point.
//
// :param xyz: (N1 + N2 ..., 3) xyz coordinates of the features
// :param xyz_batch_cnt: (batch_size), [N1, N2, ...]
// :param new_xyz: (M1 + M2 ..., 3) centers of the ball query
// :param new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
// output:
//      idx: (M, nsample)
// Exits the process if the launch reports a CUDA error.
void ball_query_kernel_launcher_stack(int B, int M, float radius, int nsample,
    const float *new_xyz, const int *new_xyz_batch_cnt, const float *xyz, const int *xyz_batch_cnt, int *idx){
    // Nothing to query: a grid with zero blocks is an invalid launch configuration
    // and would otherwise take the exit(-1) path below.
    if (M <= 0) return;

    cudaError_t err;

    dim3 blocks(DIVUP(M, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    ball_query_kernel_stack<<<blocks, threads>>>(B, M, radius, nsample, new_xyz, new_xyz_batch_cnt, xyz, xyz_batch_cnt, idx);
    // cudaDeviceSynchronize();  // for using printf in kernel function
    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
*/ #include #include #include #include #include #include "group_points_gpu.h" extern THCState *state; #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) int group_points_grad_wrapper_stack(int B, int M, int C, int N, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor idx_batch_cnt_tensor, at::Tensor features_batch_cnt_tensor, at::Tensor grad_features_tensor) { CHECK_INPUT(grad_out_tensor); CHECK_INPUT(idx_tensor); CHECK_INPUT(idx_batch_cnt_tensor); CHECK_INPUT(features_batch_cnt_tensor); CHECK_INPUT(grad_features_tensor); const float *grad_out = grad_out_tensor.data(); const int *idx = idx_tensor.data(); const int *idx_batch_cnt = idx_batch_cnt_tensor.data(); const int *features_batch_cnt = features_batch_cnt_tensor.data(); float *grad_features = grad_features_tensor.data(); group_points_grad_kernel_launcher_stack(B, M, C, N, nsample, grad_out, idx, idx_batch_cnt, features_batch_cnt, grad_features); return 1; } int group_points_wrapper_stack(int B, int M, int C, int nsample, at::Tensor features_tensor, at::Tensor features_batch_cnt_tensor, at::Tensor idx_tensor, at::Tensor idx_batch_cnt_tensor, at::Tensor out_tensor) { CHECK_INPUT(features_tensor); CHECK_INPUT(features_batch_cnt_tensor); CHECK_INPUT(idx_tensor); CHECK_INPUT(idx_batch_cnt_tensor); CHECK_INPUT(out_tensor); const float *features = features_tensor.data(); const int *idx = idx_tensor.data(); const int *features_batch_cnt = features_batch_cnt_tensor.data(); const int *idx_batch_cnt = idx_batch_cnt_tensor.data(); float *out = out_tensor.data(); group_points_kernel_launcher_stack(B, M, C, nsample, features, features_batch_cnt, idx, 
// Backward of stacked point grouping: every thread handles one
// (point, channel, sample) element of grad_out and atomically adds it to the
// feature row that was gathered for that slot in the forward pass.
//
// :param grad_out: (M1 + M2 ..., C, nsample) tensor of the gradients of the output from forward
// :param idx: (M1 + M2 ..., nsample) tensor containing the indices of features to group with
// :param idx_batch_cnt: (batch_size) [M1, M2, ...] number of grouped points per sample
// :param features_batch_cnt: (batch_size) [N1, N2, ...] number of feature rows per sample
// :return:
//     grad_features: (N1 + N2 ..., C) gradient of the features
__global__ void group_points_grad_kernel_stack(int B, int M, int C, int N, int nsample,
    const float *grad_out, const int *idx, const int *idx_batch_cnt, const int *features_batch_cnt, float *grad_features) {
    // Decompose the flat thread id as ((point * C) + channel) * nsample + sample.
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int sample = tid % nsample;
    const int channel = (tid / nsample) % C;
    const int point = tid / nsample / C;
    if (point >= M || channel >= C || sample >= nsample) return;

    // Walk the batch counts until the sample containing `point` is found, summing the
    // feature rows of all earlier samples on the way (idx is local to its own sample).
    int batch = 0;
    int points_seen = idx_batch_cnt[0];
    int feature_row_offset = 0;
    while (batch + 1 < B && point >= points_seen) {
        feature_row_offset += features_batch_cnt[batch];
        ++batch;
        points_seen += idx_batch_cnt[batch];
    }

    const float *src = grad_out + (point * C * nsample + channel * nsample + sample);
    const int gathered_row = idx[point * nsample + sample];
    float *dst = grad_features + ((feature_row_offset + gathered_row) * C + channel);

    // Several grouped slots may reference the same feature row, hence the atomic.
    atomicAdd(dst, src[0]);
}
// Forward of stacked point grouping: every thread copies one
// (point, channel, sample) element from the gathered feature row into the output.
//
// :param features: (N1 + N2 ..., C) tensor of features to group
// :param features_batch_cnt: (batch_size) [N1, N2, ...] number of feature rows per sample
// :param idx: (M1 + M2 ..., nsample) tensor containing the indices of features to group with
// :param idx_batch_cnt: (batch_size) [M1, M2, ...] number of grouped points per sample
// :return:
//     output: (M1 + M2, C, nsample) tensor
__global__ void group_points_kernel_stack(int B, int M, int C, int nsample,
    const float *features, const int *features_batch_cnt, const int *idx, const int *idx_batch_cnt, float *out) {
    // Decompose the flat thread id as ((point * C) + channel) * nsample + sample.
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int sample = tid % nsample;
    const int channel = (tid / nsample) % C;
    const int point = tid / nsample / C;
    if (point >= M || channel >= C || sample >= nsample) return;

    // Walk the batch counts until the sample containing `point` is found, summing the
    // feature rows of all earlier samples on the way (idx is local to its own sample).
    int batch = 0;
    int points_seen = idx_batch_cnt[0];
    int feature_row_offset = 0;
    while (batch + 1 < B && point >= points_seen) {
        feature_row_offset += features_batch_cnt[batch];
        ++batch;
        points_seen += idx_batch_cnt[batch];
    }

    const float *sample_features = features + feature_row_offset * C;
    const int gathered_row = idx[point * nsample + sample];

    const int src_offset = gathered_row * C + channel;
    const int dst_offset = point * C * nsample + channel * nsample + sample;
    out[dst_offset] = sample_features[src_offset];
}
tensor containing the indicies of features to group with // :return: // output: (M1 + M2, C, nsample) tensor cudaError_t err; dim3 blocks(DIVUP(M * C * nsample, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); group_points_kernel_stack<<>>(B, M, C, nsample, features, features_batch_cnt, idx, idx_batch_cnt, out); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/group_points_gpu.h ================================================ /* Stacked-batch-data version of point grouping, modified from the original implementation of official PointNet++ codes. Written by Shaoshuai Shi All Rights Reserved 2019-2020. */ #ifndef _STACK_GROUP_POINTS_GPU_H #define _STACK_GROUP_POINTS_GPU_H #include #include #include #include int group_points_wrapper_stack(int B, int M, int C, int nsample, at::Tensor features_tensor, at::Tensor features_batch_cnt_tensor, at::Tensor idx_tensor, at::Tensor idx_batch_cnt_tensor, at::Tensor out_tensor); void group_points_kernel_launcher_stack(int B, int M, int C, int nsample, const float *features, const int *features_batch_cnt, const int *idx, const int *idx_batch_cnt, float *out); int group_points_grad_wrapper_stack(int B, int M, int C, int N, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor idx_batch_cnt_tensor, at::Tensor features_batch_cnt_tensor, at::Tensor grad_features_tensor); void group_points_grad_kernel_launcher_stack(int B, int M, int C, int N, int nsample, const float *grad_out, const int *idx, const int *idx_batch_cnt, const int *features_batch_cnt, float *grad_features); #endif ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/interpolate.cpp 
================================================ /* Stacked-batch-data version of point interpolation, modified from the original implementation of official PointNet++ codes. Written by Shaoshuai Shi All Rights Reserved 2019-2020. */ #include #include #include #include #include #include #include #include #include "interpolate_gpu.h" extern THCState *state; #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) void three_nn_wrapper_stack(at::Tensor unknown_tensor, at::Tensor unknown_batch_cnt_tensor, at::Tensor known_tensor, at::Tensor known_batch_cnt_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor){ // unknown: (N1 + N2 ..., 3) // unknown_batch_cnt: (batch_size), [N1, N2, ...] // known: (M1 + M2 ..., 3) // known_batch_cnt: (batch_size), [M1, M2, ...] 
// Return: // dist: (N1 + N2 ..., 3) l2 distance to the three nearest neighbors // idx: (N1 + N2 ..., 3) index of the three nearest neighbors CHECK_INPUT(unknown_tensor); CHECK_INPUT(unknown_batch_cnt_tensor); CHECK_INPUT(known_tensor); CHECK_INPUT(known_batch_cnt_tensor); CHECK_INPUT(dist2_tensor); CHECK_INPUT(idx_tensor); int batch_size = unknown_batch_cnt_tensor.size(0); int N = unknown_tensor.size(0); int M = known_tensor.size(0); const float *unknown = unknown_tensor.data(); const int *unknown_batch_cnt = unknown_batch_cnt_tensor.data(); const float *known = known_tensor.data(); const int *known_batch_cnt = known_batch_cnt_tensor.data(); float *dist2 = dist2_tensor.data(); int *idx = idx_tensor.data(); three_nn_kernel_launcher_stack(batch_size, N, M, unknown, unknown_batch_cnt, known, known_batch_cnt, dist2, idx); } void three_interpolate_wrapper_stack(at::Tensor features_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor) { // features_tensor: (M1 + M2 ..., C) // idx_tensor: [N1 + N2 ..., 3] // weight_tensor: [N1 + N2 ..., 3] // Return: // out_tensor: (N1 + N2 ..., C) CHECK_INPUT(features_tensor); CHECK_INPUT(idx_tensor); CHECK_INPUT(weight_tensor); CHECK_INPUT(out_tensor); int N = out_tensor.size(0); int channels = features_tensor.size(1); const float *features = features_tensor.data(); const float *weight = weight_tensor.data(); const int *idx = idx_tensor.data(); float *out = out_tensor.data(); three_interpolate_kernel_launcher_stack(N, channels, features, idx, weight, out); } void three_interpolate_grad_wrapper_stack(at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_features_tensor) { // grad_out_tensor: (N1 + N2 ..., C) // idx_tensor: [N1 + N2 ..., 3] // weight_tensor: [N1 + N2 ..., 3] // Return: // grad_features_tensor: (M1 + M2 ..., C) CHECK_INPUT(grad_out_tensor); CHECK_INPUT(idx_tensor); CHECK_INPUT(weight_tensor); CHECK_INPUT(grad_features_tensor); int N = 
grad_out_tensor.size(0); int channels = grad_out_tensor.size(1); const float *grad_out = grad_out_tensor.data(); const float *weight = weight_tensor.data(); const int *idx = idx_tensor.data(); float *grad_features = grad_features_tensor.data(); // printf("N=%d, channels=%d\n", N, channels); three_interpolate_grad_kernel_launcher_stack(N, channels, grad_out, idx, weight, grad_features); } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/interpolate_gpu.cu ================================================ /* Stacked-batch-data version of point interpolation, modified from the original implementation of official PointNet++ codes. Written by Shaoshuai Shi All Rights Reserved 2019-2020. */ #include #include #include #include "cuda_utils.h" #include "interpolate_gpu.h" __global__ void three_nn_kernel_stack(int batch_size, int N, int M, const float *unknown, const int *unknown_batch_cnt, const float *known, const int *known_batch_cnt, float *dist2, int *idx) { // unknown: (N1 + N2 ..., 3) // unknown_batch_cnt: (batch_size), [N1, N2, ...] // known: (M1 + M2 ..., 3) // known_batch_cnt: (batch_size), [M1, M2, ...] 
// Return: // dist: (N1 + N2 ..., 3) l2 distance to the three nearest neighbors // idx: (N1 + N2 ..., 3) index of the three nearest neighbors int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; if (pt_idx >= N) return; int bs_idx = 0, pt_cnt = unknown_batch_cnt[0]; for (int k = 1; k < batch_size; k++){ if (pt_idx < pt_cnt) break; pt_cnt += unknown_batch_cnt[k]; bs_idx = k; } int cur_num_known_points = known_batch_cnt[bs_idx]; int known_batch_start_idx = 0; for (int k = 0; k < bs_idx; k++) known_batch_start_idx += known_batch_cnt[k]; known += known_batch_start_idx * 3; unknown += pt_idx * 3; dist2 += pt_idx * 3; idx += pt_idx * 3; float ux = unknown[0]; float uy = unknown[1]; float uz = unknown[2]; double best1 = 1e40, best2 = 1e40, best3 = 1e40; int besti1 = 0, besti2 = 0, besti3 = 0; for (int k = 0; k < cur_num_known_points; ++k) { float x = known[k * 3 + 0]; float y = known[k * 3 + 1]; float z = known[k * 3 + 2]; float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); if (d < best1) { best3 = best2; besti3 = besti2; best2 = best1; besti2 = besti1; best1 = d; besti1 = k; } else if (d < best2) { best3 = best2; besti3 = besti2; best2 = d; besti2 = k; } else if (d < best3) { best3 = d; besti3 = k; } } dist2[0] = best1; dist2[1] = best2; dist2[2] = best3; idx[0] = besti1 + known_batch_start_idx; idx[1] = besti2 + known_batch_start_idx; idx[2] = besti3 + known_batch_start_idx; } void three_nn_kernel_launcher_stack(int batch_size, int N, int M, const float *unknown, const int *unknown_batch_cnt, const float *known, const int *known_batch_cnt, float *dist2, int *idx) { // unknown: (N1 + N2 ..., 3) // unknown_batch_cnt: (batch_size), [N1, N2, ...] // known: (M1 + M2 ..., 3) // known_batch_cnt: (batch_size), [M1, M2, ...] 
// Return: // dist: (N1 + N2 ..., 3) l2 distance to the three nearest neighbors // idx: (N1 + N2 ..., 3) index of the three nearest neighbors cudaError_t err; dim3 blocks(DIVUP(N, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); three_nn_kernel_stack<<>>( batch_size, N, M, unknown, unknown_batch_cnt, known, known_batch_cnt, dist2, idx ); err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } __global__ void three_interpolate_kernel_stack(int N, int channels, const float *features, const int *idx, const float *weight, float *out) { // features: (M1 + M2 ..., C) // idx: [N1 + N2 ..., 3] // weight: [N1 + N2 ..., 3] // Return: // out: (N1 + N2 ..., C) int c_idx = blockIdx.y; int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; if (pt_idx >= N || c_idx >= channels) return; weight += pt_idx * 3; idx += pt_idx * 3; out += pt_idx * channels + c_idx; out[0] = weight[0] * features[idx[0] * channels + c_idx] + weight[1] * features[idx[1] * channels + c_idx] + weight[2] * features[idx[2] * channels + c_idx]; } void three_interpolate_kernel_launcher_stack(int N, int channels, const float *features, const int *idx, const float *weight, float *out) { // features: (M1 + M2 ..., C) // idx: [N1 + N2 ..., 3] // weight: [N1 + N2 ..., 3] // Return: // out: (N1 + N2 ..., C) cudaError_t err; dim3 blocks(DIVUP(N, THREADS_PER_BLOCK), channels); dim3 threads(THREADS_PER_BLOCK); three_interpolate_kernel_stack<<>>(N, channels, features, idx, weight, out); err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } __global__ void three_interpolate_grad_kernel_stack(int N, int channels, const float *grad_out, const int *idx, const float *weight, float *grad_features) { // grad_out_tensor: (N1 + N2 ..., C) // idx_tensor: [N1 + N2 ..., 3] // weight_tensor: [N1 + N2 ..., 3] // Return: // 
grad_features_tensor: (M1 + M2 ..., C) int c_idx = blockIdx.y; int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; if (pt_idx >= N || c_idx >= channels) return; grad_out += pt_idx * channels + c_idx; weight += pt_idx * 3; idx += pt_idx * 3; // printf("pt_idx=%d, c_idx=%d, idx=(%d, %d, %d), grad_out=%f\n", pt_idx, c_idx, idx[0], idx[1], idx[2], grad_out[0]); atomicAdd(grad_features + idx[0] * channels + c_idx, grad_out[0] * weight[0]); atomicAdd(grad_features + idx[1] * channels + c_idx, grad_out[0] * weight[1]); atomicAdd(grad_features + idx[2] * channels + c_idx, grad_out[0] * weight[2]); } void three_interpolate_grad_kernel_launcher_stack(int N, int channels, const float *grad_out, const int *idx, const float *weight, float *grad_features) { // grad_out_tensor: (N1 + N2 ..., C) // idx_tensor: [N1 + N2 ..., 3] // weight_tensor: [N1 + N2 ..., 3] // Return: // grad_features_tensor: (M1 + M2 ..., C) cudaError_t err; dim3 blocks(DIVUP(N, THREADS_PER_BLOCK), channels); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); three_interpolate_grad_kernel_stack<<>>( N, channels, grad_out, idx, weight, grad_features ); err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/interpolate_gpu.h ================================================ #ifndef _INTERPOLATE_GPU_H #define _INTERPOLATE_GPU_H #include #include #include #include void three_nn_wrapper_stack(at::Tensor unknown_tensor, at::Tensor unknown_batch_cnt_tensor, at::Tensor known_tensor, at::Tensor known_batch_cnt_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); void three_interpolate_wrapper_stack(at::Tensor features_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); void three_interpolate_grad_wrapper_stack(at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, 
at::Tensor grad_features_tensor); void three_nn_kernel_launcher_stack(int batch_size, int N, int M, const float *unknown, const int *unknown_batch_cnt, const float *known, const int *known_batch_cnt, float *dist2, int *idx); void three_interpolate_kernel_launcher_stack(int N, int channels, const float *features, const int *idx, const float *weight, float *out); void three_interpolate_grad_kernel_launcher_stack(int N, int channels, const float *grad_out, const int *idx, const float *weight, float *grad_features); #endif ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/pointnet2_api.cpp ================================================ #include #include #include "ball_query_gpu.h" #include "group_points_gpu.h" #include "sampling_gpu.h" #include "interpolate_gpu.h" #include "voxel_query_gpu.h" #include "ball_query_deform_gpu.h" #include "vector_pool_gpu.h" PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("ball_query_wrapper", &ball_query_wrapper_stack, "ball_query_wrapper_stack"); m.def("voxel_query_wrapper", &voxel_query_wrapper_stack, "voxel_query_wrapper_stack"); m.def("ball_query_deform_wrapper", &ball_query_deform_wrapper_stack, "ball_query_deform_wrapper_stack"); m.def("farthest_point_sampling_wrapper", &farthest_point_sampling_wrapper, "farthest_point_sampling_wrapper"); m.def("stack_farthest_point_sampling_wrapper", &stack_farthest_point_sampling_wrapper, "stack_farthest_point_sampling_wrapper"); m.def("group_points_wrapper", &group_points_wrapper_stack, "group_points_wrapper_stack"); m.def("group_points_grad_wrapper", &group_points_grad_wrapper_stack, "group_points_grad_wrapper_stack"); m.def("three_nn_wrapper", &three_nn_wrapper_stack, "three_nn_wrapper_stack"); m.def("three_interpolate_wrapper", &three_interpolate_wrapper_stack, "three_interpolate_wrapper_stack"); m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_stack, "three_interpolate_grad_wrapper_stack"); 
m.def("query_stacked_local_neighbor_idxs_wrapper_stack", &query_stacked_local_neighbor_idxs_wrapper_stack, "query_stacked_local_neighbor_idxs_wrapper_stack"); m.def("query_three_nn_by_stacked_local_idxs_wrapper_stack", &query_three_nn_by_stacked_local_idxs_wrapper_stack, "query_three_nn_by_stacked_local_idxs_wrapper_stack"); m.def("vector_pool_wrapper", &vector_pool_wrapper_stack, "vector_pool_grad_wrapper_stack"); m.def("vector_pool_grad_wrapper", &vector_pool_grad_wrapper_stack, "vector_pool_grad_wrapper_stack"); } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/sampling.cpp ================================================ #include #include #include #include #include "sampling_gpu.h" extern THCState *state; #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) int farthest_point_sampling_wrapper(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) { CHECK_INPUT(points_tensor); CHECK_INPUT(temp_tensor); CHECK_INPUT(idx_tensor); const float *points = points_tensor.data(); float *temp = temp_tensor.data(); int *idx = idx_tensor.data(); farthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx); return 1; } int stack_farthest_point_sampling_wrapper(at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor xyz_batch_cnt_tensor, at::Tensor idx_tensor, at::Tensor num_sampled_points_tensor) { CHECK_INPUT(points_tensor); CHECK_INPUT(temp_tensor); CHECK_INPUT(idx_tensor); CHECK_INPUT(xyz_batch_cnt_tensor); CHECK_INPUT(num_sampled_points_tensor); int batch_size = xyz_batch_cnt_tensor.size(0); int N = points_tensor.size(0); const float 
*points = points_tensor.data(); float *temp = temp_tensor.data(); int *xyz_batch_cnt = xyz_batch_cnt_tensor.data(); int *idx = idx_tensor.data(); int *num_sampled_points = num_sampled_points_tensor.data(); stack_farthest_point_sampling_kernel_launcher(N, batch_size, points, temp, xyz_batch_cnt, idx, num_sampled_points); return 1; } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/sampling_gpu.cu ================================================ #include #include #include "cuda_utils.h" #include "sampling_gpu.h" #define TOTAL_THREADS 1024 inline int opt_n_threads(int work_size) { const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); return max(min(1 << pow_2, TOTAL_THREADS), 1); } __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){ const float v1 = dists[idx1], v2 = dists[idx2]; const int i1 = dists_i[idx1], i2 = dists_i[idx2]; dists[idx1] = max(v1, v2); dists_i[idx1] = v2 > v1 ? i2 : i1; } template __global__ void farthest_point_sampling_kernel(int b, int n, int m, const float *__restrict__ dataset, float *__restrict__ temp, int *__restrict__ idxs) { // dataset: (B, N, 3) // tmp: (B, N) // output: // idx: (B, M) if (m <= 0) return; __shared__ float dists[block_size]; __shared__ int dists_i[block_size]; int batch_index = blockIdx.x; dataset += batch_index * n * 3; temp += batch_index * n; idxs += batch_index * m; int tid = threadIdx.x; const int stride = block_size; int old = 0; if (threadIdx.x == 0) idxs[0] = old; __syncthreads(); for (int j = 1; j < m; j++) { int besti = 0; float best = -1; float x1 = dataset[old * 3 + 0]; float y1 = dataset[old * 3 + 1]; float z1 = dataset[old * 3 + 2]; for (int k = tid; k < n; k += stride) { float x2, y2, z2; x2 = dataset[k * 3 + 0]; y2 = dataset[k * 3 + 1]; z2 = dataset[k * 3 + 2]; // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); // if (mag <= 1e-3) // continue; float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + 
(z2 - z1) * (z2 - z1); float d2 = min(d, temp[k]); temp[k] = d2; besti = d2 > best ? k : besti; best = d2 > best ? d2 : best; } dists[tid] = best; dists_i[tid] = besti; __syncthreads(); if (block_size >= 1024) { if (tid < 512) { __update(dists, dists_i, tid, tid + 512); } __syncthreads(); } if (block_size >= 512) { if (tid < 256) { __update(dists, dists_i, tid, tid + 256); } __syncthreads(); } if (block_size >= 256) { if (tid < 128) { __update(dists, dists_i, tid, tid + 128); } __syncthreads(); } if (block_size >= 128) { if (tid < 64) { __update(dists, dists_i, tid, tid + 64); } __syncthreads(); } if (block_size >= 64) { if (tid < 32) { __update(dists, dists_i, tid, tid + 32); } __syncthreads(); } if (block_size >= 32) { if (tid < 16) { __update(dists, dists_i, tid, tid + 16); } __syncthreads(); } if (block_size >= 16) { if (tid < 8) { __update(dists, dists_i, tid, tid + 8); } __syncthreads(); } if (block_size >= 8) { if (tid < 4) { __update(dists, dists_i, tid, tid + 4); } __syncthreads(); } if (block_size >= 4) { if (tid < 2) { __update(dists, dists_i, tid, tid + 2); } __syncthreads(); } if (block_size >= 2) { if (tid < 1) { __update(dists, dists_i, tid, tid + 1); } __syncthreads(); } old = dists_i[0]; if (tid == 0) idxs[j] = old; } } void farthest_point_sampling_kernel_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs) { // dataset: (B, N, 3) // tmp: (B, N) // output: // idx: (B, M) cudaError_t err; unsigned int n_threads = opt_n_threads(n); switch (n_threads) { case 1024: farthest_point_sampling_kernel<1024><<>>(b, n, m, dataset, temp, idxs); break; case 512: farthest_point_sampling_kernel<512><<>>(b, n, m, dataset, temp, idxs); break; case 256: farthest_point_sampling_kernel<256><<>>(b, n, m, dataset, temp, idxs); break; case 128: farthest_point_sampling_kernel<128><<>>(b, n, m, dataset, temp, idxs); break; case 64: farthest_point_sampling_kernel<64><<>>(b, n, m, dataset, temp, idxs); break; case 32: 
farthest_point_sampling_kernel<32><<>>(b, n, m, dataset, temp, idxs); break; case 16: farthest_point_sampling_kernel<16><<>>(b, n, m, dataset, temp, idxs); break; case 8: farthest_point_sampling_kernel<8><<>>(b, n, m, dataset, temp, idxs); break; case 4: farthest_point_sampling_kernel<4><<>>(b, n, m, dataset, temp, idxs); break; case 2: farthest_point_sampling_kernel<2><<>>(b, n, m, dataset, temp, idxs); break; case 1: farthest_point_sampling_kernel<1><<>>(b, n, m, dataset, temp, idxs); break; default: farthest_point_sampling_kernel<512><<>>(b, n, m, dataset, temp, idxs); } err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } template __global__ void stack_farthest_point_sampling_kernel(int batch_size, int N, const float *dataset, float *temp, int *xyz_batch_cnt, int *idxs, int *num_sampled_points) { // """ // Args: // ctx: // dataset: (N1 + N2 + ..., 3) where N > npoint // temp: (N1 + N2 + ...) where N > npoint // xyz_batch_cnt: [N1, N2, ...] // num_sampled_points: [M1, M2, ...] int, number of features in the sampled set // Returns: // idxs: (npoint.sum()) tensor containing the set, // npoint: (M1, M2, ...) 
// """ __shared__ float dists[block_size]; __shared__ int dists_i[block_size]; int bs_idx = blockIdx.x; int xyz_batch_start_idx = 0, idxs_start_idx = 0; for (int k = 0; k < bs_idx; k++){ xyz_batch_start_idx += xyz_batch_cnt[k]; idxs_start_idx += num_sampled_points[k]; } dataset += xyz_batch_start_idx * 3; temp += xyz_batch_start_idx; idxs += idxs_start_idx; int n = xyz_batch_cnt[bs_idx]; int m = num_sampled_points[bs_idx]; int tid = threadIdx.x; const int stride = block_size; int old = 0; if (threadIdx.x == 0) idxs[0] = xyz_batch_start_idx; __syncthreads(); for (int j = 1; j < m; j++) { int besti = 0; float best = -1; float x1 = dataset[old * 3 + 0]; float y1 = dataset[old * 3 + 1]; float z1 = dataset[old * 3 + 2]; for (int k = tid; k < n; k += stride) { float x2, y2, z2; x2 = dataset[k * 3 + 0]; y2 = dataset[k * 3 + 1]; z2 = dataset[k * 3 + 2]; // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); // if (mag <= 1e-3) // continue; float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); float d2 = min(d, temp[k]); temp[k] = d2; besti = d2 > best ? k : besti; best = d2 > best ? 
d2 : best; } dists[tid] = best; dists_i[tid] = besti; __syncthreads(); if (block_size >= 1024) { if (tid < 512) { __update(dists, dists_i, tid, tid + 512); } __syncthreads(); } if (block_size >= 512) { if (tid < 256) { __update(dists, dists_i, tid, tid + 256); } __syncthreads(); } if (block_size >= 256) { if (tid < 128) { __update(dists, dists_i, tid, tid + 128); } __syncthreads(); } if (block_size >= 128) { if (tid < 64) { __update(dists, dists_i, tid, tid + 64); } __syncthreads(); } if (block_size >= 64) { if (tid < 32) { __update(dists, dists_i, tid, tid + 32); } __syncthreads(); } if (block_size >= 32) { if (tid < 16) { __update(dists, dists_i, tid, tid + 16); } __syncthreads(); } if (block_size >= 16) { if (tid < 8) { __update(dists, dists_i, tid, tid + 8); } __syncthreads(); } if (block_size >= 8) { if (tid < 4) { __update(dists, dists_i, tid, tid + 4); } __syncthreads(); } if (block_size >= 4) { if (tid < 2) { __update(dists, dists_i, tid, tid + 2); } __syncthreads(); } if (block_size >= 2) { if (tid < 1) { __update(dists, dists_i, tid, tid + 1); } __syncthreads(); } old = dists_i[0]; if (tid == 0) idxs[j] = old + xyz_batch_start_idx; } } void stack_farthest_point_sampling_kernel_launcher(int N, int batch_size, const float *dataset, float *temp, int *xyz_batch_cnt, int *idxs, int *num_sampled_points) { // """ // Args: // ctx: // dataset: (N1 + N2 + ..., 3) where N > npoint // temp: (N1 + N2 + ...) where N > npoint // xyz_batch_cnt: [N1, N2, ...] // npoint: int, number of features in the sampled set // Returns: // idxs: (npoint.sum()) tensor containing the set, // npoint: (M1, M2, ...) 
// """ cudaError_t err; unsigned int n_threads = opt_n_threads(N); stack_farthest_point_sampling_kernel<1024><<>>( batch_size, N, dataset, temp, xyz_batch_cnt, idxs, num_sampled_points ); err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/sampling_gpu.h ================================================ #ifndef _SAMPLING_GPU_H #define _SAMPLING_GPU_H #include #include #include int farthest_point_sampling_wrapper(int b, int n, int m, at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor); void farthest_point_sampling_kernel_launcher(int b, int n, int m, const float *dataset, float *temp, int *idxs); int stack_farthest_point_sampling_wrapper( at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor xyz_batch_cnt_tensor, at::Tensor idx_tensor, at::Tensor num_sampled_points_tensor); void stack_farthest_point_sampling_kernel_launcher(int N, int batch_size, const float *dataset, float *temp, int *xyz_batch_cnt, int *idxs, int *num_sampled_points); #endif ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool.cpp ================================================ /* Vector-pool aggregation based local feature aggregation for point cloud. PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection https://arxiv.org/abs/2102.00463 Written by Shaoshuai Shi All Rights Reserved 2020. 
*/ #include #include #include #include #include #include "vector_pool_gpu.h" extern THCState *state; #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) int query_stacked_local_neighbor_idxs_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor, at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor, at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor, at::Tensor cumsum_tensor, int avg_length_of_neighbor_idxs, float max_neighbour_distance, int nsample, int neighbor_type){ // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features // xyz_batch_cnt: (batch_size), [N1, N2, ...] // new_xyz: (M1 + M2 ..., 3) centers of the ball query // new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid // new_xyz_batch_cnt: (batch_size), [M1, M2, ...] 
// new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn // new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn // num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz // nsample: find all (-1), find limited number(>0) // neighbor_type: 1: ball, others: cube CHECK_INPUT(support_xyz_tensor); CHECK_INPUT(xyz_batch_cnt_tensor); CHECK_INPUT(new_xyz_tensor); CHECK_INPUT(new_xyz_batch_cnt_tensor); CHECK_INPUT(stack_neighbor_idxs_tensor); CHECK_INPUT(start_len_tensor); CHECK_INPUT(cumsum_tensor); const float *support_xyz = support_xyz_tensor.data(); const int *xyz_batch_cnt = xyz_batch_cnt_tensor.data(); const float *new_xyz = new_xyz_tensor.data(); const int *new_xyz_batch_cnt = new_xyz_batch_cnt_tensor.data(); int *stack_neighbor_idxs = stack_neighbor_idxs_tensor.data(); int *start_len = start_len_tensor.data(); int *cumsum = cumsum_tensor.data(); int batch_size = xyz_batch_cnt_tensor.size(0); int M = new_xyz_tensor.size(0); query_stacked_local_neighbor_idxs_kernel_launcher_stack( support_xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, stack_neighbor_idxs, start_len, cumsum, avg_length_of_neighbor_idxs, max_neighbour_distance, batch_size, M, nsample, neighbor_type ); return 0; } int query_three_nn_by_stacked_local_idxs_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor new_xyz_grid_centers_tensor, at::Tensor new_xyz_grid_idxs_tensor, at::Tensor new_xyz_grid_dist2_tensor, at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor, int M, int num_total_grids){ // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features // new_xyz: (M1 + M2 ..., 3) centers of the ball query // new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid // new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn // new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn // stack_neighbor_idxs: 
(max_length_of_neighbor_idxs) // start_len: (M1 + M2, 2) [start_offset, neighbor_length] CHECK_INPUT(support_xyz_tensor); CHECK_INPUT(new_xyz_tensor); CHECK_INPUT(new_xyz_grid_centers_tensor); CHECK_INPUT(new_xyz_grid_idxs_tensor); CHECK_INPUT(new_xyz_grid_dist2_tensor); CHECK_INPUT(stack_neighbor_idxs_tensor); CHECK_INPUT(start_len_tensor); const float *support_xyz = support_xyz_tensor.data(); const float *new_xyz = new_xyz_tensor.data(); const float *new_xyz_grid_centers = new_xyz_grid_centers_tensor.data(); int *new_xyz_grid_idxs = new_xyz_grid_idxs_tensor.data(); float *new_xyz_grid_dist2 = new_xyz_grid_dist2_tensor.data(); int *stack_neighbor_idxs = stack_neighbor_idxs_tensor.data(); int *start_len = start_len_tensor.data(); query_three_nn_by_stacked_local_idxs_kernel_launcher_stack( support_xyz, new_xyz, new_xyz_grid_centers, new_xyz_grid_idxs, new_xyz_grid_dist2, stack_neighbor_idxs, start_len, M, num_total_grids ); return 0; } int vector_pool_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor, at::Tensor support_features_tensor, at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor, at::Tensor new_features_tensor, at::Tensor new_local_xyz_tensor, at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor, int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance, int use_xyz, int num_max_sum_points, int nsample, int neighbor_type, int pooling_type){ // support_xyz_tensor: (N1 + N2 ..., 3) xyz coordinates of the features // support_features_tensor: (N1 + N2 ..., C) // xyz_batch_cnt: (batch_size), [N1, N2, ...] // new_xyz_tensor: (M1 + M2 ..., 3) centers of new positions // new_features_tensor: (M1 + M2 ..., C) // new_xyz_batch_cnt: (batch_size), [M1, M2, ...] 
// point_cnt_of_grid: (M1 + M2 ..., num_total_grids) // grouped_idxs_tensor: (num_max_sum_points, 3) // num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz // use_xyz: whether to calculate new_local_xyz // neighbor_type: 1: ball, others: cube // pooling_type: 0: avg_pool, 1: random choice CHECK_INPUT(support_xyz_tensor); CHECK_INPUT(support_features_tensor); CHECK_INPUT(xyz_batch_cnt_tensor); CHECK_INPUT(new_xyz_tensor); CHECK_INPUT(new_xyz_batch_cnt_tensor); CHECK_INPUT(new_features_tensor); CHECK_INPUT(new_local_xyz_tensor); CHECK_INPUT(point_cnt_of_grid_tensor); CHECK_INPUT(grouped_idxs_tensor); const float *support_xyz = support_xyz_tensor.data(); const float *support_features = support_features_tensor.data(); const int *xyz_batch_cnt = xyz_batch_cnt_tensor.data(); const float *new_xyz = new_xyz_tensor.data(); const int *new_xyz_batch_cnt = new_xyz_batch_cnt_tensor.data(); float *new_features = new_features_tensor.data(); float *new_local_xyz = new_local_xyz_tensor.data(); int *point_cnt_of_grid = point_cnt_of_grid_tensor.data(); int *grouped_idxs = grouped_idxs_tensor.data(); int N = support_xyz_tensor.size(0); int batch_size = xyz_batch_cnt_tensor.size(0); int M = new_xyz_tensor.size(0); int num_c_out = new_features_tensor.size(1); int num_c_in = support_features_tensor.size(1); int num_total_grids = point_cnt_of_grid_tensor.size(1); int cum_sum = vector_pool_kernel_launcher_stack( support_xyz, support_features, xyz_batch_cnt, new_xyz, new_features, new_local_xyz, new_xyz_batch_cnt, point_cnt_of_grid, grouped_idxs, num_grid_x, num_grid_y, num_grid_z, max_neighbour_distance, batch_size, N, M, num_c_in, num_c_out, num_total_grids, use_xyz, num_max_sum_points, nsample, neighbor_type, pooling_type ); return cum_sum; } int vector_pool_grad_wrapper_stack(at::Tensor grad_new_features_tensor, at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor, at::Tensor grad_support_features_tensor) { // 
grad_new_features_tensor: (M1 + M2 ..., C_out) // point_cnt_of_grid_tensor: (M1 + M2 ..., num_total_grids) // grouped_idxs_tensor: (num_max_sum_points, 3) [idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz] // grad_support_features_tensor: (N1 + N2 ..., C_in) CHECK_INPUT(grad_new_features_tensor); CHECK_INPUT(point_cnt_of_grid_tensor); CHECK_INPUT(grouped_idxs_tensor); CHECK_INPUT(grad_support_features_tensor); int M = grad_new_features_tensor.size(0); int num_c_out = grad_new_features_tensor.size(1); int N = grad_support_features_tensor.size(0); int num_c_in = grad_support_features_tensor.size(1); int num_total_grids = point_cnt_of_grid_tensor.size(1); int num_max_sum_points = grouped_idxs_tensor.size(0); const float *grad_new_features = grad_new_features_tensor.data(); const int *point_cnt_of_grid = point_cnt_of_grid_tensor.data(); const int *grouped_idxs = grouped_idxs_tensor.data(); float *grad_support_features = grad_support_features_tensor.data(); vector_pool_grad_kernel_launcher_stack( grad_new_features, point_cnt_of_grid, grouped_idxs, grad_support_features, N, M, num_c_out, num_c_in, num_total_grids, num_max_sum_points ); return 1; } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.cu ================================================ /* Vector-pool aggregation based local feature aggregation for point cloud. PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection https://arxiv.org/abs/2102.00463 Written by Shaoshuai Shi All Rights Reserved 2020. 
*/ #include #include #include #include "vector_pool_gpu.h" #include "cuda_utils.h" __global__ void query_three_nn_by_stacked_local_idxs_kernel( const float *support_xyz, const float *new_xyz, const float *new_xyz_grid_centers, int *new_xyz_grid_idxs, float *new_xyz_grid_dist2, const int *stack_neighbor_idxs, const int *start_len, int M, int num_total_grids){ // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features // new_xyz: (M1 + M2 ..., 3) centers of the ball query // new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid // new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn // new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn // stack_neighbor_idxs: (max_length_of_neighbor_idxs) // start_len: (M1 + M2, 2) [start_offset, neighbor_length] int grid_idx = blockIdx.y; int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; if (pt_idx >= M || grid_idx >= num_total_grids) return; new_xyz += pt_idx * 3; new_xyz_grid_centers += pt_idx * num_total_grids * 3 + grid_idx * 3; new_xyz_grid_idxs += pt_idx * num_total_grids * 3 + grid_idx * 3; new_xyz_grid_dist2 += pt_idx * num_total_grids * 3 + grid_idx * 3; start_len += pt_idx * 2; stack_neighbor_idxs += start_len[0]; int neighbor_length = start_len[1]; float center_x = new_xyz_grid_centers[0]; float center_y = new_xyz_grid_centers[1]; float center_z = new_xyz_grid_centers[2]; double best1 = 1e40, best2 = 1e40, best3 = 1e40; int besti1 = -1, besti2 = -1, besti3 = -1; for (int k = 0; k < neighbor_length; k++){ int cur_neighbor_idx = stack_neighbor_idxs[k]; float x = support_xyz[cur_neighbor_idx * 3 + 0]; float y = support_xyz[cur_neighbor_idx * 3 + 1]; float z = support_xyz[cur_neighbor_idx * 3 + 2]; float d = (center_x - x) * (center_x - x) + (center_y - y) * (center_y - y) + (center_z - z) * (center_z - z); if (d < best1) { best3 = best2; besti3 = besti2; best2 = best1; besti2 = besti1; best1 = d; besti1 = cur_neighbor_idx; } else if (d < best2) { 
best3 = best2; besti3 = besti2; best2 = d; besti2 = cur_neighbor_idx; } else if (d < best3) { best3 = d; besti3 = cur_neighbor_idx; } } if (besti2 == -1){ besti2 = besti1; best2 = best1; } if (besti3 == -1){ besti3 = besti1; best3 = best1; } new_xyz_grid_dist2[0] = best1; new_xyz_grid_dist2[1] = best2; new_xyz_grid_dist2[2] = best3; new_xyz_grid_idxs[0] = besti1; new_xyz_grid_idxs[1] = besti2; new_xyz_grid_idxs[2] = besti3; } int query_three_nn_by_stacked_local_idxs_kernel_launcher_stack( const float *support_xyz, const float *new_xyz, const float *new_xyz_grid_centers, int *new_xyz_grid_idxs, float *new_xyz_grid_dist2, const int *stack_neighbor_idxs, const int *start_len, int M, int num_total_grids){ // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features // new_xyz: (M1 + M2 ..., 3) centers of the ball query // new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid // new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn // new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn // stack_neighbor_idxs: (max_length_of_neighbor_idxs) // start_len: (M1 + M2, 2) [start_offset, neighbor_length] cudaError_t err; dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), num_total_grids); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); query_three_nn_by_stacked_local_idxs_kernel<<>>( support_xyz, new_xyz, new_xyz_grid_centers, new_xyz_grid_idxs, new_xyz_grid_dist2, stack_neighbor_idxs, start_len, M, num_total_grids ); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } return 0; } __global__ void query_stacked_local_neighbor_idxs_kernel( const float *support_xyz, const int *xyz_batch_cnt, const float *new_xyz, const int *new_xyz_batch_cnt, int *stack_neighbor_idxs, int *start_len, int *cumsum, int avg_length_of_neighbor_idxs, float 
max_neighbour_distance, int batch_size, int M, int nsample, int neighbor_type){ // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features // xyz_batch_cnt: (batch_size), [N1, N2, ...] // new_xyz: (M1 + M2 ..., 3) centers of the ball query // new_xyz_batch_cnt: (batch_size), [M1, M2, ...] // stack_neighbor_idxs: (max_length_of_neighbor_idxs) // start_len: (M1 + M2, 2) [start_offset, neighbor_length] // cumsum: (1), max offset of current data in stack_neighbor_idxs // max_neighbour_distance: float // nsample: find all (-1), find limited number(>0) // neighbor_type: 1: ball, others: cube int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; if (pt_idx >= M) return; int bs_idx = 0, pt_cnt = new_xyz_batch_cnt[0]; for (int k = 1; k < batch_size; k++){ if (pt_idx < pt_cnt) break; pt_cnt += new_xyz_batch_cnt[k]; bs_idx = k; } int xyz_batch_start_idx = 0; for (int k = 0; k < bs_idx; k++) xyz_batch_start_idx += xyz_batch_cnt[k]; support_xyz += xyz_batch_start_idx * 3; new_xyz += pt_idx * 3; start_len += pt_idx * 2; float new_x = new_xyz[0]; float new_y = new_xyz[1]; float new_z = new_xyz[2]; int n = xyz_batch_cnt[bs_idx]; float local_x, local_y, local_z; float radius2 = max_neighbour_distance * max_neighbour_distance; int temp_idxs[1000]; int sample_cnt = 0; for (int k = 0; k < n; ++k) { local_x = support_xyz[k * 3 + 0] - new_x; local_y = support_xyz[k * 3 + 1] - new_y; local_z = support_xyz[k * 3 + 2] - new_z; if (neighbor_type == 1){ // ball if (local_x * local_x + local_y * local_y + local_z * local_z > radius2){ continue; } } else{ // voxel if ((fabs(local_x) > max_neighbour_distance) | (fabs(local_y) > max_neighbour_distance) | (fabs(local_z) > max_neighbour_distance)){ continue; } } if (sample_cnt < 1000){ temp_idxs[sample_cnt] = k; } else{ break; } sample_cnt++; if (nsample > 0 && sample_cnt >= nsample) break; } start_len[0] = atomicAdd(cumsum, sample_cnt); start_len[1] = sample_cnt; int max_thresh = avg_length_of_neighbor_idxs * M; if (start_len[0] >= 
max_thresh) return; stack_neighbor_idxs += start_len[0]; if (start_len[0] + sample_cnt >= max_thresh) sample_cnt = max_thresh - start_len[0]; for (int k = 0; k < sample_cnt; k++){ stack_neighbor_idxs[k] = temp_idxs[k] + xyz_batch_start_idx; } } int query_stacked_local_neighbor_idxs_kernel_launcher_stack( const float *support_xyz, const int *xyz_batch_cnt, const float *new_xyz, const int *new_xyz_batch_cnt, int *stack_neighbor_idxs, int *start_len, int *cumsum, int avg_length_of_neighbor_idxs, float max_neighbour_distance, int batch_size, int M, int nsample, int neighbor_type){ // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features // xyz_batch_cnt: (batch_size), [N1, N2, ...] // new_xyz: (M1 + M2 ..., 3) centers of the ball query // new_xyz_batch_cnt: (batch_size), [M1, M2, ...] // stack_neighbor_idxs: (max_length_of_neighbor_idxs) // start_len: (M1 + M2, 2) [start_offset, neighbor_length] // cumsum: (1), max offset of current data in stack_neighbor_idxs // max_neighbour_distance: float // nsample: find all (-1), find limited number(>0) // neighbor_type: 1: ball, others: cube cudaError_t err; dim3 blocks(DIVUP(M, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); query_stacked_local_neighbor_idxs_kernel<<>>( support_xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, stack_neighbor_idxs, start_len, cumsum, avg_length_of_neighbor_idxs, max_neighbour_distance, batch_size, M, nsample, neighbor_type ); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } return 0; } __global__ void vector_pool_kernel_stack( const float *support_xyz, const float *support_features, const int *xyz_batch_cnt, const float *new_xyz, float *new_features, float *new_local_xyz, const int *new_xyz_batch_cnt, int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance, int batch_size, int 
M, int num_c_in, int num_c_out, int num_c_each_grid, int num_total_grids, int *point_cnt_of_grid, int *grouped_idxs, int use_xyz, float grid_size_x, float grid_size_y, float grid_size_z, int *cum_sum, int num_max_sum_points, int nsample, int neighbor_type, int pooling_type){ // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features // support_features: (N1 + N2 ..., C) // xyz_batch_cnt: (batch_size), [N1, N2, ...] // new_xyz: (M1 + M2 ..., 3) centers of the ball query // new_features: (M1 + M2 ..., C), C = num_total_grids * num_c_each_grid // new_local_xyz: (M1 + M2 ..., 3 * num_total_grids) // new_xyz_batch_cnt: (batch_size), [M1, M2, ...] // num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz // point_cnt_of_grid: (M1 + M2 ..., num_total_grids) // grouped_idxs: (num_max_sum_points, 3)[idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz] // use_xyz: whether to calculate new_local_xyz // neighbor_type: 1: ball, others: cube // pooling_type: 0: avg_pool, 1: random choice int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; if (pt_idx >= M) return; int bs_idx = 0, pt_cnt = new_xyz_batch_cnt[0]; for (int k = 1; k < batch_size; k++){ if (pt_idx < pt_cnt) break; pt_cnt += new_xyz_batch_cnt[k]; bs_idx = k; } int xyz_batch_start_idx = 0; for (int k = 0; k < bs_idx; k++) xyz_batch_start_idx += xyz_batch_cnt[k]; support_xyz += xyz_batch_start_idx * 3; support_features += xyz_batch_start_idx * num_c_in; new_xyz += pt_idx * 3; new_features += pt_idx * num_c_out; point_cnt_of_grid += pt_idx * num_total_grids; new_local_xyz += pt_idx * 3 * num_total_grids; float new_x = new_xyz[0]; float new_y = new_xyz[1]; float new_z = new_xyz[2]; int n = xyz_batch_cnt[bs_idx], grid_idx_x, grid_idx_y, grid_idx_z, grid_idx; float local_x, local_y, local_z; float radius2 = max_neighbour_distance * max_neighbour_distance; int sample_cnt = 0; for (int k = 0; k < n; ++k) { local_x = support_xyz[k * 3 + 0] - new_x; local_y = support_xyz[k * 3 
+ 1] - new_y; local_z = support_xyz[k * 3 + 2] - new_z; if (neighbor_type == 1){ // ball if (local_x * local_x + local_y * local_y + local_z * local_z > radius2){ continue; } } else{ // voxel if ((fabs(local_x) > max_neighbour_distance) | (fabs(local_y) > max_neighbour_distance) | (fabs(local_z) > max_neighbour_distance)){ continue; } } grid_idx_x = floorf((local_x + max_neighbour_distance) / grid_size_x); grid_idx_y = floorf((local_y + max_neighbour_distance) / grid_size_y); grid_idx_z = floorf((local_z + max_neighbour_distance) / grid_size_z); grid_idx = grid_idx_x * num_grid_y * num_grid_z + grid_idx_y * num_grid_z + grid_idx_z; grid_idx = min(max(grid_idx, 0), num_total_grids - 1); if (pooling_type == 0){ // avg pooling point_cnt_of_grid[grid_idx] ++; for (int i = 0; i < num_c_in; i++){ new_features[grid_idx * num_c_each_grid + i % num_c_each_grid] += support_features[k * num_c_in + i]; } if (use_xyz){ new_local_xyz[grid_idx * 3 + 0] += local_x; new_local_xyz[grid_idx * 3 + 1] += local_y; new_local_xyz[grid_idx * 3 + 2] += local_z; } int cnt = atomicAdd(cum_sum, 1); if (cnt >= num_max_sum_points) continue; // continue to statistics the max number of points grouped_idxs[cnt * 3 + 0] = xyz_batch_start_idx + k; grouped_idxs[cnt * 3 + 1] = pt_idx; grouped_idxs[cnt * 3 + 2] = grid_idx; sample_cnt++; if(nsample > 0 && sample_cnt >= nsample) break; } else if (pooling_type == 1){ // random choose one within sub-voxel // printf("new_xyz=(%.2f, %.2f, %.2f, ), find neighbor k=%d: support_xyz=(%.2f, %.2f, %.2f), local_xyz=(%.2f, %.2f, %.2f), neighbor=%.2f, grid_idx=%d, point_cnt_of_grid_idx=%d\n", // new_x, new_y, new_z, k, support_xyz[k * 3 + 0], support_xyz[k * 3 + 1], support_xyz[k * 3 + 2], local_x, local_y, local_z, max_neighbour_distance, grid_idx, point_cnt_of_grid[grid_idx]); if (point_cnt_of_grid[grid_idx] == 0){ point_cnt_of_grid[grid_idx] ++; for (int i = 0; i < num_c_in; i++){ new_features[grid_idx * num_c_each_grid + i % num_c_each_grid] = support_features[k * 
num_c_in + i]; } if (use_xyz){ new_local_xyz[grid_idx * 3 + 0] = local_x; new_local_xyz[grid_idx * 3 + 1] = local_y; new_local_xyz[grid_idx * 3 + 2] = local_z; } int cnt = atomicAdd(cum_sum, 1); if (cnt >= num_max_sum_points) continue; // continue to statistics the max number of points grouped_idxs[cnt * 3 + 0] = xyz_batch_start_idx + k; grouped_idxs[cnt * 3 + 1] = pt_idx; grouped_idxs[cnt * 3 + 2] = grid_idx; sample_cnt++; if(nsample > 0 && sample_cnt >= nsample || sample_cnt >= num_total_grids) break; } } } } int vector_pool_kernel_launcher_stack( const float *support_xyz, const float *support_features, const int *xyz_batch_cnt, const float *new_xyz, float *new_features, float *new_local_xyz, const int *new_xyz_batch_cnt, int *point_cnt_of_grid, int *grouped_idxs, int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance, int batch_size, int N, int M, int num_c_in, int num_c_out, int num_total_grids, int use_xyz, int num_max_sum_points, int nsample, int neighbor_type, int pooling_type){ // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features // support_features: (N1 + N2 ..., C) // xyz_batch_cnt: (batch_size), [N1, N2, ...] // new_xyz: (M1 + M2 ..., 3) centers of the ball query // new_features: (M1 + M2 ..., C) // new_local_xyz: (M1 + M2 ..., 3) // new_xyz_batch_cnt: (batch_size), [M1, M2, ...] 
// num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz // use_xyz: whether to calculate new_local_xyz // grouped_idxs: (num_max_sum_points, 3)[idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz] // neighbor_type: 1: ball, others: cube // pooling_type: 0: avg_pool, 1: random choice cudaError_t err; int num_c_each_grid = num_c_out / num_total_grids; float grid_size_x = max_neighbour_distance * 2 / num_grid_x; float grid_size_y = max_neighbour_distance * 2 / num_grid_y; float grid_size_z = max_neighbour_distance * 2 / num_grid_z; dim3 blocks(DIVUP(M, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); int cum_sum = 0; int *p_cum_sum; cudaMalloc((void**)&p_cum_sum, sizeof(int)); cudaMemcpy(p_cum_sum, &cum_sum, sizeof(int), cudaMemcpyHostToDevice); vector_pool_kernel_stack<<>>( support_xyz, support_features, xyz_batch_cnt, new_xyz, new_features, new_local_xyz, new_xyz_batch_cnt, num_grid_x, num_grid_y, num_grid_z, max_neighbour_distance, batch_size, M, num_c_in, num_c_out, num_c_each_grid, num_total_grids, point_cnt_of_grid, grouped_idxs, use_xyz, grid_size_x, grid_size_y, grid_size_z, p_cum_sum, num_max_sum_points, nsample, neighbor_type, pooling_type ); cudaMemcpy(&cum_sum, p_cum_sum, sizeof(int), cudaMemcpyDeviceToHost); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } return cum_sum; } __global__ void vector_pool_grad_kernel_stack(const float *grad_new_features, const int *point_cnt_of_grid, const int *grouped_idxs, float *grad_support_features, int N, int M, int num_c_out, int num_c_in, int num_c_each_grid, int num_total_grids, int num_max_sum_points){ // grad_new_features: (M1 + M2 ..., C_out) // point_cnt_of_grid: (M1 + M2 ..., num_total_grids) // grouped_idxs: (num_max_sum_points, 3) [idx of support_xyz, idx of new_xyz, idx 
of grid_idx in new_xyz] // grad_support_features: (N1 + N2 ..., C_in) int channel_idx = blockIdx.y; int index = blockIdx.x * blockDim.x + threadIdx.x; if (index >= num_max_sum_points || channel_idx >= num_c_in) return; int idx_of_support_xyz = grouped_idxs[index * 3 + 0]; int idx_of_new_xyz = grouped_idxs[index * 3 + 1]; int idx_of_grid_idx = grouped_idxs[index * 3 + 2]; int num_total_pts = point_cnt_of_grid[idx_of_new_xyz * num_total_grids + idx_of_grid_idx]; grad_support_features += idx_of_support_xyz * num_c_in + channel_idx; grad_new_features += idx_of_new_xyz * num_c_out + idx_of_grid_idx * num_c_each_grid; int channel_idx_of_cin = channel_idx % num_c_each_grid; float cur_grad = 1 / fmaxf(float(num_total_pts), 1.0); atomicAdd(grad_support_features, grad_new_features[channel_idx_of_cin] * cur_grad); } void vector_pool_grad_kernel_launcher_stack( const float *grad_new_features, const int *point_cnt_of_grid, const int *grouped_idxs, float *grad_support_features, int N, int M, int num_c_out, int num_c_in, int num_total_grids, int num_max_sum_points){ // grad_new_features: (M1 + M2 ..., C_out) // point_cnt_of_grid: (M1 + M2 ..., num_total_grids) // grouped_idxs: (num_max_sum_points, 3) [idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz] // grad_support_features: (N1 + N2 ..., C_in) int num_c_each_grid = num_c_out / num_total_grids; cudaError_t err; dim3 blocks(DIVUP(num_max_sum_points, THREADS_PER_BLOCK), num_c_in); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); vector_pool_grad_kernel_stack<<>>( grad_new_features, point_cnt_of_grid, grouped_idxs, grad_support_features, N, M, num_c_out, num_c_in, num_c_each_grid, num_total_grids, num_max_sum_points ); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } ================================================ FILE: 
pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.h ================================================ /* Vector-pool aggregation based local feature aggregation for point cloud. PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection https://arxiv.org/abs/2102.00463 Written by Shaoshuai Shi All Rights Reserved 2020. */ #ifndef _STACK_VECTOR_POOL_GPU_H #define _STACK_VECTOR_POOL_GPU_H #include #include #include #include int query_stacked_local_neighbor_idxs_kernel_launcher_stack( const float *support_xyz, const int *xyz_batch_cnt, const float *new_xyz, const int *new_xyz_batch_cnt, int *stack_neighbor_idxs, int *start_len, int *cumsum, int avg_length_of_neighbor_idxs, float max_neighbour_distance, int batch_size, int M, int nsample, int neighbor_type); int query_stacked_local_neighbor_idxs_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor, at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor, at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor, at::Tensor cumsum_tensor, int avg_length_of_neighbor_idxs, float max_neighbour_distance, int nsample, int neighbor_type); int query_three_nn_by_stacked_local_idxs_kernel_launcher_stack( const float *support_xyz, const float *new_xyz, const float *new_xyz_grid_centers, int *new_xyz_grid_idxs, float *new_xyz_grid_dist2, const int *stack_neighbor_idxs, const int *start_len, int M, int num_total_grids); int query_three_nn_by_stacked_local_idxs_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor new_xyz_grid_centers_tensor, at::Tensor new_xyz_grid_idxs_tensor, at::Tensor new_xyz_grid_dist2_tensor, at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor, int M, int num_total_grids); int vector_pool_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor, at::Tensor support_features_tensor, at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor, at::Tensor 
new_features_tensor, at::Tensor new_local_xyz, at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor, int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance, int use_xyz, int num_max_sum_points, int nsample, int neighbor_type, int pooling_type); int vector_pool_kernel_launcher_stack( const float *support_xyz, const float *support_features, const int *xyz_batch_cnt, const float *new_xyz, float *new_features, float * new_local_xyz, const int *new_xyz_batch_cnt, int *point_cnt_of_grid, int *grouped_idxs, int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance, int batch_size, int N, int M, int num_c_in, int num_c_out, int num_total_grids, int use_xyz, int num_max_sum_points, int nsample, int neighbor_type, int pooling_type); int vector_pool_grad_wrapper_stack(at::Tensor grad_new_features_tensor, at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor, at::Tensor grad_support_features_tensor); void vector_pool_grad_kernel_launcher_stack( const float *grad_new_features, const int *point_cnt_of_grid, const int *grouped_idxs, float *grad_support_features, int N, int M, int num_c_out, int num_c_in, int num_total_grids, int num_max_sum_points); #endif ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/voxel_query.cpp ================================================ #include #include #include #include #include #include "voxel_query_gpu.h" extern THCState *state; #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) int voxel_query_wrapper_stack(int M, int R1, int R2, int R3, int nsample, float radius, int z_range, int 
y_range, int x_range, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor new_coords_tensor, at::Tensor point_indices_tensor, at::Tensor idx_tensor) { CHECK_INPUT(new_coords_tensor); CHECK_INPUT(point_indices_tensor); CHECK_INPUT(new_xyz_tensor); CHECK_INPUT(xyz_tensor); const float *new_xyz = new_xyz_tensor.data(); const float *xyz = xyz_tensor.data(); const int *new_coords = new_coords_tensor.data(); const int *point_indices = point_indices_tensor.data(); int *idx = idx_tensor.data(); voxel_query_kernel_launcher_stack(M, R1, R2, R3, nsample, radius, z_range, y_range, x_range, new_xyz, xyz, new_coords, point_indices, idx); return 1; } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/voxel_query_gpu.cu ================================================ #include #include #include #include #include "voxel_query_gpu.h" #include "cuda_utils.h" __global__ void voxel_query_kernel_stack(int M, int R1, int R2, int R3, int nsample, float radius, int z_range, int y_range, int x_range, const float *new_xyz, const float *xyz, const int *new_coords, const int *point_indices, int *idx) { // :param new_coords: (M1 + M2 ..., 4) centers of the ball query // :param point_indices: (B, Z, Y, X) // output: // idx: (M1 + M2, nsample) int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; if (pt_idx >= M) return; new_xyz += pt_idx * 3; new_coords += pt_idx * 4; idx += pt_idx * nsample; curandState state; curand_init(pt_idx, 0, 0, &state); float radius2 = radius * radius; float new_x = new_xyz[0]; float new_y = new_xyz[1]; float new_z = new_xyz[2]; int batch_idx = new_coords[0]; int new_coords_z = new_coords[1]; int new_coords_y = new_coords[2]; int new_coords_x = new_coords[3]; int cnt = 0; int cnt2 = 0; // for (int dz = -1*z_range; dz <= z_range; ++dz) { for (int dz = -1*z_range; dz <= z_range; ++dz) { int z_coord = new_coords_z + dz; if (z_coord < 0 || z_coord >= R1) continue; for (int dy = -1*y_range; dy <= y_range; ++dy) { int y_coord 
= new_coords_y + dy; if (y_coord < 0 || y_coord >= R2) continue; for (int dx = -1*x_range; dx <= x_range; ++dx) { int x_coord = new_coords_x + dx; if (x_coord < 0 || x_coord >= R3) continue; int index = batch_idx * R1 * R2 * R3 + \ z_coord * R2 * R3 + \ y_coord * R3 + \ x_coord; int neighbor_idx = point_indices[index]; if (neighbor_idx < 0) continue; float x_per = xyz[neighbor_idx*3 + 0]; float y_per = xyz[neighbor_idx*3 + 1]; float z_per = xyz[neighbor_idx*3 + 2]; float dist2 = (x_per - new_x) * (x_per - new_x) + (y_per - new_y) * (y_per - new_y) + (z_per - new_z) * (z_per - new_z); if (dist2 > radius2) continue; ++cnt2; if (cnt < nsample) { if (cnt == 0) { for (int l = 0; l < nsample; ++l) { idx[l] = neighbor_idx; } } idx[cnt] = neighbor_idx; ++cnt; } // else { // float rnd = curand_uniform(&state); // if (rnd < (float(nsample) / cnt2)) { // int insertidx = ceilf(curand_uniform(&state) * nsample) - 1; // idx[insertidx] = neighbor_idx; // } // } } } } if (cnt == 0) idx[0] = -1; } void voxel_query_kernel_launcher_stack(int M, int R1, int R2, int R3, int nsample, float radius, int z_range, int y_range, int x_range, const float *new_xyz, const float *xyz, const int *new_coords, const int *point_indices, int *idx) { // :param new_coords: (M1 + M2 ..., 4) centers of the voxel query // :param point_indices: (B, Z, Y, X) // output: // idx: (M1 + M2, nsample) cudaError_t err; dim3 blocks(DIVUP(M, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); voxel_query_kernel_stack<<>>(M, R1, R2, R3, nsample, radius, z_range, y_range, x_range, new_xyz, xyz, new_coords, point_indices, idx); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/src/voxel_query_gpu.h 
================================================ #ifndef _STACK_VOXEL_QUERY_GPU_H #define _STACK_VOXEL_QUERY_GPU_H #include #include #include #include int voxel_query_wrapper_stack(int M, int R1, int R2, int R3, int nsample, float radius, int z_range, int y_range, int x_range, at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor new_coords_tensor, at::Tensor point_indices_tensor, at::Tensor idx_tensor); void voxel_query_kernel_launcher_stack(int M, int R1, int R2, int R3, int nsample, float radius, int z_range, int y_range, int x_range, const float *new_xyz, const float *xyz, const int *new_coords, const int *point_indices, int *idx); #endif ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/voxel_pool_modules.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from . import voxel_query_utils from typing import List class NeighborVoxelSAModuleMSG(nn.Module): def __init__(self, *, query_ranges: List[List[int]], radii: List[float], nsamples: List[int], mlps: List[List[int]], use_xyz: bool = True, pool_method='max_pool'): """ Args: query_ranges: list of int, list of neighbor ranges to group with nsamples: list of int, number of samples in each ball query mlps: list of list of int, spec of the pointnet before the global pooling for each scale use_xyz: pool_method: max_pool / avg_pool """ super().__init__() assert len(query_ranges) == len(nsamples) == len(mlps) self.groupers = nn.ModuleList() self.mlps_in = nn.ModuleList() self.mlps_pos = nn.ModuleList() self.mlps_out = nn.ModuleList() for i in range(len(query_ranges)): max_range = query_ranges[i] nsample = nsamples[i] radius = radii[i] self.groupers.append(voxel_query_utils.VoxelQueryAndGrouping(max_range, radius, nsample)) mlp_spec = mlps[i] cur_mlp_in = nn.Sequential( nn.Conv1d(mlp_spec[0], mlp_spec[1], kernel_size=1, bias=False), nn.BatchNorm1d(mlp_spec[1]) ) cur_mlp_pos = nn.Sequential( nn.Conv2d(3, 
mlp_spec[1], kernel_size=1, bias=False), nn.BatchNorm2d(mlp_spec[1]) ) cur_mlp_out = nn.Sequential( nn.Conv1d(mlp_spec[1], mlp_spec[2], kernel_size=1, bias=False), nn.BatchNorm1d(mlp_spec[2]), nn.ReLU() ) self.mlps_in.append(cur_mlp_in) self.mlps_pos.append(cur_mlp_pos) self.mlps_out.append(cur_mlp_out) self.relu = nn.ReLU() self.pool_method = pool_method self.init_weights() def init_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): nn.init.kaiming_normal_(m.weight) if m.bias is not None: nn.init.constant_(m.bias, 0) if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): nn.init.constant_(m.weight, 1.0) nn.init.constant_(m.bias, 0) def forward(self, xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, \ new_coords, features, voxel2point_indices): """ :param xyz: (N1 + N2 ..., 3) tensor of the xyz coordinates of the features :param xyz_batch_cnt: (batch_size), [N1, N2, ...] :param new_xyz: (M1 + M2 ..., 3) :param new_xyz_batch_cnt: (batch_size), [M1, M2, ...] 
:param features: (N1 + N2 ..., C) tensor of the descriptors of the the features :param point_indices: (B, Z, Y, X) tensor of point indices :return: new_xyz: (M1 + M2 ..., 3) tensor of the new features' xyz new_features: (M1 + M2 ..., \sum_k(mlps[k][-1])) tensor of the new_features descriptors """ # change the order to [batch_idx, z, y, x] new_coords = new_coords[:, [0, 3, 2, 1]].contiguous() new_features_list = [] for k in range(len(self.groupers)): # features_in: (1, C, M1+M2) features_in = features.permute(1, 0).unsqueeze(0) features_in = self.mlps_in[k](features_in) # features_in: (1, M1+M2, C) features_in = features_in.permute(0, 2, 1).contiguous() # features_in: (M1+M2, C) features_in = features_in.view(-1, features_in.shape[-1]) # grouped_features: (M1+M2, C, nsample) # grouped_xyz: (M1+M2, 3, nsample) grouped_features, grouped_xyz, empty_ball_mask = self.groupers[k]( new_coords, xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, features_in, voxel2point_indices ) grouped_features[empty_ball_mask] = 0 # grouped_features: (1, C, M1+M2, nsample) grouped_features = grouped_features.permute(1, 0, 2).unsqueeze(dim=0) # grouped_xyz: (M1+M2, 3, nsample) grouped_xyz = grouped_xyz - new_xyz.unsqueeze(-1) grouped_xyz[empty_ball_mask] = 0 # grouped_xyz: (1, 3, M1+M2, nsample) grouped_xyz = grouped_xyz.permute(1, 0, 2).unsqueeze(0) # grouped_xyz: (1, C, M1+M2, nsample) position_features = self.mlps_pos[k](grouped_xyz) new_features = grouped_features + position_features new_features = self.relu(new_features) if self.pool_method == 'max_pool': new_features = F.max_pool2d( new_features, kernel_size=[1, new_features.size(3)] ).squeeze(dim=-1) # (1, C, M1 + M2 ...) elif self.pool_method == 'avg_pool': new_features = F.avg_pool2d( new_features, kernel_size=[1, new_features.size(3)] ).squeeze(dim=-1) # (1, C, M1 + M2 ...) 
else: raise NotImplementedError new_features = self.mlps_out[k](new_features) new_features = new_features.squeeze(dim=0).permute(1, 0) # (M1 + M2 ..., C) new_features_list.append(new_features) # (M1 + M2 ..., C) new_features = torch.cat(new_features_list, dim=1) return new_features ================================================ FILE: pcdet/ops/pointnet2/pointnet2_stack/voxel_query_utils.py ================================================ import torch from torch.autograd import Variable from torch.autograd import Function import torch.nn as nn from typing import List from . import pointnet2_stack_cuda as pointnet2 from . import pointnet2_utils class VoxelQuery(Function): @staticmethod def forward(ctx, max_range: int, radius: float, nsample: int, xyz: torch.Tensor, \ new_xyz: torch.Tensor, new_coords: torch.Tensor, point_indices: torch.Tensor): """ Args: ctx: max_range: int, max range of voxels to be grouped nsample: int, maximum number of features in the balls new_coords: (M1 + M2, 4), [batch_id, z, y, x] cooridnates of keypoints new_xyz_batch_cnt: (batch_size), [M1, M2, ...] 
point_indices: (batch_size, Z, Y, X) 4-D tensor recording the point indices of voxels Returns: idx: (M1 + M2, nsample) tensor with the indicies of the features that form the query balls """ assert new_xyz.is_contiguous() assert xyz.is_contiguous() assert new_coords.is_contiguous() assert point_indices.is_contiguous() M = new_coords.shape[0] B, Z, Y, X = point_indices.shape idx = torch.cuda.IntTensor(M, nsample).zero_() z_range, y_range, x_range = max_range pointnet2.voxel_query_wrapper(M, Z, Y, X, nsample, radius, z_range, y_range, x_range, \ new_xyz, xyz, new_coords, point_indices, idx) empty_ball_mask = (idx[:, 0] == -1) idx[empty_ball_mask] = 0 return idx, empty_ball_mask @staticmethod def backward(ctx, a=None): return None, None, None, None voxel_query = VoxelQuery.apply class VoxelQueryAndGrouping(nn.Module): def __init__(self, max_range: int, radius: float, nsample: int): """ Args: radius: float, radius of ball nsample: int, maximum number of features to gather in the ball """ super().__init__() self.max_range, self.radius, self.nsample = max_range, radius, nsample def forward(self, new_coords: torch.Tensor, xyz: torch.Tensor, xyz_batch_cnt: torch.Tensor, new_xyz: torch.Tensor, new_xyz_batch_cnt: torch.Tensor, features: torch.Tensor, voxel2point_indices: torch.Tensor): """ Args: new_coords: (M1 + M2 ..., 3) centers voxel indices of the ball query xyz: (N1 + N2 ..., 3) xyz coordinates of the features xyz_batch_cnt: (batch_size), [N1, N2, ...] new_xyz: (M1 + M2 ..., 3) centers of the ball query new_xyz_batch_cnt: (batch_size), [M1, M2, ...] 
features: (N1 + N2 ..., C) tensor of features to group voxel2point_indices: (B, Z, Y, X) tensor of points indices of voxels Returns: new_features: (M1 + M2, C, nsample) tensor """ assert xyz.shape[0] == xyz_batch_cnt.sum(), 'xyz: %s, xyz_batch_cnt: %s' % (str(xyz.shape), str(new_xyz_batch_cnt)) assert new_coords.shape[0] == new_xyz_batch_cnt.sum(), \ 'new_coords: %s, new_xyz_batch_cnt: %s' % (str(new_coords.shape), str(new_xyz_batch_cnt)) batch_size = xyz_batch_cnt.shape[0] # idx: (M1 + M2 ..., nsample), empty_ball_mask: (M1 + M2 ...) idx1, empty_ball_mask1 = voxel_query(self.max_range, self.radius, self.nsample, xyz, new_xyz, new_coords, voxel2point_indices) idx1 = idx1.view(batch_size, -1, self.nsample) count = 0 for bs_idx in range(batch_size): idx1[bs_idx] -= count count += xyz_batch_cnt[bs_idx] idx1 = idx1.view(-1, self.nsample) idx1[empty_ball_mask1] = 0 idx = idx1 empty_ball_mask = empty_ball_mask1 grouped_xyz = pointnet2_utils.grouping_operation(xyz, xyz_batch_cnt, idx, new_xyz_batch_cnt) # grouped_features: (M1 + M2, C, nsample) grouped_features = pointnet2_utils.grouping_operation(features, xyz_batch_cnt, idx, new_xyz_batch_cnt) return grouped_features, grouped_xyz, empty_ball_mask ================================================ FILE: pcdet/ops/roiaware_pool3d/roiaware_pool3d_utils.py ================================================ import torch import torch.nn as nn from torch.autograd import Function from ...utils import common_utils from . 
import roiaware_pool3d_cuda def points_in_boxes_cpu(points, boxes): """ Args: points: (num_points, 3) boxes: [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center, each box DO NOT overlaps Returns: point_indices: (N, num_points) """ assert boxes.shape[1] == 7 assert points.shape[1] == 3 points, is_numpy = common_utils.check_numpy_to_torch(points) boxes, is_numpy = common_utils.check_numpy_to_torch(boxes) point_indices = points.new_zeros((boxes.shape[0], points.shape[0]), dtype=torch.int) roiaware_pool3d_cuda.points_in_boxes_cpu(boxes.float().contiguous(), points.float().contiguous(), point_indices) return point_indices.numpy() if is_numpy else point_indices def points_in_boxes_gpu(points, boxes): """ :param points: (B, M, 3) :param boxes: (B, T, 7), num_valid_boxes <= T :return box_idxs_of_pts: (B, M), default background = -1 """ assert boxes.shape[0] == points.shape[0] assert boxes.shape[2] == 7 and points.shape[2] == 3 batch_size, num_points, _ = points.shape box_idxs_of_pts = points.new_zeros((batch_size, num_points), dtype=torch.int).fill_(-1) roiaware_pool3d_cuda.points_in_boxes_gpu(boxes.contiguous(), points.contiguous(), box_idxs_of_pts) return box_idxs_of_pts class RoIAwarePool3d(nn.Module): def __init__(self, out_size, max_pts_each_voxel=128): super().__init__() self.out_size = out_size self.max_pts_each_voxel = max_pts_each_voxel def forward(self, rois, pts, pts_feature, pool_method='max'): assert pool_method in ['max', 'avg'] return RoIAwarePool3dFunction.apply(rois, pts, pts_feature, self.out_size, self.max_pts_each_voxel, pool_method) class RoIAwarePool3dFunction(Function): @staticmethod def forward(ctx, rois, pts, pts_feature, out_size, max_pts_each_voxel, pool_method): """ Args: ctx: rois: (N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center pts: (npoints, 3) pts_feature: (npoints, C) out_size: int or tuple, like 7 or (7, 7, 7) max_pts_each_voxel: pool_method: 'max' or 'avg' Returns: pooled_features: (N, out_x, out_y, out_z, C) """ 
assert rois.shape[1] == 7 and pts.shape[1] == 3 if isinstance(out_size, int): out_x = out_y = out_z = out_size else: assert len(out_size) == 3 for k in range(3): assert isinstance(out_size[k], int) out_x, out_y, out_z = out_size num_rois = rois.shape[0] num_channels = pts_feature.shape[-1] num_pts = pts.shape[0] pooled_features = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, num_channels)) argmax = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, num_channels), dtype=torch.int) pts_idx_of_voxels = pts_feature.new_zeros((num_rois, out_x, out_y, out_z, max_pts_each_voxel), dtype=torch.int) pool_method_map = {'max': 0, 'avg': 1} pool_method = pool_method_map[pool_method] roiaware_pool3d_cuda.forward(rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, pool_method) ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, pool_method, num_pts, num_channels) return pooled_features @staticmethod def backward(ctx, grad_out): """ :param grad_out: (N, out_x, out_y, out_z, C) :return: grad_in: (npoints, C) """ pts_idx_of_voxels, argmax, pool_method, num_pts, num_channels = ctx.roiaware_pool3d_for_backward grad_in = grad_out.new_zeros((num_pts, num_channels)) roiaware_pool3d_cuda.backward(pts_idx_of_voxels, argmax, grad_out.contiguous(), grad_in, pool_method) return None, None, grad_in, None, None, None if __name__ == '__main__': pass ================================================ FILE: pcdet/ops/roiaware_pool3d/src/roiaware_pool3d.cpp ================================================ /* RoI-aware point cloud feature pooling Reference paper: https://arxiv.org/abs/1907.03670 Written by Shaoshuai Shi All Rights Reserved 2019-2020. 
*/ #include #include #include //#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") //#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") //#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z, const float *rois, const float *pts, const float *pts_feature, int *argmax, int *pts_idx_of_voxels, float *pooled_features, int pool_method); void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel, const int *pts_idx_of_voxels, const int *argmax, const float *grad_out, float *grad_in, int pool_method); void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, const float *boxes, const float *pts, int *box_idx_of_points); int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature, at::Tensor argmax, at::Tensor pts_idx_of_voxels, at::Tensor pooled_features, int pool_method){ // params rois: (N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center // params pts: (npoints, 3) [x, y, z] // params pts_feature: (npoints, C) // params argmax: (N, out_x, out_y, out_z, C) // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params pooled_features: (N, out_x, out_y, out_z, C) // params pool_method: 0: max_pool 1: avg_pool // CHECK_INPUT(rois); // CHECK_INPUT(pts); // CHECK_INPUT(pts_feature); // CHECK_INPUT(argmax); // CHECK_INPUT(pts_idx_of_voxels); // CHECK_INPUT(pooled_features); int boxes_num = rois.size(0); int pts_num = pts.size(0); int channels = pts_feature.size(1); int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter int out_x = pts_idx_of_voxels.size(1); int out_y = pts_idx_of_voxels.size(2); int out_z = pts_idx_of_voxels.size(3); assert ((out_x < 256) && (out_y < 256) && (out_z < 256)); // we encode index with 8bit const 
float *rois_data = rois.data(); const float *pts_data = pts.data(); const float *pts_feature_data = pts_feature.data(); int *argmax_data = argmax.data(); int *pts_idx_of_voxels_data = pts_idx_of_voxels.data(); float *pooled_features_data = pooled_features.data(); roiaware_pool3d_launcher(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, rois_data, pts_data, pts_feature_data, argmax_data, pts_idx_of_voxels_data, pooled_features_data, pool_method); return 1; } int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels, at::Tensor argmax, at::Tensor grad_out, at::Tensor grad_in, int pool_method){ // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) // params argmax: (N, out_x, out_y, out_z, C) // params grad_out: (N, out_x, out_y, out_z, C) // params grad_in: (npoints, C), return value // params pool_method: 0: max_pool 1: avg_pool // CHECK_INPUT(pts_idx_of_voxels); // CHECK_INPUT(argmax); // CHECK_INPUT(grad_out); // CHECK_INPUT(grad_in); int boxes_num = pts_idx_of_voxels.size(0); int out_x = pts_idx_of_voxels.size(1); int out_y = pts_idx_of_voxels.size(2); int out_z = pts_idx_of_voxels.size(3); int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter int channels = grad_out.size(4); const int *pts_idx_of_voxels_data = pts_idx_of_voxels.data(); const int *argmax_data = argmax.data(); const float *grad_out_data = grad_out.data(); float *grad_in_data = grad_in.data(); roiaware_pool3d_backward_launcher(boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel, pts_idx_of_voxels_data, argmax_data, grad_out_data, grad_in_data, pool_method); return 1; } int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tensor box_idx_of_points_tensor){ // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center // params pts: (B, npoints, 3) [x, y, z] // params boxes_idx_of_points: (B, npoints), default -1 // CHECK_INPUT(boxes_tensor); // CHECK_INPUT(pts_tensor); // 
CHECK_INPUT(box_idx_of_points_tensor); int batch_size = boxes_tensor.size(0); int boxes_num = boxes_tensor.size(1); int pts_num = pts_tensor.size(1); const float *boxes = boxes_tensor.data(); const float *pts = pts_tensor.data(); int *box_idx_of_points = box_idx_of_points_tensor.data(); points_in_boxes_launcher(batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points); return 1; } inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rot_angle, float &local_x, float &local_y){ float cosa = cos(-rot_angle), sina = sin(-rot_angle); local_x = shift_x * cosa + shift_y * (-sina); local_y = shift_x * sina + shift_y * cosa; } inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d, float &local_x, float &local_y){ // param pt: (x, y, z) // param box3d: [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center const float MARGIN = 1e-2; float x = pt[0], y = pt[1], z = pt[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2]; float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6]; if (fabsf(z - cz) > dz / 2.0) return 0; lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y); float in_flag = (fabs(local_x) < dx / 2.0 + MARGIN) & (fabs(local_y) < dy / 2.0 + MARGIN); return in_flag; } int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tensor pts_indices_tensor){ // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center, each box DO NOT overlaps // params pts: (num_points, 3) [x, y, z] // params pts_indices: (N, num_points) // CHECK_CONTIGUOUS(boxes_tensor); // CHECK_CONTIGUOUS(pts_tensor); // CHECK_CONTIGUOUS(pts_indices_tensor); int boxes_num = boxes_tensor.size(0); int pts_num = pts_tensor.size(0); const float *boxes = boxes_tensor.data(); const float *pts = pts_tensor.data(); int *pts_indices = pts_indices_tensor.data(); float local_x = 0, local_y = 0; for (int i = 0; i < boxes_num; i++){ for (int j = 0; j < pts_num; j++){ int cur_in_flag = check_pt_in_box3d_cpu(pts 
+ j * 3, boxes + i * 7, local_x, local_y); pts_indices[i * pts_num + j] = cur_in_flag; } } return 1; } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("forward", &roiaware_pool3d_gpu, "roiaware pool3d forward (CUDA)"); m.def("backward", &roiaware_pool3d_gpu_backward, "roiaware pool3d backward (CUDA)"); m.def("points_in_boxes_gpu", &points_in_boxes_gpu, "points_in_boxes_gpu forward (CUDA)"); m.def("points_in_boxes_cpu", &points_in_boxes_cpu, "points_in_boxes_cpu forward (CUDA)"); } ================================================ FILE: pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu ================================================ /* RoI-aware point cloud feature pooling Written by Shaoshuai Shi All Rights Reserved 2019-2020. */ #include #include #define THREADS_PER_BLOCK 256 #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) // #define DEBUG __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float rot_angle, float &local_x, float &local_y){ float cosa = cos(-rot_angle), sina = sin(-rot_angle); local_x = shift_x * cosa + shift_y * (-sina); local_y = shift_x * sina + shift_y * cosa; } __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y){ // param pt: (x, y, z) // param box3d: [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center const float MARGIN = 1e-5; float x = pt[0], y = pt[1], z = pt[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2]; float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6]; if (fabsf(z - cz) > dz / 2.0) return 0; lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y); float in_flag = (fabs(local_x) < dx / 2.0 + MARGIN) & (fabs(local_y) < dy / 2.0 + MARGIN); return in_flag; } __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_x, int out_y, int out_z, const float *rois, const float *pts, int *pts_mask){ // params rois: [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center // params pts: (npoints, 3) [x, y, 
// z]   (end of the "params pts" comment that starts on the previous line)
    // params pts_mask: (N, npoints): -1 means point doesnot in this box, otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    int box_idx = blockIdx.y;
    if (pt_idx >= pts_num || box_idx >= boxes_num) return;

    pts += pt_idx * 3;
    rois += box_idx * 7;
    pts_mask += box_idx * pts_num + pt_idx;

    float local_x = 0, local_y = 0;
    int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y);

    pts_mask[0] = -1;
    if (cur_in_flag > 0){
        float local_z = pts[2] - rois[2];
        float dx = rois[3], dy = rois[4], dz = rois[5];

        float x_res = dx / out_x;
        float y_res = dy / out_y;
        float z_res = dz / out_z;

        // Clamp in signed arithmetic: with unsigned operands the lower bound never
        // applied and a negative index would wrap around to the last cell.
        int x_idx = int((local_x + dx / 2) / x_res);
        int y_idx = int((local_y + dy / 2) / y_res);
        int z_idx = int((local_z + dz / 2) / z_res);

        x_idx = min(max(x_idx, 0), out_x - 1);
        y_idx = min(max(y_idx, 0), out_y - 1);
        z_idx = min(max(z_idx, 0), out_z - 1);

        // 8 bits per axis: x in bits 16-23, y in bits 8-15, z in bits 0-7
        unsigned int idx_encoding = ((unsigned int)x_idx << 16) + ((unsigned int)y_idx << 8) + (unsigned int)z_idx;
        pts_mask[0] = idx_encoding;
    }
}


// One thread per box: walks all points in order and appends the index of every
// point inside the box to the list of the voxel it was assigned to.
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num, int max_pts_each_voxel,
    int out_x, int out_y, int out_z, const int *pts_mask, int *pts_idx_of_voxels){
    // params pts_mask: (N, npoints), -1 or the encoded voxel index written by generate_pts_mask_for_box3d
    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)

    int box_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (box_idx >= boxes_num) return;

    int max_num_pts = max_pts_each_voxel - 1;  // index 0 is the counter
    pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel;

    for (int k = 0; k < pts_num; k++){
        if (pts_mask[box_idx * pts_num + k] != -1){
            unsigned int idx_encoding = pts_mask[box_idx * pts_num + k];
            unsigned int x_idx = (idx_encoding >> 16) & 0xFF;
            unsigned int y_idx = (idx_encoding >> 8) & 0xFF;
            unsigned int z_idx = idx_encoding & 0xFF;
            unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel + y_idx * out_z * max_pts_each_voxel + z_idx * max_pts_each_voxel;
            unsigned int cnt = pts_idx_of_voxels[base_offset];
            // points beyond the per-voxel capacity are dropped
            if (cnt < max_num_pts){
                pts_idx_of_voxels[base_offset + cnt + 1] = k;
                pts_idx_of_voxels[base_offset]++;
            }
#ifdef DEBUG
        printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n",
            k, x_idx, y_idx, z_idx, idx_encoding);
#endif

        }
    }
}


// One thread per (box, channel, voxel): max over the features of the voxel's points.
// Voxels without points keep their pooled value untouched and get argmax -1.
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
    int out_y, int out_z, const float *pts_feature, const int *pts_idx_of_voxels, float *pooled_features, int *argmax){
    // params pts_feature: (npoints, C)
    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
    // params pooled_features: (N, out_x, out_y, out_z, C)
    // params argmax: (N, out_x, out_y, out_z, C)

    int box_idx = blockIdx.z;
    int channel_idx = blockIdx.y;
    int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;

    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;

#ifdef DEBUG
    printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels, argmax);
#endif

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
    pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel;
    pooled_features += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
    argmax += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;

    int argmax_idx = -1;
    float max_val = -1e50;

    int total_pts = pts_idx_of_voxels[0];

    for (int k = 1; k <= total_pts; k++){
        if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val){
            max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
            argmax_idx = pts_idx_of_voxels[k];
        }
    }

    if (argmax_idx != -1){
        pooled_features[0] = max_val;
    }
    argmax[0] = argmax_idx;

#ifdef DEBUG
    printf("channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after pts_idx: %p, argmax: (%p, %d)\n",
        channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts, pts_idx_of_voxels, argmax, argmax_idx);
#endif
}


// One thread per (box, channel, voxel): mean over the features of the voxel's points.
// Voxels without points keep their pooled value untouched.
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
    int out_y, int out_z, const float *pts_feature, const int *pts_idx_of_voxels, float *pooled_features){
    // params pts_feature: (npoints, C)
    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
    // params pooled_features: (N, out_x, out_y, out_z, C)
    // params argmax: (N, out_x, out_y, out_z, C)

    int box_idx = blockIdx.z;
    int channel_idx = blockIdx.y;
    int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;

    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
    pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel;
    pooled_features += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;

    float sum_val = 0;
    int total_pts = pts_idx_of_voxels[0];

    for (int k = 1; k <= total_pts; k++){
        sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
    }

    if (total_pts > 0){
        pooled_features[0] = sum_val / total_pts;
    }
}


// Runs the three forward stages: per-(box, point) mask, per-box point collection,
// then max or average pooling depending on pool_method.
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z,
    const float *rois, const float *pts, const float *pts_feature, int *argmax, int *pts_idx_of_voxels, float *pooled_features, int pool_method){
    // params rois: (N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
    // params pts: (npoints, 3) [x, y, z]
    // params pts_feature: (npoints, C)
    // params argmax: (N, out_x, out_y, out_z, C)
    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
    // params pooled_features: (N, out_x, out_y, out_z, C)
    // params pool_method: 0: max_pool 1: avg_pool

    int *pts_mask = NULL;
    cudaMalloc(&pts_mask, boxes_num * pts_num * sizeof(int));  // (N, M)
    cudaMemset(pts_mask, -1, boxes_num * pts_num * sizeof(int));

    // NOTE(review): the <<<grid, block>>> launch configurations were stripped from
    // this copy; each is restored from the dim3 variables declared next to it.
    dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num);
    dim3 threads(THREADS_PER_BLOCK);
    generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask);

    // TODO: Merge the collect and pool functions, SS

    dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK));
    collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(boxes_num, pts_num, max_pts_each_voxel,
        out_x, out_y, out_z, pts_mask, pts_idx_of_voxels);

    dim3 blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num);
    if (pool_method == 0){
        roiaware_maxpool3d<<<blocks_pool, threads>>>(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
            pts_feature, pts_idx_of_voxels, pooled_features, argmax);
    }
    else if (pool_method == 1){
        roiaware_avgpool3d<<<blocks_pool, threads>>>(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
            pts_feature, pts_idx_of_voxels, pooled_features);
    }

    cudaFree(pts_mask);

#ifdef DEBUG
    cudaDeviceSynchronize();  // for using printf in kernel function
#endif
}


// Routes each pooled gradient to the point that supplied the maximum (argmax != -1).
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels, int out_x, int out_y, int out_z,
    const int *argmax, const float *grad_out, float *grad_in){
    // params argmax: (N, out_x, out_y, out_z, C)
    // params grad_out: (N, out_x, out_y, out_z, C)
    // params grad_in: (npoints, C), return value

    int box_idx = blockIdx.z;
    int channel_idx = blockIdx.y;
    int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;

    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
    argmax += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
    grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;

    if (argmax[0] == -1) return;

    // atomic: the same point can be the argmax of several voxels / boxes
    atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1);
}


// Spreads each pooled gradient evenly over the points recorded for the voxel.
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels, int out_x, int out_y, int out_z,
    int max_pts_each_voxel, const int *pts_idx_of_voxels, const float *grad_out, float *grad_in){
    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
    // params grad_out: (N, out_x, out_y, out_z, C)
    // params grad_in: (npoints, C), return value

    int box_idx = blockIdx.z;
    int channel_idx = blockIdx.y;
    int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;

    int x_idx = voxel_idx_flat / (out_y * out_z);
    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
    int z_idx = voxel_idx_flat % out_z;
    if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;

    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
    pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel;
    grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;

    int total_pts = pts_idx_of_voxels[0];
    float cur_grad = 1 / fmaxf(float(total_pts), 1.0);
    for (int k = 1; k <= total_pts; k++){
        atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx, grad_out[0] * cur_grad);
    }
}


void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel,
    const int *pts_idx_of_voxels, const int *argmax, const float *grad_out, float *grad_in, int pool_method){
    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
    // params argmax: (N, out_x, out_y, out_z, C)
    // params grad_out: (N, out_x, out_y, out_z, C)
    // params grad_in: (npoints, C), return value
    // params pool_method: 0: max_pool, 1: avg_pool

    dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num);
    dim3 threads(THREADS_PER_BLOCK);
    if (pool_method == 0){
        roiaware_maxpool3d_backward<<<blocks, threads>>>(
            boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in
        );
    }
    else if (pool_method == 1){
        roiaware_avgpool3d_backward<<<blocks, threads>>>(
            boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel, pts_idx_of_voxels, grad_out, grad_in
        );
    }

}


// One thread per (batch, point): stores the index of the first box (in array
// order) that contains the point; the output is left untouched otherwise.
__global__ void points_in_boxes_kernel(int batch_size, int boxes_num, int pts_num, const float *boxes,
    const float *pts, int *box_idx_of_points){
    // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
    // params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
    // params boxes_idx_of_points: (B, npoints), default -1

    int bs_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (bs_idx >= batch_size || pt_idx >= pts_num) return;

    boxes += bs_idx * boxes_num * 7;
    pts += bs_idx * pts_num * 3 + pt_idx * 3;
    box_idx_of_points += bs_idx * pts_num + pt_idx;

    float local_x = 0, local_y = 0;
    int cur_in_flag = 0;
    for (int k = 0; k < boxes_num; k++){
        cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y);
        if (cur_in_flag){
            box_idx_of_points[0] = k;
            break;
        }
    }
}


void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, const float *boxes,
    const float *pts, int *box_idx_of_points){
    // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
    // params pts: (B, npoints, 3) [x, y, z]
    // params boxes_idx_of_points: (B, npoints), default -1
    cudaError_t err;

    dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size);
    dim3 threads(THREADS_PER_BLOCK);
    points_in_boxes_kernel<<<blocks, threads>>>(batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points);

    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }

#ifdef DEBUG
cudaDeviceSynchronize(); // for using printf in kernel function #endif } ================================================ FILE: pcdet/ops/roipoint_pool3d/roipoint_pool3d_utils.py ================================================ import torch import torch.nn as nn from torch.autograd import Function from ...utils import box_utils from . import roipoint_pool3d_cuda class RoIPointPool3d(nn.Module): def __init__(self, num_sampled_points=512, pool_extra_width=1.0): super().__init__() self.num_sampled_points = num_sampled_points self.pool_extra_width = pool_extra_width def forward(self, points, point_features, boxes3d): """ Args: points: (B, N, 3) point_features: (B, N, C) boxes3d: (B, M, 7), [x, y, z, dx, dy, dz, heading] Returns: pooled_features: (B, M, 512, 3 + C) pooled_empty_flag: (B, M) """ return RoIPointPool3dFunction.apply( points, point_features, boxes3d, self.pool_extra_width, self.num_sampled_points ) class RoIPointPool3dFunction(Function): @staticmethod def forward(ctx, points, point_features, boxes3d, pool_extra_width, num_sampled_points=512): """ Args: ctx: points: (B, N, 3) point_features: (B, N, C) boxes3d: (B, num_boxes, 7), [x, y, z, dx, dy, dz, heading] pool_extra_width: num_sampled_points: Returns: pooled_features: (B, num_boxes, 512, 3 + C) pooled_empty_flag: (B, num_boxes) """ assert points.shape.__len__() == 3 and points.shape[2] == 3 batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[1], point_features.shape[2] pooled_boxes3d = box_utils.enlarge_box3d(boxes3d.view(-1, 7), pool_extra_width).view(batch_size, -1, 7) pooled_features = point_features.new_zeros((batch_size, boxes_num, num_sampled_points, 3 + feature_len)) pooled_empty_flag = point_features.new_zeros((batch_size, boxes_num)).int() roipoint_pool3d_cuda.forward( points.contiguous(), pooled_boxes3d.contiguous(), point_features.contiguous(), pooled_features, pooled_empty_flag ) return pooled_features, pooled_empty_flag @staticmethod def backward(ctx, grad_out): raise 
NotImplementedError if __name__ == '__main__': pass ================================================ FILE: pcdet/ops/roipoint_pool3d/src/roipoint_pool3d.cpp ================================================ #include #include #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) void roipool3dLauncher(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag); int roipool3d_gpu(at::Tensor xyz, at::Tensor boxes3d, at::Tensor pts_feature, at::Tensor pooled_features, at::Tensor pooled_empty_flag){ // params xyz: (B, N, 3) // params boxes3d: (B, M, 7) // params pts_feature: (B, N, C) // params pooled_features: (B, M, 512, 3+C) // params pooled_empty_flag: (B, M) CHECK_INPUT(xyz); CHECK_INPUT(boxes3d); CHECK_INPUT(pts_feature); CHECK_INPUT(pooled_features); CHECK_INPUT(pooled_empty_flag); int batch_size = xyz.size(0); int pts_num = xyz.size(1); int boxes_num = boxes3d.size(1); int feature_in_len = pts_feature.size(2); int sampled_pts_num = pooled_features.size(2); const float * xyz_data = xyz.data(); const float * boxes3d_data = boxes3d.data(); const float * pts_feature_data = pts_feature.data(); float * pooled_features_data = pooled_features.data(); int * pooled_empty_flag_data = pooled_empty_flag.data(); roipool3dLauncher(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz_data, boxes3d_data, pts_feature_data, pooled_features_data, pooled_empty_flag_data); return 1; } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("forward", &roipool3d_gpu, "roipool3d forward (CUDA)"); } 
================================================
FILE: pcdet/ops/roipoint_pool3d/src/roipoint_pool3d_kernel.cu
================================================
/*
Point cloud feature pooling
Written by Shaoshuai Shi
All Rights Reserved 2018.
*/

// NOTE(review): header names were stripped from this copy; restored from the
// functions used below (cos/sin/fabsf, printf) — confirm upstream.
#include <math.h>
#include <stdio.h>

#define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG


// Rotates the offset (shift_x, shift_y) by -rot_angle, i.e. into the box frame.
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float rot_angle, float &local_x, float &local_y){
    float cosa = cos(-rot_angle), sina = sin(-rot_angle);
    local_x = shift_x * cosa + shift_y * (-sina);
    local_y = shift_x * sina + shift_y * cosa;
}


// Returns 1 when pt lies inside box3d, 0 otherwise. local_x / local_y receive the
// point's coordinates in the box frame unless the z test already rejected it.
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y){
    // param pt: (x, y, z)
    // param box3d: [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center

    const float MARGIN = 1e-5;
    float x = pt[0], y = pt[1], z = pt[2];
    float cx = box3d[0], cy = box3d[1], cz = box3d[2];
    float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6];

    if (fabsf(z - cz) > dz / 2.0) return 0;
    lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
    int in_flag = (fabs(local_x) < dx / 2.0 + MARGIN) && (fabs(local_y) < dy / 2.0 + MARGIN);
    return in_flag;
}


// One thread per (batch, box, point): records whether the point is inside the box.
__global__ void assign_pts_to_box3d(int batch_size, int pts_num, int boxes_num, const float *xyz, const float *boxes3d, int *pts_assign){
    // params xyz: (B, N, 3)
    // params boxes3d: (B, M, 7)
    // params pts_assign: (B, N, M): 1 when point n lies inside box m, 0 otherwise
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    int box_idx = blockIdx.y;
    int bs_idx = blockIdx.z;

    if (pt_idx >= pts_num || box_idx >= boxes_num || bs_idx >= batch_size){
        return;
    }
    int assign_idx = bs_idx * pts_num * boxes_num + pt_idx * boxes_num + box_idx;

    int box_offset = bs_idx * boxes_num * 7 + box_idx * 7;
    int pt_offset = bs_idx * pts_num * 3 + pt_idx * 3;

    float local_x = 0, local_y = 0;
    int cur_in_flag = check_pt_in_box3d(xyz + pt_offset, boxes3d + box_offset, local_x, local_y);
    // every in-range entry is written, so the buffer needs no prior initialisation
    pts_assign[assign_idx] = cur_in_flag;
    // printf("bs=%d, pt=%d, in=%d\n", bs_idx, pt_idx, pts_assign[bs_idx * pts_num + pt_idx]);
}


// One thread per (batch, box): collects up to sampled_pts_num indices of points
// inside the box, in point order. Boxes with fewer points repeat the collected
// indices cyclically; boxes with none are flagged in pooled_empty_flag.
__global__ void get_pooled_idx(int batch_size, int pts_num, int boxes_num, int sampled_pts_num,
                               const int *pts_assign, int *pts_idx, int *pooled_empty_flag){
    // params xyz: (B, N, 3)
    // params pts_feature: (B, N, C)
    // params pts_assign: (B, N, M)
    // params pts_idx: (B, M, 512)
    // params pooled_empty_flag: (B, M)

    int boxes_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (boxes_idx >= boxes_num){
        return;
    }

    int bs_idx = blockIdx.y;

    int cnt = 0;
    for (int k = 0; k < pts_num; k++){
        if (pts_assign[bs_idx * pts_num * boxes_num + k * boxes_num + boxes_idx]){
            if (cnt < sampled_pts_num){
                pts_idx[bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num + cnt] = k;
                cnt++;
            }
            else break;
        }
    }

    if (cnt == 0){
        pooled_empty_flag[bs_idx * boxes_num + boxes_idx] = 1;
    }
    else if (cnt < sampled_pts_num){
        // duplicate same points for sampling
        for (int k = cnt; k < sampled_pts_num; k++){
            int duplicate_idx = k % cnt;
            int base_offset = bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num;
            pts_idx[base_offset + k] = pts_idx[base_offset + duplicate_idx];
        }
    }
}


// One thread per (batch, box, sample slot): copies xyz followed by the point's
// features into the output row. Boxes flagged empty are skipped.
__global__ void roipool3d_forward(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
                                   const float *xyz, const int *pts_idx, const float *pts_feature,
                                   float *pooled_features, int *pooled_empty_flag){
    // params xyz: (B, N, 3)
    // params pts_idx: (B, M, 512)
    // params pts_feature: (B, N, C)
    // params pooled_features: (B, M, 512, 3+C)
    // params pooled_empty_flag: (B, M)

    int sample_pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    int box_idx = blockIdx.y;
    int bs_idx = blockIdx.z;

    if (sample_pt_idx >= sampled_pts_num || box_idx >= boxes_num || bs_idx >= batch_size){
        return;
    }

    // pts_idx holds no valid entries for empty boxes, so they must not be read
    if (pooled_empty_flag[bs_idx * boxes_num + box_idx]){
        return;
    }

    int temp_idx = bs_idx * boxes_num * sampled_pts_num + box_idx * sampled_pts_num + sample_pt_idx;
    int src_pt_idx = pts_idx[temp_idx];
    int dst_feature_offset = temp_idx * (3 + feature_in_len);

    for (int j = 0; j < 3; j++)
        pooled_features[dst_feature_offset + j] = xyz[bs_idx * pts_num * 3 + src_pt_idx * 3 + j];

    int src_feature_offset = bs_idx * pts_num * feature_in_len + src_pt_idx * feature_in_len;
    for (int j = 0; j < feature_in_len; j++)
        pooled_features[dst_feature_offset + 3 + j] = pts_feature[src_feature_offset + j];
}


// Runs the three stages (point-in-box flags, per-box index sampling, feature
// gathering) with two temporary device buffers that are freed before returning.
void roipool3dLauncher(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
                       const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag){

    // printf("batch_size=%d, pts_num=%d, boxes_num=%d\n", batch_size, pts_num, boxes_num);
    int *pts_assign = NULL;
    cudaMalloc(&pts_assign, batch_size * pts_num * boxes_num * sizeof(int));  // (batch_size, N, M)
    // cudaMemset(&pts_assign, -1, batch_size * pts_num * boxes_num * sizeof(int));

    // NOTE(review): the <<<grid, block>>> launch configurations were stripped from
    // this copy; each is restored from the dim3 variables declared next to it.
    dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num, batch_size);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);
    assign_pts_to_box3d<<<blocks, threads>>>(batch_size, pts_num, boxes_num, xyz, boxes3d, pts_assign);

    int *pts_idx = NULL;
    cudaMalloc(&pts_idx, batch_size * boxes_num * sampled_pts_num * sizeof(int));  // (batch_size, M, sampled_pts_num)

    dim3 blocks2(DIVUP(boxes_num, THREADS_PER_BLOCK), batch_size);  // blockIdx.x(col), blockIdx.y(row)
    get_pooled_idx<<<blocks2, threads>>>(batch_size, pts_num, boxes_num, sampled_pts_num, pts_assign, pts_idx, pooled_empty_flag);

    dim3 blocks_pool(DIVUP(sampled_pts_num, THREADS_PER_BLOCK), boxes_num, batch_size);
    roipool3d_forward<<<blocks_pool, threads>>>(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
                                                xyz, pts_idx, pts_feature, pooled_features, pooled_empty_flag);

    cudaFree(pts_assign);
    cudaFree(pts_idx);

#ifdef DEBUG
    cudaDeviceSynchronize();  // for using printf in kernel function
#endif
}

================================================
FILE: pcdet/ops/votr_ops/src/build_attention_indices.cpp
================================================ /* Find indices for each attention pattern Written by Jiageng Mao */ #include #include #include #include #include #include "build_attention_indices_gpu.h" extern THCState *state; #define CHECK_CUDA(x) do { \ if (!x.type().is_cuda()) { \ fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_CONTIGUOUS(x) do { \ if (!x.is_contiguous()) { \ fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) int sparse_local_attention_with_tensor_wrapper(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int attend_range, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor) { CHECK_INPUT(attend_indices_tensor); CHECK_INPUT(v_indices_tensor); CHECK_INPUT(xyz_to_vidx_tensor); int *attend_indices = attend_indices_tensor.data(); const int *v_indices = v_indices_tensor.data(); const int *xyz_to_vidx = xyz_to_vidx_tensor.data(); sparse_local_attention_with_tensor_kernel_launcher(x_max, y_max, z_max, x_stride, y_stride, z_stride, num_voxels, attend_size, attend_range, attend_indices, v_indices, xyz_to_vidx); return 1; } int sparse_local_attention_with_hash_wrapper(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int attend_range, int hash_size, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor) { CHECK_INPUT(attend_indices_tensor); CHECK_INPUT(v_indices_tensor); CHECK_INPUT(xyz_to_vidx_tensor); int *attend_indices = attend_indices_tensor.data(); const int *v_indices = v_indices_tensor.data(); const int *xyz_to_vidx = xyz_to_vidx_tensor.data(); sparse_local_attention_with_hash_kernel_launcher(x_max, y_max, z_max, x_stride, y_stride, z_stride, num_voxels, 
attend_size, attend_range, hash_size, attend_indices, v_indices, xyz_to_vidx); return 1; } int subm_local_attention_with_tensor_wrapper(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int attend_range, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor) { CHECK_INPUT(attend_indices_tensor); CHECK_INPUT(v_indices_tensor); CHECK_INPUT(xyz_to_vidx_tensor); int *attend_indices = attend_indices_tensor.data(); const int *v_indices = v_indices_tensor.data(); const int *xyz_to_vidx = xyz_to_vidx_tensor.data(); subm_local_attention_with_tensor_kernel_launcher(x_max, y_max, z_max, num_voxels, attend_size, attend_range, attend_indices, v_indices, xyz_to_vidx); return 1; } int subm_local_attention_with_hash_wrapper(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int attend_range, int hash_size, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor) { CHECK_INPUT(attend_indices_tensor); CHECK_INPUT(v_indices_tensor); CHECK_INPUT(xyz_to_vidx_tensor); int *attend_indices = attend_indices_tensor.data(); const int *v_indices = v_indices_tensor.data(); const int *xyz_to_vidx = xyz_to_vidx_tensor.data(); subm_local_attention_with_hash_kernel_launcher(x_max, y_max, z_max, num_voxels, attend_size, attend_range, hash_size, attend_indices, v_indices, xyz_to_vidx); return 1; } int sparse_strided_attention_with_tensor_wrapper(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int num_range, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor range_spec_tensor) { CHECK_INPUT(attend_indices_tensor); CHECK_INPUT(v_indices_tensor); CHECK_INPUT(xyz_to_vidx_tensor); CHECK_INPUT(range_spec_tensor); int *attend_indices = attend_indices_tensor.data(); const int *v_indices = v_indices_tensor.data(); const int *xyz_to_vidx = xyz_to_vidx_tensor.data(); const int *range_spec = 
range_spec_tensor.data(); sparse_strided_attention_with_tensor_kernel_launcher(x_max, y_max, z_max, x_stride, y_stride, z_stride, num_voxels, attend_size, num_range, attend_indices, v_indices, xyz_to_vidx, range_spec); return 1; } int sparse_strided_attention_with_hash_wrapper(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int num_range, int hash_size, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor range_spec_tensor) { CHECK_INPUT(attend_indices_tensor); CHECK_INPUT(v_indices_tensor); CHECK_INPUT(xyz_to_vidx_tensor); CHECK_INPUT(range_spec_tensor); int *attend_indices = attend_indices_tensor.data(); const int *v_indices = v_indices_tensor.data(); const int *xyz_to_vidx = xyz_to_vidx_tensor.data(); const int *range_spec = range_spec_tensor.data(); sparse_strided_attention_with_hash_kernel_launcher(x_max, y_max, z_max, x_stride, y_stride, z_stride, num_voxels, attend_size, num_range, hash_size, attend_indices, v_indices, xyz_to_vidx, range_spec); return 1; } int subm_strided_attention_with_tensor_wrapper(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int num_range, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor range_spec_tensor) { CHECK_INPUT(attend_indices_tensor); CHECK_INPUT(v_indices_tensor); CHECK_INPUT(xyz_to_vidx_tensor); CHECK_INPUT(range_spec_tensor); int *attend_indices = attend_indices_tensor.data(); const int *v_indices = v_indices_tensor.data(); const int *xyz_to_vidx = xyz_to_vidx_tensor.data(); const int *range_spec = range_spec_tensor.data(); subm_strided_attention_with_tensor_kernel_launcher(x_max, y_max, z_max, num_voxels, attend_size, num_range, attend_indices, v_indices, xyz_to_vidx, range_spec); return 1; } int subm_strided_attention_with_hash_wrapper(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int num_range, int hash_size, 
at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor range_spec_tensor) { CHECK_INPUT(attend_indices_tensor); CHECK_INPUT(v_indices_tensor); CHECK_INPUT(xyz_to_vidx_tensor); CHECK_INPUT(range_spec_tensor); int *attend_indices = attend_indices_tensor.data(); const int *v_indices = v_indices_tensor.data(); const int *xyz_to_vidx = xyz_to_vidx_tensor.data(); const int *range_spec = range_spec_tensor.data(); subm_strided_attention_with_hash_kernel_launcher(x_max, y_max, z_max, num_voxels, attend_size, num_range, hash_size, attend_indices, v_indices, xyz_to_vidx, range_spec); return 1; } ================================================ FILE: pcdet/ops/votr_ops/src/build_attention_indices_gpu.cu ================================================ /* Find indices for each attention pattern Written by Jiageng Mao */ #include #include #include #include "build_attention_indices_gpu.h" #include "votr_cuda_utils.h" __device__ int simple_hash(int k, int hash_size) { return k % hash_size; } __device__ int hash_table_find(int &key, int &hash_size, const int *xyz_to_vidx) { int hash_idx = simple_hash(key, hash_size); int v_idx = EMPTY_KEY; int prob_cnt = 0; while (true) { // found if (xyz_to_vidx[hash_idx * 2 + 0] == key) { v_idx = xyz_to_vidx[hash_idx * 2 + 1]; break; } // empty, not found if (xyz_to_vidx[hash_idx * 2 + 0] == EMPTY_KEY) { break; } // linear probing hash_idx = (hash_idx + 1) % hash_size; // security in case of dead loop prob_cnt += 1; if (prob_cnt >= hash_size) break; } return v_idx; } __global__ void sparse_local_attention_with_tensor_kernel(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int attend_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx) { /* in sparse attention, voxels are not necessary at the non-empty location attend_indices: [num_voxels, attend_size] for gather attend indices v_indices: [num_voxels, 4] bs + zyx 
indices of voxels xyz_to_vidx: [bs, x_max, y_max, z_max] voxel coordinates to voxel indices */ int th_idx = blockIdx.x * blockDim.x + threadIdx.x; if (th_idx >= num_voxels) return; int bs_idx = v_indices[th_idx * 4 + 0]; int z_idx = v_indices[th_idx * 4 + 1]; int y_idx = v_indices[th_idx * 4 + 2]; int x_idx = v_indices[th_idx * 4 + 3]; xyz_to_vidx += bs_idx * x_max * y_max * z_max; int num_samples = 0; for (int sz_idx = z_idx * z_stride - attend_range; sz_idx <= z_idx * z_stride + (z_stride - 1) + attend_range; ++sz_idx){ if (sz_idx >= z_max || sz_idx < 0) continue; for (int sy_idx = y_idx * y_stride - attend_range; sy_idx <= y_idx * y_stride + (y_stride - 1) + attend_range; ++sy_idx){ if (sy_idx >= y_max || sy_idx < 0) continue; for (int sx_idx = x_idx * x_stride - attend_range; sx_idx <= x_idx * x_stride + (x_stride - 1) + attend_range; ++sx_idx){ if (sx_idx >= x_max || sx_idx < 0) continue; int sv_idx = xyz_to_vidx[sx_idx * y_max * z_max + sy_idx * z_max + sz_idx]; if (sv_idx != EMPTY_KEY) { // found non-empty index if (num_samples >= attend_size) return; // full and return attend_indices[th_idx * attend_size + num_samples] = sv_idx; num_samples++; }else { // not found ; } } } } return; } void sparse_local_attention_with_tensor_kernel_launcher(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int attend_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx) { cudaError_t err; dim3 blocks(DIVUP(num_voxels, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); sparse_local_attention_with_tensor_kernel<<>>(x_max, y_max, z_max, x_stride, y_stride, z_stride, num_voxels, attend_size, attend_range, attend_indices, v_indices, xyz_to_vidx); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } __global__ void 
sparse_local_attention_with_hash_kernel(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int attend_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx) { /* in sparse attention, voxels are not necessary at the non-empty location attend_indices: [num_voxels, attend_size] for gather attend indices v_indices: [num_voxels, 4] bs + zyx indices of voxels xyz_to_vidx: [bs, hash_size, 2] voxel coordinates to voxel indices */ int th_idx = blockIdx.x * blockDim.x + threadIdx.x; if (th_idx >= num_voxels) return; int bs_idx = v_indices[th_idx * 4 + 0]; int z_idx = v_indices[th_idx * 4 + 1]; int y_idx = v_indices[th_idx * 4 + 2]; int x_idx = v_indices[th_idx * 4 + 3]; xyz_to_vidx += bs_idx * hash_size * 2; int num_samples = 0; for (int sz_idx = z_idx * z_stride - attend_range; sz_idx <= z_idx * z_stride + (z_stride - 1) + attend_range; ++sz_idx){ if (sz_idx >= z_max || sz_idx < 0) continue; for (int sy_idx = y_idx * y_stride - attend_range; sy_idx <= y_idx * y_stride + (y_stride - 1) + attend_range; ++sy_idx){ if (sy_idx >= y_max || sy_idx < 0) continue; for (int sx_idx = x_idx * x_stride - attend_range; sx_idx <= x_idx * x_stride + (x_stride - 1) + attend_range; ++sx_idx){ if (sx_idx >= x_max || sx_idx < 0) continue; int skey = sx_idx * y_max * z_max + sy_idx * z_max + sz_idx; int sv_idx = hash_table_find(skey, hash_size, xyz_to_vidx); if (sv_idx != EMPTY_KEY) { // found non-empty index if (num_samples >= attend_size) return; // full and return attend_indices[th_idx * attend_size + num_samples] = sv_idx; num_samples++; }else { // not found ; } } } } return; } void sparse_local_attention_with_hash_kernel_launcher(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int attend_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx) { cudaError_t err; dim3 blocks(DIVUP(num_voxels, 
THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); sparse_local_attention_with_hash_kernel<<>>(x_max, y_max, z_max, x_stride, y_stride, z_stride, num_voxels, attend_size, attend_range, hash_size, attend_indices, v_indices, xyz_to_vidx); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } __global__ void subm_local_attention_with_tensor_kernel(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int attend_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx) { /* attend_indices: [num_voxels, attend_size] for gather attend indices v_indices: [num_voxels, 4] bs + zyx indices of voxels xyz_to_vidx: [bs, x_max, y_max, z_max] voxel coordinates to voxel indices */ int th_idx = blockIdx.x * blockDim.x + threadIdx.x; if (th_idx >= num_voxels) return; int bs_idx = v_indices[th_idx * 4 + 0]; int z_idx = v_indices[th_idx * 4 + 1]; int y_idx = v_indices[th_idx * 4 + 2]; int x_idx = v_indices[th_idx * 4 + 3]; if (x_idx >= x_max || x_idx < 0 || y_idx < 0 || y_idx >= y_max || z_idx < 0 || z_idx >= z_max) return; xyz_to_vidx += bs_idx * x_max * y_max * z_max; int num_samples = 0; for (int sz_idx = z_idx - attend_range; sz_idx <= z_idx + attend_range; ++sz_idx){ if (sz_idx >= z_max || sz_idx < 0) continue; for (int sy_idx = y_idx - attend_range; sy_idx <= y_idx + attend_range; ++sy_idx){ if (sy_idx >= y_max || sy_idx < 0) continue; for (int sx_idx = x_idx - attend_range; sx_idx <= x_idx + attend_range; ++sx_idx){ if (sx_idx >= x_max || sx_idx < 0) continue; int sv_idx = xyz_to_vidx[sx_idx * y_max * z_max + sy_idx * z_max + sz_idx]; if (sv_idx != EMPTY_KEY) { // found non-empty index if (num_samples >= attend_size) return; // full and return attend_indices[th_idx * attend_size + num_samples] = sv_idx; num_samples++; }else { // not found ; } } } } return; } void 
subm_local_attention_with_tensor_kernel_launcher(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int attend_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx){ cudaError_t err; dim3 blocks(DIVUP(num_voxels, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); subm_local_attention_with_tensor_kernel<<>>(x_max, y_max, z_max, num_voxels, attend_size, attend_range, attend_indices, v_indices, xyz_to_vidx); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } __global__ void subm_local_attention_with_hash_kernel(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int attend_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx) { /* attend_indices: [num_voxels, attend_size] for gather attend indices v_indices: [num_voxels, 4] bs + zyx indices of voxels xyz_to_vidx: [bs, x_max, y_max, z_max] voxel coordinates to voxel indices */ int th_idx = blockIdx.x * blockDim.x + threadIdx.x; if (th_idx >= num_voxels) return; int bs_idx = v_indices[th_idx * 4 + 0]; int z_idx = v_indices[th_idx * 4 + 1]; int y_idx = v_indices[th_idx * 4 + 2]; int x_idx = v_indices[th_idx * 4 + 3]; if (x_idx >= x_max || x_idx < 0 || y_idx < 0 || y_idx >= y_max || z_idx < 0 || z_idx >= z_max) return; xyz_to_vidx += bs_idx * hash_size * 2; int num_samples = 0; for (int sz_idx = z_idx - attend_range; sz_idx <= z_idx + attend_range; ++sz_idx){ if (sz_idx >= z_max || sz_idx < 0) continue; for (int sy_idx = y_idx - attend_range; sy_idx <= y_idx + attend_range; ++sy_idx){ if (sy_idx >= y_max || sy_idx < 0) continue; for (int sx_idx = x_idx - attend_range; sx_idx <= x_idx + attend_range; ++sx_idx){ if (sx_idx >= x_max || sx_idx < 0) continue; int skey = sx_idx * y_max * z_max + sy_idx * z_max + sz_idx; int sv_idx = hash_table_find(skey, 
hash_size, xyz_to_vidx); if (sv_idx != EMPTY_KEY) { // found non-empty index if (num_samples >= attend_size) return; // full and return attend_indices[th_idx * attend_size + num_samples] = sv_idx; num_samples++; }else { // not found ; } } } } return; } void subm_local_attention_with_hash_kernel_launcher(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int attend_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx){ cudaError_t err; dim3 blocks(DIVUP(num_voxels, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); subm_local_attention_with_hash_kernel<<>>(x_max, y_max, z_max, num_voxels, attend_size, attend_range, hash_size, attend_indices, v_indices, xyz_to_vidx); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } __global__ void sparse_strided_attention_with_tensor_kernel(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int num_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec) { /* attend_indices: [num_voxels, attend_size] for gather attend indices v_indices: [num_voxels, 4] bs + zyx indices of voxels xyz_to_vidx: [bs, x_max, y_max, z_max] voxel coordinates to voxel indices range_spec: [num_range, 3] half start/end range & stride */ int th_idx = blockIdx.x * blockDim.x + threadIdx.x; if (th_idx >= num_voxels) return; int bs_idx = v_indices[th_idx * 4 + 0]; int z_idx = v_indices[th_idx * 4 + 1]; int y_idx = v_indices[th_idx * 4 + 2]; int x_idx = v_indices[th_idx * 4 + 3]; if (x_idx >= x_max || x_idx < 0 || y_idx < 0 || y_idx >= y_max || z_idx < 0 || z_idx >= z_max) return; xyz_to_vidx += bs_idx * x_max * y_max * z_max; int num_samples = 0; for (int range_idx = 0; range_idx < num_range; ++range_idx) { int 
search_x_start_range = range_spec[range_idx * 9 + 0]; int search_x_end_range = range_spec[range_idx * 9 + 1]; int search_x_stride = range_spec[range_idx * 9 + 2]; int search_y_start_range = range_spec[range_idx * 9 + 3]; int search_y_end_range = range_spec[range_idx * 9 + 4]; int search_y_stride = range_spec[range_idx * 9 + 5]; int search_z_start_range = range_spec[range_idx * 9 + 6]; int search_z_end_range = range_spec[range_idx * 9 + 7]; int search_z_stride = range_spec[range_idx * 9 + 8]; for (int z_offset = 0; z_offset < search_z_end_range; z_offset += search_z_stride) { for (int y_offset = 0; y_offset < search_y_end_range; y_offset += search_y_stride) { for (int x_offset = 0; x_offset < search_x_end_range; x_offset += search_x_stride) { if ((x_offset < search_x_start_range) && (y_offset < search_y_start_range) && (z_offset < search_z_start_range)) { continue; } // each loop process 8 points for (int sz_idx = z_idx * z_stride - z_offset; sz_idx <= z_idx * z_stride + (z_stride - 1) + z_offset; sz_idx += (2 * z_offset + z_stride - 1)){ if (sz_idx >= z_max || sz_idx < 0) continue; for (int sy_idx = y_idx * y_stride - y_offset; sy_idx <= y_idx * y_stride + (y_stride - 1) + y_offset; sy_idx += (2 * y_offset + y_stride - 1)){ if (sy_idx >= y_max || sy_idx < 0) continue; for (int sx_idx = x_idx * x_stride - x_offset; sx_idx <= x_idx * x_stride + (x_stride - 1) + x_offset; sx_idx += (2 * x_offset + x_stride - 1)){ if (sx_idx >= x_max || sx_idx < 0) continue; int sv_idx = xyz_to_vidx[sx_idx * y_max * z_max + sy_idx * z_max + sz_idx]; if (sv_idx != EMPTY_KEY) { // found non-empty index if (num_samples >= attend_size) return; // full and return attend_indices[th_idx * attend_size + num_samples] = sv_idx; num_samples++; }else { // not found ; } } } } } } } } return; } void sparse_strided_attention_with_tensor_kernel_launcher(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int num_range, int *attend_indices, const 
int *v_indices, const int *xyz_to_vidx, const int *range_spec){ cudaError_t err; dim3 blocks(DIVUP(num_voxels, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); sparse_strided_attention_with_tensor_kernel<<>>(x_max, y_max, z_max, x_stride, y_stride, z_stride, num_voxels, attend_size, num_range, attend_indices, v_indices, xyz_to_vidx, range_spec); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } __global__ void sparse_strided_attention_with_hash_kernel(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int num_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec) { /* attend_indices: [num_voxels, attend_size] for gather attend indices v_indices: [num_voxels, 4] bs + zyx indices of voxels xyz_to_vidx: [bs, hash_size, 2] voxel coordinates to voxel indices range_spec: [num_range, 3] half start/end range & stride */ int th_idx = blockIdx.x * blockDim.x + threadIdx.x; if (th_idx >= num_voxels) return; int bs_idx = v_indices[th_idx * 4 + 0]; int z_idx = v_indices[th_idx * 4 + 1]; int y_idx = v_indices[th_idx * 4 + 2]; int x_idx = v_indices[th_idx * 4 + 3]; if (x_idx >= x_max || x_idx < 0 || y_idx < 0 || y_idx >= y_max || z_idx < 0 || z_idx >= z_max) return; xyz_to_vidx += bs_idx * hash_size * 2; int num_samples = 0; for (int range_idx = 0; range_idx < num_range; ++range_idx) { int search_x_start_range = range_spec[range_idx * 9 + 0]; int search_x_end_range = range_spec[range_idx * 9 + 1]; int search_x_stride = range_spec[range_idx * 9 + 2]; int search_y_start_range = range_spec[range_idx * 9 + 3]; int search_y_end_range = range_spec[range_idx * 9 + 4]; int search_y_stride = range_spec[range_idx * 9 + 5]; int search_z_start_range = range_spec[range_idx * 9 + 6]; int 
search_z_end_range = range_spec[range_idx * 9 + 7]; int search_z_stride = range_spec[range_idx * 9 + 8]; for (int z_offset = 0; z_offset < search_z_end_range; z_offset += search_z_stride) { for (int y_offset = 0; y_offset < search_y_end_range; y_offset += search_y_stride) { for (int x_offset = 0; x_offset < search_x_end_range; x_offset += search_x_stride) { if ((x_offset < search_x_start_range) && (y_offset < search_y_start_range) && (z_offset < search_z_start_range)) { continue; } // each loop process 8 points for (int sz_idx = z_idx * z_stride - z_offset; sz_idx <= z_idx * z_stride + (z_stride - 1) + z_offset; sz_idx += (2 * z_offset + z_stride - 1)){ if (sz_idx >= z_max || sz_idx < 0) continue; for (int sy_idx = y_idx * y_stride - y_offset; sy_idx <= y_idx * y_stride + (y_stride - 1) + y_offset; sy_idx += (2 * y_offset + y_stride - 1)){ if (sy_idx >= y_max || sy_idx < 0) continue; for (int sx_idx = x_idx * x_stride - x_offset; sx_idx <= x_idx * x_stride + (x_stride - 1) + x_offset; sx_idx += (2 * x_offset + x_stride - 1)){ if (sx_idx >= x_max || sx_idx < 0) continue; int skey = sx_idx * y_max * z_max + sy_idx * z_max + sz_idx; int sv_idx = hash_table_find(skey, hash_size, xyz_to_vidx); if (sv_idx != EMPTY_KEY) { // found non-empty index if (num_samples >= attend_size) return; // full and return attend_indices[th_idx * attend_size + num_samples] = sv_idx; num_samples++; }else { // not found ; } } } } } } } } return; } void sparse_strided_attention_with_hash_kernel_launcher(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int num_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec){ cudaError_t err; dim3 blocks(DIVUP(num_voxels, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); sparse_strided_attention_with_hash_kernel<<>>(x_max, y_max, z_max, x_stride, y_stride, z_stride, num_voxels, attend_size, num_range, 
hash_size, attend_indices, v_indices, xyz_to_vidx, range_spec); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } __global__ void subm_strided_attention_with_tensor_kernel(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int num_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec) { /* attend_indices: [num_voxels, attend_size] for gather attend indices v_indices: [num_voxels, 4] bs + zyx indices of voxels xyz_to_vidx: [bs, x_max, y_max, z_max] voxel coordinates to voxel indices range_spec: [num_range, 3] half start/end range & stride */ int th_idx = blockIdx.x * blockDim.x + threadIdx.x; if (th_idx >= num_voxels) return; int bs_idx = v_indices[th_idx * 4 + 0]; int z_idx = v_indices[th_idx * 4 + 1]; int y_idx = v_indices[th_idx * 4 + 2]; int x_idx = v_indices[th_idx * 4 + 3]; if (x_idx >= x_max || x_idx < 0 || y_idx < 0 || y_idx >= y_max || z_idx < 0 || z_idx >= z_max) return; xyz_to_vidx += bs_idx * x_max * y_max * z_max; int num_samples = 0; for (int range_idx = 0; range_idx < num_range; ++range_idx) { int search_x_start_range = range_spec[range_idx * 9 + 0]; int search_x_end_range = range_spec[range_idx * 9 + 1]; int search_x_stride = range_spec[range_idx * 9 + 2]; int search_y_start_range = range_spec[range_idx * 9 + 3]; int search_y_end_range = range_spec[range_idx * 9 + 4]; int search_y_stride = range_spec[range_idx * 9 + 5]; int search_z_start_range = range_spec[range_idx * 9 + 6]; int search_z_end_range = range_spec[range_idx * 9 + 7]; int search_z_stride = range_spec[range_idx * 9 + 8]; int x_step = 0; int y_step = 0; int z_step = 0; for (int z_offset = 0; z_offset < search_z_end_range; z_offset += search_z_stride) { for (int y_offset = 0; y_offset < search_y_end_range; y_offset += search_y_stride) { for (int x_offset = 0; x_offset < 
search_x_end_range; x_offset += search_x_stride) { if ((x_offset < search_x_start_range) && (y_offset < search_y_start_range) && (z_offset < search_z_start_range)) { continue; } // each loop process 8 points if (z_offset == 0) { z_step = 1; } else { z_step = 2 * z_offset; } for (int sz_idx = z_idx - z_offset; sz_idx <= z_idx + z_offset; sz_idx += z_step){ if (sz_idx >= z_max || sz_idx < 0) continue; if (sz_idx >= z_max || sz_idx < 0) continue; if (y_offset == 0) { y_step = 1; } else { y_step = 2 * y_offset; } for (int sy_idx = y_idx - y_offset; sy_idx <= y_idx + y_offset; sy_idx += y_step){ if (sy_idx >= y_max || sy_idx < 0) continue; if (x_offset == 0) { x_step = 1; } else { x_step = 2 * x_offset; } for (int sx_idx = x_idx - x_offset; sx_idx <= x_idx + x_offset; sx_idx += x_step){ if (sx_idx >= x_max || sx_idx < 0) continue; int sv_idx = xyz_to_vidx[sx_idx * y_max * z_max + sy_idx * z_max + sz_idx]; if (sv_idx != EMPTY_KEY) { // found non-empty index if (num_samples >= attend_size) return; // full and return attend_indices[th_idx * attend_size + num_samples] = sv_idx; num_samples++; }else { // not found ; } } } } } } } } return; } void subm_strided_attention_with_tensor_kernel_launcher(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int num_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec){ cudaError_t err; dim3 blocks(DIVUP(num_voxels, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); subm_strided_attention_with_tensor_kernel<<>>(x_max, y_max, z_max, num_voxels, attend_size, num_range, attend_indices, v_indices, xyz_to_vidx, range_spec); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } __global__ void subm_strided_attention_with_hash_kernel(int x_max, int y_max, int z_max, int num_voxels, int attend_size, 
int num_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec) { /* attend_indices: [num_voxels, attend_size] for gather attend indices v_indices: [num_voxels, 4] bs + zyx indices of voxels xyz_to_vidx: [bs, hash_size, 2] voxel coordinates to voxel indices range_spec: [num_range, 3] half start/end range & stride */ int th_idx = blockIdx.x * blockDim.x + threadIdx.x; if (th_idx >= num_voxels) return; int bs_idx = v_indices[th_idx * 4 + 0]; int z_idx = v_indices[th_idx * 4 + 1]; int y_idx = v_indices[th_idx * 4 + 2]; int x_idx = v_indices[th_idx * 4 + 3]; if (x_idx >= x_max || x_idx < 0 || y_idx < 0 || y_idx >= y_max || z_idx < 0 || z_idx >= z_max) return; xyz_to_vidx += bs_idx * hash_size * 2; int num_samples = 0; for (int range_idx = 0; range_idx < num_range; ++range_idx) { int search_x_start_range = range_spec[range_idx * 9 + 0]; int search_x_end_range = range_spec[range_idx * 9 + 1]; int search_x_stride = range_spec[range_idx * 9 + 2]; int search_y_start_range = range_spec[range_idx * 9 + 3]; int search_y_end_range = range_spec[range_idx * 9 + 4]; int search_y_stride = range_spec[range_idx * 9 + 5]; int search_z_start_range = range_spec[range_idx * 9 + 6]; int search_z_end_range = range_spec[range_idx * 9 + 7]; int search_z_stride = range_spec[range_idx * 9 + 8]; int x_step = 0; int y_step = 0; int z_step = 0; for (int z_offset = 0; z_offset < search_z_end_range; z_offset += search_z_stride) { for (int y_offset = 0; y_offset < search_y_end_range; y_offset += search_y_stride) { for (int x_offset = 0; x_offset < search_x_end_range; x_offset += search_x_stride) { if ((x_offset < search_x_start_range) && (y_offset < search_y_start_range) && (z_offset < search_z_start_range)) { continue; } // each loop process 8 points if (z_offset == 0) { z_step = 1; } else { z_step = 2 * z_offset; } for (int sz_idx = z_idx - z_offset; sz_idx <= z_idx + z_offset; sz_idx += z_step){ if (sz_idx >= z_max || sz_idx < 0) continue; 
// Host-side launcher for subm_strided_attention_with_hash_kernel.
// Launches one thread per voxel in blocks of THREADS_PER_BLOCK threads and
// terminates the process if the launch reports a CUDA error.
// All pointer arguments are forwarded unchanged to the kernel.
void subm_strided_attention_with_hash_kernel_launcher(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int num_range, int hash_size,
                                                        int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec){

    // A grid with zero blocks is an invalid launch configuration, so an empty
    // voxel set would otherwise be reported as a kernel failure below.
    if (num_voxels <= 0) return;

    cudaError_t err;

    dim3 blocks(DIVUP(num_voxels, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    subm_strided_attention_with_hash_kernel<<<blocks, threads>>>(x_max, y_max, z_max, num_voxels, attend_size, num_range,
                                                                    hash_size, attend_indices, v_indices, xyz_to_vidx, range_spec);
    // cudaDeviceSynchronize();  // for using printf in kernel function

    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor); int subm_local_attention_with_hash_wrapper(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int attend_range, int hash_size, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor); int subm_strided_attention_with_tensor_wrapper(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int num_range, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor range_spec_tensor); int subm_strided_attention_with_hash_wrapper(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int num_range, int hash_size, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor range_spec_tensor); void subm_local_attention_with_tensor_kernel_launcher(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int attend_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx); void subm_local_attention_with_hash_kernel_launcher(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int attend_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx); void subm_strided_attention_with_tensor_kernel_launcher(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int num_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec); void subm_strided_attention_with_hash_kernel_launcher(int x_max, int y_max, int z_max, int num_voxels, int attend_size, int num_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec); int sparse_local_attention_with_tensor_wrapper(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int attend_range, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor); int 
sparse_local_attention_with_hash_wrapper(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int attend_range, int hash_size, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor); int sparse_strided_attention_with_tensor_wrapper(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int num_range, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor range_spec_tensor); int sparse_strided_attention_with_hash_wrapper(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int num_range, int hash_size, at::Tensor attend_indices_tensor, at::Tensor v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor range_spec_tensor); void sparse_local_attention_with_tensor_kernel_launcher(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int attend_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx); void sparse_local_attention_with_hash_kernel_launcher(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int attend_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx); void sparse_strided_attention_with_tensor_kernel_launcher(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int num_range, int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec); void sparse_strided_attention_with_hash_kernel_launcher(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int attend_size, int num_range, int hash_size, int *attend_indices, const int *v_indices, const int *xyz_to_vidx, const int *range_spec); #endif 
// Python-facing entry point that fills a dense coordinate -> voxel-index map.
// Validates that every tensor is a contiguous CUDA tensor (CHECK_INPUT prints a
// message and exits the process otherwise), extracts raw device pointers and
// forwards them to build_mapping_with_tensor_kernel_launcher.
// xyz_to_vidx_tensor is the output and is written by the kernel.
// Always returns 1.
int build_mapping_with_tensor_wrapper(int x_max, int y_max, int z_max, int num_voxels,
                                        at::Tensor v_indices_tensor, at::Tensor v_bs_cnt_tensor, at::Tensor xyz_to_vidx_tensor) {
    CHECK_INPUT(v_indices_tensor);
    CHECK_INPUT(v_bs_cnt_tensor);
    CHECK_INPUT(xyz_to_vidx_tensor);

    // NOTE(review): the accessor calls carry no element type here; presumably
    // the original reads the tensors as int (data<int>()) - confirm against the
    // repository before building.
    const int *v_indices = v_indices_tensor.data();
    const int *v_bs_cnt = v_bs_cnt_tensor.data();
    int *xyz_to_vidx = xyz_to_vidx_tensor.data();

    build_mapping_with_tensor_kernel_launcher(x_max, y_max, z_max, num_voxels, v_indices, v_bs_cnt, xyz_to_vidx);
    return 1;
}
// Python-facing entry point for hash-table based voxel downsampling.
// Validates that every tensor is a contiguous CUDA tensor (CHECK_INPUT prints a
// message and exits the process otherwise), extracts raw device pointers and
// forwards them, together with the grid extents, strides and hash size, to
// downsample_with_hash_kernel_launcher.
// ds_v_indices_tensor, xyz_to_vidx_tensor and vcount_tensor are passed as
// mutable buffers for the kernel to fill.
// Always returns 1.
int downsample_with_hash_wrapper(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride,
                                    int num_voxels, int num_ds_voxels, int hash_size,
                                    at::Tensor v_indices_tensor, at::Tensor ds_v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor vcount_tensor) {
    CHECK_INPUT(v_indices_tensor);
    CHECK_INPUT(ds_v_indices_tensor);
    CHECK_INPUT(xyz_to_vidx_tensor);
    CHECK_INPUT(vcount_tensor);

    // NOTE(review): the accessor calls carry no element type here; presumably
    // the original reads the tensors as int (data<int>()) - confirm against the
    // repository before building.
    const int *v_indices = v_indices_tensor.data();
    int *ds_v_indices = ds_v_indices_tensor.data();
    int *xyz_to_vidx = xyz_to_vidx_tensor.data();
    int *vcount = vcount_tensor.data();

    downsample_with_hash_kernel_launcher(x_max, y_max, z_max, x_stride, y_stride, z_stride, num_voxels, num_ds_voxels, hash_size, v_indices, ds_v_indices, xyz_to_vidx, vcount);
    return 1;
}
// 32-bit Murmur3 finalizer (fmix32) reduced to a bucket index.
//
// k:         key to hash
// hash_size: number of buckets, must be > 0
// returns:   bucket index in [0, hash_size)
//
// The mixing is done on an unsigned copy of the key: with a signed int the
// right shifts are arithmetic (sign-propagating) and the final remainder can
// be negative, which would yield a negative bucket index.
// NOTE(review): if another translation unit keeps its own copy of this
// function for lookups, it must use the same arithmetic so that inserts and
// finds agree.
__device__ int murmur_hash(int k, int hash_size) {
    unsigned int h = static_cast<unsigned int>(k);
    h ^= h >> 16;
    h *= 0x85ebca6bu;
    h ^= h >> 13;
    h *= 0xc2b2ae35u;
    h ^= h >> 16;
    //return h & (hash_size-1);  // only valid when hash_size is a power of two
    return static_cast<int>(h % static_cast<unsigned int>(hash_size));
}
__global__ void downsample_with_hash_kernel(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride,
                                            int num_voxels, int num_ds_voxels, int hash_size,
                                            const int *v_indices, int *ds_v_indices, int *xyz_to_vidx, int *vcount) {
    /*
        Downsample voxel coordinates by integer division with the strides and keep
        one entry per distinct downsampled coordinate, using a per-sample
        open-addressing hash table (linear probing) to reject duplicates.

        v_indices: [num_voxels, 4] bs + zyx indices of voxels
        ds_v_indices: [bs, num_ds_voxels, 3] downsampled voxels (zyx), filled in arrival order
        xyz_to_vidx: [bs, hash_size, 2] hash table, (key, value) pairs; key slots hold EMPTY_KEY when free
        vcount: [bs] number of distinct downsampled voxels claimed per sample

        One thread handles one input voxel.
    */

    int th_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (th_idx >= num_voxels) return;

    int bs_idx = v_indices[th_idx * 4 + 0];
    int z_idx = v_indices[th_idx * 4 + 1];
    int y_idx = v_indices[th_idx * 4 + 2];
    int x_idx = v_indices[th_idx * 4 + 3];

    int ds_z_idx = z_idx / z_stride;
    int ds_y_idx = y_idx / y_stride;
    int ds_x_idx = x_idx / x_stride;

    if (ds_x_idx >= x_max || ds_x_idx < 0 || ds_y_idx < 0 || ds_y_idx >= y_max || ds_z_idx < 0 || ds_z_idx >= z_max) return;

    // Move to the hash table and output rows of this voxel's batch sample.
    xyz_to_vidx += bs_idx * hash_size * 2;
    ds_v_indices += bs_idx * num_ds_voxels * 3;

    int key = ds_x_idx * y_max * z_max + ds_y_idx * z_max + ds_z_idx;

    // hash table with force insert, reject duplicates
    int hash_idx = hash(key, hash_size);
    // At most hash_size probes, so a full table cannot cause an endless loop.
    for (int prob_cnt = 0; prob_cnt < hash_size; ++prob_cnt) {
        int prev_key = atomicCAS(xyz_to_vidx + hash_idx * 2 + 0, EMPTY_KEY, key);  // insert key when empty
        if (prev_key == EMPTY_KEY) {
            // This thread claimed the slot: allocate the next output row.
            int v_idx = atomicAdd(vcount + bs_idx, 1);
            // Only write when the row exists. Without this check, more distinct
            // downsampled voxels than num_ds_voxels would write past the end of
            // this sample's rows. On overflow the value slot is left untouched
            // and vcount keeps counting, so the host can detect the overflow.
            if (v_idx < num_ds_voxels) {
                ds_v_indices[v_idx * 3 + 0] = ds_z_idx;  // insert zyx to ds_indices
                ds_v_indices[v_idx * 3 + 1] = ds_y_idx;
                ds_v_indices[v_idx * 3 + 2] = ds_x_idx;
                xyz_to_vidx[hash_idx * 2 + 1] = v_idx;   // insert value to hash table
            }
            return;
        }
        if (prev_key == key) {  // already occupied by the same coordinate
            return;
        }
        // linear probing
        hash_idx = (hash_idx + 1) % hash_size;
    }
}
__global__ void build_mapping_with_hash_kernel(int x_max, int y_max, int z_max, int num_voxels, int hash_size,
                                                const int *v_indices, const int *v_bs_cnt, int *xyz_to_vidx) {
    /*
        Insert every in-range voxel into the hash table of its batch sample,
        mapping its flattened coordinate to its index within that sample.

        v_indices: [N1+N2, 4] bs zyx indices of voxels
        v_bs_cnt: [bs] num_voxels in each sample
        xyz_to_vidx: [B, hash_size, 2] hash table key-value for dim-2
    */

    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= num_voxels) return;

    const int *coord = v_indices + tid * 4;
    const int batch = coord[0];
    const int vz = coord[1];
    const int vy = coord[2];
    const int vx = coord[3];

    // out of bound
    const bool x_ok = (vx >= 0) && (vx < x_max);
    const bool y_ok = (vy >= 0) && (vy < y_max);
    const bool z_ok = (vz >= 0) && (vz < z_max);
    if (!(x_ok && y_ok && z_ok)) return;

    // Number of voxels belonging to the samples that precede this one.
    int preceding = 0;
    for (int b = batch - 1; b >= 0; --b) {
        preceding += v_bs_cnt[b];
    }
    int local_idx = tid - preceding;  // v_idx for this sample

    // key -> [x_max, y_max, z_max] value -> v_idx
    int flat_key = vx * y_max * z_max + vy * z_max + vz;
    hash_table_insert(flat_key, local_idx, hash_size, xyz_to_vidx + batch * hash_size * 2);
}
num_ds_voxels, at::Tensor v_indices_tensor, at::Tensor ds_v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor vcount_tensor); void downsample_with_tensor_kernel_launcher(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int num_ds_voxels, const int *v_indices, int *ds_v_indices, int *xyz_to_vidx, int *vcount); int downsample_with_hash_wrapper(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int num_ds_voxels, int hash_size, at::Tensor v_indices_tensor, at::Tensor ds_v_indices_tensor, at::Tensor xyz_to_vidx_tensor, at::Tensor vcount_tensor); void downsample_with_hash_kernel_launcher(int x_max, int y_max, int z_max, int x_stride, int y_stride, int z_stride, int num_voxels, int num_ds_voxels, int hash_size, const int *v_indices, int *ds_v_indices, int *xyz_to_vidx, int *vcount); #endif ================================================ FILE: pcdet/ops/votr_ops/src/group_features.cpp ================================================ /* Stacked-batch-data version of point grouping, modified from the original implementation of official PointNet++ codes. Written by Shaoshuai Shi All Rights Reserved 2019-2020. 
// Python-facing entry point for the backward pass of feature grouping.
// Validates that every tensor is a contiguous CUDA tensor (CHECK_INPUT prints a
// message and exits the process otherwise), extracts raw device pointers and
// forwards them with the size arguments to
// group_features_grad_kernel_launcher_stack.
// grad_features_tensor is the only buffer taken through a non-const pointer,
// i.e. it is the output.
// Always returns 1.
int group_features_grad_wrapper_stack(int B, int M, int C, int N, int nsample,
    at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor idx_batch_cnt_tensor,
    at::Tensor features_batch_cnt_tensor, at::Tensor grad_features_tensor) {

    CHECK_INPUT(grad_out_tensor);
    CHECK_INPUT(idx_tensor);
    CHECK_INPUT(idx_batch_cnt_tensor);
    CHECK_INPUT(features_batch_cnt_tensor);
    CHECK_INPUT(grad_features_tensor);

    // NOTE(review): the accessor calls carry no element type here; presumably
    // the original uses data<float>() / data<int>() matching the pointer types
    // below - confirm against the repository before building.
    const float *grad_out = grad_out_tensor.data();
    const int *idx = idx_tensor.data();
    const int *idx_batch_cnt = idx_batch_cnt_tensor.data();
    const int *features_batch_cnt = features_batch_cnt_tensor.data();
    float *grad_features = grad_features_tensor.data();

    group_features_grad_kernel_launcher_stack(B, M, C, N, nsample, grad_out, idx, idx_batch_cnt, features_batch_cnt, grad_features);
    return 1;
}
__global__ void group_features_grad_kernel_stack(int B, int M, int C, int N, int nsample,
    const float *grad_out, const int *idx, const int *idx_batch_cnt, const int *features_batch_cnt, float *grad_features) {
    // Backward of feature grouping: scatter-add each output gradient onto the
    // feature element it was gathered from. One thread per (point, channel, sample).
    // :param grad_out: (M1 + M2 ..., C, nsample) tensor of the gradients of the output from forward
    // :param idx: (M1 + M2 ..., nsample) indices of the grouped features, relative to the sample's first feature; negative entries are ignored
    // :param idx_batch_cnt: (batch_size) [M1, M2, ...] number of grouping points per sample
    // :param features_batch_cnt: (batch_size) [N1, N2, ...] number of features per sample
    // :return:
    //     grad_features: (N1 + N2 ..., C) gradient of the features, accumulated atomically

    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int s = tid % nsample;
    const int c = (tid / nsample) % C;
    const int p = tid / nsample / C;
    if (p >= M || c >= C || s >= nsample) return;

    const int src = idx[p * nsample + s];
    if (src < 0) return;  // don't care neg indices

    // Locate the batch sample that grouping point p belongs to.
    int batch = 0;
    int upper = idx_batch_cnt[0];
    while (batch + 1 < B && p >= upper) {
        ++batch;
        upper += idx_batch_cnt[batch];
    }

    // Row of the first feature of that sample in the stacked feature tensor.
    int first_row = 0;
    for (int b = 0; b < batch; ++b) {
        first_row += features_batch_cnt[b];
    }

    const float g = grad_out[p * C * nsample + c * nsample + s];
    atomicAdd(grad_features + (first_row + src) * C + c, g);
}
__global__ void group_features_kernel_stack(int B, int M, int C, int nsample,
    const float *features, const int *features_batch_cnt, const int *idx, const int *idx_batch_cnt, float *out) {
    // Forward of feature grouping: gather feature channels by index.
    // One thread per (point, channel, sample).
    // :param features: (N1 + N2 ..., C) tensor of features to group
    // :param features_batch_cnt: (batch_size) [N1, N2, ...] number of features per sample
    // :param idx: (M1 + M2 ..., nsample) indices of the features to group, relative to the sample's first feature; negative entries are skipped
    // :param idx_batch_cnt: (batch_size) [M1, M2, ...] number of grouping points per sample
    // :return:
    //     output: (M1 + M2, C, nsample) tensor; elements with a negative index are not written

    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int s = tid % nsample;
    const int c = (tid / nsample) % C;
    const int p = tid / nsample / C;
    if (p >= M || c >= C || s >= nsample) return;

    const int src = idx[p * nsample + s];
    if (src < 0) return;  // don't care neg indices

    // Locate the batch sample that grouping point p belongs to.
    int batch = 0;
    int upper = idx_batch_cnt[0];
    while (batch + 1 < B && p >= upper) {
        ++batch;
        upper += idx_batch_cnt[batch];
    }

    // Row of the first feature of that sample in the stacked feature tensor.
    int first_row = 0;
    for (int b = 0; b < batch; ++b) {
        first_row += features_batch_cnt[b];
    }

    const float *sample_features = features + first_row * C;
    out[p * C * nsample + c * nsample + s] = sample_features[src * C + c];
}
tensor containing the indicies of features to group with // :return: // output: (M1 + M2, C, nsample) tensor cudaError_t err; dim3 blocks(DIVUP(M * C * nsample, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) dim3 threads(THREADS_PER_BLOCK); group_features_kernel_stack<<>>(B, M, C, nsample, features, features_batch_cnt, idx, idx_batch_cnt, out); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } ================================================ FILE: pcdet/ops/votr_ops/src/group_features_gpu.h ================================================ /* Modified from group points, don't care indices with -1(<0) Written by Jiageng Mao All Rights Reserved 2019-2020. */ #ifndef _STACK_GROUP_FEATURES_GPU_H #define _STACK_GROUP_FEATURES_GPU_H #include #include #include #include int group_features_wrapper_stack(int B, int M, int C, int nsample, at::Tensor features_tensor, at::Tensor features_batch_cnt_tensor, at::Tensor idx_tensor, at::Tensor idx_batch_cnt_tensor, at::Tensor out_tensor); void group_features_kernel_launcher_stack(int B, int M, int C, int nsample, const float *features, const int *features_batch_cnt, const int *idx, const int *idx_batch_cnt, float *out); int group_features_grad_wrapper_stack(int B, int M, int C, int N, int nsample, at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor idx_batch_cnt_tensor, at::Tensor features_batch_cnt_tensor, at::Tensor grad_features_tensor); void group_features_grad_kernel_launcher_stack(int B, int M, int C, int N, int nsample, const float *grad_out, const int *idx, const int *idx_batch_cnt, const int *features_batch_cnt, float *grad_features); #endif ================================================ FILE: pcdet/ops/votr_ops/src/votr_api.cpp ================================================ #include #include #include "build_mapping_gpu.h" #include 
"build_attention_indices_gpu.h" #include "group_features_gpu.h" PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("build_mapping_with_tensor_wrapper", &build_mapping_with_tensor_wrapper, "build_mapping_with_tensor_wrapper"); m.def("build_mapping_with_hash_wrapper", &build_mapping_with_hash_wrapper, "build_mapping_with_hash_wrapper"); m.def("downsample_with_tensor_wrapper", &downsample_with_tensor_wrapper, "downsample_with_tensor_wrapper"); m.def("downsample_with_hash_wrapper", &downsample_with_hash_wrapper, "downsample_with_hash_wrapper"); m.def("subm_local_attention_with_tensor_wrapper", &subm_local_attention_with_tensor_wrapper, "subm_local_attention_with_tensor_wrapper"); m.def("subm_local_attention_with_hash_wrapper", &subm_local_attention_with_hash_wrapper, "subm_local_attention_with_hash_wrapper"); m.def("sparse_local_attention_with_tensor_wrapper", &sparse_local_attention_with_tensor_wrapper, "sparse_local_attention_with_tensor_wrapper"); m.def("sparse_local_attention_with_hash_wrapper", &sparse_local_attention_with_hash_wrapper, "sparse_local_attention_with_hash_wrapper"); m.def("subm_strided_attention_with_tensor_wrapper", &subm_strided_attention_with_tensor_wrapper, "subm_strided_attention_with_tensor_wrapper"); m.def("subm_strided_attention_with_hash_wrapper", &subm_strided_attention_with_hash_wrapper, "subm_strided_attention_with_hash_wrapper"); m.def("sparse_strided_attention_with_tensor_wrapper", &sparse_strided_attention_with_tensor_wrapper, "sparse_strided_attention_with_tensor_wrapper"); m.def("sparse_strided_attention_with_hash_wrapper", &sparse_strided_attention_with_hash_wrapper, "sparse_strided_attention_with_hash_wrapper"); m.def("group_features_grad_wrapper", &group_features_grad_wrapper_stack, "group_features_grad_wrapper_stack"); m.def("group_features_wrapper", &group_features_wrapper_stack, "group_features_wrapper_stack"); } ================================================ FILE: pcdet/ops/votr_ops/src/votr_cuda_utils.h 
class BuildTensorTable(Function):
    """Autograd wrapper around the CUDA op that fills a dense voxel lookup table.

    The op produces integer indices only, so no gradient flows through it.
    """

    @staticmethod
    def forward(ctx, batch_size, spatial_shape, voxel_indices, v_bs_cnt):
        """
        Args:
            ctx:
            batch_size: number of samples in the batch
            spatial_shape: (x_max, y_max, z_max) extent of the voxel grid
            voxel_indices: (num_voxels, 4) (bs_idx, z, y, x), must be contiguous
            v_bs_cnt: forwarded unchanged to the CUDA op
                (presumably the per-sample voxel counts; confirm against the kernel)
        Returns:
            dense_map: (batch_size, x_max, y_max, z_max) int32 tensor on the
                device of ``voxel_indices``; initialised to -1 and then
                written by ``build_mapping_with_tensor_wrapper``
        """
        x_max, y_max, z_max = spatial_shape
        num_voxels = voxel_indices.shape[0]
        assert voxel_indices.is_contiguous()
        # Allocate directly on the target device instead of building the table
        # on the CPU and copying it over.
        dense_map = torch.full(
            (batch_size, x_max, y_max, z_max), -1,
            dtype=torch.int32, device=voxel_indices.device)
        votr.build_mapping_with_tensor_wrapper(
            x_max, y_max, z_max, num_voxels, voxel_indices, v_bs_cnt, dense_map)
        return dense_map

    @staticmethod
    def backward(ctx, a=None):
        # One entry per forward() input (batch_size, spatial_shape,
        # voxel_indices, v_bs_cnt); autograd rejects any other count.
        return None, None, None, None
class HashTableDownSample(Function):
    """Autograd wrapper around the hash-table based voxel down-sampling CUDA op.

    Only integer index tensors are produced, so the op is non-differentiable.
    """

    @staticmethod
    def forward(ctx, strides, num_ds_voxels, batch_size, hash_size, spatial_shape, voxel_indices):
        """
        Args:
            ctx:
            strides: (x_stride, y_stride, z_stride)
            num_ds_voxels: capacity (per sample) of the down-sampled voxel buffer
            batch_size: number of samples in the batch
            hash_size: number of slots per sample in the hash table
            spatial_shape: (x_max, y_max, z_max)
            voxel_indices: (num_voxels, 4) (bs_idx, z, y, x), must be contiguous
        Returns:
            output_voxels: (num_kept, 4) int32, batch index followed by the
                three coordinates written by the CUDA op
                (coordinate order is defined by the kernel; not visible here)
            dense_map: (batch_size, hash_size, 2) int32 hash table, -1 = empty
        """
        x_stride, y_stride, z_stride = strides
        x_max, y_max, z_max = spatial_shape
        device = voxel_indices.device
        num_voxels = voxel_indices.shape[0]
        assert voxel_indices.is_contiguous()

        # All work buffers are created directly on the target device.
        dense_map = torch.full((batch_size, hash_size, 2), -1, dtype=torch.int32, device=device)
        ds_voxel_indices = torch.full(
            (batch_size, num_ds_voxels, 3), -1, dtype=torch.int32, device=device)
        vcount = torch.zeros(batch_size, dtype=torch.int32, device=device)

        votr.downsample_with_hash_wrapper(
            x_max, y_max, z_max, x_stride, y_stride, z_stride,
            num_voxels, num_ds_voxels, hash_size,
            voxel_indices, ds_voxel_indices, dense_map, vcount)

        ds_voxel_list = []
        for i in range(batch_size):
            ds_voxel = ds_voxel_indices[i]
            # Rows still holding the -1 fill value were never written: drop them.
            ds_voxel = ds_voxel[ds_voxel[:, 0] >= 0]
            bs_idx = torch.full((ds_voxel.shape[0], 1), i, dtype=torch.int32, device=device)
            ds_voxel_list.append(torch.cat([bs_idx, ds_voxel], dim=1))

        output_voxels = torch.cat(ds_voxel_list, dim=0).contiguous()
        return output_voxels, dense_map

    @staticmethod
    def backward(ctx, a=None, b=None):
        # One entry per forward() input (six of them); autograd rejects any
        # other count. ``b`` receives the gradient slot of the second output.
        return None, None, None, None, None, None
class SparseStridedAttentionTensorIndices(Function):
    """Builds strided attention indices for down-sampled voxels via a dense lookup table."""

    @staticmethod
    def forward(ctx, attend_size, range_spec, strides, dense_map, voxel_indices):
        """
        Args:
            ctx:
            attend_size: number of index slots allocated per voxel
            range_spec: nested sequence converted to an int32 tensor; its
                first dimension is passed to the op as ``num_range``
            strides: (x_stride, y_stride, z_stride)
            dense_map: (bs_idx, x_max, y_max, z_max) -> old map table
            voxel_indices: (num_voxels, 4) (bs_idx, z, y, x) -> new downsampled indices
        Returns:
            attend_indices: (num_voxels, attend_size) int32, -1 where unused
        """
        assert voxel_indices.is_contiguous()
        device = voxel_indices.device
        sx, sy, sz = strides
        # The four-way unpack also enforces that dense_map is 4-D.
        _, x_max, y_max, z_max = dense_map.shape
        voxel_count = voxel_indices.shape[0]

        spec = torch.tensor(range_spec).int().to(device)
        attend_indices = torch.full(
            (voxel_count, attend_size), -1, dtype=torch.int32, device=device)

        votr.sparse_strided_attention_with_tensor_wrapper(
            x_max, y_max, z_max, sx, sy, sz,
            voxel_count, attend_size, spec.shape[0],
            attend_indices, voxel_indices, dense_map, spec)
        return attend_indices

    @staticmethod
    def backward(ctx, a=None):
        # Index generation is not differentiable: one None per forward input.
        return (None,) * 5
class SubMLocalAttentionTensorIndices(Function):
    """Builds local attention indices for voxels via a dense lookup table.

    Produces integer indices only, so no gradient flows through it.
    """

    @staticmethod
    def forward(ctx, attend_size, attend_range, dense_map, voxel_indices):
        """
        Args:
            ctx:
            attend_size: number of index slots allocated per voxel
            attend_range: forwarded unchanged to the CUDA op
            dense_map: (batch_size, x_max, y_max, z_max) lookup table
            voxel_indices: (num_voxels, 4) (bs_idx, z, y, x), must be contiguous
        Returns:
            attend_indices: (num_voxels, attend_size) int32 tensor on the
                device of ``voxel_indices``; -1 marks slots the op left unset
        """
        # The four-way unpack also enforces that dense_map is 4-D.
        _, x_max, y_max, z_max = dense_map.shape
        num_voxels = voxel_indices.shape[0]
        assert voxel_indices.is_contiguous()
        # Allocate on the target device directly rather than on CPU + copy.
        attend_indices = torch.full(
            (num_voxels, attend_size), -1,
            dtype=torch.int32, device=voxel_indices.device)
        votr.subm_local_attention_with_tensor_wrapper(
            x_max, y_max, z_max, num_voxels, attend_size, attend_range,
            attend_indices, voxel_indices, dense_map)
        return attend_indices

    @staticmethod
    def backward(ctx, a=None):
        # One entry per forward() input (attend_size, attend_range, dense_map,
        # voxel_indices); autograd rejects any other count.
        return None, None, None, None
class GroupingOperation(Function):
    """Gathers feature rows by index through the VOTR CUDA grouping op.

    The backward pass accumulates the output gradient back onto the gathered
    feature rows with the matching CUDA gradient op.
    """

    @staticmethod
    def forward(ctx, features: torch.Tensor, features_batch_cnt: torch.Tensor,
                idx: torch.Tensor, idx_batch_cnt: torch.Tensor):
        """
        Args:
            ctx:
            features: (N1 + N2 ..., C) tensor of features to group
            features_batch_cnt: (batch_size) [N1, N2, ...] number of feature
                rows belonging to each sample; must sum to features.shape[0]
            idx: (M1 + M2 ..., nsample) tensor containing the indices of
                features to group with
            idx_batch_cnt: (batch_size) [M1, M2, ...] number of index rows
                belonging to each sample; must sum to idx.shape[0]
        Returns:
            output: (M1 + M2 ..., C, nsample) tensor
        """
        assert features.is_contiguous()
        assert features_batch_cnt.is_contiguous()
        assert idx.is_contiguous()
        assert idx_batch_cnt.is_contiguous()

        assert features.shape[0] == features_batch_cnt.sum(), \
            'features: %s, features_batch_cnt: %s' % (str(features.shape), str(features_batch_cnt))
        assert idx.shape[0] == idx_batch_cnt.sum(), \
            'idx: %s, idx_batch_cnt: %s' % (str(idx.shape), str(idx_batch_cnt))

        M, nsample = idx.size()
        N, C = features.size()
        B = idx_batch_cnt.shape[0]
        # float32 buffer on the same device as the inputs (the legacy
        # torch.cuda.FloatTensor constructor used the *current* device).
        output = torch.zeros((M, C, nsample), dtype=torch.float32, device=features.device)

        votr.group_features_wrapper(B, M, C, nsample, features, features_batch_cnt,
                                    idx, idx_batch_cnt, output)

        ctx.for_backwards = (B, N, idx, features_batch_cnt, idx_batch_cnt)
        return output

    @staticmethod
    def backward(ctx, grad_out: torch.Tensor):
        """
        Args:
            ctx:
            grad_out: (M1 + M2 ..., C, nsample) tensor of the gradients of the output from forward
        Returns:
            grad_features: (N1 + N2 ..., C) gradient of the features;
                the three index/count inputs receive no gradient
        """
        B, N, idx, features_batch_cnt, idx_batch_cnt = ctx.for_backwards

        M, C, nsample = grad_out.size()
        grad_features = torch.zeros((N, C), dtype=torch.float32, device=grad_out.device)

        # The CUDA op reads raw memory, so hand it a detached contiguous copy.
        grad_out_data = grad_out.detach().contiguous()
        votr.group_features_grad_wrapper(B, M, C, N, nsample, grad_out_data, idx,
                                         idx_batch_cnt, features_batch_cnt, grad_features)
        return grad_features, None, None, None
def AP_loss(logits, targets):
    """Compute the AP-loss value and its hand-derived gradient w.r.t. ``logits``.

    Args:
        logits: tensor of classification scores
        targets: tensor of the same shape; entries equal to 1 are positives,
            entries equal to 0 are negatives, any other value is ignored

    Returns:
        grad: float32 tensor shaped like ``logits`` holding the gradient
        loss: tensor of shape (1,), ``1 - mean precision``; when there is no
            positive target the function returns early with an all-zero
            ``grad`` and a zero loss
    """
    delta = 1.0
    # Work on whatever device the logits live on (CPU or any GPU) instead of
    # forcing everything onto the default CUDA device.
    device = logits.device

    grad = torch.zeros(logits.shape, device=device)
    metric = torch.zeros(1, device=device)

    if torch.max(targets) <= 0:
        return grad, metric

    labels_p = (targets == 1)
    fg_logits = logits[labels_p]
    threshold_logit = torch.min(fg_logits) - delta

    # Ignore those negative j that satisfy (L_{ij}=0 for all positive i),
    # to accelerate the AP-loss computation.
    valid_labels_n = ((targets == 0) & (logits >= threshold_logit))
    valid_bg_logits = logits[valid_labels_n]
    valid_bg_grad = torch.zeros(len(valid_bg_logits), device=device)

    fg_num = len(fg_logits)
    prec = torch.zeros(fg_num, device=device)
    # Visit positives from lowest to highest score so that max_prec is the
    # running maximum precision over the positives seen so far.
    order = torch.argsort(fg_logits)
    max_prec = 0

    for ii in order:
        tmp1 = fg_logits - fg_logits[ii]
        tmp1 = torch.clamp(tmp1 / (2 * delta) + 0.5, min=0, max=1)
        tmp2 = valid_bg_logits - fg_logits[ii]
        tmp2 = torch.clamp(tmp2 / (2 * delta) + 0.5, min=0, max=1)
        a = torch.sum(tmp1) + 0.5
        b = torch.sum(tmp2)
        tmp2 /= (a + b)
        current_prec = a / (a + b)
        if max_prec <= current_prec:
            max_prec = current_prec
        else:
            # current_prec < max_prec <= 1 here, so the divisor is positive.
            tmp2 *= ((1 - max_prec) / (1 - current_prec))
        valid_bg_grad += tmp2
        prec[ii] = max_prec

    grad[valid_labels_n] = valid_bg_grad
    grad[labels_p] = -(1 - prec)

    fg_num = max(fg_num, 1)
    grad /= fg_num

    metric = torch.sum(prec, dim=0, keepdim=True) / fg_num

    return grad, 1 - metric
class ResidualCoderV2(object):
    """Box coder with plain (un-normalised) centre offsets and absolute log sizes.

    The z offset is measured between box bottoms (centre minus half height).
    """

    def __init__(self, code_size=7, encode_angle_by_sincos=False, **kwargs):
        super().__init__()
        self.encode_angle_by_sincos = encode_angle_by_sincos
        # The sin/cos heading encoding needs one channel more than a raw angle.
        self.code_size = code_size + 1 if encode_angle_by_sincos else code_size

    def encode_torch(self, boxes, anchors):
        """
        Args:
            boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
            anchors: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]

        Returns:
            (N, code_size + C) encoded residuals.

        Note: the size columns of BOTH inputs are clamped to >= 1e-5 in place.
        """
        for tensor in (anchors, boxes):
            tensor[:, 3:6] = torch.clamp_min(tensor[:, 3:6], min=1e-5)

        xa, ya, za, _, _, dza, ra, *extra_a = torch.split(anchors, 1, dim=-1)
        xg, yg, zg, dxg, dyg, dzg, rg, *extra_g = torch.split(boxes, 1, dim=-1)

        anchor_bottom = za - dza / 2
        box_bottom = zg - dzg / 2

        pieces = [xg - xa, yg - ya, box_bottom - anchor_bottom]
        pieces += [torch.log(dxg), torch.log(dyg), torch.log(dzg)]
        if self.encode_angle_by_sincos:
            pieces += [torch.cos(rg), torch.sin(rg)]
        else:
            pieces += [rg - ra]
        pieces += [g - a for g, a in zip(extra_g, extra_a)]
        return torch.cat(pieces, dim=-1)

    def decode_torch(self, box_encodings, anchors):
        """
        Args:
            box_encodings: (B, N, 7 + C) or (N, 7 + C)
                [x, y, z, dx, dy, dz, heading or *[cos, sin], ...]
            anchors: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]

        Returns:
            Decoded boxes [x, y, z, dx, dy, dz, heading, ...].
        """
        xa, ya, za, _, _, dza, ra, *extra_a = torch.split(anchors, 1, dim=-1)
        codes = torch.split(box_encodings, 1, dim=-1)
        if self.encode_angle_by_sincos:
            xt, yt, zt, dxt, dyt, dzt, cost, sint, *extra_t = codes
            heading = torch.atan2(sint, cost)
        else:
            xt, yt, zt, dxt, dyt, dzt, rt, *extra_t = codes
            heading = rt + ra

        dxg, dyg, dzg = torch.exp(dxt), torch.exp(dyt), torch.exp(dzt)
        xg = xt + xa
        yg = yt + ya
        # zt is a bottom-to-bottom offset: rebuild the bottom, then lift to the centre.
        zg = (zt + (za - dza / 2)) + dzg / 2

        extras = [t + a for t, a in zip(extra_t, extra_a)]
        return torch.cat([xg, yg, zg, dxg, dyg, dzg, heading, *extras], dim=-1)
class PreviousResidualDecoder(object):
    """Decoder for anchor residuals whose size channels are ordered (w, l, h)."""

    def __init__(self, code_size=7, **kwargs):
        super().__init__()
        self.code_size = code_size

    @staticmethod
    def decode_torch(box_encodings, anchors):
        """
        Args:
            box_encodings: (B, N, 7 + ?) x, y, z, w, l, h, r, custom values
            anchors: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]

        Returns:
            Decoded boxes [x, y, z, dx, dy, dz, heading, ...].
        """
        xa, ya, za, dxa, dya, dza, ra, *anchor_extras = torch.split(anchors, 1, dim=-1)
        xt, yt, zt, wt, lt, ht, rt, *code_extras = torch.split(box_encodings, 1, dim=-1)

        # Planar offsets are expressed in units of the anchor's BEV diagonal.
        bev_diag = torch.sqrt(dxa ** 2 + dya ** 2)

        decoded = [
            xt * bev_diag + xa,
            yt * bev_diag + ya,
            zt * dza + za,
            torch.exp(lt) * dxa,  # dx is scaled by the "l" channel
            torch.exp(wt) * dya,  # dy is scaled by the "w" channel
            torch.exp(ht) * dza,
            rt + ra,
        ]
        decoded.extend(t + a for t, a in zip(code_extras, anchor_extras))
        return torch.cat(decoded, dim=-1)
class PointResidualCoder(object):
    """Encodes boxes relative to points, optionally normalised by per-class mean sizes.

    The heading is always encoded as a (cos, sin) pair.
    """

    def __init__(self, code_size=8, use_mean_size=True, **kwargs):
        """
        Args:
            code_size: number of encoded channels
            use_mean_size: normalise by the per-class sizes given as
                ``mean_size`` in kwargs (a (num_classes, 3) nested sequence)
        """
        super().__init__()
        self.code_size = code_size
        self.use_mean_size = use_mean_size
        if self.use_mean_size:
            mean_size = torch.from_numpy(np.array(kwargs['mean_size'])).float()
            # Keep the historical GPU placement when a GPU exists, but do not
            # crash on CPU-only hosts.
            if torch.cuda.is_available():
                mean_size = mean_size.cuda()
            self.mean_size = mean_size
            assert self.mean_size.min() > 0

    def _anchor_sizes(self, classes):
        """Look up the (dxa, dya, dza) mean sizes for 1-based class ids."""
        assert classes.max() <= self.mean_size.shape[0]
        # Follow the device of the inputs so CPU and multi-GPU tensors work.
        sizes = self.mean_size.to(classes.device)[classes - 1]
        return torch.split(sizes, 1, dim=-1)

    def encode_torch(self, gt_boxes, points, gt_classes=None):
        """
        Args:
            gt_boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
            points: (N, 3) [x, y, z]
            gt_classes: (N) [1, num_classes]; required when use_mean_size is set
        Returns:
            box_coding: (N, 8 + C)

        Note: the size columns of ``gt_boxes`` are clamped to >= 1e-5 in place.
        """
        gt_boxes[:, 3:6] = torch.clamp_min(gt_boxes[:, 3:6], min=1e-5)

        xg, yg, zg, dxg, dyg, dzg, rg, *cgs = torch.split(gt_boxes, 1, dim=-1)
        xa, ya, za = torch.split(points, 1, dim=-1)

        if self.use_mean_size:
            dxa, dya, dza = self._anchor_sizes(gt_classes)
            diagonal = torch.sqrt(dxa ** 2 + dya ** 2)
            xt = (xg - xa) / diagonal
            yt = (yg - ya) / diagonal
            zt = (zg - za) / dza
            dxt = torch.log(dxg / dxa)
            dyt = torch.log(dyg / dya)
            dzt = torch.log(dzg / dza)
        else:
            xt = (xg - xa)
            yt = (yg - ya)
            zt = (zg - za)
            dxt = torch.log(dxg)
            dyt = torch.log(dyg)
            dzt = torch.log(dzg)

        return torch.cat([xt, yt, zt, dxt, dyt, dzt, torch.cos(rg), torch.sin(rg), *cgs], dim=-1)

    def decode_torch(self, box_encodings, points, pred_classes=None):
        """
        Args:
            box_encodings: (N, 8 + C) [x, y, z, dx, dy, dz, cos, sin, ...]
            points: [x, y, z]
            pred_classes: (N) [1, num_classes]; required when use_mean_size is set
        Returns:
            (N, 7 + C) boxes [x, y, z, dx, dy, dz, heading, ...]
        """
        xt, yt, zt, dxt, dyt, dzt, cost, sint, *cts = torch.split(box_encodings, 1, dim=-1)
        xa, ya, za = torch.split(points, 1, dim=-1)

        if self.use_mean_size:
            dxa, dya, dza = self._anchor_sizes(pred_classes)
            diagonal = torch.sqrt(dxa ** 2 + dya ** 2)
            xg = xt * diagonal + xa
            yg = yt * diagonal + ya
            zg = zt * dza + za

            dxg = torch.exp(dxt) * dxa
            dyg = torch.exp(dyt) * dya
            dzg = torch.exp(dzt) * dza
        else:
            xg = xt + xa
            yg = yt + ya
            zg = zt + za
            dxg, dyg, dzg = torch.split(torch.exp(box_encodings[..., 3:6]), 1, dim=-1)

        rg = torch.atan2(sint, cost)

        return torch.cat([xg, yg, zg, dxg, dyg, dzg, rg, *cts], dim=-1)
def rotation_3d_in_axis(points, angles, axis=0):
    """Rotate each group of points around one coordinate axis.

    Args:
        points (np.ndarray, shape=[N, point_size, 3]): Points to rotate,
            one group per angle.
        angles (np.ndarray, shape=[N]): Rotation angle of each group.
        axis (int): Axis to rotate around: 0 (x), 1 (y), 2 or -1 (z).

    Returns:
        np.ndarray: Rotated points, shape [N, point_size, 3].

    Raises:
        ValueError: If ``axis`` is not one of 0, 1, 2, -1.
    """
    # points: [N, point_size, 3]
    rot_sin = np.sin(angles)
    rot_cos = np.cos(angles)
    ones = np.ones_like(rot_cos)
    zeros = np.zeros_like(rot_cos)
    if axis == 1:
        rot_mat_T = np.stack([[rot_cos, zeros, -rot_sin], [zeros, ones, zeros],
                              [rot_sin, zeros, rot_cos]])
    elif axis == 2 or axis == -1:
        rot_mat_T = np.stack([[rot_cos, -rot_sin, zeros],
                              [rot_sin, rot_cos, zeros], [zeros, zeros, ones]])
    elif axis == 0:
        # Rotation about x must leave the x coordinate untouched. The previous
        # matrix had its rows/columns permuted, so even a zero angle turned
        # (x, y, z) into (z, x, y). This form matches the axis-0 matrix used
        # by rotation_points_single_angle in this module.
        rot_mat_T = np.stack([[ones, zeros, zeros],
                              [zeros, rot_cos, -rot_sin],
                              [zeros, rot_sin, rot_cos]])
    else:
        raise ValueError('axis should in range')

    # rot_mat_T has shape [3, 3, N]; apply the a-th matrix to the a-th group.
    return np.einsum('aij,jka->aik', points, rot_mat_T)
@numba.njit
def corner_to_standup_nd_jit(boxes_corner):
    """Reduce box corners to axis-aligned (min-max) boxes.

    Args:
        boxes_corner (np.ndarray, shape=[N, 2**dim, dim]): Boxes corners.

    Returns:
        np.ndarray, shape=[N, dim*2]: For every box the per-axis minima
            followed by the per-axis maxima.
    """
    box_count = boxes_corner.shape[0]
    axis_count = boxes_corner.shape[-1]
    standup = np.zeros((box_count, 2 * axis_count), dtype=boxes_corner.dtype)
    for b in range(box_count):
        for ax in range(axis_count):
            coords = boxes_corner[b, :, ax]
            # Minima fill the first half of the row, maxima the second half.
            standup[b, ax] = np.min(coords)
            standup[b, axis_count + ax] = np.max(coords)
    return standup
def rotation_points_single_angle(points, angle, axis=0):
    """Rotate all points by one shared angle around a coordinate axis.

    Args:
        points (np.ndarray, shape=[N, 3]): Points to rotate.
        angle (float or np.ndarray, shape=[1]): Rotation angle.
        axis (int): Axis to rotate around: 0, 1, 2 or -1 (same as 2).

    Returns:
        tuple: ``(rotated_points, rot_mat_T)`` where ``rotated_points`` is
            ``points @ rot_mat_T`` and ``rot_mat_T`` is the 3x3 matrix used,
            cast to ``points.dtype``.

    Raises:
        ValueError: If ``axis`` is not one of 0, 1, 2, -1.
    """
    # points: [N, 3]
    s = np.sin(angle)
    c = np.cos(angle)

    if axis == 1:
        rows = [[c, 0, -s], [0, 1, 0], [s, 0, c]]
    elif axis in (2, -1):
        rows = [[c, -s, 0], [s, c, 0], [0, 0, 1]]
    elif axis == 0:
        rows = [[1, 0, 0], [0, c, -s], [0, s, c]]
    else:
        raise ValueError('axis should in range')

    rot_mat_T = np.array(rows, dtype=points.dtype)
    return np.matmul(points, rot_mat_T), rot_mat_T
@numba.njit
def _points_in_convex_polygon_3d_jit(points, polygon_surfaces, normal_vec, d,
                                     num_surfaces):
    """Test every point against the surface planes of every polygon.

    A point is reported inside polygon ``j`` when
    ``dot(point, normal_vec[j, k]) + d[j, k] < 0`` holds for each of the
    first ``num_surfaces[j]`` surfaces of that polygon.

    Args:
        points (np.ndarray): Input points with shape of (num_points, 3).
        polygon_surfaces (np.ndarray): Polygon surfaces with shape of
            (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).
            Only its first two dimensions are read here.
        normal_vec (np.ndarray): Plane normals, indexed as
            ``normal_vec[polygon, surface, 0:3]``.
        d (np.ndarray): Plane offsets, indexed as ``d[polygon, surface]``.
        num_surfaces (np.ndarray): Number of valid surfaces of each polygon,
            shape of (num_polygon).

    Returns:
        np.ndarray: Boolean matrix with the shape of [num_points, num_polygon].
    """
    max_num_surfaces = polygon_surfaces.shape[1]
    num_points = points.shape[0]
    num_polygons = polygon_surfaces.shape[0]
    ret = np.ones((num_points, num_polygons), dtype=np.bool_)
    sign = 0.0
    for i in range(num_points):
        for j in range(num_polygons):
            for k in range(max_num_surfaces):
                # Surfaces at index >= num_surfaces[j] are padding. The former
                # `k > num_surfaces[j]` test still evaluated one padded surface.
                if k >= num_surfaces[j]:
                    break
                sign = (
                    points[i, 0] * normal_vec[j, k, 0] +
                    points[i, 1] * normal_vec[j, k, 1] +
                    points[i, 2] * normal_vec[j, k, 2] + d[j, k])
                if sign >= 0:
                    # On or beyond one plane: the point is outside this polygon.
                    ret[i, j] = False
                    break
    return ret
def in_hull(p, hull):
    """
    Test which points fall inside the Delaunay triangulation of a point set.

    :param p: (N, K) test points
    :param hull: (M, K) M corners of a box, or an already built Delaunay object
    :return (N) bool, all False when no triangulation can be built from `hull`
    """
    # QhullError is exported from scipy.spatial in recent SciPy releases; the
    # scipy.spatial.qhull module is a deprecated location kept as a fallback.
    try:
        from scipy.spatial import QhullError
    except ImportError:
        from scipy.spatial.qhull import QhullError

    try:
        if not isinstance(hull, Delaunay):
            hull = Delaunay(hull)
        flag = hull.find_simplex(p) >= 0
    except QhullError:
        print('Warning: not a hull %s' % str(hull))
        # np.bool was removed from NumPy; np.bool_ is the supported boolean dtype.
        flag = np.zeros(p.shape[0], dtype=np.bool_)

    return flag
def mask_boxes_outside_range_numpy(boxes, limit_range, min_num_corners=1):
    """
    Select boxes that have enough corners inside a 3D range.

    Args:
        boxes: (N, 7) [x, y, z, dx, dy, dz, heading, ...], (x, y, z) is the box center;
            columns beyond the first seven are ignored
        limit_range: [minx, miny, minz, maxx, maxy, maxz]
        min_num_corners: minimum number of corners that must lie inside limit_range

    Returns:
        mask: (N) bool, True for boxes with at least min_num_corners corners in range
    """
    box_params = boxes[:, 0:7] if boxes.shape[1] > 7 else boxes
    corners = boxes_to_corners_3d(box_params)  # (N, 8, 3)

    lower_bound, upper_bound = limit_range[0:3], limit_range[3:6]
    # A corner is inside only when all three coordinates respect both bounds.
    corner_inside = ((corners >= lower_bound) & (corners <= upper_bound)).all(axis=2)
    return corner_inside.sum(axis=1) >= min_num_corners
def boxes3d_lidar_to_kitti_camera(boxes3d_lidar, calib):
    """
    Convert lidar boxes to KITTI rect-camera boxes without modifying the input.

    :param boxes3d_lidar: (N, 7) [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center
    :param calib: object whose lidar_to_rect(points) maps (N, 3) lidar points to rect camera coords
    :return:
        boxes3d_camera: (N, 7) [x, y, z, l, h, w, r] in rect camera coords
    """
    # Copy the centers: the slice is a view, so shifting z in place below would
    # otherwise write into the caller's array.
    xyz_lidar = boxes3d_lidar[:, 0:3].copy()
    l, w, h, r = boxes3d_lidar[:, 3:4], boxes3d_lidar[:, 4:5], boxes3d_lidar[:, 5:6], boxes3d_lidar[:, 6:7]

    # Move the reference point from the box center down by half the box height.
    xyz_lidar[:, 2] -= h.reshape(-1) / 2
    xyz_cam = calib.lidar_to_rect(xyz_lidar)
    r = -r - np.pi / 2
    return np.concatenate([xyz_cam, l, h, w, r], axis=-1)
boxes3d_to_corners3d_kitti_camera(boxes3d, bottom_center=True): """ :param boxes3d: (N, 7) [x, y, z, l, h, w, ry] in camera coords, see the definition of ry in KITTI dataset :param bottom_center: whether y is on the bottom center of object :return: corners3d: (N, 8, 3) 7 -------- 4 /| /| 6 -------- 5 . | | | | . 3 -------- 0 |/ |/ 2 -------- 1 """ boxes_num = boxes3d.shape[0] l, h, w = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5] x_corners = np.array([l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2], dtype=np.float32).T z_corners = np.array([w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.], dtype=np.float32).T if bottom_center: y_corners = np.zeros((boxes_num, 8), dtype=np.float32) y_corners[:, 4:8] = -h.reshape(boxes_num, 1).repeat(4, axis=1) # (N, 8) else: y_corners = np.array([h / 2., h / 2., h / 2., h / 2., -h / 2., -h / 2., -h / 2., -h / 2.], dtype=np.float32).T ry = boxes3d[:, 6] zeros, ones = np.zeros(ry.size, dtype=np.float32), np.ones(ry.size, dtype=np.float32) rot_list = np.array([[np.cos(ry), zeros, -np.sin(ry)], [zeros, ones, zeros], [np.sin(ry), zeros, np.cos(ry)]]) # (3, 3, N) R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3) temp_corners = np.concatenate((x_corners.reshape(-1, 8, 1), y_corners.reshape(-1, 8, 1), z_corners.reshape(-1, 8, 1)), axis=2) # (N, 8, 3) rotated_corners = np.matmul(temp_corners, R_list) # (N, 8, 3) x_corners, y_corners, z_corners = rotated_corners[:, :, 0], rotated_corners[:, :, 1], rotated_corners[:, :, 2] x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2] x = x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8) y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8) z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8) corners = np.concatenate((x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)), axis=2) return corners.astype(np.float32) def boxes3d_kitti_camera_to_imageboxes(boxes3d, calib, image_shape=None): """ :param boxes3d: (N, 7) [x, y, z, l, h, w, r] in rect 
def boxes_iou_normal(boxes_a, boxes_b):
    """
    Pairwise IoU between two sets of axis-aligned 2D boxes.

    Args:
        boxes_a: (N, 4) [x1, y1, x2, y2]
        boxes_b: (M, 4) [x1, y1, x2, y2]

    Returns:
        iou: (N, M) intersection over union; the union is clamped to at least 1e-6
    """
    assert boxes_a.shape[1] == boxes_b.shape[1] == 4

    lhs = boxes_a[:, None, :]  # (N, 1, 4)
    rhs = boxes_b[None, :, :]  # (1, M, 4)

    # Overlap extent along each axis, zero when the boxes are disjoint.
    overlap_w = torch.clamp_min(
        torch.min(lhs[..., 2], rhs[..., 2]) - torch.max(lhs[..., 0], rhs[..., 0]), min=0)
    overlap_h = torch.clamp_min(
        torch.min(lhs[..., 3], rhs[..., 3]) - torch.max(lhs[..., 1], rhs[..., 1]), min=0)
    a_intersect_b = overlap_w * overlap_h

    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    union = area_a[:, None] + area_b[None, :] - a_intersect_b

    return a_intersect_b / torch.clamp_min(union, min=1e-6)
def get_calib_from_file(filepath):
    '''
    Read in a calibration file and parse into a dictionary.
    Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py

    Rows are recognised by their prefix ("P2", "P3", "Tr_velo_to_cam" /
    "Tr_velo_cam", "R0_rect" / "R_rect"); the trailing 12 (or 9 for the
    rectification row) whitespace-separated values of a row are used.

    Args:
        filepath: path of the calibration text file.

    Returns:
        dict with "P2" (3, 4), "P3" (3, 4), "Tr_velo2cam" (3, 4) and "R0" (3, 3).

    Raises:
        ValueError: if the file contains no P2, P3 or velodyne-to-camera row.
    '''
    # Fallback rectification matrix, used only when the file has no
    # R0_rect / R_rect row.
    R0 = np.array([[ 0.99992624, 0.00965411, -0.0072371 ],
                   [-0.00968531, 0.99994343, -0.00433077],
                   [ 0.00719491, 0.00440054, 0.99996366]])
    P2 = P3 = vtc_mat = None

    with open(filepath) as f:
        for line in f:
            # split() without a separator tolerates tabs and repeated blanks,
            # which a split on a single space does not.
            values = line.split()
            if line.startswith("P2"):
                P2 = np.array(values[-12:], np.float32)
            if line.startswith("P3"):
                P3 = np.array(values[-12:], np.float32)
            if line.startswith(("Tr_velo_to_cam", "Tr_velo_cam")):
                vtc_mat = np.array(values[-12:], np.float32)
            if line.startswith(("R0_rect", "R_rect")):
                R0 = np.array(values[-9:], np.float32)

    # Report missing rows explicitly instead of failing later on an unbound name.
    required = (("P2", P2), ("P3", P3), ("Tr_velo_to_cam", vtc_mat))
    missing = [label for label, value in required if value is None]
    if missing:
        raise ValueError(
            'calibration file %s has no row for: %s' % (filepath, ', '.join(missing)))

    data2 = {}
    data2["P2"] = P2.reshape(3, 4)
    data2["P3"] = P3.reshape(3, 4)
    data2["Tr_velo2cam"] = vtc_mat.reshape(3, 4)
    data2["R0"] = R0.reshape(3, 3)

    return data2
3] = 1 V2C_ext = np.vstack((self.V2C, np.zeros((1, 4), dtype=np.float32))) # (4, 4) V2C_ext[3, 3] = 1 pts_lidar = np.dot(pts_rect_hom, np.linalg.inv(np.dot(R0_ext, V2C_ext).T)) return pts_lidar[:, 0:3] def lidar_to_rect(self, pts_lidar): """ :param pts_lidar: (N, 3) :return pts_rect: (N, 3) """ pts_lidar_hom = self.cart_to_hom(pts_lidar) pts_rect = np.dot(pts_lidar_hom, np.dot(self.V2C.T, self.R0.T)) # pts_rect = reduce(np.dot, (pts_lidar_hom, self.V2C.T, self.R0.T)) return pts_rect def lidar_to_rect_cuda(self, pts_lidar): """ :param pts_lidar: (N, 3) :return pts_rect: (N, 3) """ pts_lidar_hom = self.cart_to_hom_cuda(pts_lidar) V2C = torch.from_numpy(self.V2C.T).to(pts_lidar.device) R0 = torch.from_numpy(self.R0.T).to(pts_lidar.device) pts_rect = torch.matmul(pts_lidar_hom, torch.matmul(V2C, R0)) # pts_rect = reduce(np.dot, (pts_lidar_hom, self.V2C.T, self.R0.T)) return pts_rect def rect_to_img(self, pts_rect): """ :param pts_rect: (N, 3) :return pts_img: (N, 2) """ pts_rect_hom = self.cart_to_hom(pts_rect) pts_2d_hom = np.dot(pts_rect_hom, self.P2.T) pts_img = (pts_2d_hom[:, 0:2].T / pts_rect_hom[:, 2]).T # (N, 2) pts_rect_depth = pts_2d_hom[:, 2] - self.P2.T[3, 2] # depth in rect camera coord return pts_img, pts_rect_depth def rect_to_img_cuda(self, pts_rect): """ :param pts_rect: (N, 3) :return pts_img: (N, 2) """ pts_rect_hom = self.cart_to_hom_cuda(pts_rect) P2 = torch.from_numpy(self.P2.T).to(pts_rect.device) pts_2d_hom = torch.matmul(pts_rect_hom, P2) pts_img = (pts_2d_hom[:, 0:2].T / pts_rect_hom[:, 2]).T # (N, 2) pts_rect_depth = pts_2d_hom[:, 2] - P2[3, 2] # depth in rect camera coord return pts_img, pts_rect_depth def lidar_to_img(self, pts_lidar): """ :param pts_lidar: (N, 3) :return pts_img: (N, 2) """ pts_rect = self.lidar_to_rect(pts_lidar) pts_img, pts_depth = self.rect_to_img(pts_rect) return pts_img, pts_depth def img_to_rect(self, u, v, depth_rect): """ :param u: (N) :param v: (N) :param depth_rect: (N) :return: """ x = ((u - self.cu) * 
def drop_info_with_name(info, name):
    """
    Build a copy of `info` without the entries whose name equals `name`.

    Args:
        info: dict of index-able arrays; info['name'] decides which entries are kept
            and every value is indexed with the list of kept positions
        name: value of info['name'] to filter out

    Returns:
        ret_info: new dict with the same keys, each value restricted to the kept positions
    """
    kept_positions = []
    for position, entry_name in enumerate(info['name']):
        if entry_name != name:
            kept_positions.append(position)

    return {field: values[kept_positions] for field, values in info.items()}
def create_logger(log_file=None, rank=0, log_level=logging.INFO):
    """
    Configure and return this module's logger.

    Args:
        log_file: optional path; when given, records are also written to this file
        rank: process rank; any rank other than 0 is limited to the 'ERROR' level
        log_level: level used by rank 0 for the logger and its handlers

    Returns:
        logger: the logger named after this module, with one console handler and,
            if log_file is set, one file handler
    """
    logger = logging.getLogger(__name__)
    level = log_level if rank == 0 else 'ERROR'
    logger.setLevel(level)

    # getLogger() returns the same object on every call, so handlers added by
    # an earlier call are still attached. Remove them first, otherwise each
    # record is emitted once per previous call.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = logging.Formatter('%(asctime)s  %(levelname)5s  %(message)s')
    console = logging.StreamHandler()
    console.setLevel(level)
    console.setFormatter(formatter)
    logger.addHandler(console)
    if log_file is not None:
        file_handler = logging.FileHandler(filename=log_file)
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    return logger
def merge_results_dist(result_part, size, tmpdir):
    """
    Gather per-rank result lists on rank 0 through pickle files in a shared directory.

    Args:
        result_part: picklable results produced by the current rank
        size: number of results to keep after merging; extra entries are cut off
        tmpdir: directory used for the exchange; created if missing and removed
            by rank 0 once the parts are merged

    Returns:
        ordered_results: on rank 0, the parts interleaved element by element
            (rank 0's first item, rank 1's first item, ...) and truncated to
            `size`; None on every other rank
    """
    rank, world_size = get_dist_info()
    os.makedirs(tmpdir, exist_ok=True)

    dist.barrier()
    part_path = os.path.join(tmpdir, 'result_part_{}.pkl'.format(rank))
    # Close the file explicitly so the part is fully written before the
    # barrier below lets rank 0 start reading.
    with open(part_path, 'wb') as part_out:
        pickle.dump(result_part, part_out)
    dist.barrier()

    if rank != 0:
        return None

    part_list = []
    for i in range(world_size):
        part_file = os.path.join(tmpdir, 'result_part_{}.pkl'.format(i))
        # NOTE: pickle.load is only safe because these files were written by
        # this job's own ranks; tmpdir must not be writable by untrusted users.
        with open(part_file, 'rb') as part_in:
            part_list.append(pickle.load(part_in))

    ordered_results = []
    for res in zip(*part_list):
        ordered_results.extend(list(res))
    ordered_results = ordered_results[:size]
    shutil.rmtree(tmpdir)
    return ordered_results
all_gather does not support # gathering tensors of different shapes tensor_list = [] for _ in size_list: tensor_list.append(torch.FloatTensor(size=(max_size,)).cuda().to(tensor_type)) if local_size != max_size: padding = torch.FloatTensor(size=(max_size - local_size,)).cuda().to(tensor_type) tensor = torch.cat((tensor, padding), dim=0) dist.all_gather(tensor_list, tensor) data_list = [] for size, tensor in zip(size_list, tensor_list): if origin_size is None: buffer = tensor.cpu().numpy().tobytes()[:size] data_list.append(pickle.loads(buffer)) else: buffer = tensor[:size] data_list.append(buffer) if origin_size is not None: new_shape = [-1] + list(origin_size[1:]) resized_list = [] for data in data_list: # suppose the difference of tensor size exist in first dimension data = data.reshape(new_shape) resized_list.append(data) return resized_list else: return data_list def reduce_dict(input_dict, average=True): """ Args: input_dict (dict): all the values will be reduced average (bool): whether to do average or sum Reduce the values in the dictionary from all processes so that process with rank 0 has the averaged results. Returns a dict with the same fields as input_dict, after reduction. 
class SigmoidFocalClassificationLoss(nn.Module):
    """
    Focal loss built on an element-wise sigmoid cross entropy.
    """

    def __init__(self, gamma: float = 2.0, alpha: float = 0.25):
        """
        Args:
            gamma: Exponent of the modulating factor; balances hard and easy examples.
            alpha: Weight applied to positive targets; negatives get (1 - alpha).
        """
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha

    @staticmethod
    def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor):
        """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits:
            max(x, 0) - x * z + log(1 + exp(-abs(x))) in
            https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits

        Args:
            input: (B, #anchors, #classes) float tensor.
                Predicted logits for each class
            target: (B, #anchors, #classes) float tensor.
                One-hot encoded classification targets

        Returns:
            loss: (B, #anchors, #classes) float tensor.
                Sigmoid cross entropy loss without reduction
        """
        relu_term = torch.clamp(input, min=0)
        log_term = torch.log1p(torch.exp(-torch.abs(input)))
        return relu_term - input * target + log_term

    def forward(self, input: torch.Tensor, target: torch.Tensor, weights: torch.Tensor):
        """
        Args:
            input: (B, #anchors, #classes) float tensor.
                Predicted logits for each class
            target: (B, #anchors, #classes) float tensor.
                One-hot encoded classification targets
            weights: (B, #anchors) float tensor.
                Anchor-wise weights.

        Returns:
            weighted_loss: (B, #anchors, #classes) float tensor after weighting.
        """
        probabilities = torch.sigmoid(input)

        # alpha for positive targets, (1 - alpha) for negative ones.
        class_balance = target * self.alpha + (1 - target) * (1 - self.alpha)
        # Probability mass assigned to the wrong outcome for each element.
        miss_prob = target * (1.0 - probabilities) + (1.0 - target) * probabilities
        modulator = class_balance * torch.pow(miss_prob, self.gamma)

        per_element = modulator * self.sigmoid_cross_entropy_with_logits(input, target)

        # Append a trailing axis so anchor-wise weights broadcast over classes.
        lacks_class_axis = weights.dim() == 2 or (weights.dim() == 1 and target.dim() == 2)
        if lacks_class_axis:
            weights = weights.unsqueeze(-1)

        assert weights.dim() == per_element.dim()

        return per_element * weights
""" target = torch.where(torch.isnan(target), input, target) # ignore nan targets diff = input - target # code-wise weighting if self.code_weights is not None: diff = diff * self.code_weights.view(1, 1, -1) loss = self.smooth_l1_loss(diff, self.beta) # anchor-wise weighting if weights is not None: assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] loss = loss * weights.unsqueeze(-1) return loss class WeightedL1Loss(nn.Module): def __init__(self, code_weights: list = None): """ Args: code_weights: (#codes) float list if not None. Code-wise weights. """ super(WeightedL1Loss, self).__init__() if code_weights is not None: self.code_weights = np.array(code_weights, dtype=np.float32) self.code_weights = torch.from_numpy(self.code_weights).cuda() def forward(self, input: torch.Tensor, target: torch.Tensor, weights: torch.Tensor = None): """ Args: input: (B, #anchors, #codes) float tensor. Ecoded predicted locations of objects. target: (B, #anchors, #codes) float tensor. Regression targets. weights: (B, #anchors) float tensor if not None. Returns: loss: (B, #anchors) float tensor. Weighted smooth l1 loss without reduction. """ target = torch.where(torch.isnan(target), input, target) # ignore nan targets diff = input - target # code-wise weighting if self.code_weights is not None: diff = diff * self.code_weights.view(1, 1, -1) loss = torch.abs(diff) # anchor-wise weighting if weights is not None: assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] loss = loss * weights.unsqueeze(-1) return loss class WeightedCrossEntropyLoss(nn.Module): """ Transform input to fit the fomation of PyTorch offical cross entropy loss with anchor-wise weighting. """ def __init__(self): super(WeightedCrossEntropyLoss, self).__init__() def forward(self, input: torch.Tensor, target: torch.Tensor, weights: torch.Tensor): """ Args: input: (B, #anchors, #classes) float tensor. Predited logits for each class. 
def compute_fg_mask(gt_boxes2d, shape, downsample_factor=1, device=torch.device("cpu")):
    """
    Compute foreground mask for images
    Args:
        gt_boxes2d: (B, N, 4), 2D box labels ordered (u1, v1, u2, v2)
        shape: torch.Size or tuple, Foreground mask desired shape
        downsample_factor: int, Downsample factor for image
        device: torch.device, Foreground mask desired device
    Returns:
        fg_mask (shape), Foreground mask
    """
    fg_mask = torch.zeros(shape, dtype=torch.bool, device=device)

    # Scale into a new tensor: the previous in-place `/=` rescaled the caller's
    # labels as a side effect (and failed outright on integer tensors).
    boxes = gt_boxes2d / downsample_factor
    # Round outwards so that partially covered cells are kept in the mask.
    boxes[:, :, :2] = torch.floor(boxes[:, :, :2])
    boxes[:, :, 2:] = torch.ceil(boxes[:, :, 2:])
    boxes = boxes.long()

    # Set all values within each box to True
    for b, image_boxes in enumerate(boxes.tolist()):
        for u1, v1, u2, v2 in image_boxes:
            fg_mask[b, v1:v2, u1:u2] = True

    return fg_mask


def neg_loss_cornernet(pred, gt, mask=None):
    """
    Refer to https://github.com/tianweiy/CenterPoint.
    Modified focal loss. Exactly the same as CornerNet. Runs faster and costs a little bit more memory
    Args:
        pred: (batch x c x h x w)
        gt: (batch x c x h x w)
        mask: (batch x h x w)
    Returns:
        Scalar tensor. The summed loss is divided by the number of positive
        locations (gt == 1) when there is at least one.
    """
    pos_inds = gt.eq(1).float()
    neg_inds = gt.lt(1).float()

    # Negatives are down-weighted by (1 - gt) ** 4.
    neg_weights = torch.pow(1 - gt, 4)

    loss = 0

    pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
    neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds

    if mask is not None:
        mask = mask[:, None, :, :].float()
        pos_loss = pos_loss * mask
        neg_loss = neg_loss * mask
        num_pos = (pos_inds.float() * mask).sum()
    else:
        num_pos = pos_inds.float().sum()

    pos_loss = pos_loss.sum()
    neg_loss = neg_loss.sum()

    if num_pos == 0:
        loss = loss - neg_loss
    else:
        loss = loss - (pos_loss + neg_loss) / num_pos
    return loss


class FocalLossCenterNet(nn.Module):
    """
    Refer to https://github.com/tianweiy/CenterPoint
    Thin module wrapper around neg_loss_cornernet.
    """
    def __init__(self):
        super(FocalLossCenterNet, self).__init__()
        self.neg_loss = neg_loss_cornernet

    def forward(self, out, target, mask=None):
        return self.neg_loss(out, target, mask=mask)


def _reg_loss(regr, gt_regr, mask):
    """
    Refer to https://github.com/tianweiy/CenterPoint
    L1 regression loss
    Args:
        regr (batch x max_objects x dim)
        gt_regr (batch x max_objects x dim)
        mask (batch x max_objects)
    Returns:
        (dim,) tensor: per-dimension absolute error summed over batch and
        objects, divided by max(number of masked-in objects, 1).
    """
    num = mask.float().sum()
    mask = mask.unsqueeze(2).expand_as(gt_regr).float()
    isnotnan = (~ torch.isnan(gt_regr)).float()
    # Out-of-place product: `mask` can still be a stride-0 expanded view
    # (when the incoming mask is already float), which cannot be written in place.
    mask = mask * isnotnan

    # NaN * 0 is still NaN, so NaN targets must be replaced before masking;
    # multiplying by the mask alone would leak NaN into the loss.
    gt_regr = torch.where(torch.isnan(gt_regr), torch.zeros_like(gt_regr), gt_regr)

    regr = regr * mask
    gt_regr = gt_regr * mask

    loss = torch.abs(regr - gt_regr)
    # (B, M, D) -> (D, M, B), then reduce over batch and objects.
    loss = loss.transpose(2, 0)

    loss = torch.sum(loss, dim=2)
    loss = torch.sum(loss, dim=1)

    loss = loss / torch.clamp_min(num, min=1.0)
    return loss
def get_objects_from_label(label_file):
    """
    Read a label file and build one Object3d per line.

    When the file contains no lines, a single placeholder 'DontCare' object is
    returned so callers always receive a non-empty list.
    """
    with open(label_file, 'r') as f:
        lines = f.readlines()
    parsed = [Object3d(raw_line) for raw_line in lines]
    if parsed:
        return parsed
    return [Object3d('DontCare -1 -1 -4.0061 0.0000 198.4733 416.3764 373.0000 1.5332 1.6821 4.2322 -2.7611 1.6843 4.1515 -4.5719')]


def get_objects_from_tracking_label(label_file):
    """Build one Object3d per entry of an iterable of label lines."""
    return list(map(Object3d, label_file))


def cls_type_to_id(cls_type):
    """Map a class name to its integer id; unknown names map to -1."""
    return {'Car': 1, 'Pedestrian': 2, 'Cyclist': 3, 'Van': 4}.get(cls_type, -1)


class Object3d(object):
    """One labelled object parsed from a space-separated label line."""

    def __init__(self, line):
        fields = line.strip().split(' ')
        num_fields = len(fields)

        self.src = line
        self.cls_type = fields[0]
        self.cls_id = cls_type_to_id(self.cls_type)
        self.truncation = float(fields[1])
        # 0:fully visible 1:partly occluded 2:largely occluded 3:unknown
        self.occlusion = float(fields[2])
        self.alpha = float(fields[3])
        self.box2d = np.array([float(fields[k]) for k in range(4, 8)], dtype=np.float32)
        self.h = float(fields[8])
        self.w = float(fields[9])
        self.l = float(fields[10])
        self.loc = np.array([float(fields[k]) for k in range(11, 14)], dtype=np.float32)
        self.dis_to_cam = np.linalg.norm(self.loc)
        self.ry = float(fields[14])
        # A 16th field is read as the score; otherwise the score is -1.0.
        self.score = float(fields[15]) if num_fields == 16 else -1.0
        self.level_str = None
        self.ob_id = -1
        # Lines with more than 15 fields take the last field as the object id
        # and use the tracking difficulty thresholds.
        if num_fields > 15:
            self.ob_id = fields[-1]
            self.level = self.get_kitti_tracking_obj_level()
        else:
            self.level = self.get_kitti_obj_level()

    def _level_from_truncation_limits(self, truncation_limits):
        """
        Shared difficulty rule: walk Easy -> Moderate -> Hard and return the
        first level whose height / truncation / occlusion limits are all met.
        Sets self.level_str as a side effect; returns -1 ('UnKnown') if none match.
        """
        height = float(self.box2d[3]) - float(self.box2d[1]) + 1
        rules = (
            (40, truncation_limits[0], 0, 'Easy', 0),
            (25, truncation_limits[1], 1, 'Moderate', 1),
            (25, truncation_limits[2], 2, 'Hard', 2),
        )
        for min_height, max_truncation, max_occlusion, label, level in rules:
            if height >= min_height and self.truncation <= max_truncation and self.occlusion <= max_occlusion:
                self.level_str = label
                return level
        self.level_str = 'UnKnown'
        return -1

    def get_kitti_obj_level(self):
        """Difficulty level using truncation limits 0.15 / 0.3 / 0.5."""
        return self._level_from_truncation_limits((0.15, 0.3, 0.5))

    def get_kitti_tracking_obj_level(self):
        """Difficulty level using truncation limits 0 / 1 / 2."""
        return self._level_from_truncation_limits((0, 1, 2))

    def generate_corners3d(self):
        """
        generate corners3d representation for this object
        :return corners_3d: (8, 3) corners of box3d in camera coord
        """
        l, h, w = self.l, self.h, self.w
        half_l = l / 2
        half_w = w / 2
        # Rows are x, y, z; the first four columns have y = 0, the last four y = -h.
        corners3d = np.array([
            [half_l, half_l, -half_l, -half_l, half_l, half_l, -half_l, -half_l],
            [0, 0, 0, 0, -h, -h, -h, -h],
            [half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w, half_w],
        ])  # (3, 8)

        cos_ry = np.cos(self.ry)
        sin_ry = np.sin(self.ry)
        # Rotation about the y axis by ry.
        R = np.array([[cos_ry, 0, sin_ry],
                      [0, 1, 0],
                      [-sin_ry, 0, cos_ry]])
        rotated = np.dot(R, corners3d).T
        return rotated + self.loc

    def to_str(self):
        """Human-readable one-line summary of the object."""
        values = (self.cls_type, self.truncation, self.occlusion, self.alpha, self.box2d,
                  self.h, self.w, self.l, self.loc, self.ry)
        return '%s %.3f %.3f %.3f box2d: %s hwl: [%.3f %.3f %.3f] pos: %s ry: %.3f' % values

    def to_kitti_format(self):
        """Serialise the object back into a 15-field label line."""
        values = (self.cls_type, self.truncation, int(self.occlusion), self.alpha,
                  self.box2d[0], self.box2d[1], self.box2d[2], self.box2d[3],
                  self.h, self.w, self.l,
                  self.loc[0], self.loc[1], self.loc[2],
                  self.ry)
        return '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' % values
np_corners_gboxes[iter, i * 2] - np_corners_qboxes[iter, 0] ap1 = np_corners_gboxes[iter, i * 2 + 1] - np_corners_qboxes[iter, 1] abab = ab0 * ab0 + ab1 * ab1 abap = ab0 * ap0 + ab1 * ap1 adad = ad0 * ad0 + ad1 * ad1 adap = ad0 * ap0 + ad1 * ap1 if (abab >= abap and abap >= 0 and adad >= adap and adap >= 0): intersections[iter, num_of_intersections[iter] * 2] = np_corners_gboxes[iter, i * 2] intersections[iter, num_of_intersections[iter] * 2 + 1] = np_corners_gboxes[iter, i * 2 + 1] num_of_intersections[iter] += 1 flags_gboxes[iter, i] = 1.0 # step 2: determine how many corners from np_qboxes inside corners_gboxes ab0 = np_corners_gboxes[iter, 2] - np_corners_gboxes[iter, 0] ab1 = np_corners_gboxes[iter, 3] - np_corners_gboxes[iter, 1] ad0 = np_corners_gboxes[iter, 6] - np_corners_gboxes[iter, 0] ad1 = np_corners_gboxes[iter, 7] - np_corners_gboxes[iter, 1] for i in range(4): ap0 = np_corners_qboxes[iter, i * 2] - np_corners_gboxes[iter, 0] ap1 = np_corners_qboxes[iter, i * 2 + 1] - np_corners_gboxes[iter, 1] abab = ab0 * ab0 + ab1 * ab1 abap = ab0 * ap0 + ab1 * ap1 adad = ad0 * ad0 + ad1 * ad1 adap = ad0 * ap0 + ad1 * ap1 if (abab >= abap and abap >= 0 and adad >= adap and adap >= 0): intersections[iter, num_of_intersections[iter] * 2] = np_corners_qboxes[iter, i * 2] intersections[iter, num_of_intersections[iter] * 2 + 1] = np_corners_qboxes[iter, i * 2 + 1] num_of_intersections[iter] += 1 flags_qboxes[iter, i] = 1.0 # step 3: find the intersection of all the edges for i in range(4): for j in range(4): A = np.zeros((2,), dtype=np.float32) B = np.zeros((2,), dtype=np.float32) C = np.zeros((2,), dtype=np.float32) D = np.zeros((2,), dtype=np.float32) A[0] = np_corners_gboxes[iter, 2 * i] A[1] = np_corners_gboxes[iter, 2 * i + 1] B[0] = np_corners_gboxes[iter, 2 * ((i + 1) % 4)] B[1] = np_corners_gboxes[iter, 2 * ((i + 1) % 4) + 1] C[0] = np_corners_qboxes[iter, 2 * j] C[1] = np_corners_qboxes[iter, 2 * j + 1] D[0] = np_corners_qboxes[iter, 2 * ((j + 1) % 4)] D[1] = 
np_corners_qboxes[iter, 2 * ((j + 1) % 4) + 1] BA0 = B[0] - A[0] BA1 = B[1] - A[1] CA0 = C[0] - A[0] CA1 = C[1] - A[1] DA0 = D[0] - A[0] DA1 = D[1] - A[1] acd = DA1 * CA0 > CA1 * DA0 bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0]) if acd != bcd: abc = CA1 * BA0 > BA1 * CA0 abd = DA1 * BA0 > BA1 * DA0 if abc != abd: DC0 = D[0] - C[0] DC1 = D[1] - C[1] ABBA = A[0] * B[1] - B[0] * A[1] CDDC = C[0] * D[1] - D[0] * C[1] DH = BA1 * DC0 - BA0 * DC1 Dx = ABBA * DC0 - BA0 * CDDC Dy = ABBA * DC1 - BA1 * CDDC # DH = (B[1] - A[1]) * (D[0] - C[0]) - (B[0] - A[0]) * (D[1] - C[1]) # Dx = (A[0] * B[1] - B[0] * A[1]) * (D[0] - C[0]) - (B[0] - A[0]) * (C[0] * D[1] - D[0] * C[1]) # Dy = (A[0] * B[1] - B[0] * A[1]) * (D[1] - C[1]) - (B[1] - A[1]) * (C[0] * D[1] - D[0] * C[1]) if (num_of_intersections[iter] > 7): print("iter = ", iter) print("(%.4f %.4f) (%.4f %.4f) (%.4f %.4f) (%.4f %.4f)" % ( np_corners_gboxes[iter, 0], np_corners_gboxes[iter, 1], np_corners_gboxes[iter, 2], np_corners_gboxes[iter, 3], np_corners_gboxes[iter, 4], np_corners_gboxes[iter, 5], np_corners_gboxes[iter, 6], np_corners_gboxes[iter, 7])) print("(%.4f %.4f) (%.4f %.4f) (%.4f %.4f) (%.4f %.4f)" % ( np_corners_qboxes[iter, 0], np_corners_qboxes[iter, 1], np_corners_qboxes[iter, 2], np_corners_qboxes[iter, 3], np_corners_qboxes[iter, 4], np_corners_qboxes[iter, 5], np_corners_qboxes[iter, 6], np_corners_qboxes[iter, 7])) continue intersections[iter, num_of_intersections[iter] * 2] = Dx / DH intersections[iter, num_of_intersections[iter] * 2 + 1] = Dy / DH num_of_intersections[iter] += 1 flags_inters[iter, i, j] = 1.0 ctx.save_for_backward(corners_qboxes) ctx.corners_gboxes = corners_gboxes ctx.flags_qboxes = flags_qboxes ctx.flags_gboxes = flags_gboxes ctx.flags_inters = flags_inters # conver numpy to tensor tensor_intersections = torch.from_numpy(intersections) tensor_num_of_intersections = torch.from_numpy(num_of_intersections) return tensor_intersections, 
tensor_num_of_intersections.detach() @staticmethod def backward(ctx, *grad_outputs): _variables = ctx.saved_tensors corners_qboxes = _variables[0] corners_gboxes = ctx.corners_gboxes flags_qboxes = ctx.flags_qboxes flags_gboxes = ctx.flags_gboxes flags_inters = ctx.flags_inters grad_output = grad_outputs[0] np_corners_gboxes = corners_gboxes.cpu().numpy() np_corners_qboxes = corners_qboxes.cpu().detach().numpy() N = flags_qboxes.shape[0] n_of_inter = np.zeros((N,), dtype=np.int32) ### Check whether here is correct or not Jacbian_qboxes = np.zeros((N, 8, 16), dtype=np.float32) Jacbian_gboxes = np.zeros((N, 8, 16), dtype=np.float32) for iter in range(N): for i in range(4): if (flags_gboxes[iter, i] > 0): Jacbian_gboxes[iter, i * 2, n_of_inter[iter] * 2] += 1.0 Jacbian_gboxes[iter, i * 2 + 1, n_of_inter[iter] * 2 + 1] += 1.0 n_of_inter[iter] += 1 for i in range(4): if (flags_qboxes[iter, i] > 0): Jacbian_qboxes[iter, i * 2, n_of_inter[iter] * 2] += 1.0 Jacbian_qboxes[iter, i * 2 + 1, n_of_inter[iter] * 2 + 1] += 1.0 n_of_inter[iter] += 1 for i in range(4): for j in range(4): if (flags_inters[iter, i, j] > 0): ### A = np.zeros((2,), dtype=np.float32) B = np.zeros((2,), dtype=np.float32) C = np.zeros((2,), dtype=np.float32) D = np.zeros((2,), dtype=np.float32) A[0] = np_corners_gboxes[iter, 2 * i] A[1] = np_corners_gboxes[iter, 2 * i + 1] B[0] = np_corners_gboxes[iter, 2 * ((i + 1) % 4)] B[1] = np_corners_gboxes[iter, 2 * ((i + 1) % 4) + 1] C[0] = np_corners_qboxes[iter, 2 * j] C[1] = np_corners_qboxes[iter, 2 * j + 1] D[0] = np_corners_qboxes[iter, 2 * ((j + 1) % 4)] D[1] = np_corners_qboxes[iter, 2 * ((j + 1) % 4) + 1] BA0 = B[0] - A[0] BA1 = B[1] - A[1] CA0 = C[0] - A[0] CA1 = C[1] - A[1] DA0 = D[0] - A[0] DA1 = D[1] - A[1] acd = DA1 * CA0 > CA1 * DA0 bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0]) if acd != bcd: abc = CA1 * BA0 > BA1 * CA0 abd = DA1 * BA0 > BA1 * DA0 if abc != abd: DC0 = D[0] - C[0] DC1 = D[1] - C[1] ABBA = A[0] * B[1] - B[0] * 
A[1] CDDC = C[0] * D[1] - D[0] * C[1] DH = BA1 * DC0 - BA0 * DC1 Dx = ABBA * DC0 - BA0 * CDDC Dy = ABBA * DC1 - BA1 * CDDC # DH = (B[1] - A[1]) * (D[0] - C[0]) - (B[0] - A[0]) * (D[1] - C[1]) # Dx = (A[0] * B[1] - B[0] * A[1]) * (D[0] - C[0]) - (B[0] - A[0]) * (C[0] * D[1] - D[0] * C[1]) det_DxA0 = B[1] * (D[0] - C[0]) + (C[0] * D[1] - D[0] * C[1]) det_DxA1 = - B[0] * (D[0] - C[0]) det_DxB0 = - A[1] * (D[0] - C[0]) - (C[0] * D[1] - D[0] * C[1]) det_DxB1 = A[0] * (D[0] - C[0]) det_DxC0 = - (A[0] * B[1] - B[0] * A[1]) - (B[0] - A[0]) * D[1] det_DxC1 = (B[0] - A[0]) * D[0] det_DxD0 = (A[0] * B[1] - B[0] * A[1]) + (B[0] - A[0]) * C[1] det_DxD1 = -(B[0] - A[0]) * C[0] # Dy = (A[0] * B[1] - B[0] * A[1]) * (D[1] - C[1]) - (B[1] - A[1]) * (C[0] * D[1] - D[0] * C[1]) det_DyA0 = B[1] * (D[1] - C[1]) det_DyA1 = - B[0] * (D[1] - C[1]) + (C[0] * D[1] - D[0] * C[1]) det_DyB0 = - A[1] * (D[1] - C[1]) det_DyB1 = A[0] * (D[1] - C[1]) - (C[0] * D[1] - D[0] * C[1]) det_DyC0 = - (B[1] - A[1]) * D[1] det_DyC1 = - (A[0] * B[1] - B[0] * A[1]) + (B[1] - A[1]) * D[0] det_DyD0 = (B[1] - A[1]) * C[1] det_DyD1 = (A[0] * B[1] - B[0] * A[1]) - (B[1] - A[1]) * C[0] # DH = (B[1] - A[1]) * (D[0] - C[0]) - (B[0] - A[0]) * (D[1] - C[1]) det_DHA0 = (D[1] - C[1]) det_DHA1 = - (D[0] - C[0]) det_DHB0 = - (D[1] - C[1]) det_DHB1 = (D[0] - C[0]) det_DHC0 = - (B[1] - A[1]) det_DHC1 = (B[0] - A[0]) det_DHD0 = (B[1] - A[1]) det_DHD1 = - (B[0] - A[0]) DHDH = DH * DH Jacbian_gboxes[iter, i * 2, n_of_inter[iter] * 2] += (det_DxA0 * DH - Dx * det_DHA0) / DHDH Jacbian_gboxes[iter, i * 2, n_of_inter[iter] * 2 + 1] += (det_DyA0 * DH - Dy * det_DHA0) / DHDH Jacbian_gboxes[iter, i * 2 + 1, n_of_inter[iter] * 2] += (det_DxA1 * DH - Dx * det_DHA1) / DHDH Jacbian_gboxes[iter, i * 2 + 1, n_of_inter[iter] * 2 + 1] += (det_DyA1 * DH - Dy * det_DHA1) / DHDH Jacbian_gboxes[iter, 2 * ((i + 1) % 4), n_of_inter[iter] * 2] += (det_DxB0 * DH - Dx * det_DHB0) / DHDH Jacbian_gboxes[iter, 2 * ((i + 1) % 4), n_of_inter[iter] * 2 + 1] 
class sort_vertex(Function):
    """
    Autograd op that reorders each row's polygon vertices by descending polar
    angle around the mean of the valid vertices.

    Rows hold up to 8 (x, y) pairs flattened to 16 values; `num_of_inter`
    gives the number of valid pairs per row.
    """

    @staticmethod
    def forward(ctx, int_pts, num_of_inter):
        pts = int_pts.detach().numpy()
        counts = num_of_inter
        num_rows = int_pts.shape[0]
        np_sorted_indexs = np.zeros((num_rows, 8), dtype=np.int32)
        ordered_pts = np.zeros((num_rows, 16), dtype=np.float32)

        for row in range(num_rows):
            if not (counts[row] > 0):
                continue

            # Mean of the valid vertices, accumulated in float32.
            centroid = np.zeros((2,), dtype=np.float32)
            for k in range(counts[row]):
                centroid[0] += pts[row, 2 * k]
                centroid[1] += pts[row, 2 * k + 1]
            centroid[0] /= counts[row].float()
            centroid[1] /= counts[row].float()

            # Polar angle of every valid vertex, mapped to a non-negative value;
            # unused slots keep angle 0.
            angle = np.zeros((8,), dtype=np.float32)
            direction = np.zeros((2,), dtype=np.float32)
            for k in range(counts[row]):
                direction[0] = pts[row, 2 * k] - centroid[0]
                direction[1] = pts[row, 2 * k + 1] - centroid[1]
                length = math.sqrt(direction[0] * direction[0] + direction[1] * direction[1])
                direction[0] = direction[0] / length
                direction[1] = direction[1] / length
                theta = math.atan2(direction[1], direction[0])
                angle[k] = theta + 2 * 3.1415926 if theta < 0 else theta

            # sort angles with descending
            np_sorted_indexs[row, :] = np.argsort(-angle)
            for k in range(counts[row]):
                source = np_sorted_indexs[row, k]
                ordered_pts[row, 2 * k] = pts[row, 2 * source]
                ordered_pts[row, 2 * k + 1] = pts[row, 2 * source + 1]

        # Keep what backward needs: the inputs and the permutation that was applied.
        ctx.save_for_backward(int_pts, num_of_inter)
        ctx.np_sorted_indexs = np_sorted_indexs
        return torch.from_numpy(ordered_pts)

    @staticmethod
    def backward(ctx, grad_output):
        int_pts, num_of_inter = ctx.saved_tensors
        np_sorted_indexs = ctx.np_sorted_indexs
        num_rows = int_pts.shape[0]

        # The op is a per-row permutation, so its Jacobian is a 0/1 matrix that
        # routes the gradient of sorted slot `dst` back to input slot `src`.
        jacobian = np.zeros((num_rows, 16, 16), dtype=np.float32)
        for row in range(num_rows):
            for dst in range(num_of_inter[row]):
                src = np_sorted_indexs[row, dst]
                jacobian[row, 2 * src, 2 * dst] = 1
                jacobian[row, 2 * src + 1, 2 * dst + 1] = 1

        target_device = torch.device(int_pts.device)
        jacobian_tensor = torch.from_numpy(jacobian).to(target_device)
        grad_sorted = grad_output.to(target_device)
        grad_int_pts = jacobian_tensor.matmul(grad_sorted.unsqueeze(2)).squeeze(2)
        # num_of_inter is not differentiable.
        return grad_int_pts, None
@staticmethod
def forward(ctx, int_pts, num_of_inter):
    """
    Area of each row's polygon by fan triangulation from its first vertex.

    Each row of `int_pts` holds (x, y) pairs flattened along the last axis and
    `num_of_inter[row]` is the number of valid pairs. Rows with fewer than
    three vertices get area 0. Both inputs are saved on `ctx` for backward.
    """
    ctx.save_for_backward(int_pts, num_of_inter)
    vertices = int_pts.detach().numpy()
    num_rows = int_pts.shape[0]
    areas = np.zeros((num_rows,), dtype=np.float32)

    for row in range(num_rows):
        p1 = vertices[row, 0:2]
        for tri in range(num_of_inter[row] - 2):
            p2 = vertices[row, 2 * tri + 2:2 * tri + 4]
            p3 = vertices[row, 2 * tri + 4:2 * tri + 6]
            # Cross product of (p1 - p3) and (p2 - p3); half its magnitude is the triangle area.
            cross = (p1[0] - p3[0]) * (p2[1] - p3[1]) - (p1[1] - p3[1]) * (p2[0] - p3[0])
            areas[row] += abs(cross / 2.0)

    return torch.from_numpy(areas)
1: p1 = np_int_pts[iter, 2 * (np_num_of_inter[iter] - 2):2 * (np_num_of_inter[iter] - 1)] p2 = np_int_pts[iter, 2 * (np_num_of_inter[iter] - 1):2 * (np_num_of_inter[iter])] p3 = np_int_pts[iter, 0:2] if ((p1[0] - p3[0]) * (p2[1] - p3[1]) - (p1[1] - p3[1]) * (p2[0] - p3[0])) > 0: grad_int_pts[iter, 2 * (np_num_of_inter[iter] - 1)] = - (p1[1] - p3[1]) * grad_output0[ iter] * 0.5 grad_int_pts[iter, 2 * np_num_of_inter[iter] - 1] = (p1[0] - p3[0]) * grad_output0[ iter] * 0.5 else: grad_int_pts[iter, 2 * (np_num_of_inter[iter] - 1)] = (p1[1] - p3[1]) * grad_output0[ iter] * 0.5 grad_int_pts[iter, 2 * np_num_of_inter[iter] - 1] = - (p1[0] - p3[0]) * grad_output0[ iter] * 0.5 else: p1 = np_int_pts[iter, 0:2] p2 = np_int_pts[iter, 2 * i - 2: 2 * i] p3 = np_int_pts[iter, 2 * i: 2 * i + 2] if ((p1[0] - p3[0]) * (p2[1] - p3[1]) - (p1[1] - p3[1]) * (p2[0] - p3[0])) > 0: grad_int_pts[iter, i * 2] += (- (p2[1] - p3[1]) + (p1[1] - p3[1])) * grad_output0[ iter] * 0.5 grad_int_pts[iter, i * 2 + 1] += (- (p1[0] - p3[0]) + (p2[0] - p3[0])) * grad_output0[ iter] * 0.5 else: grad_int_pts[iter, i * 2] += ((p2[1] - p3[1]) - (p1[1] - p3[1])) * grad_output0[iter] * 0.5 grad_int_pts[iter, i * 2 + 1] += ((p1[0] - p3[0]) - (p2[0] - p3[0])) * grad_output0[ iter] * 0.5 p1 = np_int_pts[iter, 0:2] p2 = np_int_pts[iter, 2 * i: 2 * i + 2] p3 = np_int_pts[iter, 2 * i + 2: 2 * i + 4] if ((p1[0] - p3[0]) * (p2[1] - p3[1]) - (p1[1] - p3[1]) * (p2[0] - p3[0])) > 0: grad_int_pts[iter, i * 2] += - (p1[1] - p3[1]) * grad_output0[iter] * 0.5 grad_int_pts[iter, i * 2 + 1] += (p1[0] - p3[0]) * grad_output0[iter] * 0.5 else: grad_int_pts[iter, i * 2] += (p1[1] - p3[1]) * grad_output0[iter] * 0.5 grad_int_pts[iter, i * 2 + 1] += -(p1[0] - p3[0]) * grad_output0[iter] * 0.5 tensor_grad_int_pts = torch.from_numpy(grad_int_pts) # todo: my first addition. 
## Transform the (cx, cy, w, l, theta) representation to 4 corners representation
class rbbox_to_corners(nn.Module):
    """
    Convert rotated 2D boxes (N, 5) into their four corner points (N, 8).

    NOTE(review): `_init_` below uses single underscores, so it is an ordinary
    method that nn.Module never invokes on construction; the module is
    instantiated without arguments. Kept as in the original.
    """

    def _init_(self, rbbox):
        super(rbbox_to_corners, self)._init_()
        self.rbbox = rbbox
        return

    def forward(ctx, rbbox):
        '''
        Input columns: [center_x, center_y, size_x, size_y, angle].
        Output: [N, 8] float32 laid out as
            [x_0, y_0, x_1, y_1, x_2, y_2, x_3, y_3].
        With angle = 0 the corners are
            0: (cx - sx/2, cy - sy/2)    1: (cx - sx/2, cy + sy/2)
            2: (cx + sx/2, cy + sy/2)    3: (cx + sx/2, cy - sy/2).
        '''
        assert rbbox.shape[1] == 5
        center_x, center_y = rbbox[:, 0], rbbox[:, 1]
        size_x, size_y, angle = rbbox[:, 2], rbbox[:, 3], rbbox[:, 4]

        # Half extents projected onto the rotated axes.
        dxcos = size_x.mul(torch.cos(angle)) / 2.0
        dxsin = size_x.mul(torch.sin(angle)) / 2.0
        dycos = size_y.mul(torch.cos(angle)) / 2.0
        dysin = size_y.mul(torch.sin(angle)) / 2.0

        corner_columns = (
            -dxcos - dysin + center_x,
            dxsin - dycos + center_y,
            -dxcos + dysin + center_x,
            dxsin + dycos + center_y,
            dxcos + dysin + center_x,
            -dxsin + dycos + center_y,
            dxcos - dysin + center_x,
            -dxsin - dycos + center_y,
        )

        corners = torch.zeros((rbbox.shape[0], 8), dtype=torch.float32, device=rbbox.device)
        for column, values in enumerate(corner_columns):
            corners[:, column] = values
        return corners
class find_convex_hull(Function):
    """
    Autograd op that selects the convex-hull vertices of a 2D point set.

    forward returns the hull vertices in the order reported by
    scipy.spatial.ConvexHull.vertices; points that are not hull vertices are
    dropped. backward scatters the incoming gradient back to the rows that
    were selected and leaves every other row at zero.
    """

    @staticmethod
    def forward(ctx, corners):
        """
        Args:
            corners: (P, 2) tensor of 2D points.
        Returns:
            (M, 2) float32 tensor of hull vertices on the same device as `corners`.
        """
        np_corners = corners.cpu().detach().numpy()
        hull = ConvexHull(np_corners)
        index = hull.vertices
        # Gather all hull rows in one indexing step instead of a per-row copy loop.
        hull_points_2d = np.ascontiguousarray(np_corners[index][:, :2], dtype=np.float32)
        tensor_hull_points_2d = torch.from_numpy(hull_points_2d).to(torch.device(corners.device))

        ctx.index = index
        # Remember the input size so backward can build a gradient of matching
        # shape (it used to be hard-coded to 8 points).
        ctx.num_points = corners.shape[0]
        return tensor_hull_points_2d

    @staticmethod
    def backward(ctx, *grad_outputs):
        index = ctx.index
        grad_output0 = grad_outputs[0]
        device = grad_output0.device

        grad_corners = torch.zeros((ctx.num_points, 2), dtype=torch.float32, device=device)
        rows = torch.as_tensor(np.asarray(index), dtype=torch.long, device=device)
        # Hull indices are unique, so a plain indexed assignment is a valid scatter.
        grad_corners[rows] = grad_output0[:rows.numel(), :2].to(grad_corners.dtype)
        return grad_corners
class mbr_area_compute(nn.Module):
    """
    Per-box minimum-bounding-rectangle area for a batch of point sets.

    NOTE(review): `_init_` uses single underscores, so nn.Module never calls it
    on construction; kept as in the original.
    """

    def _init_(self, corners):
        super(mbr_area_compute, self)._init_()
        self.corners = corners
        return

    def forward(ctx, corners):
        """
        Args:
            corners: tensor indexed as corners[i, :, :], one point set per box.
        Returns:
            Stack of the per-box results of mbr_convex_hull, in box order.
        """
        areas = []
        for box_idx in range(corners.shape[0]):
            # NOTE(review): find_convex_hull is called directly here, exactly as
            # before; confirm the name is bound to a callable op at call time.
            hull_pts = find_convex_hull(corners[box_idx, :, :].squeeze())
            # A fresh mbr_convex_hull module is created for every box.
            areas.append(mbr_convex_hull()(hull_pts))
        return torch.stack(areas)
super(mbr_diag_convex_hull, self)._init_() self.hull_points_2d = hull_points_2d return def forward(ctx, hull_points_2d): device = hull_points_2d.device N = hull_points_2d.shape[0] edges = hull_points_2d[1:N, :].add(- hull_points_2d[0:N - 1, :]) edge_angles = torch.atan2(edges[:, 1], edges[:, 0]) edge_angles = torch.fmod(edge_angles, 3.1415926 / 2.0) edge_angles = torch.abs(edge_angles) # edge_angles = torch.unique(edge_angles) # print("edge_angles =", edge_angles) a = torch.stack((torch.cos(edge_angles), torch.cos(edge_angles - 3.1415926 / 2.0)), 1) a = torch.unsqueeze(a, 1) b = torch.stack((torch.cos(edge_angles + 3.1415926 / 2.0), torch.cos(edge_angles)), 1) b = torch.unsqueeze(b, 1) R_tensor = torch.cat((a, b), 1) hull_points_2d_ = torch.unsqueeze(torch.transpose(hull_points_2d, 0, 1), 0) rot_points = R_tensor.matmul(hull_points_2d_) min_x = torch.min(rot_points, 2)[0] max_x = torch.max(rot_points, 2)[0] areas = (max_x[:, 0] - min_x[:, 0]).mul(max_x[:, 1] - min_x[:, 1]) # modified here min_index = torch.argmin(areas) corner_max, corner_min = max_x[min_index], min_x[min_index] diag = torch.sqrt((corner_max[0] - corner_min[0]) ** 2 + (corner_max[1] - corner_min[1]) ** 2) return diag class mbr_diag_compute(nn.Module): # added by zhengwu def _init_(self, corners): super(mbr_diag_compute, self)._init_() self.corners = corners return def forward(ctx, corners): N = corners.shape[0] mbr_rect_diag = [] for iter in range(N): convex_hull_pts = find_convex_hull(corners[iter, :, :].squeeze()) mbr_diag_convex_hull_object = mbr_diag_convex_hull() mbr_rect_diag.append(mbr_diag_convex_hull_object(convex_hull_pts)) mbr_rect_diags = torch.stack(mbr_rect_diag) return mbr_rect_diags class _second_box_decode_operation(nn.Module): """box decode for VoxelNet in lidar Args: boxes ([N, 7] Tensor): normal boxes: x, y, z, w, l, h, r anchors ([N, 7] Tensor): anchors """ # need to convert box_encodings to z-bottom format def _init_(self, box_encodings, anchors, encode_angle_to_vector, 
###################################
# simplified version
###################################
class rbbox_corners_aligned(nn.Module):
    """Corner coordinates of axis-aligned 2-D boxes."""

    def _init_(self, gboxes):
        # Single-underscore name: not the constructor, kept as in the file.
        super(rbbox_corners_aligned, self)._init_()
        self.corners_gboxes = gboxes
        return

    def forward(ctx, gboxes):
        '''
        There is no rotation performed here, as the axes are aligned.
                ^ [y]
           1 --------- 2
           /          /    --->
          0 -------- 3     [x]
        Each node has the coordinate of [x, y], in the order of the input.
        Input:  gboxes, columns 0-3 are center_x, center_y, x_d, y_d.
        Output: [N, 2, 4]
            [[x_0, x_1, x_2, x_3],
             [y_0, y_1, y_2, y_3]].
        '''
        N = gboxes.shape[0]
        center_x = gboxes[:, 0]
        center_y = gboxes[:, 1]
        half_x = gboxes[:, 2].mul(0.5)
        half_y = gboxes[:, 3].mul(0.5)

        corners = torch.zeros([N, 2, 4], device=gboxes.device, dtype=torch.float32)
        corners[:, 0, :] = torch.stack((-half_x, -half_x, half_x, half_x), dim=1)
        corners[:, 1, :] = torch.stack((-half_y, half_y, half_y, -half_y), dim=1)

        centers = torch.stack((center_x, center_y), dim=1).unsqueeze(2)
        return corners + centers


class align_inter_aligned(nn.Module):
    """Axis-aligned intersection and enclosing areas in three projections."""

    # Columns handed to ``rbbox_corners_aligned`` for each projection plane,
    # in the order the results are returned (xoz, xoy, yoz).
    _PLANE_COLUMNS = (
        (0, 2, 3, 5, 6),
        (0, 1, 3, 4, 6),
        (1, 2, 4, 5, 6),
    )

    def _init_(self, gboxes, qboxes):
        # Single-underscore name: not the constructor, kept as in the file.
        super(align_inter_aligned, self)._init_()
        self.gboxes = gboxes
        self.qboxes = qboxes
        return

    @staticmethod
    def _plane_areas(corners1, corners2, eps):
        """Intersection and enclosing-rectangle areas of paired 2-D boxes."""
        iw = (torch.min(corners1[:, 0, 2], corners2[:, 0, 2]) -
              torch.max(corners1[:, 0, 1], corners2[:, 0, 1]) + eps)
        ih = (torch.min(corners1[:, 1, 1], corners2[:, 1, 1]) -
              torch.max(corners1[:, 1, 0], corners2[:, 1, 0]) + eps)
        # The intersection is kept only when both extents are positive.
        overlap = (iw > 0) & (ih > 0)
        inter_area = torch.where(overlap, iw * ih, torch.zeros_like(iw))

        iwmbr = (torch.max(corners1[:, 0, 3], corners2[:, 0, 3]) -
                 torch.min(corners1[:, 0, 0], corners2[:, 0, 0]) + eps)
        ihmbr = (torch.max(corners1[:, 1, 1], corners2[:, 1, 1]) -
                 torch.min(corners1[:, 1, 0], corners2[:, 1, 0]) + eps)
        mbr_area = iwmbr * ihmbr
        return inter_area.to(torch.float32), mbr_area.to(torch.float32)

    def forward(ctx, gboxes, qboxes):
        """Compute per-pair areas for the xoz, xoy and yoz projections.

        The 3-D boxes are projected onto three planes; the last column of the
        boxes (``ry``) is not used.

        Args:
            ctx: the module instance (named ``ctx`` in this file).
            gboxes: ``[N, 7]`` boxes.
            qboxes: ``[N, 7]`` boxes, paired row-by-row with ``gboxes``.

        Returns:
            Tuple ``(inter_area_xoz, mbr_area_xoz, inter_area_xoy,
            mbr_area_xoy, inter_area_yoz, mbr_area_yoz)`` of float32 ``[N]``
            tensors.

        Raises:
            AssertionError: if the two inputs have different lengths.
        """
        N = gboxes.shape[0]
        M = qboxes.shape[0]
        eps = 1e-5
        assert N == M

        rbbox_corners_aligned_object = rbbox_corners_aligned()
        results = []
        for columns in ctx._PLANE_COLUMNS:
            cols = list(columns)
            corners1 = rbbox_corners_aligned_object(gboxes[:, cols])
            corners2 = rbbox_corners_aligned_object(qboxes[:, cols])
            results.extend(ctx._plane_areas(corners1, corners2, eps))
        return tuple(results)
class odiou_3D(nn.Module):
    """IoU-based regression loss between paired 3-D boxes.

    Per pair the loss is ``1 - IoU + center_dist^2 / diag^2 + angle_factor``
    with ``angle_factor = 1.25 * (1 - |cos(angle difference)|)``.
    """

    def _init_(self, gboxes=None, qboxes=None, aligned=False):
        # Single-underscore name: not the constructor, kept as in the file.
        super(odiou_3D, self)._init_()
        self.gboxes = gboxes
        self.qboxes = qboxes
        self.aligned = aligned
        return

    def forward(ctx, gboxes, qboxes, weights, batch_size):
        '''
        gboxes / qboxes: [N, 7], [x, y, z, w, l, h, ry] in velo coord.
        Notice: (x, y, z) is the real center of bbox.
        weights: per-box weights multiplied onto the per-box loss.
        batch_size: divisor of the summed loss.
        Returns the scalar ``2 * sum(loss * weights) / batch_size``; for empty
        input an empty ``[0, 1]`` tensor is returned instead.
        '''
        # Swap the two horizontal size columns (3 and 4) of both box sets.
        xa, ya, za, dxa, dya, dza, ra = torch.split(gboxes, 1, dim=-1)
        gboxes = torch.cat([xa, ya, za, dya, dxa, dza, ra], dim=-1)
        xa1, ya1, za1, dxa1, dya1, dza1, ra1 = torch.split(qboxes, 1, dim=-1)
        qboxes = torch.cat([xa1, ya1, za1, dya1, dxa1, dza1, ra1], dim=-1)

        assert gboxes.shape[0] == qboxes.shape[0]

        # Only pairs whose six size entries are all positive contribute.
        indicator = torch.gt(gboxes[:, 3], 0) & torch.gt(gboxes[:, 4], 0) & torch.gt(gboxes[:, 5], 0) \
                    & torch.gt(qboxes[:, 3], 0) & torch.gt(qboxes[:, 4], 0) & torch.gt(qboxes[:, 5], 0)
        index_loc = torch.nonzero(indicator)

        # todo: my addition to avoid too large numbers after model initialization.
        gboxes = torch.clamp(gboxes, -200.0, 200.0)
        qboxes = torch.clamp(qboxes, -200.0, 200.0)

        odious = torch.zeros([gboxes.shape[0], ], device=gboxes.device, dtype=torch.float32)
        if gboxes.shape[0] == 0 or qboxes.shape[0] == 0:
            return torch.unsqueeze(odious, 1)

        diff_angle = qboxes[:, -1] - gboxes[:, -1]
        angle_factor = 1.25 * (1.0 - torch.abs(torch.cos(diff_angle)))

        rbbox_to_corners_object = rbbox_to_corners()
        corners_gboxes = rbbox_to_corners_object(gboxes[:, [0, 1, 3, 4, 6]])
        corners_qboxes = rbbox_to_corners_object(qboxes[:, [0, 1, 3, 4, 6]])
        # Regroup the flat corner vectors into (x, y) pairs and merge the
        # corners of both boxes of each pair into one point set.
        corners_gboxes_1 = torch.stack((corners_gboxes[:, [0, 2, 4, 6]], corners_gboxes[:, [1, 3, 5, 7]]), 2)
        corners_qboxes_1 = torch.stack((corners_qboxes[:, [0, 2, 4, 6]], corners_qboxes[:, [1, 3, 5, 7]]), 2)
        corners_pts = torch.cat((corners_gboxes_1, corners_qboxes_1), 1)

        # compute the inter area
        rinter_area_compute_object = rinter_area_compute()
        inter_area = rinter_area_compute_object(corners_gboxes, corners_qboxes)

        # compute center distance
        center_dist_square = torch.pow(gboxes[:, 0:3] - qboxes[:, 0:3], 2).sum(-1)

        # compute the mbr bev diag
        mbr_diag_compute_object = mbr_diag_compute()
        mbr_diag_bev = mbr_diag_compute_object(corners_pts)

        inter_h = (torch.min(gboxes[:, 2] + 0.5 * gboxes[:, 5], qboxes[:, 2] + 0.5 * qboxes[:, 5]) -
                   torch.max(gboxes[:, 2] - 0.5 * gboxes[:, 5], qboxes[:, 2] - 0.5 * qboxes[:, 5]))
        inter_h[inter_h < 0] = 0
        # NOTE(review): the original also computed the enclosing height
        # (max top - min bottom) but never used it; the diagonal below is built
        # from the intersection height. Confirm whether the enclosing height
        # was intended. Behaviour is left unchanged here.
        mbr_diag_3d_square = mbr_diag_bev**2 + inter_h ** 2 + 1e-7

        volume_gboxes = gboxes[:, 3].mul(gboxes[:, 4]).mul(gboxes[:, 5])
        volume_qboxes = qboxes[:, 3].mul(qboxes[:, 4]).mul(qboxes[:, 5])
        inter_area_cuda = inter_area.to(torch.device(gboxes.device))
        volume_inc = inter_h.mul(inter_area_cuda)
        volume_union = (volume_gboxes + volume_qboxes - volume_inc)
        center_dist_square_cuda = center_dist_square.to(torch.device(gboxes.device))
        mbr_diag_3d_square_cuda = mbr_diag_3d_square.to(torch.device(gboxes.device))

        ious = torch.div(volume_inc, volume_union)
        valid = index_loc[:, 0]
        dp = torch.div(center_dist_square_cuda[valid], mbr_diag_3d_square_cuda[valid])
        # angle_factor has one entry per box, so it must be restricted to the
        # valid rows like the other terms; otherwise the shapes disagree as
        # soon as one pair is filtered out.
        odious[valid] = 1 - ious[valid] + dp + angle_factor[valid]

        batch_ious = odious * weights
        ious_loss = 2.0 * batch_ious.sum() / batch_size
        return ious_loss
def scatter_point_inds(indices, point_inds, shape):
    """Scatter ``point_inds`` into a dense tensor filled with ``-1``.

    Args:
        indices: integer tensor whose last dimension holds one coordinate per
            axis of the output.
        point_inds: values written at those coordinates.
        shape: shape of the dense output.

    Returns:
        Tensor of ``shape`` with the dtype and device of ``point_inds``;
        positions not addressed by ``indices`` hold ``-1``.
    """
    ret = -1 * torch.ones(*shape, dtype=point_inds.dtype, device=point_inds.device)
    ndim = indices.shape[-1]
    flattened_indices = indices.view(-1, ndim)
    # Multi-dimensional indexing needs a tuple of index tensors; indexing with
    # a list is deprecated.
    slices = tuple(flattened_indices[:, i] for i in range(ndim))
    ret[slices] = point_inds
    return ret


def generate_voxel2pinds(sparse_tensor):
    """Build the dense voxel -> row-index lookup for a sparse tensor.

    Reads ``indices``, ``batch_size`` and ``spatial_shape`` from
    ``sparse_tensor`` and delegates to ``generate_voxel2pinds2``.
    """
    return generate_voxel2pinds2(
        sparse_tensor.batch_size,
        sparse_tensor.spatial_shape,
        sparse_tensor.indices,
    )


def generate_voxel2pinds2(batch_size, spatial_shape, indices):
    """Build the dense voxel -> row-index lookup from raw components.

    Args:
        batch_size: size of the leading output dimension.
        spatial_shape: remaining output dimensions.
        indices: integer tensor with one row of coordinates per voxel.

    Returns:
        int32 tensor of shape ``[batch_size, *spatial_shape]`` holding the row
        number of each voxel in ``indices`` and ``-1`` elsewhere.
    """
    indices = indices.long()
    device = indices.device
    point_indices = torch.arange(indices.shape[0], device=device, dtype=torch.int32)
    output_shape = [batch_size] + list(spatial_shape)
    v2pinds_tensor = scatter_point_inds(indices, point_indices, output_shape)
    return v2pinds_tensor


def find_all_spconv_keys(model: nn.Module, prefix="") -> Set[str]:
    """
    Finds all spconv keys that need to have weight's transposed

    Walks ``model.named_children()`` recursively and collects
    ``"<path>.weight"`` for every ``spconv.conv.SparseConvolution`` child.
    """
    found_keys: Set[str] = set()
    for name, child in model.named_children():
        new_prefix = f"{prefix}.{name}" if prefix != "" else name

        if isinstance(child, spconv.conv.SparseConvolution):
            new_prefix = f"{new_prefix}.weight"
            found_keys.add(new_prefix)

        found_keys.update(find_all_spconv_keys(child, prefix=new_prefix))

    return found_keys


def replace_feature(out, new_features):
    """Attach ``new_features`` to ``out`` and return the resulting object.

    Uses ``out.replace_feature`` when it exists (spconv 2.x behaviour);
    otherwise assigns ``out.features`` in place and returns ``out``.
    """
    if hasattr(out, "replace_feature"):
        # spconv 2.x behaviour
        return out.replace_feature(new_features)
    out.features = new_features
    return out
def project_to_image(project, points):
    """
    Project points to image
    Args:
        project [torch.tensor(..., 3, 4)]: Projection matrix
        points [torch.Tensor(..., 3)]: 3D points
    Returns:
        points_img [torch.Tensor(..., 2)]: Points in image
        points_depth [torch.Tensor(...)]: Depth of each point
    """
    # Bring both operands into a shape that broadcasts under matmul.
    homogeneous = convert_points_to_homogeneous(points).unsqueeze(dim=-1)
    projection = project.unsqueeze(dim=1)

    # Apply the projection, then split into pixel coordinates and depth.
    projected = (projection @ homogeneous).squeeze(dim=-1)
    points_img = convert_points_from_homogeneous(projected)
    points_depth = projected[..., -1] - projection[..., 2, 3]

    return points_img, points_depth


def normalize_coords(coords, shape):
    """
    Normalize coordinates of a grid between [-1, 1]
    Args:
        coords: (..., 3), Coordinates in grid
        shape: (3), Grid shape
    Returns:
        norm_coords: (.., 3), Normalized coordinates in grid
    """
    lower, upper = -1, 1
    # The shape is given in the reverse order of the coordinate axes.
    reversed_shape = torch.flip(shape, dims=[0])

    # Pixel indices run over [0, shape - 1], hence the "- 1".
    return coords / (reversed_shape - 1) * (upper - lower) + lower


def bin_depths(depth_map, mode, depth_min, depth_max, num_bins, target=False):
    """
    Converts depth map into bin indices
    Args:
        depth_map: (H, W), Depth Map
        mode: string, Discretization mode (See https://arxiv.org/pdf/2005.13423.pdf for more details)
            UD: Uniform discretization
            LID: Linear increasing discretization
            SID: Spacing increasing discretization
        depth_min: float, Minimum depth value
        depth_max: float, Maximum depth value
        num_bins: int, Number of depth bins
        target: bool, Whether the depth bins indices will be used for a target tensor in loss comparison
    Returns:
        indices: (H, W), Depth bin indices
    Raises:
        NotImplementedError: for any other ``mode``.
    """
    if mode not in ("UD", "LID", "SID"):
        raise NotImplementedError

    if mode == "UD":
        bin_size = (depth_max - depth_min) / num_bins
        indices = ((depth_map - depth_min) / bin_size)
    elif mode == "LID":
        bin_size = 2 * (depth_max - depth_min) / (num_bins * (1 + num_bins))
        indices = -0.5 + 0.5 * torch.sqrt(1 + 8 * (depth_map - depth_min) / bin_size)
    else:  # "SID"
        indices = num_bins * (torch.log(1 + depth_map) - math.log(1 + depth_min)) / \
            (math.log(1 + depth_max) - math.log(1 + depth_min))

    if not target:
        return indices

    # Out-of-range or non-finite entries are mapped to the extra bin ``num_bins``.
    out_of_bounds = (indices < 0) | (indices > num_bins) | (~torch.isfinite(indices))
    indices[out_of_bounds] = num_bins

    # Targets are integer bin ids.
    return indices.type(torch.int64)
def get_git_commit_number():
    """Return the first seven characters of the current git commit hash.

    Falls back to ``'0000000'`` when there is no ``.git`` entry in the current
    directory, when ``git`` cannot be started, when ``git rev-parse HEAD``
    fails, or when it prints nothing.
    """
    fallback = '0000000'
    if not os.path.exists('.git'):
        return fallback

    try:
        cmd_out = subprocess.run(['git', 'rev-parse', 'HEAD'],
                                 stdout=subprocess.PIPE, check=True)
    except (OSError, subprocess.CalledProcessError):
        # git is not installed, or HEAD cannot be resolved.
        return fallback

    git_commit_number = cmd_out.stdout.decode('utf-8').strip()[:7]
    return git_commit_number or fallback


def make_cuda_ext(name, module, sources):
    """Create the ``CUDAExtension`` named ``<module>.<name>``.

    ``sources`` are paths relative to the directory of ``module`` (its dotted
    name turned into a path).
    """
    cuda_ext = CUDAExtension(
        name='%s.%s' % (module, name),
        sources=[os.path.join(*module.split('.'), src) for src in sources]
    )
    return cuda_ext


def write_version_to_file(version, target_file):
    """Write ``__version__ = "<version>"`` as the only line of ``target_file``."""
    with open(target_file, 'w') as f:
        print('__version__ = "%s"' % version, file=f)


if __name__ == '__main__':
    version = '0.3.0+%s' % get_git_commit_number()
    write_version_to_file(version, 'pcdet/version.py')

    setup(
        name='pcdet',
        version=version,
        description='OpenPCDet is a general codebase for 3D object detection from point cloud',
        install_requires=[
            'numpy',
            'torch>=1.1',
            'spconv',
            'numba',
            'tensorboardX',
            'easydict',
            'pyyaml'
        ],
        author='Shaoshuai Shi',
        author_email='shaoshuaics@gmail.com',
        license='Apache License 2.0',
        packages=find_packages(exclude=['tools', 'data', 'output']),
        cmdclass={'build_ext': BuildExtension},
        ext_modules=[
            make_cuda_ext(
                name='votr_ops_cuda',
                module='pcdet.ops.votr_ops',
                sources=[
                    'src/votr_api.cpp',
                    'src/build_mapping.cpp',
                    'src/build_mapping_gpu.cu',
                    'src/build_attention_indices.cpp',
                    'src/build_attention_indices_gpu.cu',
                    'src/group_features.cpp',
                    'src/group_features_gpu.cu',
                ],
            ),
            make_cuda_ext(
                name='iou3d_nms_cuda',
                module='pcdet.ops.iou3d_nms',
                sources=[
                    'src/iou3d_cpu.cpp',
                    'src/iou3d_nms_api.cpp',
                    'src/iou3d_nms.cpp',
                    'src/iou3d_nms_kernel.cu',
                ]
            ),
            make_cuda_ext(
                name='roiaware_pool3d_cuda',
                module='pcdet.ops.roiaware_pool3d',
                sources=[
                    'src/roiaware_pool3d.cpp',
                    'src/roiaware_pool3d_kernel.cu',
                ]
            ),
            make_cuda_ext(
                name='roipoint_pool3d_cuda',
                module='pcdet.ops.roipoint_pool3d',
                sources=[
                    'src/roipoint_pool3d.cpp',
                    'src/roipoint_pool3d_kernel.cu',
                ]
            ),
            make_cuda_ext(
                name='pointnet2_stack_cuda',
                module='pcdet.ops.pointnet2.pointnet2_stack',
                sources=[
                    'src/pointnet2_api.cpp',
                    'src/ball_query.cpp',
                    'src/ball_query_gpu.cu',
                    'src/group_points.cpp',
                    'src/group_points_gpu.cu',
                    'src/sampling.cpp',
                    'src/sampling_gpu.cu',
                    'src/interpolate.cpp',
                    'src/interpolate_gpu.cu',
                    'src/voxel_query.cpp',
                    'src/voxel_query_gpu.cu',
                    'src/ball_query_deform.cpp',
                    'src/ball_query_deform_gpu.cu',
                    'src/vector_pool.cpp',
                    'src/vector_pool_gpu.cu'
                ],
            ),
            make_cuda_ext(
                name='pointnet2_batch_cuda',
                module='pcdet.ops.pointnet2.pointnet2_batch',
                sources=[
                    'src/pointnet2_api.cpp',
                    'src/ball_query.cpp',
                    'src/ball_query_gpu.cu',
                    'src/group_points.cpp',
                    'src/group_points_gpu.cu',
                    'src/interpolate.cpp',
                    'src/interpolate_gpu.cu',
                    'src/sampling.cpp',
                    'src/sampling_gpu.cu',
                ],
            ),
        ],
    )
class AddCoordsNp():
    """Build normalised coordinate channels for an ``x_dim`` x ``y_dim`` grid."""

    def __init__(self, x_dim=64, y_dim=64, with_r=False):
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.with_r = with_r

    def call(self):
        """Return the coordinate channels.

        Returns:
            float32 array of shape ``(x_dim, y_dim, 2)``: channel 0 runs from
            -1 to 1 along the second axis, channel 1 from -1 to 1 along the
            first axis.  With ``with_r`` a third channel holds the distance of
            each (channel 0, channel 1) pair from the point ``(0.5, 0.5)``.
        """
        column_index = np.arange(self.y_dim)[np.newaxis, :]
        row_index = np.arange(self.x_dim)[:, np.newaxis]

        # Integer index grids, one trailing channel axis each.
        xx_channel = np.tile(column_index, (self.x_dim, 1))[..., np.newaxis]
        yy_channel = np.tile(row_index, (1, self.y_dim))[..., np.newaxis]

        # Scale the indices to [0, 1], then to [-1, 1].
        xx_channel = xx_channel.astype('float32') / (self.y_dim - 1)
        yy_channel = yy_channel.astype('float32') / (self.x_dim - 1)
        xx_channel = xx_channel*2 - 1
        yy_channel = yy_channel*2 - 1

        channels = [xx_channel, yy_channel]
        ret = np.concatenate(channels, axis=-1)

        if self.with_r:
            rr = np.sqrt(np.square(xx_channel-0.5) + np.square(yy_channel-0.5))
            ret = np.concatenate([ret, rr], axis=-1)

        return ret
__init__(self, x_dim=64, y_dim=64, with_r=False): self.x_dim = x_dim self.y_dim = y_dim self.with_r = with_r def call(self): """ input_tensor: (batch, x_dim, y_dim, c) """ #batch_size_tensor = np.shape(input_tensor)[0] xx_ones = np.ones([self.x_dim], dtype=np.int32) xx_ones = np.expand_dims(xx_ones, 1) #print(xx_ones.shape) xx_range = np.expand_dims(np.arange(self.y_dim), 0) #xx_range = np.expand_dims(xx_range, 1) #print(xx_range.shape) xx_channel = np.matmul(xx_ones, xx_range) xx_channel = np.expand_dims(xx_channel, -1) yy_ones = np.ones([self.y_dim], dtype=np.int32) yy_ones = np.expand_dims(yy_ones, 0) #print(yy_ones.shape) yy_range = np.expand_dims(np.arange(self.x_dim), 1) #yy_range = np.expand_dims(yy_range, -1) #print(yy_range.shape) yy_channel = np.matmul(yy_range, yy_ones) yy_channel = np.expand_dims(yy_channel, -1) xx_channel = xx_channel.astype('float32') / (self.y_dim - 1) yy_channel = yy_channel.astype('float32') / (self.x_dim - 1) xx_channel = xx_channel*2 - 1 yy_channel = yy_channel*2 - 1 #xx_channel = xx_channel.repeat(batch_size_tensor, axis=0) #yy_channel = yy_channel.repeat(batch_size_tensor, axis=0) ret = np.concatenate([xx_channel, yy_channel], axis=-1) if self.with_r: rr = np.sqrt( np.square(xx_channel-0.5) + np.square(yy_channel-0.5)) ret = np.concatenate([ret, rr], axis=-1) return ret ================================================ FILE: tools/PENet/LICENSE ================================================ MIT License Copyright (c) 2018 Fangchang Ma Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be 
def _shift_pads(kernel_size):
    """Return the ``kernel_size**2`` zero-padding modules of a shift table.

    Entry ``i * kernel_size + j`` pads ``j`` columns on the left,
    ``kernel_size - 1 - j`` on the right, ``i`` rows on top and
    ``kernel_size - 1 - i`` at the bottom.
    """
    pads = []
    for i in range(kernel_size):
        for j in range(kernel_size):
            top, bottom = i, kernel_size - 1 - i
            left, right = j, kernel_size - 1 - j
            pads.append(torch.nn.ZeroPad2d((left, right, top, bottom)))
    return pads


gks = 5
pad = _shift_pads(gks)
shift = torch.zeros(gks*gks, 4)

# Integer division: ZeroPad2d needs integer sizes, and "/" yields floats under
# Python 3, which made this module unusable.
mid_pad = torch.nn.ZeroPad2d(((gks-1)//2, (gks-1)//2, (gks-1)//2, (gks-1)//2))
zero_pad = pad[0]

gks2 = 3  # guide kernel size
pad2 = _shift_pads(gks2)
shift = torch.zeros(gks2*gks2, 4)

gks3 = 7  # guide kernel size
pad3 = _shift_pads(gks3)
shift = torch.zeros(gks3*gks3, 4)


def weights_init(m):
    """Initialise one module in place.

    ``Conv2d`` and ``ConvTranspose2d`` weights are drawn from a zero-mean
    Gaussian with standard deviation ``sqrt(2 / n)``, where ``n`` is the kernel
    area times ``out_channels`` (``in_channels`` for transposed convolutions);
    their biases, when present, are zeroed.  ``BatchNorm2d`` gets weight 1 and
    bias 0.  Any other module is left untouched.
    """
    # Initialize filters with Gaussian random weights
    if isinstance(m, nn.Conv2d):
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.ConvTranspose2d):
        n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1)
        m.bias.data.zero_()
def convbnrelu(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` and in-place ``ReLU``."""
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                     stride=stride, padding=padding, bias=False)
    norm = nn.BatchNorm2d(out_channels)
    act = nn.ReLU(inplace=True)
    return nn.Sequential(conv, norm, act)


def deconvbnrelu(in_channels, out_channels, kernel_size=5, stride=2, padding=2, output_padding=1):
    """Bias-free ``ConvTranspose2d`` followed by ``BatchNorm2d`` and in-place ``ReLU``."""
    deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel_size,
                                stride=stride, padding=padding,
                                output_padding=output_padding, bias=False)
    norm = nn.BatchNorm2d(out_channels)
    act = nn.ReLU(inplace=True)
    return nn.Sequential(deconv, norm, act)


def convbn(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` (no activation)."""
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                     stride=stride, padding=padding, bias=False)
    norm = nn.BatchNorm2d(out_channels)
    return nn.Sequential(conv, norm)


def deconvbn(in_channels, out_channels, kernel_size=4, stride=2, padding=1, output_padding=0):
    """Bias-free ``ConvTranspose2d`` followed by ``BatchNorm2d`` (no activation)."""
    deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel_size,
                                stride=stride, padding=padding,
                                output_padding=output_padding, bias=False)
    norm = nn.BatchNorm2d(out_channels)
    return nn.Sequential(deconv, norm)


class BasicBlock(nn.Module):
    """Two 3x3 conv + norm layers with a residual connection.

    When ``stride != 1`` or ``inplanes != planes`` the ``downsample`` argument
    is replaced by a 1x1 conv + norm projection so that the shortcut matches
    the main branch.
    """
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)

        needs_projection = stride != 1 or inplanes != planes
        if needs_projection:
            downsample = nn.Sequential(
                conv1x1(inplanes, planes, stride),
                norm_layer(planes),
            )
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1, bias=False, padding=1):
    """3x3 convolution with padding

    Any ``padding >= 1`` is replaced by ``dilation``; smaller values are used
    as given.
    """
    effective_padding = dilation if padding >= 1 else padding
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=effective_padding, groups=groups, bias=bias, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1, groups=1, bias=False):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                     groups=groups, bias=bias)


class SparseDownSampleClose(nn.Module):
    """Downsample a depth map and its validity mask with ``MaxPool2d(stride, stride)``."""

    def __init__(self, stride):
        super(SparseDownSampleClose, self).__init__()
        self.pooling = nn.MaxPool2d(stride, stride)
        self.large_number = 600

    def forward(self, d, mask):
        """Return ``(d_result, mask_result)`` for depth ``d`` and ``mask``.

        Positions with ``mask == 0`` are pushed away by ``large_number`` before
        max-pooling the negated depth, and the same offset is subtracted again
        wherever the pooled mask is 0.
        """
        encoded = - (1-mask)*self.large_number - d

        pooled_depth = - self.pooling(encoded)
        mask_result = self.pooling(mask)
        d_result = pooled_depth - (1-mask_result)*self.large_number

        return d_result, mask_result


class CSPNGenerate(nn.Module):
    """Produce the padded guidance weights for a ``kernel_size`` CSPN step."""

    def __init__(self, in_channels, kernel_size):
        super(CSPNGenerate, self).__init__()
        self.kernel_size = kernel_size
        self.generate = convbn(in_channels, self.kernel_size * self.kernel_size - 1,
                               kernel_size=3, stride=1, padding=1)

    def forward(self, feature):
        """Return the ``kernel_size**2`` weight maps, each zero-padded by its shift.

        The ``kernel_size**2 - 1`` generated maps are divided by the sum of
        their absolute values; the centre map is one minus their sum.
        """
        guide = self.generate(feature)

        # normalization
        guide_sum = torch.sum(guide.abs(), dim=1).unsqueeze(1)
        guide = torch.div(guide, guide_sum)
        guide_mid = (1 - torch.sum(guide, dim=1)).unsqueeze(1)

        # padding
        num_taps = self.kernel_size * self.kernel_size
        centre = int((num_taps - 1) / 2)
        weight_pad = []
        for t in range(num_taps):
            # Shift table matching the kernel size (module-level pad tables).
            if self.kernel_size == 3:
                shifter = pad2[t]
            elif self.kernel_size == 5:
                shifter = pad[t]
            elif self.kernel_size == 7:
                shifter = pad3[t]
            else:
                shifter = 0

            # The centre tap has no generated channel of its own.
            if t < centre:
                source = guide[:, t:t+1, :, :]
            elif t > centre:
                source = guide[:, t-1:t, :, :]
            else:
                source = guide_mid
            weight_pad.append(shifter(source))

        guide_weight = torch.cat(weight_pad, dim=1)
        return guide_weight
class CSPN(nn.Module):
    """One propagation step using padded guidance weights."""

    def __init__(self, kernel_size):
        super(CSPN, self).__init__()
        self.kernel_size = kernel_size

    def forward(self, guide_weight, hn, h0):
        """Combine shifted copies of ``hn`` (``h0`` at the centre tap).

        Each of the ``kernel_size**2`` taps is zero-padded by its shift,
        multiplied by ``guide_weight`` and summed over the tap axis; the
        padding border is cropped from the result.
        """
        # CSPN
        num_taps = self.kernel_size * self.kernel_size
        half = int(0.5 * (num_taps - 1))
        result_pad = []
        for t in range(num_taps):
            # Shift table matching the kernel size (module-level pad tables).
            if self.kernel_size == 3:
                shifter = pad2[t]
            elif self.kernel_size == 5:
                shifter = pad[t]
            elif self.kernel_size == 7:
                shifter = pad3[t]
            else:
                shifter = 0
            result_pad.append(shifter(h0 if t == half else hn))

        guide_result = torch.cat(result_pad, dim=1)
        guide_result = torch.sum((guide_weight.mul(guide_result)), dim=1)

        # Remove the border introduced by the shift padding.
        border = int((self.kernel_size-1)/2)
        guide_result = guide_result[:, border:-border, border:-border]

        return guide_result.unsqueeze(dim=1)


class CSPNGenerateAccelerate(nn.Module):
    """Produce unpadded guidance weights (centre weight inserted in the middle)."""

    def __init__(self, in_channels, kernel_size):
        super(CSPNGenerateAccelerate, self).__init__()
        self.kernel_size = kernel_size
        self.generate = convbn(in_channels, self.kernel_size * self.kernel_size - 1,
                               kernel_size=3, stride=1, padding=1)

    def forward(self, feature):
        """Return the weights as ``[first half, centre, second half]`` channels."""
        guide = self.generate(feature)

        # normalization in standard CSPN
        abs_total = torch.sum(guide.abs(), dim=1).unsqueeze(1)
        guide = torch.div(guide, abs_total)
        guide_mid = (1 - torch.sum(guide, dim=1)).unsqueeze(1)

        leading, trailing = torch.chunk(guide, 2, dim=1)
        return torch.cat((leading, guide_mid, trailing), dim=1)
def kernel_trans(kernel, weight):
    """Convolve ``kernel`` with ``weight`` using 'same' padding.

    The padding is derived from the kernel size ``int(sqrt(kernel.size(1)))``.
    """
    side = int(math.sqrt(kernel.size()[1]))
    return F.conv2d(kernel, weight, stride=1, padding=int((side-1)/2))


class CSPNAccelerate(nn.Module):
    """CSPN propagation step implemented with ``F.unfold``."""

    def __init__(self, kernel_size, dilation=1, padding=1, stride=1):
        super(CSPNAccelerate, self).__init__()
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.padding = padding
        self.stride = stride

    def forward(self, kernel, input, input0):  # with standard CSPN, an addition input0 port is added
        """Return the weighted sum over each pixel's neighbourhood.

        The neighbourhood of ``input`` is unfolded, its centre row is replaced
        by ``input0``, and the rows are combined with ``kernel``.  The result
        has shape ``(bs, 1, h, w)``.
        """
        bs = input.size()[0]
        h, w = input.size()[2], input.size()[3]
        taps = self.kernel_size * self.kernel_size

        columns = F.unfold(input, self.kernel_size, self.dilation, self.padding, self.stride)
        kernel = kernel.reshape(bs, taps, h * w)

        # standard CSPN: the centre tap reads from the initial input instead
        centre = int((taps-1)/2)
        columns[:, centre:centre+1, :] = input0.view(bs, 1, h * w)

        output = torch.einsum('ijk,ijk->ik', (columns, kernel))
        return output.view(bs, 1, h, w)


class GeometryFeature(nn.Module):
    """Turn depth and normalised image coordinates into ``(x, y, z)`` channels."""

    def __init__(self):
        super(GeometryFeature, self).__init__()

    def forward(self, z, vnorm, unorm, h, w, ch, cw, fh, fw):
        """Return ``cat((x, y, z), 1)``.

        ``x = z * (0.5 * h * (vnorm + 1) - ch) / fh`` and
        ``y = z * (0.5 * w * (unorm + 1) - cw) / fw``.
        """
        x = z*(0.5*h*(vnorm+1)-ch)/fh
        y = z*(0.5*w*(unorm+1)-cw)/fw
        return torch.cat((x, y, z), 1)


class BasicBlockGeo(nn.Module):
    """Residual block whose convolutions accept extra geometry channels.

    ``conv1``, ``conv2`` and the 1x1 projection all expect ``geoplanes``
    additional input channels.
    """
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None, geoplanes=3):
        super(BasicBlockGeo, self).__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes + geoplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes+geoplanes, planes)
        self.bn2 = norm_layer(planes)

        needs_projection = stride != 1 or inplanes != planes
        if needs_projection:
            downsample = nn.Sequential(
                conv1x1(inplanes+geoplanes, planes, stride),
                norm_layer(planes),
            )
        self.downsample = downsample
        self.stride = stride

    def forward(self, x, g1=None, g2=None):
        """Run the block; ``g1``/``g2`` are concatenated before conv1/conv2.

        ``g1`` is appended after ``x``; ``g2`` is placed before the
        intermediate features.  The projection shortcut, when present, sees
        ``x`` after ``g1`` has been appended.
        """
        identity = x
        if g1 is not None:
            x = torch.cat((x, g1), 1)

        out = self.relu(self.bn1(self.conv1(x)))

        if g2 is not None:
            out = torch.cat((g2, out), 1)
        out = self.bn2(self.conv2(out))

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        return self.relu(out)
loss_names = ['l1', 'l2']


class MaskedMSELoss(nn.Module):
    """Mean squared error over the positions where ``target > 0``."""

    def __init__(self):
        super(MaskedMSELoss, self).__init__()

    def forward(self, pred, target):
        """Return the masked MSE and store it in ``self.loss``."""
        assert pred.dim() == target.dim(), "inconsistent dimensions"
        valid_mask = (target > 0).detach()
        residual = (target - pred)[valid_mask]
        self.loss = (residual**2).mean()
        return self.loss


class MaskedL1Loss(nn.Module):
    """Mean absolute error over the positions where ``target > 0``."""

    def __init__(self):
        super(MaskedL1Loss, self).__init__()

    def forward(self, pred, target, weight=None):
        """Return the masked L1 loss and store it in ``self.loss``.

        ``weight`` is accepted but not used.
        """
        assert pred.dim() == target.dim(), "inconsistent dimensions"
        valid_mask = (target > 0).detach()
        residual = (target - pred)[valid_mask]
        self.loss = residual.abs().mean()
        return self.loss
09-Jan-2012 13:57:47 corner_dist: 9.950000e-02 S_00: 1.392000e+03 5.120000e+02 K_00: 9.842439e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.808141e+02 2.331966e+02 0.000000e+00 0.000000e+00 1.000000e+00 D_00: -3.728755e-01 2.037299e-01 2.219027e-03 1.383707e-03 -7.233722e-02 R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 T_00: 2.573699e-16 -1.059758e-16 1.614870e-16 S_rect_00: 1.242000e+03 3.750000e+02 R_rect_00: 9.999239e-01 9.837760e-03 -7.445048e-03 -9.869795e-03 9.999421e-01 -4.278459e-03 7.402527e-03 4.351614e-03 9.999631e-01 P_rect_00: 7.215377e+02 0.000000e+00 6.095593e+02 0.000000e+00 0.000000e+00 7.215377e+02 1.728540e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 S_01: 1.392000e+03 5.120000e+02 K_01: 9.895267e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.878386e+02 2.455590e+02 0.000000e+00 0.000000e+00 1.000000e+00 D_01: -3.644661e-01 1.790019e-01 1.148107e-03 -6.298563e-04 -5.314062e-02 R_01: 9.993513e-01 1.860866e-02 -3.083487e-02 -1.887662e-02 9.997863e-01 -8.421873e-03 3.067156e-02 8.998467e-03 9.994890e-01 T_01: -5.370000e-01 4.822061e-03 -1.252488e-02 S_rect_01: 1.242000e+03 3.750000e+02 R_rect_01: 9.996878e-01 -8.976826e-03 2.331651e-02 8.876121e-03 9.999508e-01 4.418952e-03 -2.335503e-02 -4.210612e-03 9.997184e-01 P_rect_01: 7.215377e+02 0.000000e+00 6.095593e+02 -3.875744e+02 0.000000e+00 7.215377e+02 1.728540e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 S_02: 1.392000e+03 5.120000e+02 K_02: 9.597910e+02 0.000000e+00 6.960217e+02 0.000000e+00 9.569251e+02 2.241806e+02 0.000000e+00 0.000000e+00 1.000000e+00 D_02: -3.691481e-01 1.968681e-01 1.353473e-03 5.677587e-04 -6.770705e-02 R_02: 9.999758e-01 -5.267463e-03 -4.552439e-03 5.251945e-03 9.999804e-01 -3.413835e-03 4.570332e-03 3.389843e-03 9.999838e-01 T_02: 5.956621e-02 2.900141e-04 2.577209e-03 S_rect_02: 1.242000e+03 3.750000e+02 R_rect_02: 9.998817e-01 1.511453e-02 
def get_calib_from_file(filepath):
    '''
    Read in a calibration file and parse into a dictionary.
    Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py

    Lines are recognised by their prefix: ``P2``, ``P3``,
    ``Tr_velo_to_cam`` / ``Tr_velo_cam`` and ``R0_rect`` / ``R_rect``.
    If several lines share a prefix, the last one wins.

    :param filepath: path of the calibration text file
    :return: dict with 'P2' (3, 4), 'P3' (3, 4), 'Tr_velo2cam' (3, 4)
        and 'R0' (3, 3)
    :raises ValueError: if the file has no P2, P3 or Tr_velo_to_cam entry
    '''
    # Fallback rectification matrix, used when the file has no R0 entry.
    R0 = np.array([[0.99992624, 0.00965411, -0.0072371],
                   [-0.00968531, 0.99994343, -0.00433077],
                   [0.00719491, 0.00440054, 0.99996366]])
    P2 = P3 = vtc_mat = None

    with open(filepath) as f:
        for line in f:
            # split() tolerates repeated blanks and tabs between values
            fields = line.split()
            if line.startswith("P2"):
                P2 = np.array(fields[-12:], np.float32)
            if line.startswith("P3"):
                P3 = np.array(fields[-12:], np.float32)
            if line.startswith(("Tr_velo_to_cam", "Tr_velo_cam")):
                vtc_mat = np.array(fields[-12:], np.float32)
            if line.startswith(("R0_rect", "R_rect")):
                R0 = np.array(fields[-9:], np.float32)

    required = (("P2", P2), ("P3", P3), ("Tr_velo_to_cam", vtc_mat))
    missing = [name for name, value in required if value is None]
    if missing:
        raise ValueError(
            "calibration file {} is missing: {}".format(filepath, ", ".join(missing)))

    data2 = {}
    data2["P2"] = P2.reshape(3, 4)
    data2["P3"] = P3.reshape(3, 4)
    data2["Tr_velo2cam"] = vtc_mat.reshape(3, 4)
    data2["R0"] = R0.reshape(3, 3)
    return data2


class Calibration(object):
    """Calibration matrices plus conversions between the lidar, rectified
    camera and image coordinate frames."""

    def __init__(self, calib_file):
        """
        :param calib_file: path of a calibration text file, or an already
            parsed dict with the keys 'P2', 'R0' and 'Tr_velo2cam'
        """
        if not isinstance(calib_file, dict):
            calib = get_calib_from_file(calib_file)
        else:
            calib = calib_file

        self.P2 = calib['P2']  # 3 x 4
        self.R0 = calib['R0']  # 3 x 3
        self.V2C = calib['Tr_velo2cam']  # 3 x 4

        # Camera intrinsics and extrinsics
        self.cu = self.P2[0, 2]
        self.cv = self.P2[1, 2]
        self.fu = self.P2[0, 0]
        self.fv = self.P2[1, 1]
        self.tx = self.P2[0, 3] / (-self.fu)
        self.ty = self.P2[1, 3] / (-self.fv)

    def cart_to_hom(self, pts):
        """
        Append a column of ones to the points.

        :param pts: (N, 3 or 2)
        :return pts_hom: (N, 4 or 3)
        """
        pts_hom = np.hstack((pts, np.ones((pts.shape[0], 1), dtype=np.float32)))
        return pts_hom

    def rect_to_lidar(self, pts_rect):
        """
        Inverse of :meth:`lidar_to_rect`.

        :param pts_rect: (N, 3)
        :return pts_lidar: (N, 3)
        """
        pts_rect_hom = self.cart_to_hom(pts_rect)  # (N, 4)
        # Extend R0 and V2C to 4 x 4 so their product can be inverted.
        R0_ext = np.hstack((self.R0, np.zeros((3, 1), dtype=np.float32)))  # (3, 4)
        R0_ext = np.vstack((R0_ext, np.zeros((1, 4), dtype=np.float32)))  # (4, 4)
        R0_ext[3, 3] = 1
        V2C_ext = np.vstack((self.V2C, np.zeros((1, 4), dtype=np.float32)))  # (4, 4)
        V2C_ext[3, 3] = 1

        pts_lidar = np.dot(pts_rect_hom, np.linalg.inv(np.dot(R0_ext, V2C_ext).T))
        return pts_lidar[:, 0:3]

    def lidar_to_rect(self, pts_lidar):
        """
        Apply V2C and then R0 to the points.

        :param pts_lidar: (N, 3)
        :return pts_rect: (N, 3)
        """
        pts_lidar_hom = self.cart_to_hom(pts_lidar)
        pts_rect = np.dot(pts_lidar_hom, np.dot(self.V2C.T, self.R0.T))
        return pts_rect

    def rect_to_img(self, pts_rect):
        """
        Project rectified points with P2.

        :param pts_rect: (N, 3)
        :return pts_img: (N, 2)
        :return pts_rect_depth: (N), depth in rect camera coord
        """
        pts_rect_hom = self.cart_to_hom(pts_rect)
        pts_2d_hom = np.dot(pts_rect_hom, self.P2.T)
        pts_img = (pts_2d_hom[:, 0:2].T / pts_rect_hom[:, 2]).T  # (N, 2)
        pts_rect_depth = pts_2d_hom[:, 2] - self.P2.T[3, 2]  # depth in rect camera coord
        return pts_img, pts_rect_depth

    def lidar_to_img(self, pts_lidar):
        """
        Chain :meth:`lidar_to_rect` and :meth:`rect_to_img`.

        :param pts_lidar: (N, 3)
        :return pts_img: (N, 2)
        :return pts_depth: (N)
        """
        pts_rect = self.lidar_to_rect(pts_lidar)
        pts_img, pts_depth = self.rect_to_img(pts_rect)
        return pts_img, pts_depth

    def img_to_rect(self, u, v, depth_rect):
        """
        Back-project pixels with known depth using the P2 intrinsics.

        :param u: (N)
        :param v: (N)
        :param depth_rect: (N)
        :return pts_rect: (N, 3)
        """
        x = ((u - self.cu) * depth_rect) / self.fu + self.tx
        y = ((v - self.cv) * depth_rect) / self.fv + self.ty
        pts_rect = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1), depth_rect.reshape(-1, 1)), axis=1)
        return pts_rect

    def corners3d_to_img_boxes(self, corners3d):
        """
        :param corners3d: (N, 8, 3) corners in rect coordinate
        :return: boxes: (None, 4) [x1, y1, x2, y2] in rgb coordinate
        :return: boxes_corner: (None, 8) [xi, yi] in rgb coordinate
        """
        sample_num = corners3d.shape[0]
        corners3d_hom = np.concatenate((corners3d, np.ones((sample_num, 8, 1))), axis=2)  # (N, 8, 4)

        img_pts = np.matmul(corners3d_hom, self.P2.T)  # (N, 8, 3)

        x, y = img_pts[:, :, 0] / img_pts[:, :, 2], img_pts[:, :, 1] / img_pts[:, :, 2]
        x1, y1 = np.min(x, axis=1), np.min(y, axis=1)
        x2, y2 = np.max(x, axis=1), np.max(y, axis=1)

        boxes = np.concatenate((x1.reshape(-1, 1), y1.reshape(-1, 1), x2.reshape(-1, 1), y2.reshape(-1, 1)), axis=1)
        boxes_corner = np.concatenate((x.reshape(-1, 8, 1), y.reshape(-1, 8, 1)), axis=2)

        return boxes, boxes_corner
def load_calib():
    """
    Temporarily hardcoding the calibration matrix using calib file from 2011_09_26

    Reads ``dataloaders/calib_cam_to_cam.txt`` relative to the current
    working directory and takes the 3 x 4 projection matrix stored on the
    26th line of that file.

    :return: (3, 3) float32 camera matrix with the optical centre shifted
        for the centre crop described below
    """
    # The context manager closes the file even if reading fails.
    with open("dataloaders/calib_cam_to_cam.txt", "r") as calib:
        lines = calib.readlines()

    P_rect_line = lines[25]

    Proj_str = P_rect_line.split(":")[1].split(" ")[1:]
    Proj = np.reshape(np.array([float(p) for p in Proj_str]),
                      (3, 4)).astype(np.float32)
    K = Proj[:3, :3]  # camera matrix

    # note: we will take the center crop of the images during augmentation
    # that changes the optical centers, but not focal lengths
    K[0, 2] = K[0, 2] - 13  # from width = 1242 to 1216, with a 13-pixel cut on both sides
    K[1, 2] = K[1, 2] - 11.5  # from height = 375 to 352, with a 11.5-pixel cut on both sides
    return K
ps[-1:]) pnew = os.path.join(args.data_folder_rgb, pnew) return pnew ''' elif split == "val": if args.val == "full": transform = val_transform ''' glob_d = os.path.join( args.data_folder, 'data_depth_velodyne/val/*_sync/proj_depth/velodyne_raw/image_0[2,3]/*.png' ) glob_gt = os.path.join( args.data_folder, 'data_depth_annotated/val/*_sync/proj_depth/groundtruth/image_0[2,3]/*.png' ) def get_rgb_paths(p): ps = p.split('/') date_liststr = [] date_liststr.append(ps[-5][:10]) # pnew = '/'.join(ps[:-7] + # ['data_rgb']+ps[-6:-4]+ps[-2:-1]+['data']+ps[-1:]) pnew = '/'.join(date_liststr + ps[-5:-4] + ps[-2:-1] + ['data'] + ps[-1:]) pnew = os.path.join(args.data_folder_rgb, pnew) return pnew ''' elif args.val == "select": # transform = no_transform transform = val_transform ''' glob_d = os.path.join( args.data_folder, "data_depth_selection/val_selection_cropped/velodyne_raw/*.png") glob_gt = os.path.join( args.data_folder, "data_depth_selection/val_selection_cropped/groundtruth_depth/*.png" ) def get_rgb_paths(p): return p.replace("groundtruth_depth", "image") ''' elif split == "test_completion": transform = no_transform ''' glob_d = os.path.join( args.data_folder, "data_depth_selection/test_depth_completion_anonymous/velodyne_raw/*.png" ) glob_gt = None # "test_depth_completion_anonymous/" glob_rgb = os.path.join( args.data_folder, "data_depth_selection/test_depth_completion_anonymous/image/*.png") ''' elif split == "test_prediction": transform = no_transform ''' glob_d = None glob_gt = None # "test_depth_completion_anonymous/" glob_rgb = os.path.join( args.data_folder, "data_depth_selection/test_depth_prediction_anonymous/image/*.png") ''' else: raise ValueError("Unrecognized split " + str(split)) ''' if glob_gt is not None: # train or val-full or val-select paths_d = sorted(glob.glob(glob_d)) paths_gt = sorted(glob.glob(glob_gt)) paths_rgb = [get_rgb_paths(p) for p in paths_gt] else: # test only has d or rgb paths_rgb = sorted(glob.glob(glob_rgb)) paths_gt = [None] * 
def depth_read(filename):
    """Load depth map D from a png file and return it as a numpy array.

    Pixel values are divided by 256 and a trailing channel axis is added.
    For details see readme.txt.

    :param filename: path of the depth png
    :return: float64 array with one extra trailing dimension
    :raises AssertionError: if the file does not exist, or if its maximum
        value does not exceed 255 (i.e. it is not a 16bit depth map)
    """
    assert os.path.exists(filename), "file not found: {}".format(filename)
    # The context manager closes the image even if the conversion fails.
    with Image.open(filename) as img_file:
        depth_png = np.array(img_file, dtype=int)
    # make sure we have a proper 16bit depth map here.. not 8bit!
    assert np.max(depth_png) > 255, \
        "np.max(depth_png)={}, path={}".format(np.max(depth_png), filename)

    # np.float was removed in NumPy 1.24; float64 is the dtype it aliased.
    depth = depth_png.astype(np.float64) / 256.
    # depth[depth_png == 0] = -1.
    depth = np.expand_dims(depth, -1)
    return depth
def get_rgb_near(path, args):
    """Read an RGB frame whose id is within three frames of ``path``.

    A non-zero offset in [-3, 3] is drawn at random and the file with that
    frame id in the same directory is tried. At most 20 draws are made.

    :param path: path of the reference frame; the file name up to its
        first '.' must be an integer frame id
    :param args: unused
    :return: the image returned by ``rgb_read`` for the neighbouring frame
    :raises AssertionError: if ``path`` is None, or if none of the 20
        candidate files exists
    """
    assert path is not None, "path is None"

    def extract_frame_id(filename):
        # split ".../0000000005.png" into its directory and the integer 5
        head, tail = os.path.split(filename)
        number_string = tail[0:tail.find('.')]
        number = int(number_string)
        return head, number

    def get_nearby_filename(filename, new_id):
        # same directory, 10-digit zero-padded frame id
        head, _ = os.path.split(filename)
        new_filename = os.path.join(head, '%010d.png' % new_id)
        return new_filename

    _, number = extract_frame_id(path)
    max_trials = 20
    max_frame_diff = 3
    candidates = [
        offset for offset in range(-max_frame_diff, max_frame_diff + 1)
        if offset != 0
    ]

    # Bounded retry: the former ``while True`` loop never advanced its trial
    # counter, so it could spin forever when no neighbouring frame existed.
    for _ in range(max_trials):
        random_offset = choice(candidates)
        path_near = get_nearby_filename(path, number + random_offset)
        if os.path.exists(path_near):
            return rgb_read(path_near)

    raise AssertionError(
        "cannot find a nearby frame in 20 trials for {}".format(path_near))
vsize_xyz, coors_range_xyz, num_point_features, max_num_points_per_voxel, max_num_voxels): try: from spconv.utils import VoxelGeneratorV2 as VoxelGenerator self.spconv_ver = 1 except: try: from spconv.utils import VoxelGenerator self.spconv_ver = 1 except: from spconv.utils import Point2VoxelCPU3d as VoxelGenerator self.spconv_ver = 2 if self.spconv_ver == 1: self._voxel_generator = VoxelGenerator( voxel_size=vsize_xyz, point_cloud_range=coors_range_xyz, max_num_points=max_num_points_per_voxel, max_voxels=max_num_voxels ) else: self._voxel_generator = VoxelGenerator( vsize_xyz=vsize_xyz, coors_range_xyz=coors_range_xyz, num_point_features=num_point_features, max_num_points_per_voxel=max_num_points_per_voxel, max_num_voxels=max_num_voxels ) def generate(self, points): if self.spconv_ver == 1: voxel_output = self._voxel_generator.generate(points) if isinstance(voxel_output, dict): voxels, coordinates, num_points = \ voxel_output['voxels'], voxel_output['coordinates'], voxel_output['num_points_per_voxel'] else: voxels, coordinates, num_points = voxel_output else: assert tv is not None, f"Unexpected error, library: 'cumm' wasn't imported properly." 
def get_fov_flag(pts_rect, img_shape, calib):
    """Flag the points that project inside the image with non-negative depth.

    Args:
        pts_rect: points passed to ``calib.rect_to_img``
        img_shape: image shape; index 0 is used as height, index 1 as width
        calib: object providing ``rect_to_img``

    Returns:
        boolean array with one entry per point
    """
    uv, rect_depth = calib.rect_to_img(pts_rect)
    col, row = uv[:, 0], uv[:, 1]
    col_ok = (col >= 0) & (col < img_shape[1])
    row_ok = (row >= 0) & (row < img_shape[0])
    return col_ok & row_ok & (rect_depth >= 0)


def load_depth_input(calib, image, points):
    """Rasterise lidar points into a sparse depth image.

    Args:
        calib: object providing ``lidar_to_rect`` and ``rect_to_img``
        image: image array; only its first two dimensions size the depth map
        points: point array whose first three columns are the coordinates

    Returns:
        tuple ``(rgb, depth)``: a uint8 copy of ``image`` and an
        (H, W, 1) depth map that is zero wherever no point projects
    """
    image = copy.deepcopy(image)
    height, width = image.shape[0], image.shape[1]

    # keep only the points inside the camera field of view
    in_fov = get_fov_flag(calib.lidar_to_rect(points[:, 0:3]), image.shape, calib)
    points = points[in_fov]

    uv, rect_depth = calib.rect_to_img(calib.lidar_to_rect(points[:, 0:3]))
    col, row = uv[:, 0], uv[:, 1]
    inside = (col >= 0) & (row >= 0) & (col < width) & (row < height)
    pixels = uv[inside].astype(np.int32)

    depth_map = np.zeros(shape=(height, width))
    depth_map[pixels[:, 1], pixels[:, 0]] = rect_depth[inside]

    return np.array(image, dtype='uint8'), np.expand_dims(depth_map, -1)
def to_sphere_coords(points):
    """Convert points to (radius, polar angle, azimuth) and filter them.

    The polar angle is ``arccos(z / r)`` and the azimuth is
    ``arctan(y / x)``. Only points whose polar angle exceeds 1.5 rad are
    kept. Columns after the third are carried over unchanged.

    Returns:
        tuple ``(spherical, cartesian)`` holding the surviving rows in
        spherical and in original coordinates
    """
    radius = np.linalg.norm(points[:, 0:3], ord=2, axis=-1)
    polar = np.arccos(points[:, 2] / radius)
    azimuth = np.arctan(points[:, 1] / points[:, 0])

    spherical = copy.deepcopy(points)
    spherical[:, 0] = radius
    spherical[:, 1] = polar
    spherical[:, 2] = azimuth

    keep = spherical[:, 1] > 1.5
    return spherical[keep], points[keep]


def de_noise(points, vert_res = 0.05, hor_res = 0.05):
    """Drop points that fall into sparsely populated angular cells.

    Points are binned by polar angle (``vert_res``) and azimuth
    (``hor_res``); only cells holding more than 20 points are kept.

    Returns:
        array of the surviving points, grouped cell by cell
    """
    spherical, kept = to_sphere_coords(copy.deepcopy(points))

    cells = {}
    for idx, sph in enumerate(spherical):
        cell_key = '_'.join((str(sph[1] // vert_res), str(sph[2] // hor_res)))
        cells.setdefault(cell_key, []).append(kept[idx])

    dense = []
    for members in cells.values():
        if len(members) > 20:
            dense.extend(members)
    return np.array(dense)
def voxel_sampling(point2, res_x=0.05, res_y=0.05, res_z = 0.05):
    """Keep one point per voxel of size (res_x, res_y, res_z).

    Voxel indices are measured from the origin (-100, -100, -10). When
    several points share a voxel, the last one encountered is kept.

    Returns:
        array with one row per occupied voxel, ordered by the first time
        each voxel was seen
    """
    origin = (-100, -100, -10)
    steps = (res_x, res_y, res_z)

    latest = {}
    for pt in point2:
        cell_key = '_'.join(
            str((pt[axis] - origin[axis]) // steps[axis]) for axis in range(3))
        latest[cell_key] = pt

    return np.array(list(latest.values()))
def lidar_guied_dis_sampling(point2, ref_points, dis = 0.3, res_z = 0.3):
    """Select the points of ``point2`` that lie close to a reference point.

    For each reference point, every not-yet-selected candidate within
    ``dis`` along x and y and within ``res_z`` along z is taken. Each
    candidate is returned at most once. Rows whose |x| or |y| exceeds 100
    are overwritten with the value 100 before matching.

    The caller's ``point2`` array is left untouched. The index of every
    1000th reference point is printed as progress output.

    :param point2: (N, C) candidate points, first three columns x, y, z
    :param ref_points: iterable of reference points (x, y, z, ...)
    :param dis: half-width of the matching window along x and y
    :param res_z: half-width of the matching window along z
    :return: (M, C) array of selected candidates, grouped by reference point
    """
    # Work on a copy: candidates are overwritten with sentinel values below.
    candidates = np.array(point2, copy=True)
    # The absolute value must be taken before the comparison, not after it.
    candidates[np.abs(candidates[:, 0]) > 100] = 100
    candidates[np.abs(candidates[:, 1]) > 100] = 100

    selected = []
    for i, point in enumerate(ref_points):
        if i % 1000 == 0:
            print(i)
        x, y, z = point[0], point[1], point[2]
        mask = ((np.abs(candidates[:, 0] - x) < dis)
                & (np.abs(candidates[:, 1] - y) < dis)
                & (np.abs(candidates[:, 2] - z) < res_z))
        # boolean indexing copies, so the sentinel write below is safe
        selected.append(candidates[mask])
        # move matched candidates far away so they cannot be picked again
        candidates[mask] = 10000

    if not selected:
        # np.concatenate rejects an empty list; return an empty selection
        return candidates[:0]
    return np.concatenate(selected)
class MyLoader:
    """Index-based loader that yields (rgb, depth) pairs from a directory
    with ``velodyne``, ``image_2`` and ``calib`` sub-folders."""

    def __init__(self, root_path=''):
        self.root_path = root_path
        self.file_list = self.include_all_files()

    def include_all_files(self):
        """Return the sorted frame ids found in the ``velodyne`` folder.

        A frame id is the first six characters of each file name.
        """
        names = sorted(os.listdir(os.path.join(self.root_path, 'velodyne')))
        return [name[0:6] for name in names]

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, item):
        """Load frame ``item`` and return ``(rgb, depth)`` as produced by
        ``load_depth_input``."""
        frame_id = self.file_list[item]
        root = self.root_path
        image_path = os.path.join(root, 'image_2', frame_id + '.png')
        velo_path = os.path.join(root, 'velodyne', frame_id + '.bin')
        calib_path = os.path.join(root, 'calib', frame_id + '.txt')

        calib = calibration_kitti.Calibration(calib_path)
        # the .bin file is read as float32 values, four per point
        points = np.fromfile(str(velo_path), dtype=np.float32).reshape(-1, 4)
        # crop the image to at most 352 rows and 1216 columns
        image = np.array(io.imread(image_path), dtype=np.int32)[:352, :1216]

        rgb, depth = load_depth_input(calib, image, points)
        return rgb, depth
def scatter_point_inds(indices, point_inds, shape):
    """Scatter ``point_inds`` into a dense tensor filled with -1.

    :param indices: integer tensor whose last dimension holds one
        coordinate per output dimension
    :param point_inds: values written at those coordinates
    :param shape: shape of the dense output
    :return: tensor of ``shape`` with the dtype and device of
        ``point_inds``; positions that receive no value stay -1
    """
    ret = torch.full(tuple(shape), -1, dtype=point_inds.dtype, device=point_inds.device)
    ndim = indices.shape[-1]
    flattened_indices = indices.view(-1, ndim)
    # unbind() yields a tuple with one index tensor per dimension; indexing
    # with a plain list of tensors is deprecated in PyTorch.
    ret[flattened_indices.unbind(dim=1)] = point_inds
    return ret


def generate_voxel2pinds(sparse_tensor):
    """Build a dense map from voxel coordinate to voxel row index.

    :param sparse_tensor: object exposing ``indices``, ``batch_size`` and
        ``spatial_shape``
    :return: int32 tensor of shape ``[batch_size, *spatial_shape]``
        holding each voxel's row number, and -1 for empty positions
    """
    device = sparse_tensor.indices.device
    batch_size = sparse_tensor.batch_size
    spatial_shape = sparse_tensor.spatial_shape
    indices = sparse_tensor.indices.long()
    point_indices = torch.arange(indices.shape[0], device=device, dtype=torch.int32)
    output_shape = [batch_size] + list(spatial_shape)
    v2pinds_tensor = scatter_point_inds(indices, point_indices, output_shape)
    return v2pinds_tensor


def generate_voxel2pinds2(batch_size, spatial_shape, indices):
    """Same as :func:`generate_voxel2pinds`, taking the fields directly.

    :param batch_size: size of the leading output dimension
    :param spatial_shape: sizes of the remaining output dimensions
    :param indices: integer tensor of voxel coordinates, one row per voxel
    :return: int32 tensor of shape ``[batch_size, *spatial_shape]``
    """
    indices = indices.long()
    device = indices.device
    point_indices = torch.arange(indices.shape[0], device=device, dtype=torch.int32)
    output_shape = [batch_size] + list(spatial_shape)
    v2pinds_tensor = scatter_point_inds(indices, point_indices, output_shape)
    return v2pinds_tensor
def adjust_brightness(img, brightness_factor):
    """Adjust brightness of an Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        brightness_factor (float): Factor handed to
            ``ImageEnhance.Brightness(...).enhance``. 0 gives a black image,
            1 gives the original image while 2 increases the brightness by a
            factor of 2.

    Returns:
        PIL Image: Brightness adjusted image.

    Raises:
        TypeError: If ``img`` is not a PIL Image.
    """
    if _is_pil_image(img):
        return ImageEnhance.Brightness(img).enhance(brightness_factor)
    raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
def adjust_hue(img, hue_factor):
    """Adjust hue of an image.

    The image hue is adjusted by converting the image to HSV and cyclically
    shifting the intensities in the hue channel (H). The image is then
    converted back to the original image mode.

    See https://en.wikipedia.org/wiki/Hue for more details on Hue.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        hue_factor (float): How much to shift the hue channel. Must be in
            ``[-0.5, 0.5]``. 0 means no shift; 0.5 and -0.5 give a complete
            reversal of the hue channel in the positive and negative
            direction respectively.

    Returns:
        PIL Image: Hue adjusted image. Images in mode ``'L'``, ``'1'``,
        ``'I'`` or ``'F'`` are returned unchanged.

    Raises:
        ValueError: If ``hue_factor`` is outside ``[-0.5, 0.5]``.
        TypeError: If ``img`` is not a PIL Image.
    """
    if not (-0.5 <= hue_factor <= 0.5):
        # The message previously had no placeholder, so ``.format`` silently
        # dropped the offending value.
        raise ValueError(
            'hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor))

    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    input_mode = img.mode
    if input_mode in {'L', '1', 'I', 'F'}:
        return img

    h, s, v = img.convert('HSV').split()

    np_h = np.array(h, dtype=np.uint8)
    # Truncate toward zero and wrap into 0..255 in Python before building the
    # uint8 scalar: casting a negative float straight to uint8 is
    # platform-dependent.
    shift = np.uint8(int(hue_factor * 255) % 256)
    # uint8 addition takes care of rotation across boundaries
    with np.errstate(over='ignore'):
        np_h += shift
    h = Image.fromarray(np_h, 'L')

    img = Image.merge('HSV', (h, s, v)).convert(input_mode)
    return img
class Compose(object):
    """Chain several transforms into a single callable.

    Each transform is called with the output of the previous one, in list
    order.

    Args:
        transforms (list of callables): transforms to apply in sequence.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        """Run ``img`` through every transform and return the final result."""
        result = img
        for step in self.transforms:
            result = step(result)
        return result
class NormalizeNumpyArray(object):
    """Normalize a ``numpy.ndarray`` image with mean and standard deviation.

    Given mean ``(M1, M2, M3)`` and std ``(S1, S2, S3)``, each of the first
    three channels of the input is normalized as
    ``input[..., c] = (input[..., c] - mean[c]) / std[c]``.

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray): Image of size (H, W, C) to be normalized.
                It is modified in place.

        Returns:
            numpy.ndarray: The same array object, normalized.

        Raises:
            TypeError: If ``img`` is not a 2-D or 3-D ``numpy.ndarray``.
        """
        if not (_is_numpy_image(img)):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))
        # The stray ``print(img.shape)`` debug statement that ran on every
        # call has been removed.
        # Exactly three channels are processed, as before; results are
        # written back into ``img`` so they are cast to its dtype.
        for i in range(3):
            img[:, :, i] = (img[:, :, i] - self.mean[i]) / self.std[i]
        return img
class Resize(object):
    """Rescale the given ``numpy.ndarray`` by a constant factor.

    Args:
        size (float): Scale factor passed to ``skimage.transform.rescale``.
            Must be a ``float``; other types fail the constructor assertion.
        interpolation (str, optional): Stored on the instance but not
            consulted; rescaling always uses ``order=0`` (nearest-neighbor
            type interpolation).
    """

    def __init__(self, size, interpolation='nearest'):
        assert isinstance(size, float)
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray): 2-D or 3-D image to be scaled.

        Returns:
            numpy.ndarray: Rescaled image.

        Raises:
            RuntimeError: If ``img`` has neither 2 nor 3 dimensions.
        """
        if img.ndim not in (2, 3):
            # Previously this exception was constructed but never raised, so
            # unsupported inputs silently produced ``None``.
            raise RuntimeError(
                'img should be ndarray with 2 or 3 dimensions. Got {}'.format(
                    img.ndim))
        return skimage.transform.rescale(img, self.size, order=0)
""" h = img.shape[0] w = img.shape[1] th, tw = output_size i = int(round((h - th) / 2.)) j = int(round((w - tw) / 2.)) # # randomized cropping # i = np.random.randint(i-3, i+4) # j = np.random.randint(j-3, j+4) return i, j, th, tw def __call__(self, img): """ Args: img (numpy.ndarray (C x H x W)): Image to be cropped. Returns: img (numpy.ndarray (C x H x W)): Cropped image. """ i, j, h, w = self.get_params(img, self.size) """ i: Upper pixel coordinate. j: Left pixel coordinate. h: Height of the cropped image. w: Width of the cropped image. """ if not (_is_numpy_image(img)): raise TypeError('img should be ndarray. Got {}'.format(type(img))) if img.ndim == 3: return img[i:i + h, j:j + w, :] elif img.ndim == 2: return img[i:i + h, j:j + w] else: raise RuntimeError( 'img should be ndarray with 2 or 3 dimensions. Got {}'.format( img.ndim)) class BottomCrop(object): """Crops the given ``numpy.ndarray`` at the bottom. Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is made. """ def __init__(self, size): if isinstance(size, numbers.Number): self.size = (int(size), int(size)) else: self.size = size @staticmethod def get_params(img, output_size): """Get parameters for ``crop`` for bottom crop. Args: img (numpy.ndarray (C x H x W)): Image to be cropped. output_size (tuple): Expected output size of the crop. Returns: tuple: params (i, j, h, w) to be passed to ``crop`` for bottom crop. """ h = img.shape[0] w = img.shape[1] th, tw = output_size i = h - th j = int(round((w - tw) / 2.)) # randomized left and right cropping # i = np.random.randint(i-3, i+4) # j = np.random.randint(j-1, j+1) return i, j, th, tw def __call__(self, img): """ Args: img (numpy.ndarray (C x H x W)): Image to be cropped. Returns: img (numpy.ndarray (C x H x W)): Cropped image. """ i, j, h, w = self.get_params(img, self.size) """ i: Upper pixel coordinate. j: Left pixel coordinate. h: Height of the cropped image. 
class RandomCrop(object):
    """Crops the given ``numpy.ndarray`` at a random location.

    Args:
        size (sequence or int): Desired output size of the crop. If size is
            an int instead of sequence like (h, w), a square crop (size, size)
            is made.
    """

    def __init__(self, size):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size

    @staticmethod
    def get_params(img, output_size):
        """Get parameters for ``crop`` for a random crop.

        Args:
            img (numpy.ndarray): Image to be cropped; axis 0 is read as the
                height and axis 1 as the width.
            output_size (tuple): Expected output size ``(h, w)`` of the crop.

        Returns:
            tuple: params (i, j, h, w) where ``i`` is the upper row and ``j``
            the left column, each drawn uniformly so that the crop fits
            inside the image.
        """
        h = img.shape[0]
        w = img.shape[1]
        th, tw = output_size

        # randomized top-left corner
        i = np.random.randint(0, h - th + 1)
        j = np.random.randint(0, w - tw + 1)
        return i, j, th, tw

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray): 2-D or 3-D image to be cropped.

        Returns:
            numpy.ndarray: Cropped image (a slice of the input).

        Raises:
            TypeError: If ``img`` is not a 2-D or 3-D ``numpy.ndarray``.
        """
        # Validate before get_params touches ``img.shape``; previously a
        # non-array input failed with AttributeError before this check ran.
        if not (_is_numpy_image(img)):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))

        i, j, h, w = self.get_params(img, self.size)
        # Slicing the two leading axes covers both accepted ranks; any
        # channel axis is kept whole.
        return img[i:i + h, j:j + w]
class Lambda(object):
    """Apply a user-defined callable as a transform.

    Args:
        lambd (callable): Function, lambda, builtin, bound method or any
            other callable used for the transform.
    """

    def __init__(self, lambd):
        # ``callable`` accepts everything the former ``types.LambdaType``
        # check accepted, plus builtins, partials and callable objects.
        assert callable(lambd), \
            'lambd should be callable. Got {}'.format(type(lambd))
        self.lambd = lambd

    def __call__(self, img):
        """Return ``lambd(img)``."""
        return self.lambd(img)
class ColorJitter(object):
    """Apply brightness, contrast, saturation and hue adjustments to an image.

    The four factors are forwarded unchanged to ``adjust_brightness``,
    ``adjust_contrast``, ``adjust_saturation`` and ``adjust_hue``; this class
    does not sample them. Only the order of the four steps is randomized, once,
    when the object is constructed.

    Args:
        brightness (float): Factor passed to ``adjust_brightness``.
        contrast (float): Factor passed to ``adjust_contrast``.
        saturation (float): Factor passed to ``adjust_saturation``.
        hue (float): Factor passed to ``adjust_hue``.
    """

    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
        steps = [
            Lambda(lambda img: adjust_brightness(img, brightness)),
            Lambda(lambda img: adjust_contrast(img, contrast)),
            Lambda(lambda img: adjust_saturation(img, saturation)),
            Lambda(lambda img: adjust_hue(img, hue)),
        ]
        np.random.shuffle(steps)
        self.transform = Compose(steps)

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray): Input image; it is converted with
                ``Image.fromarray`` before the adjustments run.

        Returns:
            numpy.ndarray: Adjusted image converted back with ``np.array``.

        Raises:
            TypeError: If ``img`` is not a 2-D or 3-D ``numpy.ndarray``.
        """
        if _is_numpy_image(img):
            return np.array(self.transform(Image.fromarray(img)))
        raise TypeError('img should be ndarray. Got {}'.format(type(img)))
def conditional_save_info(self, split, average_meter, epoch):
    """Persist the averaged metrics of one pass and return them.

    Args:
        split (str): ``"train"`` or ``"val"`` append a row to the matching
            CSV file; ``"eval"`` writes ``eval.txt`` in the output directory
            through ``save_single_txt``; any value containing ``"test"``
            writes nothing.
        average_meter: Object whose ``average()`` result supplies the metric
            attributes written out.
        epoch: Value stored in the ``epoch`` column / passed to
            ``save_single_txt``.

    Returns:
        The object returned by ``average_meter.average()``.

    Raises:
        ValueError: If ``split`` matches none of the cases above.
    """
    avg = average_meter.average()
    if split == "train":
        csvfile_name = self.train_csv
    elif split == "val":
        csvfile_name = self.val_csv
    elif split == "eval":
        eval_filename = os.path.join(self.output_directory, 'eval.txt')
        self.save_single_txt(eval_filename, avg, epoch)
        return avg
    elif "test" in split:
        return avg
    else:
        raise ValueError("wrong split provided to logger")

    # newline='' lets the csv module control line endings itself, as the
    # csv documentation requires for file objects handed to a writer.
    with open(csvfile_name, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({
            'epoch': epoch,
            'rmse': avg.rmse,
            'photo': avg.photometric,
            'mae': avg.mae,
            'irmse': avg.irmse,
            'imae': avg.imae,
            'mse': avg.mse,
            'silog': avg.silog,
            'squared_rel': avg.squared_rel,
            'absrel': avg.absrel,
            'lg10': avg.lg10,
            'delta1': avg.delta1,
            'delta2': avg.delta2,
            'delta3': avg.delta3,
            'gpu_time': avg.gpu_time,
            'data_time': avg.data_time
        })
    return avg
def rank_conditional_save_best(self, mode, result, epoch):
    """Compare ``result`` with the stored best and record it when better.

    The comparison uses ``get_ranking_error`` on both results; a strictly
    smaller error counts as better. The stored best is replaced, and
    ``save_best_txt`` is called, only when ``mode`` is ``"val"``; the previous
    best is kept in ``old_best_result``.

    Returns:
        The outcome of the ``<`` comparison, regardless of ``mode``.
    """
    candidate_error = self.get_ranking_error(result)
    incumbent_error = self.get_ranking_error(self.best_result)
    improved = candidate_error < incumbent_error

    if not improved or mode != "val":
        return improved

    # Right-hand side is evaluated first, so the old best is captured before
    # ``best_result`` is overwritten.
    self.old_best_result, self.best_result = self.best_result, result
    self.save_best_txt(result, epoch)
    return improved
def adjust_learning_rate(lr_init, optimizer, epoch, args):
    """Apply a step-wise learning-rate schedule to every param group.

    When ``args.network_model == 'pe'`` and ``args.freeze_backbone`` equals
    ``False`` the multipliers are 0.5 / 0.1 / 0.01 / 0.0005 / 0.00001 from
    epochs 10 / 20 / 30 / 40 / 50. Otherwise they are 0.5 / 0.1 / 0.01 from
    epochs 10 / 15 / 25. Before the first milestone the rate is ``lr_init``.

    Args:
        lr_init (float): Initial learning rate the multipliers apply to.
        optimizer: Object exposing ``param_groups``, a list of dicts whose
            ``'lr'`` entry is overwritten.
        epoch (int): Epoch index compared against the milestones.
        args: Namespace providing ``network_model`` and ``freeze_backbone``.

    Returns:
        float: The learning rate that was written to the param groups.
    """
    if (args.network_model == 'pe' and args.freeze_backbone == False):
        milestones = ((10, 0.5), (20, 0.1), (30, 0.01),
                      (40, 0.0005), (50, 0.00001))
    else:
        milestones = ((10, 0.5), (15, 0.1), (25, 0.01))

    lr = lr_init
    # Milestones are ascending, so the last one reached wins.
    for first_epoch, factor in milestones:
        if epoch >= first_epoch:
            lr = lr_init * factor

    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr
def get_folder_name(args):
    """Build the output directory path for a run.

    The folder name encodes the input modality, criterion, learning rate,
    batch size, weight decay, jitter and the current local time (minute
    resolution), and is placed under ``args.result``.

    Args:
        args: Namespace providing ``result``, ``input``, ``criterion``,
            ``lr``, ``batch_size``, ``weight_decay`` and ``jitter``.

    Returns:
        str: ``args.result`` joined with the generated folder name.
    """
    stamp = time.strftime('%Y-%m-%d@%H-%M')
    folder = 'input={}.criterion={}.lr={}.bs={}.wd={}.jitter={}.time={}'.format(
        args.input,
        args.criterion,
        args.lr,
        args.batch_size,
        args.weight_decay,
        args.jitter,
        stamp,
    )
    return os.path.join(args.result, folder)
parser.add_argument('--start-epoch-bias',
                    default=0,
                    type=int,
                    metavar='N',
                    help='manual epoch number bias(useful on restarts)')
# The label must be concatenated, not used as the join separator: the former
# ``'loss function: | '.join(names)`` interleaved the label between names.
parser.add_argument('-c',
                    '--criterion',
                    metavar='LOSS',
                    default='l2',
                    choices=criteria.loss_names,
                    help='loss function: ' + ' | '.join(criteria.loss_names) +
                    ' (default: l2)')
parser.add_argument('-b',
                    '--batch-size',
                    default=1,
                    type=int,
                    help='mini-batch size (default: 1)')
parser.add_argument('--lr',
                    '--learning-rate',
                    default=1e-3,
                    type=float,
                    metavar='LR',
                    help='initial learning rate (default 1e-3)')
parser.add_argument('--weight-decay',
                    '--wd',
                    default=1e-6,
                    type=float,
                    metavar='W',
                    help='weight decay (default: 1e-6)')
parser.add_argument('--print-freq',
                    '-p',
                    default=10,
                    type=int,
                    metavar='N',
                    help='print frequency (default: 10)')
parser.add_argument('--resume',
                    default='',
                    type=str,
                    metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('--data-folder',
                    default='/data/dataset/kitti_depth/depth',
                    type=str,
                    metavar='PATH',
                    help='data folder (default: none)')
parser.add_argument('--data-folder-rgb',
                    default='/data/dataset/kitti_raw',
                    type=str,
                    metavar='PATH',
                    help='data folder rgb (default: none)')
parser.add_argument('--data-folder-save',
                    default='',
                    type=str,
                    metavar='PATH',
                    help='data folder test results(default: none)')
parser.add_argument('--detpath',
                    default='../../data/kitti/training',
                    type=str,
                    metavar='PATH',
                    help='data folder of 3D object detection')
# Same join fix as for --criterion.
parser.add_argument('-i',
                    '--input',
                    type=str,
                    default='rgbd',
                    choices=input_options,
                    help='input: ' + ' | '.join(input_options))
parser.add_argument('--val',
                    type=str,
                    default="select",
                    choices=["select", "full"],
                    help='full or select validation set')
parser.add_argument('--jitter',
                    type=float,
                    default=0.1,
                    help='color jitter for images')
parser.add_argument(
    '--rank-metric',
    type=str,
    default='rmse',
    choices=[m for m in dir(Result()) if not m.startswith('_')],
    help='metrics for which best result is saved')
# NOTE: the default is a non-empty path, so ``args.evaluate`` is truthy unless
# an empty string is passed explicitly.
parser.add_argument('-e',
                    '--evaluate',
                    default='pe.pth.tar',
                    type=str,
                    metavar='PATH')
parser.add_argument('-f',
                    '--freeze-backbone',
                    action="store_true",
                    default=False,
                    help='freeze parameters in backbone')
# NOTE: ``store_true`` combined with ``default=True`` means this flag is always
# True from the command line; it cannot be switched off.
parser.add_argument('--test',
                    action="store_true",
                    default=True,
                    help='save result kitti test dataset for submission')
parser.add_argument('--cpu',
                    action="store_true",
                    default=False,
                    help='run on cpu')

# random cropping
parser.add_argument('--not-random-crop',
                    action="store_true",
                    default=False,
                    help='prohibit random cropping')
parser.add_argument('-he',
                    '--random-crop-height',
                    default=320,
                    type=int,
                    metavar='N',
                    help='random crop height')
parser.add_argument('-w',
                    '--random-crop-width',
                    default=1216,
                    type=int,
                    metavar='N',
                    help='random crop width')

# geometric encoding
parser.add_argument('-co',
                    '--convolutional-layer-encoding',
                    default="xyz",
                    type=str,
                    choices=["std", "z", "uv", "xyz"],
                    help='information concatenated in encoder convolutional layers')

# dilation rate of DA-CSPN++
# The default is given as an int to match ``type=int`` and ``choices``.
parser.add_argument('-d',
                    '--dilation-rate',
                    default=2,
                    type=int,
                    choices=[1, 2, 4],
                    help='CSPN++ dilation rate')

args = parser.parse_args()
args.result = os.path.join('..', 'results')
# Input-modality flags are derived from substrings of ``args.input``.
args.use_rgb = ('rgb' in args.input)
args.use_d = 'd' in args.input
args.use_g = 'g' in args.input
args.val_h = 352
args.val_w = 1216
print(args)

cuda = torch.cuda.is_available() and not args.cpu
if cuda:
    import torch.backends.cudnn as cudnn
    cudnn.benchmark = True
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("=> using '{}' for computation.".format(device))

# define loss functions
depth_criterion = criteria.MaskedMSELoss() if (
    args.criterion == 'l2') else criteria.MaskedL1Loss()

# multi batch: number of iterations whose gradients are accumulated per
# optimizer step in ``iterate``
multi_batch_size = 1
average_meter] # switch to appropriate mode assert mode in ["train", "val", "eval", "test_prediction", "test_completion"], \ "unsupported mode: {}".format(mode) if mode == 'train': model.train() lr = helper.adjust_learning_rate(args.lr, optimizer, actual_epoch, args) else: model.eval() lr = 0 torch.cuda.empty_cache() for i, batch_data in enumerate(loader): dstart = time.time() batch_data = { key: val.to(device) for key, val in batch_data.items() if val is not None } gt = batch_data[ 'gt'] if mode != 'test_prediction' and mode != 'test_completion' else None data_time = time.time() - dstart pred = None start = None gpu_time = 0 #start = time.time() #pred = model(batch_data) #gpu_time = time.time() - start #''' if(args.network_model == 'e'): start = time.time() st1_pred, st2_pred, pred = model(batch_data) else: start = time.time() pred = model(batch_data) if(args.evaluate): gpu_time = time.time() - start #''' depth_loss, photometric_loss, smooth_loss, mask = 0, 0, 0, None # inter loss_param st1_loss, st2_loss, loss = 0, 0, 0 w_st1, w_st2 = 0, 0 round1, round2, round3 = 1, 3, None if(actual_epoch <= round1): w_st1, w_st2 = 0.2, 0.2 elif(actual_epoch <= round2): w_st1, w_st2 = 0.05, 0.05 else: w_st1, w_st2 = 0, 0 if mode == 'train': # Loss 1: the direct depth supervision from ground truth label # mask=1 indicates that a pixel does not ground truth labels depth_loss = depth_criterion(pred, gt) if args.network_model == 'e': st1_loss = depth_criterion(st1_pred, gt) st2_loss = depth_criterion(st2_pred, gt) loss = (1 - w_st1 - w_st2) * depth_loss + w_st1 * st1_loss + w_st2 * st2_loss else: loss = depth_loss if i % multi_batch_size == 0: optimizer.zero_grad() loss.backward() if i % multi_batch_size == (multi_batch_size-1) or i==(len(loader)-1): optimizer.step() print("loss:", loss, " epoch:", epoch, " ", i, "/", len(loader)) if mode == "test_completion": vis_utils.save_depth_as_points(pred, i, args.detpath) if(not args.evaluate): gpu_time = time.time() - start # measure 
accuracy and record loss with torch.no_grad(): mini_batch_size = next(iter(batch_data.values())).size(0) result = Result() if mode != 'test_prediction' and mode != 'test_completion': result.evaluate(pred.data, gt.data, photometric_loss) [ m.update(result, gpu_time, data_time, mini_batch_size) for m in meters ] if mode != 'train': logger.conditional_print(mode, i, epoch, lr, len(loader), block_average_meter, average_meter) logger.conditional_save_img_comparison(mode, i, batch_data, pred, epoch) logger.conditional_save_pred(mode, i, pred, epoch) end_time = time.time()-dstart print('iter: ', i,' ', 'remain time:', (len(loader)-i)*end_time//60, 'min') avg = logger.conditional_save_info(mode, average_meter, epoch) is_best = logger.rank_conditional_save_best(mode, avg, epoch) if is_best and not (mode == "train"): logger.save_img_comparison_as_best(mode, epoch) logger.conditional_summarize(mode, avg, is_best) return avg, is_best def main(): global args checkpoint = None is_eval = False if args.evaluate: args_new = args if os.path.isfile(args.evaluate): print("=> loading checkpoint '{}' ... ".format(args.evaluate), end='') checkpoint = torch.load(args.evaluate, map_location=device) #args = checkpoint['args'] args.start_epoch = checkpoint['epoch'] + 1 args.data_folder = args_new.data_folder args.val = args_new.val is_eval = True print("Completed.") else: is_eval = True print("No model found at '{}'".format(args.evaluate)) #return elif args.resume: # optionally resume from a checkpoint args_new = args if os.path.isfile(args.resume): print("=> loading checkpoint '{}' ... ".format(args.resume), end='') checkpoint = torch.load(args.resume, map_location=device) args.start_epoch = checkpoint['epoch'] + 1 args.data_folder = args_new.data_folder args.val = args_new.val print("Completed. Resuming from epoch {}.".format( checkpoint['epoch'])) else: print("No checkpoint found at '{}'".format(args.resume)) return print("=> creating model and optimizer ... 
", end='') model = None penet_accelerated = False if (args.network_model == 'e'): model = ENet(args).to(device) elif (is_eval == False): if (args.dilation_rate == 1): model = PENet_C1_train(args).to(device) elif (args.dilation_rate == 2): model = PENet_C2_train(args).to(device) elif (args.dilation_rate == 4): model = PENet_C4(args).to(device) penet_accelerated = True else: if (args.dilation_rate == 1): model = PENet_C1(args).to(device) penet_accelerated = True elif (args.dilation_rate == 2): model = PENet_C2(args).to(device) penet_accelerated = True elif (args.dilation_rate == 4): model = PENet_C4(args).to(device) penet_accelerated = True if (penet_accelerated == True): model.encoder3.requires_grad = False model.encoder5.requires_grad = False model.encoder7.requires_grad = False model_named_params = None model_bone_params = None model_new_params = None optimizer = None if checkpoint is not None: #print(checkpoint.keys()) if (args.freeze_backbone == True): model.backbone.load_state_dict(checkpoint['model']) else: model.load_state_dict(checkpoint['model'], strict=False) #optimizer.load_state_dict(checkpoint['optimizer']) print("=> checkpoint state loaded.") logger = helper.logger(args) if checkpoint is not None: logger.best_result = checkpoint['best_result'] del checkpoint print("=> logger created.") test_dataset = None test_loader = None if (args.test): test_dataset = KittiDepth('test_completion', args) test_loader = torch.utils.data.DataLoader( test_dataset, batch_size=1, shuffle=False, num_workers=1, pin_memory=True) iterate("test_completion", args, test_loader, model, None, logger, 0) return val_dataset = KittiDepth('val', args) val_loader = torch.utils.data.DataLoader( val_dataset, batch_size=1, shuffle=False, num_workers=2, pin_memory=True) # set batch size to be 1 for validation print("\t==> val_loader size:{}".format(len(val_loader))) if is_eval == True: for p in model.parameters(): p.requires_grad = False result, is_best = iterate("val", args, val_loader, 
model, None, logger, args.start_epoch - 1) return if (args.freeze_backbone == True): for p in model.backbone.parameters(): p.requires_grad = False model_named_params = [ p for _, p in model.named_parameters() if p.requires_grad ] optimizer = torch.optim.Adam(model_named_params, lr=args.lr, weight_decay=args.weight_decay, betas=(0.9, 0.99)) elif (args.network_model == 'pe'): model_bone_params = [ p for _, p in model.backbone.named_parameters() if p.requires_grad ] model_new_params = [ p for _, p in model.named_parameters() if p.requires_grad ] model_new_params = list(set(model_new_params) - set(model_bone_params)) optimizer = torch.optim.Adam([{'params': model_bone_params, 'lr': args.lr / 10}, {'params': model_new_params}], lr=args.lr, weight_decay=args.weight_decay, betas=(0.9, 0.99)) else: model_named_params = [ p for _, p in model.named_parameters() if p.requires_grad ] optimizer = torch.optim.Adam(model_named_params, lr=args.lr, weight_decay=args.weight_decay, betas=(0.9, 0.99)) print("completed.") model = torch.nn.DataParallel(model) # Data loading code print("=> creating data loaders ... 
") if not is_eval: train_dataset = KittiDepth('train', args) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True, sampler=None) print("\t==> train_loader size:{}".format(len(train_loader))) print("=> starting main loop ...") for epoch in range(args.start_epoch, args.epochs): print("=> starting training epoch {} ..".format(epoch)) iterate("train", args, train_loader, model, optimizer, logger, epoch) # train for one epoch # validation memory reset for p in model.parameters(): p.requires_grad = False result, is_best = iterate("val", args, val_loader, model, None, logger, epoch) # evaluate on validation set for p in model.parameters(): p.requires_grad = True if (args.freeze_backbone == True): for p in model.module.backbone.parameters(): p.requires_grad = False if (penet_accelerated == True): model.module.encoder3.requires_grad = False model.module.encoder5.requires_grad = False model.module.encoder7.requires_grad = False helper.save_checkpoint({ # save checkpoint 'epoch': epoch, 'model': model.module.state_dict(), 'best_result': logger.best_result, 'optimizer' : optimizer.state_dict(), 'args' : args, }, is_best, epoch, logger.output_directory) if __name__ == '__main__': main() ================================================ FILE: tools/PENet/metrics.py ================================================ import torch import math import numpy as np lg_e_10 = math.log(10) def log10(x): """Convert a new tensor with the base-10 logarithm of the elements of x. 
""" return torch.log(x) / lg_e_10 class Result(object): def __init__(self): self.irmse = 0 self.imae = 0 self.mse = 0 self.rmse = 0 self.mae = 0 self.absrel = 0 self.squared_rel = 0 self.lg10 = 0 self.delta1 = 0 self.delta2 = 0 self.delta3 = 0 self.data_time = 0 self.gpu_time = 0 self.silog = 0 # Scale invariant logarithmic error [log(m)*100] self.photometric = 0 def set_to_worst(self): self.irmse = np.inf self.imae = np.inf self.mse = np.inf self.rmse = np.inf self.mae = np.inf self.absrel = np.inf self.squared_rel = np.inf self.lg10 = np.inf self.silog = np.inf self.delta1 = 0 self.delta2 = 0 self.delta3 = 0 self.data_time = 0 self.gpu_time = 0 def update(self, irmse, imae, mse, rmse, mae, absrel, squared_rel, lg10, \ delta1, delta2, delta3, gpu_time, data_time, silog, photometric=0): self.irmse = irmse self.imae = imae self.mse = mse self.rmse = rmse self.mae = mae self.absrel = absrel self.squared_rel = squared_rel self.lg10 = lg10 self.delta1 = delta1 self.delta2 = delta2 self.delta3 = delta3 self.data_time = data_time self.gpu_time = gpu_time self.silog = silog self.photometric = photometric def evaluate(self, output, target, photometric=0): valid_mask = target > 0.1 # convert from meters to mm output_mm = 1e3 * output[valid_mask] target_mm = 1e3 * target[valid_mask] abs_diff = (output_mm - target_mm).abs() self.mse = float((torch.pow(abs_diff, 2)).mean()) self.rmse = math.sqrt(self.mse) self.mae = float(abs_diff.mean()) self.lg10 = float((log10(output_mm) - log10(target_mm)).abs().mean()) self.absrel = float((abs_diff / target_mm).mean()) self.squared_rel = float(((abs_diff / target_mm)**2).mean()) maxRatio = torch.max(output_mm / target_mm, target_mm / output_mm) self.delta1 = float((maxRatio < 1.25).float().mean()) self.delta2 = float((maxRatio < 1.25**2).float().mean()) self.delta3 = float((maxRatio < 1.25**3).float().mean()) self.data_time = 0 self.gpu_time = 0 # silog uses meters err_log = torch.log(target[valid_mask]) - torch.log(output[valid_mask]) 
normalized_squared_log = (err_log**2).mean() log_mean = err_log.mean() self.silog = math.sqrt(normalized_squared_log - log_mean * log_mean) * 100 # convert from meters to km inv_output_km = (1e-3 * output[valid_mask])**(-1) inv_target_km = (1e-3 * target[valid_mask])**(-1) abs_inv_diff = (inv_output_km - inv_target_km).abs() self.irmse = math.sqrt((torch.pow(abs_inv_diff, 2)).mean()) self.imae = float(abs_inv_diff.mean()) self.photometric = float(photometric) class AverageMeter(object): def __init__(self): self.reset(time_stable=True) def reset(self, time_stable): self.count = 0.0 self.sum_irmse = 0 self.sum_imae = 0 self.sum_mse = 0 self.sum_rmse = 0 self.sum_mae = 0 self.sum_absrel = 0 self.sum_squared_rel = 0 self.sum_lg10 = 0 self.sum_delta1 = 0 self.sum_delta2 = 0 self.sum_delta3 = 0 self.sum_data_time = 0 self.sum_gpu_time = 0 self.sum_photometric = 0 self.sum_silog = 0 self.time_stable = time_stable self.time_stable_counter_init = 10 self.time_stable_counter = self.time_stable_counter_init def update(self, result, gpu_time, data_time, n=1): self.count += n self.sum_irmse += n * result.irmse self.sum_imae += n * result.imae self.sum_mse += n * result.mse self.sum_rmse += n * result.rmse self.sum_mae += n * result.mae self.sum_absrel += n * result.absrel self.sum_squared_rel += n * result.squared_rel self.sum_lg10 += n * result.lg10 self.sum_delta1 += n * result.delta1 self.sum_delta2 += n * result.delta2 self.sum_delta3 += n * result.delta3 self.sum_data_time += n * data_time if self.time_stable == True and self.time_stable_counter > 0: self.time_stable_counter = self.time_stable_counter - 1 else: self.sum_gpu_time += n * gpu_time self.sum_silog += n * result.silog self.sum_photometric += n * result.photometric def average(self): avg = Result() if self.time_stable == True: if self.count > 0 and self.count - self.time_stable_counter_init > 0: avg.update( self.sum_irmse / self.count, self.sum_imae / self.count, self.sum_mse / self.count, self.sum_rmse / 
self.count, self.sum_mae / self.count, self.sum_absrel / self.count, self.sum_squared_rel / self.count, self.sum_lg10 / self.count, self.sum_delta1 / self.count, self.sum_delta2 / self.count, self.sum_delta3 / self.count, self.sum_gpu_time / (self.count - self.time_stable_counter_init), self.sum_data_time / self.count, self.sum_silog / self.count, self.sum_photometric / self.count) elif self.count > 0: avg.update( self.sum_irmse / self.count, self.sum_imae / self.count, self.sum_mse / self.count, self.sum_rmse / self.count, self.sum_mae / self.count, self.sum_absrel / self.count, self.sum_squared_rel / self.count, self.sum_lg10 / self.count, self.sum_delta1 / self.count, self.sum_delta2 / self.count, self.sum_delta3 / self.count, 0, self.sum_data_time / self.count, self.sum_silog / self.count, self.sum_photometric / self.count) elif self.count > 0: avg.update( self.sum_irmse / self.count, self.sum_imae / self.count, self.sum_mse / self.count, self.sum_rmse / self.count, self.sum_mae / self.count, self.sum_absrel / self.count, self.sum_squared_rel / self.count, self.sum_lg10 / self.count, self.sum_delta1 / self.count, self.sum_delta2 / self.count, self.sum_delta3 / self.count, self.sum_gpu_time / self.count, self.sum_data_time / self.count, self.sum_silog / self.count, self.sum_photometric / self.count) return avg ================================================ FILE: tools/PENet/model.py ================================================ from basic import * class ENet(nn.Module): def __init__(self, args): super(ENet, self).__init__() self.args = args self.geofeature = None self.geoplanes = 3 if self.args.convolutional_layer_encoding == "xyz": self.geofeature = GeometryFeature() elif self.args.convolutional_layer_encoding == "std": self.geoplanes = 0 elif self.args.convolutional_layer_encoding == "uv": self.geoplanes = 2 elif self.args.convolutional_layer_encoding == "z": self.geoplanes = 1 # rgb encoder self.rgb_conv_init = convbnrelu(in_channels=4, out_channels=32, 
kernel_size=5, stride=1, padding=2) self.rgb_encoder_layer1 = BasicBlockGeo(inplanes=32, planes=64, stride=2, geoplanes=self.geoplanes) self.rgb_encoder_layer2 = BasicBlockGeo(inplanes=64, planes=64, stride=1, geoplanes=self.geoplanes) self.rgb_encoder_layer3 = BasicBlockGeo(inplanes=64, planes=128, stride=2, geoplanes=self.geoplanes) self.rgb_encoder_layer4 = BasicBlockGeo(inplanes=128, planes=128, stride=1, geoplanes=self.geoplanes) self.rgb_encoder_layer5 = BasicBlockGeo(inplanes=128, planes=256, stride=2, geoplanes=self.geoplanes) self.rgb_encoder_layer6 = BasicBlockGeo(inplanes=256, planes=256, stride=1, geoplanes=self.geoplanes) self.rgb_encoder_layer7 = BasicBlockGeo(inplanes=256, planes=512, stride=2, geoplanes=self.geoplanes) self.rgb_encoder_layer8 = BasicBlockGeo(inplanes=512, planes=512, stride=1, geoplanes=self.geoplanes) self.rgb_encoder_layer9 = BasicBlockGeo(inplanes=512, planes=1024, stride=2, geoplanes=self.geoplanes) self.rgb_encoder_layer10 = BasicBlockGeo(inplanes=1024, planes=1024, stride=1, geoplanes=self.geoplanes) self.rgb_decoder_layer8 = deconvbnrelu(in_channels=1024, out_channels=512, kernel_size=5, stride=2, padding=2, output_padding=1) self.rgb_decoder_layer6 = deconvbnrelu(in_channels=512, out_channels=256, kernel_size=5, stride=2, padding=2, output_padding=1) self.rgb_decoder_layer4 = deconvbnrelu(in_channels=256, out_channels=128, kernel_size=5, stride=2, padding=2, output_padding=1) self.rgb_decoder_layer2 = deconvbnrelu(in_channels=128, out_channels=64, kernel_size=5, stride=2, padding=2, output_padding=1) self.rgb_decoder_layer0 = deconvbnrelu(in_channels=64, out_channels=32, kernel_size=5, stride=2, padding=2, output_padding=1) self.rgb_decoder_output = deconvbnrelu(in_channels=32, out_channels=2, kernel_size=3, stride=1, padding=1, output_padding=0) # depth encoder self.depth_conv_init = convbnrelu(in_channels=2, out_channels=32, kernel_size=5, stride=1, padding=2) self.depth_layer1 = BasicBlockGeo(inplanes=32, planes=64, 
stride=2, geoplanes=self.geoplanes) self.depth_layer2 = BasicBlockGeo(inplanes=64, planes=64, stride=1, geoplanes=self.geoplanes) self.depth_layer3 = BasicBlockGeo(inplanes=128, planes=128, stride=2, geoplanes=self.geoplanes) self.depth_layer4 = BasicBlockGeo(inplanes=128, planes=128, stride=1, geoplanes=self.geoplanes) self.depth_layer5 = BasicBlockGeo(inplanes=256, planes=256, stride=2, geoplanes=self.geoplanes) self.depth_layer6 = BasicBlockGeo(inplanes=256, planes=256, stride=1, geoplanes=self.geoplanes) self.depth_layer7 = BasicBlockGeo(inplanes=512, planes=512, stride=2, geoplanes=self.geoplanes) self.depth_layer8 = BasicBlockGeo(inplanes=512, planes=512, stride=1, geoplanes=self.geoplanes) self.depth_layer9 = BasicBlockGeo(inplanes=1024, planes=1024, stride=2, geoplanes=self.geoplanes) self.depth_layer10 = BasicBlockGeo(inplanes=1024, planes=1024, stride=1, geoplanes=self.geoplanes) # decoder self.decoder_layer1 = deconvbnrelu(in_channels=1024, out_channels=512, kernel_size=5, stride=2, padding=2, output_padding=1) self.decoder_layer2 = deconvbnrelu(in_channels=512, out_channels=256, kernel_size=5, stride=2, padding=2, output_padding=1) self.decoder_layer3 = deconvbnrelu(in_channels=256, out_channels=128, kernel_size=5, stride=2, padding=2, output_padding=1) self.decoder_layer4 = deconvbnrelu(in_channels=128, out_channels=64, kernel_size=5, stride=2, padding=2, output_padding=1) self.decoder_layer5 = deconvbnrelu(in_channels=64, out_channels=32, kernel_size=5, stride=2, padding=2, output_padding=1) self.decoder_layer6 = convbnrelu(in_channels=32, out_channels=2, kernel_size=3, stride=1, padding=1) self.softmax = nn.Softmax(dim=1) self.pooling = nn.AvgPool2d(kernel_size=2) self.sparsepooling = SparseDownSampleClose(stride=2) weights_init(self) def forward(self, input): #independent input rgb = input['rgb'] d = input['d'] position = input['position'] K = input['K'] unorm = position[:, 0:1, :, :] vnorm = position[:, 1:2, :, :] f352 = K[:, 1, 1] f352 = 
f352.unsqueeze(1) f352 = f352.unsqueeze(2) f352 = f352.unsqueeze(3) c352 = K[:, 1, 2] c352 = c352.unsqueeze(1) c352 = c352.unsqueeze(2) c352 = c352.unsqueeze(3) f1216 = K[:, 0, 0] f1216 = f1216.unsqueeze(1) f1216 = f1216.unsqueeze(2) f1216 = f1216.unsqueeze(3) c1216 = K[:, 0, 2] c1216 = c1216.unsqueeze(1) c1216 = c1216.unsqueeze(2) c1216 = c1216.unsqueeze(3) vnorm_s2 = self.pooling(vnorm) vnorm_s3 = self.pooling(vnorm_s2) vnorm_s4 = self.pooling(vnorm_s3) vnorm_s5 = self.pooling(vnorm_s4) vnorm_s6 = self.pooling(vnorm_s5) unorm_s2 = self.pooling(unorm) unorm_s3 = self.pooling(unorm_s2) unorm_s4 = self.pooling(unorm_s3) unorm_s5 = self.pooling(unorm_s4) unorm_s6 = self.pooling(unorm_s5) valid_mask = torch.where(d>0, torch.full_like(d, 1.0), torch.full_like(d, 0.0)) d_s2, vm_s2 = self.sparsepooling(d, valid_mask) d_s3, vm_s3 = self.sparsepooling(d_s2, vm_s2) d_s4, vm_s4 = self.sparsepooling(d_s3, vm_s3) d_s5, vm_s5 = self.sparsepooling(d_s4, vm_s4) d_s6, vm_s6 = self.sparsepooling(d_s5, vm_s5) geo_s1 = None geo_s2 = None geo_s3 = None geo_s4 = None geo_s5 = None geo_s6 = None if self.args.convolutional_layer_encoding == "xyz": geo_s1 = self.geofeature(d, vnorm, unorm, 352, 1216, c352, c1216, f352, f1216) geo_s2 = self.geofeature(d_s2, vnorm_s2, unorm_s2, 352 / 2, 1216 / 2, c352, c1216, f352, f1216) geo_s3 = self.geofeature(d_s3, vnorm_s3, unorm_s3, 352 / 4, 1216 / 4, c352, c1216, f352, f1216) geo_s4 = self.geofeature(d_s4, vnorm_s4, unorm_s4, 352 / 8, 1216 / 8, c352, c1216, f352, f1216) geo_s5 = self.geofeature(d_s5, vnorm_s5, unorm_s5, 352 / 16, 1216 / 16, c352, c1216, f352, f1216) geo_s6 = self.geofeature(d_s6, vnorm_s6, unorm_s6, 352 / 32, 1216 / 32, c352, c1216, f352, f1216) elif self.args.convolutional_layer_encoding == "uv": geo_s1 = torch.cat((vnorm, unorm), dim=1) geo_s2 = torch.cat((vnorm_s2, unorm_s2), dim=1) geo_s3 = torch.cat((vnorm_s3, unorm_s3), dim=1) geo_s4 = torch.cat((vnorm_s4, unorm_s4), dim=1) geo_s5 = torch.cat((vnorm_s5, unorm_s5), dim=1) geo_s6 
= torch.cat((vnorm_s6, unorm_s6), dim=1) elif self.args.convolutional_layer_encoding == "z": geo_s1 = d geo_s2 = d_s2 geo_s3 = d_s3 geo_s4 = d_s4 geo_s5 = d_s5 geo_s6 = d_s6 #embeded input #rgb = input[:, 0:3, :, :] #d = input[:, 3:4, :, :] # b 1 352 1216 rgb_feature = self.rgb_conv_init(torch.cat((rgb, d), dim=1)) rgb_feature1 = self.rgb_encoder_layer1(rgb_feature, geo_s1, geo_s2) # b 32 176 608 rgb_feature2 = self.rgb_encoder_layer2(rgb_feature1, geo_s2, geo_s2) # b 32 176 608 rgb_feature3 = self.rgb_encoder_layer3(rgb_feature2, geo_s2, geo_s3) # b 64 88 304 rgb_feature4 = self.rgb_encoder_layer4(rgb_feature3, geo_s3, geo_s3) # b 64 88 304 rgb_feature5 = self.rgb_encoder_layer5(rgb_feature4, geo_s3, geo_s4) # b 128 44 152 rgb_feature6 = self.rgb_encoder_layer6(rgb_feature5, geo_s4, geo_s4) # b 128 44 152 rgb_feature7 = self.rgb_encoder_layer7(rgb_feature6, geo_s4, geo_s5) # b 256 22 76 rgb_feature8 = self.rgb_encoder_layer8(rgb_feature7, geo_s5, geo_s5) # b 256 22 76 rgb_feature9 = self.rgb_encoder_layer9(rgb_feature8, geo_s5, geo_s6) # b 512 11 38 rgb_feature10 = self.rgb_encoder_layer10(rgb_feature9, geo_s6, geo_s6) # b 512 11 38 rgb_feature_decoder8 = self.rgb_decoder_layer8(rgb_feature10) rgb_feature8_plus = rgb_feature_decoder8 + rgb_feature8 rgb_feature_decoder6 = self.rgb_decoder_layer6(rgb_feature8_plus) rgb_feature6_plus = rgb_feature_decoder6 + rgb_feature6 rgb_feature_decoder4 = self.rgb_decoder_layer4(rgb_feature6_plus) rgb_feature4_plus = rgb_feature_decoder4 + rgb_feature4 rgb_feature_decoder2 = self.rgb_decoder_layer2(rgb_feature4_plus) rgb_feature2_plus = rgb_feature_decoder2 + rgb_feature2 # b 32 176 608 rgb_feature_decoder0 = self.rgb_decoder_layer0(rgb_feature2_plus) rgb_feature0_plus = rgb_feature_decoder0 + rgb_feature rgb_output = self.rgb_decoder_output(rgb_feature0_plus) rgb_depth = rgb_output[:, 0:1, :, :] rgb_conf = rgb_output[:, 1:2, :, :] # ----------------------------------------------------------------------- # mask = 
torch.where(d>0, torch.full_like(d, 1.0), torch.full_like(d, 0.0)) # input = torch.cat([d, mask], 1) sparsed_feature = self.depth_conv_init(torch.cat((d, rgb_depth), dim=1)) sparsed_feature1 = self.depth_layer1(sparsed_feature, geo_s1, geo_s2)# b 32 176 608 sparsed_feature2 = self.depth_layer2(sparsed_feature1, geo_s2, geo_s2) # b 32 176 608 sparsed_feature2_plus = torch.cat([rgb_feature2_plus, sparsed_feature2], 1) sparsed_feature3 = self.depth_layer3(sparsed_feature2_plus, geo_s2, geo_s3) # b 64 88 304 sparsed_feature4 = self.depth_layer4(sparsed_feature3, geo_s3, geo_s3) # b 64 88 304 sparsed_feature4_plus = torch.cat([rgb_feature4_plus, sparsed_feature4], 1) sparsed_feature5 = self.depth_layer5(sparsed_feature4_plus, geo_s3, geo_s4) # b 128 44 152 sparsed_feature6 = self.depth_layer6(sparsed_feature5, geo_s4, geo_s4) # b 128 44 152 sparsed_feature6_plus = torch.cat([rgb_feature6_plus, sparsed_feature6], 1) sparsed_feature7 = self.depth_layer7(sparsed_feature6_plus, geo_s4, geo_s5) # b 256 22 76 sparsed_feature8 = self.depth_layer8(sparsed_feature7, geo_s5, geo_s5) # b 256 22 76 sparsed_feature8_plus = torch.cat([rgb_feature8_plus, sparsed_feature8], 1) sparsed_feature9 = self.depth_layer9(sparsed_feature8_plus, geo_s5, geo_s6) # b 512 11 38 sparsed_feature10 = self.depth_layer10(sparsed_feature9, geo_s6, geo_s6) # b 512 11 38 # ----------------------------------------------------------------------------------------- fusion1 = rgb_feature10 + sparsed_feature10 decoder_feature1 = self.decoder_layer1(fusion1) fusion2 = sparsed_feature8 + decoder_feature1 decoder_feature2 = self.decoder_layer2(fusion2) fusion3 = sparsed_feature6 + decoder_feature2 decoder_feature3 = self.decoder_layer3(fusion3) fusion4 = sparsed_feature4 + decoder_feature3 decoder_feature4 = self.decoder_layer4(fusion4) fusion5 = sparsed_feature2 + decoder_feature4 decoder_feature5 = self.decoder_layer5(fusion5) depth_output = self.decoder_layer6(decoder_feature5) d_depth, d_conf = 
def _build_cspn_encoder(ks):
    """Build the fixed one-hot tensor that the PENet heads pass to ``kernel_trans``.

    The result has shape ``(ks*ks, ks*ks, ks, ks)`` and is zero except for
    ``ks*ks`` ones: for every ``k`` in ``range(ks*ks)`` the element
    ``[ks*ks-1-k, k, ks-1-k//ks, ks-1-k%ks]`` is 1.

    The tensor is created on the CPU so the model can be built on machines
    without CUDA; it is registered as a parameter, so ``module.to(device)``
    moves it with the rest of the model.

    Args:
        ks: Kernel size.

    Returns:
        ``nn.Parameter`` with ``requires_grad=False``.
    """
    num_taps = ks * ks
    out_idx = [num_taps - 1 - k for k in range(num_taps)]
    in_idx = list(range(num_taps))
    row_idx = [ks - 1 - k // ks for k in range(num_taps)]
    col_idx = [ks - 1 - k % ks for k in range(num_taps)]

    encoder = torch.zeros(num_taps, num_taps, ks, ks)
    # Index with a tuple: a plain list of index lists is a deprecated way to
    # spell multidimensional indexing.
    encoder[(out_idx, in_idx, row_idx, col_idx)] = 1
    return nn.Parameter(encoder, requires_grad=False)


class PENet_C1(nn.Module):
    """ENet backbone followed by one refinement stage with 3x3, 5x5 and 7x7 CSPN kernels."""

    def __init__(self, args):
        super(PENet_C1, self).__init__()

        self.backbone = ENet(args)
        #self.backbone = Bone()
        self.mask_layer = convbn(64, 3)

        self.kernel_conf_layer = convbn(64, 3)
        self.iter_conf_layer = convbn(64, 12)
        self.iter_guide_layer3 = CSPNGenerateAccelerate(64, 3)
        self.iter_guide_layer5 = CSPNGenerateAccelerate(64, 5)
        self.iter_guide_layer7 = CSPNGenerateAccelerate(64, 7)

        self.softmax = nn.Softmax(dim=1)
        self.CSPN3 = CSPNAccelerate(3)
        self.CSPN5 = CSPNAccelerate(5, padding=2)
        self.CSPN7 = CSPNAccelerate(7, padding=3)

        # Fixed (non-trainable) tensors handed to kernel_trans in forward().
        self.encoder3 = _build_cspn_encoder(3)
        self.encoder5 = _build_cspn_encoder(5)
        self.encoder7 = _build_cspn_encoder(7)

        weights_init(self)

    def forward(self, input):
        """Refine the backbone's depth estimate.

        Args:
            input: Dict of tensors. ``input['d']`` is read here (positive
                entries are treated as valid measurements); the whole dict is
                forwarded to the backbone.

        Returns:
            The refined depth tensor.
        """
        #rgb = input['rgb']
        d = input['d']
        valid_mask = torch.where(d>0, torch.full_like(d, 1.0), torch.full_like(d, 0.0))

        feature, coarse_depth = self.backbone(input)

        # One replacement mask per kernel size, zero wherever `d` has no
        # measurement.
        mask = self.mask_layer(feature)
        mask = torch.sigmoid(mask)
        mask = mask*valid_mask
        mask3 = mask[:, 0:1, :, :]
        mask5 = mask[:, 1:2, :, :]
        mask7 = mask[:, 2:3, :, :]

        # Softmax weights across the three kernel sizes.
        kernel_conf = self.kernel_conf_layer(feature)
        kernel_conf = self.softmax(kernel_conf)
        kernel_conf3 = kernel_conf[:, 0:1, :, :]
        kernel_conf5 = kernel_conf[:, 1:2, :, :]
        kernel_conf7 = kernel_conf[:, 2:3, :, :]

        # Softmax weights across the four snapshots, separately per kernel size.
        conf = self.iter_conf_layer(feature)
        conf3 = self.softmax(conf[:, 0:4, :, :])
        conf5 = self.softmax(conf[:, 4:8, :, :])
        conf7 = self.softmax(conf[:, 8:12, :, :])

        guide3 = self.iter_guide_layer3(feature)
        guide5 = self.iter_guide_layer5(feature)
        guide7 = self.iter_guide_layer7(feature)

        #init
        depth = coarse_depth
        depth3 = depth5 = depth7 = depth

        d3_list = []
        d5_list = []
        d7_list = []

        #prop
        guide3 = kernel_trans(guide3, self.encoder3)
        guide5 = kernel_trans(guide5, self.encoder5)
        guide7 = kernel_trans(guide7, self.encoder7)

        for i in range(12):
            depth3 = self.CSPN3(guide3, depth3, depth)
            depth3 = mask3*d + (1-mask3)*depth3
            depth5 = self.CSPN5(guide5, depth5, depth)
            depth5 = mask5*d + (1-mask5)*depth5
            depth7 = self.CSPN7(guide7, depth7, depth)
            depth7 = mask7*d + (1-mask7)*depth7

            # Keep a snapshot after iterations 3, 6, 9 and 12.
            if i % 3 == 2:
                d3_list.append(depth3)
                d5_list.append(depth5)
                d7_list.append(depth7)

        # Weighted sum of the 12 snapshots, accumulated in the fixed order
        # 3x3 -> 5x5 -> 7x7, oldest snapshot first.
        refined_depth = None
        for kernel_weight, iter_weight, snapshots in (
                (kernel_conf3, conf3, d3_list),
                (kernel_conf5, conf5, d5_list),
                (kernel_conf7, conf7, d7_list)):
            for j, snapshot in enumerate(snapshots):
                term = snapshot * (kernel_weight * iter_weight[:, j:j + 1, :, :])
                refined_depth = term if refined_depth is None else refined_depth + term

        return refined_depth


class PENet_C2(nn.Module):
    """ENet backbone followed by two refinement stages (dilation 2, then dilation 1)."""

    def __init__(self, args):
        super(PENet_C2, self).__init__()

        self.backbone = ENet(args)

        self.kernel_conf_layer = convbn(64, 3)
        self.mask_layer = convbn(64, 1)
        self.iter_guide_layer3 = CSPNGenerateAccelerate(64, 3)
        self.iter_guide_layer5 = CSPNGenerateAccelerate(64, 5)
        self.iter_guide_layer7 = CSPNGenerateAccelerate(64, 7)

        self.kernel_conf_layer_s2 = convbn(128, 3)
        self.mask_layer_s2 = convbn(128, 1)
        self.iter_guide_layer3_s2 = CSPNGenerateAccelerate(128, 3)
        self.iter_guide_layer5_s2 = CSPNGenerateAccelerate(128, 5)
        self.iter_guide_layer7_s2 = CSPNGenerateAccelerate(128, 7)

        self.upsample = nn.UpsamplingBilinear2d(scale_factor=2)
        self.nnupsample = nn.UpsamplingNearest2d(scale_factor=2)
        self.downsample = SparseDownSampleClose(stride=2)
        self.softmax = nn.Softmax(dim=1)
        self.CSPN3 = CSPNAccelerate(kernel_size=3, dilation=1, padding=1, stride=1)
        self.CSPN5 = CSPNAccelerate(kernel_size=5, dilation=1, padding=2, stride=1)
        self.CSPN7 = CSPNAccelerate(kernel_size=7, dilation=1, padding=3, stride=1)
        self.CSPN3_s2 = CSPNAccelerate(kernel_size=3, dilation=2, padding=2, stride=1)
        self.CSPN5_s2 = CSPNAccelerate(kernel_size=5, dilation=2, padding=4, stride=1)
        self.CSPN7_s2 = CSPNAccelerate(kernel_size=7, dilation=2, padding=6, stride=1)

        # Fixed (non-trainable) tensors handed to kernel_trans in forward();
        # shared by both stages.
        self.encoder3 = _build_cspn_encoder(3)
        self.encoder5 = _build_cspn_encoder(5)
        self.encoder7 = _build_cspn_encoder(7)

        weights_init(self)

    def forward(self, input):
        """Refine the backbone's depth estimate in two stages.

        Args:
            input: Dict of tensors. ``input['d']`` is read here (positive
                entries are treated as valid measurements); the whole dict is
                forwarded to the backbone.

        Returns:
            The refined depth tensor produced by the second stage.
        """
        d = input['d']
        valid_mask = torch.where(d>0, torch.full_like(d, 1.0), torch.full_like(d, 0.0))

        feature_s1, feature_s2, coarse_depth = self.backbone(input)
        depth = coarse_depth

        # --- quantities for the first (dilated) stage ---------------------
        # Everything derived from feature_s2 is upsampled by 2 with
        # nearest-neighbour before it is combined with `depth`
        # (presumably feature_s2 is at half resolution -- confirm in ENet).
        d_s2, valid_mask_s2 = self.downsample(d, valid_mask)
        mask_s2 = self.mask_layer_s2(feature_s2)
        mask_s2 = torch.sigmoid(mask_s2)
        mask_s2 = mask_s2*valid_mask_s2

        kernel_conf_s2 = self.kernel_conf_layer_s2(feature_s2)
        kernel_conf_s2 = self.softmax(kernel_conf_s2)
        kernel_conf3_s2 = self.nnupsample(kernel_conf_s2[:, 0:1, :, :])
        kernel_conf5_s2 = self.nnupsample(kernel_conf_s2[:, 1:2, :, :])
        kernel_conf7_s2 = self.nnupsample(kernel_conf_s2[:, 2:3, :, :])

        guide3_s2 = self.iter_guide_layer3_s2(feature_s2)
        guide5_s2 = self.iter_guide_layer5_s2(feature_s2)
        guide7_s2 = self.iter_guide_layer7_s2(feature_s2)

        depth_s2 = self.nnupsample(d_s2)
        mask_s2 = self.nnupsample(mask_s2)
        depth3 = depth5 = depth7 = depth

        # --- quantities for the second stage ------------------------------
        mask = self.mask_layer(feature_s1)
        mask = torch.sigmoid(mask)
        mask = mask * valid_mask

        kernel_conf = self.kernel_conf_layer(feature_s1)
        kernel_conf = self.softmax(kernel_conf)
        kernel_conf3 = kernel_conf[:, 0:1, :, :]
        kernel_conf5 = kernel_conf[:, 1:2, :, :]
        kernel_conf7 = kernel_conf[:, 2:3, :, :]

        guide3 = self.iter_guide_layer3(feature_s1)
        guide5 = self.iter_guide_layer5(feature_s1)
        guide7 = self.iter_guide_layer7(feature_s1)

        guide3 = kernel_trans(guide3, self.encoder3)
        guide5 = kernel_trans(guide5, self.encoder5)
        guide7 = kernel_trans(guide7, self.encoder7)

        guide3_s2 = kernel_trans(guide3_s2, self.encoder3)
        guide5_s2 = kernel_trans(guide5_s2, self.encoder5)
        guide7_s2 = kernel_trans(guide7_s2, self.encoder7)

        guide3_s2 = self.nnupsample(guide3_s2)
        guide5_s2 = self.nnupsample(guide5_s2)
        guide7_s2 = self.nnupsample(guide7_s2)

        # --- stage 1: six iterations with the dilation-2 kernels ----------
        for i in range(6):
            depth3 = self.CSPN3_s2(guide3_s2, depth3, coarse_depth)
            depth3 = mask_s2*depth_s2 + (1-mask_s2)*depth3
            depth5 = self.CSPN5_s2(guide5_s2, depth5, coarse_depth)
            depth5 = mask_s2*depth_s2 + (1-mask_s2)*depth5
            depth7 = self.CSPN7_s2(guide7_s2, depth7, coarse_depth)
            depth7 = mask_s2*depth_s2 + (1-mask_s2)*depth7

        # From here on depth_s2 holds the stage-1 result instead of the
        # upsampled sparse depth.
        depth_s2 = kernel_conf3_s2*depth3 + kernel_conf5_s2*depth5 + kernel_conf7_s2*depth7
        refined_depth_s2 = depth_s2

        depth3 = depth5 = depth7 = refined_depth_s2

        #prop
        # --- stage 2: six iterations with the dilation-1 kernels ----------
        for i in range(6):
            depth3 = self.CSPN3(guide3, depth3, depth_s2)
            depth3 = mask*d + (1-mask)*depth3
            depth5 = self.CSPN5(guide5, depth5, depth_s2)
            depth5 = mask*d + (1-mask)*depth5
            depth7 = self.CSPN7(guide7, depth7, depth_s2)
            depth7 = mask*d + (1-mask)*depth7

        refined_depth = kernel_conf3*depth3 + kernel_conf5*depth5 + kernel_conf7*depth7
        return refined_depth
stride=1) # CSPN ks = 3 encoder3 = torch.zeros(ks * ks, ks * ks, ks, ks).cuda() kernel_range_list = [i for i in range(ks - 1, -1, -1)] ls = [] for i in range(ks): ls.extend(kernel_range_list) index = [[j for j in range(ks * ks - 1, -1, -1)], [j for j in range(ks * ks)], \ [val for val in kernel_range_list for j in range(ks)], ls] encoder3[index] = 1 self.encoder3 = nn.Parameter(encoder3, requires_grad=False) ks = 5 encoder5 = torch.zeros(ks * ks, ks * ks, ks, ks).cuda() kernel_range_list = [i for i in range(ks - 1, -1, -1)] ls = [] for i in range(ks): ls.extend(kernel_range_list) index = [[j for j in range(ks * ks - 1, -1, -1)], [j for j in range(ks * ks)], \ [val for val in kernel_range_list for j in range(ks)], ls] encoder5[index] = 1 self.encoder5 = nn.Parameter(encoder5, requires_grad=False) ks = 7 encoder7 = torch.zeros(ks * ks, ks * ks, ks, ks).cuda() kernel_range_list = [i for i in range(ks - 1, -1, -1)] ls = [] for i in range(ks): ls.extend(kernel_range_list) index = [[j for j in range(ks * ks - 1, -1, -1)], [j for j in range(ks * ks)], \ [val for val in kernel_range_list for j in range(ks)], ls] encoder7[index] = 1 self.encoder7 = nn.Parameter(encoder7, requires_grad=False) weights_init(self) def forward(self, input): #rgb = input['rgb'] d = input['d'] valid_mask = torch.where(d>0, torch.full_like(d, 1.0), torch.full_like(d, 0.0)) feature_s1, feature_s2, feature_s3, coarse_depth = self.backbone(input) depth = coarse_depth d_s2, valid_mask_s2 = self.downsample(d, valid_mask) d_s3, valid_mask_s3 = self.downsample(d_s2, valid_mask_s2) #s3 mask_s3 = self.mask_layer_s3(feature_s3) mask_s3 = torch.sigmoid(mask_s3) mask_s3 = mask_s3 * valid_mask_s3 prop_mask_s3 = self.prop_mask_layer_s3(feature_s3) prop_mask_s3 = torch.sigmoid(prop_mask_s3) kernel_conf_s3 = self.kernel_conf_layer_s3(feature_s3) kernel_conf_s3 = self.softmax(kernel_conf_s3) kernel_conf3_s3 = self.nnupsample4(kernel_conf_s3[:, 0:1, :, :]) kernel_conf5_s3 = self.nnupsample4(kernel_conf_s3[:, 1:2, :, 
:]) kernel_conf7_s3 = self.nnupsample4(kernel_conf_s3[:, 2:3, :, :]) guide3_s3 = self.iter_guide_layer3_s3(feature_s3) guide5_s3 = self.iter_guide_layer5_s3(feature_s3) guide7_s3 = self.iter_guide_layer7_s3(feature_s3) guide3_s3 = kernel_trans(guide3_s3, self.encoder3) guide5_s3 = kernel_trans(guide5_s3, self.encoder5) guide7_s3 = kernel_trans(guide7_s3, self.encoder7) guide3_s3 = prop_mask_s3*guide3_s3 guide5_s3 = prop_mask_s3*guide5_s3 guide7_s3 = prop_mask_s3*guide7_s3 guide3_s3 = self.nnupsample4(guide3_s3) guide5_s3 = self.nnupsample4(guide5_s3) guide7_s3 = self.nnupsample4(guide7_s3) depth_s3 = self.nnupsample4(d_s3) mask_s3 = self.nnupsample4(mask_s3) depth3 = depth5 = depth7 = depth for i in range(4): depth3 = self.CSPN3_s3(guide3_s3, depth3, coarse_depth) depth3 = mask_s3 * depth_s3 + (1 - mask_s3) * depth3 depth5 = self.CSPN5_s3(guide5_s3, depth5, coarse_depth) depth5 = mask_s3 * depth_s3 + (1 - mask_s3) * depth5 depth7 = self.CSPN7_s3(guide7_s3, depth7, coarse_depth) depth7 = mask_s3 * depth_s3 + (1 - mask_s3) * depth7 depth_s3 = kernel_conf3_s3 * depth3 + kernel_conf5_s3 * depth5 + kernel_conf7_s3 * depth7 refined_depth_s3 = depth_s3 #s2 mask_s2 = self.mask_layer_s2(feature_s2) mask_s2 = torch.sigmoid(mask_s2) mask_s2 = mask_s2*valid_mask_s2 prop_mask_s2 = self.prop_mask_layer_s2(feature_s2) prop_mask_s2 = torch.sigmoid(prop_mask_s2) kernel_conf_s2 = self.kernel_conf_layer_s2(feature_s2) kernel_conf_s2 = self.softmax(kernel_conf_s2) kernel_conf3_s2 = self.nnupsample(kernel_conf_s2[:, 0:1, :, :]) kernel_conf5_s2 = self.nnupsample(kernel_conf_s2[:, 1:2, :, :]) kernel_conf7_s2 = self.nnupsample(kernel_conf_s2[:, 2:3, :, :]) guide3_s2 = self.iter_guide_layer3_s2(feature_s2) guide5_s2 = self.iter_guide_layer5_s2(feature_s2) guide7_s2 = self.iter_guide_layer7_s2(feature_s2) guide3_s2 = kernel_trans(guide3_s2, self.encoder3) guide5_s2 = kernel_trans(guide5_s2, self.encoder5) guide7_s2 = kernel_trans(guide7_s2, self.encoder7) guide3_s2 = prop_mask_s2*guide3_s2 
guide5_s2 = prop_mask_s2*guide5_s2 guide7_s2 = prop_mask_s2*guide7_s2 guide3_s2 = self.nnupsample(guide3_s2) guide5_s2 = self.nnupsample(guide5_s2) guide7_s2 = self.nnupsample(guide7_s2) depth_s2 = self.nnupsample(d_s2) mask_s2 = self.nnupsample(mask_s2) depth3 = depth5 = depth7 = refined_depth_s3 for i in range(4): depth3 = self.CSPN3_s2(guide3_s2, depth3, depth_s3) depth3 = mask_s2*depth_s2 + (1-mask_s2)*depth3 depth5 = self.CSPN5_s2(guide5_s2, depth5, depth_s3) depth5 = mask_s2*depth_s2 + (1-mask_s2)*depth5 depth7 = self.CSPN7_s2(guide7_s2, depth7, depth_s3) depth7 = mask_s2*depth_s2 + (1-mask_s2)*depth7 depth_s2 = kernel_conf3_s2*depth3 + kernel_conf5_s2*depth5 + kernel_conf7_s2*depth7 refined_depth_s2 = depth_s2 #s1 mask = self.mask_layer(feature_s1) mask = torch.sigmoid(mask) mask = mask*valid_mask prop_mask = self.prop_mask_layer(feature_s1) prop_mask = torch.sigmoid(prop_mask) kernel_conf = self.kernel_conf_layer(feature_s1) kernel_conf = self.softmax(kernel_conf) kernel_conf3 = kernel_conf[:, 0:1, :, :] kernel_conf5 = kernel_conf[:, 1:2, :, :] kernel_conf7 = kernel_conf[:, 2:3, :, :] guide3 = self.iter_guide_layer3(feature_s1) guide5 = self.iter_guide_layer5(feature_s1) guide7 = self.iter_guide_layer7(feature_s1) guide3 = kernel_trans(guide3, self.encoder3) guide5 = kernel_trans(guide5, self.encoder5) guide7 = kernel_trans(guide7, self.encoder7) guide3 = prop_mask*guide3 guide5 = prop_mask*guide5 guide7 = prop_mask*guide7 depth3 = depth5 = depth7 = refined_depth_s2 for i in range(4): depth3 = self.CSPN3(guide3, depth3, depth_s2) depth3 = mask*d + (1-mask)*depth3 depth5 = self.CSPN5(guide5, depth5, depth_s2) depth5 = mask*d + (1-mask)*depth5 depth7 = self.CSPN7(guide7, depth7, depth_s2) depth7 = mask*d + (1-mask)*depth7 refined_depth = kernel_conf3*depth3 + kernel_conf5*depth5 + kernel_conf7*depth7 return refined_depth class PENet_C1_train(nn.Module): def __init__(self, args): super(PENet_C1_train, self).__init__() self.backbone = ENet(args) self.mask_layer 
= convbn(64, 3) self.kernel_conf_layer = convbn(64, 3) self.iter_conf_layer = convbn(64, 12) self.iter_guide_layer3 = CSPNGenerate(64, 3) self.iter_guide_layer5 = CSPNGenerate(64, 5) self.iter_guide_layer7 = CSPNGenerate(64, 7) self.softmax = nn.Softmax(dim=1) self.CSPN3 = CSPN(3) self.CSPN5 = CSPN(5) self.CSPN7 = CSPN(7) weights_init(self) def forward(self, input): #rgb = input['rgb'] d = input['d'] valid_mask = torch.where(d>0, torch.full_like(d, 1.0), torch.full_like(d, 0.0)) feature, coarse_depth = self.backbone(input) mask = self.mask_layer(feature) mask = torch.sigmoid(mask) mask = mask*valid_mask mask3 = mask[:, 0:1, :, :] mask5 = mask[:, 1:2, :, :] mask7 = mask[:, 2:3, :, :] kernel_conf = self.kernel_conf_layer(feature) kernel_conf = self.softmax(kernel_conf) kernel_conf3 = kernel_conf[:, 0:1, :, :] kernel_conf5 = kernel_conf[:, 1:2, :, :] kernel_conf7 = kernel_conf[:, 2:3, :, :] conf = self.iter_conf_layer(feature) conf3 = conf[:, 0:4, :, :] conf5 = conf[:, 4:8, :, :] conf7 = conf[:, 8:12, :, :] conf3 = self.softmax(conf3) conf5 = self.softmax(conf5) conf7 = self.softmax(conf7) #guide3 = self.iter_guide_layer3(feature) #guide5 = self.iter_guide_layer5(feature) #guide7 = self.iter_guide_layer7(feature) #init depth = coarse_depth depth3 = depth depth5 = depth depth7 = depth d3_list = [i for i in range(4)] d5_list = [i for i in range(4)] d7_list = [i for i in range(4)] #prop guide3 = self.iter_guide_layer3(feature) guide5 = self.iter_guide_layer5(feature) guide7 = self.iter_guide_layer7(feature) for i in range(12): depth3 = self.CSPN3(guide3, depth3, depth) depth3 = mask3*d + (1-mask3)*depth3 depth5 = self.CSPN5(guide5, depth5, depth) depth5 = mask5*d + (1-mask5)*depth5 depth7 = self.CSPN7(guide7, depth7, depth) depth7 = mask7*d + (1-mask7)*depth7 if(i==2): d3_list[0] = depth3 d5_list[0] = depth5 d7_list[0] = depth7 if(i==5): d3_list[1] = depth3 d5_list[1] = depth5 d7_list[1] = depth7 if(i==8): d3_list[2] = depth3 d5_list[2] = depth5 d7_list[2] = depth7 
if(i==11): d3_list[3] = depth3 d5_list[3] = depth5 d7_list[3] = depth7 refined_depth = \ d3_list[0] * (kernel_conf3 * conf3[:, 0:1, :, :]) + \ d3_list[1] * (kernel_conf3 * conf3[:, 1:2, :, :]) + \ d3_list[2] * (kernel_conf3 * conf3[:, 2:3, :, :]) + \ d3_list[3] * (kernel_conf3 * conf3[:, 3:4, :, :]) + \ d5_list[0] * (kernel_conf5 * conf5[:, 0:1, :, :]) + \ d5_list[1] * (kernel_conf5 * conf5[:, 1:2, :, :]) + \ d5_list[2] * (kernel_conf5 * conf5[:, 2:3, :, :]) + \ d5_list[3] * (kernel_conf5 * conf5[:, 3:4, :, :]) + \ d7_list[0] * (kernel_conf7 * conf7[:, 0:1, :, :]) + \ d7_list[1] * (kernel_conf7 * conf7[:, 1:2, :, :]) + \ d7_list[2] * (kernel_conf7 * conf7[:, 2:3, :, :]) + \ d7_list[3] * (kernel_conf7 * conf7[:, 3:4, :, :]) return refined_depth class PENet_C2_train(nn.Module): def __init__(self, args): super(PENet_C2_train, self).__init__() self.backbone = ENet(args) self.kernel_conf_layer = convbn(64, 3) self.mask_layer = convbn(64, 1) self.iter_guide_layer3 = CSPNGenerate(64, 3) self.iter_guide_layer5 = CSPNGenerate(64, 5) self.iter_guide_layer7 = CSPNGenerate(64, 7) self.kernel_conf_layer_s2 = convbn(128, 3) self.mask_layer_s2 = convbn(128, 1) self.iter_guide_layer3_s2 = CSPNGenerate(128, 3) self.iter_guide_layer5_s2 = CSPNGenerate(128, 5) self.iter_guide_layer7_s2 = CSPNGenerate(128, 7) self.dimhalf_s2 = convbnrelu(128, 64, 1, 1, 0) self.att_12 = convbnrelu(128, 2) self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) self.downsample = SparseDownSampleClose(stride=2) self.softmax = nn.Softmax(dim=1) self.CSPN3 = CSPN(3) self.CSPN5 = CSPN(5) self.CSPN7 = CSPN(7) weights_init(self) def forward(self, input): d = input['d'] valid_mask = torch.where(d>0, torch.full_like(d, 1.0), torch.full_like(d, 0.0)) feature_s1, feature_s2, coarse_depth = self.backbone(input) depth = coarse_depth d_s2, valid_mask_s2 = self.downsample(d, valid_mask) mask_s2 = self.mask_layer_s2(feature_s2) mask_s2 = torch.sigmoid(mask_s2) mask_s2 = mask_s2*valid_mask_s2 kernel_conf_s2 = 
self.kernel_conf_layer_s2(feature_s2) kernel_conf_s2 = self.softmax(kernel_conf_s2) kernel_conf3_s2 = kernel_conf_s2[:, 0:1, :, :] kernel_conf5_s2 = kernel_conf_s2[:, 1:2, :, :] kernel_conf7_s2 = kernel_conf_s2[:, 2:3, :, :] mask = self.mask_layer(feature_s1) mask = torch.sigmoid(mask) mask = mask*valid_mask kernel_conf = self.kernel_conf_layer(feature_s1) kernel_conf = self.softmax(kernel_conf) kernel_conf3 = kernel_conf[:, 0:1, :, :] kernel_conf5 = kernel_conf[:, 1:2, :, :] kernel_conf7 = kernel_conf[:, 2:3, :, :] feature_12 = torch.cat((feature_s1, self.upsample(self.dimhalf_s2(feature_s2))), 1) att_map_12 = self.softmax(self.att_12(feature_12)) guide3_s2 = self.iter_guide_layer3_s2(feature_s2) guide5_s2 = self.iter_guide_layer5_s2(feature_s2) guide7_s2 = self.iter_guide_layer7_s2(feature_s2) guide3 = self.iter_guide_layer3(feature_s1) guide5 = self.iter_guide_layer5(feature_s1) guide7 = self.iter_guide_layer7(feature_s1) depth_s2 = depth depth_s2_00 = depth_s2[:, :, 0::2, 0::2] depth_s2_01 = depth_s2[:, :, 0::2, 1::2] depth_s2_10 = depth_s2[:, :, 1::2, 0::2] depth_s2_11 = depth_s2[:, :, 1::2, 1::2] depth_s2_00_h0 = depth3_s2_00 = depth5_s2_00 = depth7_s2_00 = depth_s2_00 depth_s2_01_h0 = depth3_s2_01 = depth5_s2_01 = depth7_s2_01 = depth_s2_01 depth_s2_10_h0 = depth3_s2_10 = depth5_s2_10 = depth7_s2_10 = depth_s2_10 depth_s2_11_h0 = depth3_s2_11 = depth5_s2_11 = depth7_s2_11 = depth_s2_11 for i in range(6): depth3_s2_00 = self.CSPN3(guide3_s2, depth3_s2_00, depth_s2_00_h0) depth3_s2_00 = mask_s2*d_s2 + (1-mask_s2)*depth3_s2_00 depth5_s2_00 = self.CSPN5(guide5_s2, depth5_s2_00, depth_s2_00_h0) depth5_s2_00 = mask_s2*d_s2 + (1-mask_s2)*depth5_s2_00 depth7_s2_00 = self.CSPN7(guide7_s2, depth7_s2_00, depth_s2_00_h0) depth7_s2_00 = mask_s2*d_s2 + (1-mask_s2)*depth7_s2_00 depth3_s2_01 = self.CSPN3(guide3_s2, depth3_s2_01, depth_s2_01_h0) depth3_s2_01 = mask_s2*d_s2 + (1-mask_s2)*depth3_s2_01 depth5_s2_01 = self.CSPN5(guide5_s2, depth5_s2_01, depth_s2_01_h0) 
depth5_s2_01 = mask_s2*d_s2 + (1-mask_s2)*depth5_s2_01 depth7_s2_01 = self.CSPN7(guide7_s2, depth7_s2_01, depth_s2_01_h0) depth7_s2_01 = mask_s2*d_s2 + (1-mask_s2)*depth7_s2_01 depth3_s2_10 = self.CSPN3(guide3_s2, depth3_s2_10, depth_s2_10_h0) depth3_s2_10 = mask_s2*d_s2 + (1-mask_s2)*depth3_s2_10 depth5_s2_10 = self.CSPN5(guide5_s2, depth5_s2_10, depth_s2_10_h0) depth5_s2_10 = mask_s2*d_s2 + (1-mask_s2)*depth5_s2_10 depth7_s2_10 = self.CSPN7(guide7_s2, depth7_s2_10, depth_s2_10_h0) depth7_s2_10 = mask_s2*d_s2 + (1-mask_s2)*depth7_s2_10 depth3_s2_11 = self.CSPN3(guide3_s2, depth3_s2_11, depth_s2_11_h0) depth3_s2_11 = mask_s2*d_s2 + (1-mask_s2)*depth3_s2_11 depth5_s2_11 = self.CSPN5(guide5_s2, depth5_s2_11, depth_s2_11_h0) depth5_s2_11 = mask_s2*d_s2 + (1-mask_s2)*depth5_s2_11 depth7_s2_11 = self.CSPN7(guide7_s2, depth7_s2_11, depth_s2_11_h0) depth7_s2_11 = mask_s2*d_s2 + (1-mask_s2)*depth7_s2_11 depth_s2_00 = kernel_conf3_s2*depth3_s2_00 + kernel_conf5_s2*depth5_s2_00 + kernel_conf7_s2*depth7_s2_00 depth_s2_01 = kernel_conf3_s2*depth3_s2_01 + kernel_conf5_s2*depth5_s2_01 + kernel_conf7_s2*depth7_s2_01 depth_s2_10 = kernel_conf3_s2*depth3_s2_10 + kernel_conf5_s2*depth5_s2_10 + kernel_conf7_s2*depth7_s2_10 depth_s2_11 = kernel_conf3_s2*depth3_s2_11 + kernel_conf5_s2*depth5_s2_11 + kernel_conf7_s2*depth7_s2_11 depth_s2[:, :, 0::2, 0::2] = depth_s2_00 depth_s2[:, :, 0::2, 1::2] = depth_s2_01 depth_s2[:, :, 1::2, 0::2] = depth_s2_10 depth_s2[:, :, 1::2, 1::2] = depth_s2_11 #feature_12 = torch.cat((feature_s1, self.upsample(self.dimhalf_s2(feature_s2))), 1) #att_map_12 = self.softmax(self.att_12(feature_12)) refined_depth_s2 = depth*att_map_12[:, 0:1, :, :] + depth_s2*att_map_12[:, 1:2, :, :] #refined_depth_s2 = depth depth3 = depth5 = depth7 = refined_depth_s2 #prop for i in range(6): depth3 = self.CSPN3(guide3, depth3, depth) depth3 = mask*d + (1-mask)*depth3 depth5 = self.CSPN5(guide5, depth5, depth) depth5 = mask*d + (1-mask)*depth5 depth7 = self.CSPN7(guide7, 
depth7, depth) depth7 = mask*d + (1-mask)*depth7 refined_depth = kernel_conf3*depth3 + kernel_conf5*depth5 + kernel_conf7*depth7 return refined_depth ================================================ FILE: tools/PENet/vis_utils.py ================================================ import os import matplotlib.pyplot as plt from PIL import Image import numpy as np import cv2 from dataloaders import calibration_kitti from skimage import io import cv2 cmap = plt.cm.jet cmap2 = plt.cm.nipy_spectral from dataloaders.my_loader import depth2pointsrgb, depth2pointsrgbp def validcrop(img): ratio = 256/1216 h = img.size()[2] w = img.size()[3] return img[:, :, h-int(ratio*w):, :] def depth_colorize(depth): depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth)) depth = 255 * cmap(depth)[:, :, :3] # H, W, C return depth.astype('uint8') def feature_colorize(feature): feature = (feature - np.min(feature)) / ((np.max(feature) - np.min(feature))) feature = 255 * cmap2(feature)[:, :, :3] return feature.astype('uint8') def mask_vis(mask): mask = (mask - np.min(mask)) / (np.max(mask) - np.min(mask)) mask = 255 * mask return mask.astype('uint8') def merge_into_row(ele, pred, predrgb=None, predg=None, extra=None, extra2=None, extrargb=None): def preprocess_depth(x): y = np.squeeze(x.data.cpu().numpy()) return depth_colorize(y) # if is gray, transforms to rgb img_list = [] if 'rgb' in ele: rgb = np.squeeze(ele['rgb'][0, ...].data.cpu().numpy()) rgb = np.transpose(rgb, (1, 2, 0)) img_list.append(rgb) elif 'g' in ele: g = np.squeeze(ele['g'][0, ...].data.cpu().numpy()) g = np.array(Image.fromarray(g).convert('RGB')) img_list.append(g) if 'd' in ele: img_list.append(preprocess_depth(ele['d'][0, ...])) img_list.append(preprocess_depth(pred[0, ...])) if extrargb is not None: img_list.append(preprocess_depth(extrargb[0, ...])) if predrgb is not None: predrgb = np.squeeze(ele['rgb'][0, ...].data.cpu().numpy()) predrgb = np.transpose(predrgb, (1, 2, 0)) #predrgb = predrgb.astype('uint8') 
img_list.append(predrgb) if predg is not None: predg = np.squeeze(predg[0, ...].data.cpu().numpy()) predg = mask_vis(predg) predg = np.array(Image.fromarray(predg).convert('RGB')) #predg = predg.astype('uint8') img_list.append(predg) if extra is not None: extra = np.squeeze(extra[0, ...].data.cpu().numpy()) extra = mask_vis(extra) extra = np.array(Image.fromarray(extra).convert('RGB')) img_list.append(extra) if extra2 is not None: extra2 = np.squeeze(extra2[0, ...].data.cpu().numpy()) extra2 = mask_vis(extra2) extra2 = np.array(Image.fromarray(extra2).convert('RGB')) img_list.append(extra2) if 'gt' in ele: img_list.append(preprocess_depth(ele['gt'][0, ...])) img_merge = np.hstack(img_list) return img_merge.astype('uint8') def add_row(img_merge, row): return np.vstack([img_merge, row]) def save_image(img_merge, filename): image_to_write = cv2.cvtColor(img_merge, cv2.COLOR_RGB2BGR) cv2.imwrite(filename, image_to_write) def save_image_torch(rgb, filename): #torch2numpy rgb = validcrop(rgb) rgb = np.squeeze(rgb[0, ...].data.cpu().numpy()) #print(rgb.size()) rgb = np.transpose(rgb, (1, 2, 0)) rgb = rgb.astype('uint8') image_to_write = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR) cv2.imwrite(filename, image_to_write) def save_depth_as_uint16png(img, filename): #from tensor img = np.squeeze(img.data.cpu().numpy()) img = (img * 256).astype('uint16') cv2.imwrite(filename, img) def get_fov_flag(pts_rect, img_shape, calib): """ Args: pts_rect: img_shape: calib: Returns: """ pts_img, pts_rect_depth = calib.rect_to_img(pts_rect) val_flag_1 = np.logical_and(pts_img[:, 0] >= 0, pts_img[:, 0] < img_shape[1]) val_flag_2 = np.logical_and(pts_img[:, 1] >= 0, pts_img[:, 1] < img_shape[0]) val_flag_merge = np.logical_and(val_flag_1, val_flag_2) pts_valid_flag = np.logical_and(val_flag_merge, pts_rect_depth >= 0) return pts_valid_flag def save_depth_as_points(depth, idx, root_path): file_idx = str(idx).zfill(6) file_image_path = os.path.join(root_path, 'image_2', file_idx + '.png') 
file_velo_path = os.path.join(root_path, 'velodyne', file_idx + '.bin') file_calib = os.path.join(root_path, 'calib', file_idx + '.txt') calib = calibration_kitti.Calibration(file_calib) lidar = np.fromfile(str(file_velo_path), dtype=np.float32).reshape(-1, 4) image = np.array(io.imread(file_image_path), dtype=np.int32) image = image[:352, :1216] pts_rect = calib.lidar_to_rect(lidar[:, 0:3]) fov_flag = get_fov_flag(pts_rect, image.shape, calib) lidar = lidar[fov_flag] paths = os.path.join(root_path, 'velodyne_depth') if not os.path.exists(paths): os.makedirs(paths) out_path = os.path.join(paths, file_idx + '.npy') depth = depth.cpu().detach().numpy().reshape(352, 1216,1) final_points = depth2pointsrgbp(depth, image, calib, lidar) final_points = final_points.astype(np.float16) np.save(out_path, final_points) def save_depth_as_uint16png_upload(img, filename): #from tensor img = np.squeeze(img.data.cpu().numpy()) img = (img * 256.0).astype('uint16') img_buffer = img.tobytes() imgsave = Image.new("I", img.T.shape) imgsave.frombytes(img_buffer, 'raw', "I;16") imgsave.save(filename) def save_depth_as_uint8colored(img, filename): #from tensor img = validcrop(img) img = np.squeeze(img.data.cpu().numpy()) img = depth_colorize(img) img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) cv2.imwrite(filename, img) def save_mask_as_uint8colored(img, filename, colored=True, normalized=True): img = validcrop(img) img = np.squeeze(img.data.cpu().numpy()) if(normalized==False): img = (img - np.min(img)) / (np.max(img) - np.min(img)) if(colored==True): img = 255 * cmap(img)[:, :, :3] else: img = 255 * img img = img.astype('uint8') img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) cv2.imwrite(filename, img) def save_feature_as_uint8colored(img, filename): img = validcrop(img) img = np.squeeze(img.data.cpu().numpy()) img = feature_colorize(img) img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) cv2.imwrite(filename, img) ================================================ FILE: 
tools/cfgs/dataset_configs/kitti_dataset.yaml ================================================ DATA_PATH: '../data/kitti' DATASET: 'KittiDataset' MM_PATH: 'velodyne_depth' POINT_CLOUD_RANGE: [0, -40, -3, 70.4, 40, 1] DATA_SPLIT: { 'train': train, 'test': val } INFO_PATH: { 'train': [kitti_infos_train.pkl], 'test': [kitti_infos_val.pkl], } FOV_POINTS_ONLY: True X_TRANS: AUG_CONFIG_LIST: - NAME: world_rotation WORLD_ROT_ANGLE: [0.39269908,0 , 0.39269908, -0.39269908, -0.39269908, 0] - NAME: world_flip ALONG_AXIS_LIST: [0, 1, 1, 0, 1, 0] - NAME: world_scaling WORLD_SCALE_RANGE: [ 0.98, 1.02, 1., 0.98, 1.02, 1.] DATA_AUGMENTOR: DISABLE_AUG_LIST: ['placeholder'] AUG_CONFIG_LIST: - NAME: gt_sampling USE_ROAD_PLANE: True DB_INFO_PATH: - kitti_dbinfos_train.pkl PREPARE: { filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], filter_by_difficulty: [-1], } SAMPLE_GROUPS: ['Car:15','Pedestrian:10', 'Cyclist:10'] NUM_POINT_FEATURES: 4 DATABASE_WITH_FAKELIDAR: False REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] LIMIT_WHOLE_SCENE: False - NAME: random_world_flip ALONG_AXIS_LIST: ['x'] - NAME: random_world_rotation WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] - NAME: random_world_scaling WORLD_SCALE_RANGE: [0.95, 1.05] POINT_FEATURE_ENCODING: { encoding_type: absolute_coordinates_encoding, used_feature_list: ['x', 'y', 'z', 'intensity'], src_feature_list: ['x', 'y', 'z', 'intensity'], } DATA_PROCESSOR: - NAME: mask_points_and_boxes_outside_range REMOVE_OUTSIDE_BOXES: True - NAME: shuffle_points SHUFFLE_ENABLED: { 'train': True, 'test': False } - NAME: transform_points_to_voxels VOXEL_SIZE: [0.05, 0.05, 0.1] MAX_POINTS_PER_VOXEL: 5 MAX_NUMBER_OF_VOXELS: { 'train': 1600000, 'test': 4000000 } ================================================ FILE: tools/cfgs/models/kitti/TED-M.yaml ================================================ CLASS_NAMES: ['Car'] DATA_CONFIG: _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml DATASET: 'KittiDatasetMM' MM_PATH: 'velodyne_depth' ROT_NUM: 3 USE_VAN: 
True DATA_SPLIT: { 'train': train, 'test': val } INFO_PATH: { 'train': [kitti_infos_train.pkl], 'test': [kitti_infos_val.pkl], } DATA_AUGMENTOR: DISABLE_AUG_LIST: ['placeholder'] AUG_CONFIG_LIST: - NAME: gt_sampling USE_ROAD_PLANE: True DB_INFO_PATH: - kitti_dbinfos_train_mm.pkl PREPARE: { filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], filter_by_difficulty: [-1], } SAMPLE_GROUPS: ['Car:10', 'Pedestrian:10', 'Cyclist:10'] NUM_POINT_FEATURES: 8 DATABASE_WITH_FAKELIDAR: False REMOVE_EXTRA_WIDTH: [0.0, 0.0, -0.2] LIMIT_WHOLE_SCENE: False - NAME: da_sampling USE_ROAD_PLANE: True DB_INFO_PATH: - kitti_dbinfos_train_mm.pkl PREPARE: { filter_by_min_points: ['Car:5'], filter_by_difficulty: [-1], } SAMPLE_GROUPS: ['Car:10'] MIN_SAMPLING_DIS: 0 MAX_SAMPLING_DIS: 20 OCCLUSION_NOISE: 0.2 OCCLUSION_OFFSET: 2. SAMPLING_METHOD: 'LiDAR-aware' VERT_RES: 0.006 HOR_RES: 0.003 NUM_POINT_FEATURES: 8 DATABASE_WITH_FAKELIDAR: False REMOVE_EXTRA_WIDTH: [0.0, 0.0, -0.2] LIMIT_WHOLE_SCENE: False - NAME: random_local_noise LOCAL_ROT_RANGE: [-0.78539816, 0.78539816] TRANSLATION_STD: [1.0, 1.0, 0.5] GLOBAL_ROT_RANGE: [0.0, 0.0] EXTRA_WIDTH: [0.2, 0.2, 0.] - NAME: random_world_rotation WORLD_ROT_ANGLE: [-0.39269908, 0.39269908] - NAME: random_world_scaling WORLD_SCALE_RANGE: [0.95, 1.05] - NAME: random_local_pyramid_aug DROP_PROB: 0.25 SPARSIFY_PROB: 0.05 SPARSIFY_MAX_NUM: 50 SWAP_PROB: 0.1 SWAP_MAX_NUM: 50 X_TRANS: AUG_CONFIG_LIST: - NAME: world_rotation WORLD_ROT_ANGLE: [0.39269908, 0, 0.39269908, -0.39269908, -0.39269908, 0] - NAME: world_flip ALONG_AXIS_LIST: [0, 1, 1, 0, 1, 0] - NAME: world_scaling WORLD_SCALE_RANGE: [ 0.98, 1.02, 1., 0.98, 1.02, 1.] 
POINT_FEATURE_ENCODING: { encoding_type: absolute_coordinates_encoding_mm, used_feature_list: ['x', 'y', 'z', 'intensity'], src_feature_list: ['x', 'y', 'z', 'intensity'], num_features: 8 } DATA_PROCESSOR: - NAME: mask_points_and_boxes_outside_range REMOVE_OUTSIDE_BOXES: True - NAME: shuffle_points SHUFFLE_ENABLED: { 'train': True, 'test': True } - NAME: transform_points_to_voxels VOXEL_SIZE: [0.05, 0.05, 0.05] MAX_POINTS_PER_VOXEL: 5 MAX_NUMBER_OF_VOXELS: { 'train': 16000, 'test': 40000 } MODEL: NAME: VoxelRCNN VFE: NAME: MeanVFE MODEL: 'max' BACKBONE_3D: NAME: TeMMVoxelBackBone8x NUM_FILTERS: [16, 32, 64, 64] RETURN_NUM_FEATURES_AS_DICT: True OUT_FEATURES: 64 MM: True MAP_TO_BEV: NAME: BEVPool NUM_BEV_FEATURES: 256 ALIGN_METHOD: 'max' BACKBONE_2D: NAME: BaseBEVBackbone LAYER_NUMS: [4, 4] LAYER_STRIDES: [1, 2] NUM_FILTERS: [64, 128] UPSAMPLE_STRIDES: [1, 2] NUM_UPSAMPLE_FILTERS: [128, 128] DENSE_HEAD: NAME: AnchorHeadSingle CLASS_AGNOSTIC: False USE_DIRECTION_CLASSIFIER: True DIR_OFFSET: 0.78539 DIR_LIMIT_OFFSET: 0.0 NUM_DIR_BINS: 2 ANCHOR_GENERATOR_CONFIG: [ { 'class_name': 'Car', 'anchor_sizes': [[3.9, 1.6, 1.56]], 'anchor_rotations': [0, 1.57], 'anchor_bottom_heights': [-1.78], 'align_center': False, 'feature_map_stride': 8, 'matched_threshold': 0.6, 'unmatched_threshold': 0.45 } ] TARGET_ASSIGNER_CONFIG: NAME: AxisAlignedTargetAssigner POS_FRACTION: -1.0 SAMPLE_SIZE: 512 NORM_BY_NUM_EXAMPLES: False MATCH_HEIGHT: False BOX_CODER: ResidualCoder LOSS_CONFIG: LOSS_WEIGHTS: { 'cls_weight': 1.0, 'loc_weight': 2.0, 'dir_weight': 0.2, 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] } ROI_HEAD: NAME: TEDMHead CLASS_AGNOSTIC: True SHARED_FC: [256, 256] CLS_FC: [256, 256] REG_FC: [256, 256] DP_RATIO: 0.01 PART: IN_CHANNEL: 256 SIZE: 7 GRID_OFFSETS: [0., 40.] 
FEATMAP_STRIDE: 0.4 NMS_CONFIG: TRAIN: NMS_TYPE: nms_gpu MULTI_CLASSES_NMS: False NMS_PRE_MAXSIZE: 4000 NMS_POST_MAXSIZE: 512 NMS_THRESH: 0.8 TEST: NMS_TYPE: nms_gpu MULTI_CLASSES_NMS: False USE_FAST_NMS: True SCORE_THRESH: 0.0 NMS_PRE_MAXSIZE: 4000 NMS_POST_MAXSIZE: 50 NMS_THRESH: 0.75 ROI_GRID_POOL: FEATURES_SOURCE: ['x_conv3','x_conv4'] PRE_MLP: True GRID_SIZE: 6 POOL_LAYERS: x_conv3: MLPS: [[32, 32], [32, 32]] QUERY_RANGES: [[2, 2, 2], [4, 4, 4]] POOL_RADIUS: [0.4, 0.8] NSAMPLE: [16, 16] POOL_METHOD: max_pool x_conv4: MLPS: [[32, 32], [32, 32]] QUERY_RANGES: [[2, 2, 2], [4, 4, 4]] POOL_RADIUS: [0.8, 1.6] NSAMPLE: [16, 16] POOL_METHOD: max_pool ROI_GRID_POOL_MM: FEATURES_SOURCE: ['x_conv3','x_conv4'] PRE_MLP: True GRID_SIZE: 4 POOL_LAYERS: x_conv3: MLPS: [[32, 32], [32, 32]] QUERY_RANGES: [[2, 2, 2], [4, 4, 4]] POOL_RADIUS: [0.4, 0.8] NSAMPLE: [16, 16] POOL_METHOD: max_pool x_conv4: MLPS: [[32, 32], [32, 32]] QUERY_RANGES: [[2, 2, 2], [4, 4, 4]] POOL_RADIUS: [0.8, 1.6] NSAMPLE: [16, 16] POOL_METHOD: max_pool TARGET_CONFIG: BOX_CODER: ResidualCoder ROI_PER_IMAGE: 160 FG_RATIO: 0.5 SAMPLE_ROI_BY_EACH_CLASS: True CLS_SCORE_TYPE: roi_iou_x CLS_FG_THRESH: [0.75] CLS_BG_THRESH: [0.25] CLS_BG_THRESH_LO: 0.1 HARD_BG_RATIO: 0.8 REG_FG_THRESH: [0.55] ENABLE_HARD_SAMPLING: True HARD_SAMPLING_THRESH: [0.5] HARD_SAMPLING_RATIO: [0.5] LOSS_CONFIG: CLS_LOSS: BinaryCrossEntropy REG_LOSS: smooth-l1 CORNER_LOSS_REGULARIZATION: True GRID_3D_IOU_LOSS: False LOSS_WEIGHTS: { 'rcnn_cls_weight': 1.0, 'rcnn_reg_weight': 1.0, 'rcnn_corner_weight': 1.0, 'rcnn_iou3d_weight': 1.0, 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] } POST_PROCESSING: RECALL_THRESH_LIST: [0.3, 0.5, 0.7] SCORE_THRESH: 0.7 OUTPUT_RAW_SCORE: False EVAL_METRIC: kitti NMS_CONFIG: MULTI_CLASSES_NMS: False NMS_TYPE: nms_gpu NMS_THRESH: 0.1 NMS_PRE_MAXSIZE: 4096 NMS_POST_MAXSIZE: 500 OPTIMIZATION: BATCH_SIZE_PER_GPU: 2 NUM_EPOCHS: 30 OPTIMIZER: adam_onecycle LR: 0.01 WEIGHT_DECAY: 0.01 MOMENTUM: 0.9 MOMS: [0.95, 
0.85] PCT_START: 0.4 DIV_FACTOR: 10 DECAY_STEP_LIST: [35, 45] LR_DECAY: 0.1 LR_CLIP: 0.0000001 LR_WARMUP: False WARMUP_EPOCH: 1 GRAD_NORM_CLIP: 10 ================================================ FILE: tools/cfgs/models/kitti/TED-S.yaml ================================================ CLASS_NAMES: ['Car'] DATA_CONFIG: _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml DATASET: 'KittiDataset' ROT_NUM: 3 USE_VAN: True DATA_SPLIT: { 'train': train, 'test': val } INFO_PATH: { 'train': [kitti_infos_train.pkl], 'test': [kitti_infos_val.pkl], } DATA_AUGMENTOR: DISABLE_AUG_LIST: ['placeholder'] AUG_CONFIG_LIST: - NAME: gt_sampling USE_ROAD_PLANE: True DB_INFO_PATH: - kitti_dbinfos_train.pkl PREPARE: { filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], filter_by_difficulty: [-1], } SAMPLE_GROUPS: ['Car:10', 'Pedestrian:10', 'Cyclist:10'] NUM_POINT_FEATURES: 4 DATABASE_WITH_FAKELIDAR: False REMOVE_EXTRA_WIDTH: [0.0, 0.0, -0.2] LIMIT_WHOLE_SCENE: False - NAME: da_sampling USE_ROAD_PLANE: True DB_INFO_PATH: - kitti_dbinfos_train.pkl PREPARE: { filter_by_min_points: ['Car:5'], filter_by_difficulty: [-1], } SAMPLE_GROUPS: ['Car:10'] MIN_SAMPLING_DIS: 0 MAX_SAMPLING_DIS: 20 OCCLUSION_NOISE: 0.2 OCCLUSION_OFFSET: 2. SAMPLING_METHOD: 'LiDAR-aware' VERT_RES: 0.006 HOR_RES: 0.003 NUM_POINT_FEATURES: 4 DATABASE_WITH_FAKELIDAR: False REMOVE_EXTRA_WIDTH: [0.0, 0.0, -0.2] LIMIT_WHOLE_SCENE: False - NAME: random_local_noise LOCAL_ROT_RANGE: [-0.78539816, 0.78539816] TRANSLATION_STD: [1.0, 1.0, 0.5] GLOBAL_ROT_RANGE: [0.0, 0.0] EXTRA_WIDTH: [0.2, 0.2, 0.] 
- NAME: random_world_rotation WORLD_ROT_ANGLE: [-0.39269908, 0.39269908] - NAME: random_world_scaling WORLD_SCALE_RANGE: [0.95, 1.05] - NAME: random_local_pyramid_aug DROP_PROB: 0.25 SPARSIFY_PROB: 0.05 SPARSIFY_MAX_NUM: 50 SWAP_PROB: 0.1 SWAP_MAX_NUM: 50 X_TRANS: AUG_CONFIG_LIST: - NAME: world_rotation WORLD_ROT_ANGLE: [0.39269908, 0, 0.39269908, -0.39269908, -0.39269908, 0] - NAME: world_flip ALONG_AXIS_LIST: [0, 1, 1, 0, 1, 0] - NAME: world_scaling WORLD_SCALE_RANGE: [ 0.98, 1.02, 1., 0.98, 1.02, 1.] POINT_FEATURE_ENCODING: { encoding_type: absolute_coordinates_encoding_mm, used_feature_list: ['x', 'y', 'z', 'intensity'], src_feature_list: ['x', 'y', 'z', 'intensity'], num_features: 4 } DATA_PROCESSOR: - NAME: mask_points_and_boxes_outside_range REMOVE_OUTSIDE_BOXES: True - NAME: shuffle_points SHUFFLE_ENABLED: { 'train': True, 'test': True } - NAME: transform_points_to_voxels VOXEL_SIZE: [0.05, 0.05, 0.05] MAX_POINTS_PER_VOXEL: 5 MAX_NUMBER_OF_VOXELS: { 'train': 16000, 'test': 40000 } MODEL: NAME: VoxelRCNN VFE: NAME: MeanVFE MODEL: 'max' BACKBONE_3D: NAME: TeVoxelBackBone8x NUM_FILTERS: [16, 32, 64, 64] RETURN_NUM_FEATURES_AS_DICT: True OUT_FEATURES: 64 MAP_TO_BEV: NAME: BEVPool NUM_BEV_FEATURES: 256 ALIGN_METHOD: 'max' BACKBONE_2D: NAME: BaseBEVBackbone LAYER_NUMS: [4, 4] LAYER_STRIDES: [1, 2] NUM_FILTERS: [64, 128] UPSAMPLE_STRIDES: [1, 2] NUM_UPSAMPLE_FILTERS: [128, 128] DENSE_HEAD: NAME: AnchorHeadSingle CLASS_AGNOSTIC: False USE_DIRECTION_CLASSIFIER: True DIR_OFFSET: 0.78539 DIR_LIMIT_OFFSET: 0.0 NUM_DIR_BINS: 2 ANCHOR_GENERATOR_CONFIG: [ { 'class_name': 'Car', 'anchor_sizes': [[3.9, 1.6, 1.56]], 'anchor_rotations': [0, 1.57], 'anchor_bottom_heights': [-1.78], 'align_center': False, 'feature_map_stride': 8, 'matched_threshold': 0.6, 'unmatched_threshold': 0.45 } ] TARGET_ASSIGNER_CONFIG: NAME: AxisAlignedTargetAssigner POS_FRACTION: -1.0 SAMPLE_SIZE: 512 NORM_BY_NUM_EXAMPLES: False MATCH_HEIGHT: False BOX_CODER: ResidualCoder LOSS_CONFIG: LOSS_WEIGHTS: { 
'cls_weight': 1.0, 'loc_weight': 2.0, 'dir_weight': 0.2, 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] } ROI_HEAD: NAME: TEDSHead CLASS_AGNOSTIC: True SHARED_FC: [256, 256] CLS_FC: [256, 256] REG_FC: [256, 256] DP_RATIO: 0.01 NMS_CONFIG: TRAIN: NMS_TYPE: nms_gpu MULTI_CLASSES_NMS: False NMS_PRE_MAXSIZE: 4000 NMS_POST_MAXSIZE: 512 NMS_THRESH: 0.8 TEST: NMS_TYPE: nms_gpu MULTI_CLASSES_NMS: False USE_FAST_NMS: True SCORE_THRESH: 0.0 NMS_PRE_MAXSIZE: 4000 NMS_POST_MAXSIZE: 50 NMS_THRESH: 0.75 ROI_GRID_POOL: FEATURES_SOURCE: ['x_conv3','x_conv4'] PRE_MLP: True GRID_SIZE: 6 POOL_LAYERS: x_conv3: MLPS: [[32, 32], [32, 32]] QUERY_RANGES: [[2, 2, 2], [4, 4, 4]] POOL_RADIUS: [0.4, 0.8] NSAMPLE: [16, 16] POOL_METHOD: max_pool x_conv4: MLPS: [[32, 32], [32, 32]] QUERY_RANGES: [[2, 2, 2], [4, 4, 4]] POOL_RADIUS: [0.8, 1.6] NSAMPLE: [16, 16] POOL_METHOD: max_pool TARGET_CONFIG: BOX_CODER: ResidualCoder ROI_PER_IMAGE: 160 FG_RATIO: 0.5 SAMPLE_ROI_BY_EACH_CLASS: True CLS_SCORE_TYPE: roi_iou_x CLS_FG_THRESH: [0.75] CLS_BG_THRESH: [0.25] CLS_BG_THRESH_LO: 0.1 HARD_BG_RATIO: 0.8 REG_FG_THRESH: [0.55] ENABLE_HARD_SAMPLING: True HARD_SAMPLING_THRESH: [0.5] HARD_SAMPLING_RATIO: [0.5] LOSS_CONFIG: CLS_LOSS: BinaryCrossEntropy REG_LOSS: smooth-l1 CORNER_LOSS_REGULARIZATION: True GRID_3D_IOU_LOSS: False LOSS_WEIGHTS: { 'rcnn_cls_weight': 1.0, 'rcnn_reg_weight': 1.0, 'rcnn_corner_weight': 1.0, 'rcnn_iou3d_weight': 1.0, 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] } POST_PROCESSING: RECALL_THRESH_LIST: [0.3, 0.5, 0.7] SCORE_THRESH: 0.25 OUTPUT_RAW_SCORE: False EVAL_METRIC: kitti NMS_CONFIG: MULTI_CLASSES_NMS: False NMS_TYPE: nms_gpu NMS_THRESH: 0.1 NMS_PRE_MAXSIZE: 4096 NMS_POST_MAXSIZE: 500 OPTIMIZATION: BATCH_SIZE_PER_GPU: 2 NUM_EPOCHS: 40 OPTIMIZER: adam_onecycle LR: 0.01 WEIGHT_DECAY: 0.01 MOMENTUM: 0.9 MOMS: [0.95, 0.85] PCT_START: 0.4 DIV_FACTOR: 10 DECAY_STEP_LIST: [35, 45] LR_DECAY: 0.1 LR_CLIP: 0.0000001 LR_WARMUP: False WARMUP_EPOCH: 1 GRAD_NORM_CLIP: 10 
def statistics_info(cfg, ret_dict, metric, disp_dict):
    """Fold one batch's recall counters into `metric` and refresh `disp_dict`.

    Args:
        cfg: config object; only `cfg.MODEL.POST_PROCESSING.RECALL_THRESH_LIST`
            is read.
        ret_dict: per-batch counters. Keys `roi_<t>`, `rcnn_<t>` and `gt` are
            looked up; a missing key counts as 0.
        metric: running totals, updated in place. It must already contain
            `gt_num` and `recall_roi_<t>` / `recall_rcnn_<t>` for every
            threshold `t`.
        disp_dict: updated in place with a `recall_<t0>` entry, where `t0` is
            the first threshold of the list.
    """
    thresholds = cfg.MODEL.POST_PROCESSING.RECALL_THRESH_LIST

    for thresh in thresholds:
        tag = str(thresh)
        roi_hits = ret_dict.get('roi_%s' % tag, 0)
        metric['recall_roi_%s' % tag] += roi_hits
        rcnn_hits = ret_dict.get('rcnn_%s' % tag, 0)
        metric['recall_rcnn_%s' % tag] += rcnn_hits

    metric['gt_num'] += ret_dict.get('gt', 0)

    # Only the first threshold of the list is shown on the progress bar.
    first_tag = str(thresholds[0])
    summary = (
        metric['recall_roi_%s' % first_tag],
        metric['recall_rcnn_%s' % first_tag],
        metric['gt_num'],
    )
    disp_dict['recall_%s' % first_tag] = '(%d, %d) / %d' % summary
logger.info('*************** EPOCH %s EVALUATION *****************' % epoch_id) if dist_test: num_gpus = torch.cuda.device_count() local_rank = cfg.LOCAL_RANK % num_gpus model = torch.nn.parallel.DistributedDataParallel( model, device_ids=[local_rank], broadcast_buffers=False ) model.eval() if cfg.LOCAL_RANK == 0: progress_bar = tqdm.tqdm(total=len(dataloader), leave=True, desc='eval', dynamic_ncols=True) start_time = time.time() for i, batch_dict in enumerate(dataloader): load_data_to_gpu(batch_dict) #begin = time.time() with torch.no_grad(): pred_dicts, ret_dict, batch_dict = model(batch_dict) disp_dict = {} #end = time.time() #print(end-begin) statistics_info(cfg, ret_dict, metric, disp_dict) annos = dataset.generate_prediction_dicts( batch_dict, pred_dicts, class_names, output_path=final_output_dir if save_to_file else None ) det_annos += annos if cfg.LOCAL_RANK == 0: progress_bar.set_postfix(disp_dict) progress_bar.update() if cfg.LOCAL_RANK == 0: progress_bar.close() if dist_test: rank, world_size = common_utils.get_dist_info() det_annos = common_utils.merge_results_dist(det_annos, len(dataset), tmpdir=result_dir / 'tmpdir') metric = common_utils.merge_results_dist([metric], world_size, tmpdir=result_dir / 'tmpdir') logger.info('*************** Performance of EPOCH %s *****************' % epoch_id) sec_per_example = (time.time() - start_time) / len(dataloader.dataset) logger.info('Generate label finished(sec_per_example: %.4f second).' 
% sec_per_example) if cfg.LOCAL_RANK != 0: return {} ret_dict = {} if dist_test: for key, val in metric[0].items(): for k in range(1, world_size): metric[0][key] += metric[k][key] metric = metric[0] gt_num_cnt = metric['gt_num'] for cur_thresh in cfg.MODEL.POST_PROCESSING.RECALL_THRESH_LIST: cur_roi_recall = metric['recall_roi_%s' % str(cur_thresh)] / max(gt_num_cnt, 1) cur_rcnn_recall = metric['recall_rcnn_%s' % str(cur_thresh)] / max(gt_num_cnt, 1) logger.info('recall_roi_%s: %f' % (cur_thresh, cur_roi_recall)) logger.info('recall_rcnn_%s: %f' % (cur_thresh, cur_rcnn_recall)) ret_dict['recall/roi_%s' % str(cur_thresh)] = cur_roi_recall ret_dict['recall/rcnn_%s' % str(cur_thresh)] = cur_rcnn_recall total_pred_objects = 0 for anno in det_annos: total_pred_objects += anno['name'].__len__() logger.info('Average predicted number of objects(%d samples): %.3f' % (len(det_annos), total_pred_objects / max(1, len(det_annos)))) path = result_dir / 'result.pkl' if os.path.exists(path): path = result_dir / ('result_'+str(time.time())[:10]+'.pkl') with open(path, 'wb') as f: pickle.dump(det_annos, f) result_str, result_dict = dataset.evaluation( det_annos, class_names, eval_metric=cfg.MODEL.POST_PROCESSING.EVAL_METRIC, output_path=final_output_dir ) logger.info(result_str) ret_dict.update(result_dict) logger.info('Result is save to %s' % result_dir) logger.info('****************Evaluation done.*****************') return ret_dict if __name__ == '__main__': pass ================================================ FILE: tools/test.py ================================================ import os import argparse import datetime import glob import re import time from pathlib import Path import numpy as np import torch from tensorboardX import SummaryWriter from eval_utils import eval_utils from pcdet.config import cfg, cfg_from_list, cfg_from_yaml_file, log_config_to_file from pcdet.datasets import build_dataloader from pcdet.models import build_network from pcdet.utils import 
def parse_config():
    """Parse the evaluation command line and populate the global `cfg`.

    The YAML file named by `--cfg_file` is loaded into `cfg`. `cfg.TAG` is set
    to the file stem and `cfg.EXP_GROUP_PATH` to the path components between
    the first directory and the file name. NumPy's global RNG is seeded with
    1024, and any `--set` overrides are applied last.

    Returns:
        Tuple `(args, cfg)`: the parsed argparse namespace and the config.
    """
    # (flag, keyword arguments) in the order they are registered on the parser.
    option_specs = (
        ('--cfg_file', dict(type=str, default="cfgs/models/kitti/TED-S.yaml",
                            help='specify the config for training')),
        ('--batch_size', dict(type=int, default=None, required=False, help='batch size for training')),
        ('--workers', dict(type=int, default=0, help='number of workers for dataloader')),
        ('--extra_tag', dict(type=str, default='default', help='extra tag for this experiment')),
        ('--ckpt', dict(type=str, default="TED-S.pth", help='checkpoint to start from')),
        ('--launcher', dict(choices=['none', 'pytorch', 'slurm'], default='none')),
        ('--tcp_port', dict(type=int, default=18888, help='tcp port for distrbuted training')),
        ('--local_rank', dict(type=int, default=0, help='local rank for distributed training')),
        ('--set', dict(dest='set_cfgs', default=None, nargs=argparse.REMAINDER,
                       help='set extra config keys if needed')),
        ('--max_waiting_mins', dict(type=int, default=30, help='max waiting minutes')),
        ('--start_epoch', dict(type=int, default=0, help='')),
        ('--eval_tag', dict(type=str, default='default', help='eval tag for this experiment')),
        ('--eval_all', dict(action='store_true', default=False, help='whether to evaluate all checkpoints')),
        ('--ckpt_dir', dict(type=str, default=None, help='specify a ckpt directory to be evaluated if needed')),
        ('--save_to_file', dict(action='store_true', default=False, help='')),
    )

    parser = argparse.ArgumentParser(description='arg parser')
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    cfg_from_yaml_file(args.cfg_file, cfg)
    cfg.TAG = Path(args.cfg_file).stem
    # Drop the leading 'cfgs' directory and the trailing 'xxxx.yaml' file name.
    path_parts = args.cfg_file.split('/')
    cfg.EXP_GROUP_PATH = '/'.join(path_parts[1:-1])

    np.random.seed(1024)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs, cfg)

    return args, cfg
def get_no_evaluated_ckpt(ckpt_dir, ckpt_record_file, args):
    """Return the oldest checkpoint in `ckpt_dir` that still needs evaluating.

    Checkpoints are the files matching ``*checkpoint_epoch_*.pth`` and are
    visited in order of modification time (oldest first).

    Args:
        ckpt_dir: directory that holds the checkpoint files.
        ckpt_record_file: text file with one already-evaluated epoch id per
            line. Blank lines are ignored.
        args: namespace providing `start_epoch`; checkpoints whose epoch is
            below it are skipped.

    Returns:
        `(epoch_id, ckpt_path)`, where `epoch_id` is the string captured from
        the file name, or `(-1, None)` when nothing is left to evaluate.
    """
    ckpt_list = glob.glob(os.path.join(ckpt_dir, '*checkpoint_epoch_*.pth'))
    ckpt_list.sort(key=os.path.getmtime)

    # Close the record file deterministically and tolerate blank lines
    # (e.g. a trailing newline), which used to crash float('').
    with open(ckpt_record_file, 'r') as record:
        evaluated_epochs = {float(line) for line in record if line.strip()}

    for cur_ckpt in ckpt_list:
        # Raw string with an escaped dot so only a literal ".pth" is matched.
        num_list = re.findall(r'checkpoint_epoch_(.*)\.pth', cur_ckpt)
        if not num_list:
            continue

        epoch_id = num_list[-1]
        # Names containing 'optim' are not model checkpoints.
        if 'optim' in epoch_id:
            continue

        epoch_num = float(epoch_id)
        if epoch_num not in evaluated_epochs and int(epoch_num) >= args.start_epoch:
            return epoch_id, cur_ckpt

    return -1, None
def main():
    """Entry point of the evaluation script.

    Parses the command line, optionally initialises distributed execution,
    prepares the output and log directories, builds the test dataloader and
    the network, then evaluates either every checkpoint in the checkpoint
    directory (`--eval_all`) or the single checkpoint given by `--ckpt`.
    """
    args, cfg = parse_config()

    dist_test = args.launcher != 'none'
    if dist_test:
        init_dist = getattr(common_utils, 'init_dist_%s' % args.launcher)
        total_gpus, cfg.LOCAL_RANK = init_dist(args.tcp_port, args.local_rank, backend='nccl')
    else:
        total_gpus = 1

    # A batch size given on the command line is the global one; split it per GPU.
    if args.batch_size is not None:
        assert args.batch_size % total_gpus == 0, 'Batch size should match the number of gpus'
        args.batch_size = args.batch_size // total_gpus
    else:
        args.batch_size = cfg.OPTIMIZATION.BATCH_SIZE_PER_GPU

    output_dir = cfg.ROOT_DIR / 'output' / cfg.EXP_GROUP_PATH / cfg.TAG / args.extra_tag
    output_dir.mkdir(parents=True, exist_ok=True)

    eval_output_dir = output_dir / 'eval'
    if args.eval_all:
        eval_output_dir = eval_output_dir / 'eval_all_default'
    else:
        # The last run of digits in the checkpoint name is used as the epoch id.
        num_list = [] if args.ckpt is None else re.findall(r'\d+', args.ckpt)
        epoch_id = num_list[-1] if num_list else 'no_number'
        eval_output_dir = eval_output_dir / ('epoch_%s' % epoch_id) / cfg.DATA_CONFIG.DATA_SPLIT['test']

    if args.eval_tag is not None:
        eval_output_dir = eval_output_dir / args.eval_tag
    eval_output_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    log_file = eval_output_dir / ('log_eval_%s.txt' % timestamp)
    logger = common_utils.create_logger(log_file, rank=cfg.LOCAL_RANK)

    # log to file
    logger.info('**********************Start logging**********************')
    gpu_list = os.environ.get('CUDA_VISIBLE_DEVICES', 'ALL')
    logger.info('CUDA_VISIBLE_DEVICES=%s' % gpu_list)

    if dist_test:
        logger.info('total_batch_size: %d' % (total_gpus * args.batch_size))
    for key, val in vars(args).items():
        logger.info('{:16} {}'.format(key, val))
    log_config_to_file(cfg, logger=logger)

    if args.ckpt_dir is not None:
        ckpt_dir = args.ckpt_dir
    else:
        ckpt_dir = output_dir / 'ckpt'

    test_set, test_loader, sampler = build_dataloader(
        dataset_cfg=cfg.DATA_CONFIG,
        class_names=cfg.CLASS_NAMES,
        batch_size=args.batch_size,
        dist=dist_test,
        workers=args.workers,
        logger=logger,
        training=False,
    )

    model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=test_set)

    with torch.no_grad():
        if not args.eval_all:
            eval_single_ckpt(model, test_loader, args, eval_output_dir, logger, epoch_id, dist_test=dist_test)
        else:
            repeat_eval_ckpt(model, test_loader, args, eval_output_dir, logger, ckpt_dir, dist_test=dist_test)
def main():
    """Entry point of the training script.

    Steps, in order:
      1. parse the command line and optionally initialise distributed training;
      2. create the output / checkpoint directories and the logger;
      3. build the training dataloader, the network and the optimizer;
      4. load a pretrained model and/or resume from a checkpoint (the one
         given by `--ckpt`, otherwise the most recently modified checkpoint
         found in the checkpoint directory);
      5. train;
      6. evaluate the checkpoints of the last 10 epochs.
    """
    import shutil  # local import: only needed to archive the config file

    args, cfg = parse_config()
    if args.launcher == 'none':
        dist_train = False
        total_gpus = 1
    else:
        total_gpus, cfg.LOCAL_RANK = getattr(common_utils, 'init_dist_%s' % args.launcher)(
            args.tcp_port, args.local_rank, backend='nccl'
        )
        dist_train = True

    # A batch size given on the command line is the global one; split it per GPU.
    if args.batch_size is None:
        args.batch_size = cfg.OPTIMIZATION.BATCH_SIZE_PER_GPU
    else:
        assert args.batch_size % total_gpus == 0, 'Batch size should match the number of gpus'
        args.batch_size = args.batch_size // total_gpus

    args.epochs = cfg.OPTIMIZATION.NUM_EPOCHS if args.epochs is None else args.epochs

    if args.fix_random_seed:
        common_utils.set_random_seed(666)

    output_dir = cfg.ROOT_DIR / 'output' / cfg.EXP_GROUP_PATH / cfg.TAG / args.extra_tag
    ckpt_dir = output_dir / 'ckpt'
    output_dir.mkdir(parents=True, exist_ok=True)
    ckpt_dir.mkdir(parents=True, exist_ok=True)

    log_file = output_dir / ('log_train_%s.txt' % datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
    logger = common_utils.create_logger(log_file, rank=cfg.LOCAL_RANK)

    # log to file
    logger.info('**********************Start logging**********************')
    gpu_list = os.environ.get('CUDA_VISIBLE_DEVICES', 'ALL')
    logger.info('CUDA_VISIBLE_DEVICES=%s' % gpu_list)

    if dist_train:
        logger.info('total_batch_size: %d' % (total_gpus * args.batch_size))
    for key, val in vars(args).items():
        logger.info('{:16} {}'.format(key, val))
    log_config_to_file(cfg, logger=logger)
    if cfg.LOCAL_RANK == 0:
        # Keep a copy of the config next to the results. shutil.copy avoids
        # the shell, so paths containing spaces or metacharacters are safe.
        shutil.copy(str(args.cfg_file), str(output_dir))

    tb_log = SummaryWriter(log_dir=str(output_dir / 'tensorboard')) if cfg.LOCAL_RANK == 0 else None

    # -----------------------create dataloader & network & optimizer---------------------------
    train_set, train_loader, train_sampler = build_dataloader(
        dataset_cfg=cfg.DATA_CONFIG,
        class_names=cfg.CLASS_NAMES,
        batch_size=args.batch_size,
        dist=dist_train, workers=args.workers,
        logger=logger,
        training=True,
        merge_all_iters_to_one_epoch=args.merge_all_iters_to_one_epoch,
        total_epochs=args.epochs,
    )

    model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=train_set)
    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    model.cuda()

    optimizer = build_optimizer(model, cfg.OPTIMIZATION)

    # load checkpoint if it is possible
    # NOTE: `to_cpu` must receive the boolean `dist_train`. The name `dist` is
    # the imported `torch.distributed` module, which is always truthy.
    start_epoch = it = 0
    last_epoch = -1
    if args.pretrained_model is not None:
        model.load_params_from_file(filename=args.pretrained_model, to_cpu=dist_train, logger=logger)

    if args.ckpt is not None:
        it, start_epoch = model.load_params_with_optimizer(
            args.ckpt, to_cpu=dist_train, optimizer=optimizer, logger=logger
        )
        last_epoch = start_epoch + 1
    else:
        # No explicit checkpoint: resume from the most recently modified one, if any.
        ckpt_list = glob.glob(str(ckpt_dir / '*checkpoint_epoch_*.pth'))
        if ckpt_list:
            ckpt_list.sort(key=os.path.getmtime)
            it, start_epoch = model.load_params_with_optimizer(
                ckpt_list[-1], to_cpu=dist_train, optimizer=optimizer, logger=logger
            )
            last_epoch = start_epoch + 1

    model.train()  # before wrap to DistributedDataParallel to support fixed some parameters
    if dist_train:
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[cfg.LOCAL_RANK % torch.cuda.device_count()]
        )
    logger.info(model)

    lr_scheduler, lr_warmup_scheduler = build_scheduler(
        optimizer, total_iters_each_epoch=len(train_loader), total_epochs=args.epochs,
        last_epoch=last_epoch, optim_cfg=cfg.OPTIMIZATION
    )

    # -----------------------start training---------------------------
    logger.info('**********************Start training %s/%s(%s)**********************'
                % (cfg.EXP_GROUP_PATH, cfg.TAG, args.extra_tag))

    train_model(
        model,
        optimizer,
        train_loader,
        model_func=model_fn_decorator(),
        lr_scheduler=lr_scheduler,
        optim_cfg=cfg.OPTIMIZATION,
        start_epoch=start_epoch,
        total_epochs=args.epochs,
        start_iter=it,
        rank=cfg.LOCAL_RANK,
        tb_log=tb_log,
        ckpt_save_dir=ckpt_dir,
        train_sampler=train_sampler,
        lr_warmup_scheduler=lr_warmup_scheduler,
        ckpt_save_interval=args.ckpt_save_interval,
        max_ckpt_save_num=args.max_ckpt_save_num,
        merge_all_iters_to_one_epoch=args.merge_all_iters_to_one_epoch
    )

    logger.info('**********************End training %s/%s(%s)**********************\n\n\n'
                % (cfg.EXP_GROUP_PATH, cfg.TAG, args.extra_tag))

    logger.info('**********************Start evaluation %s/%s(%s)**********************' %
                (cfg.EXP_GROUP_PATH, cfg.TAG, args.extra_tag))
    test_set, test_loader, sampler = build_dataloader(
        dataset_cfg=cfg.DATA_CONFIG,
        class_names=cfg.CLASS_NAMES,
        batch_size=args.batch_size,
        dist=dist_train, workers=args.workers, logger=logger, training=False
    )
    eval_output_dir = output_dir / 'eval' / 'eval_with_train'
    eval_output_dir.mkdir(parents=True, exist_ok=True)
    args.start_epoch = max(args.epochs - 10, 0)  # Only evaluate the last 10 epochs

    repeat_eval_ckpt(
        model.module if dist_train else model,
        test_loader, args, eval_output_dir, logger, ckpt_dir,
        dist_test=dist_train
    )
    logger.info('**********************End evaluation %s/%s(%s)**********************' %
                (cfg.EXP_GROUP_PATH, cfg.TAG, args.extra_tag))
try:
    # `Iterable` lives in collections.abc; the alias in `collections` was
    # removed in Python 3.10.
    from collections.abc import Iterable
except ImportError:
    from collections import Iterable

import torch
from torch import nn
from torch._utils import _unflatten_dense_tensors
from torch.nn.utils import parameters_to_vector
def model_g2master_g(model_params, master_params, flat_master: bool = False) -> None:
    """Copy the `model_params` gradients to `master_params` for the optimizer step.

    Args:
        model_params: list of groups, each a list of model parameters.
        master_params: list of groups aligned with `model_params`. With
            `flat_master=True` a non-empty group holds one flat parameter
            covering the whole model group; otherwise it holds one master
            parameter per model parameter.
        flat_master: selects which of the two layouts above is used.

    In the non-flat layout a model parameter without gradient resets the
    matching master gradient to None. In the flat layout it contributes zeros
    to the flat gradient vector.
    """
    if flat_master:
        for model_group, master_group in zip(model_params, master_params):
            if len(master_group) != 0:
                # A parameter that received no gradient must still occupy its
                # slot in the flat vector, so substitute zeros for it.
                grads = [
                    (p.grad.data if p.grad is not None else torch.zeros_like(p.data)).float()
                    for p in model_group
                ]
                master_group[0].grad.data.copy_(parameters_to_vector(grads))
    else:
        for model_group, master_group in zip(model_params, master_params):
            for model, master in zip(model_group, master_group):
                if model.grad is None:
                    master.grad = None
                    continue
                if master.grad is None:
                    # empty_like keeps the exact shape, including 0-dim
                    # parameters, for which `new(*size)` returned a
                    # shape-(0,) tensor.
                    master.grad = torch.empty_like(master.data)
                master.grad.data.copy_(model.grad.data)
def listify(p=None, q=None):
    """Make `p` listy and the same length as `q`.

    Args:
        p: value to convert. None becomes an empty list; a string or any
            non-iterable becomes a one-element list; any other iterable is
            materialised into a list.
        q: target length. An int is used directly, None means "the length
            of `p`", anything else contributes `len(q)`.

    Returns:
        A new list of the target length. A one-element `p` is repeated to
        reach it.

    Raises:
        AssertionError: if `p` has more than one element and its length
            differs from the target length.
    """
    if p is None:
        p = []
    elif isinstance(p, str) or not isinstance(p, Iterable):
        p = [p]
    else:
        # Materialise first so the repetition below is list repetition for
        # every iterable. `range` does not support `*`, and array types
        # would multiply their values instead of repeating them.
        p = list(p)

    if isinstance(q, int):
        n = q
    elif q is None:
        n = len(p)
    else:
        n = len(q)

    if len(p) == 1:
        p = p * n
    assert len(p) == n, f'List len mismatch ({len(p)} vs {n})'
    return p
opt_func = getattr(self, 'opt_func', self.opt.__class__) split_groups = split_bn_bias(layer_groups) opt = opt_func([{'params': trainable_params(l), 'lr': 0} for l in split_groups]) return self.create(opt_func, self.lr, layer_groups, wd=self.wd, true_wd=self.true_wd, bn_wd=self.bn_wd) def __repr__(self) -> str: return f'OptimWrapper over {repr(self.opt)}.\nTrue weight decay: {self.true_wd}' # Pytorch optimizer methods def step(self) -> None: "Set weight decay and step optimizer." # weight decay outside of optimizer step (AdamW) if self.true_wd: for lr, wd, pg1, pg2 in zip(self._lr, self._wd, self.opt.param_groups[::2], self.opt.param_groups[1::2]): for p in pg1['params']: # When some parameters are fixed: Shaoshuai Shi if p.requires_grad is False: continue p.data.mul_(1 - wd * lr) if self.bn_wd: for p in pg2['params']: # When some parameters are fixed: Shaoshuai Shi if p.requires_grad is False: continue p.data.mul_(1 - wd * lr) self.set_val('weight_decay', listify(0, self._wd)) self.opt.step() def zero_grad(self) -> None: "Clear optimizer gradients." self.opt.zero_grad() # Passthrough to the inner opt. def __getattr__(self, k: str): return getattr(self.opt, k, None) def clear(self): "Reset the state of the inner optimizer." sd = self.state_dict() sd['state'] = {} self.load_state_dict(sd) # Hyperparameters as properties @property def lr(self) -> float: return self._lr[-1] @lr.setter def lr(self, val: float) -> None: self._lr = self.set_val('lr', listify(val, self._lr)) @property def mom(self) -> float: return self._mom[-1] @mom.setter def mom(self, val: float) -> None: if 'momentum' in self.opt_keys: self.set_val('momentum', listify(val, self._mom)) elif 'betas' in self.opt_keys: self.set_val('betas', (listify(val, self._mom), self._beta)) self._mom = listify(val, self._mom) @property def beta(self) -> float: return None if self._beta is None else self._beta[-1] @beta.setter def beta(self, val: float) -> None: "Set beta (or alpha as makes sense for given optimizer)." 
if val is None: return if 'betas' in self.opt_keys: self.set_val('betas', (self._mom, listify(val, self._beta))) elif 'alpha' in self.opt_keys: self.set_val('alpha', listify(val, self._beta)) self._beta = listify(val, self._beta) @property def wd(self) -> float: return self._wd[-1] @wd.setter def wd(self, val: float) -> None: "Set weight decay." if not self.true_wd: self.set_val('weight_decay', listify(val, self._wd), bn_groups=self.bn_wd) self._wd = listify(val, self._wd) # Helper functions def read_defaults(self) -> None: "Read the values inside the optimizer for the hyper-parameters." self._beta = None if 'lr' in self.opt_keys: self._lr = self.read_val('lr') if 'momentum' in self.opt_keys: self._mom = self.read_val('momentum') if 'alpha' in self.opt_keys: self._beta = self.read_val('alpha') if 'betas' in self.opt_keys: self._mom, self._beta = self.read_val('betas') if 'weight_decay' in self.opt_keys: self._wd = self.read_val('weight_decay') def set_val(self, key: str, val, bn_groups: bool = True): "Set `val` inside the optimizer dictionary at `key`." if is_tuple(val): val = [(v1, v2) for v1, v2 in zip(*val)] for v, pg1, pg2 in zip(val, self.opt.param_groups[::2], self.opt.param_groups[1::2]): pg1[key] = v if bn_groups: pg2[key] = v return val def read_val(self, key: str): "Read a hyperparameter `key` in the optimizer dictionary." val = [pg[key] for pg in self.opt.param_groups[::2]] if is_tuple(val[0]): val = [o[0] for o in val], [o[1] for o in val] return val class FastAIMixedOptim(OptimWrapper): @classmethod def create(cls, opt_func, lr, layer_groups, model, flat_master=False, loss_scale=512.0, **kwargs): "Create an `optim.Optimizer` from `opt_func` with `lr`. Set lr on `layer_groups`." 
class LRSchedulerStep(object):
    """Piecewise, step-indexed schedule for an optimizer's ``lr`` and ``mom``.

    Args:
        fai_optimizer: object whose ``lr`` and ``mom`` attributes are assigned
            on every ``step`` call.
        total_step: total number of steps the schedule spans.
        lr_phases: sequence of ``(start_fraction, func)`` pairs. A phase runs
            from ``int(start_fraction * total_step)`` to the start of the next
            phase (or ``total_step`` for the last one). ``func`` receives the
            progress through its phase and returns the learning rate; it may
            also be given as a string, which is evaluated to obtain the callable.
        mom_phases: same layout as ``lr_phases``, for momentum.

    Attributes:
        lr_phases / mom_phases: lists of ``(start_step, end_step, func)``.

    Raises:
        AssertionError: if start fractions are not strictly increasing or the
            first phase does not start at step 0.
    """

    def __init__(self, fai_optimizer: "OptimWrapper", total_step, lr_phases, mom_phases):
        self.optimizer = fai_optimizer
        self.total_step = total_step
        self.lr_phases = self._build_phases(lr_phases, total_step)
        self.mom_phases = self._build_phases(mom_phases, total_step)

    @staticmethod
    def _build_phases(phases, total_step):
        """Convert ``(start_fraction, func)`` pairs into ``(start, end, func)`` step triples."""
        phases = list(phases)
        built = []
        for i, (start, lambda_func) in enumerate(phases):
            if i > 0:
                # Compare fractions with fractions. The previous code compared
                # the already-scaled integer start of the prior phase with the
                # unscaled fraction of this one, which rejected any valid
                # schedule with three or more phases.
                assert phases[i - 1][0] < start
            if isinstance(lambda_func, str):
                # NOTE(security): eval executes arbitrary code; phase strings
                # must only ever come from trusted configuration.
                lambda_func = eval(lambda_func)
            if i < len(phases) - 1:
                end = int(phases[i + 1][0] * total_step)
            else:
                end = total_step
            built.append((int(start * total_step), end, lambda_func))
        assert built[0][0] == 0
        return built

    def step(self, step):
        """Set ``optimizer.lr`` and ``optimizer.mom`` for the given global step.

        Every phase that has already started is applied in order, so the value
        from the latest started phase is the one that remains. Phases that
        collapsed to zero length after integer rounding are skipped instead of
        dividing by zero.
        """
        for start, end, func in self.lr_phases:
            if step >= start and end > start:
                self.optimizer.lr = func((step - start) / (end - start))
        for start, end, func in self.mom_phases:
            if step >= start and end > start:
                self.optimizer.mom = func((step - start) / (end - start))
def train_one_epoch(model, optimizer, train_loader, model_func, lr_scheduler, accumulated_iter, optim_cfg,
                    rank, tbar, total_it_each_epoch, dataloader_iter, tb_log=None, leave_pbar=False):
    """Run ``total_it_each_epoch`` training iterations and return the new global step.

    Args:
        model: module being trained; switched to train mode each iteration.
        optimizer: object providing ``step()`` and ``zero_grad()``; its current
            learning rate is read from ``optimizer.lr`` when available, else
            from ``optimizer.param_groups[0]['lr']``.
        train_loader: iterable with ``len()``; re-iterated when exhausted.
        model_func: callable ``(model, batch) -> (loss, tb_dict, disp_dict)``.
        lr_scheduler: object whose ``step(accumulated_iter)`` is called before
            every iteration.
        accumulated_iter: global iteration counter at entry.
        optim_cfg: config exposing ``GRAD_NORM_CLIP``.
        rank: process rank; progress bars and training scalars are only
            written when ``rank == 0``.
        tbar: outer progress bar, updated with ``disp_dict`` on rank 0.
        total_it_each_epoch: number of iterations to run.
        dataloader_iter: iterator to continue from; replaced by a fresh
            ``iter(train_loader)`` when the epoch covers the whole loader.
        tb_log: optional writer with ``add_scalar(tag, value, step)``.
        leave_pbar: forwarded to ``tqdm`` as ``leave``.

    Returns:
        ``accumulated_iter`` advanced by ``total_it_each_epoch``.
    """
    if total_it_each_epoch == len(train_loader):
        dataloader_iter = iter(train_loader)

    if rank == 0:
        pbar = tqdm.tqdm(total=total_it_each_epoch, leave=leave_pbar, desc='train', dynamic_ncols=True)

    # Number of iterations whose gradients are accumulated before one optimizer step.
    accus = 1
    for cur_it in range(total_it_each_epoch):
        try:
            batch = next(dataloader_iter)
        except StopIteration:
            dataloader_iter = iter(train_loader)
            batch = next(dataloader_iter)
            print('new iters')

        lr_scheduler.step(accumulated_iter)

        try:
            cur_lr = float(optimizer.lr)
        except Exception:
            # Optimizers without a usable ``lr`` attribute fall back to the
            # first param group. Only ``Exception`` is caught so that
            # KeyboardInterrupt / SystemExit are no longer swallowed here.
            cur_lr = optimizer.param_groups[0]['lr']

        if tb_log is not None:
            tb_log.add_scalar('meta_data/learning_rate', cur_lr, accumulated_iter)

        model.train()

        loss, tb_dict, disp_dict = model_func(model, batch)
        # Scale so that accumulated gradients average over ``accus`` iterations.
        loss = loss / accus
        loss.backward()
        if ((cur_it + 1) % accus) == 0:
            clip_grad_norm_(model.parameters(), optim_cfg.GRAD_NORM_CLIP)
            optimizer.step()
            optimizer.zero_grad()

        accumulated_iter += 1
        # Undo the accumulation scaling for the value shown to the user.
        disp_dict.update({'loss': loss.item() * accus, 'lr': cur_lr})

        # log to console and tensorboard
        if rank == 0:
            pbar.update()
            pbar.set_postfix(dict(total_it=accumulated_iter))
            tbar.set_postfix(disp_dict)
            tbar.refresh()

            if tb_log is not None:
                tb_log.add_scalar('train/loss', loss, accumulated_iter)
                tb_log.add_scalar('meta_data/learning_rate', cur_lr, accumulated_iter)
                for key, val in tb_dict.items():
                    tb_log.add_scalar('train/' + key, val, accumulated_iter)
    if rank == 0:
        pbar.close()
    return accumulated_iter
def model_state_to_cpu(model_state):
    """Return a new mapping of the same type with every value moved via ``.cpu()``."""
    model_state_cpu = type(model_state)()  # ordered dict
    for key, val in model_state.items():
        model_state_cpu[key] = val.cpu()
    return model_state_cpu


def checkpoint_state(model=None, optimizer=None, epoch=None, it=None):
    """Assemble the dictionary that is written to a checkpoint file.

    Args:
        model: optional module. A ``DistributedDataParallel`` wrapper is
            unwrapped through ``.module`` and its state is copied to CPU;
            any other model contributes ``state_dict()`` as-is.
        optimizer: optional object providing ``state_dict()``.
        epoch: value stored under ``'epoch'``.
        it: value stored under ``'it'``.

    Returns:
        dict with keys ``'epoch'``, ``'it'``, ``'model_state'``,
        ``'optimizer_state'`` and ``'version'``. ``'version'`` is
        ``'pcdet+<version>'`` when ``pcdet`` can be imported and exposes
        ``__version__``, otherwise ``'none'``.
    """
    optim_state = optimizer.state_dict() if optimizer is not None else None

    if model is None:
        model_state = None
    elif isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model_state = model_state_to_cpu(model.module.state_dict())
    else:
        model_state = model.state_dict()

    try:
        import pcdet
        version = 'pcdet+' + pcdet.__version__
    except Exception:
        # Best-effort version tag; a missing or broken pcdet install must not
        # prevent saving, but interrupts are no longer swallowed.
        version = 'none'

    return {'epoch': epoch, 'it': it, 'model_state': model_state, 'optimizer_state': optim_state, 'version': version}


def save_checkpoint(state, filename='checkpoint'):
    """Serialize ``state`` to ``'<filename>.pth'`` with ``torch.save``.

    The legacy (non-zipfile) serialization format is requested explicitly.
    The former split-out of the optimizer state was guarded by ``if False``
    and could never run, so it has been removed; the optimizer state stays
    inside the single checkpoint file exactly as before.
    """
    filename = '{}.pth'.format(filename)
    torch.save(state, filename, _use_new_zipfile_serialization=False)
def rotate_points_along_z(points, angle):
    """Rotate the first three channels of each point set about the z-axis.

    Args:
        points: (B, N, 3 + C) array or tensor; channels past the third are
            passed through unchanged.
        angle: (B) rotation angles along the z-axis, angle increases x ==> y.

    Returns:
        The rotated points with the same layout as ``points``. A numpy array
        is returned when ``points`` was given as numpy, otherwise a tensor.
    """
    pts, came_from_numpy = check_numpy_to_torch(points)
    theta, _ = check_numpy_to_torch(angle)

    batch = pts.shape[0]
    cos_t = torch.cos(theta)
    sin_t = torch.sin(theta)
    zero = theta.new_zeros(batch)
    one = theta.new_ones(batch)

    # Row-major entries of one 3x3 matrix per batch element.
    entries = (
        cos_t, sin_t, zero,
        -sin_t, cos_t, zero,
        zero, zero, one,
    )
    rotation = torch.stack(entries, dim=1).view(-1, 3, 3).float()

    xyz_rotated = torch.matmul(pts[:, :, 0:3], rotation)
    extra_channels = pts[:, :, 3:]
    result = torch.cat((xyz_rotated, extra_channels), dim=-1)

    if came_from_numpy:
        return result.numpy()
    return result
def draw_sphere_pts(pts, color=(0, 1, 0), fig=None, bgcolor=(0, 0, 0), scale_factor=0.2):
    """Render points as spheres, then draw an origin cube and three axis lines.

    Args:
        pts: numpy array indexed as ``pts[:, 0..2]``, or an object exposing
            ``.cpu().numpy()`` that yields one.
        color: either a colour tuple handed straight to mayavi, or a numpy
            array. An array whose first dimension is 1 is treated as a single
            0-255 colour and rescaled to 0-1; any other array is written into
            the glyph lookup table.
        fig: existing figure, or ``None`` to create a 600x600 one.
        bgcolor: background colour for a newly created figure.
        scale_factor: sphere scale passed to ``mlab.points3d``.

    Returns:
        The figure that was drawn on.
    """
    if not isinstance(pts, np.ndarray):
        pts = pts.cpu().numpy()

    if fig is None:
        fig = mlab.figure(figure=None, bgcolor=bgcolor, fgcolor=None, engine=None, size=(600, 600))

    if isinstance(color, np.ndarray) and color.shape[0] == 1:
        single = color[0]
        color = (single[0] / 255.0, single[1] / 255.0, single[2] / 255.0)

    xs, ys, zs = pts[:, 0], pts[:, 1], pts[:, 2]
    if not isinstance(color, np.ndarray):
        mlab.points3d(xs, ys, zs, mode='sphere', color=color,
                      colormap='gnuplot', scale_factor=scale_factor, figure=fig)
    else:
        # One RGBA lookup-table row per point, fully opaque.
        lut = np.zeros((len(pts), 4), dtype=np.uint8)
        lut[:, 0:3] = color
        lut[:, 3] = 255
        glyphs = mlab.points3d(xs, ys, zs, np.arange(0, len(lut)), mode='sphere',
                               scale_factor=scale_factor, figure=fig)
        glyphs.glyph.color_mode = 'color_by_scalar'
        glyphs.glyph.scale_mode = 'scale_by_vector'
        glyphs.module_manager.scalar_lut_manager.lut.table = lut

    mlab.points3d(0, 0, 0, color=(1, 1, 1), mode='cube', scale_factor=0.2)

    # x, y and z axis segments of length 3, each with its own colour.
    axes = (
        ([0, 3], [0, 0], [0, 0], (0, 0, 1)),
        ([0, 0], [0, 3], [0, 0], (0, 1, 0)),
        ([0, 0], [0, 0], [0, 3], (1, 0, 0)),
    )
    for axis_x, axis_y, axis_z, axis_color in axes:
        mlab.plot3d(axis_x, axis_y, axis_z, color=axis_color, line_width=3, tube_radius=None, figure=fig)

    return fig
def draw_scenes(points, gt_boxes=None, ref_boxes=None, ref_scores=None, ref_labels=None):
    """Draw a point cloud with optional ground-truth and reference boxes.

    Args:
        points: numpy array, or an object exposing ``.cpu().numpy()``.
        gt_boxes: optional boxes, drawn in blue (at most 100).
        ref_boxes: optional boxes. Drawn in green when ``ref_labels`` is
            ``None``; otherwise coloured per label from ``box_colormap``.
        ref_scores: optional per-box values shown as text next to each
            reference box.
        ref_labels: optional per-box labels; must be integer-valued because
            they are iterated with ``range(min, max + 1)``.

    Returns:
        The mayavi figure, after the camera view has been set.
    """
    def _as_numpy(value):
        # Leave None and numpy arrays alone; convert anything else via .cpu().numpy().
        if value is None or isinstance(value, np.ndarray):
            return value
        return value.cpu().numpy()

    if not isinstance(points, np.ndarray):
        points = points.cpu().numpy()
    ref_boxes = _as_numpy(ref_boxes)
    gt_boxes = _as_numpy(gt_boxes)
    ref_scores = _as_numpy(ref_scores)
    ref_labels = _as_numpy(ref_labels)

    fig = visualize_pts(points)
    fig = draw_multi_grid_range(fig, bv_range=(0, -40, 80, 40))
    if gt_boxes is not None:
        corners3d = boxes_to_corners_3d(gt_boxes)
        fig = draw_corners3d(corners3d, fig=fig, color=(0, 0, 1), max_num=100)

    if ref_boxes is not None and len(ref_boxes) > 0:
        ref_corners3d = boxes_to_corners_3d(ref_boxes)
        if ref_labels is None:
            fig = draw_corners3d(ref_corners3d, fig=fig, color=(0, 1, 0), cls=ref_scores, max_num=100)
        else:
            for k in range(ref_labels.min(), ref_labels.max() + 1):
                cur_color = tuple(box_colormap[k % len(box_colormap)])
                mask = (ref_labels == k)
                # Labels may be supplied without scores; indexing None would
                # raise TypeError, so only slice the scores when they exist.
                cur_scores = None if ref_scores is None else ref_scores[mask]
                fig = draw_corners3d(ref_corners3d[mask], fig=fig, color=cur_color, cls=cur_scores, max_num=100)
    mlab.view(azimuth=-179, elevation=54.0, distance=104.0, roll=90.0)
    return fig
(8, 3) if cls is not None: if isinstance(cls, np.ndarray): mlab.text3d(b[6, 0], b[6, 1], b[6, 2], '%.2f' % cls[n], scale=(0.3, 0.3, 0.3), color=color, figure=fig) else: mlab.text3d(b[6, 0], b[6, 1], b[6, 2], '%s' % cls[n], scale=(0.3, 0.3, 0.3), color=color, figure=fig) for k in range(0, 4): i, j = k, (k + 1) % 4 mlab.plot3d([b[i, 0], b[j, 0]], [b[i, 1], b[j, 1]], [b[i, 2], b[j, 2]], color=color, tube_radius=tube_radius, line_width=line_width, figure=fig) i, j = k + 4, (k + 1) % 4 + 4 mlab.plot3d([b[i, 0], b[j, 0]], [b[i, 1], b[j, 1]], [b[i, 2], b[j, 2]], color=color, tube_radius=tube_radius, line_width=line_width, figure=fig) i, j = k, k + 4 mlab.plot3d([b[i, 0], b[j, 0]], [b[i, 1], b[j, 1]], [b[i, 2], b[j, 2]], color=color, tube_radius=tube_radius, line_width=line_width, figure=fig) i, j = 0, 5 mlab.plot3d([b[i, 0], b[j, 0]], [b[i, 1], b[j, 1]], [b[i, 2], b[j, 2]], color=color, tube_radius=tube_radius, line_width=line_width, figure=fig) i, j = 1, 4 mlab.plot3d([b[i, 0], b[j, 0]], [b[i, 1], b[j, 1]], [b[i, 2], b[j, 2]], color=color, tube_radius=tube_radius, line_width=line_width, figure=fig) return fig