Repository: DecaYale/RNNPose Branch: main Commit: ff223f3cb6bf Files: 126 Total size: 12.7 MB Directory structure: gitextract_p7a82okx/ ├── .gitignore ├── LICENSE.md ├── README.md ├── builder/ │ ├── __init__.py │ ├── dataset_builder.py │ ├── input_reader_builder.py │ ├── losses_builder.py │ ├── lr_scheduler_builder.py │ ├── optimizer_builder.py │ └── rnnpose_builder.py ├── config/ │ ├── default.py │ └── linemod/ │ ├── copy.sh │ ├── copy_occ.sh │ ├── template_fw0.5.yml │ └── template_fw0.5_occ.yml ├── data/ │ ├── __init__.py │ ├── dataset.py │ ├── linemod/ │ │ └── linemod_config.py │ ├── linemod_dataset.py │ ├── preprocess.py │ ├── transforms.py │ └── ycb/ │ └── basic.py ├── doc/ │ └── prepare_data.md ├── docker/ │ ├── Dockerfile │ └── freeze.yml ├── geometry/ │ ├── __init__.py │ ├── cholesky.py │ ├── diff_render.py │ ├── diff_render_optim.py │ ├── einsum.py │ ├── intrinsics.py │ ├── projective_ops.py │ ├── se3.py │ └── transformation.py ├── model/ │ ├── CFNet.py │ ├── HybridNet.py │ ├── PoseRefiner.py │ ├── RNNPose.py │ ├── descriptor2D.py │ ├── descriptor3D.py │ └── losses.py ├── scripts/ │ ├── compile_3rdparty.sh │ ├── eval.sh │ ├── eval_lmocc.sh │ ├── run_dataformatter.sh │ ├── run_datainfo_generation.sh │ └── train.sh ├── thirdparty/ │ ├── kpconv/ │ │ ├── __init__.py │ │ ├── cpp_wrappers/ │ │ │ ├── compile_wrappers.sh │ │ │ ├── cpp_neighbors/ │ │ │ │ ├── build.bat │ │ │ │ ├── neighbors/ │ │ │ │ │ ├── neighbors.cpp │ │ │ │ │ └── neighbors.h │ │ │ │ ├── setup.py │ │ │ │ └── wrapper.cpp │ │ │ ├── cpp_subsampling/ │ │ │ │ ├── build.bat │ │ │ │ ├── grid_subsampling/ │ │ │ │ │ ├── grid_subsampling.cpp │ │ │ │ │ └── grid_subsampling.h │ │ │ │ ├── setup.py │ │ │ │ └── wrapper.cpp │ │ │ └── cpp_utils/ │ │ │ ├── cloud/ │ │ │ │ ├── cloud.cpp │ │ │ │ └── cloud.h │ │ │ └── nanoflann/ │ │ │ └── nanoflann.hpp │ │ ├── kernels/ │ │ │ ├── dispositions/ │ │ │ │ └── k_015_center_3D.ply │ │ │ └── kernel_points.py │ │ ├── kpconv_blocks.py │ │ └── lib/ │ │ ├── __init__.py │ │ ├── ply.py │ │ ├── timer.py │ │ └── utils.py │ ├── nn/ │ │ ├── _ext.c │ │ ├── nn_utils.py │ │ ├── setup.py │ │ └── src/ │ │ ├── ext.h │ │ └── nearest_neighborhood.cu │ ├── raft/ │ │ ├── corr.py │ │ ├── extractor.py │ │ ├── update.py │ │ └── utils/ │ │ ├── __init__.py │ │ ├── augmentor.py │ │ ├── flow_viz.py │ │ ├── frame_utils.py │ │ └── utils.py │ └── vsd/ │ └── inout.py ├── tools/ │ ├── eval.py │ ├── generate_data_info_deepim_0_orig.py │ ├── generate_data_info_deepim_1_syn.py │ ├── generate_data_info_deepim_2_posecnnval.py │ ├── generate_data_info_v2_deepim.py │ ├── train.py │ └── transform_data_format.py ├── torchplus/ │ ├── __init__.py │ ├── metrics.py │ ├── nn/ │ │ ├── __init__.py │ │ ├── functional.py │ │ └── modules/ │ │ ├── __init__.py │ │ ├── common.py │ │ └── normalization.py │ ├── ops/ │ │ ├── __init__.py │ │ └── array_ops.py │ ├── tools.py │ └── train/ │ ├── __init__.py │ ├── checkpoint.py │ ├── common.py │ ├── fastai_optim.py │ ├── learning_schedules.py │ ├── learning_schedules_fastai.py │ └── optim.py ├── utils/ │ ├── __init__.py │ ├── config_io.py │ ├── distributed_utils.py │ ├── eval_metric.py │ ├── furthest_point_sample.py │ ├── geometric.py │ ├── img_utils.py │ ├── log_tool.py │ ├── pose_utils.py │ ├── pose_utils_np.py │ ├── progress_bar.py │ ├── rand_utils.py │ ├── singleton.py │ ├── timer.py │ ├── util.py │ └── visualize.py └── weights/ ├── gru_update.pth ├── img_fea_enc.pth └── superpoint_v1.pth ================================================ FILE CONTENTS ================================================ 
================================================ FILE: .gitignore ================================================ **/*.old **/*.bak .DS_Store # Created by https://www.toptal.com/developers/gitignore/api/vscode,python,jupyternotebooks # Edit at https://www.toptal.com/developers/gitignore?templates=vscode,python,jupyternotebooks ### JupyterNotebooks ### # gitignore template for Jupyter Notebooks # website: http://jupyter.org/ .ipynb_checkpoints */.ipynb_checkpoints/* # IPython profile_default/ ipython_config.py # Remove previous ipynb_checkpoints # git rm -r .ipynb_checkpoints/ ### Python ### # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ #lib/ #lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ pytestdebug.log # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ doc/_build/ # PyBuilder target/ # Jupyter Notebook # IPython # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ pythonenv* # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # profiling data .prof ### vscode ### .vscode/* !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json !.vscode/extensions.json *.code-workspace # End of https://www.toptal.com/developers/gitignore/api/vscode,python,jupyternotebooks ================================================ FILE: LICENSE.md ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # RNNPose: Recurrent 6-DoF Object Pose Refinement with Robust Correspondence Field Estimation and Pose Optimization [Yan Xu](https://decayale.github.io/), [Kwan-Yee Lin](https://kwanyeelin.github.io/), [Guofeng Zhang](http://www.cad.zju.edu.cn/home/gfzhang/), [Xiaogang Wang](https://www.ee.cuhk.edu.hk/en-gb/people/academic-staff/professors/prof-xiaogang-wang), [Hongsheng Li](http://www.ee.cuhk.edu.hk/~hsli/). *Conference on Computer Vision and Pattern Recognition (CVPR), 2022.* [[Paper]](https://scholar.google.com/scholar?hl=zh-CN&as_sdt=0%2C5&q=RNNPose%3A+Recurrent+6-DoF+Object+Pose+Refinement+with+Robust+Correspondence+Field+Estimation+and+Pose+Optimization&btnG=) ## 1. Framework The basic pipeline of our proposed RNNPose. 
(a) Before refinement, a reference image is rendered according to the initial object pose (shown in a fused view). (b) Our RNN-based framework recurrently refines the object pose based on the estimated correspondence field between the reference and target images. The pose is optimized to be consistent with the reliable correspondence estimates highlighted by the similarity score map (built from learned 3D-2D descriptors) via differentiable Levenberg-Marquardt (LM) optimization. (c) The output refined pose.
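In code, the recurrent loop of (a)-(c) can be summarized roughly as follows. This is a minimal sketch, not the actual implementation: `render_fn`, `flow_fn` and `lm_fn` are hypothetical stand-ins for the differentiable renderer, the correspondence-field estimator and the differentiable LM solver found under *geometry/* and *model/*.

```
# Minimal sketch of the recurrent refinement loop (hypothetical helpers).
def refine_pose(pose_init, target_image, render_fn, flow_fn, lm_fn, iters=4):
    pose = pose_init
    for _ in range(iters):
        reference = render_fn(pose)                      # (a) render under the current pose
        flow, scores = flow_fn(reference, target_image)  # (b) correspondence field + score map
        pose = lm_fn(pose, flow, scores)                 # LM update weighted by the scores
    return pose                                          # (c) refined pose
```

The iteration count corresponds to `ITER_COUNT` in the config templates (4 in *config/linemod/template_fw0.5.yml*).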


## 2. Pose Estimation with Occlusions and Erroneous Pose Initializations
### Estimated Poses and Intermediate System Outputs from Different Recurrent Iterations


### Pose Estimates with Erroneous Pose Initializations
Visualization of our pose estimates (first row) on the Occlusion LINEMOD dataset and the similarity score maps (second row) used to downweight unreliable correspondences during pose optimization. In the pose visualizations, the white boxes represent the erroneous initial poses, the red boxes show the poses estimated by our algorithm, and the ground-truth boxes are in blue. Here, the initial poses for pose refinement originally come from PVNet, but significant disturbances have been added for robustness testing.
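To make the downweighting concrete, below is a minimal numpy sketch of one damped (Levenberg-Marquardt) pose update over score-weighted reprojection residuals. It only illustrates the idea and is not the repository's implementation (the differentiable version lives in the *geometry/* modules); the function names and the weight vector `w`, which plays the role of the similarity scores, are hypothetical.

```
import numpy as np

def skew(v):
    # cross-product matrix: skew(v) @ x == np.cross(v, x)
    return np.array([[0., -v[2], v[1]],
                     [v[2], 0., -v[0]],
                     [-v[1], v[0], 0.]])

def rodrigues(w):
    # rotation matrix from an axis-angle vector
    th = np.linalg.norm(w)
    if th < 1e-12:
        return np.eye(3)
    K_ = skew(w / th)
    return np.eye(3) + np.sin(th) * K_ + (1. - np.cos(th)) * K_ @ K_

def lm_pose_step(R, t, X, x_obs, w, K, lmbda=1e-4):
    """One weighted LM step on 2D reprojection residuals.
    X: (N,3) model points, x_obs: (N,2) correspondences in pixels,
    w: (N,) similarity-score weights, K: (3,3) camera intrinsics."""
    Xc = X @ R.T + t                                  # model points in camera frame
    proj = Xc @ K.T
    r = (proj[:, :2] / proj[:, 2:3] - x_obs).ravel()  # (2N,) residual vector
    fx, fy = K[0, 0], K[1, 1]
    J = np.zeros((2 * len(X), 6))
    for i, (Xi, Yi, Zi) in enumerate(Xc):
        Jproj = np.array([[fx / Zi, 0., -fx * Xi / Zi**2],
                          [0., fy / Zi, -fy * Yi / Zi**2]])
        # left se(3) perturbation of the pose: dXc = -[Xc]_x dw + dt
        J[2 * i:2 * i + 2] = Jproj @ np.hstack([-skew(Xc[i]), np.eye(3)])
    W = np.repeat(w, 2)                               # one weight per residual entry
    H = J.T @ (W[:, None] * J) + lmbda * np.eye(6)    # damped normal equations
    g = J.T @ (W * r)
    dw, dt = np.split(np.linalg.solve(H, -g), 2)
    return rodrigues(dw) @ R, rodrigues(dw) @ t + dt  # left-multiplied pose update
```

Correspondences with small weights contribute little to `H` and `g`, so the solved update is dominated by the matches that the score map trusts; `lmbda` is the LM damping factor (cf. `LM_LMBDA` in *config/default.py*).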
## 3. Installation
### Install the Docker
A dockerfile is provided to help with the environment setup. You need to install [docker](https://docs.docker.com/get-docker/) and [nvidia-docker2](https://github.com/NVIDIA/nvidia-docker) first and then set up the docker image and start up a container with the following commands:
```
cd RNNPose/docker
sudo docker build -t rnnpose .
sudo docker run -it --runtime=nvidia --ipc=host --volume="HOST_VOLUME_YOU_WANT_TO_MAP:DOCKER_VOLUME" -e DISPLAY=$DISPLAY -e QT_X11_NO_MITSHM=1 rnnpose bash
```
If you are not familiar with [docker](https://docs.docker.com/get-docker/), you could also install the dependencies manually following the provided dockerfile.
### Compile the Dependencies
```
cd RNNPose/scripts
bash compile_3rdparty.sh
```
## 4. Data Preparation
We follow [DeepIM](https://github.com/liyi14/mx-DeepIM) and [PVNet](https://github.com/zju3dv/pvnet-rendering) to preprocess the training data for LINEMOD. You could follow the steps [here](doc/prepare_data.md) for data preparation.
## 5. Test with the Pretrained Models
We train our model on a mixture of real and synthetic data from the LINEMOD dataset. The trained models have been uploaded to [OneDrive](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/ESPTVyUryHdGl65fRAxN51gBBayJJb9NpCqWA-tY2CFKJQ?e=R9bcLW). You can download them and put them into the directory *weights/* for testing. An example bash script is provided below for reference.
```
export PYTHONPATH="$PROJECT_ROOT_PATH:$PYTHONPATH"
export PYTHONPATH="$PROJECT_ROOT_PATH/thirdparty:$PYTHONPATH"

seq=cat
gpu=1
start_gpu_id=0

mkdir $model_dir
train_file=/home/yxu/Projects/Works/RNNPose_release/tools/eval.py
config_path=/mnt/workspace/Works/RNNPose_release/config/linemod/"$seq"_fw0.5.yml
pretrain=$PROJECT_ROOT_PATH/weights/trained_models/"$seq".tckpt

python -u $train_file multi_proc_train \
    --config_path $config_path \
    --model_dir $model_dir/results \
    --use_dist True \
    --dist_port 10000 \
    --gpus_per_node $gpu \
    --optim_eval True \
    --use_apex True \
    --world_size $gpu \
    --start_gpu_id $start_gpu_id \
    --pretrained_path $pretrain
```
Note that you need to specify PROJECT_ROOT_PATH, i.e. the absolute directory of the project folder *RNNPose*, set model_dir to the desired output directory, and modify the respective data paths in the configuration files to the locations of the downloaded data before executing the commands. You could also refer to the commands below for evaluation with our provided scripts.
### Evaluation on LINEMOD
```
bash scripts/eval.sh
```
### Evaluation on LINEMOD OCCLUSION
```
bash scripts/eval_lmocc.sh
```
## Training from Scratch
An example training script is provided.
```
bash scripts/train.sh
```
## 6. Citation
If you find our code useful, please cite our papers.
```
@inproceedings{xu2022rnnpose,
  title={RNNPose: Recurrent 6-DoF Object Pose Refinement with Robust Correspondence Field Estimation and Pose Optimization},
  author={Xu, Yan and Lin, Kwan-Yee and Zhang, Guofeng and Wang, Xiaogang and Li, Hongsheng},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year={2022}
}

@article{xu2024rnnpose,
  title={RNNPose: 6-DoF Object Pose Estimation via Recurrent Correspondence Field Estimation and Pose Optimization},
  author={Xu, Yan and Lin, Kwan-Yee and Zhang, Guofeng and Wang, Xiaogang and Li, Hongsheng},
  journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year={2024},
  publisher={IEEE}
}
```
## 7.
Acknowledgement The skeleton of this code is borrowed from [RSLO](https://github.com/DecaYale/RSLO). We also would like to thank the public codebases [PVNet](https://github.com/zju3dv/pvnet), [RAFT](https://github.com/princeton-vl/RAFT), [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork) and [DeepV2D](https://github.com/princeton-vl/DeepV2D). ================================================ FILE: builder/__init__.py ================================================ ================================================ FILE: builder/dataset_builder.py ================================================ from data.dataset import get_dataset_class import numpy as np from functools import partial from data.preprocess import preprocess, preprocess_deepim, patch_crop def build(input_reader_config, training, ): prep_cfg = input_reader_config.preprocess dataset_cfg = input_reader_config.dataset cfg = input_reader_config dataset_cls = get_dataset_class(dataset_cfg.dataset_class_name) if 0:# 'DeepIM' in dataset_cfg.dataset_class_name: # patch_cropper = partial(patch_crop, margin_ratio=0.2, output_size=256 ) patch_cropper = None prep_func = partial(preprocess_deepim, max_points=dataset_cfg.max_points, correspondence_radius=prep_cfg.correspondence_radius_threshold, patch_cropper=patch_cropper, image_scale=prep_cfg.get('image_scale', 1), ) else: prep_func = partial(preprocess, max_points=dataset_cfg.max_points, correspondence_radius=prep_cfg.correspondence_radius_threshold, image_scale=prep_cfg.get('image_scale', 1), crop_param=prep_cfg.get('crop_param', None), kp_3d_param=prep_cfg.get('kp_3d_param', {"kp_num":30} ), use_coords_as_3d_feat=prep_cfg.get('use_coords_as_3d_feat', False) ) dataset = dataset_cls( info_path=dataset_cfg.info_path, root_path=dataset_cfg.root_path, model_point_dim=dataset_cfg.model_point_dim, is_train=training, prep_func=prep_func, seq_names=dataset_cfg.get('seq_names', None), cfg=dataset_cfg ) return dataset ================================================ FILE: builder/input_reader_builder.py ================================================ from torch.utils.data import Dataset from builder import dataset_builder class DatasetWrapper(Dataset): """ convert our dataset to Dataset class in pytorch. 
""" def __init__(self, dataset): self._dataset = dataset def __len__(self): return len(self._dataset) def __getitem__(self, idx): return self._dataset[idx] @property def dataset(self): return self._dataset def build(input_reader_config, training, ) -> DatasetWrapper: dataset = dataset_builder.build( input_reader_config, training, ) dataset = DatasetWrapper(dataset) return dataset ================================================ FILE: builder/losses_builder.py ================================================ from model import losses def build(loss_config): criterions = {} criterions["metric_loss"] =losses.MetricLoss(configs=loss_config.metric_loss,) criterions["pose_loss"] = losses.PointAlignmentLoss(loss_weight=1) return criterions ================================================ FILE: builder/lr_scheduler_builder.py ================================================ from torchplus.train import learning_schedules_fastai as lsf import torch import numpy as np def build(optimizer_config, optimizer, total_step): optimizer_type = list(optimizer_config.keys())[0] if optimizer_type == 'rms_prop_optimizer': config = optimizer_config.rms_prop_optimizer lr_scheduler = _create_learning_rate_scheduler( config.learning_rate, optimizer, total_step=total_step) if optimizer_type == 'momentum_optimizer': config = optimizer_config.momentum_optimizer lr_scheduler = _create_learning_rate_scheduler( config.learning_rate, optimizer, total_step=total_step) if optimizer_type == 'adam_optimizer': config = optimizer_config.adam_optimizer lr_scheduler = _create_learning_rate_scheduler( config.learning_rate, optimizer, total_step=total_step) return lr_scheduler def _create_learning_rate_scheduler(learning_rate_config, optimizer, total_step): """Create optimizer learning rate scheduler based on config. Args: learning_rate_config: A LearningRate proto message. Returns: A learning rate. Raises: ValueError: when using an unsupported input data type. 
""" lr_scheduler = None # learning_rate_type = learning_rate_config.WhichOneof('learning_rate') learning_rate_type = list(learning_rate_config.keys())[0] if learning_rate_type == 'multi_phase': config = learning_rate_config.multi_phase lr_phases = [] mom_phases = [] for phase_cfg in config.phases: lr_phases.append((phase_cfg.start, phase_cfg.lambda_func)) mom_phases.append( (phase_cfg.start, phase_cfg.momentum_lambda_func)) lr_scheduler = lsf.LRSchedulerStep( optimizer, total_step, lr_phases, mom_phases) if learning_rate_type == 'one_cycle': config = learning_rate_config.one_cycle if len(config.lr_maxs)>1: assert(len(config.lr_maxs)==4 ) lr_max=[] # for i in range(len(config.lr_maxs)): # lr_max += [config.lr_maxs[i]]*optimizer.param_segs[i] lr_max = np.array(list(config.lr_maxs) ) else: lr_max = config.lr_max lr_scheduler = lsf.OneCycle( optimizer, total_step, lr_max, list(config.moms), config.div_factor, config.pct_start) if learning_rate_type == 'exponential_decay': config = learning_rate_config.exponential_decay lr_scheduler = lsf.ExponentialDecay( optimizer, total_step, config.initial_learning_rate, config.decay_length, config.decay_factor, config.staircase) if learning_rate_type == 'exponential_decay_warmup': config = learning_rate_config.exponential_decay_warmup lr_scheduler = lsf.ExponentialDecayWarmup( optimizer, total_step, config.initial_learning_rate, config.decay_length, config.decay_factor, config.div_factor, config.pct_start, config.staircase) if learning_rate_type == 'manual_stepping': config = learning_rate_config.manual_stepping lr_scheduler = lsf.ManualStepping( optimizer, total_step, list(config.boundaries), list(config.rates)) if lr_scheduler is None: raise ValueError('Learning_rate %s not supported.' % learning_rate_type) return lr_scheduler ================================================ FILE: builder/optimizer_builder.py ================================================ from torchplus.train import learning_schedules from torchplus.train import optim import torch from torch import nn from torchplus.train.fastai_optim import OptimWrapper, FastAIMixedOptim from functools import partial def children(m: nn.Module): "Get children of `m`." return list(m.children()) def num_children(m: nn.Module) -> int: "Get number of children modules in `m`." 
return len(children(m))


# return a list of the smallest (leaf) modules
def flatten_model(m):
    if m is None:
        return []
    return sum(map(flatten_model, m.children()), []) if num_children(m) else [m]


# def get_layer_groups(m): return [nn.Sequential(*flatten_model(m))]
def get_layer_groups(m):
    return [nn.ModuleList(flatten_model(m))]


def get_voxeLO_net_layer_groups(net):
    vfe_grp = get_layer_groups(net.voxel_feature_extractor)
    mfe_grp = get_layer_groups(net.middle_feature_extractor)
    op_grp = get_layer_groups(net.odom_predictor)
    # other_grp = get_layer_groups(net._rotation_loss) + \
    #     get_layer_groups(net._translation_loss) + \
    #     get_layer_groups(net._pyramid_rotation_loss) + \
    #     get_layer_groups(net._pyramid_translation_loss) + \
    #     get_layer_groups(net._consistency_loss)
    other_grp = get_layer_groups(nn.Sequential(net._rotation_loss,
                                               net._translation_loss,
                                               net._pyramid_rotation_loss,
                                               net._pyramid_translation_loss,
                                               net._consistency_loss,
                                               ))
    return [vfe_grp, mfe_grp, op_grp, other_grp]


def build(optimizer_config, net, name=None, mixed=False, loss_scale=512.0):
    optimizer_type = list(optimizer_config.keys())[0]
    print("Optimizer:", optimizer_type)
    optimizer = None
    if optimizer_type == 'rms_prop_optimizer':
        config = optimizer_config.rms_prop_optimizer
        optimizer_func = partial(
            torch.optim.RMSprop,
            alpha=config.decay,
            momentum=config.momentum_optimizer_value,
            eps=config.epsilon)
    if optimizer_type == 'momentum_optimizer':
        config = optimizer_config.momentum_optimizer
        # torch.optim.SGD takes no `eps` argument, so it is not forwarded here
        optimizer_func = partial(
            torch.optim.SGD,
            momentum=config.momentum_optimizer_value)
    if optimizer_type == 'adam_optimizer':
        config = optimizer_config.adam_optimizer
        if optimizer_config.fixed_weight_decay:
            optimizer_func = partial(
                torch.optim.Adam, betas=(0.9, 0.99), amsgrad=config.amsgrad)
        else:  # regular adam
            optimizer_func = partial(torch.optim.Adam, amsgrad=config.amsgrad)
    optimizer = OptimWrapper.create(
        optimizer_func,
        3e-3,
        get_layer_groups(net),  # get_voxeLO_net_layer_groups(net),
        wd=config.weight_decay,
        true_wd=optimizer_config.fixed_weight_decay,
        bn_wd=True)
    print(hasattr(optimizer, "_amp_stash"), '_amp_stash')
    if optimizer is None:
        raise ValueError('Optimizer %s not supported.' % optimizer_type)
    if optimizer_config.use_moving_average:
        raise ValueError('torch don\'t support moving average')
    if name is None:
        # assign a name to optimizer for checkpoint system
        optimizer.name = optimizer_type
    else:
        optimizer.name = name
    return optimizer

================================================ FILE: builder/rnnpose_builder.py ================================================

from builder import losses_builder
from model.RNNPose import get_posenet_class
import model.RNNPose


def build(model_cfg, measure_time=False, testing=False):
    """Build the RNNPose network instance.
""" criterions=losses_builder.build(model_cfg.loss) net = get_posenet_class(model_cfg.network_class_name)( criterions=criterions, opt=model_cfg) return net ================================================ FILE: config/default.py ================================================ from yacs.config import CfgNode as CN from utils.singleton import Singleton import os def _merge_a_into_b(a, b): """Merge config dictionary a into config dictionary b, clobbering the options in b whenever they are also specified in a. """ # if type(a) is not dict: if not isinstance(a, (dict)): return for k, v in a.items(): # a must specify keys that are in b if not k in b: raise KeyError('{} is not a valid config key'.format(k)) # the types must match, too old_type = type(b[k]) if old_type is not type(v): if isinstance(b[k], np.ndarray): v = np.array(v, dtype=b[k].dtype) else: raise ValueError(('Type mismatch ({} vs. {}) ' 'for config key: {}').format(type(b[k]), type(v), k)) # recursively merge dicts # if type(v) is dict: if isinstance(v, (dict)): try: _merge_a_into_b(a[k], b[k]) except: print('Error under config key: {}'.format(k)) raise else: b[k] = v class Config(metaclass=Singleton): def __init__(self): ############## ↓ Basic ↓ ############## self.ROOT_CN = CN() self.ROOT_CN.BASIC = CN() self.ROOT_CN.BASIC.input_size=[480,640] #h,w self.ROOT_CN.BASIC.crop_size=[320,320] #h,w self.ROOT_CN.BASIC.zoom_crop_size=[320,320] #h,w self.ROOT_CN.BASIC.render_image_size=[320,320]#h,w self.ROOT_CN.BASIC.patch_num=64#h,w ############## ↓ LM OPTIM ↓ ############## self.ROOT_CN.LM=CN() self.ROOT_CN.LM.LM_LMBDA= 0.0001 self.ROOT_CN.LM.EP_LMBDA=100 ############## ↓ data ↓ ############## self.ROOT_CN.DATA=CN() self.ROOT_CN.DATA.OBJ_ROOT="" #h,w self.ROOT_CN.DATA.VOC_ROOT=f"{os.path.dirname(os.path.abspath(__file__)) }/../EXPDATA/" #h,w def __get_item__(self, key): return self.ROOT_CN.__getitem__(key) def merge(self, config_dict, sub_key=None): if sub_key is not None: _merge_a_into_b(config_dict, self.ROOT_CN[sub_key]) else: _merge_a_into_b(config_dict, self.ROOT_CN) ############## ↓ Model ↓ ############## # _CN.model = CN() # _CN.model.input_size=[480,640] # _CN.model.crop_size=[320,320] # def get_cfg_defaults(): def get_cfg(Node=None): """Get a yacs CfgNode object with default values for my_project.""" # Return a clone so that the defaults will not be altered # This is for the "local variable" use pattern # return _CN.clone() if Node is None: return Config() else: return Config().__get_item__(Node) ================================================ FILE: config/linemod/copy.sh ================================================ declare -a arr=("glue" "ape" "cat" "phone" "eggbox" "benchvise" "lamp" "camera" "can" "driller" "duck" "holepuncher" "iron" ) #create training scripts for seq in "${arr[@]}" do echo "$seq" cat template_fw0.5.yml > "$seq"_fw0.5.yml sed -i "s/SEQ_NAME/$seq/g" "$seq"_fw0.5.yml done arraylength=${#arr[@]} ================================================ FILE: config/linemod/copy_occ.sh ================================================ declare -a arr=("glue" "ape" "cat" "phone" "eggbox" "benchvise" "lamp" "camera" "can" "driller" "duck" "holepuncher" "iron" ) #create training scripts for seq in "${arr[@]}" do echo "$seq" cat template_fw0.5_occ.yml > "$seq"_fw0.5_occ.yml sed -i "s/SEQ_NAME/$seq/g" "$seq"_fw0.5_occ.yml done ================================================ FILE: config/linemod/template_fw0.5.yml ================================================ vars: input_h: &input_h 320 input_w: &input_w 320 
batch_size: &batch_size 1 descriptor_dim: &descriptor_dim 32 correspondence_radius_threshold: &correspondence_radius_threshold 0.01 #0.04 seq_name: &seq_name ["SEQ_NAME"] BASIC: zoom_crop_size: [240,240] model: input_h: *input_h input_w: *input_w batch_size: *batch_size seq_len: 2 network_class_name: RNNPose descriptor_net: module_class_name: HybridFeaNet keypoints_detector_2d: input_dim: 3 descriptor_dim: *descriptor_dim remove_borders: 4 normalize_output: True keypoints_detector_3d: #KPCONV configurations num_layers: 4 KP_extent: 2.0 batch_norm_momentum: 0.02 use_batch_norm: true in_points_dim: 3 fixed_kernel_points: 'center' #['center', 'verticals', 'none'] KP_influence: 'linear' aggregation_mode: 'sum' #['closest', 'sum'] modulated: false first_subsampling_dl: 0.025 conv_radius: 2.5 deform_radius: 5 in_features_dim: 1 #3 first_feats_dim: 128 num_kernel_points: 15 final_feats_dim: *descriptor_dim #256 #32 normalize_output: True gnn_feats_dim: 128 #256 context_fea_extractor_3d: #KPCONV configurations num_layers: 4 KP_extent: 2.0 batch_norm_momentum: 0.02 use_batch_norm: true in_points_dim: 3 fixed_kernel_points: 'center' #['center', 'verticals', 'none'] KP_influence: 'linear' aggregation_mode: 'sum' #['closest', 'sum'] modulated: false first_subsampling_dl: 0.025 conv_radius: 2.5 deform_radius: 5 in_features_dim: 1 #3 first_feats_dim: 128 num_kernel_points: 15 final_feats_dim: 256 #*descriptor_dim #256 #32 normalize_output: False gnn_feats_dim: 128 #256 motion_net: IS_CALIBRATED: True RESCALE_IMAGES: False ITER_COUNT: 4 RENDER_ITER_COUNT: 3 #2 #1 #3 TRAIN_RESIDUAL_WEIGHT: 0 #0.1 TRAIN_FLOW_WEIGHT: 0.5 #0.1 #1 TRAIN_REPROJ_WEIGHT: 0 OPTIM_ITER_COUNT: 1 FLOW_NET: 'raft' SYN_OBSERVED: False ONLINE_CROP: True raft: small: False #True fea_net: "default" mixed_precision: True # pretrained_model: "/mnt/workspace/datasets/weights/models/raft-small.pth" pretrained_model: "/mnt/workspace/datasets/weights/models/raft-chairs.pth" input_dim: 3 iters: 1 loss: metric_loss: type: "normal" pos_radius: *correspondence_radius_threshold # the radius used to find the positive correspondences safe_radius: 0.02 #0.13 pos_margin: 0.1 neg_margin: 1.4 max_points: 256 matchability_radius: 0.06 weight: 0.001 saliency_loss: loss_weight: 1 reg_weight: 0.01 geometric_loss: loss_weight: 1 reg_weight: 0.5 #0.1 train_config: optimizer: adam_optimizer: learning_rate: one_cycle: lr_maxs: [] lr_max: 0.0001 # moms: [0.95, 0.85] div_factor: 10.0 pct_start: 0.01 #0.05 amsgrad: false weight_decay: 0.0001 fixed_weight_decay: true use_moving_average: false steps: 200000 steps_per_eval: 10000 loss_scale_factor: -1 clear_metrics_every_epoch: true train_input_reader: dataset: dataset_class_name: "LinemodDeepIMSynRealV2" info_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/deepim/linemod_orig_deepim.info.train", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/deepim/linemod_syn_deepim.info.train", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/linemod_fusesformatted_all10k_deepim.info.train", ] root_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LM6d_converted/LM6d_refine", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LM6d_converted/LM6d_refine_syn", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LINEMOD/fuse_formatted/" ] model_point_dim: 3 max_points: 20000 seq_names: *seq_name batch_size: *batch_size preprocess: correspondence_radius_threshold: *correspondence_radius_threshold num_workers: 3 image_scale: 1 crop_param: rand_crop: false 
margin_ratio: 0.85 output_size: *input_h crop_with_init_pose: True eval_input_reader: dataset: dataset_class_name: "LinemodDeepIMSynRealV2" info_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/linemod_posecnn.info.eval" ] root_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LM6d_converted/LM6d_refine" ] model_point_dim: 3 max_points: 20000 seq_names: *seq_name batch_size: *batch_size preprocess: correspondence_radius_threshold: *correspondence_radius_threshold num_workers: 3 image_scale: 1 crop_param: rand_crop: false margin_ratio: 0.85 #0.5 output_size: *input_h # crop_with_init_pose: True ================================================ FILE: config/linemod/template_fw0.5_occ.yml ================================================ vars: input_h: &input_h 320 input_w: &input_w 320 batch_size: &batch_size 1 descriptor_dim: &descriptor_dim 32 correspondence_radius_threshold: &correspondence_radius_threshold 0.01 #0.04 seq_name: &seq_name ["SEQ_NAME"] BASIC: zoom_crop_size: [240,240] model: input_h: *input_h input_w: *input_w batch_size: *batch_size seq_len: 2 network_class_name: RNNPose descriptor_net: module_class_name: HybridFeaNet keypoints_detector_2d: input_dim: 3 descriptor_dim: *descriptor_dim remove_borders: 4 normalize_output: True keypoints_detector_3d: #KPCONV configurations num_layers: 4 KP_extent: 2.0 batch_norm_momentum: 0.02 use_batch_norm: true in_points_dim: 3 fixed_kernel_points: 'center' #['center', 'verticals', 'none'] KP_influence: 'linear' aggregation_mode: 'sum' #['closest', 'sum'] modulated: false first_subsampling_dl: 0.025 conv_radius: 2.5 deform_radius: 5 in_features_dim: 1 #3 first_feats_dim: 128 num_kernel_points: 15 final_feats_dim: *descriptor_dim #256 #32 normalize_output: True gnn_feats_dim: 128 #256 context_fea_extractor_3d: #KPCONV configurations num_layers: 4 KP_extent: 2.0 batch_norm_momentum: 0.02 use_batch_norm: true in_points_dim: 3 fixed_kernel_points: 'center' #['center', 'verticals', 'none'] KP_influence: 'linear' aggregation_mode: 'sum' #['closest', 'sum'] modulated: false first_subsampling_dl: 0.025 conv_radius: 2.5 deform_radius: 5 in_features_dim: 1 #3 first_feats_dim: 128 num_kernel_points: 15 final_feats_dim: 256 #*descriptor_dim #256 #32 normalize_output: False gnn_feats_dim: 128 #256 motion_net: IS_CALIBRATED: True RESCALE_IMAGES: False ITER_COUNT: 4 RENDER_ITER_COUNT: 3 #2 #1 #3 TRAIN_RESIDUAL_WEIGHT: 0 #0.1 TRAIN_FLOW_WEIGHT: 0.5 #0.1 #1 TRAIN_REPROJ_WEIGHT: 0 OPTIM_ITER_COUNT: 1 FLOW_NET: 'raft' SYN_OBSERVED: False ONLINE_CROP: True raft: small: False #True fea_net: "default" mixed_precision: True # pretrained_model: "/mnt/workspace/datasets/weights/models/raft-small.pth" pretrained_model: "/mnt/workspace/datasets/weights/models/raft-chairs.pth" input_dim: 3 iters: 1 loss: metric_loss: type: "normal" pos_radius: *correspondence_radius_threshold # the radius used to find the positive correspondences safe_radius: 0.02 #0.13 pos_margin: 0.1 neg_margin: 1.4 max_points: 256 matchability_radius: 0.06 weight: 0.001 saliency_loss: loss_weight: 1 reg_weight: 0.01 geometric_loss: loss_weight: 1 reg_weight: 0.5 #0.1 train_config: optimizer: adam_optimizer: learning_rate: one_cycle: lr_maxs: [] lr_max: 0.0001 # moms: [0.95, 0.85] div_factor: 10.0 pct_start: 0.01 #0.05 amsgrad: false weight_decay: 0.0001 fixed_weight_decay: true use_moving_average: false steps: 200000 steps_per_eval: 10000 loss_scale_factor: -1 clear_metrics_every_epoch: true train_input_reader: dataset: dataset_class_name: "LinemodDeepIMSynRealV2" 
info_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/deepim/linemod_orig_deepim.info.train", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/deepim/linemod_syn_deepim.info.train", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/linemod_fusesformatted_all10k_deepim.info.train", ] root_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LM6d_converted/LM6d_refine", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LM6d_converted/LM6d_refine_syn", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LINEMOD/fuse_formatted/" ] model_point_dim: 3 max_points: 20000 seq_names: *seq_name batch_size: *batch_size preprocess: correspondence_radius_threshold: *correspondence_radius_threshold num_workers: 3 image_scale: 1 crop_param: rand_crop: false margin_ratio: 0.85 output_size: *input_h crop_with_init_pose: True eval_input_reader: dataset: dataset_class_name: "LinemodDeepIMSynRealV2" info_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/deepim/linemod_bop_lmocc_pvnetdr.info.eval"] root_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/lmo"] init_post_type: "PVNET_LINEMOD_OCC" model_point_dim: 3 max_points: 20000 seq_names: *seq_name batch_size: *batch_size preprocess: correspondence_radius_threshold: *correspondence_radius_threshold num_workers: 3 image_scale: 1 crop_param: rand_crop: false margin_ratio: 0.85 #0.5 output_size: *input_h # crop_with_init_pose: True ================================================ FILE: data/__init__.py ================================================ from . import dataset from . import linemod_dataset ================================================ FILE: data/dataset.py ================================================ import pathlib import pickle import time from functools import partial import numpy as np REGISTERED_DATASET_CLASSES = {} def register_dataset(cls, name=None): global REGISTERED_DATASET_CLASSES if name is None: name = cls.__name__ assert name not in REGISTERED_DATASET_CLASSES, f"exist class: {REGISTERED_DATASET_CLASSES}" REGISTERED_DATASET_CLASSES[name] = cls return cls def get_dataset_class(name): global REGISTERED_DATASET_CLASSES assert name in REGISTERED_DATASET_CLASSES, f"available class: {REGISTERED_DATASET_CLASSES}" return REGISTERED_DATASET_CLASSES[name] class Dataset(object): NumPointFeatures = -1 def __getitem__(self, index): raise NotImplementedError def __len__(self): raise NotImplementedError def _read_data(self, query): raise NotImplementedError def evaluation(self, dt_annos, output_dir): """Dataset must provide a evaluation function to evaluate model.""" raise NotImplementedError ================================================ FILE: data/linemod/linemod_config.py ================================================ import numpy as np diameters = { 'cat': 15.2633, 'ape': 9.74298, 'benchvise': 28.6908, 'bowl': 17.1185, 'cam': 17.1593, 'camera': 17.1593, 'can': 19.3416, 'cup': 12.5961, 'driller': 25.9425, 'duck': 10.7131, 'eggbox': 17.6364, 'glue': 16.4857, 'holepuncher': 14.8204, 'iron': 30.3153, 'lamp': 28.5155, 'phone': 20.8394 } linemod_cls_names = ['ape', 'cam', 'cat', 'duck', 'glue', 'iron', 'phone', 'benchvise', 'can', 'driller', 'eggbox', 'holepuncher', 'lamp'] linemod_K = np.array([[572.4114, 0., 325.2611], [0., 573.57043, 242.04899], [0., 0., 1.]]) blender_K = np.array([[700., 0., 320.], [0., 700., 240.], [0., 0., 1.]]) ================================================ FILE: data/linemod_dataset.py 
================================================ import numpy as np import random import os from data.dataset import Dataset, register_dataset import pickle import PIL import cv2 import torch import time import scipy from utils.geometric import range_to_depth, render_pointcloud from .transforms import make_transforms from thirdparty.kpconv.lib.utils import square_distance from utils.geometric import rotation_angle # from utils.visualize import * from transforms3d.quaternions import mat2quat, quat2mat, qmult from transforms3d.euler import mat2euler, euler2mat, euler2quat, quat2euler import math from config.default import get_cfg CURRENT_DIR=os.path.dirname(os.path.abspath(__file__)) try: from pytorch3d.io import load_obj, load_ply except: print("Warning: error occurs when importing pytorch3d ") pass def se3_q2m(se3_q): assert se3_q.size == 7 se3_mx = np.zeros((3, 4)) # quat = se3_q[0:4] / LA.norm(se3_q[0:4]) quat = se3_q[:4] R = quat2mat(quat) se3_mx[:, :3] = R se3_mx[:, 3] = se3_q[4:] return se3_mx def info_convertor(info,): """ [Transform the original kitti info file] """ seqs = info.keys() #['cat']# seq_lengths = [len(info[i]) for i in seqs] data = [] for seq in seqs: print(seq) data.append(info[seq]) new_infos = { "seqs": list(seqs), "seq_lengths": seq_lengths, "data": data } return new_infos def resize(im, target_size, max_size, stride=0, interpolation=cv2.INTER_LINEAR): """ only resize input image to target size and return scale :param im: BGR image input by opencv :param target_size: one dimensional size (the short side) :param max_size: one dimensional max size (the long side) :param stride: if given, pad the image to designated stride :param interpolation: if given, using given interpolation method to resize image :return: """ im_shape = im.shape im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) im_scale = float(target_size) / float(im_size_min) # prevent bigger axis from being more than max_size: if np.round(im_scale * im_size_max) > max_size: im_scale = float(max_size) / float(im_size_max) im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=interpolation) if stride == 0: return im, im_scale else: # pad to product of stride im_height = int(np.ceil(im.shape[0] / float(stride)) * stride) im_width = int(np.ceil(im.shape[1] / float(stride)) * stride) im_channel = im.shape[2] padded_im = np.zeros((im_height, im_width, im_channel)) padded_im[: im.shape[0], : im.shape[1], :] = im return padded_im, im_scale def sample_poses(pose_tgt): SYN_STD_ROTATION = 15 SYN_STD_TRANSLATION = 0.01 ANGLE_MAX=45 pose_src = pose_tgt.copy() num = pose_tgt.shape[0] for i in range(num): euler = mat2euler(pose_tgt[i, :3, :3]) euler += SYN_STD_ROTATION * np.random.randn(3) * math.pi / 180.0 pose_src[i, :3, :3] = euler2mat(euler[0], euler[1], euler[2]) pose_src[i, 0, 3] = pose_tgt[i, 0, 3]+ SYN_STD_TRANSLATION * np.random.randn(1) pose_src[i, 1, 3] = pose_tgt[i, 1, 3] + SYN_STD_TRANSLATION * np.random.randn(1) pose_src[i, 2, 3] = pose_tgt[i, 2, 3] + 5 * SYN_STD_TRANSLATION * np.random.randn(1) r_dist = np.arccos((np.trace(pose_src[i, :3,:3].transpose(-1,-2) @ pose_tgt[i, :3,:3]) - 1 )/2)/math.pi*180 while r_dist > ANGLE_MAX:#or not (16 < center_x < (640 - 16) and 16 < center_y < (480 - 16)): # print("r_dist > ANGLE_MAX, resampling...") print("Too large angular differences. 
Resample the pose...") euler = mat2euler(pose_tgt[i, :3, :3]) euler += SYN_STD_ROTATION * np.random.randn(3) * math.pi / 180.0 pose_src[i, :3, :3] = euler2mat(euler[0], euler[1], euler[2]) pose_src[i, 0, 3] = pose_tgt[i, 0, 3]+ SYN_STD_TRANSLATION * np.random.randn(1) pose_src[i, 1, 3] = pose_tgt[i, 1, 3] + SYN_STD_TRANSLATION * np.random.randn(1) pose_src[i, 2, 3] = pose_tgt[i, 2, 3] + 5 * SYN_STD_TRANSLATION * np.random.randn(1) r_dist = np.arccos((np.trace(pose_src[i, :3,:3].transpose(-1,-2) @ pose_tgt[i, :3,:3]) - 1 )/2)*math.pi/180 return pose_src.squeeze() @register_dataset class LinemodDeepIMSynRealV2(Dataset): # use deepim 3d model for geometric feature extraction, mingle the synthetic and real data def __init__(self, root_path, info_path, model_point_dim, is_train, prep_func=None, seq_names=None, cfg={} ): super().__init__() assert info_path is not None assert isinstance(root_path, (tuple, list)) and isinstance(info_path, (tuple, list)) assert len(root_path) == len(info_path) print("Info:",info_path) # assert split in ['train', 'val', 'test'] self.is_train = is_train self.VOC_ROOT = get_cfg('DATA').VOC_ROOT#"/DATA/yxu/LINEMOD_DEEPIM/" infos=[] for ipath in info_path: with open(ipath, 'rb') as f: info = pickle.load(f) if seq_names is not None: for k in list(info.keys()): if k not in seq_names: del info[k] infos.append( info_convertor(info) ) #merge multiple infos self.infos = infos[0] self.infos['dataset_idx'] = [0]*len(self.infos['seqs']) for i, info in enumerate(infos[1:]): for k in self.infos: if k == 'dataset_idx': self.infos[k].extend([i+1]*len(info['seqs'])) else: self.infos[k].extend(info[k]) self.root_paths = root_path self.model_point_dim = model_point_dim # self.max_points=max_points#30000 self.prep_func=prep_func # self.rgb_transformer = None #make_transforms(None, is_train=is_train) self.rgb_transformer = make_transforms(None, is_train=is_train) print("dataset size:",self.__len__()) self.init_pose_type = cfg.get("init_post_type", "POSECNN_LINEMOD" ) # self.init_pose_type = cfg.get("init_post_type", "PVNET_LINEMOD_OCC" ) # self.init_pose_type = cfg.get("init_post_type", "PVNET_LINEMOD" ) print("INIT_POSE_TYPE:", self.init_pose_type) #Load posecnn results if not self.is_train: with open(f"{CURRENT_DIR}/../EXPDATA/init_poses/linemod_posecnn_results.pkl", 'rb') as f: self.pose_cnn_results_test_posecnn=pickle.load(f) try: if self.init_pose_type == "POSECNN_LINEMOD": #load posecnn results self.pose_cnn_results_test=self.pose_cnn_results_test_posecnn elif self.init_pose_type =="PVNET_LINEMOD": self.pose_cnn_results_test=np.load(f"{CURRENT_DIR}/../EXPDATA/init_poses/pvnet/pvnet_linemod_test.npy", allow_pickle=True).flat[0] elif self.init_pose_type =="PVNET_LINEMOD_OCC": self.pose_cnn_results_test=np.load(f"{CURRENT_DIR}/../EXPDATA/init_poses/pvnet/pvnet_linemodocc_test.npy", allow_pickle=True).flat[0] else: raise NotImplementedError except: print("Loading posecnn results failed!") self.pose_cnn_results_test=None try: # self.blender_to_bop_pose=np.load(f"{CURRENT_DIR}/../EXPDATA/init_poses/metricpose/blender2bop_RT.npy", allow_pickle=True).flat[0] self.blender_to_bop_pose=np.load(f"{CURRENT_DIR}/../EXPDATA/init_poses/pose_conversion/blender2bop_RT.npy", allow_pickle=True).flat[0] except: print("Loading pose conversion matrix failed!") self.blender_to_bop_pose=None else: self.pose_cnn_results_test=None self.blender_to_bop_pose=None def load_random_background(self, im_observed, mask): VOC_root = os.path.join(self.VOC_ROOT, "VOCdevkit/VOC2012") VOC_image_set_dir = 
os.path.join(VOC_root, "ImageSets/Main") VOC_bg_list_path = os.path.join(VOC_image_set_dir, "diningtable_trainval.txt") with open(VOC_bg_list_path, "r") as f: VOC_bg_list = [ line.strip("\r\n").split()[0] for line in f.readlines() if line.strip("\r\n").split()[1] == "1" ] height, width, channel = im_observed.shape target_size = min(height, width) max_size = max(height, width) observed_hw_ratio = float(height) / float(width) k = random.randint(0, len(VOC_bg_list) - 1) bg_idx = VOC_bg_list[k] bg_path = os.path.join(VOC_root, "JPEGImages/{}.jpg".format(bg_idx)) bg_image = cv2.imread(bg_path, cv2.IMREAD_COLOR)[...,::-1] #RGB bg_h, bg_w, bg_c = bg_image.shape bg_image_resize = np.zeros((height, width, channel), dtype="uint8") if (float(height) / float(width) < 1 and float(bg_h) / float(bg_w) < 1) or ( float(height) / float(width) >= 1 and float(bg_h) / float(bg_w) >= 1 ): if bg_h >= bg_w: bg_h_new = int(np.ceil(bg_w * observed_hw_ratio)) if bg_h_new < bg_h: bg_image_crop = bg_image[0:bg_h_new, 0:bg_w, :] else: bg_image_crop = bg_image else: bg_w_new = int(np.ceil(bg_h / observed_hw_ratio)) if bg_w_new < bg_w: bg_image_crop = bg_image[0:bg_h, 0:bg_w_new, :] else: bg_image_crop = bg_image else: if bg_h >= bg_w: bg_h_new = int(np.ceil(bg_w * observed_hw_ratio)) bg_image_crop = bg_image[0:bg_h_new, 0:bg_w, :] else: # bg_h < bg_w bg_w_new = int(np.ceil(bg_h / observed_hw_ratio)) print(bg_w_new) bg_image_crop = bg_image[0:bg_h, 0:bg_w_new, :] bg_image_resize_0, _ = resize(bg_image_crop, target_size, max_size) h, w, c = bg_image_resize_0.shape bg_image_resize[0:h, 0:w, :] = bg_image_resize_0 # add background to image_observed res_image = bg_image_resize.copy() res_image[mask>0]=im_observed[mask>0] # im_observed = res_image return res_image def _read_data(self, idx): """ info structure: { 'cat':[ { "index": idx, "model_path": str, "rgb_path": str, "depth_path": str, "RT": np.array([3,4]), "K": np.array([3,3]), }, { "index": idx, "model_path": str, "rgb_path": str, "depth_path": str, "RT": np.array([3,4]), "K": np.array([3,3]), } ... ], 'dog':[ ] ... 
} """ if isinstance(idx, (tuple, list)): idx, seed = idx else: seed = None seq_lengths = np.array(self.infos['seq_lengths']) seq_lengths_cum = np.cumsum(seq_lengths) seq_lengths_cum = np.insert(seq_lengths_cum, 0, 0) # insert a dummy 0 seq_idx = np.nonzero(seq_lengths_cum > idx)[0][0]-1 frame_idx = idx - seq_lengths_cum[seq_idx] info = self.infos["data"][seq_idx] dataset_idx = self.infos["dataset_idx"][seq_idx] model_points_path = os.path.join(f'{os.path.dirname(__file__)}/../EXPDATA/LM6d_converted/models/{self.infos["seqs"][seq_idx]}/textured.obj' ) # TODO: need check rgb_path = os.path.join(self.root_paths[dataset_idx], info[frame_idx]['rgb_observed_path']) depth_path = os.path.join(self.root_paths[dataset_idx], info[frame_idx]['depth_gt_observed_path']) if info[frame_idx].get('rgb_noisy_rendered', None) is not None: rgb_noisy_rendered_path = os.path.join(self.root_paths[dataset_idx], info[frame_idx]['rgb_noisy_rendered']) else: rgb_noisy_rendered_path = None if info[frame_idx].get('depth_noisy_rendered', None) is not None: depth_noisy_rendered_path = os.path.join(self.root_paths[dataset_idx], info[frame_idx]['depth_noisy_rendered']) else: depth_noisy_rendered_path = None if info[frame_idx].get('pose_noisy_rendered', None) is not None: rendered_RT = info[frame_idx]['pose_noisy_rendered'].astype(np.float32) # else: elif self.is_train: rendered_RT = sample_poses( info[frame_idx]['gt_pose'].astype(np.float32)[None] ) K = info[frame_idx]['K'].astype(np.float32) RT = info[frame_idx]['gt_pose'].astype(np.float32) #[R,t] # evaluation if not self.is_train: if self.pose_cnn_results_test is not None: class_name=self.infos["seqs"][seq_idx] if self.init_pose_type == "PVNET_LINEMOD": try: posecnn_RT = self.pose_cnn_results_test[class_name][frame_idx] # if self.pose_cnn_results_test is not None else np.zeros_like(RT) #Transformations are needed as the pvnet has a different coordinate system. posecnn_RT[:3,:3] = posecnn_RT[:3,:3]@self.blender_to_bop_pose[class_name][:3,:3].T posecnn_RT[:3,3:] = -posecnn_RT[:3,:3] @self.blender_to_bop_pose[class_name][:3,3:] + posecnn_RT[:3,3:] except: print("Warning: frame_idx is out of the range of self.pose_cnn_results_test!", flush=True) posecnn_RT= se3_q2m(self.pose_cnn_results_test_posecnn[class_name][frame_idx]['pose']) #np.zeros_like(RT) elif self.init_pose_type =="POSECNN_LINEMOD": posecnn_RT= se3_q2m(self.pose_cnn_results_test_posecnn[class_name][frame_idx]['pose']) elif self.init_pose_type == "PVNET_LINEMOD_OCC": try: posecnn_RT = self.pose_cnn_results_test[class_name][frame_idx].copy()# if self.pose_cnn_results_test is not None else np.zeros_like(RT) #Transformations are needed as the pvnet has a different coordinate system. 
posecnn_RT[:3,:3] = posecnn_RT[:3,:3]@self.blender_to_bop_pose[class_name][:3,:3].T posecnn_RT[:3,3:] = -posecnn_RT[:3,:3] @self.blender_to_bop_pose[class_name][:3,3:] + posecnn_RT[:3,3:] except: # print(frame_idx) raise else: raise NotImplementedError rendered_RT = posecnn_RT else: print("Warning: fail to load cnn poses!", flush=True) posecnn_RT = np.zeros_like(RT) else: posecnn_RT = np.zeros_like(RT) #add noise--for testing purpose only, should always be disabled in normal cases # rot_std=0; trans_std=0.04; ang_max=1000; # print(f"Add pose noises rot_std={rot_std}, trans_std={trans_std}", flush=True) # rendered_RT=sample_poses(rendered_RT[None], rot_std=rot_std, trans_std=trans_std, ang_max=ang_max) # Regularize the matrix to be a valid rotation rendered_RT[:3,:3] = rendered_RT[:3,:3]@ np.linalg.inv(scipy.linalg.sqrtm(rendered_RT[:3,:3].T@rendered_RT[:3,:3])) # model_points = np.fromfile( # str(model_points_path), dtype=np.float32, count=-1).reshape([-1, self.model_point_dim]) # N x model_point_dim model_points, _,_ = load_obj(str(model_points_path) ) model_points = model_points.numpy() visb = model_points[:,-1:] # N x model_point_dim model_point_features=np.ones_like(model_points[:,:1]).astype(np.float32) rgb = np.asarray(PIL.Image.open(rgb_path)) if depth_path.endswith('.npy'): depth = np.load(depth_path) # blender else: depth = cv2.imread(depth_path, -1) /1000. if self.is_train and "LM6d_refine_syn" in self.root_paths[dataset_idx]: #synthetic data rgb = self.load_random_background(rgb, mask=(depth>0)[...,None].repeat(rgb.shape[-1], axis=-1) ) rgb_rendered = np.asarray(PIL.Image.open(rgb_noisy_rendered_path)) if rgb_noisy_rendered_path is not None else None depth_rendered = np.asarray(PIL.Image.open(depth_noisy_rendered_path))/1000 if depth_noisy_rendered_path is not None else None #TODO: need check ren_mask = render_pointcloud(model_points, rendered_RT[None],K=K[None], render_image_size=rgb.shape[:2] ).squeeze()>0 # depth = range_to_depth(depth<1, depth*2, K) return { "class_name": self.infos["seqs"][seq_idx], "idx": idx, "model_points": model_points, "visibility": visb, "model_point_features":model_point_features, "image": rgb, "depth": depth, "mask": depth>0, "rendered_image": rgb_rendered, "rendered_depth": depth_rendered, "K": K, "RT": RT, "rendered_RT": rendered_RT.astype(np.float32), "ren_mask":ren_mask, "POSECNN_RT": posecnn_RT.astype(np.float32), #for test, TODO "scale": 1 # model_scale * scale = depth_scale } def __getitem__(self, idx): data=self._read_data(idx) try: data_p=self.prep_func(data, rand_rgb_transformer=self.rgb_transformer, find_2d3d_correspondence=self.is_train ) except Exception as e: if e.args[0] in ["Too few correspondences are found!"] : if isinstance(idx, (tuple, list)): # idx, seed = idx idx = [(idx[0]+1)%self.__len__(), idx[1]] else: idx = (idx+1) %self.__len__() data_p= self.__getitem__(idx ) else: raise ValueError return data_p def __len__(self): return np.sum(self.infos['seq_lengths']) ================================================ FILE: data/preprocess.py ================================================ import open3d as o3d import copy import os import pathlib import pickle import time from collections import defaultdict from functools import partial import cv2 import numpy as np import quaternion from skimage import io as imgio from utils.timer import simple_timer import matplotlib.pyplot as plt from collections.abc import Iterable import torch import torch.nn.functional as F import quaternion from functools import partial import 
thirdparty.kpconv.cpp_wrappers.cpp_subsampling.grid_subsampling as cpp_subsampling import thirdparty.kpconv.cpp_wrappers.cpp_neighbors.radius_neighbors as cpp_neighbors from thirdparty.kpconv.lib.timer import Timer from utils.geometric import range_to_depth, mask_depth_to_point_cloud from utils.furthest_point_sample import fragmentation_fps from utils.rand_utils import truncated_normal def merge_batch(batch_list): # [batch][key][seq]->example[key][seq][batch] # Or [batch][key]->example[key][batch] example_merged = defaultdict(list) for example in batch_list: # batch dim for k, v in example.items(): # key dim # assert isinstance(v, list) if isinstance(v, list): seq_len = len(v) if k not in example_merged: example_merged[k] = [[] for i in range(seq_len)] for i, vi in enumerate(v): # seq dim example_merged[k][i].append(vi) else: example_merged[k].append(v) ret = {} for key, elems in example_merged.items(): if key in ['model_points', "original_model_points", 'visibility']: # concat the points of lenghts (N1,N2...) to a longer one with length (N1+N2+...) ret[key] = np.concatenate(elems, axis=0) # record the point numbers for original batches ret['batched_model_point_lengths'] = np.array( [len(p) for p in elems], dtype=np.int32) elif key in ['rand_model_points', ]: # concat the points of lenghts (N1,N2...) to a longer one with length (N1+N2+...) ret[key] = np.concatenate(elems, axis=0) # record the point numbers for original batches ret['batched_rand_model_point_lengths'] = np.array( [len(p) for p in elems], dtype=np.int32) elif key in ['model_point_features']: ret[key] = np.concatenate(elems, axis=0) # ['odometry/tq','odometry/RT','odometry/invRT' ]: elif key in ['image', 'depth', 'K', 'RT', 'original_RT' ,'rand_RT', 'correspondences_2d3d', 'scale', 'POSECNN_RT','rendered_image', 'rendered_depth', 'rendered_RT', '3d_keypoint_inds', '3d_keypoints', 'mask', 'ren_mask']: #'depth_coords2d','lifted_points', try: ret[key] = np.stack(elems, axis=0) except: print(key, flush=True) raise elif key == 'metrics': ret[key] = elems else: ret[key] = [] for e in elems: ret[key].append(e) return ret def get_correspondences(src_pcd, tgt_pcd, search_voxel_size, K=None, trans=None): if trans is not None: src_pcd.transform(trans) pcd_tree = o3d.geometry.KDTreeFlann(tgt_pcd) correspondences = [] for i, point in enumerate(src_pcd.points): [count, idx, _] = pcd_tree.search_radius_vector_3d( point, search_voxel_size) if K is not None: idx = idx[:K] for j in idx: correspondences.append([i, j]) correspondences = np.array(correspondences) # correspondences = torch.from_numpy(correspondences) return correspondences def to_pcd(xyz): pcd = o3d.geometry.PointCloud() pcd.points = o3d.utility.Vector3dVector(xyz) return pcd def to_tsfm(rot, trans): tsfm = np.eye(4) tsfm[:3, :3] = rot tsfm[:3, 3] = trans.flatten() return tsfm def CameraIntrinsicUpdate(old_K, aug_param): ''' old_K: array of shape (N,3,3), the old camera intrinsic parameters aug_pram: dict, the data augmentation parameters ''' aug_type = aug_param['aug_type'] assert aug_type in ['crop', 'scale', 'flip'] new_K = np.copy(old_K) if aug_type == 'crop': cx, cy = aug_param['crop/left_top_corner'] # x,y new_K[..., 0, 2] = new_K[..., 0, 2] - cx new_K[..., 1, 2] = new_K[..., 1, 2] - cy elif aug_type == 'scale': s_x, s_y = aug_param['scale/scale'] new_K[..., 0, 0] = s_x * new_K[..., 0, 0] new_K[..., 1, 1] = s_y * new_K[..., 1, 1] new_K[..., 0, 2] = s_x * new_K[..., 0, 2] new_K[..., 1, 2] = s_y * new_K[..., 1, 2] elif aug_type == 'flip': w = aug_param['flip/width'] # h = 
aug_param['flip/heigh'] new_K[..., 0, 2] = w - new_K[..., 0, 2] # px' = w-px # new_K[...,1,2] = h- new_K[...,1,2] new_K[..., 0, 0] = - new_K[..., 0, 0] # fx' = -fx return new_K def crop_transform(images, depths, Ks, crop_param, ): assert(len(images) == len(depths) == len(Ks)) crop_type = crop_param["crop_type"] assert(crop_type in ["fixed", "center", "random"]) crop_size = crop_param["crop_size"] iheight, iwidth = images[0].shape[:2] if crop_type == "fixed": lt_corner = crop_param["lt_corner"] op = transforms.Crop( lt_corner[0], lt_corner[1], crop_size[0], crop_size[1]) elif crop_type == "center": op = transforms.CenterCrop(crop_size) ci, cj, _, _ = op.get_params(images[0], crop_size) lt_corner = ci, cj elif crop_type == "random": op = transforms.RandomCrop((iheight, iwidth), crop_size) lt_corner = op.i, op.j for i, _ in enumerate(images): images[i] = op(images[i]) depths[i] = op(depths[i]) Ks[i] = CameraIntrinsicUpdate(Ks[i], {"aug_type": "crop", "crop/left_top_corner": (lt_corner[1], lt_corner[0])}) return images, depths, Ks # def patch_crop(image, depth, mask, K_old, margin_ratio=0.2, output_size=128, offset_ratio=(0,0),bbox=None, mask_depth=True): def patch_crop(image, depth, mask, K_old, margin_ratio=0.2, output_size=128, offset_ratio=(0,0),bbox=None, mask_depth=False): ''' image: HxWx3 mask: HxW K_old: 3x3 offset: (offset_h, offset_w) ''' H, W, _ = image.shape mask = mask.astype('uint8')*255 if bbox is None: _x, _y, _w, _h = cv2.boundingRect(mask) else: _x, _y, _w, _h = bbox[1], bbox[0], bbox[3]-bbox[1], bbox[2]-bbox[0] # center = [_x+_w/2, _y+_h/2] center = [_x+_w/2+offset_ratio[1]*_w, _y+_h/2+offset_ratio[0]*_h ] L = int(max(_w, _h) * (1+2*margin_ratio)) if L<0: #TODO print(mask.sum(), depth.sum(), '!!!', flush=True) L=128 x = max(0, int(center[0] - L/2)) y = max(0, int(center[1] - L/2)) crop = image[y:y+L, x:x+L] # only keep the ROI depth if mask_depth: depth[mask < 1] = 0 # removed by dy at 0810 depth_crop = depth[y:y+L, x:x+L] mask_crop = mask[y:y+L, x:x+L] # w=h=int ((1+2*margin_ratio)*L) # actual crop size w = h = L # actual crop size # automatically handle the "out of range" problem patch = np.zeros([h, w, 3], dtype=image.dtype) # depth_patch = np.ones([h, w], dtype=depth.dtype) depth_patch = np.zeros([h, w], dtype=depth.dtype) mask_patch = np.zeros([h, w], dtype=depth.dtype) try: xp = 0 yp = 0 patch[xp: xp+crop.shape[0], yp:yp+crop.shape[1]] = crop depth_patch[xp: xp+crop.shape[0], yp:yp+crop.shape[1]] = depth_crop mask_patch[xp: xp+crop.shape[0], yp:yp+crop.shape[1]] = mask_crop except: import pdb pdb.set_trace() patch = cv2.resize(patch, (output_size, output_size), interpolation=cv2.INTER_LINEAR) depth_patch = cv2.resize( depth_patch, (output_size, output_size), interpolation=cv2.INTER_NEAREST) mask_patch = cv2.resize( mask_patch, (output_size, output_size), interpolation=cv2.INTER_NEAREST) # update the intrinsic parameters K_new = np.zeros_like(K_old) scale = output_size/L K_new[0, 2] = (K_old[0, 2]-x)*scale K_new[1, 2] = (K_old[1, 2]-y)*scale K_new[0, 0] = K_old[0, 0]*scale K_new[1, 1] = K_old[1, 1]*scale K_new[2, 2] = 1 # return patch, depth_patch, K_new return patch, depth_patch, mask_patch, K_new def preprocess_deepim( input_dict, max_points, correspondence_radius, normalize_model=True, rand_transform_model=False, # False,#True, rand_rgb_transformer=None, image_scale=None, patch_cropper=None, # func patch_crop(...) 
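        # max_points: cap on the number of 3D model points kept per sample.
        # correspondence_radius: radius-search threshold (in normalized model
        # units) used when mining the 2D-3D correspondences further below.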
): output_dict = copy.deepcopy(input_dict) ################################### process 3D point clouds ################################### if (output_dict['model_points'].shape[0] > max_points): # if(output_dict['model_points'].shape[0] > 20000): idx = np.random.permutation( output_dict['model_points'].shape[0])[:max_points] print(idx, output_dict['model_points'].shape, flush=True) output_dict['model_points'] = output_dict['model_points'][idx] output_dict['model_point_features'] = output_dict['model_point_features'][idx] output_dict['original_RT'] = copy.deepcopy(output_dict['RT']) if normalize_model: points = output_dict['model_points'] mean = points.mean(axis=0) scope = points.max(axis=0)-points.min(axis=0) points_normalize = (points-mean)/scope.max() # points_normalize.tofile(bin_save_path) # modify the extrinsic parameters output_dict['RT'][:, 3:] = output_dict['RT'][:, :3] @ mean[:, None] + output_dict['RT'][:, 3:] # 3x3 @ 3x1 + 3x1 # input_dict['RT'][:,:3] *=scope.max() output_dict['scale'] = scope.max() output_dict['original_model_points'] = output_dict['model_points'] output_dict['model_points'] = points_normalize if rand_transform_model: points = output_dict['model_points'] rand_quat = np.random.randn(1, 4) rand_quat = rand_quat/np.linalg.norm(rand_quat, axis=-1) rand_rot = quaternion.as_rotation_matrix( quaternion.from_float_array(rand_quat)).squeeze() # 3x3 output_dict['rand_model_points'] = ( rand_rot@ points.T).T.astype(np.float32) output_dict['rand_RT'] = copy.deepcopy(output_dict['RT']) # output_dict['RT'][:,:3]@rand_rot.T output_dict['rand_RT'][:, :3] = rand_rot output_dict['rand_RT'][:, 3] = 0 ################################### process 2D images ################################### # carve out image patches if patch_cropper is not None: ref_mask = output_dict['depth'] > 0 output_dict['image'], output_dict['depth'], output_dict['K'] = patch_cropper( output_dict['image'], output_dict['depth'], ref_mask, output_dict['K']) output_dict['rendered_image'], output_dict['rendered_depth'], _ = patch_cropper( output_dict['rendered_image'], output_dict['rendered_depth'], ref_mask, output_dict['K'].copy() ) # rescale image if image_scale is not None: output_dict['image'] = cv2.resize(output_dict['image'], (output_dict['image'].shape[1]*image_scale, output_dict['image'].shape[0]*image_scale), interpolation=cv2.INTER_AREA) output_dict['depth'] = cv2.resize(output_dict['depth'], (output_dict['depth'].shape[1]*image_scale, output_dict['depth'].shape[0]*image_scale), interpolation=cv2.INTER_NEAREST) output_dict['K'][:2] = output_dict['K'][:2]*image_scale # lift depth depth = output_dict['depth'].squeeze() # H,W depth_pts, depth_coords2d = mask_depth_to_point_cloud( depth != 0, depth, output_dict['K']) depth_pts = (output_dict['RT'][:, :3].T@(depth_pts.T - output_dict['RT'] [:, 3:])).T / output_dict['scale'] # transformed to the model frame # find 2d-3d correspondences tsfm = np.eye(4) tsfm[:3] = output_dict['RT'] model_pcd = output_dict['model_points'] correspondences_2d3d = get_correspondences( to_pcd(depth_pts), to_pcd(model_pcd), correspondence_radius, K=5) if len(correspondences_2d3d.shape) < 2 or len(correspondences_2d3d) < 10: print(depth_pts.shape, model_pcd.shape) print("correspondences_2d3d.shape:", correspondences_2d3d.shape, flush=True) # raise ValueError("Too few correspondences are found!") raise Exception("Too few correspondences are found!") output_dict['depth_coords2d'] = depth_coords2d output_dict['lifted_points'] = depth_pts # output_dict['correspondences_2d3d'] = 
np.zeros(1)#correspondences_2d3d output_dict['correspondences_2d3d'] = correspondences_2d3d if rand_rgb_transformer is not None: output_dict['image'], _, _ = rand_rgb_transformer(output_dict['image']) # TO TENSOR output_dict['image'] = (output_dict['image'].astype( np.float32)/255.0).transpose([2, 0, 1]) # .mean(axis=0, keepdims=True) # 1,H,W output_dict['depth'] = output_dict['depth'].astype(np.float32)[ None] # 1,H,W return output_dict def preprocess( input_dict, max_points, correspondence_radius, normalize_model=True, rand_transform_model=False, rand_rgb_transformer=None, image_scale=None, crop_param=None, kp_3d_param=None, use_coords_as_3d_feat=False, find_2d3d_correspondence=True, ): output_dict = copy.deepcopy(input_dict) ################################### process 3D point clouds ################################### if use_coords_as_3d_feat: output_dict['model_point_features'] = output_dict['model_points'][:,:3] if (output_dict['model_points'].shape[0] > max_points): # if(output_dict['model_points'].shape[0] > 20000): idx = np.random.permutation( output_dict['model_points'].shape[0])[:max_points] print(idx, output_dict['model_points'].shape, flush=True) output_dict['model_points'] = output_dict['model_points'][idx] output_dict['model_point_features'] = output_dict['model_point_features'][idx] output_dict['original_RT'] = copy.deepcopy(output_dict['RT']) output_dict['original_model_points'] = output_dict['model_points'] if normalize_model: points = output_dict['model_points'] mean = points.mean(axis=0) scope = points.max(axis=0)-points.min(axis=0) points_normalize = (points-mean)/scope.max() # modify the extrinsic parameters output_dict['RT'][:, 3:] = output_dict['RT'][:, :3] @ mean[:, None] + output_dict['RT'][:, 3:] # 3x3 @ 3x1 + 3x1 output_dict['scale'] = scope.max() output_dict['model_points'] = points_normalize if rand_transform_model: points = output_dict['model_points'] rand_quat = np.random.randn(1, 4) rand_quat = rand_quat/np.linalg.norm(rand_quat, axis=-1) rand_rot = quaternion.as_rotation_matrix( quaternion.from_float_array(rand_quat)).squeeze() # 3x3 output_dict['rand_model_points'] = ( rand_rot@ points.T).T.astype(np.float32) output_dict['rand_RT'] = copy.deepcopy(output_dict['RT']) output_dict['rand_RT'][:, :3] = rand_rot output_dict['rand_RT'][:, 3] = 0 ################################### process 2D images ################################### #crop image if crop_param is not None:# and output_dict['mask'].sum()>0: #without random cropping if not crop_param.rand_crop: if crop_param.get("crop_with_init_pose", False): # bbox= output_dict.get('bbox', None) bbox=None output_dict['image'], output_dict['depth'], output_dict['mask'], output_dict['K'] = patch_crop(output_dict['image'], output_dict['depth'], mask=output_dict['ren_mask'], K_old=output_dict['K'], margin_ratio=crop_param.margin_ratio, output_size=crop_param.output_size, bbox=bbox ) elif crop_param.get("crop_with_rand_bbox_shift", True): bbox= output_dict.get('bbox', None) # offset_ratio= [truncated_normal(0,0.5,-1,1)*crop_param.max_rand_offset_ratio, truncated_normal(0,0.5,-1,1)*crop_param.max_rand_offset_ratio] offset_ratio= [truncated_normal(0,0.33,-1,1)*1, truncated_normal(0,0.33,-1,1)*1] output_dict['image'], output_dict['depth'], output_dict['mask'], output_dict['K'] = patch_crop(output_dict['image'], output_dict['depth'], mask=output_dict['mask'], K_old=output_dict['K'], margin_ratio=crop_param.margin_ratio, output_size=crop_param.output_size, offset_ratio=offset_ratio, bbox=output_dict.get('bbox', None) ) 
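            # Fallback: crop around the ground-truth mask/bbox without any random offset.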
else: bbox= output_dict.get('bbox', None) output_dict['image'], output_dict['depth'], output_dict['mask'], output_dict['K'] = patch_crop(output_dict['image'], output_dict['depth'], mask=output_dict['mask'], K_old=output_dict['K'], margin_ratio=crop_param.margin_ratio, output_size=crop_param.output_size, bbox=output_dict.get('bbox', None) ) else: margin_ratio= truncated_normal(0.5, 0.5, 0, 1) *crop_param.max_rand_margin_ratio offset_ratio= [truncated_normal(0,0.5,-1,1)*crop_param.max_rand_offset_ratio, truncated_normal(0,0.5,-1,1)*crop_param.max_rand_offset_ratio] output_dict['image'], output_dict['depth'], output_dict['mask'], output_dict['K'] = patch_crop(output_dict['image'], output_dict['depth'], mask=output_dict['mask'], K_old=output_dict['K'], margin_ratio=margin_ratio, output_size=crop_param.output_size, offset_ratio=offset_ratio, bbox=output_dict.get('bbox', None) ) # rescale image if image_scale is not None: output_dict['image'] = cv2.resize(output_dict['image'], (output_dict['image'].shape[1]*image_scale, output_dict['image'].shape[0]*image_scale), interpolation=cv2.INTER_AREA) output_dict['depth'] = cv2.resize(output_dict['depth'], (output_dict['depth'].shape[1]*image_scale, output_dict['depth'].shape[0]*image_scale), interpolation=cv2.INTER_NEAREST) output_dict['K'][:2] = output_dict['K'][:2]*image_scale # lift depths depth = output_dict['depth'].squeeze() # H,W depth_pts, depth_coords2d = mask_depth_to_point_cloud( depth != 0, depth, output_dict['K']) depth_pts = (output_dict['RT'][:, :3].T@(depth_pts.T - output_dict['RT'] [:, 3:])).T / output_dict['scale'] # transformed to the model frame # find 2d-3d correspondences if find_2d3d_correspondence: tsfm = np.eye(4) tsfm[:3] = output_dict['RT'] model_pcd = output_dict['model_points'] correspondences_2d3d = get_correspondences( to_pcd(depth_pts), to_pcd(model_pcd), correspondence_radius, K=5) if len(correspondences_2d3d.shape) < 2 or len(correspondences_2d3d) < 10:# or ( "mask" in output_dict and output_dict['mask'].sum()<10 ) : print(depth_pts.shape, model_pcd.shape) print("correspondences_2d3d.shape:", correspondences_2d3d.shape, flush=True) raise Exception("Too few correspondences are found!") output_dict['depth_coords2d'] = depth_coords2d output_dict['lifted_points'] = depth_pts output_dict['correspondences_2d3d'] = correspondences_2d3d else: output_dict['depth_coords2d'] = depth_coords2d output_dict['lifted_points'] = depth_pts output_dict['correspondences_2d3d'] = np.zeros([10,2], dtype=np.int64) if rand_rgb_transformer is not None: output_dict['image'], _, _ = rand_rgb_transformer(output_dict['image']) # TO TENSORs output_dict['image'] = (output_dict['image'].astype( np.float32)/255.0).transpose([2, 0, 1]) # .mean(axis=0, keepdims=True) # 1,H,W output_dict['depth'] = output_dict['depth'].astype(np.float32)[ None] # 1,H,W return output_dict def batch_grid_subsampling_kpconv(points, batches_len, features=None, labels=None, sampleDl=0.1, max_p=0, verbose=0, random_grid_orient=True): """ CPP wrapper for a grid subsampling (method = barycenter for points and features) """ if (features is None) and (labels is None): s_points, s_len = cpp_subsampling.subsample_batch(points, batches_len, sampleDl=sampleDl, max_p=max_p, verbose=verbose) return torch.from_numpy(s_points), torch.from_numpy(s_len) elif (labels is None): s_points, s_len, s_features = cpp_subsampling.subsample_batch(points, batches_len, features=features, sampleDl=sampleDl, max_p=max_p, verbose=verbose) return torch.from_numpy(s_points), torch.from_numpy(s_len), 
torch.from_numpy(s_features) elif (features is None): s_points, s_len, s_labels = cpp_subsampling.subsample_batch(points, batches_len, classes=labels, sampleDl=sampleDl, max_p=max_p, verbose=verbose) return torch.from_numpy(s_points), torch.from_numpy(s_len), torch.from_numpy(s_labels) else: s_points, s_len, s_features, s_labels = cpp_subsampling.subsample_batch(points, batches_len, features=features, classes=labels, sampleDl=sampleDl, max_p=max_p, verbose=verbose) return torch.from_numpy(s_points), torch.from_numpy(s_len), torch.from_numpy(s_features), torch.from_numpy(s_labels) def batch_neighbors_kpconv(queries, supports, q_batches, s_batches, radius, max_neighbors): """ Computes neighbors for a batch of queries and supports, apply radius search :param queries: (N1, 3) the query points :param supports: (N2, 3) the support points :param q_batches: (B) the list of lengths of batch elements in queries :param s_batches: (B)the list of lengths of batch elements in supports :param radius: float32 :return: neighbors indices """ neighbors = cpp_neighbors.batch_query( queries, supports, q_batches, s_batches, radius=radius) # print("neighbors.shape" , neighbors.shape, queries.shape,flush=True) if max_neighbors > 0: return torch.from_numpy(neighbors[:, :max_neighbors]) else: return torch.from_numpy(neighbors) def collate_fn_descriptor(list_data, config, neighborhood_limits): ret = merge_batch(list_data) batched_points = torch.from_numpy(ret['model_points']) batched_lengths = torch.from_numpy(ret['batched_model_point_lengths']) batched_features = torch.from_numpy(ret['model_point_features']) if ret.get('rand_model_points', None) is not None: batched_rand_points = torch.from_numpy(ret['rand_model_points']) batched_rand_lengths = torch.from_numpy( ret['batched_rand_model_point_lengths']) batched_points = torch.cat( [batched_points, batched_rand_points], dim=0) batched_lengths = torch.cat( [batched_lengths, batched_rand_lengths], dim=0) batched_features = torch.cat( [batched_features, batched_features], dim=0) # Starting radius of convolutions r_normal = config.first_subsampling_dl * config.conv_radius # Starting layer layer_blocks = [] layer = 0 # Lists of inputs input_points = [] input_neighbors = [] input_pools = [] input_upsamples = [] input_batches_len = [] timer = Timer() for block_i, block in enumerate(config.architecture): # Stop when meeting a global pooling or upsampling if 'global' in block or 'upsample' in block: break # Get all blocks of the layer if not ('pool' in block or 'strided' in block): layer_blocks += [block] if block_i < len(config.architecture) - 1 and not ('upsample' in config.architecture[block_i + 1]): continue # Convolution neighbors indices # ***************************** if layer_blocks: # Convolutions are done in this layer, compute the neighbors with the good radius if np.any(['deformable' in blck for blck in layer_blocks[:-1]]): r = r_normal * config.deform_radius / config.conv_radius else: r = r_normal conv_i = batch_neighbors_kpconv( batched_points, batched_points, batched_lengths, batched_lengths, r, neighborhood_limits[layer]) else: # This layer only perform pooling, no neighbors required conv_i = torch.zeros((0, 1), dtype=torch.int64) # Pooling neighbors indices # ************************* if 'pool' in block or 'strided' in block: # New subsampling length dl = 2 * r_normal / config.conv_radius # Subsampled points pool_p, pool_b = batch_grid_subsampling_kpconv( batched_points, batched_lengths, sampleDl=dl) # Radius of pooled neighbors if 'deformable' in block: r = 
r_normal * config.deform_radius / config.conv_radius else: r = r_normal # Subsample indices pool_i = batch_neighbors_kpconv( pool_p, batched_points, pool_b, batched_lengths, r, neighborhood_limits[layer]) # Upsample indices (with the radius of the next layer to keep wanted density) up_i = batch_neighbors_kpconv( batched_points, pool_p, batched_lengths, pool_b, 2 * r, neighborhood_limits[layer]) else: # No pooling in the end of this layer, no pooling indices required pool_i = torch.zeros((0, 1), dtype=torch.int64) pool_p = torch.zeros((0, 3), dtype=torch.float32) pool_b = torch.zeros((0,), dtype=torch.int64) up_i = torch.zeros((0, 1), dtype=torch.int64) # Updating input lists input_points += [batched_points.float()] input_neighbors += [conv_i.long()] input_pools += [pool_i.long()] input_upsamples += [up_i.long()] input_batches_len += [batched_lengths] # New points for next layer batched_points = pool_p batched_lengths = pool_b # Update radius and reset blocks r_normal *= 2 layer += 1 layer_blocks = [] ############### # Return inputs ############### dict_inputs = { "idx": ret["idx"], 'model_points': input_points, 'visibility': torch.from_numpy(ret['visibility']), 'neighbors': input_neighbors, 'pools': input_pools, 'upsamples': input_upsamples, 'model_point_features': batched_features.float(), 'stack_lengths': input_batches_len, 'image': torch.from_numpy(ret['image']), 'depth': torch.from_numpy(ret['depth']), 'mask': torch.from_numpy(ret['mask']), 'ren_mask': torch.from_numpy(ret['ren_mask']), 'K': torch.from_numpy(ret['K']), 'RT': torch.from_numpy(ret['RT']), 'original_RT': torch.from_numpy(ret['original_RT']), 'POSECNN_RT': torch.from_numpy(ret.get('POSECNN_RT', np.zeros_like(ret['RT']) ) ), 'rand_RT': torch.from_numpy(ret.get('rand_RT', np.zeros_like(ret['RT']))), # "lifted_points": torch.from_numpy(ret['lifted_points']), "lifted_points": [torch.from_numpy(d) for d in ret['lifted_points'] ] , # 'depth_coords2d': torch.from_numpy(ret['depth_coords2d']), 'depth_coords2d': [torch.from_numpy(d) for d in ret['depth_coords2d']], "correspondences_2d3d": torch.from_numpy(ret['correspondences_2d3d']), "original_model_points": torch.from_numpy(ret['original_model_points']), "class_name": ret['class_name'], "3d_keypoint_inds": torch.from_numpy(ret['3d_keypoint_inds']), "3d_keypoints": torch.from_numpy(ret['3d_keypoints'] ) } return dict_inputs def collate_fn_descriptor_deepim(list_data, config, neighborhood_limits): ret = merge_batch(list_data) batched_points = torch.from_numpy(ret['model_points']) batched_lengths = torch.from_numpy(ret['batched_model_point_lengths']) batched_features = torch.from_numpy(ret['model_point_features']) if ret.get('rand_model_points', None) is not None: # torch.from_numpy(np.concatenate(batched_points_list, axis=0)) batched_rand_points = torch.from_numpy(ret['rand_model_points']) # torch.from_numpy(np.concatenate(batched_points_list, axis=0)) batched_rand_lengths = torch.from_numpy( ret['batched_rand_model_point_lengths']) batched_points = torch.cat( [batched_points, batched_rand_points], dim=0) batched_lengths = torch.cat( [batched_lengths, batched_rand_lengths], dim=0) batched_features = torch.cat( [batched_features, batched_features], dim=0) # Starting radius of convolutions r_normal = config.first_subsampling_dl * config.conv_radius # Starting layer layer_blocks = [] layer = 0 # Lists of inputs input_points = [] input_neighbors = [] input_pools = [] input_upsamples = [] input_batches_len = [] timer = Timer() for block_i, block in enumerate(config.architecture): # 
timer.tic() # Stop when meeting a global pooling or upsampling if 'global' in block or 'upsample' in block: break # Get all blocks of the layer if not ('pool' in block or 'strided' in block): layer_blocks += [block] if block_i < len(config.architecture) - 1 and not ('upsample' in config.architecture[block_i + 1]): continue # Convolution neighbors indices # ***************************** if layer_blocks: # Convolutions are done in this layer, compute the neighbors with the good radius if np.any(['deformable' in blck for blck in layer_blocks[:-1]]): r = r_normal * config.deform_radius / config.conv_radius else: r = r_normal conv_i = batch_neighbors_kpconv( batched_points, batched_points, batched_lengths, batched_lengths, r, neighborhood_limits[layer]) else: # This layer only perform pooling, no neighbors required conv_i = torch.zeros((0, 1), dtype=torch.int64) # Pooling neighbors indices # ************************* # If end of layer is a pooling operation if 'pool' in block or 'strided' in block: # New subsampling length dl = 2 * r_normal / config.conv_radius # Subsampled points pool_p, pool_b = batch_grid_subsampling_kpconv( batched_points, batched_lengths, sampleDl=dl) # Radius of pooled neighbors if 'deformable' in block: r = r_normal * config.deform_radius / config.conv_radius else: r = r_normal # Subsample indices pool_i = batch_neighbors_kpconv( pool_p, batched_points, pool_b, batched_lengths, r, neighborhood_limits[layer]) # Upsample indices (with the radius of the next layer to keep wanted density) up_i = batch_neighbors_kpconv( batched_points, pool_p, batched_lengths, pool_b, 2 * r, neighborhood_limits[layer]) else: # No pooling in the end of this layer, no pooling indices required pool_i = torch.zeros((0, 1), dtype=torch.int64) pool_p = torch.zeros((0, 3), dtype=torch.float32) pool_b = torch.zeros((0,), dtype=torch.int64) up_i = torch.zeros((0, 1), dtype=torch.int64) # Updating input lists input_points += [batched_points.float()] input_neighbors += [conv_i.long()] input_pools += [pool_i.long()] input_upsamples += [up_i.long()] input_batches_len += [batched_lengths] # New points for next layer batched_points = pool_p batched_lengths = pool_b # Update radius and reset blocks r_normal *= 2 layer += 1 layer_blocks = [] # timer.toc() ############### # Return inputs ############### dict_inputs = { "idx": ret["idx"], 'model_points': input_points, 'visibility': torch.from_numpy(ret['visibility']), 'neighbors': input_neighbors, 'pools': input_pools, 'upsamples': input_upsamples, 'model_point_features': batched_features.float(), 'stack_lengths': input_batches_len, 'image': torch.from_numpy(ret['image']), 'depth': torch.from_numpy(ret['depth']), "ren_mask": torch.from_numpy(ret['ren_mask']), 'K': torch.from_numpy(ret['K']), 'RT': torch.from_numpy(ret['RT']), 'original_RT': torch.from_numpy(ret['original_RT']), 'rendered_RT': torch.from_numpy(ret['rendered_RT']) if ret.get('rendered_RT', None) is not None else None , 'POSECNN_RT': torch.from_numpy(ret.get('POSECNN_RT', np.zeros_like(ret['RT']) ) ), # TODO 'rand_RT': torch.from_numpy(ret.get('rand_RT', np.zeros_like(ret['RT']))), # "lifted_points": torch.from_numpy(ret['lifted_points']), "lifted_points": [torch.from_numpy(d) for d in ret['lifted_points'] ] , # 'depth_coords2d': torch.from_numpy(ret['depth_coords2d']), 'depth_coords2d': [torch.from_numpy(d) for d in ret['depth_coords2d']], "correspondences_2d3d": torch.from_numpy(ret['correspondences_2d3d']), "original_model_points": torch.from_numpy(ret['original_model_points']), "class_name": 
ret['class_name'], } return dict_inputs def calibrate_neighbors(dataset, config, collate_fn, keep_ratio=0.8, samples_threshold=2000): timer = Timer() last_display = timer.total_time # From config parameter, compute higher bound of neighbors number in a neighborhood hist_n = int(np.ceil(4 / 3 * np.pi * (config.deform_radius + 1) ** 3)) neighb_hists = np.zeros((config.num_layers, hist_n), dtype=np.int32) # Get histogram of neighborhood sizes i in 1 epoch max. for i in range(len(dataset)): timer.tic() batched_input = collate_fn( [dataset[i]], config, neighborhood_limits=[hist_n] * 5) # update histogram counts = [torch.sum(neighb_mat < neighb_mat.shape[0], dim=1).numpy() for neighb_mat in batched_input['neighbors']] hists = [np.bincount(c, minlength=hist_n)[:hist_n] for c in counts] neighb_hists += np.vstack(hists) timer.toc() if timer.total_time - last_display > 0.1: last_display = timer.total_time print(f"Calib Neighbors {i:08d}: timings {timer.total_time:4.2f}s") if np.min(np.sum(neighb_hists, axis=1)) > samples_threshold: break cumsum = np.cumsum(neighb_hists.T, axis=0) percentiles = np.sum(cumsum < (keep_ratio * cumsum[hist_n - 1, :]), axis=0) neighborhood_limits = percentiles print('\n') return neighborhood_limits def get_dataloader(dataset, kpconv_config, batch_size=1, num_workers=4, shuffle=True, sampler=None, neighborhood_limits=None): if neighborhood_limits is None: # neighborhood_limits = calibrate_neighbors(dataset, dataset.config, collate_fn=collate_fn_descriptor) neighborhood_limits = calibrate_neighbors( dataset, kpconv_config, collate_fn=collate_fn_descriptor) print("neighborhood:", neighborhood_limits) dataloader = torch.utils.data.DataLoader( dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, # https://discuss.pytorch.org/t/supplying-arguments-to-collate-fn/25754/4 collate_fn=partial(collate_fn_descriptor, config=kpconv_config, neighborhood_limits=neighborhood_limits), sampler=sampler, drop_last=False ) return dataloader, neighborhood_limits def get_dataloader_deepim(dataset, kpconv_config, batch_size=1, num_workers=4, shuffle=True, sampler=None, neighborhood_limits=None): if neighborhood_limits is None: neighborhood_limits = calibrate_neighbors( dataset, kpconv_config, collate_fn=collate_fn_descriptor_deepim) print("Neighborhood:", neighborhood_limits) dataloader = torch.utils.data.DataLoader( dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, # https://discuss.pytorch.org/t/supplying-arguments-to-collate-fn/25754/4 collate_fn=partial(collate_fn_descriptor_deepim, config=kpconv_config, neighborhood_limits=neighborhood_limits), sampler=sampler, drop_last=False ) return dataloader, neighborhood_limits if __name__ == '__main__': pass ================================================ FILE: data/transforms.py ================================================ import numpy as np import random import torch import torchvision from torchvision.transforms import functional as F import cv2 from PIL import Image class Compose(object): def __init__(self, transforms): self.transforms = transforms def __call__(self, img, kpts=None, mask=None): for t in self.transforms: img, kpts, mask = t(img, kpts, mask) return img, kpts, mask def __repr__(self): format_string = self.__class__.__name__ + "(" for t in self.transforms: format_string += "\n" format_string += " {0}".format(t) format_string += "\n)" return format_string class ToTensor(object): def __call__(self, img, kpts, mask): return np.asarray(img).astype(np.float32) / 255., kpts, mask class 
Normalize(object):
    def __init__(self, mean, std, to_bgr=True):
        self.mean = mean
        self.std = std
        self.to_bgr = to_bgr

    def __call__(self, img, kpts, mask):
        img -= self.mean
        img /= self.std
        if self.to_bgr:
            img = img.transpose(2, 0, 1).astype(np.float32)
        return img, kpts, mask


class ColorJitter(object):
    def __init__(self, brightness=None, contrast=None, saturation=None, hue=None):
        self.color_jitter = torchvision.transforms.ColorJitter(
            brightness=brightness, contrast=contrast, saturation=saturation, hue=hue)

    def __call__(self, image, kpts, mask):
        image = np.asarray(self.color_jitter(
            Image.fromarray(np.ascontiguousarray(image, np.uint8))))
        return image, kpts, mask


class RandomBlur(object):
    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, kpts, mask):
        if random.random() < self.prob:
            # ksize is the (odd) Gaussian kernel size; OpenCV derives sigma from it.
            ksize = np.random.choice([3, 5, 7, 9])
            image = cv2.GaussianBlur(image, (ksize, ksize), 0)
        return image, kpts, mask


def make_transforms(cfg, is_train):
    if is_train is True:
        transform = Compose(
            [
                RandomBlur(0.5),
                ColorJitter(0.1, 0.1, 0.05, 0.05),
                # ToTensor(),
                # Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        )
    else:
        transform = Compose(
            [
                # ToTensor(),
                # Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        )
    return transform

================================================ FILE: data/ycb/basic.py ================================================
import mmcv

bop_ycb_idx2class = {
    1: '002_master_chef_can',
    2: '003_cracker_box',
    3: '004_sugar_box',
    4: '005_tomato_soup_can',
    5: '006_mustard_bottle',
    6: '007_tuna_fish_can',
    7: '008_pudding_box',
    8: '009_gelatin_box',
    9: '010_potted_meat_can',
    10: '011_banana',
    11: '019_pitcher_base',
    12: '021_bleach_cleanser',
    13: '024_bowl',
    14: '025_mug',
    15: '035_power_drill',
    16: '036_wood_block',
    17: '037_scissors',
    18: '040_large_marker',
    19: '051_large_clamp',
    20: '052_extra_large_clamp',
    21: '061_foam_brick',
}
bop_ycb_class2idx = dict((bop_ycb_idx2class[k], k) for k in bop_ycb_idx2class)

================================================ FILE: doc/prepare_data.md ================================================
# Data Preparation Tips
All the related data for data preparation can be downloaded [here](https://mycuhk-my.sharepoint.com/:f:/g/personal/1155139432_link_cuhk_edu_hk/EoXnZ96Tuy9PpYlZCvDN8vUBPdP1lP-PWQWiZH2KtIQoaQ?e=lpE472). You can download them first and then follow the instructions below for data preparation.

## Download Datasets
First, the following datasets need to be downloaded and extracted to the folder *EXPDATA/*:

[LINEMOD](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/EYFaYrk0kcdBgC6WMtLJqP0B9Ar0_Nff9qhI2Cs95qDbdA?e=yYxexC)

[LINEMOD_OCC_TEST](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/EUKcRnwyy9RGu2ASwA3QDXsBnMRrFP-U4X4Eqq-g_MhmIQ?e=hv6H2s)

## Synthetic Data Generation
The preprocessed data following [DeepIM](https://github.com/liyi14/mx-DeepIM) and [PVNet](https://github.com/zju3dv/pvnet-rendering) can be downloaded from [LM6d_converted](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/EYFaYrk0kcdBgC6WMtLJqP0B9Ar0_Nff9qhI2Cs95qDbdA) and [raw_data](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/ESSFXi_7qs1AgNmty7_9y4AB8ffFsGJWOC3ikgD5BIeXHQ?e=qOmvds). After downloading, put the downloaded files into the folder *EXPDATA/* (located in the repository's root directory).
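If you want to verify the layout before moving on, the short sketch below (a hypothetical check, not part of the provided scripts) lists whether the expected folders are present, assuming the archives were extracted directly into *EXPDATA/*:
```
# Hypothetical sanity check: the folder names come from the download steps
# above and the expected directory structure listed at the end of this page.
from pathlib import Path

EXPDATA = Path("EXPDATA")
for name in ["LM6d_converted", "raw_data"]:
    print(name, "ok" if (EXPDATA / name).is_dir() else "MISSING")
```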
To create occluded objects during training, we follow [PVNet](https://github.com/zju3dv/pvnet-rendering) to randomly create occlusions. You can run the following script to transform the data into the format expected by our dataloader.
```
bash scripts/run_dataformatter.sh
```
The command above will automatically save the formatted data into *EXPDATA/*.

## Download the Object CAD Models
You also need to download the [object models](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/EQScZuLrkPNPmN4eO3kePaUBjOe92EvbKb7kGJk2vKz-bA?e=8McAdh) and put the extracted folder *models* into *./EXPDATA/LM6d_converted/*.

## Download Background Images
[Pascal VOC 2012](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar) needs to be downloaded to the folder *EXPDATA/*. These images are necessary for random background generation during training.

## Download Initial Poses
The initial poses estimated by PoseCNN and PVNet can be downloaded from [here](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/EQh5y0M_zHVMnbVszjEviCUBNAX_22MFN26Msa48XlJ5MQ?e=rfhT7k). The initial pose folder should also be put into the folder *EXPDATA/*.

## Generate the Information Files
Run the following script to generate the info files, which are put into the folder *EXPDATA/data_info/*:
```
bash scripts/run_datainfo_generation.sh
```
After the data preparation, the expected directory structure should be
```
./EXPDATA
|──LM6d_converted
|  |──LM6d_refine
|  |──LM6d_refine_syn
|  └──models
|──LINEMOD
|  └──fuse_formatted
|──lmo
|──VOCdevkit
|──raw_data
|──init_poses
└──data_info
```

================================================ FILE: docker/Dockerfile ================================================
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
RUN apt-key del 7fa2af80
RUN apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
RUN rm /etc/apt/sources.list.d/cuda.list
RUN rm /etc/apt/sources.list.d/nvidia-ml.list

# Dependencies for glvnd and X11.
RUN apt-get update
RUN apt-get install -y -qq --no-install-recommends \
    libglvnd0 \
    libgl1 \
    libglx0 \
    libegl1 \
    libxext6 \
    libx11-6 \
    && rm -rf /var/lib/apt/lists/*

# Env vars for the nvidia-container-runtime.
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES graphics,utility,compute

# Env vars for CUDA.
ENV CUDA_HOME /usr/local/cuda

# Install miniconda.
RUN apt-get update --fix-missing && \
    apt-get install -y wget bzip2 ca-certificates curl git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
RUN wget --quiet https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -O ~/miniconda.sh && \
    /bin/bash ~/miniconda.sh -b -p /opt/miniconda3 && \
    rm ~/miniconda.sh && \
    /opt/miniconda3/bin/conda clean -tipsy && \
    ln -s /opt/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". 
/opt/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \ echo "conda activate base" >> ~/.bashrc && \ echo "conda deactivate && conda activate py37" >> ~/.bashrc #https://blog.csdn.net/Mao_Jonah/article/details/89502380 COPY freeze.yml freeze.yml RUN /opt/miniconda3/bin/conda env create -n py37 -f freeze.yml # WORKDIR /tmp/ # COPY config.jupyter.tar config.jupyter.tar # RUN tar -xvf config.jupyter.tar -C /root/ #install apex ENV TORCH_CUDA_ARCH_LIST "6.0 6.2 7.0 7.2" # make sure we don't overwrite some existing directory called "apex" WORKDIR /tmp/unique_for_apex # uninstall Apex if present, twice to make absolutely sure :) RUN /opt/miniconda3/envs/py37/bin/pip3 uninstall -y apex || : RUN /opt/miniconda3/envs/py37/bin/pip3 uninstall -y apex || : # SHA is something the user can touch to force recreation of this Docker layer, # and therefore force cloning of the latest version of Apex RUN SHA=ToUcHMe git clone https://github.com/NVIDIA/apex.git WORKDIR /tmp/unique_for_apex/apex RUN /opt/miniconda3/envs/py37/bin/pip3 install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . #install pytorch3d # RUN /opt/miniconda3/envs/py37/bin/pip install pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py37_cu102_pyt171/download.html # RUN /opt/miniconda3/envs/py37/bin/pip install "git+https://github.com/facebookresearch/pytorch3d.git" # RUN /opt/miniconda3/bin/conda install pytorch3d==0.5.0 -c pytorch3d -n py37 #other pkgs RUN apt-get update \ && apt-get install -y -qq --no-install-recommends \ cmake build-essential vim xvfb unzip tmux psmisc \ libx11-dev libassimp-dev \ mesa-common-dev freeglut3-dev \ rsync \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* #create some directories RUN mkdir -p /home/RNNPose EXPOSE 8887 8888 8889 10000 10001 10002 WORKDIR /home/RNNPose ================================================ FILE: docker/freeze.yml ================================================ name: py37_tmp channels: - pytorch - pytorch3d - open3d-admin - bottler - iopath - fvcore - conda-forge - defaults dependencies: - pytorch3d=0.5.0 - _libgcc_mutex=0.1=main - _openmp_mutex=4.5=1_gnu - anyio=2.2.0=py37h06a4308_1 - argon2-cffi=20.1.0=py37h27cfd23_1 - async_generator=1.10=py37h28b3542_0 - attrs=21.2.0=pyhd3eb1b0_0 - babel=2.9.1=pyhd3eb1b0_0 - backcall=0.2.0=pyhd3eb1b0_0 - blas=1.0=mkl - bleach=3.3.0=pyhd3eb1b0_0 - brotlipy=0.7.0=py37h27cfd23_1003 - ca-certificates=2021.5.30=ha878542_0 - certifi=2021.5.30=py37h89c1867_0 - cffi=1.14.5=py37h261ae71_0 - charset-normalizer=2.0.4=pyhd3eb1b0_0 - cryptography=3.4.7=py37hd23ed53_0 - cudatoolkit=10.2.89=h8f6ccaa_8 - cycler=0.10.0=py37_0 - dbus=1.13.18=hb2f20db_0 - defusedxml=0.7.1=pyhd3eb1b0_0 - entrypoints=0.3=py37_0 - expat=2.4.1=h2531618_2 - fontconfig=2.13.1=h6c09931_0 - freetype=2.10.4=h5ab3b9f_0 - fvcore=0.1.5.post20210825=py37 - glib=2.68.2=h36276a3_0 - gst-plugins-base=1.14.0=h8213a91_2 - gstreamer=1.14.0=h28cd5cc_2 - icu=58.2=he6710b0_3 - idna=3.2=pyhd3eb1b0_0 - importlib-metadata=3.10.0=py37h06a4308_0 - importlib_metadata=3.10.0=hd3eb1b0_0 - intel-openmp=2021.2.0=h06a4308_610 - iopath=0.1.9=py37 - ipykernel=5.3.4=py37h5ca1d4c_0 - ipython=7.22.0=py37hb070fc8_0 - ipython_genutils=0.2.0=pyhd3eb1b0_1 - ipywidgets=7.6.3=pyhd3eb1b0_1 - jedi=0.17.0=py37_0 - jinja2=3.0.0=pyhd3eb1b0_0 - joblib=1.0.1=pyhd3eb1b0_0 - jpeg=9b=h024ee3a_2 - json5=0.9.6=pyhd3eb1b0_0 - jsonschema=3.2.0=py_2 - jupyter=1.0.0=py37h89c1867_6 - jupyter_client=6.1.12=pyhd3eb1b0_0 - jupyter_console=6.4.0=pyhd8ed1ab_0 - 
jupyter_core=4.7.1=py37h06a4308_0 - jupyter_server=1.4.1=py37h06a4308_0 - jupyterlab=3.0.16=pyhd8ed1ab_0 - jupyterlab_pygments=0.1.2=py_0 - jupyterlab_server=2.7.1=pyhd3eb1b0_0 - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1 - kiwisolver=1.3.1=py37h2531618_0 - kornia=0.5.3=pyhd8ed1ab_0 - lcms2=2.12=h3be6417_0 - ld_impl_linux-64=2.35.1=h7274673_9 - libffi=3.3=he6710b0_2 - libgcc-ng=9.3.0=h5101ec6_17 - libgfortran-ng=7.5.0=ha8ba4b0_17 - libgfortran4=7.5.0=ha8ba4b0_17 - libgomp=9.3.0=h5101ec6_17 - libpng=1.6.37=hbc83047_0 - libsodium=1.0.18=h7b6447c_0 - libstdcxx-ng=9.3.0=hd4cf53a_17 - libtiff=4.2.0=h85742a9_0 - libuuid=1.0.3=h1bed415_2 - libuv=1.40.0=h7b6447c_0 - libwebp-base=1.2.0=h27cfd23_0 - libxcb=1.14=h7b6447c_0 - libxml2=2.9.10=hb55368b_3 - lz4-c=1.9.3=h2531618_0 - markupsafe=2.0.1=py37h27cfd23_0 - matplotlib=3.3.4=py37h06a4308_0 - matplotlib-base=3.3.4=py37h62a2d02_0 - mistune=0.8.4=py37h14c3975_1001 - mkl=2021.2.0=h06a4308_296 - mkl-service=2.3.0=py37h27cfd23_1 - mkl_fft=1.3.0=py37h42c9631_2 - mkl_random=1.2.1=py37ha9443f7_2 - nbclassic=0.2.6=pyhd3eb1b0_0 - nbclient=0.5.3=pyhd3eb1b0_0 - nbconvert=6.0.7=py37_0 - nbformat=5.1.3=pyhd3eb1b0_0 - ncurses=6.2=he6710b0_1 - nest-asyncio=1.5.1=pyhd3eb1b0_0 - ninja=1.10.2=hff7bd54_1 - notebook=6.4.0=py37h06a4308_0 - numpy=1.20.2=py37h2d18471_0 - numpy-base=1.20.2=py37hfae3a4d_0 - nvidiacub=1.10.0=0 - olefile=0.46=py37_0 - open3d=0.13.0=py37_0 - openssl=1.1.1k=h7f98852_0 - packaging=20.9=pyhd3eb1b0_0 - pandas=1.2.4=py37h2531618_0 - pandoc=2.12=h06a4308_0 - pandocfilters=1.4.3=py37h06a4308_1 - parso=0.8.2=pyhd3eb1b0_0 - pcre=8.44=he6710b0_0 - pexpect=4.8.0=pyhd3eb1b0_3 - pickleshare=0.7.5=pyhd3eb1b0_1003 - pillow=8.2.0=py37he98fc37_0 - pip=21.1.2=py37h06a4308_0 - plyfile=0.7.4=pyhd8ed1ab_0 - portalocker=2.3.0=py37h06a4308_0 - prometheus_client=0.11.0=pyhd3eb1b0_0 - prompt-toolkit=3.0.17=pyh06a4308_0 - prompt_toolkit=3.0.17=hd3eb1b0_0 - ptyprocess=0.7.0=pyhd3eb1b0_2 - pycparser=2.20=py_2 - pygments=2.9.0=pyhd3eb1b0_0 - pyopenssl=20.0.1=pyhd3eb1b0_1 - pyparsing=2.4.7=pyhd3eb1b0_0 - pyqt=5.9.2=py37h05f1152_2 - pyrsistent=0.17.3=py37h7b6447c_0 - pysocks=1.7.1=py37_1 - python=3.7.10=h12debd9_4 - python-dateutil=2.8.1=pyhd3eb1b0_0 - python_abi=3.7=1_cp37m - pytorch=1.7.1=py3.7_cuda10.2.89_cudnn7.6.5_0 - pytz=2021.1=pyhd3eb1b0_0 - pyzmq=20.0.0=py37h2531618_1 - qt=5.9.7=h5867ecd_1 - qtconsole=5.1.1=pyhd8ed1ab_0 - qtpy=1.10.0=pyhd8ed1ab_0 - readline=8.1=h27cfd23_0 - requests=2.26.0=pyhd3eb1b0_0 - scikit-learn=0.24.2=py37ha9443f7_0 - scipy=1.6.2=py37had2a1c9_1 - send2trash=1.5.0=pyhd3eb1b0_1 - setuptools=52.0.0=py37h06a4308_0 - sip=4.19.8=py37hf484d3e_0 - six=1.16.0=pyhd3eb1b0_0 - sniffio=1.2.0=py37h06a4308_1 - sqlite=3.35.4=hdfb4753_0 - tabulate=0.8.9=py37h06a4308_0 - terminado=0.9.4=py37h06a4308_0 - testpath=0.4.4=pyhd3eb1b0_0 - threadpoolctl=2.1.0=pyh5ca1d4c_0 - tk=8.6.10=hbc83047_0 - torchvision=0.8.2=py37_cu102 - tornado=6.1=py37h27cfd23_0 - traitlets=5.0.5=pyhd3eb1b0_0 - typing_extensions=3.7.4.3=pyha847dfd_0 - wcwidth=0.2.5=py_0 - webencodings=0.5.1=py37_1 - wheel=0.36.2=pyhd3eb1b0_0 - widgetsnbextension=3.5.1=py37_0 - xz=5.2.5=h7b6447c_0 - yacs=0.1.6=py_0 - yaml=0.2.5=h7b6447c_0 - zeromq=4.3.4=h2531618_0 - zipp=3.4.1=pyhd3eb1b0_0 - zlib=1.2.11=h7b6447c_3 - zstd=1.4.9=haebb681_0 - pip: - absl-py==0.13.0 - addict==2.4.0 - anykeystore==0.2 - cachetools==4.2.2 - cryptacular==1.5.5 - cython==0.29.24 - decorator==4.4.2 - easydict==1.9 - einops==0.3.0 - fire==0.4.0 - flow-vis==0.1 - freetype-py==2.2.0 - future==0.18.2 - glumpy==1.2.0 - google-auth==1.31.0 - 
google-auth-oauthlib==0.4.4 - greenlet==1.1.0 - grpcio==1.38.0 - hupper==1.10.3 - imageio==2.9.0 - llvmlite==0.36.0 - loguru==0.5.3 - markdown==3.3.4 - networkx==2.5.1 - numba==0.53.1 - numpy-quaternion==2021.6.9.13.34.11 - oauthlib==3.1.1 - opencv-python==4.5.2.54 - pastedeploy==2.1.1 - pbkdf2==1.3 - plaster==1.0 - plaster-pastedeploy==0.7 - protobuf==3.17.3 - pyasn1==0.4.8 - pyasn1-modules==0.2.8 - pyassimp==4.1.3 - pyglet==1.5.17 - pyopengl==3.1.5 - pyopengl-accelerate==3.1.5 - pyramid==2.0 - pyramid-mailer==0.15.1 - python3-openid==3.2.0 - pywavelets==1.1.1 - pyyaml==5.4.1 - repoze-sendmail==4.4.1 - requests-oauthlib==1.3.0 - rsa==4.7.2 - scikit-image==0.18.1 - sqlalchemy==1.4.18 - tensorboard==2.5.0 - tensorboard-data-server==0.6.1 - tensorboard-plugin-wit==1.8.0 - tensorboardx==2.2 - termcolor==1.1.0 - tifffile==2021.6.14 - tqdm==4.61.1 - transaction==3.0.1 - transforms3d==0.3.1 - translationstring==1.4 - triangle==20200424 - urllib3==1.26.5 - velruse==1.1.1 - venusian==3.0.0 - vispy==0.6.6 - webob==1.8.7 - werkzeug==2.0.1 - zope-deprecation==4.4.0 - zope-interface==5.4.0 - mmcv prefix: /opt/miniconda3/envs/py37_tmp ================================================ FILE: geometry/__init__.py ================================================ ================================================ FILE: geometry/cholesky.py ================================================ # import tensorflow as tf import torch #as tf import numpy as np # from utils.einsum import einsum from torch import einsum class _cholesky_solve(torch.autograd.Function): @staticmethod def forward(ctx, H, b): chol = torch.cholesky(H) xx = torch.cholesky_solve(b, chol) ctx.save_for_backward(chol, xx) return xx # see OptNet: https://arxiv.org/pdf/1703.00443.pdf @staticmethod def backward(ctx, dx): chol, xx = ctx.saved_tensors dz = torch.cholesky_solve(dx, chol) xs = torch.squeeze(xx, -1) zs = torch.squeeze(dz, -1) dH = -einsum('...i,...j->...ij', xs, zs) return dH, dz def cholesky_solve(H, b): return _cholesky_solve.apply(H,b) def solve(H, b, max_update=1.0): """ Solves the linear system Hx = b, H > 0""" # small system, solve on cpu H = H.to(dtype=torch.float64) b = b.to(dtype=torch.float64) b = torch.unsqueeze(b, -1) x = cholesky_solve(H, b) # replaces nans and clip large updates bad_values = torch.isnan(x) x = torch.where(bad_values, torch.zeros_like(x), x) x = torch.clamp(x, -max_update, max_update) x = torch.squeeze(x, -1) x = x.to(dtype=torch.float32) return x def __test__(): import numpy as np np.random.seed(0) M=np.random.uniform(size=(3,3)) H=torch.tensor(M@M.transpose(-1,-2), requires_grad=True ) b=torch.tensor(np.random.uniform(size=(3,) ), requires_grad=True ) x= solve(H,b ) x.backward(torch.ones_like(x) ) print(f"H={H}, b={b}, x={x}, grad={H.grad, b.grad}") if __name__=="__main__": __test__() ================================================ FILE: geometry/diff_render.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F import numpy from pytorch3d.renderer import ( OpenGLPerspectiveCameras, look_at_view_transform, look_at_rotation, RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams, camera_position_from_spherical_angles, HardPhongShader, PointLights,FoVPerspectiveCameras, PerspectiveCameras, SoftPhongShader, Materials ) try: from pytorch3d.structures import Meshes, Textures use_textures = True except: from pytorch3d.structures import Meshes from pytorch3d.renderer import TexturesVertex from pytorch3d.renderer import TexturesVertex as 
Textures use_textures = False import pytorch3d.renderer.mesh.utils as utils from pytorch3d.io import load_obj, load_ply, load_objs_as_meshes from pytorch3d.renderer.mesh.rasterizer import Fragments from plyfile import PlyData from utils.furthest_point_sample import fragmentation_fps import time def rasterize(R, T, meshes, rasterizer, blur_radius=0): # It will automatically update the camera settings -> R, T in rasterizer.camera fragments = rasterizer(meshes, R=R, T=T) # Copy from pytorch3D source code, try if it is necessary to do gradient decent if blur_radius > 0.0: clipped_bary_coords = utils._clip_barycentric_coordinates( fragments.bary_coords ) clipped_zbuf = utils._interpolate_zbuf( fragments.pix_to_face, clipped_bary_coords, meshes ) fragments = Fragments( bary_coords=clipped_bary_coords, zbuf=clipped_zbuf, dists=fragments.dists, pix_to_face=fragments.pix_to_face, ) return fragments def set_bary_coords_to_nearest(bary_coords_): ori_shape = bary_coords_.shape exr = bary_coords_ * (bary_coords_ < 0) bary_coords_ = bary_coords_.view(-1, bary_coords_.shape[-1]) arg_max_idx = bary_coords_.argmax(1) return torch.zeros_like(bary_coords_).scatter(1, arg_max_idx.unsqueeze(1), 1.0).view(*ori_shape) + exr class MeshRendererWithDepth(nn.Module): def __init__(self, rasterizer, shader): super().__init__() self.rasterizer = rasterizer self.shader = shader def to(self, device): # Rasterizer and shader have submodules which are not of type nn.Module self.rasterizer.to(device) self.shader.to(device) return self def forward(self, meshes_world, **kwargs) -> torch.Tensor: fragments = self.rasterizer(meshes_world, **kwargs) images = self.shader(fragments, meshes_world, **kwargs) return images, fragments.zbuf class DiffRender(nn.Module): def __init__(self, mesh_path, render_texture=False): super().__init__() # self.mesh = mesh if mesh_path.endswith('.ply'): verts, faces = load_ply(mesh_path) self.mesh = Meshes(verts=[verts], faces=[faces]) elif mesh_path.endswith('.obj'): verts, faces,_ = load_obj(mesh_path) faces=faces.verts_idx self.mesh=load_objs_as_meshes([mesh_path]) self.verts = verts self.faces = faces self.cam_opencv2pytch3d = torch.tensor( [[-1,0,0,0], [0,-1,0, 0], [0,0, 1, 0], [0,0, 0, 1]], dtype=torch.float32 ) self.render_texture = render_texture #get patch infos self.pat_centers, self.pat_center_inds, self.vert_frag_ids= fragmentation_fps(verts.detach().cpu().numpy(), 64) self.pat_centers = torch.from_numpy(self.pat_centers) self.pat_center_inds = torch.from_numpy(self.pat_center_inds) self.vert_frag_ids = torch.from_numpy(self.vert_frag_ids)[...,None] #Nx1 def to(self, *args, **kwargs): if 'device' in kwargs.keys(): device = kwargs['device'] else: device = args[0] super().to(device) self.mesh = self.mesh.to(device) self.verts = self.verts.to(device) self.faces = self.faces.to(device) self.pat_centers = self.pat_centers.to(device) self.pat_center_inds = self.pat_center_inds.to(device) self.vert_frag_ids = self.vert_frag_ids.to(device) return self def get_patch_center_depths(self, T, K): #no need to pre-transform, as here we do not use pytorch3d rendering # T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] #render depths X_cam= (self.pat_centers@R+t) #BxKx3 depth= X_cam[...,2:] #BxKx1 x=X_cam@K.transpose(-1,-2) #BxNx3 x = x/x[...,-1:] img_coords= x[...,:2] return depth, img_coords # Calculate interpolated maps -> [n, c, h, w] # face_memory.shape: [n_face, 3, c] @staticmethod def forward_interpolate(R, t, meshes, 
face_memory, rasterizer, blur_radius=0, mode='bilinear', return_depth=True): fragments = rasterize(R, t, meshes, rasterizer, blur_radius=blur_radius) # [n, h, w, 1, d] if mode == 'nearest': out_map = utils.interpolate_face_attributes(fragments.pix_to_face, set_bary_coords_to_nearest(fragments.bary_coords), face_memory) else: out_map = utils.interpolate_face_attributes(fragments.pix_to_face, fragments.bary_coords, face_memory) out_map = out_map.squeeze(dim=3) out_map = out_map.transpose(3, 2).transpose(2, 1) if return_depth: return out_map, fragments.zbuf.permute(0,3,1,2) # depth else: return out_map def render_mesh(self, T, K, render_image_size, near=0.1, far=6, lights=(1,1,-1) ): B=T.shape[0] device = T.device T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1), principal_point=K[:,:2,2], R=R, T=t, image_size=[render_image_size]*B, in_ndc=False, device=device) lights = PointLights(device=device, location=[lights]) raster_settings = RasterizationSettings( image_size=render_image_size, blur_radius=0.0, faces_per_pixel=1, bin_size=None, #0 perspective_correct=True ) materials = Materials( device=device, # specular_color=[[0.0, 1.0, 0.0]], shininess=0 ) renderer = MeshRendererWithDepth( rasterizer=MeshRasterizer( cameras=cameras, raster_settings=raster_settings ), shader=SoftPhongShader( device=device, cameras=cameras, lights=lights, blend_params=BlendParams(1e-4, 1e-4, (0, 0, 0)) ) ) image,depth =renderer(self.mesh, lights=lights, materials=materials) return image.permute(0,3,1,2)[:,:3], depth.permute(0,3,1,2) # to BCHW def render_offset_map(self, T, K, render_image_size, near=0.1, far=6): yy, xx = torch.meshgrid(torch.arange(render_image_size[0], device=T.device), torch.arange(render_image_size[1], device=T.device) ) # xx = xx.to(dtype=torch.float32) # yy = yy.to(dtype=torch.float32) coords_grid = torch.stack( [ xx.to(dtype=torch.float32), yy.to(dtype=torch.float32)], dim=-1 ) #no need to pre-transform, as here we do not use pytorch3d rendering # T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] #render depths X_cam= (self.pat_centers@R+t) x=X_cam@K.transpose(-1,-2) #BxNx3 x = x/x[...,-1:] offset = x[...,None,None,:2] - coords_grid #BxNx1x1x2-HxWx2 return offset.permute(0,1,4,2,3) #BxNx2xHxW def forward(self, vert_attribute, T, K, render_image_size, near=0.1, far=6, mode='bilinear') : """ Args: vert_attribute: (N,C) T: (B,3,4) or (B,4,4) K: (B,3,3) render_image_size (tuple): (h,w) near (float, optional): Defaults to 0.1. far (int, optional): Defaults to 6. 
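            mode (str, optional): barycentric interpolation mode for the vertex
                attributes, 'bilinear' or 'nearest'. Defaults to 'bilinear'.
        Returns:
            (feature_map, depth_map) for the given poses. If vert_attribute is
            None this falls back to render_mesh; if self.render_texture is set,
            the rendered texture is concatenated to the feature map.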
""" if vert_attribute is None: return self.render_mesh(T, K, render_image_size, near=0.1, far=6 ) if self.render_texture: ren_tex=self.render_mesh(T, K, render_image_size, near=0.1, far=6 ) B=T.shape[0] face_attribute = vert_attribute[self.faces.long()] device = T.device T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] # t = -(R@T[...,:3,3:]).squeeze(-1) cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1), principal_point=K[:,:2,2], image_size=[render_image_size]*B, in_ndc=False, device=device) raster_settings = RasterizationSettings( image_size=render_image_size, blur_radius=0.0, faces_per_pixel=1, bin_size=None, #0 perspective_correct=True ) rasterizer = MeshRasterizer( cameras=cameras, raster_settings=raster_settings ) out_map, out_depth=self.forward_interpolate(R, t, self.mesh, face_attribute, rasterizer, blur_radius=0, mode=mode) if not self.render_texture: return out_map, out_depth else: return torch.cat([ren_tex[0], out_map ], dim=1), out_depth def render_depth(self, T, K, render_image_size, near=0.1, far=6, mode='neareast'): """ Args: T: (B,3,4) or (B,4,4) K: (B,3,3) render_image_size (tuple): (h,w) near (float, optional): Defaults to 0.1. far (int, optional): Defaults to 6. mode: 'bilinear' or 'neareast' """ B=T.shape[0] device = T.device T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1), principal_point=K[:,:2,2], image_size=[render_image_size]*B, in_ndc=False, device=device) raster_settings = RasterizationSettings( image_size=render_image_size, blur_radius=0.0, faces_per_pixel=1, bin_size=0 ) rasterizer = MeshRasterizer( cameras=cameras, raster_settings=raster_settings ) #render depths vert_depths= (self.verts@R+t).squeeze(0)[...,2:] face_depths = vert_depths[self.faces.long()] out_depth=self.forward_interpolate(R, t, self.mesh, face_depths, rasterizer, blur_radius=0, mode='nearest', return_depth=False) return out_depth class DiffRendererWrapper(nn.Module): def __init__(self, obj_paths, device="cuda", render_texture=False ): super().__init__() self.renderers = [] for obj_path in obj_paths: self.renderers.append( DiffRender(obj_path, render_texture).to(device=device) ) self.renderers=nn.ModuleList(self.renderers) self.cls2idx=None #could be updated outside def get_patch_center_depths(self, model_names, T, K): depths= [] image_coords= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] depth, img_coord = self.renderers[model_idx].get_patch_center_depths(T[b:b+1], K ) depths.append(depth) image_coords.append(img_coord) return torch.cat(depths, dim=0), torch.cat(image_coords, dim=0) def render_offset_map(self, model_names, T, K, render_image_size, near=0.1, far=6): offsets= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] offset = self.renderers[model_idx].render_offset_map(T[b:b+1], K[b:b+1], render_image_size, near, far ) offsets.append(offset) return torch.cat(offsets, dim=0) def render_pat_id(self, model_names, T, K, render_image_size, near=0.1, far=6): pat_ids= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] pat_id,_ = self.renderers[model_idx].forward(self.renderers[model_idx].vert_frag_ids.float()+1,T[b:b+1], K[b:b+1], render_image_size, near, far, 'nearest' ) pat_ids.append(pat_id-1) #+1 -1, set invalid parts as -1's 
return torch.cat(pat_ids, dim=0) def render_depth(self, model_names, T, K, render_image_size, near=0.1, far=6): depth_outputs= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] depth = self.renderers[model_idx].render_depth( T[b:b+1], K[b:b+1], render_image_size, near, far, 'nearest' ) depth_outputs.append(depth) return torch.cat(depth_outputs, dim=0) def forward(self, model_names, vert_attribute, T, K, render_image_size, near=0.1, far=6): map_outputs= [] depth_outputs= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] feamap, depth= self.renderers[model_idx]( vert_attribute[b], T[b:b+1], K[b:b+1], render_image_size, near, far ) map_outputs.append(feamap) depth_outputs.append(depth) return torch.cat(map_outputs, dim=0), torch.cat(depth_outputs, dim=0) ================================================ FILE: geometry/diff_render_optim.py ================================================ ## Speed optimized: shares the rasterization among different rendering processes import torch import torch.nn as nn import torch.nn.functional as F import numpy from pytorch3d.renderer import ( OpenGLPerspectiveCameras, look_at_view_transform, look_at_rotation, RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams, camera_position_from_spherical_angles, HardPhongShader, PointLights,FoVPerspectiveCameras, PerspectiveCameras, SoftPhongShader, Materials ) try: from pytorch3d.structures import Meshes, Textures use_textures = True except: from pytorch3d.structures import Meshes from pytorch3d.renderer import TexturesVertex from pytorch3d.renderer import TexturesVertex as Textures use_textures = False import pytorch3d.renderer.mesh.utils as utils from pytorch3d.io import load_obj, load_ply, load_objs_as_meshes from pytorch3d.renderer.mesh.rasterizer import Fragments from plyfile import PlyData from utils.furthest_point_sample import fragmentation_fps def rasterize(R, T, meshes, rasterizer, blur_radius=0): # It will automatically update the camera settings -> R, T in rasterizer.camera fragments = rasterizer(meshes, R=R, T=T) # Copied from the PyTorch3D source code; the clipping below is only needed for gradient descent with a non-zero blur radius if blur_radius > 0.0: clipped_bary_coords = utils._clip_barycentric_coordinates( fragments.bary_coords ) clipped_zbuf = utils._interpolate_zbuf( fragments.pix_to_face, clipped_bary_coords, meshes ) fragments = Fragments( bary_coords=clipped_bary_coords, zbuf=clipped_zbuf, dists=fragments.dists, pix_to_face=fragments.pix_to_face, ) return fragments def set_bary_coords_to_nearest(bary_coords_): ori_shape = bary_coords_.shape exr = bary_coords_ * (bary_coords_ < 0) bary_coords_ = bary_coords_.view(-1, bary_coords_.shape[-1]) arg_max_idx = bary_coords_.argmax(1) return torch.zeros_like(bary_coords_).scatter(1, arg_max_idx.unsqueeze(1), 1.0).view(*ori_shape) + exr class MeshRendererWithDepth(nn.Module): def __init__(self, rasterizer, shader): super().__init__() self.rasterizer = rasterizer self.shader = shader def to(self, device): # Rasterizer and shader have submodules which are not of type nn.Module self.rasterizer.to(device) self.shader.to(device) return self def forward(self, meshes_world, **kwargs) -> torch.Tensor: fragments = self.rasterizer(meshes_world, **kwargs) images = self.shader(fragments, meshes_world, **kwargs) return images, fragments.zbuf class MeshRendererWithDepth_v2(nn.Module): def __init__(self, rasterizer, shader): super().__init__() self.rasterizer = rasterizer self.shader = shader # def to(self, device): def to(self, *args,
**kwargs): if 'device' in kwargs.keys(): device = kwargs['device'] else: device = args[0] super().to(device) # Rasterizer and shader have submodules which are not of type nn.Module self.rasterizer.to(device) self.shader.to(device) return self def forward(self, meshes_world, **kwargs) -> torch.Tensor: if 'fragments' not in kwargs.keys() or kwargs['fragments'] is None: # share fragment results across render passes for speed, as rasterization takes most of the time if 'fragments' in kwargs: del kwargs['fragments'] fragments = self.rasterizer(meshes_world, **kwargs) else: fragments = kwargs['fragments'] del kwargs['fragments'] images = self.shader(fragments, meshes_world, **kwargs) return images, fragments.zbuf class DiffRender(nn.Module): def __init__(self, mesh_path, render_texture=False): super().__init__() # self.mesh = mesh if mesh_path.endswith('.ply'): verts, faces = load_ply(mesh_path) self.mesh = Meshes(verts=[verts], faces=[faces]) elif mesh_path.endswith('.obj'): verts, faces,_ = load_obj(mesh_path) # import pdb; pdb.set_trace() faces=faces.verts_idx self.mesh=load_objs_as_meshes([mesh_path]) # self.mesh = Meshes(verts=verts, faces=faces, textures=None) self.verts = verts self.faces = faces # self.mesh = Meshes(verts=[verts], faces=[faces]) # self.feature=feature self.cam_opencv2pytch3d = torch.tensor( [[-1,0,0,0], [0,-1,0, 0], [0,0, 1, 0], [0,0, 0, 1]], dtype=torch.float32 ) self.render_texture = render_texture # get patch info (fragmentation by furthest point sampling) self.pat_centers, self.pat_center_inds, self.vert_frag_ids= fragmentation_fps(verts.detach().cpu().numpy(), 64) self.pat_centers = torch.from_numpy(self.pat_centers) self.pat_center_inds = torch.from_numpy(self.pat_center_inds) self.vert_frag_ids = torch.from_numpy(self.vert_frag_ids)[...,None] #Nx1 def to(self, *args, **kwargs): if 'device' in kwargs.keys(): device = kwargs['device'] else: device = args[0] super().to(device) # self.rasterizer.cameras = self.rasterizer.cameras.to(device) # self.face_memory = self.face_memory.to(device) self.mesh = self.mesh.to(device) self.verts = self.verts.to(device) self.faces = self.faces.to(device) self.pat_centers = self.pat_centers.to(device) self.pat_center_inds = self.pat_center_inds.to(device) self.vert_frag_ids = self.vert_frag_ids.to(device) # self.cam_opencv2pytch3d = self.cam_opencv2pytch3d.to(device=device) return self def get_patch_center_depths(self, T, K): # no need to pre-transform, as here we do not use pytorch3d rendering # T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] #render depths X_cam= (self.pat_centers@R+t) #BxKx3 depth= X_cam[...,2:] #BxKx1 x=X_cam@K.transpose(-1,-2) #BxNx3 x = x/x[...,-1:] img_coords= x[...,:2] return depth, img_coords # Calculate interpolated maps -> [n, c, h, w] # face_memory.shape: [n_face, 3, c] @staticmethod def forward_interpolate(R, t, meshes, face_memory, rasterizer, blur_radius=0, mode='bilinear', return_depth=True): fragments = rasterize(R, t, meshes, rasterizer, blur_radius=blur_radius) # [n, h, w, 1, d] if mode == 'nearest': out_map = utils.interpolate_face_attributes(fragments.pix_to_face, set_bary_coords_to_nearest(fragments.bary_coords), face_memory) else: out_map = utils.interpolate_face_attributes(fragments.pix_to_face, fragments.bary_coords, face_memory) out_map = out_map.squeeze(dim=3) out_map = out_map.transpose(3, 2).transpose(2, 1) if return_depth: return out_map, fragments.zbuf.permute(0,3,1,2), fragments # depth else: return out_map, fragments def render_mesh(self, T, K,
render_image_size, near=0.1, far=6, lights=(1,1,-1), fragments=None ): B=T.shape[0] # face_attribute = vert_attribute[self.faces.long()] device = T.device T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1), principal_point=K[:,:2,2], R=R, T=t, image_size=[render_image_size]*B, in_ndc=False, device=device) lights = PointLights(device=device, location=[lights]) raster_settings = RasterizationSettings( image_size=render_image_size, blur_radius=0.0, faces_per_pixel=1, #5, bin_size=None, #0 perspective_correct=True ) materials = Materials( device=device, # specular_color=[[0.0, 1.0, 0.0]], shininess=0 ) # renderer = MeshRendererWithDepth( renderer = MeshRendererWithDepth_v2( rasterizer=MeshRasterizer( cameras=cameras, raster_settings=raster_settings ), shader=SoftPhongShader( # shader=SoftGouraudShader( device=device, cameras=cameras, lights=lights, blend_params=BlendParams(1e-4, 1e-4, (0, 0, 0)) ) ) image,depth =renderer(self.mesh, lights=lights, materials=materials, fragments=fragments) return image.permute(0,3,1,2)[:,:3], depth.permute(0,3,1,2) # to BCHW def render_offset_map(self, T, K, render_image_size, near=0.1, far=6): yy, xx = torch.meshgrid(torch.arange(render_image_size[0], device=T.device), torch.arange(render_image_size[1], device=T.device) ) # xx = xx.to(dtype=torch.float32) # yy = yy.to(dtype=torch.float32) coords_grid = torch.stack( [ xx.to(dtype=torch.float32), yy.to(dtype=torch.float32)], dim=-1 ) #no need to pre-transform, as here we do not use pytorch3d rendering # T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] #render depths X_cam= (self.pat_centers@R+t)#.squeeze(0)[...,2:] x=X_cam@K.transpose(-1,-2) #BxNx3 x = x/x[...,-1:] offset = x[...,None,None,:2] - coords_grid #BxNx1x1x2-HxWx2 return offset.permute(0,1,4,2,3) #BxNx2xHxW # def forward(self, face_attribute, T, K, render_image_size, near=0.1, far=6): def forward(self, vert_attribute, T, K, render_image_size, near=0.1, far=6, render_texture=None, mode='bilinear') : """ Args: vert_attribute: (N,C) T: (B,3,4) or (B,4,4) K: (B,3,3) render_image_size (tuple): (h,w) near (float, optional): Defaults to 0.1. far (int, optional): Defaults to 6. 
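render_texture (bool, optional): if True, the Phong-rendered RGB image is prepended to the output channels (see the torch.cat at the end of this method). Defaults to self.render_texture.
mode (str, optional): 'bilinear' or 'nearest' interpolation of the face attributes. Defaults to 'bilinear'.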
""" # use default rendering settings if render_texture is None: render_texture= self.render_texture if vert_attribute is None: # only render the rgb image return self.render_mesh(T, K, render_image_size, near=0.1, far=6 ) B=T.shape[0] face_attribute = vert_attribute[self.faces.long()] device = T.device T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] # t = -(R@T[...,:3,3:]).squeeze(-1) cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1), principal_point=K[:,:2,2], image_size=[render_image_size]*B, in_ndc=False, device=device) raster_settings = RasterizationSettings( image_size=render_image_size, blur_radius=0.0, faces_per_pixel=1, bin_size=None, #0 perspective_correct=True ) rasterizer = MeshRasterizer( cameras=cameras, raster_settings=raster_settings ) # forward_interpolate(R, T, meshes, face_memory, rasterizer, blur_radius=0, mode='bilinear') out_map, out_depth, fragments=self.forward_interpolate(R, t, self.mesh, face_attribute, rasterizer, blur_radius=0, mode=mode) if not render_texture: return out_map, out_depth else: ren_tex=self.render_mesh(T, K, render_image_size, near=0.1, far=6, fragments=fragments ) #The first 3 channels contain the rendered textures return torch.cat([ren_tex[0], out_map ], dim=1), out_depth def render_depth(self, T, K, render_image_size, near=0.1, far=6, mode='neareast'): """ Args: T: (B,3,4) or (B,4,4) K: (B,3,3) render_image_size (tuple): (h,w) near (float, optional): Defaults to 0.1. far (int, optional): Defaults to 6. mode: 'bilinear' or 'neareast' """ B=T.shape[0] device = T.device T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1), principal_point=K[:,:2,2], image_size=[render_image_size]*B, in_ndc=False, device=device) raster_settings = RasterizationSettings( image_size=render_image_size, blur_radius=0.0, faces_per_pixel=1, bin_size=0 ) rasterizer = MeshRasterizer( cameras=cameras, raster_settings=raster_settings ) #render depths vert_depths= (self.verts@R+t).squeeze(0)[...,2:] face_depths = vert_depths[self.faces.long()] out_depth, _ =self.forward_interpolate(R, t, self.mesh, face_depths, rasterizer, blur_radius=0, mode='nearest', return_depth=False) return out_depth def render_pointcloud(self, T, K, render_image_size, near=0.1, far=6): """ Args: T: (B,3,4) or (B,4,4) K: (B,3,3) render_image_size (tuple): (h,w) near (float, optional): Defaults to 0.1. far (int, optional): Defaults to 6. 
""" B=T.shape[0] device = T.device # T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t R = T[...,:3,:3].transpose(-1,-2) t = T[...,:3,3] #render depths # vert_depths= (self.verts@R+t).squeeze(0)[...,2:] X_cam= (self.verts@R+t)#.squeeze(0) x=X_cam@K.transpose(-1,-2) #BxNx3 depth = x[...,-1] x = x/x[...,-1:] out = torch.zeros([1,1, *render_image_size], dtype=R.dtype, device=R.device) out[:, :, torch.round(x[0, :, 1]).long().clamp(0, out.shape[2]-1), torch.round(x[0, :, 0]).long().clamp(0, out.shape[3]-1)] = depth return out #1x1xHxW class DiffRendererWrapper(nn.Module): def __init__(self, obj_paths, device="cuda", render_texture=False ): super().__init__() self.renderers = [] for obj_path in obj_paths: self.renderers.append( DiffRender(obj_path, render_texture).to(device=device) ) self.renderers=nn.ModuleList(self.renderers) self.cls2idx=None #updated outside def get_patch_center_depths(self, model_names, T, K): depths= [] image_coords= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] depth, img_coord = self.renderers[model_idx].get_patch_center_depths(T[b:b+1], K ) depths.append(depth) image_coords.append(img_coord) return torch.cat(depths, dim=0), torch.cat(image_coords, dim=0) def render_offset_map(self, model_names, T, K, render_image_size, near=0.1, far=6): offsets= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] offset = self.renderers[model_idx].render_offset_map(T[b:b+1], K[b:b+1], render_image_size, near, far ) offsets.append(offset) return torch.cat(offsets, dim=0) def render_pat_id(self, model_names, T, K, render_image_size, near=0.1, far=6): pat_ids= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] # face_pat_id = self.renderers[model_idx].vert_frag_ids[self.renderers[model_idx].faces.long()] pat_id,_ = self.renderers[model_idx].forward(self.renderers[model_idx].vert_frag_ids.float()+1,T[b:b+1], K[b:b+1], render_image_size, near, far, 'nearest' ) pat_ids.append(pat_id-1) # undo the +1 shift: background pixels come back as -1 return torch.cat(pat_ids, dim=0) def render_depth(self, model_names, T, K, render_image_size, near=0.1, far=6): depth_outputs= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] depth = self.renderers[model_idx].render_depth( T[b:b+1], K[b:b+1], render_image_size, near, far, 'nearest' ) depth_outputs.append(depth) return torch.cat(depth_outputs, dim=0) def render_mesh(self, model_names, T, K, render_image_size, near=0.1, far=6): outputs= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] img= self.renderers[model_idx].render_mesh( T[b:b+1], K[b:b+1], render_image_size, near, far, )[0] outputs.append(img) return torch.cat(outputs, dim=0) def render_pointcloud(self, model_names, T, K, render_image_size, near=0.1, far=6): outputs= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] depth = self.renderers[model_idx].render_pointcloud( T[b:b+1], K[b:b+1], render_image_size, near, far ) outputs.append(depth) return torch.cat(outputs, dim=0) def forward(self, model_names, vert_attribute, T, K, render_image_size, near=0.1, far=6, render_tex=False): map_outputs= [] depth_outputs= [] for b,_ in enumerate(model_names): model_idx = self.cls2idx[model_names[b]] feamap, depth= self.renderers[model_idx]( vert_attribute[b], T[b:b+1], K[b:b+1], render_image_size, near, far, render_texture=render_tex ) map_outputs.append(feamap) depth_outputs.append(depth) return
torch.cat(map_outputs, dim=0), torch.cat(depth_outputs, dim=0) ================================================ FILE: geometry/einsum.py ================================================ # import tensorflow as torch import torch import numpy as np import re import string def einsum(equation, *inputs): equation = equation.replace(' ', '') # input_shapes = [x.get_shape() for x in list(inputs)] input_shapes = [x.shape for x in list(inputs)] match = re.match('^([a-zA-Z,.]+)(->[a-zA-Z.]*)?$', equation) if not match: raise ValueError('Indices have incorrect format: %s' % equation) input_axis_labels = match.group(1).split(',') output_axis_labels = match.group(2)[2:] if match.group(2) else None if len(input_shapes) != len(input_axis_labels): raise ValueError('Got %d arguments for equation "%s", expecting %d' % (len(input_shapes), equation, len(input_axis_labels))) # Resolve Ellipsis # Assign axes labels for unspecified dimensions in inputs. Labels taken # from unused labels. Follow numpy einsum broadcasting conventions for # tensors of different length and unlabeled output. ellipsis_axes = '' if '...' in equation: unused = ''.join([c for c in string.ascii_lowercase if c not in ''.join(input_axis_labels)]) for i, ax in enumerate(input_axis_labels): if '...' in ax: parts = ax.split('...') if len(parts) != 2: raise ValueError('Unable to resolve ellipsis. Excess number found.') # n = input_shapes[i].ndims - len(''.join(parts)) n = len(input_shapes[i]) - len(''.join(parts)) if n < 0: raise ValueError('Ellipses lengths do not match.') if len(unused) < n: raise ValueError( 'Unable to resolve ellipsis, too many distinct labels.') replace_axes = unused[-n:] if n > 0 else '' input_axis_labels[i] = input_axis_labels[i].replace('...', replace_axes) if len(replace_axes) > len(ellipsis_axes): ellipsis_axes = replace_axes equation = equation.replace('...', ellipsis_axes) out = torch.einsum(equation, *inputs) # torch.add_to_collection("checkpoints", out) return out ================================================ FILE: geometry/intrinsics.py ================================================ import torch import numpy as np # from utils.einsum import einsum from .einsum import einsum def intrinsics_vec_to_matrix(kvec): fx, fy, cx, cy = torch.unbind(kvec, dim=-1) z = torch.zeros_like(fx) o = torch.ones_like(fx) K = torch.stack([fx, z, cx, z, fy, cy, z, z, o], dim=-1) K = torch.reshape(K, list(kvec.shape)[:-1] + [3,3]) return K def intrinsics_matrix_to_vec(kmat): fx = kmat[..., 0, 0] fy = kmat[..., 1, 1] cx = kmat[..., 0, 2] cy = kmat[..., 1, 2] return torch.stack([fx, fy, cx, cy], dim=-1) def update_intrinsics(intrinsics, delta_focal): kvec = intrinsics_matrix_to_vec(intrinsics) fx, fy, cx, cy = torch.unbind(kvec, dim=-1) # torch has no unstack; unbind matches the intended TF semantics df = torch.squeeze(delta_focal, -1) # update the focal lengths fx = torch.exp(df) * fx fy = torch.exp(df) * fy kvec = torch.stack([fx, fy, cx, cy], axis=-1) kmat = intrinsics_vec_to_matrix(kvec) return kmat def rescale_depth(depth, downscale=4): depth = depth[:,None] new_shape = [depth.shape[-2]//downscale, depth.shape[-1]//downscale] depth = torch.nn.functional.interpolate(depth, new_shape, mode='nearest') return torch.squeeze(depth, dim=1) def rescale_depth_and_intrinsics(depth, intrinsics, downscale=4): sc = torch.tensor([1.0/downscale, 1.0/downscale, 1.0], dtype=torch.float32, device=depth.device) intrinsics = einsum('...ij,i->...ij', intrinsics, sc) depth = rescale_depth(depth, downscale=downscale) return depth, intrinsics def rescale_depths_and_intrinsics(depth, intrinsics,
downscale=4): batch, frames, height, width = [depth.shape[i] for i in range(4)] depth = torch.reshape(depth, [batch*frames, height, width]) depth, intrinsics = rescale_depth_and_intrinsics(depth, intrinsics, downscale) depth = torch.reshape(depth, [batch, frames]+list(depth.shape)[1:]) return depth, intrinsics ================================================ FILE: geometry/projective_ops.py ================================================ import numpy as np import torch # from utils.einsum import einsum from torch import einsum # MIN_DEPTH = 0.1 MIN_DEPTH = 0.01 def normalize_coords_grid(coords): """ normalize the coordinates to [-1,1] Args: coords: BxKxHxWx2 """ coords=coords.clone() B,K,H,W,_ = coords.shape coords[...,0] = 2*coords[...,0]/(W-1)-1 coords[...,1] = 2*coords[...,1]/(H-1)-1 return coords def coords_grid(ref, homogeneous=True): """ grid of pixel coordinates """ shape = ref.shape yy, xx = torch.meshgrid(torch.arange(shape[-2], device=ref.device), torch.arange(shape[-1], device=ref.device) ) xx = xx.to(dtype=torch.float32) yy = yy.to(dtype=torch.float32) if homogeneous: coords = torch.stack([xx, yy, torch.ones_like(xx)], dim=-1) else: coords = torch.stack([xx, yy], dim=-1) new_shape = [1]*len(shape[:-2]) + list(shape[-2:]) + [-1] coords = torch.reshape(coords, new_shape) tile = list(shape[:-2])+ [1,1,1] coords = coords.repeat(tile) return coords # BxKxHxWx2 def extract_and_reshape_intrinsics(intrinsics, shape=None): """ Extracts (fx, fy, cx, cy) from intrinsics matrix """ fx = intrinsics[:, 0, 0] fy = intrinsics[:, 1, 1] cx = intrinsics[:, 0, 2] cy = intrinsics[:, 1, 2] if shape is not None: batch = list(fx.shape[:1]) fillr = [1]*len(shape[1:]) k_shape = batch+fillr fx = torch.reshape(fx, k_shape) fy = torch.reshape(fy, k_shape) cx = torch.reshape(cx, k_shape) cy = torch.reshape(cy, k_shape) return (fx, fy, cx, cy) def backproject(depth, intrinsics, jacobian=False, depth_coords=None): """ backproject depth map to point cloud """ #depth_coords: (BxKxHxWx2) if depth_coords is None: coords = coords_grid(depth, homogeneous=True) x, y, _ = torch.unbind(coords, axis=-1) else: x, y = torch.unbind(depth_coords, axis=-1) x_shape = x.shape fx, fy, cx, cy = extract_and_reshape_intrinsics(intrinsics, x_shape) #Bx1x1x1 Z = depth #BxKxHxW X = Z * (x - cx) / fx Y = Z * (y - cy) / fy points = torch.stack([X, Y, Z], axis=-1) if jacobian: o = torch.zeros_like(Z) # used to fill in zeros # jacobian w.r.t (fx, fy) , of shape BxKxHxWx4x1 jacobian_intrinsics = torch.stack([ torch.stack([-X / fx], dim=-1), torch.stack([-Y / fy], dim=-1), torch.stack([o], dim=-1), torch.stack([o], dim=-1)], axis=-2) return points, jacobian_intrinsics return points # return points, coords def project(points, intrinsics, jacobian=False): """ project point cloud onto image """ X, Y, Z = torch.unbind(points, axis=-1) Z = torch.clamp(Z, min=MIN_DEPTH) x_shape = X.shape fx, fy, cx, cy = extract_and_reshape_intrinsics(intrinsics, x_shape) x = fx * (X / Z) + cx y = fy * (Y / Z) + cy coords = torch.stack([x, y], axis=-1) if jacobian: o = torch.zeros_like(x) # used to fill in zeros zinv1 = torch.where(Z <= MIN_DEPTH+.01, torch.zeros_like(Z), 1.0 / Z) zinv2 = torch.where(Z <= MIN_DEPTH+.01, torch.zeros_like(Z), 1.0 / Z**2) # jacobian w.r.t (X, Y, Z) jacobian_points = torch.stack([ torch.stack([fx * zinv1, o, -fx * X * zinv2], axis=-1), torch.stack([o, fy * zinv1, -fy * Y * zinv2], axis=-1)], axis=-2) # jacobian w.r.t (fx, fy) jacobian_intrinsics = torch.stack([ torch.stack([X * zinv1], axis=-1), torch.stack([Y * zinv1], 
axis=-1),], axis=-2) return coords, (jacobian_points, jacobian_intrinsics) return coords ================================================ FILE: geometry/se3.py ================================================ """ SO3 and SE3 operations, exponentials and logarithms adapted from Sophus """ import numpy as np import torch from .einsum import einsum MIN_THETA = 1e-4 def matdotv(A,b): return torch.squeeze(torch.matmul(A, b.unsqueeze(-1)), -1) def hat(a): a1, a2, a3 = torch.split(a, [1,1,1], dim=-1) zz = torch.zeros_like(a1) ax = torch.stack([ torch.cat([zz,-a3,a2], dim=-1), torch.cat([a3,zz,-a1], dim=-1), torch.cat([-a2,a1,zz], dim=-1) ], dim=-2) return ax ### quaternion functions ### def quaternion_rotate_point(q, pt, eq=None): if eq is None: w, vec = torch.split(q, [1, 3], axis=-1) uv = 2*matdotv(hat(vec), pt) return pt + w*uv + matdotv(hat(vec), uv) else: w, vec = torch.split(q, [1, 3], axis=-1) uv1 = 2*einsum(eq, hat(w*vec), pt) uv2 = 2*einsum(eq, hat(vec), pt) return pt + uv1 + einsum(eq, hat(vec), uv2) def quaternion_rotate_matrix(q, mat, eq=None): if eq is None: w, vec = torch.split(q, [1, 3], axis=-1) uv = 2*torch.matmul(hat(vec), mat) return mat + w*uv + torch.matmul(hat(vec), uv) else: w, vec = torch.split(q, [1, 3], axis=-1) uv1 = 2*einsum(eq, hat(w*vec), mat) uv2 = 2*einsum(eq, hat(vec), mat) return mat + uv1 + einsum(eq, hat(vec), uv2) def quaternion_inverse(q): return q * q.new_tensor([1.0, -1.0, -1.0, -1.0]) # conjugate; multiplying by a plain Python list is not supported by torch def quaternion_multiply(a, b): aw, ax, ay, az = torch.split(a, [1,1,1,1], axis=-1) bw, bx, by, bz = torch.split(b, [1,1,1,1], axis=-1) q = torch.concat([ aw * bw - ax * bx - ay * by - az * bz, aw * bx + ax * bw + ay * bz - az * by, aw * by + ay * bw + az * bx - ax * bz, aw * bz + az * bw + ax * by - ay * bx, ], axis=-1) return q def quaternion_to_matrix(q): w, x, y, z = torch.split(q, [1,1,1,1], axis=-1) r11 = 1 - 2 * y**2 - 2 * z**2 r12 = 2 * x * y - 2 * w * z r13 = 2 * z * x + 2 * w * y r21 = 2 * x * y + 2 * w * z r22 = 1 - 2 * x**2 - 2 * z**2 r23 = 2 * y * z - 2 * w * x r31 = 2 * z * x - 2 * w * y r32 = 2 * y * z + 2 * w * x r33 = 1 - 2 * x**2 - 2 * y**2 R = torch.stack([ torch.concat([r11,r12,r13], axis=-1), torch.concat([r21,r22,r23], axis=-1), torch.concat([r31,r32,r33], axis=-1) ], axis=-2) return R def rotation_matrix_to_quaternion(R): Rxx, Ryx, Rzx = R[...,0,0], R[...,0,1], R[...,0,2] Rxy, Ryy, Rzy = R[...,1,0], R[...,1,1], R[...,1,2] Rxz, Ryz, Rzz = R[...,2,0], R[...,2,1], R[...,2,2] zz = torch.zeros_like(Rxx) k1 = torch.stack([Rxx-Ryy-Rzz, zz, zz, zz], axis=-1) k2 = torch.stack([Ryx+Rxy, Ryy-Rxx-Rzz, zz, zz], axis=-1) k3 = torch.stack([Rzx+Rxz, Rzy+Ryz, Rzz-Rxx-Ryy,zz], axis=-1) k4 = torch.stack([Ryz-Rzy, Rzx-Rxz, Rxy-Ryx, Rxx+Ryy+Rzz], axis=-1) K = torch.stack([k1, k2, k3, k4], axis=-2) eigvals, eigvecs = torch.linalg.eigh(K) x, y, z, w = torch.split(eigvecs[...,-1], [1,1,1,1], axis=-1) qvec = torch.concat([w, x, y, z], axis=-1) qvec /= torch.sqrt(torch.sum(qvec**2, dim=-1, keepdim=True)) # torch.reduce_sum is a TF API; torch.sum is the PyTorch equivalent return qvec * torch.sign(w) def so3_expm_and_theta(omega): """ omega in so(3) """ theta_sq = torch.sum(omega**2, dim=-1) theta = torch.sqrt(theta_sq) half_theta = 0.5*theta ### small ### imag_factor = torch.where(theta>MIN_THETA, torch.sin(half_theta) / (theta + 1e-12), 0.5 - (1.0/48.0)*theta_sq + (1.0/3840.0)*theta_sq*theta_sq) real_factor = torch.where(theta>MIN_THETA, torch.cos(half_theta), 1.0 - (1.0/8.0)*theta_sq + (1.0/384.0)*theta_sq*theta_sq) qw = real_factor qx = imag_factor * omega[...,0] qy = imag_factor * omega[...,1] qz = imag_factor * omega[...,2] quat =
torch.stack([qw, qx, qy, qz], axis=-1) return quat, theta def so3_logm_and_theta(so3): w, vec = torch.split(so3, [1,3], axis=-1) squared_n = torch.sum(vec**2, dim=-1, keepdim=True) n = torch.sqrt(squared_n) # the where() below was truncated in the extract; reconstructed following Sophus' SO3::log, which the module docstring names as the source two_atan_nbyw_by_n = torch.where(n > MIN_THETA, 2.0*torch.atan2(n, w) / n, 2.0/w - (2.0/3.0)*squared_n/(w*w*w)) theta = two_atan_nbyw_by_n * n omega = two_atan_nbyw_by_n * vec return omega, theta def se3_matrix_expm(upsilon_omega): """ se3 -> SE(3), works for arbitrary batch dimensions - Note: gradient is overridden with _se3_matrix_expm_grad, which approximates gradient for small upsilon_omega """ eps=1e-12 inp_shape = upsilon_omega.shape out_shape = list(inp_shape)[:-1]+[4,4] upsilon_omega = torch.reshape(upsilon_omega, [-1, 6]) batch = upsilon_omega.shape[0] v, w = torch.split(upsilon_omega, [3, 3], dim=-1) theta_sq = torch.sum(w**2, dim=1 ) theta_sq = torch.reshape(theta_sq, [-1, 1, 1]) theta = torch.sqrt(theta_sq) theta_po4 = theta_sq * theta_sq wx = hat(w) wx_sq = torch.matmul(wx, wx) I = torch.eye(3, dtype=upsilon_omega.dtype, device=upsilon_omega.device).repeat([batch,1,1]) ### Taylor approximations ### R1 = I + (1.0 - (1.0/6.0)*theta_sq + (1.0/120.0)*theta_po4) * wx + \ (0.5 - (1.0/12.0)*theta_sq + (1.0/720.0)*theta_po4) * wx_sq V1 = I + (0.5 - (1.0/24.0)*theta_sq + (1.0/720.0)*theta_po4)*wx + \ ((1.0/6.0) - (1.0/120.0)*theta_sq + (1.0/5040.0)*theta_po4)*wx_sq ### exact values ### R2 = I + (torch.sin(theta) / (theta+eps)) * wx +\ ((1 - torch.cos(theta)) / (theta_sq+eps)) * wx_sq V2 = I + ((1 - torch.cos(theta)) / (theta_sq + eps)) * wx + \ ((theta - torch.sin(theta))/(theta_sq*theta + eps)) * wx_sq # print(theta.shape, R1.shape, R2.shape, ">>>", flush=True) # R = torch.where(theta[:, 0, 0] < MIN_THETA, R1, R2) ================================================ FILE: geometry/transformation.py ================================================ vmask = (pt[..., -1] > MIN_DEPTH) & (pt_new[..., -1] > MIN_DEPTH) # vmask = torch.cast(vmask, torch.float32)[..., torch.newaxis] # vmask = vmask.to(dtype=torch.float32)[..., None, :,:] #BxKx1xHxW vmask = vmask.to(dtype=torch.float32)[..., :, :, None] # BxKx1xHxW return coords, vmask return coords def induced_flow(self, depth, intrinsics, valid_mask=False): coords0 = pops.coords_grid(depth, homogeneous=False) if valid_mask: coords1, vmask = self.transform( depth, intrinsics, valid_mask=valid_mask) return coords1 - coords0, vmask coords1 = self.transform(depth, intrinsics, valid_mask=valid_mask) return coords1 - coords0 def depth_change(self, depth, intrinsics): pt = pops.backproject(depth, intrinsics) pt_new = self.__call__(pt) return pt_new[..., -1] - pt[..., -1] def identity(self): """ Return an identity transformation with the same batch shape """ batch, frames = self.shape() if self.internal == 'matrix': # I = torch.eye(4, batch_shape=[batch, 1]) I = torch.eye(4, dtype=self.G.dtype, device=self.G.device).repeat( [batch, 1, 1, 1]) # return self.__class__(matrix=I, internal=self.internal, eq=self.eq) return self.__class__(matrix=I, internal=self.internal, eq=self.eq) else: raise NotImplementedError class SE3Sequence(SE3): """ Stores a collection of SE3 objects """ def __init__(self, upsilon=None, matrix=None, so3=None, translation=None, eq= "aijk,ai...k->ai...j",internal=DEFAULT_INTERNAL): super().__init__( upsilon, matrix, so3, translation, internal=internal, eq=eq) # self.eq = "aijk,ai...k->ai...j" def __call__(self, pt, inds=None, jacobian=False): if self.internal == 'matrix': return super().__call__(pt, jacobian=jacobian) else: raise NotImplementedError def gather(self, inds): if self.internal == 'matrix': G = torch.index_select(self.G, index=inds, dim=1) return SE3Sequence(matrix=G, internal=self.internal) else: raise NotImplementedError # def append_identity(self): # """ Push identity transformation to start of collection """ # batch, frames = self.shape() # if self.internal == 'matrix': # # I = torch.eye(4,
batch_shape=[batch, 1]) # I = torch.eye(4, dtype=self.G.dtype, device=self.G.device).repeat( # [batch, 1, 1, 1]) # G = torch.cat([I, self.G], dim=1) # return SE3Sequence(matrix=G, internal=self.internal) # else: # raise NotImplementedError def reprojction_optim(self, target, weight, depth, intrinsics, num_iters=2, depth_img_coords=None ): target = clip_dangerous_gradients(target).to(dtype=torch.float64) weight = clip_dangerous_gradients(weight).to(dtype=torch.float64) X0 = pops.backproject(depth, intrinsics, depth_coords=depth_img_coords) w = weight[..., None] lm_lmbda = get_cfg("LM").LM_LMBDA ep_lmbda = get_cfg("LM").EP_LMBDA T = self.copy(stop_gradients=False) for i in range(num_iters): ### compute the jacobians of the transformation ### X1, jtran = T(X0, jacobian=True) x1, (jproj, jkvec) = pops.project(X1, intrinsics, jacobian=True) v = (X0[..., -1] > MIN_DEPTH) & (X1[..., -1] > MIN_DEPTH) # v = v.to(dtype=torch.float32)[..., None, None] v = v.to(dtype=torch.float64)[..., None, None] ### weighted gauss-newton update ### J = einsum('...ij,...jk->...ik', jproj.to(dtype=torch.float64), jtran.to(dtype=torch.float64 )) H = einsum('ai...j,ai...k->aijk', v*w*J, J) b = einsum('ai...j,ai...->aij', v*w*J, target-x1) ### add dampening and apply increment ### H += ep_lmbda*torch.eye(6, dtype=H.dtype, device=H.device) + lm_lmbda*H*torch.eye(6,dtype=H.dtype, device=H.device) try: delta_upsilon = cholesky_solve(H, b) except: # print(w.shape,v.shape, w.mean(), v.mean(),H,b, '!!!!') raise T = T.increment(delta_upsilon) # update if self.internal == 'matrix': self.G = T.matrix() T = SE3Sequence( matrix=T.matrix(), internal=self.internal) else: raise NotImplementedError return T def transform(self, depth, intrinsics, valid_mask=False, return3d=False): return super().transform(depth, intrinsics, valid_mask, return3d) ================================================ FILE: model/CFNet.py ================================================ import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from thirdparty.raft.update import BasicUpdateBlock from thirdparty.raft.extractor import BasicEncoder from thirdparty.raft.corr import CorrBlock, AlternateCorrBlock from thirdparty.raft.utils.utils import bilinear_sampler, coords_grid, upflow try: autocast = torch.cuda.amp.autocast except: # dummy autocast for PyTorch < 1.6 class autocast: def __init__(self, enabled): pass def __enter__(self): pass def __exit__(self, *args): pass class ImageFeaEncoder(nn.Module): def __init__(self, input_dim=3, output_dim=256): super().__init__() self.fnet = BasicEncoder(output_dim=output_dim, norm_fn='instance', dropout=False, input_dim=input_dim) if 1:#self.args.pretrained_model is not None: print("Loading the weights of RAFT...") import os self.load_state_dict( # torch.load(self.args.pretrained_model, map_location='cpu'), strict=False torch.load( f"{os.path.dirname(os.path.abspath(__file__)) }/../weights/img_fea_enc.pth", map_location='cpu'), strict=True ) else: print("ImageFeaEncoder will be trained from scratch...") def forward(self, image1, image2): image1 = 2 * (image1 / 255.0) - 1.0 image2 = 2 * (image2 / 255.0) - 1.0 image1 = image1.contiguous() image2 = image2.contiguous() with autocast(enabled=True): fmap1, fmap2 = self.fnet([image1, image2]) return fmap1, fmap2 class GRU_CFUpdator(nn.Module): def __init__(self, args): super().__init__() self.args = args input_dim = args.get("input_dim", 3) self.hidden_dim = hdim = 128 self.context_dim = cdim = 128 args.corr_levels = 4 args.corr_radius = 4 if 
'alternate_corr' not in self.args: self.args.alternate_corr = False self.update_block = BasicUpdateBlock(self.args, hidden_dim=hdim) if self.args.pretrained_model is not None: print("Loading the weights of RAFT...") import os self.load_state_dict( # torch.load(self.args.pretrained_model, map_location='cpu'), strict=False torch.load( f"{os.path.dirname(os.path.abspath(__file__)) }/../weights/gru_update.pth", map_location='cpu'), strict=True ) else: print("GRU_CFUpdator will be trained from scratch...") def freeze_bn(self): for m in self.modules(): if isinstance(m, nn.BatchNorm2d): m.eval() def initialize_flow(self, img, downsample_rate=8): """ Flow is represented as the difference between two coordinate grids: flow = coords1 - coords0 """ N, C, H, W = img.shape coords0 = coords_grid(N, H//downsample_rate, W//downsample_rate).to(img.device) coords1 = coords_grid(N, H//downsample_rate, W//downsample_rate).to(img.device) # optical flow computed as difference: flow = coords1 - coords0 return coords0, coords1 def upsample_flow(self, flow, mask, upsample_scale=8): """ Upsample flow field [H/8, W/8, 2] -> [H, W, 2] using convex combination """ N, _, H, W = flow.shape mask = mask.view(N, 1, 9, upsample_scale, upsample_scale, H, W) mask = torch.softmax(mask, dim=2) up_flow = F.unfold(upsample_scale * flow, [3,3], padding=1) up_flow = up_flow.view(N, 2, 9, 1, 1, H, W) up_flow = torch.sum(mask * up_flow, dim=2) up_flow = up_flow.permute(0, 1, 4, 2, 5, 3) return up_flow.reshape(N, 2, upsample_scale*H, upsample_scale*W) def forward(self, fmap1, fmap2, iters=1, flow_init=None, upsample=True, test_mode=False, context_fea=None, update_corr_fn=True): """ Estimate optical flow between a pair of frames """ hdim = self.hidden_dim cdim = self.context_dim if update_corr_fn: # needs careful handling outside # run the feature network self.fmap1 = fmap1.float() self.fmap2 = fmap2.float() if self.args.alternate_corr: self.corr_fn = AlternateCorrBlock(self.fmap1, self.fmap2, radius=self.args.corr_radius) else: self.corr_fn = CorrBlock(self.fmap1, self.fmap2, radius=self.args.corr_radius) if update_corr_fn: # run the context network with autocast(enabled=self.args.mixed_precision): assert context_fea is not None ds = context_fea.shape[-1]//self.fmap1.shape[-1] cnet = F.interpolate(context_fea, scale_factor=1/ds, mode='bilinear', align_corners=True) self.net, self.inp = torch.split(cnet, [hdim, cdim], dim=1) self.net = torch.tanh(self.net) self.inp = torch.relu(self.inp) # coords0, coords1 = self.initialize_flow(image1) coords0, coords1 = self.initialize_flow(flow_init) if flow_init is not None: ds = flow_init.shape[-1]//coords0.shape[-1] if ds !=1: flow_init /=ds flow_init = F.interpolate(flow_init, scale_factor=1/ds, mode='bilinear', align_corners=True) coords1 = coords1 + flow_init flow_predictions = [] for itr in range(iters): coords1 = coords1.detach() corr = self.corr_fn(coords1) # index correlation volume flow = coords1 - coords0 with autocast(enabled=self.args.mixed_precision): # net, up_mask, delta_flow = self.update_block(net, inp, corr, flow) self.net, up_mask, delta_flow = self.update_block(self.net, self.inp, corr, flow) # F(t+1) = F(t) + \Delta(t) coords1 = coords1 + delta_flow # upsample predictions if up_mask is None: # note: image1 is not defined in this method; BasicUpdateBlock returns an upsampling mask here, so this fallback is not expected to run flow_up = upflow(coords1 - coords0, scale=image1.shape[2]//coords0.shape[2],) else: if self.args.fea_net in ["bigdx4"]: flow_up = self.upsample_flow(coords1 - coords0, up_mask, upsample_scale=4) else: flow_up = self.upsample_flow(coords1 - coords0, up_mask) flow_predictions.append(flow_up) if
test_mode: return coords1 - coords0, flow_up return flow_predictions ================================================ FILE: model/HybridNet.py ================================================ import torch import torch.nn as nn from thirdparty.kpconv.kpconv_blocks import * import torch.nn.functional as F import numpy as np from thirdparty.kpconv.lib.utils import square_distance # consistent with the kpconv_blocks import above from model.descriptor2D import SuperPoint2D from model.descriptor3D import KPSuperpoint3Dv2 REGISTERED_HYBRID_NET_CLASSES={} def register_hybrid_net(cls, name=None): global REGISTERED_HYBRID_NET_CLASSES if name is None: name = cls.__name__ assert name not in REGISTERED_HYBRID_NET_CLASSES, f"class already registered: {REGISTERED_HYBRID_NET_CLASSES}" REGISTERED_HYBRID_NET_CLASSES[name] = cls return cls def get_hybrid_net(name): global REGISTERED_HYBRID_NET_CLASSES assert name in REGISTERED_HYBRID_NET_CLASSES, f"available classes: {REGISTERED_HYBRID_NET_CLASSES}" return REGISTERED_HYBRID_NET_CLASSES[name] class ContextFeatureNet(nn.Module): def __init__(self, config): super().__init__() self.context_fea_extractor_3d= KPSuperpoint3Dv2(config['context_fea_extractor_3d'] ) def forward(self, batch): # x = batch['features'].clone().detach() # assert len(batch['stack_lengths'][-1])==1, "Only support bs=1 for now" len_src_c = batch['stack_lengths'][-1][0] pcd_c = batch['model_points'][-1] pcd_c = pcd_c[:len_src_c] image=batch['image'] ############### encode 3d and 2d features ############### batch3d={ 'points': batch['model_points'], 'neighbors': batch['neighbors'], 'pools': batch['pools'], 'upsamples': batch['upsamples'], 'features': batch['model_point_features'], 'stack_lengths': batch['stack_lengths'], } ctx_descriptors_3d = self.context_fea_extractor_3d(batch3d) return { "ctx_fea_3d":ctx_descriptors_3d, } @register_hybrid_net class HybridDescNet(nn.Module): # independent 2d and 3d networks def __init__(self, config): super().__init__() self.corr_fea_extractor_2d= SuperPoint2D(config['keypoints_detector_2d'] ) self.corr_fea_extractor_3d= KPSuperpoint3Dv2(config['keypoints_detector_3d'] ) self.descriptors_3d = {} def forward(self, batch): assert len(set(batch['class_name']))==1, "A batch should contain data of the same class."
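# Caching note: at test time the 3D descriptors of each CAD model are computed once and reused from self.descriptors_3d (keyed by class name, see below), so a batch must not mix classes; e.g. a hypothetical batch with class_name=['cat', 'ape'] would trip the assert above.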
class_name = batch['class_name'][0] len_src_c = batch['stack_lengths'][-1][0] pcd_c = batch['model_points'][-1] pcd_c = pcd_c[:len_src_c]#, pcd_c[len_src_c:] image=batch['image'] ############### encode 3d and 2d features ############### batch3d={ 'points': batch['model_points'], 'neighbors': batch['neighbors'], 'pools': batch['pools'], 'upsamples': batch['upsamples'], 'features': batch['model_point_features'], 'stack_lengths': batch['stack_lengths'], } if self.training: self.descriptors_3d[class_name] = self.corr_fea_extractor_3d(batch3d) else: if class_name not in self.descriptors_3d: self.descriptors_3d[class_name] = self.corr_fea_extractor_3d(batch3d) descriptors_2d = self.corr_fea_extractor_2d(image)['descriptors'] return { "descriptors_2d":descriptors_2d, "descriptors_3d":self.descriptors_3d[class_name], "scores_saliency_3d": None, "scores_overlap_3d":None, } ================================================ FILE: model/PoseRefiner.py ================================================ import os import time import cv2 import torch import torch.nn as nn import torch.nn.functional as F import torch.distributed as dist import numpy as np from easydict import EasyDict as edict from functools import partial from geometry.transformation import * from geometry.intrinsics import * from geometry.projective_ops import coords_grid, normalize_coords_grid from model.CFNet import GRU_CFUpdator , ImageFeaEncoder from utils.pose_utils import pose_padding from config.default import get_cfg EPS = 1e-5 MIN_DEPTH = 0.1 MAX_ERROR = 100.0 # exclude extremely large displacements MAX_FLOW = 400 def raft_sequence_flow_loss(flow_preds, flow_gt, valid, gamma=0.8, max_flow=MAX_FLOW): """ Loss function defined over a sequence of flow predictions """ n_predictions = len(flow_preds) flow_loss = 0.0 # exclude invalid pixels and extremely large displacements mag = torch.sum(flow_gt**2, dim=1).sqrt() valid = (valid >= 0.5) & (mag < max_flow) for i in range(n_predictions): i_weight = gamma**(n_predictions - i - 1) i_loss = (flow_preds[i] - flow_gt).abs() flow_loss += i_weight * (valid[:, None] * i_loss).mean() epe = torch.sum((flow_preds[-1] - flow_gt)**2, dim=1).sqrt() epe = epe.view(-1)[valid.view(-1)] metrics = { 'epe': epe.mean().item(), '1px': (epe < 1).float().mean().item(), '3px': (epe < 3).float().mean().item(), '5px': (epe < 5).float().mean().item(), } return flow_loss, metrics class PoseRefiner(nn.Module): def __init__(self, cfg, reuse=False, schedule=None, use_regressor=True, is_calibrated=True, bn_is_training=False, is_training=True, renderer=None, ): super().__init__() self.legacy=True self.cfg = cfg self.reuse = reuse self.sigma=nn.ParameterList( [nn.Parameter(torch.ones(1)*1 )] ) self.with_corr_weight = self.cfg.get("with_corr_weight", True) if not self.with_corr_weight: print("Warning: the correlation weighting is disabled.") self.is_calibrated = cfg.IS_CALIBRATED if not is_calibrated: self.is_calibrated = is_calibrated self.is_training = is_training self.use_regressor = use_regressor self.residual_pose_history = [] self.Ti_history =[] self.coords_history = [] self.residual_history = [] self.inds_history = [] # self.weights_history = [] self.flow_history = [] self.intrinsics_history = [] self.summaries = [] if self.cfg.FLOW_NET=='raft': self.image_fea_enc= ImageFeaEncoder() self.cf_net = GRU_CFUpdator(self.cfg.raft) else: raise NotImplementedError self.renderer = renderer def _clear(self,): self.residual_pose_history = [] self.Ti_history =[] self.coords_history = [] self.residual_history = [] self.inds_history =
[] # self.weights_history = [] self.flow_history = [] self.intrinsics_history = [] self.summaries = [] def __len__(self): return len(self.residual_pose_history) def render(self, params, render_tex=False): """ render a batch of images given the intrinsic and extrinsic params Args: params.K: np.array, of shape Bx3x3 params.camera_extrinsics: np.array, of shape Bx3x4 Returns: dict: {"syn_img": rendered color/features (Bx3xHxW or BxCxHxW), "syn_depth": rendered depth (Bx1xHxW)} """ bs=params.K.shape[0] colors=[] depths=[] color,depth= self.renderer( params.obj_cls, params.vert_attribute, T=params.camera_extrinsics, K=params.K, render_image_size=params.render_image_size, near=0.1, far=6, render_tex=render_tex) #set the invalid values to zeros depth[depth==-1] = 0 return { # 1x3xHxW "syn_img": color, "syn_depth": depth.detach(), # 1x1xHxW } def get_affine_transformation(self, mask, crop_center, with_intrinsic_transform=False, output_size=None, margin_ratio=0.4): B,_,H,W = mask.shape ratio = float(H) / float(W) affine_matrices = [] intrinsic_matrices = [] for b in range(B): zoom_c_x, zoom_c_y = crop_center[b] ys, xs = np.nonzero(mask[b][0].detach().cpu().numpy() ) if len(ys)>0 and len(xs)>0: obj_imgn_start_x = xs.min() obj_imgn_start_y = ys.min() obj_imgn_end_x = xs.max() obj_imgn_end_y = ys.max() else: obj_imgn_start_x=0 obj_imgn_start_y=0 obj_imgn_end_x=0 obj_imgn_end_y=0 # mask region left_dist = zoom_c_x - obj_imgn_start_x right_dist = obj_imgn_end_x - zoom_c_x up_dist = zoom_c_y - obj_imgn_start_y down_dist = obj_imgn_end_y - zoom_c_y # crop_height = np.max([ratio * right_dist, ratio * left_dist, up_dist, down_dist]) * 2 * 1.4 crop_height = np.max([ratio * right_dist, ratio * left_dist, up_dist, down_dist]) * 2 * (1+margin_ratio) crop_width = crop_height / ratio # affine transformation for PyTorch x1 = (zoom_c_x - crop_width / 2) * 2 / W - 1 x2 = (zoom_c_x + crop_width / 2) * 2 / W - 1 y1 = (zoom_c_y - crop_height / 2) * 2 / H - 1 y2 = (zoom_c_y + crop_height / 2) * 2 / H - 1 pts1 = np.float32([[x1, y1], [x1, y2], [x2, y1]]) pts2 = np.float32([[-1, -1], [-1, 1], [1, -1]]) affine_matrix = torch.tensor(cv2.getAffineTransform(pts2, pts1), device=mask.device, dtype=torch.float32) affine_matrices.append(affine_matrix) if with_intrinsic_transform: # affine transformation for PyTorch x1 = (zoom_c_x - crop_width / 2) x2 = (zoom_c_x + crop_width / 2) y1 = (zoom_c_y - crop_height / 2) y2 = (zoom_c_y + crop_height / 2) pts1 = np.float32([[x1, y1], [x1, y2], [x2, y1]]) # pts2 = np.float32([[0, 0], [0, H-1], [W-1, 0]]) pts2 = np.float32([[0, 0], [0, output_size[0]-1], [output_size[1]-1, 0]]) # pts2 = np.float32([[0, 0], [0, 1], [1, 0]]) intrinsic_matrix = torch.tensor(cv2.getAffineTransform(pts2, pts1), device=mask.device, dtype=torch.float32) intrinsic_matrices.append(intrinsic_matrix) if with_intrinsic_transform: return torch.stack(affine_matrices, dim=0), torch.stack(intrinsic_matrices, dim=0) else: return torch.stack(affine_matrices, dim=0) def gen_zoom_crop_grids(self, fg_mask, K, T, output_size, model_center=[0,0,0], margin_ratio=0.4): ## Get the projected model center in the image (assuming the model is zero-centered, which should be reconsidered!)
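# A minimal numeric sketch of the projection below (assuming a zero-centered model): with t = T[:3,3] = (0.1, 0.0, 0.5) and K = [[fx,0,cx],[0,fy,cy],[0,0,1]], K@t = (0.1*fx + 0.5*cx, 0.5*cy, 0.5); dividing by the last entry gives the pixel center (0.2*fx + cx, cy).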
crop_center=K@T[:,:3,3:] crop_center = crop_center[:,:2]/crop_center[:,2:3] ## calculate affine transformation parameters affine_matrices, crop_intrinsic_transform=self.get_affine_transformation(fg_mask, crop_center=crop_center.detach().cpu().numpy(), with_intrinsic_transform=True, output_size=(output_size[-2], output_size[-1]),margin_ratio=margin_ratio ) grids = F.affine_grid(affine_matrices, torch.Size(output_size) ) ## Get cropped intrinsic_transform intrinsics_crop= torch.inverse(pose_padding(crop_intrinsic_transform) )@K return grids, intrinsics_crop # def forward(self, image, Ts, intrinsics, fea_3d=None, inds=None, Tj_gt=None, obj_cls=None, geofea_3d=None, geofea_2d=None): def forward(self, image, Ts, intrinsics, fea_3d=None, Tj_gt=None, obj_cls=None, geofea_3d=None, geofea_2d=None): #clear the history data self._clear() cfg = self.cfg RENDER_IMAGE_SIZE = get_cfg("BASIC").render_image_size ZOOM_CROP_SIZE=get_cfg("BASIC").zoom_crop_size if cfg.RESCALE_IMAGES: image = 2 * (image / 255.0) - 1.0 Tij_gt=[] syn_imgs=[] syn_depths=[] Ti = Ts Tij = Ti.copy().identity() for ren_iter in range(cfg.RENDER_ITER_COUNT): # update rendering params Ti = Tij*Ti # accumulate Ti Tij.identity_() #set Tij to the identity matrix at the beginning of each ren_iter if self.legacy: Tij = Ti*Ti.inv() #set Tij to the identity matrix at the beginning of each ren_iter render_params = edict({ "K": intrinsics.detach(), "camera_extrinsics": Ti.matrix().detach().squeeze(1), "obj_cls": obj_cls, "render_image_size": RENDER_IMAGE_SIZE, }) pc_depth = self.renderer.render_pointcloud(obj_cls, T=render_params.camera_extrinsics, K=render_params.K, render_image_size=render_params.render_image_size) if self.cfg.ONLINE_CROP: #get the foreground mask fg_mask = pc_depth>0 B,C,_,_= pc_depth.size() ############### Get zoom parameters ############### grids, intrinsics_crop = self.gen_zoom_crop_grids(fg_mask, render_params.K, render_params.camera_extrinsics, output_size=[B,C, *ZOOM_CROP_SIZE], model_center=None) ############### Render reference images ############### # Concatenate the 3D ctx feature "fea_3d" and 3d descriptor "geofea_3d" for feature rendering if geofea_3d is not None: fea_3d_cat = torch.cat([fea_3d, geofea_3d ], dim=-1) # BxNxC else: fea_3d_cat = fea_3d render_params.vert_attribute = fea_3d_cat #fea_3d render_params.K = intrinsics_crop.detach() render_params.render_image_size = ZOOM_CROP_SIZE ren_res = self.render(render_params, render_tex=True) if geofea_3d is not None: syn_img, cfea, geofea1 = torch.split(ren_res['syn_img'],[3, fea_3d.shape[-1], geofea_3d.shape[-1] ] ,dim=1) syn_depth = ren_res['syn_depth'] else: syn_img, cfea = torch.split(ren_res['syn_img'], [3, fea_3d.shape[-1] ] ,dim=1) geofea1=None syn_depth = ren_res['syn_depth'] cfea = cfea*0.1 # balance the learning rate with the scale 0.1 ## Crop and zoom images syn_image_crop = syn_img image_crop= F.grid_sample(image, grids) cfea_crop= cfea if geofea1 is not None and geofea_2d is not None: # geofea1_crop = F.grid_sample(geofea1, grids) geofea1_crop = geofea1 geofea2_crop = F.grid_sample(geofea_2d, grids) #Render again to get more accurate depth for supervisions in losses #TODO: could be merged into the rendering process above.
-> Done if self.legacy:#self.training: depth_render_params=edict({ "K": intrinsics_crop.detach(), "camera_extrinsics": Ti.matrix().detach().squeeze(1), "obj_cls": obj_cls, "render_image_size": ZOOM_CROP_SIZE, }) syn_depth=self.renderer.render_depth(obj_cls, T=depth_render_params.camera_extrinsics, K=depth_render_params.K, render_image_size=depth_render_params.render_image_size, near=0.1, far=6) #for visualization only syn_imgs.append(syn_image_crop) syn_imgs.append(image_crop) # encode image features feats1, feats2=self.image_fea_enc(syn_image_crop, image_crop) # depths = torch.index_select(syn_depth, index=ii, dim=1) + EPS depths = syn_depth+EPS for i in range(cfg.ITER_COUNT): # save for loss calculation self.intrinsics_history.append(intrinsics_crop) syn_depths.append(syn_depth) Tij = Tij.copy(stop_gradients=True) intrinsics_crop = intrinsics_crop.detach() #Get the projection in frame j of visible model points in frame i with the current relative pose estimation Tij reproj_coords, vmask = Tij.transform( depths, intrinsics_crop, valid_mask=True) uniform_grids = coords_grid(depths) flow_init = torch.einsum( "...ijk->...kij", reproj_coords-uniform_grids[..., :2] ) * (depths>EPS) flow = self.cf_net(feats1, feats2, flow_init=flow_init.squeeze(1), context_fea=cfea_crop, update_corr_fn=i==0) self.flow_history.append(flow) # Get the correspondences in frame j for each point in frame i, based on the current flow estimates if isinstance(flow, (list, tuple)): # flow net may return a list of flow maps correspondence_target = torch.einsum("...ijk->...jki", flow[-1]) + uniform_grids[..., :2] else: correspondence_target = torch.einsum("...ijk->...jki", flow) + uniform_grids[..., :2] # Optimize for the pose by minimizing errors between the constructed correspondence field # (with the currently estimated pose) and the estimated correspondence field if self.with_corr_weight and geofea1 is not None and geofea_2d is not None: geofea2_crop_warped = F.grid_sample(geofea2_crop, normalize_coords_grid(correspondence_target).squeeze(1) ) corr_weight= torch.sum(geofea1_crop*geofea2_crop_warped, dim=1,keepdim=True).permute(0,2,3,1)[:,None] #insert frame axis corr_weight = torch.exp(-torch.abs(1-corr_weight)/self.sigma[0]) * (syn_depth>0)[...,None].float() else: corr_weight = weight # assumes a precomputed weight is in scope; with geometric features enabled the branch above is always taken Tij = Tij.reprojction_optim( correspondence_target, corr_weight, depths, intrinsics_crop, num_iters=cfg.OPTIM_ITER_COUNT ) reproj_coords, vmask1 = Tij.transform( depths, intrinsics_crop, valid_mask=True) # For the loss calculation later self.residual_pose_history.append(Tij) self.Ti_history.append(Ti.copy(stop_gradients=True) ) Tij_gt.append( (Tj_gt*Ti.inv()).copy(stop_gradients=True) ) self.residual_history.append( vmask*vmask1*(reproj_coords-correspondence_target)) # BxKxHxWx3 # The final update of Ti Ti = Tij*Ti return { "Tij": Tij, "Ti_pred": Ti, "intrinsics": intrinsics, "flow": self.flow_history[0], "vmask": syn_depth>0, "weight": torch.einsum("...ijk->...kij", corr_weight), "syn_depth": syn_depths, #ren_res['syn_depth'], "syn_img": syn_imgs+[image_crop, cfea_crop[:,:3]*10,geofea1[:,:3], geofea2_crop[:,:3]], "Tij_gt" : Tij_gt } def compute_loss(self, Tij_gts, depths, intrinsics, loss='l1', log_error=True, loss3d=None, ): total_loss = 0.0 for i in range(len(self.residual_pose_history)): intrinsics = self.intrinsics_history[i] depth, intrinsics = rescale_depths_and_intrinsics(depths[i], intrinsics, downscale=1) Tij = self.residual_pose_history[i] Gij = Tij_gts[i] # intrinsics_pred = intrinsics zstar = depth + EPS flow_pred,
valid_mask_pred = Tij.induced_flow( zstar, intrinsics, valid_mask=True) flow_star, valid_mask_star = Gij.induced_flow( zstar, intrinsics, valid_mask=True) valid_mask = valid_mask_pred * valid_mask_star #3D alignment loss loss_3d_proj = 0 if loss3d is not None: Tj_pred=Tij*self.Ti_history[i] Tj_gt=Gij*self.Ti_history[i] loss_3d_proj = loss3d( R_pred=Tj_pred.G[:, 0, :3, :3], t_pred=Tj_pred.G[:, 0, :3, 3], R_tgt=Tj_gt.G[:, 0, :3, :3], t_tgt=Tj_gt.G[:, 0, :3, 3]) # flow loss if isinstance( self.flow_history[0], (list, tuple)): #squeeze the frame dimension self.flow_history[i] = [self.flow_history[i][f].squeeze(1) for f in range(len(self.flow_history[i])) ] flow_mask = valid_mask.squeeze(1).squeeze(-1) loss_flow,_ = raft_sequence_flow_loss(self.flow_history[i], flow_gt=torch.einsum("...ijk->...kij", flow_star.squeeze(1)), valid= flow_mask, gamma=0.8, max_flow=MAX_FLOW) else: raise NotImplementedError # reprojection loss reproj_diff = valid_mask * \ torch.clamp( torch.abs(flow_pred - flow_star), -MAX_ERROR, MAX_ERROR) reproj_loss = torch.mean(reproj_diff) total_loss +=self.cfg.get("TRAIN_PCALIGN_WEIGHT", 1)*loss_3d_proj+ self.cfg.TRAIN_FLOW_WEIGHT* loss_flow + self.cfg.TRAIN_REPROJ_WEIGHT*reproj_loss # clear the intermediate values self._clear() return { "total_loss": total_loss, "reproj_loss": reproj_loss, "flow_loss":loss_flow, "loss_3d_proj": loss_3d_proj, "valid_mask": valid_mask, "Tij": Tj_pred.G, "Gij": Tj_gt.G } ================================================ FILE: model/RNNPose.py ================================================ # import time import torch from torch import nn from torch.nn import functional as F import torch.distributed as dist import apex import numpy as np import os from easydict import EasyDict as edict from transforms3d.euler import mat2euler, euler2mat, euler2quat, quat2euler from functools import partial from model.HybridNet import HybridDescNet,ContextFeatureNet from thirdparty.kpconv.lib.utils import square_distance from model.PoseRefiner import PoseRefiner from utils.pose_utils import pose_padding from geometry.transformation import SE3Sequence from geometry.diff_render_optim import DiffRendererWrapper from config.default import get_cfg from utils.util import dict_recursive_op # from model.RNNPose import register_posenet REGISTERED_NETWORK_CLASSES = {} def register_posenet(cls, name=None): global REGISTERED_NETWORK_CLASSES if name is None: name = cls.__name__ assert name not in REGISTERED_NETWORK_CLASSES, f"class already registered: {REGISTERED_NETWORK_CLASSES}" REGISTERED_NETWORK_CLASSES[name] = cls return cls def get_posenet_class(name): global REGISTERED_NETWORK_CLASSES assert name in REGISTERED_NETWORK_CLASSES, f"available classes: {REGISTERED_NETWORK_CLASSES}" return REGISTERED_NETWORK_CLASSES[name] @register_posenet class RNNPose(nn.Module): def __init__(self, criterions, opt, name="RNNPose", **kwargs): super().__init__() self.name = name self.opt = opt self.hybrid_desc_net=HybridDescNet(opt.descriptor_net) self.ctx_fea_net = ContextFeatureNet(opt.descriptor_net) self.ctx_fea = {} self.render_params = edict({ "width": opt.input_w, # 128, "height": opt.input_h, # 128, "gpu_id": opt.get('gpu_id', 0), "obj_seqs": opt.obj_seqs }) renderer, diff_renderer= self._render_init(self.render_params) self.diff_renderer = diff_renderer self.motion_net = PoseRefiner( opt.motion_net, bn_is_training=self.training, is_training=self.training, renderer=diff_renderer ) self.contrastive_loss = criterions.get( "metric_loss", None) self.pose_loss = criterions.get("pose_loss", None)
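# global_step is registered as a buffer rather than a plain attribute or nn.Parameter, so it is saved in checkpoints, restored by load_state_dict, and moved by .to(device)/.cuda() without ever receiving gradients.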
self.register_buffer("global_step", torch.LongTensor(1).zero_()) def update_global_step(self): self.global_step += 1 def get_global_step(self): return int(self.global_step.cpu().numpy()[0]) def clear_global_step(self): self.global_step.zero_() def sample_poses(self, pose_tgt): SYN_STD_ROTATION = 15 SYN_STD_TRANSLATION = 0.01 ANGLE_MAX=45 pose_src = pose_tgt.copy() num = pose_tgt.shape[0] for i in range(num): euler = mat2euler(pose_tgt[i, :3, :3]) euler += SYN_STD_ROTATION * np.random.randn(3) * np.pi / 180.0 pose_src[i, :3, :3] = euler2mat(euler[0], euler[1], euler[2]) pose_src[i, 0, 3] = pose_tgt[i, 0, 3]+ SYN_STD_TRANSLATION * np.random.randn(1) pose_src[i, 1, 3] = pose_tgt[i, 1, 3] + SYN_STD_TRANSLATION * np.random.randn(1) pose_src[i, 2, 3] = pose_tgt[i, 2, 3] + 5 * SYN_STD_TRANSLATION * np.random.randn(1) r_dist = np.arccos((np.trace(pose_src[i, :3,:3].transpose(-1,-2) @ pose_tgt[i, :3,:3]) - 1 )/2)/np.pi*180 while r_dist > ANGLE_MAX:#or not (16 < center_x < (640 - 16) and 16 < center_y < (480 - 16)): print("r_dist > ANGLE_MAX, resampling...") euler = mat2euler(pose_tgt[i, :3, :3]) euler += SYN_STD_ROTATION * np.random.randn(3) * np.pi / 180.0 pose_src[i, :3, :3] = euler2mat(euler[0], euler[1], euler[2]) pose_src[i, 0, 3] = pose_tgt[i, 0, 3]+ SYN_STD_TRANSLATION * np.random.randn(1) pose_src[i, 1, 3] = pose_tgt[i, 1, 3] + SYN_STD_TRANSLATION * np.random.randn(1) pose_src[i, 2, 3] = pose_tgt[i, 2, 3] + 5 * SYN_STD_TRANSLATION * np.random.randn(1) r_dist = np.arccos((np.trace(pose_src[i, :3,:3].transpose(-1,-2) @ pose_tgt[i, :3,:3]) - 1 )/2)/np.pi*180 return pose_src def _render_init(self, config): # from data.ycb.basic import bop_ycb_class2idx print("config.gpu_id:", config.gpu_id) obj_paths = [] tex_paths = [] # build cls2idx table for the renderer cls2idx = {} LM_SEQ=["ape", "benchvise", "camera","cam", "can", "cat", "driller", "duck", "eggbox", "glue", "holepuncher", "iron", "lamp", "phone"] # YCB_SEQ=bop_ycb_class2idx.keys() for i, seq in enumerate(set(config.obj_seqs)): if seq in LM_SEQ: obj_path = f'{os.path.dirname(__file__)}/../EXPDATA/LM6d_converted/models/{seq}/textured.obj' tex_path = f'{os.path.dirname(__file__)}/../EXPDATA/LM6d_converted/models/{seq}/texture_map.png' assert os.path.exists(obj_path), f"'{obj_path}' does not exist!" assert os.path.exists(tex_path), f"'{tex_path}' does not exist!" obj_paths.append(obj_path) tex_paths.append(tex_path) cls2idx[seq] = i else: raise NotImplementedError renderer=None diff_renderer = DiffRendererWrapper(obj_paths) diff_renderer.cls2idx = cls2idx return renderer, diff_renderer def forward(self, sample): assert len(set(sample['class_name']))==1, "A batch should contain data of the same class."
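# --- Editorial sketch (not original code): sample_poses above synthesizes a noisy
# initial pose around the ground truth. The quantity it thresholds is the geodesic
# rotation distance between the sampled and target rotations, in degrees:
#     R_rel = pose_src[i, :3, :3].T @ pose_tgt[i, :3, :3]
#     r_dist = np.arccos((np.trace(R_rel) - 1) / 2) / np.pi * 180
# Poses are redrawn until r_dist <= ANGLE_MAX (45 degrees), so refinement always
# starts from an initialization with bounded rotation error.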
class_name = sample['class_name'][0] #encode 3d-2d descriptors preds_dict=self.hybrid_desc_net(sample) len_src_f = sample['stack_lengths'][0][0] geofea_3d = preds_dict.get('descriptors_3d', None) geofea_2d = preds_dict.get('descriptors_2d', None) #encode 3D context features if self.training: self.ctx_fea[class_name]=self.ctx_fea_net(sample) else: if class_name not in self.ctx_fea: self.ctx_fea[class_name]=self.ctx_fea_net(sample) preds_dict.update(self.ctx_fea[class_name]) ctx_fea_3d = preds_dict['ctx_fea_3d'][:len_src_f] if self.training: pose=pose_padding(sample['original_RT']) if sample.get("rendered_RT", None) is not None: syn_pose = pose_padding(sample['rendered_RT']) else: syn_pose = torch.tensor(self.sample_poses(sample['original_RT'].detach().cpu( ).numpy()), device=sample['original_RT'].device, dtype=sample['original_RT'].dtype) syn_pose = pose_padding(syn_pose) else: pose = pose_padding(sample['original_RT']) syn_pose = pose_padding(sample['rendered_RT']) # calculate the GT relative pose and the initial pose Ts_pred = SE3Sequence( matrix=torch.stack([syn_pose ], dim=1)) mot_res = self.motion_net( Ts=Ts_pred, intrinsics=sample['K'], image=sample['image'], fea_3d=ctx_fea_3d[None], Tj_gt=SE3Sequence(matrix=pose[:, None]), obj_cls=sample['class_name'], geofea_2d = geofea_2d, geofea_3d = geofea_3d[None] ) preds_dict.update(mot_res) sample['syn_depth'] = mot_res['syn_depth'] if self.training: ret = self.loss(sample, preds_dict) ret['syn_img'] = mot_res['syn_img'] ret['syn_depth'] = mot_res['syn_depth'] ret['flow'] = mot_res['flow'] ret['weight'] = mot_res['weight'] return ret else: # ret = self.loss(sample, preds_dict) ret={} ret.update(preds_dict) return ret def loss(self, sample, preds_dict): len_src_f = sample['stack_lengths'][0][0] RT =sample['RT'] camera_intrinsic=sample['K'] descriptors_2d_map = preds_dict['descriptors_2d'] descriptors_3d = preds_dict['descriptors_3d'][:len_src_f] rand_descriptors_3d = preds_dict['descriptors_3d'][len_src_f:] model_points=sample['model_points'][0][:len_src_f] orig_model_points = sample['original_model_points'] rand_model_points=sample['model_points'][0][len_src_f:] lifted_points=sample['lifted_points'][0].squeeze(0) correspondence=sample['correspondences_2d3d'].squeeze(0) depth = sample['depth'] # get the foreground 2d descriptors ys_, xs_=torch.nonzero(sample['depth'].squeeze(), as_tuple=True ) descriptors_2d=descriptors_2d_map[:,:,ys_,xs_].squeeze().permute([1,0]) if self.training: device= lifted_points.device fg_point_num = len(lifted_points) model_point_num = len(model_points) # append bg features ys_bg, xs_bg = torch.nonzero(sample['depth'].squeeze()<=0, as_tuple=True ) descriptors_2d_bg = descriptors_2d_map[:,:,ys_bg, xs_bg].squeeze().permute([1,0]) # descriptors_2d_bg = descriptors_2d_bg[np.random.randint(0,len(descriptors_2d_bg), size=len(lifted_points) )] descriptors_2d = torch.cat([descriptors_2d, descriptors_2d_bg], dim=0) descriptors_3d = torch.cat([descriptors_3d, descriptors_2d_bg], dim=0 ) # append handcrafted coordinates to simplify the code(assign the same coordinates for the bg points far away from the fg points, i.e. 
10e6 ) lifted_points = torch.cat([lifted_points, torch.ones([len(descriptors_2d_bg) ,3],device=device )*10e6 ] ) # append very distant points model_points = torch.cat([model_points, torch.ones([len(descriptors_2d_bg) ,3], device=device)*10e6 ] ) # append very distant points # append one-to-one inds if len(descriptors_2d_bg)>0: # randomly sample the bg points to balance the learning process sample_inds=np.random.randint(0,len(descriptors_2d_bg), size=int(len(correspondence)*0.1) ) bg_corr= torch.stack([ torch.arange(fg_point_num, fg_point_num+len(descriptors_2d_bg), device=device )[sample_inds], torch.arange(model_point_num, model_point_num+len(descriptors_2d_bg), device=device)[sample_inds] ], dim=-1) # Nx2 correspondence = torch.cat([correspondence, bg_corr ], dim=0) if len(lifted_points)>0: contra_loss=self.contrastive_loss(src_pcd=lifted_points, tgt_pcd=model_points, src_feats=descriptors_2d, tgt_feats=descriptors_3d, correspondence=correspondence, scores_overlap=None, scores_saliency=None) else: print("Warning: Contrastive loss is skipped, as no lifted point is found!", flush=True) contra_loss={ 'circle_loss': torch.zeros([1], device=depth.device), 'recall': torch.zeros([1], device=depth.device) } loss3d = partial(self.pose_loss.forward, points=orig_model_points[None]) motion_loss = self.motion_net.compute_loss( preds_dict['Tij_gt'], sample['syn_depth'], intrinsics=sample['K'], loss='l1', log_error=True, loss3d=loss3d) loss = self.contrastive_loss.weight * contra_loss['circle_loss'] + motion_loss['total_loss'] res = { "loss": loss, "circle_loss": contra_loss['circle_loss'].detach(), "recall":contra_loss['recall'].detach(), # "geometric_loss": torch.zeros(1), "reproj_loss": motion_loss['reproj_loss'].detach(), "loss_3d_proj": motion_loss['loss_3d_proj'].detach(), "valid_mask": motion_loss["valid_mask"].detach(), } return res ================================================ FILE: model/descriptor2D.py ================================================ from easydict import EasyDict as edict from pathlib import Path import torch from torch import nn from torchplus.nn.modules.common import Empty class SuperPoint2D(nn.Module): """SuperPoint Convolutional Detector and Descriptor SuperPoint: Self-Supervised Interest Point Detection and Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew Rabinovich. In CVPRW, 2018.
https://arxiv.org/abs/1712.07629 """ default_config = { 'descriptor_dim': 256, 'nms_radius': 4, 'keypoint_threshold': 0.005, 'max_keypoints': -1, 'remove_borders': 4, 'saliency_score_normalization_fuc': 'sigmoid', "use_instance_norm": True } def __init__(self, config): super().__init__() self.default_config.update(config) self.config=edict(self.default_config) self.normalize_output=config.get('normalize_output', True) self.saliency_score_normalization_fuc= self.config.saliency_score_normalization_fuc assert self.saliency_score_normalization_fuc in ['sigmoid', 'softmax'] self.relu = nn.ReLU(inplace=True) self.pool = nn.MaxPool2d(kernel_size=2, stride=2) if self.config.use_instance_norm: self.Normalization=nn.InstanceNorm2d else: self.Normalization = Empty c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256 self.input_dim=config.input_dim # self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) self.conv1a = nn.Conv2d(config.input_dim, c1, kernel_size=3, stride=1, padding=1) self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) self.convPa = nn.Sequential( nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1), self.Normalization(c5) ) self.convPb = nn.Conv2d(c5, 1, kernel_size=1, stride=1, padding=0) self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) self.convDb = nn.Conv2d( c5, self.config['descriptor_dim'], kernel_size=1, stride=1, padding=0) self.decode1=nn.Sequential( nn.Upsample(scale_factor=2,mode='bilinear'), nn.Conv2d(c4,c4, kernel_size=3, stride=1, padding=1), self.Normalization(c4), nn.ReLU()) self.decode2=nn.Sequential( nn.Upsample(scale_factor=2,mode='bilinear'), nn.Conv2d(c4+c3,c4, kernel_size=3, stride=1, padding=1), self.Normalization(c4), nn.ReLU() ) self.decode3=nn.Sequential( nn.Upsample(scale_factor=2,mode='bilinear'), nn.Conv2d(c4+c2,c4, kernel_size=3, stride=1, padding=1), self.Normalization(c4), nn.ReLU() ) path = Path(__file__).parent.parent/ 'weights/superpoint_v1.pth' self.load_state_dict(torch.load(str(path), map_location='cpu' ), strict=False) mk = self.config['max_keypoints'] if mk == 0 or mk < -1: raise ValueError('\"max_keypoints\" must be positive or \"-1\"') print('Loaded SuperPoint model') def load_state_dict(self,state_dict, strict=True): if not strict: updated_state_dict = {} model_dict = self.state_dict() for k, v in state_dict.items(): if k in model_dict and v.shape == model_dict[k].shape: updated_state_dict[k] = v else: updated_state_dict = state_dict super().load_state_dict(updated_state_dict, strict) def forward_encoder(self, x): if self.input_dim==1: x=x.mean(dim=1, keepdims=True) # x_skip=[] # Shared Encoder x = self.relu(self.conv1a(x)) x = self.relu(self.conv1b(x)) x_skip.append(x) x = self.pool(x) x = self.relu(self.conv2a(x)) x = self.relu(self.conv2b(x)) x_skip.append(x) x = self.pool(x) x = self.relu(self.conv3a(x)) x = self.relu(self.conv3b(x)) x_skip.append(x) x = self.pool(x) x = self.relu(self.conv4a(x)) x = self.relu(self.conv4b(x)) return x, x_skip def forward_decoder(self, x, x_skip): #upsample first x = self.decode1(x) x=torch.cat([x, x_skip[-1]], dim=1) x = self.decode2(x) x=torch.cat([x, x_skip[-2]], 
dim=1) x = self.decode3(x) # Compute the dense keypoint scores cPa = self.relu(self.convPa(x)) scores = self.convPb(cPa) if self.saliency_score_normalization_fuc == 'sigmoid': scores = nn.functional.sigmoid(scores) elif self.saliency_score_normalization_fuc == 'softmax': scores_shape = scores.shape scores = scores.reshape(*scores.shape[0:2], -1) scores = nn.functional.softmax(scores/1, dim=-1) scores= scores.reshape(*scores_shape)#.clone() else: raise ValueError keypoints=None # Compute the dense descriptors cDa = self.relu(self.convDa(x)) descriptors = self.convDb(cDa) if self.normalize_output: descriptors = torch.nn.functional.normalize(descriptors, p=2, dim=1) return keypoints, scores, descriptors def forward(self, data): """ Compute keypoints, scores, descriptors for image """ # Shared Encoder x, x_skip=self.forward_encoder(data) # Compute the dense keypoint scores keypoints, scores, descriptors = self.forward_decoder(x, x_skip) return { 'keypoints': keypoints, 'scores': scores, 'descriptors': descriptors, } ================================================ FILE: model/descriptor3D.py ================================================ import torch import torch.nn as nn from kpconv.kpconv_blocks import * import torch.nn.functional as F import numpy as np from kpconv.lib.utils import square_distance class KPSuperpoint3Dv2(nn.Module): #remove useless channels def __init__(self, config): super().__init__() self.normalize_output=config.get('normalize_output', True) # build the architectures config.architecture = [ 'simple', 'resnetb', ] for i in range(config.num_layers-1): config.architecture.append('resnetb_strided') config.architecture.append('resnetb') config.architecture.append('resnetb') for i in range(config.num_layers-2): config.architecture.append('nearest_upsample') config.architecture.append('unary') config.architecture.append('nearest_upsample') config.architecture.append('last_unary') ############ # Parameters ############ # Current radius of convolution and feature dimension layer = 0 r = config.first_subsampling_dl * config.conv_radius in_dim = config.in_features_dim out_dim = config.first_feats_dim self.K = config.num_kernel_points self.epsilon = torch.nn.Parameter(torch.tensor(-5.0)) self.final_feats_dim = config.final_feats_dim ##################### # List Encoder blocks ##################### # Save all block operations in a list of modules self.encoder_blocks = nn.ModuleList() self.encoder_skip_dims = [] self.encoder_skips = [] # Loop over consecutive blocks for block_i, block in enumerate(config.architecture): # Check equivariance if ('equivariant' in block) and (not out_dim % 3 == 0): raise ValueError('Equivariant block but features dimension is not a factor of 3') # Detect change to next layer for skip connection if np.any([tmp in block for tmp in ['pool', 'strided', 'upsample', 'global']]): self.encoder_skips.append(block_i) self.encoder_skip_dims.append(in_dim) # Detect upsampling block to stop if 'upsample' in block: break # Apply the good block function defining tf ops self.encoder_blocks.append(block_decider(block, r, in_dim, out_dim, layer, config)) # Update dimension of input from output if 'simple' in block: in_dim = out_dim // 2 else: in_dim = out_dim # Detect change to a subsampled layer if 'pool' in block or 'strided' in block: # Update radius and feature dimension for next layer layer += 1 r *= 2 out_dim *= 2 ##################### # bottleneck layer ##################### botneck_feats_dim = config.gnn_feats_dim self.bottle = nn.Conv1d(in_dim, 
botneck_feats_dim,kernel_size=1,bias=True) # num_head = config.num_head self.proj_gnn = nn.Conv1d(botneck_feats_dim,botneck_feats_dim,kernel_size=1, bias=True) ##################### # List Decoder blocks ##################### out_dim = botneck_feats_dim # + 2 # Save all block operations in a list of modules self.decoder_blocks = nn.ModuleList() self.decoder_concats = [] # Find first upsampling block start_i = 0 for block_i, block in enumerate(config.architecture): if 'upsample' in block: start_i = block_i break # Loop over consecutive blocks for block_i, block in enumerate(config.architecture[start_i:]): # Add dimension of skip connection concat if block_i > 0 and 'upsample' in config.architecture[start_i + block_i - 1]: in_dim += self.encoder_skip_dims[layer] self.decoder_concats.append(block_i) # Apply the good block function defining tf ops self.decoder_blocks.append(block_decider(block, r, in_dim, out_dim, layer, config)) # Update dimension of input from output in_dim = out_dim # Detect change to a subsampled layer if 'upsample' in block: # Update radius and feature dimension for next layer layer -= 1 r *= 0.5 out_dim = out_dim // 2 return def regular_score(self,score): score = torch.where(torch.isnan(score), torch.zeros_like(score), score) score = torch.where(torch.isinf(score), torch.zeros_like(score), score) return score def forward_encoder(self, batch): # Get input features x = batch['features'].clone().detach() len_src_c = batch['stack_lengths'][-1][0] len_src_f = batch['stack_lengths'][0][0] pcd_c = batch['points'][-1] pcd_f = batch['points'][0] src_pcd_c, tgt_pcd_c = pcd_c[:len_src_c], pcd_c[len_src_c:] sigmoid = nn.Sigmoid() ################################# # 1. encoder skip_x = [] for block_i, block_op in enumerate(self.encoder_blocks): if block_i in self.encoder_skips: skip_x.append(x) x = block_op(x, batch) ################################# # 2. 
project the bottleneck features feats_c = x.transpose(0,1).unsqueeze(0) #[1, C, N] feats_c = self.bottle(feats_c) #[1, C, N] return feats_c,skip_x def forward_middle(self, x): feats_c = self.proj_gnn(x) feats_gnn_raw = feats_c.squeeze(0).transpose(0,1) return feats_gnn_raw def forward_decoder(self, x, skip_x, batch ): sigmoid = nn.Sigmoid() for block_i, block_op in enumerate(self.decoder_blocks): if block_i in self.decoder_concats: x = torch.cat([x, skip_x.pop()], dim=1) x = block_op(x, batch) feats_f = x[:,:self.final_feats_dim] # normalise point-wise features if self.normalize_output: feats_f = F.normalize(feats_f, p=2, dim=1) return feats_f def forward(self, batch): # Get input features feats_c,skip_x = self.forward_encoder(batch) x = self.forward_middle(feats_c) feats_f = self.forward_decoder(x, skip_x, batch) return feats_f ================================================ FILE: model/losses.py ================================================ from sklearn.metrics import precision_recall_fscore_support from thirdparty.kpconv.lib.utils import square_distance from abc import ABCMeta, abstractmethod import time import numpy as np import torch from torch import nn from torch.autograd import Variable from torch.nn import functional as F import torchplus # from utils.pca import pca_tch import kornia # import self_voxelo.utils.pose_utils as pose_utils import apex.amp as amp # class Loss(object): class Loss(nn.Module): """Abstract base class for loss functions.""" __metaclass__ = ABCMeta def __init__(self, loss_weight=1): super(Loss, self).__init__() self._loss_weight = loss_weight # def __call__(self, def forward(self, prediction_tensor, target_tensor, ignore_nan_targets=False, scope=None, **params): """Call the loss function. Args: prediction_tensor: an N-d tensor of shape [batch, anchors, ...] representing predicted quantities. target_tensor: an N-d tensor of shape [batch, anchors, ...] representing regression or classification targets. ignore_nan_targets: whether to ignore nan targets in the loss computation. E.g. can be used if the target tensor is missing groundtruth data that shouldn't be factored into the loss. scope: Op scope name. Defaults to 'Loss' if None. **params: Additional keyword arguments for specific implementations of the Loss. Returns: loss: a tensor representing the value of the loss function. """ if ignore_nan_targets: target_tensor = torch.where(torch.isnan(target_tensor), prediction_tensor, target_tensor) # ret = self._compute_loss(prediction_tensor, target_tensor, **params) # if isinstance(ret, (list, tuple)): # return [self._loss_weight*ret[0]] + list(ret[1:]) # else: return self._loss_weight*self._compute_loss(prediction_tensor, target_tensor, **params) @abstractmethod @amp.float_function def _compute_loss(self, prediction_tensor, target_tensor, **params): """Method to be overridden by implementations. Args: prediction_tensor: a tensor representing predicted quantities target_tensor: a tensor representing regression or classification targets **params: Additional keyword arguments for specific implementations of the Loss. Returns: loss: an N-d tensor of shape [batch, anchors, ...] containing the loss per anchor """ raise NotImplementedError class L2Loss(Loss): def __init__(self, loss_weight=1): super(L2Loss, self).__init__(loss_weight) def _compute_loss(self, prediction_tensor, target_tensor, mask=None): """Compute loss function. 
Args: prediction_tensor: A float tensor of shape [batch_size, num_anchors, code_size] representing the (encoded) predicted locations of objects. target_tensor: A float tensor of shape [batch_size, num_anchors, code_size] representing the regression targets Returns: loss: a float tensor of shape [batch_size, num_anchors] tensor representing the value of the loss function. """ diff = prediction_tensor - target_tensor if mask is not None: mask = mask.expand_as(diff).byte() diff = diff[mask] # square_diff = 0.5 * weighted_diff * weighted_diff square_diff = diff * diff return square_diff.mean() class AdaptiveWeightedL2Loss(Loss): def __init__(self, init_alpha, learn_alpha=True, loss_weight=1, focal_gamma=0): super(AdaptiveWeightedL2Loss, self).__init__(loss_weight) self.learn_alpha = learn_alpha self.alpha = nn.Parameter(torch.Tensor( [init_alpha]), requires_grad=learn_alpha) self.focal_gamma = focal_gamma # self.alpha_shift = -13 # -10# TODO: temporarily test def _compute_loss(self, prediction_tensor, target_tensor, mask=None, alpha=None, focal_gamma=None): """Compute loss function. Args: prediction_tensor: A float tensor of shape [batch_size, num_anchors, code_size] representing the (encoded) predicted locations of objects. target_tensor: A float tensor of shape [batch_size, num_anchors, code_size] representing the regression targets Returns: loss: a float tensor of shape [batch_size, num_anchors] tensor representing the value of the loss function. """ if focal_gamma is None: focal_gamma = self.focal_gamma _alpha = self.alpha if mask is None: mask = torch.ones_like(target_tensor) else: mask = mask.expand_as(target_tensor) diff = prediction_tensor - target_tensor square_diff = (diff * diff) * mask # loss = square_diff.mean() input_shape = prediction_tensor.shape loss = torch.sum(square_diff, dim=list(range(1, len(input_shape)))) / \ (torch.sum(mask, dim=list(range(1, len(input_shape)))) + 1e-12) # (B,) focal_weight = (torch.exp(-_alpha) * loss)**focal_gamma focal_weight = focal_weight/(torch.sum(focal_weight) + 1e-12) loss = focal_weight*(torch.exp(-_alpha) * loss) loss = loss.sum() + _alpha return loss class MetricLoss(nn.Module): """ We evaluate both contrastive loss and circle loss """ def __init__(self, configs, log_scale=16, pos_optimal=0.1, neg_optimal=1.4, ): super(MetricLoss, self).__init__() self.log_scale = log_scale self.pos_optimal = pos_optimal self.neg_optimal = neg_optimal self.pos_margin = configs.pos_margin self.neg_margin = configs.neg_margin self.max_points = configs.max_points self.safe_radius = configs.safe_radius self.matchability_radius = configs.matchability_radius # just to take care of the numeric precision self.pos_radius = configs.pos_radius + 0.001 self.weight = configs.get('loss_weight', 1) def get_circle_loss(self, coords_dist, feats_dist): """ Modified from: https://github.com/XuyangBai/D3Feat.pytorch """ pos_mask = coords_dist < self.pos_radius neg_mask = coords_dist > self.safe_radius # get anchors that have both positive and negative pairs row_sel = ((pos_mask.sum(-1) > 0) * (neg_mask.sum(-1) > 0)).detach() col_sel = ((pos_mask.sum(-2) > 0) * (neg_mask.sum(-2) > 0)).detach() # get alpha for both positive and negative pairs pos_weight = feats_dist - 1e5 * \ (~pos_mask).float() # mask the non-positive # mask the uninformative positive pos_weight = (pos_weight - self.pos_optimal) pos_weight = torch.max(torch.zeros_like( pos_weight), pos_weight).detach() neg_weight = feats_dist + 1e5 * \ (~neg_mask).float() # mask the non-negative # mask the uninformative 
negative neg_weight = (self.neg_optimal - neg_weight) neg_weight = torch.max(torch.zeros_like( neg_weight), neg_weight).detach() lse_pos_row = torch.logsumexp( self.log_scale * (feats_dist - self.pos_margin) * pos_weight, dim=-1) lse_pos_col = torch.logsumexp( self.log_scale * (feats_dist - self.pos_margin) * pos_weight, dim=-2) lse_neg_row = torch.logsumexp( self.log_scale * (self.neg_margin - feats_dist) * neg_weight, dim=-1) lse_neg_col = torch.logsumexp( self.log_scale * (self.neg_margin - feats_dist) * neg_weight, dim=-2) loss_row = F.softplus(lse_pos_row + lse_neg_row)/self.log_scale loss_col = F.softplus(lse_pos_col + lse_neg_col)/self.log_scale circle_loss = (loss_row[row_sel].mean() + loss_col[col_sel].mean()) / 2 return circle_loss def get_recall(self, coords_dist, feats_dist): """ Get feature match recall, divided by number of true inliers """ pos_mask = coords_dist < self.pos_radius n_gt_pos = (pos_mask.sum(-1) > 0).float().sum()+1e-12 _, sel_idx = torch.min(feats_dist, -1) sel_dist = torch.gather(coords_dist, dim=-1, index=sel_idx[:, None])[pos_mask.sum(-1) > 0] n_pred_pos = (sel_dist < self.pos_radius).float().sum() recall = n_pred_pos / n_gt_pos return recall def get_weighted_bce_loss(self, prediction, gt): loss = nn.BCELoss(reduction='none') class_loss = loss(prediction, gt) weights = torch.ones_like(gt) w_negative = gt.sum()/gt.size(0) w_positive = 1 - w_negative weights[gt >= 0.5] = w_positive weights[gt < 0.5] = w_negative w_class_loss = torch.mean(weights * class_loss) ####################################### # get classification precision and recall predicted_labels = prediction.detach().cpu().round().numpy() cls_precision, cls_recall, _, _ = precision_recall_fscore_support( gt.cpu().numpy(), predicted_labels, average='binary') return w_class_loss, cls_precision, cls_recall def forward(self, src_pcd, tgt_pcd, src_feats, tgt_feats, correspondence, scores_overlap, scores_saliency, rot=None, trans=None): """ Circle loss for metric learning, here we feed the positive pairs only Input: src_pcd: [N, 3], pcd of the 3d model tgt_pcd: [M, 3], pcd of the lifted model from 2d depth rot: [3, 3], rotation used to rotate the src_pcd to the current frame trans: [3, 1], translation used to translate the src_pcd to the current frame src_feats: [N, C] tgt_feats: [M, C] """ if rot is not None and trans is not None: src_pcd = (torch.matmul(rot, src_pcd.transpose(0, 1)) + trans).transpose(0, 1) stats = dict() ####################################### # filter some of correspondence if(correspondence.size(0) > self.max_points): choice = np.random.permutation(correspondence.size(0))[ :self.max_points] correspondence = correspondence[choice] src_idx = correspondence[:, 0] tgt_idx = correspondence[:, 1] src_pcd, tgt_pcd = src_pcd[src_idx], tgt_pcd[tgt_idx] src_feats, tgt_feats = src_feats[src_idx], tgt_feats[tgt_idx] ####################### # get L2 distance between source / target point cloud coords_dist = torch.sqrt(square_distance( src_pcd[None, :, :], tgt_pcd[None, :, :]).squeeze(0)) feats_dist = torch.sqrt(square_distance( src_feats[None, :, :], tgt_feats[None, :, :], normalised=True)).squeeze(0) ############################## # get FMR and circle loss ############################## recall = self.get_recall(coords_dist, feats_dist) circle_loss = self.get_circle_loss(coords_dist, feats_dist) stats['circle_loss'] = circle_loss stats['recall'] = recall return stats class PointAlignmentLoss(nn.Module): def __init__(self, loss_weight=1, ): super().__init__() self._loss_weight = loss_weight def 
forward(self, R_pred, t_pred, R_tgt, t_tgt, points): return self._loss_weight*self._compute_loss(R_pred, t_pred, R_tgt, t_tgt, points) def _compute_loss(self, R_pred, t_pred, R_tgt, t_tgt, points, ): """Average per-point absolute alignment error between the predicted and target poses. Args: R_pred: [Bx3x3] t_pred: [Bx3] R_tgt: [Bx3x3] t_tgt: [Bx3] points: [BxNx3] Returns: loss: scalar loss value """ loss = 0 for b in range(len(points)): diff = points[b]@R_pred[b].transpose(-1, -2) + t_pred[b] - ( points[b]@R_tgt[b].transpose(-1, -2)+t_tgt[b]) square_diff = diff.abs() loss += torch.mean(square_diff)*3 # loss/=len(points) return loss ================================================ FILE: scripts/compile_3rdparty.sh ================================================ #!/usr/bin/bash SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" cd $SCRIPT_DIR/../thirdparty/kpconv/cpp_wrappers bash compile_wrappers.sh cd $SCRIPT_DIR/../thirdparty/nn python setup.py build_ext --inplace ================================================ FILE: scripts/eval.sh ================================================ export PROJECT_ROOT_PATH=/home/RNNPose/Projects/Works/RNNPose_release export PYTHONPATH="$PROJECT_ROOT_PATH:$PYTHONPATH" export PYTHONPATH="$PROJECT_ROOT_PATH/thirdparty:$PYTHONPATH" export model_dir='outputs' seq=cat gpu=1 start_gpu_id=0 mkdir -p $model_dir train_file=$PROJECT_ROOT_PATH/tools/eval.py config_path=$PROJECT_ROOT_PATH/config/linemod/"$seq"_fw0.5.yml pretrain=$PROJECT_ROOT_PATH/weights/trained_models/"$seq".tckpt python -u $train_file multi_proc_train \ --config_path $config_path \ --model_dir $model_dir/results \ --use_dist True \ --dist_port 10000 \ --gpus_per_node $gpu \ --optim_eval True \ --use_apex False \ --world_size $gpu \ --start_gpu_id $start_gpu_id \ --pretrained_path $pretrain ================================================ FILE: scripts/eval_lmocc.sh ================================================ export PROJECT_ROOT_PATH=/home/RNNPose/Projects/Works/RNNPose_release export PYTHONPATH="$PROJECT_ROOT_PATH:$PYTHONPATH" export PYTHONPATH="$PROJECT_ROOT_PATH/thirdparty:$PYTHONPATH" export model_dir='outputs' seq=ape gpu=1 start_gpu_id=0 mkdir -p $model_dir train_file=$PROJECT_ROOT_PATH/tools/eval.py config_path=$PROJECT_ROOT_PATH/config/linemod/"$seq"_fw0.5_occ.yml pretrain=$PROJECT_ROOT_PATH/weights/trained_models/"$seq".tckpt python -u $train_file multi_proc_train \ --config_path $config_path \ --model_dir $model_dir/results \ --use_dist True \ --dist_port 10000 \ --gpus_per_node $gpu \ --optim_eval True \ --use_apex False \ --world_size $gpu \ --start_gpu_id $start_gpu_id \ --pretrained_path $pretrain # --use_apex True \ ================================================ FILE: scripts/run_dataformatter.sh ================================================ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" PROJ_ROOT=$SCRIPT_DIR/../ python $PROJ_ROOT/tools/transform_data_format.py run --data_type "LM_FUSE_PVNET" --data_info_path $PROJ_ROOT/EXPDATA/data_info/linemod_all_10k_default.info.all --image_root $PROJ_ROOT/EXPDATA/raw_data/fuse --depth_root $PROJ_ROOT/EXPDATA/raw_data/orig_renders --save_dir $PROJ_ROOT/EXPDATA/LINEMOD/fuse_formatted && touch 3.finished ================================================ FILE: scripts/run_datainfo_generation.sh ================================================ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" PROJ_ROOT=$SCRIPT_DIR/../ export PYTHONPATH=$PROJ_ROOT:$PYTHONPATH mkdir --parents
$PROJ_ROOT/EXPDATA/data_info/deepim/ python $PROJ_ROOT/tools/generate_data_info_deepim_0_orig.py create_data_info --data_root $PROJ_ROOT/EXPDATA/LM6d_converted/LM6d_refine --saving_path $PROJ_ROOT/EXPDATA/data_info/deepim/linemod_orig_deepim.info python $PROJ_ROOT/tools/generate_data_info_deepim_1_syn.py create_data_info --data_root $PROJ_ROOT/EXPDATA/LM6d_converted/LM6d_refine_syn --saving_path $PROJ_ROOT/EXPDATA/data_info/deepim/linemod_syn_deepim.info --with_assertion True python $PROJ_ROOT/tools/generate_data_info_deepim_2_posecnnval.py create_data_info --data_root $PROJ_ROOT/EXPDATA/LM6d_converted/LM6d_refine --saving_path $PROJ_ROOT/EXPDATA/data_info/linemod_posecnn.info --with_assertion True python $PROJ_ROOT/tools/generate_data_info_v2_deepim.py create_data_info --data_root $PROJ_ROOT/EXPDATA/LINEMOD/fuse_formatted/ --saving_path $PROJ_ROOT/EXPDATA/data_info/linemod_fusesformatted_all10k_deepim.info --training_data_ratio 1 --shuffle=False ================================================ FILE: scripts/train.sh ================================================ export PROJECT_ROOT_PATH=/home/RNNPose/Projects/Works/RNNPose_release export PYTHONPATH="$PROJECT_ROOT_PATH:$PYTHONPATH" export PYTHONPATH="$PROJECT_ROOT_PATH/thirdparty:$PYTHONPATH" export model_dir='outputs' seq=cat gpu=1 start_gpu_id=0 mkdir -p $model_dir train_file=$PROJECT_ROOT_PATH/tools/train.py config_path=$PROJECT_ROOT_PATH/config/linemod/"$seq"_fw0.5.yml # pretrain=$PROJECT_ROOT_PATH/weights/trained_models/"$seq".tckpt python -u $train_file multi_proc_train \ --config_path $config_path \ --model_dir $model_dir/results \ --use_dist True \ --dist_port 10000 \ --gpus_per_node $gpu \ --optim_eval True \ --use_apex False \ --world_size $gpu \ --start_gpu_id $start_gpu_id \ # --pretrained_path $pretrain ================================================ FILE: thirdparty/kpconv/__init__.py ================================================ ================================================ FILE: thirdparty/kpconv/cpp_wrappers/compile_wrappers.sh ================================================ #!/bin/bash # Compile cpp subsampling cd cpp_subsampling python3 setup.py build_ext --inplace cd .. # Compile cpp neighbors cd cpp_neighbors python3 setup.py build_ext --inplace cd ..
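# --- Editorial note (hypothetical usage, not part of the original script): this file
# is normally invoked from the repository root via
#     bash scripts/compile_3rdparty.sh
# which cds into thirdparty/kpconv/cpp_wrappers, runs this script to build the
# grid_subsampling and radius_neighbors extensions in place, and then builds the
# CUDA nearest-neighbor extension under thirdparty/nn with
#     python setup.py build_ext --inplace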
================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_neighbors/build.bat ================================================ @echo off py setup.py build_ext --inplace pause ================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_neighbors/neighbors/neighbors.cpp ================================================ #include "neighbors.h" void brute_neighbors(vector& queries, vector& supports, vector& neighbors_indices, float radius, int verbose) { // Initialize variables // ****************** // square radius float r2 = radius * radius; // indices int i0 = 0; // Counting vector int max_count = 0; vector> tmp(queries.size()); // Search neigbors indices // *********************** for (auto& p0 : queries) { int i = 0; for (auto& p : supports) { if ((p0 - p).sq_norm() < r2) { tmp[i0].push_back(i); if (tmp[i0].size() > max_count) max_count = tmp[i0].size(); } i++; } i0++; } // Reserve the memory neighbors_indices.resize(queries.size() * max_count); i0 = 0; for (auto& inds : tmp) { for (int j = 0; j < max_count; j++) { if (j < inds.size()) neighbors_indices[i0 * max_count + j] = inds[j]; else neighbors_indices[i0 * max_count + j] = -1; } i0++; } return; } void ordered_neighbors(vector& queries, vector& supports, vector& neighbors_indices, float radius) { // Initialize variables // ****************** // square radius float r2 = radius * radius; // indices int i0 = 0; // Counting vector int max_count = 0; float d2; vector> tmp(queries.size()); vector> dists(queries.size()); // Search neigbors indices // *********************** for (auto& p0 : queries) { int i = 0; for (auto& p : supports) { d2 = (p0 - p).sq_norm(); if (d2 < r2) { // Find order of the new point auto it = std::upper_bound(dists[i0].begin(), dists[i0].end(), d2); int index = std::distance(dists[i0].begin(), it); // Insert element dists[i0].insert(it, d2); tmp[i0].insert(tmp[i0].begin() + index, i); // Update max count if (tmp[i0].size() > max_count) max_count = tmp[i0].size(); } i++; } i0++; } // Reserve the memory neighbors_indices.resize(queries.size() * max_count); i0 = 0; for (auto& inds : tmp) { for (int j = 0; j < max_count; j++) { if (j < inds.size()) neighbors_indices[i0 * max_count + j] = inds[j]; else neighbors_indices[i0 * max_count + j] = -1; } i0++; } return; } void batch_ordered_neighbors(vector& queries, vector& supports, vector& q_batches, vector& s_batches, vector& neighbors_indices, float radius) { // Initialize variables // ****************** // square radius float r2 = radius * radius; // indices int i0 = 0; // Counting vector int max_count = 0; float d2; vector> tmp(queries.size()); vector> dists(queries.size()); // batch index int b = 0; int sum_qb = 0; int sum_sb = 0; // Search neigbors indices // *********************** for (auto& p0 : queries) { // Check if we changed batch if (i0 == sum_qb + q_batches[b]) { sum_qb += q_batches[b]; sum_sb += s_batches[b]; b++; } // Loop only over the supports of current batch vector::iterator p_it; int i = 0; for(p_it = supports.begin() + sum_sb; p_it < supports.begin() + sum_sb + s_batches[b]; p_it++ ) { d2 = (p0 - *p_it).sq_norm(); if (d2 < r2) { // Find order of the new point auto it = std::upper_bound(dists[i0].begin(), dists[i0].end(), d2); int index = std::distance(dists[i0].begin(), it); // Insert element dists[i0].insert(it, d2); tmp[i0].insert(tmp[i0].begin() + index, sum_sb + i); // Update max count if (tmp[i0].size() > max_count) max_count = tmp[i0].size(); } i++; } i0++; } // Reserve the memory 
neighbors_indices.resize(queries.size() * max_count); i0 = 0; for (auto& inds : tmp) { for (int j = 0; j < max_count; j++) { if (j < inds.size()) neighbors_indices[i0 * max_count + j] = inds[j]; else neighbors_indices[i0 * max_count + j] = supports.size(); } i0++; } return; } void batch_nanoflann_neighbors(vector& queries, vector& supports, vector& q_batches, vector& s_batches, vector& neighbors_indices, float radius) { // Initialize variables // ****************** // indices int i0 = 0; // Square radius float r2 = radius * radius; // Counting vector int max_count = 0; float d2; vector>> all_inds_dists(queries.size()); // batch index int b = 0; int sum_qb = 0; int sum_sb = 0; // Nanoflann related variables // *************************** // CLoud variable PointCloud current_cloud; // Tree parameters nanoflann::KDTreeSingleIndexAdaptorParams tree_params(10 /* max leaf */); // KDTree type definition typedef nanoflann::KDTreeSingleIndexAdaptor< nanoflann::L2_Simple_Adaptor , PointCloud, 3 > my_kd_tree_t; // Pointer to trees my_kd_tree_t* index; // Build KDTree for the first batch element current_cloud.pts = vector(supports.begin() + sum_sb, supports.begin() + sum_sb + s_batches[b]); index = new my_kd_tree_t(3, current_cloud, tree_params); index->buildIndex(); // Search neigbors indices // *********************** // Search params nanoflann::SearchParams search_params; search_params.sorted = true; for (auto& p0 : queries) { // Check if we changed batch if (i0 == sum_qb + q_batches[b]) { sum_qb += q_batches[b]; sum_sb += s_batches[b]; b++; // Change the points current_cloud.pts.clear(); current_cloud.pts = vector(supports.begin() + sum_sb, supports.begin() + sum_sb + s_batches[b]); // Build KDTree of the current element of the batch delete index; index = new my_kd_tree_t(3, current_cloud, tree_params); index->buildIndex(); } // Initial guess of neighbors size all_inds_dists[i0].reserve(max_count); // Find neighbors float query_pt[3] = { p0.x, p0.y, p0.z}; size_t nMatches = index->radiusSearch(query_pt, r2, all_inds_dists[i0], search_params); // Update max count if (nMatches > max_count) max_count = nMatches; // Increment query idx i0++; } // Reserve the memory neighbors_indices.resize(queries.size() * max_count); i0 = 0; sum_sb = 0; sum_qb = 0; b = 0; for (auto& inds_dists : all_inds_dists) { // Check if we changed batch if (i0 == sum_qb + q_batches[b]) { sum_qb += q_batches[b]; sum_sb += s_batches[b]; b++; } for (int j = 0; j < max_count; j++) { if (j < inds_dists.size()) neighbors_indices[i0 * max_count + j] = inds_dists[j].first + sum_sb; else neighbors_indices[i0 * max_count + j] = supports.size(); } i0++; } delete index; return; } ================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_neighbors/neighbors/neighbors.h ================================================ #include "../../cpp_utils/cloud/cloud.h" #include "../../cpp_utils/nanoflann/nanoflann.hpp" #include #include using namespace std; void ordered_neighbors(vector& queries, vector& supports, vector& neighbors_indices, float radius); void batch_ordered_neighbors(vector& queries, vector& supports, vector& q_batches, vector& s_batches, vector& neighbors_indices, float radius); void batch_nanoflann_neighbors(vector& queries, vector& supports, vector& q_batches, vector& s_batches, vector& neighbors_indices, float radius); ================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_neighbors/setup.py ================================================ from distutils.core import 
setup, Extension import numpy.distutils.misc_util # Adding OpenCV to project # ************************ # Adding sources of the project # ***************************** SOURCES = ["../cpp_utils/cloud/cloud.cpp", "neighbors/neighbors.cpp", "wrapper.cpp"] module = Extension(name="radius_neighbors", sources=SOURCES, extra_compile_args=['-std=c++11', '-D_GLIBCXX_USE_CXX11_ABI=0']) setup(ext_modules=[module], include_dirs=numpy.distutils.misc_util.get_numpy_include_dirs()) ================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_neighbors/wrapper.cpp ================================================ #include #include #include "neighbors/neighbors.h" #include // docstrings for our module // ************************* static char module_docstring[] = "This module provides two methods to compute radius neighbors from pointclouds or batch of pointclouds"; static char batch_query_docstring[] = "Method to get radius neighbors in a batch of stacked pointclouds"; // Declare the functions // ********************* static PyObject *batch_neighbors(PyObject *self, PyObject *args, PyObject *keywds); // Specify the members of the module // ********************************* static PyMethodDef module_methods[] = { { "batch_query", (PyCFunction)batch_neighbors, METH_VARARGS | METH_KEYWORDS, batch_query_docstring }, {NULL, NULL, 0, NULL} }; // Initialize the module // ********************* static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "radius_neighbors", // m_name module_docstring, // m_doc -1, // m_size module_methods, // m_methods NULL, // m_reload NULL, // m_traverse NULL, // m_clear NULL, // m_free }; PyMODINIT_FUNC PyInit_radius_neighbors(void) { import_array(); return PyModule_Create(&moduledef); } // Definition of the batch_subsample method // ********************************** static PyObject* batch_neighbors(PyObject* self, PyObject* args, PyObject* keywds) { // Manage inputs // ************* // Args containers PyObject* queries_obj = NULL; PyObject* supports_obj = NULL; PyObject* q_batches_obj = NULL; PyObject* s_batches_obj = NULL; // Keywords containers static char* kwlist[] = { "queries", "supports", "q_batches", "s_batches", "radius", NULL }; float radius = 0.1; // Parse the input if (!PyArg_ParseTupleAndKeywords(args, keywds, "OOOO|$f", kwlist, &queries_obj, &supports_obj, &q_batches_obj, &s_batches_obj, &radius)) { PyErr_SetString(PyExc_RuntimeError, "Error parsing arguments"); return NULL; } // Interpret the input objects as numpy arrays. PyObject* queries_array = PyArray_FROM_OTF(queries_obj, NPY_FLOAT, NPY_IN_ARRAY); PyObject* supports_array = PyArray_FROM_OTF(supports_obj, NPY_FLOAT, NPY_IN_ARRAY); PyObject* q_batches_array = PyArray_FROM_OTF(q_batches_obj, NPY_INT, NPY_IN_ARRAY); PyObject* s_batches_array = PyArray_FROM_OTF(s_batches_obj, NPY_INT, NPY_IN_ARRAY); // Verify data was load correctly. 
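// --- Editorial note (not part of the original wrapper): every failure branch below
// releases all four converted arrays with Py_XDECREF -- a no-op on NULL pointers --
// before setting the exception, so a partially successful conversion never leaks
// references.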
if (queries_array == NULL) { Py_XDECREF(queries_array); Py_XDECREF(supports_array); Py_XDECREF(q_batches_array); Py_XDECREF(s_batches_array); PyErr_SetString(PyExc_RuntimeError, "Error converting query points to numpy arrays of type float32"); return NULL; } if (supports_array == NULL) { Py_XDECREF(queries_array); Py_XDECREF(supports_array); Py_XDECREF(q_batches_array); Py_XDECREF(s_batches_array); PyErr_SetString(PyExc_RuntimeError, "Error converting support points to numpy arrays of type float32"); return NULL; } if (q_batches_array == NULL) { Py_XDECREF(queries_array); Py_XDECREF(supports_array); Py_XDECREF(q_batches_array); Py_XDECREF(s_batches_array); PyErr_SetString(PyExc_RuntimeError, "Error converting query batches to numpy arrays of type int32"); return NULL; } if (s_batches_array == NULL) { Py_XDECREF(queries_array); Py_XDECREF(supports_array); Py_XDECREF(q_batches_array); Py_XDECREF(s_batches_array); PyErr_SetString(PyExc_RuntimeError, "Error converting support batches to numpy arrays of type int32"); return NULL; } // Check that the input array respect the dims if ((int)PyArray_NDIM(queries_array) != 2 || (int)PyArray_DIM(queries_array, 1) != 3) { Py_XDECREF(queries_array); Py_XDECREF(supports_array); Py_XDECREF(q_batches_array); Py_XDECREF(s_batches_array); PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : query.shape is not (N, 3)"); return NULL; } if ((int)PyArray_NDIM(supports_array) != 2 || (int)PyArray_DIM(supports_array, 1) != 3) { Py_XDECREF(queries_array); Py_XDECREF(supports_array); Py_XDECREF(q_batches_array); Py_XDECREF(s_batches_array); PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : support.shape is not (N, 3)"); return NULL; } if ((int)PyArray_NDIM(q_batches_array) > 1) { Py_XDECREF(queries_array); Py_XDECREF(supports_array); Py_XDECREF(q_batches_array); Py_XDECREF(s_batches_array); PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : queries_batches.shape is not (B,) "); return NULL; } if ((int)PyArray_NDIM(s_batches_array) > 1) { Py_XDECREF(queries_array); Py_XDECREF(supports_array); Py_XDECREF(q_batches_array); Py_XDECREF(s_batches_array); PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : supports_batches.shape is not (B,) "); return NULL; } if ((int)PyArray_DIM(q_batches_array, 0) != (int)PyArray_DIM(s_batches_array, 0)) { Py_XDECREF(queries_array); Py_XDECREF(supports_array); Py_XDECREF(q_batches_array); Py_XDECREF(s_batches_array); PyErr_SetString(PyExc_RuntimeError, "Wrong number of batch elements: different for queries and supports "); return NULL; } // Number of points int Nq = (int)PyArray_DIM(queries_array, 0); int Ns= (int)PyArray_DIM(supports_array, 0); // Number of batches int Nb = (int)PyArray_DIM(q_batches_array, 0); // Call the C++ function // ********************* // Convert PyArray to Cloud C++ class vector queries; vector supports; vector q_batches; vector s_batches; queries = vector((PointXYZ*)PyArray_DATA(queries_array), (PointXYZ*)PyArray_DATA(queries_array) + Nq); supports = vector((PointXYZ*)PyArray_DATA(supports_array), (PointXYZ*)PyArray_DATA(supports_array) + Ns); q_batches = vector((int*)PyArray_DATA(q_batches_array), (int*)PyArray_DATA(q_batches_array) + Nb); s_batches = vector((int*)PyArray_DATA(s_batches_array), (int*)PyArray_DATA(s_batches_array) + Nb); // Create result containers vector neighbors_indices; // Compute results //batch_ordered_neighbors(queries, supports, q_batches, s_batches, neighbors_indices, radius); batch_nanoflann_neighbors(queries, supports, q_batches, s_batches, neighbors_indices, 
radius); // Check result if (neighbors_indices.size() < 1) { PyErr_SetString(PyExc_RuntimeError, "Error"); return NULL; } // Manage outputs // ************** // Maximal number of neighbors int max_neighbors = neighbors_indices.size() / Nq; // Dimension of output containers npy_intp* neighbors_dims = new npy_intp[2]; neighbors_dims[0] = Nq; neighbors_dims[1] = max_neighbors; // Create output array PyObject* res_obj = PyArray_SimpleNew(2, neighbors_dims, NPY_INT); PyObject* ret = NULL; // Fill output array with values size_t size_in_bytes = Nq * max_neighbors * sizeof(int); memcpy(PyArray_DATA(res_obj), neighbors_indices.data(), size_in_bytes); // Merge results ret = Py_BuildValue("N", res_obj); // Clean up // ******** Py_XDECREF(queries_array); Py_XDECREF(supports_array); Py_XDECREF(q_batches_array); Py_XDECREF(s_batches_array); return ret; } ================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_subsampling/build.bat ================================================ @echo off py setup.py build_ext --inplace pause ================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_subsampling/grid_subsampling/grid_subsampling.cpp ================================================ #include "grid_subsampling.h" void grid_subsampling(vector& original_points, vector& subsampled_points, vector& original_features, vector& subsampled_features, vector& original_classes, vector& subsampled_classes, float sampleDl, int verbose) { // Initialize variables // ****************** // Number of points in the cloud size_t N = original_points.size(); // Dimension of the features size_t fdim = original_features.size() / N; size_t ldim = original_classes.size() / N; // Limits of the cloud PointXYZ minCorner = min_point(original_points); PointXYZ maxCorner = max_point(original_points); PointXYZ originCorner = floor(minCorner * (1/sampleDl)) * sampleDl; // Dimensions of the grid size_t sampleNX = (size_t)floor((maxCorner.x - originCorner.x) / sampleDl) + 1; size_t sampleNY = (size_t)floor((maxCorner.y - originCorner.y) / sampleDl) + 1; //size_t sampleNZ = (size_t)floor((maxCorner.z - originCorner.z) / sampleDl) + 1; // Check if features and classes need to be processed bool use_feature = original_features.size() > 0; bool use_classes = original_classes.size() > 0; // Create the sampled map // ********************** // Verbose parameters int i = 0; int nDisp = N / 100; // Initialize variables size_t iX, iY, iZ, mapIdx; unordered_map data; for (auto& p : original_points) { // Position of point in sample map iX = (size_t)floor((p.x - originCorner.x) / sampleDl); iY = (size_t)floor((p.y - originCorner.y) / sampleDl); iZ = (size_t)floor((p.z - originCorner.z) / sampleDl); mapIdx = iX + sampleNX*iY + sampleNX*sampleNY*iZ; // If not already created, create key if (data.count(mapIdx) < 1) data.emplace(mapIdx, SampledData(fdim, ldim)); // Fill the sample map if (use_feature && use_classes) data[mapIdx].update_all(p, original_features.begin() + i * fdim, original_classes.begin() + i * ldim); else if (use_feature) data[mapIdx].update_features(p, original_features.begin() + i * fdim); else if (use_classes) data[mapIdx].update_classes(p, original_classes.begin() + i * ldim); else data[mapIdx].update_points(p); // Display i++; if (verbose > 1 && i%nDisp == 0) std::cout << "\rSampled Map : " << std::setw(3) << i / nDisp << "%"; } // Divide for barycentre and transfer to a vector subsampled_points.reserve(data.size()); if (use_feature) subsampled_features.reserve(data.size() 
* fdim); if (use_classes) subsampled_classes.reserve(data.size() * ldim); for (auto& v : data) { subsampled_points.push_back(v.second.point * (1.0 / v.second.count)); if (use_feature) { float count = (float)v.second.count; transform(v.second.features.begin(), v.second.features.end(), v.second.features.begin(), [count](float f) { return f / count;}); subsampled_features.insert(subsampled_features.end(),v.second.features.begin(),v.second.features.end()); } if (use_classes) { for (int i = 0; i < ldim; i++) subsampled_classes.push_back(max_element(v.second.labels[i].begin(), v.second.labels[i].end(), [](const pair&a, const pair&b){return a.second < b.second;})->first); } } return; } void batch_grid_subsampling(vector& original_points, vector& subsampled_points, vector& original_features, vector& subsampled_features, vector& original_classes, vector& subsampled_classes, vector& original_batches, vector& subsampled_batches, float sampleDl, int max_p) { // Initialize variables // ****************** int b = 0; int sum_b = 0; // Number of points in the cloud size_t N = original_points.size(); // Dimension of the features size_t fdim = original_features.size() / N; size_t ldim = original_classes.size() / N; // Handle max_p = 0 if (max_p < 1) max_p = N; // Loop over batches // ***************** for (b = 0; b < original_batches.size(); b++) { // Extract batch points features and labels vector b_o_points = vector(original_points.begin () + sum_b, original_points.begin () + sum_b + original_batches[b]); vector b_o_features; if (original_features.size() > 0) { b_o_features = vector(original_features.begin () + sum_b * fdim, original_features.begin () + (sum_b + original_batches[b]) * fdim); } vector b_o_classes; if (original_classes.size() > 0) { b_o_classes = vector(original_classes.begin () + sum_b * ldim, original_classes.begin () + sum_b + original_batches[b] * ldim); } // Create result containers vector b_s_points; vector b_s_features; vector b_s_classes; // Compute subsampling on current batch grid_subsampling(b_o_points, b_s_points, b_o_features, b_s_features, b_o_classes, b_s_classes, sampleDl, 0); // Stack batches points features and labels // **************************************** // If too many points remove some if (b_s_points.size() <= max_p) { subsampled_points.insert(subsampled_points.end(), b_s_points.begin(), b_s_points.end()); if (original_features.size() > 0) subsampled_features.insert(subsampled_features.end(), b_s_features.begin(), b_s_features.end()); if (original_classes.size() > 0) subsampled_classes.insert(subsampled_classes.end(), b_s_classes.begin(), b_s_classes.end()); subsampled_batches.push_back(b_s_points.size()); } else { subsampled_points.insert(subsampled_points.end(), b_s_points.begin(), b_s_points.begin() + max_p); if (original_features.size() > 0) subsampled_features.insert(subsampled_features.end(), b_s_features.begin(), b_s_features.begin() + max_p * fdim); if (original_classes.size() > 0) subsampled_classes.insert(subsampled_classes.end(), b_s_classes.begin(), b_s_classes.begin() + max_p * ldim); subsampled_batches.push_back(max_p); } // Stack new batch lengths sum_b += original_batches[b]; } return; } ================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_subsampling/grid_subsampling/grid_subsampling.h ================================================ #include "../../cpp_utils/cloud/cloud.h" #include #include using namespace std; class SampledData { public: // Elements // ******** int count; PointXYZ point; vector features; 
vector> labels; // Methods // ******* // Constructor SampledData() { count = 0; point = PointXYZ(); } SampledData(const size_t fdim, const size_t ldim) { count = 0; point = PointXYZ(); features = vector(fdim); labels = vector>(ldim); } // Method Update void update_all(const PointXYZ p, vector::iterator f_begin, vector::iterator l_begin) { count += 1; point += p; transform (features.begin(), features.end(), f_begin, features.begin(), plus()); int i = 0; for(vector::iterator it = l_begin; it != l_begin + labels.size(); ++it) { labels[i][*it] += 1; i++; } return; } void update_features(const PointXYZ p, vector::iterator f_begin) { count += 1; point += p; transform (features.begin(), features.end(), f_begin, features.begin(), plus()); return; } void update_classes(const PointXYZ p, vector::iterator l_begin) { count += 1; point += p; int i = 0; for(vector::iterator it = l_begin; it != l_begin + labels.size(); ++it) { labels[i][*it] += 1; i++; } return; } void update_points(const PointXYZ p) { count += 1; point += p; return; } }; void grid_subsampling(vector& original_points, vector& subsampled_points, vector& original_features, vector& subsampled_features, vector& original_classes, vector& subsampled_classes, float sampleDl, int verbose); void batch_grid_subsampling(vector& original_points, vector& subsampled_points, vector& original_features, vector& subsampled_features, vector& original_classes, vector& subsampled_classes, vector& original_batches, vector& subsampled_batches, float sampleDl, int max_p); ================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_subsampling/setup.py ================================================ from distutils.core import setup, Extension import numpy.distutils.misc_util # Adding OpenCV to project # ************************ # Adding sources of the project # ***************************** SOURCES = ["../cpp_utils/cloud/cloud.cpp", "grid_subsampling/grid_subsampling.cpp", "wrapper.cpp"] module = Extension(name="grid_subsampling", sources=SOURCES, extra_compile_args=['-std=c++11', '-D_GLIBCXX_USE_CXX11_ABI=0']) setup(ext_modules=[module], include_dirs=numpy.distutils.misc_util.get_numpy_include_dirs()) ================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_subsampling/wrapper.cpp ================================================ #include #include #include "grid_subsampling/grid_subsampling.h" #include // docstrings for our module // ************************* static char module_docstring[] = "This module provides an interface for the subsampling of a batch of stacked pointclouds"; static char subsample_docstring[] = "function subsampling a pointcloud"; static char subsample_batch_docstring[] = "function subsampling a batch of stacked pointclouds"; // Declare the functions // ********************* static PyObject *cloud_subsampling(PyObject* self, PyObject* args, PyObject* keywds); static PyObject *batch_subsampling(PyObject *self, PyObject *args, PyObject *keywds); // Specify the members of the module // ********************************* static PyMethodDef module_methods[] = { { "subsample", (PyCFunction)cloud_subsampling, METH_VARARGS | METH_KEYWORDS, subsample_docstring }, { "subsample_batch", (PyCFunction)batch_subsampling, METH_VARARGS | METH_KEYWORDS, subsample_batch_docstring }, {NULL, NULL, 0, NULL} }; // Initialize the module // ********************* static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "grid_subsampling", // m_name module_docstring, // m_doc -1, // m_size 
    module_methods,     // m_methods
    NULL,               // m_reload
    NULL,               // m_traverse
    NULL,               // m_clear
    NULL,               // m_free
};

PyMODINIT_FUNC PyInit_grid_subsampling(void)
{
    import_array();
    return PyModule_Create(&moduledef);
}

// Definition of the batch_subsample method
// **********************************

static PyObject* batch_subsampling(PyObject* self, PyObject* args, PyObject* keywds)
{
    // Manage inputs
    // *************

    // Args containers
    PyObject* points_obj = NULL;
    PyObject* features_obj = NULL;
    PyObject* classes_obj = NULL;
    PyObject* batches_obj = NULL;

    // Keywords containers
    static char* kwlist[] = { "points", "batches", "features", "classes", "sampleDl", "method", "max_p", "verbose", NULL };
    float sampleDl = 0.1;
    const char* method_buffer = "barycenters";
    int verbose = 0;
    int max_p = 0;

    // Parse the input
    if (!PyArg_ParseTupleAndKeywords(args, keywds, "OO|$OOfsii", kwlist,
                                     &points_obj, &batches_obj, &features_obj, &classes_obj,
                                     &sampleDl, &method_buffer, &max_p, &verbose))
    {
        PyErr_SetString(PyExc_RuntimeError, "Error parsing arguments");
        return NULL;
    }

    // Get the method argument
    string method(method_buffer);

    // Interpret method
    if (method.compare("barycenters") && method.compare("voxelcenters"))
    {
        PyErr_SetString(PyExc_RuntimeError, "Error parsing method. Valid method names are \"barycenters\" and \"voxelcenters\" ");
        return NULL;
    }

    // Check if using features or classes
    bool use_feature = true, use_classes = true;
    if (features_obj == NULL)
        use_feature = false;
    if (classes_obj == NULL)
        use_classes = false;

    // Interpret the input objects as numpy arrays.
    PyObject* points_array = PyArray_FROM_OTF(points_obj, NPY_FLOAT, NPY_IN_ARRAY);
    PyObject* batches_array = PyArray_FROM_OTF(batches_obj, NPY_INT, NPY_IN_ARRAY);
    PyObject* features_array = NULL;
    PyObject* classes_array = NULL;
    if (use_feature)
        features_array = PyArray_FROM_OTF(features_obj, NPY_FLOAT, NPY_IN_ARRAY);
    if (use_classes)
        classes_array = PyArray_FROM_OTF(classes_obj, NPY_INT, NPY_IN_ARRAY);

    // Verify data was loaded correctly.
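    // (PyArray_FROM_OTF returns a new reference to a C-contiguous, aligned
    // array of the requested dtype, or NULL with an exception set when the
    // object cannot be converted; the checks below therefore release every
    // reference acquired so far before reporting the failure.)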
    if (points_array == NULL)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(batches_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Error converting input points to numpy arrays of type float32");
        return NULL;
    }
    if (batches_array == NULL)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(batches_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Error converting input batches to numpy arrays of type int32");
        return NULL;
    }
    if (use_feature && features_array == NULL)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(batches_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Error converting input features to numpy arrays of type float32");
        return NULL;
    }
    if (use_classes && classes_array == NULL)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(batches_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Error converting input classes to numpy arrays of type int32");
        return NULL;
    }

    // Check that the input array respect the dims
    if ((int)PyArray_NDIM(points_array) != 2 || (int)PyArray_DIM(points_array, 1) != 3)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(batches_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : points.shape is not (N, 3)");
        return NULL;
    }
    if ((int)PyArray_NDIM(batches_array) > 1)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(batches_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : batches.shape is not (B,) ");
        return NULL;
    }
    if (use_feature && ((int)PyArray_NDIM(features_array) != 2))
    {
        Py_XDECREF(points_array);
        Py_XDECREF(batches_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : features.shape is not (N, d)");
        return NULL;
    }
    if (use_classes && (int)PyArray_NDIM(classes_array) > 2)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(batches_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : classes.shape is not (N,) or (N, d)");
        return NULL;
    }

    // Number of points
    int N = (int)PyArray_DIM(points_array, 0);

    // Number of batches
    int Nb = (int)PyArray_DIM(batches_array, 0);

    // Dimension of the features
    int fdim = 0;
    if (use_feature)
        fdim = (int)PyArray_DIM(features_array, 1);

    // Dimension of labels
    int ldim = 1;
    if (use_classes && (int)PyArray_NDIM(classes_array) == 2)
        ldim = (int)PyArray_DIM(classes_array, 1);

    // Check that the input array respect the number of points
    if (use_feature && (int)PyArray_DIM(features_array, 0) != N)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(batches_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : features.shape is not (N, d)");
        return NULL;
    }
    if (use_classes && (int)PyArray_DIM(classes_array, 0) != N)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(batches_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : classes.shape is not (N,) or (N, d)");
        return NULL;
    }

    // Call the C++ function
    // *********************

    // Create pyramid
    if (verbose > 0)
        cout << "Computing cloud pyramid with support points: " << endl;

    // Convert PyArray to Cloud C++ class
    vector<PointXYZ> original_points;
    vector<int> original_batches;
    vector<float> original_features;
    vector<int> original_classes;
    original_points = vector<PointXYZ>((PointXYZ*)PyArray_DATA(points_array),
                                       (PointXYZ*)PyArray_DATA(points_array) + N);
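    // (The cast above reinterprets the contiguous (N, 3) float32 buffer as
    // PointXYZ values, which works because PointXYZ is a standard-layout
    // struct of exactly three floats; the range constructor then copies the
    // data, so the numpy buffer is not aliased afterwards.)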
    original_batches = vector<int>((int*)PyArray_DATA(batches_array),
                                   (int*)PyArray_DATA(batches_array) + Nb);
    if (use_feature)
        original_features = vector<float>((float*)PyArray_DATA(features_array),
                                          (float*)PyArray_DATA(features_array) + N * fdim);
    if (use_classes)
        original_classes = vector<int>((int*)PyArray_DATA(classes_array),
                                       (int*)PyArray_DATA(classes_array) + N * ldim);

    // Subsample
    vector<PointXYZ> subsampled_points;
    vector<float> subsampled_features;
    vector<int> subsampled_classes;
    vector<int> subsampled_batches;
    batch_grid_subsampling(original_points, subsampled_points,
                           original_features, subsampled_features,
                           original_classes, subsampled_classes,
                           original_batches, subsampled_batches,
                           sampleDl, max_p);

    // Check result
    if (subsampled_points.size() < 1)
    {
        PyErr_SetString(PyExc_RuntimeError, "Error");
        return NULL;
    }

    // Manage outputs
    // **************

    // Dimension of input containers
    npy_intp* point_dims = new npy_intp[2];
    point_dims[0] = subsampled_points.size();
    point_dims[1] = 3;
    npy_intp* feature_dims = new npy_intp[2];
    feature_dims[0] = subsampled_points.size();
    feature_dims[1] = fdim;
    npy_intp* classes_dims = new npy_intp[2];
    classes_dims[0] = subsampled_points.size();
    classes_dims[1] = ldim;
    npy_intp* batches_dims = new npy_intp[1];
    batches_dims[0] = Nb;

    // Create output array
    PyObject* res_points_obj = PyArray_SimpleNew(2, point_dims, NPY_FLOAT);
    PyObject* res_batches_obj = PyArray_SimpleNew(1, batches_dims, NPY_INT);
    PyObject* res_features_obj = NULL;
    PyObject* res_classes_obj = NULL;
    PyObject* ret = NULL;

    // Fill output array with values
    size_t size_in_bytes = subsampled_points.size() * 3 * sizeof(float);
    memcpy(PyArray_DATA(res_points_obj), subsampled_points.data(), size_in_bytes);
    size_in_bytes = Nb * sizeof(int);
    memcpy(PyArray_DATA(res_batches_obj), subsampled_batches.data(), size_in_bytes);
    if (use_feature)
    {
        size_in_bytes = subsampled_points.size() * fdim * sizeof(float);
        res_features_obj = PyArray_SimpleNew(2, feature_dims, NPY_FLOAT);
        memcpy(PyArray_DATA(res_features_obj), subsampled_features.data(), size_in_bytes);
    }
    if (use_classes)
    {
        size_in_bytes = subsampled_points.size() * ldim * sizeof(int);
        res_classes_obj = PyArray_SimpleNew(2, classes_dims, NPY_INT);
        memcpy(PyArray_DATA(res_classes_obj), subsampled_classes.data(), size_in_bytes);
    }

    // Merge results
    if (use_feature && use_classes)
        ret = Py_BuildValue("NNNN", res_points_obj, res_batches_obj, res_features_obj, res_classes_obj);
    else if (use_feature)
        ret = Py_BuildValue("NNN", res_points_obj, res_batches_obj, res_features_obj);
    else if (use_classes)
        ret = Py_BuildValue("NNN", res_points_obj, res_batches_obj, res_classes_obj);
    else
        ret = Py_BuildValue("NN", res_points_obj, res_batches_obj);

    // Clean up
    // ********
    Py_DECREF(points_array);
    Py_DECREF(batches_array);
    Py_XDECREF(features_array);
    Py_XDECREF(classes_array);

    return ret;
}
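// ---------------------------------------------------------------------------
// A minimal sketch of the batch entry point as seen from Python, assuming the
// extension has been built in place with the setup.py above (array sizes and
// the sampleDl value are illustrative only):
//
//   import numpy as np
//   import grid_subsampling as cpp_subsampling
//
//   points  = np.random.rand(10000, 3).astype(np.float32)  # stacked clouds
//   batches = np.array([6000, 4000], dtype=np.int32)       # points per cloud
//
//   # Without features/classes the call returns (points, batch lengths);
//   # passing features= and/or classes= appends them to the result tuple.
//   s_points, s_batches = cpp_subsampling.subsample_batch(
//       points, batches, sampleDl=0.02)
// ---------------------------------------------------------------------------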
// Definition of the subsample method
// ****************************************

static PyObject* cloud_subsampling(PyObject* self, PyObject* args, PyObject* keywds)
{
    // Manage inputs
    // *************

    // Args containers
    PyObject* points_obj = NULL;
    PyObject* features_obj = NULL;
    PyObject* classes_obj = NULL;

    // Keywords containers
    static char* kwlist[] = { "points", "features", "classes", "sampleDl", "method", "verbose", NULL };
    float sampleDl = 0.1;
    const char* method_buffer = "barycenters";
    int verbose = 0;

    // Parse the input
    if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|$OOfsi", kwlist,
                                     &points_obj, &features_obj, &classes_obj,
                                     &sampleDl, &method_buffer, &verbose))
    {
        PyErr_SetString(PyExc_RuntimeError, "Error parsing arguments");
        return NULL;
    }

    // Get the method argument
    string method(method_buffer);

    // Interpret method
    if (method.compare("barycenters") && method.compare("voxelcenters"))
    {
        PyErr_SetString(PyExc_RuntimeError, "Error parsing method. Valid method names are \"barycenters\" and \"voxelcenters\" ");
        return NULL;
    }

    // Check if using features or classes
    bool use_feature = true, use_classes = true;
    if (features_obj == NULL)
        use_feature = false;
    if (classes_obj == NULL)
        use_classes = false;

    // Interpret the input objects as numpy arrays.
    PyObject* points_array = PyArray_FROM_OTF(points_obj, NPY_FLOAT, NPY_IN_ARRAY);
    PyObject* features_array = NULL;
    PyObject* classes_array = NULL;
    if (use_feature)
        features_array = PyArray_FROM_OTF(features_obj, NPY_FLOAT, NPY_IN_ARRAY);
    if (use_classes)
        classes_array = PyArray_FROM_OTF(classes_obj, NPY_INT, NPY_IN_ARRAY);

    // Verify data was loaded correctly.
    if (points_array == NULL)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Error converting input points to numpy arrays of type float32");
        return NULL;
    }
    if (use_feature && features_array == NULL)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Error converting input features to numpy arrays of type float32");
        return NULL;
    }
    if (use_classes && classes_array == NULL)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Error converting input classes to numpy arrays of type int32");
        return NULL;
    }

    // Check that the input array respect the dims
    if ((int)PyArray_NDIM(points_array) != 2 || (int)PyArray_DIM(points_array, 1) != 3)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : points.shape is not (N, 3)");
        return NULL;
    }
    if (use_feature && ((int)PyArray_NDIM(features_array) != 2))
    {
        Py_XDECREF(points_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : features.shape is not (N, d)");
        return NULL;
    }
    if (use_classes && (int)PyArray_NDIM(classes_array) > 2)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : classes.shape is not (N,) or (N, d)");
        return NULL;
    }

    // Number of points
    int N = (int)PyArray_DIM(points_array, 0);

    // Dimension of the features
    int fdim = 0;
    if (use_feature)
        fdim = (int)PyArray_DIM(features_array, 1);

    // Dimension of labels
    int ldim = 1;
    if (use_classes && (int)PyArray_NDIM(classes_array) == 2)
        ldim = (int)PyArray_DIM(classes_array, 1);

    // Check that the input array respect the number of points
    if (use_feature && (int)PyArray_DIM(features_array, 0) != N)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : features.shape is not (N, d)");
        return NULL;
    }
    if (use_classes && (int)PyArray_DIM(classes_array, 0) != N)
    {
        Py_XDECREF(points_array);
        Py_XDECREF(classes_array);
        Py_XDECREF(features_array);
        PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : classes.shape is not (N,) or (N, d)");
        return NULL;
    }

    // Call the C++ function
    // *********************

    // Create pyramid
    if (verbose > 0)
        cout << "Computing cloud pyramid with support points: " << endl;

    // Convert PyArray to Cloud C++ class
    vector<PointXYZ> original_points;
    vector<float> original_features;
    vector<int> original_classes;
    original_points = vector<PointXYZ>((PointXYZ*)PyArray_DATA(points_array),
                                       (PointXYZ*)PyArray_DATA(points_array) + N);
    if (use_feature)
        original_features = vector<float>((float*)PyArray_DATA(features_array),
                                          (float*)PyArray_DATA(features_array) + N * fdim);
    if (use_classes)
        original_classes = vector<int>((int*)PyArray_DATA(classes_array),
                                       (int*)PyArray_DATA(classes_array) + N * ldim);

    // Subsample
    vector<PointXYZ> subsampled_points;
    vector<float> subsampled_features;
    vector<int> subsampled_classes;
    grid_subsampling(original_points, subsampled_points,
                     original_features, subsampled_features,
                     original_classes, subsampled_classes,
                     sampleDl, verbose);

    // Check result
    if (subsampled_points.size() < 1)
    {
        PyErr_SetString(PyExc_RuntimeError, "Error");
        return NULL;
    }

    // Manage outputs
    // **************

    // Dimension of input containers
    npy_intp* point_dims = new npy_intp[2];
    point_dims[0] = subsampled_points.size();
    point_dims[1] = 3;
    npy_intp* feature_dims = new npy_intp[2];
    feature_dims[0] = subsampled_points.size();
    feature_dims[1] = fdim;
    npy_intp* classes_dims = new npy_intp[2];
    classes_dims[0] = subsampled_points.size();
    classes_dims[1] = ldim;

    // Create output array
    PyObject* res_points_obj = PyArray_SimpleNew(2, point_dims, NPY_FLOAT);
    PyObject* res_features_obj = NULL;
    PyObject* res_classes_obj = NULL;
    PyObject* ret = NULL;

    // Fill output array with values
    size_t size_in_bytes = subsampled_points.size() * 3 * sizeof(float);
    memcpy(PyArray_DATA(res_points_obj), subsampled_points.data(), size_in_bytes);
    if (use_feature)
    {
        size_in_bytes = subsampled_points.size() * fdim * sizeof(float);
        res_features_obj = PyArray_SimpleNew(2, feature_dims, NPY_FLOAT);
        memcpy(PyArray_DATA(res_features_obj), subsampled_features.data(), size_in_bytes);
    }
    if (use_classes)
    {
        size_in_bytes = subsampled_points.size() * ldim * sizeof(int);
        res_classes_obj = PyArray_SimpleNew(2, classes_dims, NPY_INT);
        memcpy(PyArray_DATA(res_classes_obj), subsampled_classes.data(), size_in_bytes);
    }

    // Merge results
    if (use_feature && use_classes)
        ret = Py_BuildValue("NNN", res_points_obj, res_features_obj, res_classes_obj);
    else if (use_feature)
        ret = Py_BuildValue("NN", res_points_obj, res_features_obj);
    else if (use_classes)
        ret = Py_BuildValue("NN", res_points_obj, res_classes_obj);
    else
        ret = Py_BuildValue("N", res_points_obj);

    // Clean up
    // ********
    Py_DECREF(points_array);
    Py_XDECREF(features_array);
    Py_XDECREF(classes_array);

    return ret;
}

================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_utils/cloud/cloud.cpp ================================================
//
//
//		0==========================0
//		|    Local feature test    |
//		0==========================0
//
//		version 1.0 :
//			>
//
//---------------------------------------------------
//
//		Cloud source :
//		Define useful Functions/Methods
//
//----------------------------------------------------
//
//		Hugues THOMAS - 10/02/2017
//

#include "cloud.h"

// Getters
// *******

PointXYZ max_point(std::vector<PointXYZ> points)
{
    // Initialize limits
    PointXYZ maxP(points[0]);

    // Loop over all points
    for (auto p : points)
    {
        if (p.x > maxP.x)
            maxP.x = p.x;
        if (p.y > maxP.y)
            maxP.y = p.y;
        if (p.z > maxP.z)
            maxP.z = p.z;
    }

    return maxP;
}

PointXYZ min_point(std::vector<PointXYZ> points)
{
    // Initialize limits
    PointXYZ minP(points[0]);

    // Loop over all points
    for (auto p : points)
    {
        if (p.x < minP.x)
            minP.x = p.x;
        if (p.y < minP.y)
            minP.y = p.y;
        if (p.z < minP.z)
            minP.z = p.z;
    }

    return minP;
}
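// ---------------------------------------------------------------------------
// A minimal sketch of how these utilities combine with the nanoflann header
// below: compute a bounding box with max_point()/min_point() and run a
// nearest-neighbour query (PointCloud and PointXYZ come from cloud.h; the
// values are illustrative only):
//
//   #include "cloud.h"
//   #include "../nanoflann/nanoflann.hpp"
//
//   PointCloud cloud;
//   cloud.pts = { PointXYZ(0, 0, 0), PointXYZ(1, 0, 0), PointXYZ(0, 2, 0) };
//
//   PointXYZ maxP = max_point(cloud.pts);   // [1, 2, 0]
//   PointXYZ minP = min_point(cloud.pts);   // [0, 0, 0]
//
//   typedef nanoflann::KDTreeSingleIndexAdaptor<
//       nanoflann::L2_Simple_Adaptor<float, PointCloud>,
//       PointCloud, 3> kd_tree_t;
//   kd_tree_t index(3, cloud, nanoflann::KDTreeSingleIndexAdaptorParams(10));
//   index.buildIndex();
//
//   float query[3] = { 0.9f, 0.1f, 0.0f };
//   size_t ret_index;
//   float out_dist_sqr;
//   index.knnSearch(query, 1, &ret_index, &out_dist_sqr);
//   // -> ret_index == 1, out_dist_sqr == 0.02f
// ---------------------------------------------------------------------------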
================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_utils/cloud/cloud.h ================================================
//
//
//		0==========================0
//		|    Local feature test    |
//		0==========================0
//
//		version 1.0 :
//			>
//
//---------------------------------------------------
//
//		Cloud header
//
//----------------------------------------------------
//
//		Hugues THOMAS - 10/02/2017
//

#pragma once

#include <vector>
#include <unordered_map>
#include <map>
#include <algorithm>
#include <numeric>
#include <iostream>
#include <iomanip>
#include <cmath>

#include <time.h>

// Point class
// ***********

class PointXYZ
{
public:

    // Elements
    // ********

    float x, y, z;

    // Methods
    // *******

    // Constructor
    PointXYZ() { x = 0; y = 0; z = 0; }
    PointXYZ(float x0, float y0, float z0) { x = x0; y = y0; z = z0; }

    // array type accessor
    float operator [] (int i) const
    {
        if (i == 0)
            return x;
        else if (i == 1)
            return y;
        else
            return z;
    }

    // operations
    float dot(const PointXYZ P) const
    {
        return x * P.x + y * P.y + z * P.z;
    }

    float sq_norm()
    {
        return x * x + y * y + z * z;
    }

    PointXYZ cross(const PointXYZ P) const
    {
        return PointXYZ(y * P.z - z * P.y, z * P.x - x * P.z, x * P.y - y * P.x);
    }

    PointXYZ& operator+=(const PointXYZ& P)
    {
        x += P.x; y += P.y; z += P.z;
        return *this;
    }

    PointXYZ& operator-=(const PointXYZ& P)
    {
        x -= P.x; y -= P.y; z -= P.z;
        return *this;
    }

    PointXYZ& operator*=(const float& a)
    {
        x *= a; y *= a; z *= a;
        return *this;
    }
};

// Point Operations
// *****************

inline PointXYZ operator + (const PointXYZ A, const PointXYZ B) { return PointXYZ(A.x + B.x, A.y + B.y, A.z + B.z); }

inline PointXYZ operator - (const PointXYZ A, const PointXYZ B) { return PointXYZ(A.x - B.x, A.y - B.y, A.z - B.z); }

inline PointXYZ operator * (const PointXYZ P, const float a) { return PointXYZ(P.x * a, P.y * a, P.z * a); }

inline PointXYZ operator * (const float a, const PointXYZ P) { return PointXYZ(P.x * a, P.y * a, P.z * a); }

inline std::ostream& operator << (std::ostream& os, const PointXYZ P)
{
    return os << "[" << P.x << ", " << P.y << ", " << P.z << "]";
}

inline bool operator == (const PointXYZ A, const PointXYZ B) { return A.x == B.x && A.y == B.y && A.z == B.z; }

inline PointXYZ floor(const PointXYZ P)
{
    return PointXYZ(std::floor(P.x), std::floor(P.y), std::floor(P.z));
}

PointXYZ max_point(std::vector<PointXYZ> points);
PointXYZ min_point(std::vector<PointXYZ> points);

struct PointCloud
{
    std::vector<PointXYZ> pts;

    // Must return the number of data points
    inline size_t kdtree_get_point_count() const { return pts.size(); }

    // Returns the dim'th component of the idx'th point in the class:
    // Since this is inlined and the "dim" argument is typically an immediate value, the
    // "if/else's" are actually solved at compile time.
    inline float kdtree_get_pt(const size_t idx, const size_t dim) const
    {
        if (dim == 0)
            return pts[idx].x;
        else if (dim == 1)
            return pts[idx].y;
        else
            return pts[idx].z;
    }

    // Optional bounding-box computation: return false to default to a standard bbox computation loop.
    // Return true if the BBOX was already computed by the class and returned in "bb" so it can be avoided to redo it again.
    // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3 for point clouds)
    template <class BBOX>
    bool kdtree_get_bbox(BBOX& /* bb */) const { return false; }
};

================================================ FILE: thirdparty/kpconv/cpp_wrappers/cpp_utils/nanoflann/nanoflann.hpp ================================================
/***********************************************************************
 * Software License Agreement (BSD License)
 *
 * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
 * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
 * Copyright 2011-2016  Jose Luis Blanco (joseluisblancoc@gmail.com).
 *   All rights reserved.
 *
 * THE BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *************************************************************************/

/** \mainpage nanoflann C++ API documentation
 *  nanoflann is a C++ header-only library for building KD-Trees, mostly
 *  optimized for 2D or 3D point clouds.
 *
 *  nanoflann does not require compiling or installing, just an
 *  #include <nanoflann.hpp> in your code.
 *
 *  See:
 *   - <a href="modules.html" >C++ API organized by modules</a>
 *   - <a href="https://github.com/jlblancoc/nanoflann" >Online README</a>
 *   - <a href="http://jlblancoc.github.io/nanoflann/" >Doxygen
 * documentation</a>
 */

#ifndef NANOFLANN_HPP_
#define NANOFLANN_HPP_

#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>      // for abs()
#include <cstdio>     // for fwrite()
#include <cstdlib>    // for abs()
#include <functional> // std::reference_wrapper
#include <limits>
#include <stdexcept>
#include <vector>

/** Library version: 0xMmP (M=Major,m=minor,P=patch) */
#define NANOFLANN_VERSION 0x130

// Avoid conflicting declaration of min/max macros in windows headers
#if !defined(NOMINMAX) &&                                                      \
    (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) || defined(_WIN64))
#define NOMINMAX
#ifdef max
#undef max
#undef min
#endif
#endif

namespace nanoflann {
/** @addtogroup nanoflann_grp nanoflann C++ library for ANN
 *  @{ */

/** the PI constant (required to avoid MSVC missing symbols) */
template <typename T> T pi_const() {
  return static_cast<T>(3.14159265358979323846);
}

/**
 * Traits if object is resizable and assignable (typically has a resize | assign
 * method)
 */
template <typename T, typename = int> struct has_resize : std::false_type {};

template <typename T>
struct has_resize<T, decltype((void)std::declval<T>().resize(1), 0)>
    : std::true_type {};

template <typename T, typename = int> struct has_assign : std::false_type {};

template <typename T>
struct has_assign<T, decltype((void)std::declval<T>().assign(1, 0), 0)>
    : std::true_type {};

/**
 * Free function to resize a resizable object
 */
template <typename Container>
inline typename std::enable_if<has_resize<Container>::value, void>::type
resize(Container &c, const size_t nElements) {
  c.resize(nElements);
}

/**
 * Free function that has no effects on non resizable containers (e.g.
* std::array) It raises an exception if the expected size does not match */ template inline typename std::enable_if::value, void>::type resize(Container &c, const size_t nElements) { if (nElements != c.size()) throw std::logic_error("Try to change the size of a std::array."); } /** * Free function to assign to a container */ template inline typename std::enable_if::value, void>::type assign(Container &c, const size_t nElements, const T &value) { c.assign(nElements, value); } /** * Free function to assign to a std::array */ template inline typename std::enable_if::value, void>::type assign(Container &c, const size_t nElements, const T &value) { for (size_t i = 0; i < nElements; i++) c[i] = value; } /** @addtogroup result_sets_grp Result set classes * @{ */ template class KNNResultSet { public: typedef _DistanceType DistanceType; typedef _IndexType IndexType; typedef _CountType CountType; private: IndexType *indices; DistanceType *dists; CountType capacity; CountType count; public: inline KNNResultSet(CountType capacity_) : indices(0), dists(0), capacity(capacity_), count(0) {} inline void init(IndexType *indices_, DistanceType *dists_) { indices = indices_; dists = dists_; count = 0; if (capacity) dists[capacity - 1] = (std::numeric_limits::max)(); } inline CountType size() const { return count; } inline bool full() const { return count == capacity; } /** * Called during search to add an element matching the criteria. * @return true if the search should be continued, false if the results are * sufficient */ inline bool addPoint(DistanceType dist, IndexType index) { CountType i; for (i = count; i > 0; --i) { #ifdef NANOFLANN_FIRST_MATCH // If defined and two points have the same // distance, the one with the lowest-index will be // returned first. if ((dists[i - 1] > dist) || ((dist == dists[i - 1]) && (indices[i - 1] > index))) { #else if (dists[i - 1] > dist) { #endif if (i < capacity) { dists[i] = dists[i - 1]; indices[i] = indices[i - 1]; } } else break; } if (i < capacity) { dists[i] = dist; indices[i] = index; } if (count < capacity) count++; // tell caller that the search shall continue return true; } inline DistanceType worstDist() const { return dists[capacity - 1]; } }; /** operator "<" for std::sort() */ struct IndexDist_Sorter { /** PairType will be typically: std::pair */ template inline bool operator()(const PairType &p1, const PairType &p2) const { return p1.second < p2.second; } }; /** * A result-set class used when performing a radius based search. */ template class RadiusResultSet { public: typedef _DistanceType DistanceType; typedef _IndexType IndexType; public: const DistanceType radius; std::vector> &m_indices_dists; inline RadiusResultSet( DistanceType radius_, std::vector> &indices_dists) : radius(radius_), m_indices_dists(indices_dists) { init(); } inline void init() { clear(); } inline void clear() { m_indices_dists.clear(); } inline size_t size() const { return m_indices_dists.size(); } inline bool full() const { return true; } /** * Called during search to add an element matching the criteria. 
* @return true if the search should be continued, false if the results are * sufficient */ inline bool addPoint(DistanceType dist, IndexType index) { if (dist < radius) m_indices_dists.push_back(std::make_pair(index, dist)); return true; } inline DistanceType worstDist() const { return radius; } /** * Find the worst result (furtherest neighbor) without copying or sorting * Pre-conditions: size() > 0 */ std::pair worst_item() const { if (m_indices_dists.empty()) throw std::runtime_error("Cannot invoke RadiusResultSet::worst_item() on " "an empty list of results."); typedef typename std::vector>::const_iterator DistIt; DistIt it = std::max_element(m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter()); return *it; } }; /** @} */ /** @addtogroup loadsave_grp Load/save auxiliary functions * @{ */ template void save_value(FILE *stream, const T &value, size_t count = 1) { fwrite(&value, sizeof(value), count, stream); } template void save_value(FILE *stream, const std::vector &value) { size_t size = value.size(); fwrite(&size, sizeof(size_t), 1, stream); fwrite(&value[0], sizeof(T), size, stream); } template void load_value(FILE *stream, T &value, size_t count = 1) { size_t read_cnt = fread(&value, sizeof(value), count, stream); if (read_cnt != count) { throw std::runtime_error("Cannot read from file"); } } template void load_value(FILE *stream, std::vector &value) { size_t size; size_t read_cnt = fread(&size, sizeof(size_t), 1, stream); if (read_cnt != 1) { throw std::runtime_error("Cannot read from file"); } value.resize(size); read_cnt = fread(&value[0], sizeof(T), size, stream); if (read_cnt != size) { throw std::runtime_error("Cannot read from file"); } } /** @} */ /** @addtogroup metric_grp Metric (distance) classes * @{ */ struct Metric {}; /** Manhattan distance functor (generic version, optimized for * high-dimensionality data sets). Corresponding distance traits: * nanoflann::metric_L1 \tparam T Type of the elements (e.g. double, float, * uint8_t) \tparam _DistanceType Type of distance variables (must be signed) * (e.g. float, double, int64_t) */ template struct L1_Adaptor { typedef T ElementType; typedef _DistanceType DistanceType; const DataSource &data_source; L1_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size, DistanceType worst_dist = -1) const { DistanceType result = DistanceType(); const T *last = a + size; const T *lastgroup = last - 3; size_t d = 0; /* Process 4 items with each loop for efficiency. */ while (a < lastgroup) { const DistanceType diff0 = std::abs(a[0] - data_source.kdtree_get_pt(b_idx, d++)); const DistanceType diff1 = std::abs(a[1] - data_source.kdtree_get_pt(b_idx, d++)); const DistanceType diff2 = std::abs(a[2] - data_source.kdtree_get_pt(b_idx, d++)); const DistanceType diff3 = std::abs(a[3] - data_source.kdtree_get_pt(b_idx, d++)); result += diff0 + diff1 + diff2 + diff3; a += 4; if ((worst_dist > 0) && (result > worst_dist)) { return result; } } /* Process last 0-3 components. Not needed for standard vector lengths. */ while (a < last) { result += std::abs(*a++ - data_source.kdtree_get_pt(b_idx, d++)); } return result; } template inline DistanceType accum_dist(const U a, const V b, const size_t) const { return std::abs(a - b); } }; /** Squared Euclidean distance functor (generic version, optimized for * high-dimensionality data sets). Corresponding distance traits: * nanoflann::metric_L2 \tparam T Type of the elements (e.g. 
double, float, * uint8_t) \tparam _DistanceType Type of distance variables (must be signed) * (e.g. float, double, int64_t) */ template struct L2_Adaptor { typedef T ElementType; typedef _DistanceType DistanceType; const DataSource &data_source; L2_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size, DistanceType worst_dist = -1) const { DistanceType result = DistanceType(); const T *last = a + size; const T *lastgroup = last - 3; size_t d = 0; /* Process 4 items with each loop for efficiency. */ while (a < lastgroup) { const DistanceType diff0 = a[0] - data_source.kdtree_get_pt(b_idx, d++); const DistanceType diff1 = a[1] - data_source.kdtree_get_pt(b_idx, d++); const DistanceType diff2 = a[2] - data_source.kdtree_get_pt(b_idx, d++); const DistanceType diff3 = a[3] - data_source.kdtree_get_pt(b_idx, d++); result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; a += 4; if ((worst_dist > 0) && (result > worst_dist)) { return result; } } /* Process last 0-3 components. Not needed for standard vector lengths. */ while (a < last) { const DistanceType diff0 = *a++ - data_source.kdtree_get_pt(b_idx, d++); result += diff0 * diff0; } return result; } template inline DistanceType accum_dist(const U a, const V b, const size_t) const { return (a - b) * (a - b); } }; /** Squared Euclidean (L2) distance functor (suitable for low-dimensionality * datasets, like 2D or 3D point clouds) Corresponding distance traits: * nanoflann::metric_L2_Simple \tparam T Type of the elements (e.g. double, * float, uint8_t) \tparam _DistanceType Type of distance variables (must be * signed) (e.g. float, double, int64_t) */ template struct L2_Simple_Adaptor { typedef T ElementType; typedef _DistanceType DistanceType; const DataSource &data_source; L2_Simple_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size) const { DistanceType result = DistanceType(); for (size_t i = 0; i < size; ++i) { const DistanceType diff = a[i] - data_source.kdtree_get_pt(b_idx, i); result += diff * diff; } return result; } template inline DistanceType accum_dist(const U a, const V b, const size_t) const { return (a - b) * (a - b); } }; /** SO2 distance functor * Corresponding distance traits: nanoflann::metric_SO2 * \tparam T Type of the elements (e.g. double, float) * \tparam _DistanceType Type of distance variables (must be signed) (e.g. * float, double) orientation is constrained to be in [-pi, pi] */ template struct SO2_Adaptor { typedef T ElementType; typedef _DistanceType DistanceType; const DataSource &data_source; SO2_Adaptor(const DataSource &_data_source) : data_source(_data_source) {} inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size) const { return accum_dist(a[size - 1], data_source.kdtree_get_pt(b_idx, size - 1), size - 1); } /** Note: this assumes that input angles are already in the range [-pi,pi] */ template inline DistanceType accum_dist(const U a, const V b, const size_t) const { DistanceType result = DistanceType(), PI = pi_const(); result = b - a; if (result > PI) result -= 2 * PI; else if (result < -PI) result += 2 * PI; return result; } }; /** SO3 distance functor (Uses L2_Simple) * Corresponding distance traits: nanoflann::metric_SO3 * \tparam T Type of the elements (e.g. double, float) * \tparam _DistanceType Type of distance variables (must be signed) (e.g. 
* float, double) */ template struct SO3_Adaptor { typedef T ElementType; typedef _DistanceType DistanceType; L2_Simple_Adaptor distance_L2_Simple; SO3_Adaptor(const DataSource &_data_source) : distance_L2_Simple(_data_source) {} inline DistanceType evalMetric(const T *a, const size_t b_idx, size_t size) const { return distance_L2_Simple.evalMetric(a, b_idx, size); } template inline DistanceType accum_dist(const U a, const V b, const size_t idx) const { return distance_L2_Simple.accum_dist(a, b, idx); } }; /** Metaprogramming helper traits class for the L1 (Manhattan) metric */ struct metric_L1 : public Metric { template struct traits { typedef L1_Adaptor distance_t; }; }; /** Metaprogramming helper traits class for the L2 (Euclidean) metric */ struct metric_L2 : public Metric { template struct traits { typedef L2_Adaptor distance_t; }; }; /** Metaprogramming helper traits class for the L2_simple (Euclidean) metric */ struct metric_L2_Simple : public Metric { template struct traits { typedef L2_Simple_Adaptor distance_t; }; }; /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ struct metric_SO2 : public Metric { template struct traits { typedef SO2_Adaptor distance_t; }; }; /** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */ struct metric_SO3 : public Metric { template struct traits { typedef SO3_Adaptor distance_t; }; }; /** @} */ /** @addtogroup param_grp Parameter structs * @{ */ /** Parameters (see README.md) */ struct KDTreeSingleIndexAdaptorParams { KDTreeSingleIndexAdaptorParams(size_t _leaf_max_size = 10) : leaf_max_size(_leaf_max_size) {} size_t leaf_max_size; }; /** Search options for KDTreeSingleIndexAdaptor::findNeighbors() */ struct SearchParams { /** Note: The first argument (checks_IGNORED_) is ignored, but kept for * compatibility with the FLANN interface */ SearchParams(int checks_IGNORED_ = 32, float eps_ = 0, bool sorted_ = true) : checks(checks_IGNORED_), eps(eps_), sorted(sorted_) {} int checks; //!< Ignored parameter (Kept for compatibility with the FLANN //!< interface). float eps; //!< search for eps-approximate neighbours (default: 0) bool sorted; //!< only for radius search, require neighbours sorted by //!< distance (default: true) }; /** @} */ /** @addtogroup memalloc_grp Memory allocation * @{ */ /** * Allocates (using C's malloc) a generic type T. * * Params: * count = number of instances to allocate. * Returns: pointer (of type T*) to memory buffer */ template inline T *allocate(size_t count = 1) { T *mem = static_cast(::malloc(sizeof(T) * count)); return mem; } /** * Pooled storage allocator * * The following routines allow for the efficient allocation of storage in * small chunks from a specified pool. Rather than allowing each structure * to be freed individually, an entire pool of storage is freed at once. * This method has two advantages over just using malloc() and free(). First, * it is far more efficient for allocating small objects, as there is * no overhead for remembering all the information needed to free each * object or consolidating fragmented memory. Second, the decision about * how long to keep an object is made at the time of allocation, and there * is no need to track down all the objects to free them. * */ const size_t WORDSIZE = 16; const size_t BLOCKSIZE = 8192; class PooledAllocator { /* We maintain memory alignment to word boundaries by requiring that all allocations be in multiples of the machine wordsize. */ /* Size of machine word in bytes. Must be power of 2. 
*/ /* Minimum number of bytes requested at a time from the system. Must be * multiple of WORDSIZE. */ size_t remaining; /* Number of bytes left in current block of storage. */ void *base; /* Pointer to base of current block of storage. */ void *loc; /* Current location in block to next allocate memory. */ void internal_init() { remaining = 0; base = NULL; usedMemory = 0; wastedMemory = 0; } public: size_t usedMemory; size_t wastedMemory; /** Default constructor. Initializes a new pool. */ PooledAllocator() { internal_init(); } /** * Destructor. Frees all the memory allocated in this pool. */ ~PooledAllocator() { free_all(); } /** Frees all allocated memory chunks */ void free_all() { while (base != NULL) { void *prev = *(static_cast(base)); /* Get pointer to prev block. */ ::free(base); base = prev; } internal_init(); } /** * Returns a pointer to a piece of new memory of the given size in bytes * allocated from the pool. */ void *malloc(const size_t req_size) { /* Round size up to a multiple of wordsize. The following expression only works for WORDSIZE that is a power of 2, by masking last bits of incremented size to zero. */ const size_t size = (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1); /* Check whether a new block must be allocated. Note that the first word of a block is reserved for a pointer to the previous block. */ if (size > remaining) { wastedMemory += remaining; /* Allocate new storage. */ const size_t blocksize = (size + sizeof(void *) + (WORDSIZE - 1) > BLOCKSIZE) ? size + sizeof(void *) + (WORDSIZE - 1) : BLOCKSIZE; // use the standard C malloc to allocate memory void *m = ::malloc(blocksize); if (!m) { fprintf(stderr, "Failed to allocate memory.\n"); return NULL; } /* Fill first word of new block with pointer to previous block. */ static_cast(m)[0] = base; base = m; size_t shift = 0; // int size_t = (WORDSIZE - ( (((size_t)m) + sizeof(void*)) & // (WORDSIZE-1))) & (WORDSIZE-1); remaining = blocksize - sizeof(void *) - shift; loc = (static_cast(m) + sizeof(void *) + shift); } void *rloc = loc; loc = static_cast(loc) + size; remaining -= size; usedMemory += size; return rloc; } /** * Allocates (using this pool) a generic type T. * * Params: * count = number of instances to allocate. * Returns: pointer (of type T*) to memory buffer */ template T *allocate(const size_t count = 1) { T *mem = static_cast(this->malloc(sizeof(T) * count)); return mem; } }; /** @} */ /** @addtogroup nanoflann_metaprog_grp Auxiliary metaprogramming stuff * @{ */ /** Used to declare fixed-size arrays when DIM>0, dynamically-allocated vectors * when DIM=-1. Fixed size version for a generic DIM: */ template struct array_or_vector_selector { typedef std::array container_t; }; /** Dynamic size version */ template struct array_or_vector_selector<-1, T> { typedef std::vector container_t; }; /** @} */ /** kd-tree base-class * * Contains the member functions common to the classes KDTreeSingleIndexAdaptor * and KDTreeSingleIndexDynamicAdaptor_. * * \tparam Derived The name of the class which inherits this class. * \tparam DatasetAdaptor The user-provided adaptor (see comments above). * \tparam Distance The distance metric to use, these are all classes derived * from nanoflann::Metric \tparam DIM Dimensionality of data points (e.g. 3 for * 3D points) \tparam IndexType Will be typically size_t or int */ template class KDTreeBaseClass { public: /** Frees the previously-built index. Automatically called within * buildIndex(). 
*/ void freeIndex(Derived &obj) { obj.pool.free_all(); obj.root_node = NULL; obj.m_size_at_index_build = 0; } typedef typename Distance::ElementType ElementType; typedef typename Distance::DistanceType DistanceType; /*--------------------- Internal Data Structures --------------------------*/ struct Node { /** Union used because a node can be either a LEAF node or a non-leaf node, * so both data fields are never used simultaneously */ union { struct leaf { IndexType left, right; //!< Indices of points in leaf node } lr; struct nonleaf { int divfeat; //!< Dimension used for subdivision. DistanceType divlow, divhigh; //!< The values used for subdivision. } sub; } node_type; Node *child1, *child2; //!< Child nodes (both=NULL mean its a leaf node) }; typedef Node *NodePtr; struct Interval { ElementType low, high; }; /** * Array of indices to vectors in the dataset. */ std::vector vind; NodePtr root_node; size_t m_leaf_max_size; size_t m_size; //!< Number of current points in the dataset size_t m_size_at_index_build; //!< Number of points in the dataset when the //!< index was built int dim; //!< Dimensionality of each data point /** Define "BoundingBox" as a fixed-size or variable-size container depending * on "DIM" */ typedef typename array_or_vector_selector::container_t BoundingBox; /** Define "distance_vector_t" as a fixed-size or variable-size container * depending on "DIM" */ typedef typename array_or_vector_selector::container_t distance_vector_t; /** The KD-tree used to find neighbours */ BoundingBox root_bbox; /** * Pooled memory allocator. * * Using a pooled memory allocator is more efficient * than allocating memory directly when there is a large * number small of memory allocations. */ PooledAllocator pool; /** Returns number of points in dataset */ size_t size(const Derived &obj) const { return obj.m_size; } /** Returns the length of each point in the dataset */ size_t veclen(const Derived &obj) { return static_cast(DIM > 0 ? DIM : obj.dim); } /// Helper accessor to the dataset points: inline ElementType dataset_get(const Derived &obj, size_t idx, int component) const { return obj.dataset.kdtree_get_pt(idx, component); } /** * Computes the inde memory usage * Returns: memory used by the index */ size_t usedMemory(Derived &obj) { return obj.pool.usedMemory + obj.pool.wastedMemory + obj.dataset.kdtree_get_point_count() * sizeof(IndexType); // pool memory and vind array memory } void computeMinMax(const Derived &obj, IndexType *ind, IndexType count, int element, ElementType &min_elem, ElementType &max_elem) { min_elem = dataset_get(obj, ind[0], element); max_elem = dataset_get(obj, ind[0], element); for (IndexType i = 1; i < count; ++i) { ElementType val = dataset_get(obj, ind[i], element); if (val < min_elem) min_elem = val; if (val > max_elem) max_elem = val; } } /** * Create a tree node that subdivides the list of vecs from vind[first] * to vind[last]. The routine is called recursively on each sublist. * * @param left index of the first vector * @param right index of the last vector */ NodePtr divideTree(Derived &obj, const IndexType left, const IndexType right, BoundingBox &bbox) { NodePtr node = obj.pool.template allocate(); // allocate memory /* If too few exemplars remain, then make this a leaf node. */ if ((right - left) <= static_cast(obj.m_leaf_max_size)) { node->child1 = node->child2 = NULL; /* Mark as leaf node. */ node->node_type.lr.left = left; node->node_type.lr.right = right; // compute bounding-box of leaf points for (int i = 0; i < (DIM > 0 ? 
DIM : obj.dim); ++i) { bbox[i].low = dataset_get(obj, obj.vind[left], i); bbox[i].high = dataset_get(obj, obj.vind[left], i); } for (IndexType k = left + 1; k < right; ++k) { for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { if (bbox[i].low > dataset_get(obj, obj.vind[k], i)) bbox[i].low = dataset_get(obj, obj.vind[k], i); if (bbox[i].high < dataset_get(obj, obj.vind[k], i)) bbox[i].high = dataset_get(obj, obj.vind[k], i); } } } else { IndexType idx; int cutfeat; DistanceType cutval; middleSplit_(obj, &obj.vind[0] + left, right - left, idx, cutfeat, cutval, bbox); node->node_type.sub.divfeat = cutfeat; BoundingBox left_bbox(bbox); left_bbox[cutfeat].high = cutval; node->child1 = divideTree(obj, left, left + idx, left_bbox); BoundingBox right_bbox(bbox); right_bbox[cutfeat].low = cutval; node->child2 = divideTree(obj, left + idx, right, right_bbox); node->node_type.sub.divlow = left_bbox[cutfeat].high; node->node_type.sub.divhigh = right_bbox[cutfeat].low; for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { bbox[i].low = std::min(left_bbox[i].low, right_bbox[i].low); bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high); } } return node; } void middleSplit_(Derived &obj, IndexType *ind, IndexType count, IndexType &index, int &cutfeat, DistanceType &cutval, const BoundingBox &bbox) { const DistanceType EPS = static_cast(0.00001); ElementType max_span = bbox[0].high - bbox[0].low; for (int i = 1; i < (DIM > 0 ? DIM : obj.dim); ++i) { ElementType span = bbox[i].high - bbox[i].low; if (span > max_span) { max_span = span; } } ElementType max_spread = -1; cutfeat = 0; for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { ElementType span = bbox[i].high - bbox[i].low; if (span > (1 - EPS) * max_span) { ElementType min_elem, max_elem; computeMinMax(obj, ind, count, i, min_elem, max_elem); ElementType spread = max_elem - min_elem; ; if (spread > max_spread) { cutfeat = i; max_spread = spread; } } } // split in the middle DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2; ElementType min_elem, max_elem; computeMinMax(obj, ind, count, cutfeat, min_elem, max_elem); if (split_val < min_elem) cutval = min_elem; else if (split_val > max_elem) cutval = max_elem; else cutval = split_val; IndexType lim1, lim2; planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2); if (lim1 > count / 2) index = lim1; else if (lim2 < count / 2) index = lim2; else index = count / 2; } /** * Subdivide the list of points by a plane perpendicular on axe corresponding * to the 'cutfeat' dimension at 'cutval' position. * * On return: * dataset[ind[0..lim1-1]][cutfeat]cutval */ void planeSplit(Derived &obj, IndexType *ind, const IndexType count, int cutfeat, DistanceType &cutval, IndexType &lim1, IndexType &lim2) { /* Move vector indices for left subtree to front of list. */ IndexType left = 0; IndexType right = count - 1; for (;;) { while (left <= right && dataset_get(obj, ind[left], cutfeat) < cutval) ++left; while (right && left <= right && dataset_get(obj, ind[right], cutfeat) >= cutval) --right; if (left > right || !right) break; // "!right" was added to support unsigned Index types std::swap(ind[left], ind[right]); ++left; --right; } /* If either list is empty, it means that all remaining features * are identical. Split in the middle to maintain a balanced tree. 
*/ lim1 = left; right = count - 1; for (;;) { while (left <= right && dataset_get(obj, ind[left], cutfeat) <= cutval) ++left; while (right && left <= right && dataset_get(obj, ind[right], cutfeat) > cutval) --right; if (left > right || !right) break; // "!right" was added to support unsigned Index types std::swap(ind[left], ind[right]); ++left; --right; } lim2 = left; } DistanceType computeInitialDistances(const Derived &obj, const ElementType *vec, distance_vector_t &dists) const { assert(vec); DistanceType distsq = DistanceType(); for (int i = 0; i < (DIM > 0 ? DIM : obj.dim); ++i) { if (vec[i] < obj.root_bbox[i].low) { dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].low, i); distsq += dists[i]; } if (vec[i] > obj.root_bbox[i].high) { dists[i] = obj.distance.accum_dist(vec[i], obj.root_bbox[i].high, i); distsq += dists[i]; } } return distsq; } void save_tree(Derived &obj, FILE *stream, NodePtr tree) { save_value(stream, *tree); if (tree->child1 != NULL) { save_tree(obj, stream, tree->child1); } if (tree->child2 != NULL) { save_tree(obj, stream, tree->child2); } } void load_tree(Derived &obj, FILE *stream, NodePtr &tree) { tree = obj.pool.template allocate(); load_value(stream, *tree); if (tree->child1 != NULL) { load_tree(obj, stream, tree->child1); } if (tree->child2 != NULL) { load_tree(obj, stream, tree->child2); } } /** Stores the index in a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when * loading the index object it must be constructed associated to the same * source of data points used while building it. See the example: * examples/saveload_example.cpp \sa loadIndex */ void saveIndex_(Derived &obj, FILE *stream) { save_value(stream, obj.m_size); save_value(stream, obj.dim); save_value(stream, obj.root_bbox); save_value(stream, obj.m_leaf_max_size); save_value(stream, obj.vind); save_tree(obj, stream, obj.root_node); } /** Loads a previous index from a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the * index object must be constructed associated to the same source of data * points used while building the index. See the example: * examples/saveload_example.cpp \sa loadIndex */ void loadIndex_(Derived &obj, FILE *stream) { load_value(stream, obj.m_size); load_value(stream, obj.dim); load_value(stream, obj.root_bbox); load_value(stream, obj.m_leaf_max_size); load_value(stream, obj.vind); load_tree(obj, stream, obj.root_node); } }; /** @addtogroup kdtrees_grp KD-tree classes and adaptors * @{ */ /** kd-tree static index * * Contains the k-d trees and other information for indexing a set of points * for nearest-neighbor matching. * * The class "DatasetAdaptor" must provide the following interface (can be * non-virtual, inlined methods): * * \code * // Must return the number of data poins * inline size_t kdtree_get_point_count() const { ... } * * * // Must return the dim'th component of the idx'th point in the class: * inline T kdtree_get_pt(const size_t idx, const size_t dim) const { ... } * * // Optional bounding-box computation: return false to default to a standard * bbox computation loop. * // Return true if the BBOX was already computed by the class and returned * in "bb" so it can be avoided to redo it again. * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3 * for point clouds) template bool kdtree_get_bbox(BBOX &bb) const * { * bb[0].low = ...; bb[0].high = ...; // 0th dimension limits * bb[1].low = ...; bb[1].high = ...; // 1st dimension limits * ... 
* return true; * } * * \endcode * * \tparam DatasetAdaptor The user-provided adaptor (see comments above). * \tparam Distance The distance metric to use: nanoflann::metric_L1, * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam DIM * Dimensionality of data points (e.g. 3 for 3D points) \tparam IndexType Will * be typically size_t or int */ template class KDTreeSingleIndexAdaptor : public KDTreeBaseClass< KDTreeSingleIndexAdaptor, Distance, DatasetAdaptor, DIM, IndexType> { public: /** Deleted copy constructor*/ KDTreeSingleIndexAdaptor( const KDTreeSingleIndexAdaptor &) = delete; /** * The dataset used by this index */ const DatasetAdaptor &dataset; //!< The source of our data const KDTreeSingleIndexAdaptorParams index_params; Distance distance; typedef typename nanoflann::KDTreeBaseClass< nanoflann::KDTreeSingleIndexAdaptor, Distance, DatasetAdaptor, DIM, IndexType> BaseClassRef; typedef typename BaseClassRef::ElementType ElementType; typedef typename BaseClassRef::DistanceType DistanceType; typedef typename BaseClassRef::Node Node; typedef Node *NodePtr; typedef typename BaseClassRef::Interval Interval; /** Define "BoundingBox" as a fixed-size or variable-size container depending * on "DIM" */ typedef typename BaseClassRef::BoundingBox BoundingBox; /** Define "distance_vector_t" as a fixed-size or variable-size container * depending on "DIM" */ typedef typename BaseClassRef::distance_vector_t distance_vector_t; /** * KDTree constructor * * Refer to docs in README.md or online in * https://github.com/jlblancoc/nanoflann * * The KD-Tree point dimension (the length of each point in the datase, e.g. 3 * for 3D points) is determined by means of: * - The \a DIM template parameter if >0 (highest priority) * - Otherwise, the \a dimensionality parameter of this constructor. * * @param inputData Dataset with the input features * @param params Basically, the maximum leaf node size */ KDTreeSingleIndexAdaptor(const int dimensionality, const DatasetAdaptor &inputData, const KDTreeSingleIndexAdaptorParams ¶ms = KDTreeSingleIndexAdaptorParams()) : dataset(inputData), index_params(params), distance(inputData) { BaseClassRef::root_node = NULL; BaseClassRef::m_size = dataset.kdtree_get_point_count(); BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; BaseClassRef::dim = dimensionality; if (DIM > 0) BaseClassRef::dim = DIM; BaseClassRef::m_leaf_max_size = params.leaf_max_size; // Create a permutable array of indices to the input vectors. init_vind(); } /** * Builds the index */ void buildIndex() { BaseClassRef::m_size = dataset.kdtree_get_point_count(); BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; init_vind(); this->freeIndex(*this); BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; if (BaseClassRef::m_size == 0) return; computeBoundingBox(BaseClassRef::root_bbox); BaseClassRef::root_node = this->divideTree(*this, 0, BaseClassRef::m_size, BaseClassRef::root_bbox); // construct the tree } /** \name Query methods * @{ */ /** * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored * inside the result object. * * Params: * result = the result object in which the indices of the * nearest-neighbors are stored vec = the vector for which to search the * nearest neighbors * * \tparam RESULTSET Should be any ResultSet * \return True if the requested neighbors could be found. 
* \sa knnSearch, radiusSearch */ template bool findNeighbors(RESULTSET &result, const ElementType *vec, const SearchParams &searchParams) const { assert(vec); if (this->size(*this) == 0) return false; if (!BaseClassRef::root_node) throw std::runtime_error( "[nanoflann] findNeighbors() called before building the index."); float epsError = 1 + searchParams.eps; distance_vector_t dists; // fixed or variable-sized container (depending on DIM) auto zero = static_cast(0); assign(dists, (DIM > 0 ? DIM : BaseClassRef::dim), zero); // Fill it with zeros. DistanceType distsq = this->computeInitialDistances(*this, vec, dists); searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, epsError); // "count_leaf" parameter removed since was neither // used nor returned to the user. return result.full(); } /** * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. * Their indices are stored inside the result object. \sa radiusSearch, * findNeighbors \note nChecks_IGNORED is ignored but kept for compatibility * with the original FLANN interface. \return Number `N` of valid points in * the result set. Only the first `N` entries in `out_indices` and * `out_distances_sq` will be valid. Return may be less than `num_closest` * only if the number of elements in the tree is less than `num_closest`. */ size_t knnSearch(const ElementType *query_point, const size_t num_closest, IndexType *out_indices, DistanceType *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_indices, out_distances_sq); this->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); return resultSet.size(); } /** * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius. * The output is given as a vector of pairs, of which the first element is a * point index and the second the corresponding distance. Previous contents of * \a IndicesDists are cleared. * * If searchParams.sorted==true, the output list is sorted by ascending * distances. * * For a better performance, it is advisable to do a .reserve() on the vector * if you have any wild guess about the number of expected matches. * * \sa knnSearch, findNeighbors, radiusSearchCustomCallback * \return The number of points within the given radius (i.e. indices.size() * or dists.size() ) */ size_t radiusSearch(const ElementType *query_point, const DistanceType &radius, std::vector> &IndicesDists, const SearchParams &searchParams) const { RadiusResultSet resultSet(radius, IndicesDists); const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); if (searchParams.sorted) std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); return nFound; } /** * Just like radiusSearch() but with a custom callback class for each point * found in the radius of the query. See the source of RadiusResultSet<> as a * start point for your own classes. \sa radiusSearch */ template size_t radiusSearchCustomCallback( const ElementType *query_point, SEARCH_CALLBACK &resultSet, const SearchParams &searchParams = SearchParams()) const { this->findNeighbors(resultSet, query_point, searchParams); return resultSet.size(); } /** @} */ public: /** Make sure the auxiliary list \a vind has the same size than the current * dataset, and re-generate if size has changed. */ void init_vind() { // Create a permutable array of indices to the input vectors. 
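    // (vind starts as the identity permutation 0..m_size-1; divideTree()
    // later reorders it in place, via planeSplit(), so that every leaf node
    // owns a contiguous [left, right) slice of indices into the dataset.)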
BaseClassRef::m_size = dataset.kdtree_get_point_count(); if (BaseClassRef::vind.size() != BaseClassRef::m_size) BaseClassRef::vind.resize(BaseClassRef::m_size); for (size_t i = 0; i < BaseClassRef::m_size; i++) BaseClassRef::vind[i] = i; } void computeBoundingBox(BoundingBox &bbox) { resize(bbox, (DIM > 0 ? DIM : BaseClassRef::dim)); if (dataset.kdtree_get_bbox(bbox)) { // Done! It was implemented in derived class } else { const size_t N = dataset.kdtree_get_point_count(); if (!N) throw std::runtime_error("[nanoflann] computeBoundingBox() called but " "no data points found."); for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { bbox[i].low = bbox[i].high = this->dataset_get(*this, 0, i); } for (size_t k = 1; k < N; ++k) { for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { if (this->dataset_get(*this, k, i) < bbox[i].low) bbox[i].low = this->dataset_get(*this, k, i); if (this->dataset_get(*this, k, i) > bbox[i].high) bbox[i].high = this->dataset_get(*this, k, i); } } } } /** * Performs an exact search in the tree starting from a node. * \tparam RESULTSET Should be any ResultSet * \return true if the search should be continued, false if the results are * sufficient */ template bool searchLevel(RESULTSET &result_set, const ElementType *vec, const NodePtr node, DistanceType mindistsq, distance_vector_t &dists, const float epsError) const { /* If this is a leaf node, then do check and return. */ if ((node->child1 == NULL) && (node->child2 == NULL)) { // count_leaf += (node->lr.right-node->lr.left); // Removed since was // neither used nor returned to the user. DistanceType worst_dist = result_set.worstDist(); for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right; ++i) { const IndexType index = BaseClassRef::vind[i]; // reorder... : i; DistanceType dist = distance.evalMetric( vec, index, (DIM > 0 ? DIM : BaseClassRef::dim)); if (dist < worst_dist) { if (!result_set.addPoint(dist, BaseClassRef::vind[i])) { // the resultset doesn't want to receive any more points, we're done // searching! return false; } } } return true; } /* Which child branch should be taken first? */ int idx = node->node_type.sub.divfeat; ElementType val = vec[idx]; DistanceType diff1 = val - node->node_type.sub.divlow; DistanceType diff2 = val - node->node_type.sub.divhigh; NodePtr bestChild; NodePtr otherChild; DistanceType cut_dist; if ((diff1 + diff2) < 0) { bestChild = node->child1; otherChild = node->child2; cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx); } else { bestChild = node->child2; otherChild = node->child1; cut_dist = distance.accum_dist(val, node->node_type.sub.divlow, idx); } /* Call recursively to search next level down. */ if (!searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError)) { // the resultset doesn't want to receive any more points, we're done // searching! return false; } DistanceType dst = dists[idx]; mindistsq = mindistsq + cut_dist - dst; dists[idx] = cut_dist; if (mindistsq * epsError <= result_set.worstDist()) { if (!searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError)) { // the resultset doesn't want to receive any more points, we're done // searching! return false; } } dists[idx] = dst; return true; } public: /** Stores the index in a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when * loading the index object it must be constructed associated to the same * source of data points used while building it. 
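 * A minimal sketch of the round trip (assuming `tree` and `tree2` are
 * KDTreeSingleIndexAdaptor instances constructed over the very same dataset):
 * \code
 *   FILE *f = fopen("index.bin", "wb");
 *   tree.saveIndex(f);
 *   fclose(f);
 *   f = fopen("index.bin", "rb");
 *   tree2.loadIndex(f);
 *   fclose(f);
 * \endcode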
See the example: * examples/saveload_example.cpp \sa loadIndex */ void saveIndex(FILE *stream) { this->saveIndex_(*this, stream); } /** Loads a previous index from a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the * index object must be constructed associated to the same source of data * points used while building the index. See the example: * examples/saveload_example.cpp \sa loadIndex */ void loadIndex(FILE *stream) { this->loadIndex_(*this, stream); } }; // class KDTree /** kd-tree dynamic index * * Contains the k-d trees and other information for indexing a set of points * for nearest-neighbor matching. * * The class "DatasetAdaptor" must provide the following interface (can be * non-virtual, inlined methods): * * \code * // Must return the number of data points * inline size_t kdtree_get_point_count() const { ... } * * // Must return the dim'th component of the idx'th point in the class: * inline T kdtree_get_pt(const size_t idx, const size_t dim) const { ... } * * // Optional bounding-box computation: return false to default to a standard * bbox computation loop. * // Return true if the BBOX was already computed by the class and returned * in "bb" so it can be avoided to redo it again. * // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3 * for point clouds) template <class BBOX> bool kdtree_get_bbox(BBOX &bb) const * { * bb[0].low = ...; bb[0].high = ...; // 0th dimension limits * bb[1].low = ...; bb[1].high = ...; // 1st dimension limits * ... * return true; * } * * \endcode * * \tparam DatasetAdaptor The user-provided adaptor (see comments above). * \tparam Distance The distance metric to use: nanoflann::metric_L1, * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam DIM * Dimensionality of data points (e.g. 3 for 3D points) \tparam IndexType Will * be typically size_t or int */ template <typename Distance, class DatasetAdaptor, int DIM = -1, typename IndexType = size_t> class KDTreeSingleIndexDynamicAdaptor_ : public KDTreeBaseClass<KDTreeSingleIndexDynamicAdaptor_<Distance, DatasetAdaptor, DIM, IndexType>, Distance, DatasetAdaptor, DIM, IndexType> { public: /** * The dataset used by this index */ const DatasetAdaptor &dataset; //!< The source of our data KDTreeSingleIndexAdaptorParams index_params; std::vector<int> &treeIndex; Distance distance; typedef typename nanoflann::KDTreeBaseClass< nanoflann::KDTreeSingleIndexDynamicAdaptor_<Distance, DatasetAdaptor, DIM, IndexType>, Distance, DatasetAdaptor, DIM, IndexType> BaseClassRef; typedef typename BaseClassRef::ElementType ElementType; typedef typename BaseClassRef::DistanceType DistanceType; typedef typename BaseClassRef::Node Node; typedef Node *NodePtr; typedef typename BaseClassRef::Interval Interval; /** Define "BoundingBox" as a fixed-size or variable-size container depending * on "DIM" */ typedef typename BaseClassRef::BoundingBox BoundingBox; /** Define "distance_vector_t" as a fixed-size or variable-size container * depending on "DIM" */ typedef typename BaseClassRef::distance_vector_t distance_vector_t; /** * KDTree constructor * * Refer to docs in README.md or online in * https://github.com/jlblancoc/nanoflann * * The KD-Tree point dimension (the length of each point in the dataset, e.g. 3 * for 3D points) is determined by means of: * - The \a DIM template parameter if >0 (highest priority) * - Otherwise, the \a dimensionality parameter of this constructor.
* * @param inputData Dataset with the input features * @param params Basically, the maximum leaf node size */ KDTreeSingleIndexDynamicAdaptor_( const int dimensionality, const DatasetAdaptor &inputData, std::vector<int> &treeIndex_, const KDTreeSingleIndexAdaptorParams &params = KDTreeSingleIndexAdaptorParams()) : dataset(inputData), index_params(params), treeIndex(treeIndex_), distance(inputData) { BaseClassRef::root_node = NULL; BaseClassRef::m_size = 0; BaseClassRef::m_size_at_index_build = 0; BaseClassRef::dim = dimensionality; if (DIM > 0) BaseClassRef::dim = DIM; BaseClassRef::m_leaf_max_size = params.leaf_max_size; } /** Assignment operator definition */ KDTreeSingleIndexDynamicAdaptor_ operator=(const KDTreeSingleIndexDynamicAdaptor_ &rhs) { KDTreeSingleIndexDynamicAdaptor_ tmp(rhs); std::swap(BaseClassRef::vind, tmp.BaseClassRef::vind); std::swap(BaseClassRef::m_leaf_max_size, tmp.BaseClassRef::m_leaf_max_size); std::swap(index_params, tmp.index_params); std::swap(treeIndex, tmp.treeIndex); std::swap(BaseClassRef::m_size, tmp.BaseClassRef::m_size); std::swap(BaseClassRef::m_size_at_index_build, tmp.BaseClassRef::m_size_at_index_build); std::swap(BaseClassRef::root_node, tmp.BaseClassRef::root_node); std::swap(BaseClassRef::root_bbox, tmp.BaseClassRef::root_bbox); std::swap(BaseClassRef::pool, tmp.BaseClassRef::pool); return *this; } /** * Builds the index */ void buildIndex() { BaseClassRef::m_size = BaseClassRef::vind.size(); this->freeIndex(*this); BaseClassRef::m_size_at_index_build = BaseClassRef::m_size; if (BaseClassRef::m_size == 0) return; computeBoundingBox(BaseClassRef::root_bbox); BaseClassRef::root_node = this->divideTree(*this, 0, BaseClassRef::m_size, BaseClassRef::root_bbox); // construct the tree } /** \name Query methods * @{ */ /** * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored * inside the result object. * * Params: * result = the result object in which the indices of the * nearest-neighbors are stored vec = the vector for which to search the * nearest neighbors * * \tparam RESULTSET Should be any ResultSet<DistanceType, IndexType> * \return True if the requested neighbors could be found. * \sa knnSearch, radiusSearch */ template <typename RESULTSET> bool findNeighbors(RESULTSET &result, const ElementType *vec, const SearchParams &searchParams) const { assert(vec); if (this->size(*this) == 0) return false; if (!BaseClassRef::root_node) return false; float epsError = 1 + searchParams.eps; // fixed or variable-sized container (depending on DIM) distance_vector_t dists; // Fill it with zeros. assign(dists, (DIM > 0 ? DIM : BaseClassRef::dim), static_cast<DistanceType>(0)); DistanceType distsq = this->computeInitialDistances(*this, vec, dists); searchLevel(result, vec, BaseClassRef::root_node, distsq, dists, epsError); // "count_leaf" parameter removed since was neither // used nor returned to the user. return result.full(); } /** * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1]. * Their indices are stored inside the result object. \sa radiusSearch, * findNeighbors \note nChecks_IGNORED is ignored but kept for compatibility * with the original FLANN interface. \return Number `N` of valid points in * the result set. Only the first `N` entries in `out_indices` and * `out_distances_sq` will be valid. Return may be less than `num_closest` * only if the number of elements in the tree is less than `num_closest`.
*/ size_t knnSearch(const ElementType *query_point, const size_t num_closest, IndexType *out_indices, DistanceType *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_indices, out_distances_sq); this->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); return resultSet.size(); } /** * Find all the neighbors to \a query_point[0:dim-1] within a maximum radius. * The output is given as a vector of pairs, of which the first element is a * point index and the second the corresponding distance. Previous contents of * \a IndicesDists are cleared. * * If searchParams.sorted==true, the output list is sorted by ascending * distances. * * For a better performance, it is advisable to do a .reserve() on the vector * if you have any wild guess about the number of expected matches. * * \sa knnSearch, findNeighbors, radiusSearchCustomCallback * \return The number of points within the given radius (i.e. indices.size() * or dists.size() ) */ size_t radiusSearch(const ElementType *query_point, const DistanceType &radius, std::vector> &IndicesDists, const SearchParams &searchParams) const { RadiusResultSet resultSet(radius, IndicesDists); const size_t nFound = radiusSearchCustomCallback(query_point, resultSet, searchParams); if (searchParams.sorted) std::sort(IndicesDists.begin(), IndicesDists.end(), IndexDist_Sorter()); return nFound; } /** * Just like radiusSearch() but with a custom callback class for each point * found in the radius of the query. See the source of RadiusResultSet<> as a * start point for your own classes. \sa radiusSearch */ template size_t radiusSearchCustomCallback( const ElementType *query_point, SEARCH_CALLBACK &resultSet, const SearchParams &searchParams = SearchParams()) const { this->findNeighbors(resultSet, query_point, searchParams); return resultSet.size(); } /** @} */ public: void computeBoundingBox(BoundingBox &bbox) { resize(bbox, (DIM > 0 ? DIM : BaseClassRef::dim)); if (dataset.kdtree_get_bbox(bbox)) { // Done! It was implemented in derived class } else { const size_t N = BaseClassRef::m_size; if (!N) throw std::runtime_error("[nanoflann] computeBoundingBox() called but " "no data points found."); for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { bbox[i].low = bbox[i].high = this->dataset_get(*this, BaseClassRef::vind[0], i); } for (size_t k = 1; k < N; ++k) { for (int i = 0; i < (DIM > 0 ? DIM : BaseClassRef::dim); ++i) { if (this->dataset_get(*this, BaseClassRef::vind[k], i) < bbox[i].low) bbox[i].low = this->dataset_get(*this, BaseClassRef::vind[k], i); if (this->dataset_get(*this, BaseClassRef::vind[k], i) > bbox[i].high) bbox[i].high = this->dataset_get(*this, BaseClassRef::vind[k], i); } } } } /** * Performs an exact search in the tree starting from a node. * \tparam RESULTSET Should be any ResultSet */ template void searchLevel(RESULTSET &result_set, const ElementType *vec, const NodePtr node, DistanceType mindistsq, distance_vector_t &dists, const float epsError) const { /* If this is a leaf node, then do check and return. */ if ((node->child1 == NULL) && (node->child2 == NULL)) { // count_leaf += (node->lr.right-node->lr.left); // Removed since was // neither used nor returned to the user. DistanceType worst_dist = result_set.worstDist(); for (IndexType i = node->node_type.lr.left; i < node->node_type.lr.right; ++i) { const IndexType index = BaseClassRef::vind[i]; // reorder... 
: i; if (treeIndex[index] == -1) continue; DistanceType dist = distance.evalMetric( vec, index, (DIM > 0 ? DIM : BaseClassRef::dim)); if (dist < worst_dist) { if (!result_set.addPoint( static_cast<typename RESULTSET::DistanceType>(dist), static_cast<typename RESULTSET::IndexType>( BaseClassRef::vind[i]))) { // the resultset doesn't want to receive any more points, we're done // searching! return; // false; } } } return; } /* Which child branch should be taken first? */ int idx = node->node_type.sub.divfeat; ElementType val = vec[idx]; DistanceType diff1 = val - node->node_type.sub.divlow; DistanceType diff2 = val - node->node_type.sub.divhigh; NodePtr bestChild; NodePtr otherChild; DistanceType cut_dist; if ((diff1 + diff2) < 0) { bestChild = node->child1; otherChild = node->child2; cut_dist = distance.accum_dist(val, node->node_type.sub.divhigh, idx); } else { bestChild = node->child2; otherChild = node->child1; cut_dist = distance.accum_dist(val, node->node_type.sub.divlow, idx); } /* Call recursively to search next level down. */ searchLevel(result_set, vec, bestChild, mindistsq, dists, epsError); DistanceType dst = dists[idx]; mindistsq = mindistsq + cut_dist - dst; dists[idx] = cut_dist; if (mindistsq * epsError <= result_set.worstDist()) { searchLevel(result_set, vec, otherChild, mindistsq, dists, epsError); } dists[idx] = dst; } public: /** Stores the index in a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so when * loading the index object it must be constructed associated to the same * source of data points used while building it. See the example: * examples/saveload_example.cpp \sa loadIndex */ void saveIndex(FILE *stream) { this->saveIndex_(*this, stream); } /** Loads a previous index from a binary file. * IMPORTANT NOTE: The set of data points is NOT stored in the file, so the * index object must be constructed associated to the same source of data * points used while building the index. See the example: * examples/saveload_example.cpp \sa loadIndex */ void loadIndex(FILE *stream) { this->loadIndex_(*this, stream); } }; /** kd-tree dynamic index * * Class to create multiple static indices and merge their results to behave as * a single dynamic index, as proposed in the Logarithmic Approach. * * Example of usage: * examples/dynamic_pointcloud_example.cpp * * \tparam DatasetAdaptor The user-provided adaptor (see comments above). * \tparam Distance The distance metric to use: nanoflann::metric_L1, * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam DIM * Dimensionality of data points (e.g. 3 for 3D points) \tparam IndexType Will * be typically size_t or int */ template <typename Distance, class DatasetAdaptor, int DIM = -1, typename IndexType = size_t> class KDTreeSingleIndexDynamicAdaptor { public: typedef typename Distance::ElementType ElementType; typedef typename Distance::DistanceType DistanceType; protected: size_t m_leaf_max_size; size_t treeCount; size_t pointCount; /** * The dataset used by this index */ const DatasetAdaptor &dataset; //!< The source of our data std::vector<int> treeIndex; //!< treeIndex[idx] is the index of the tree in which //!< the point at idx is stored. treeIndex[idx]=-1 //!< means that point has been removed. KDTreeSingleIndexAdaptorParams index_params; int dim; //!< Dimensionality of each data point typedef KDTreeSingleIndexDynamicAdaptor_<Distance, DatasetAdaptor, DIM> index_container_t; std::vector<index_container_t> index; public: /** Get a const ref to the internal list of indices; the number of indices is * adapted dynamically as the dataset grows in size.
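 *
 * A rough usage sketch (assuming the same `PointCloud` adaptor and `query`
 * pointer as in the examples above):
 * \code
 *   KDTreeSingleIndexDynamicAdaptor<L2_Simple_Adaptor<float, PointCloud>,
 *                                   PointCloud, 3> tree(3, cloud);
 *   tree.removePoint(5);  // lazy deletion: treeIndex[5] = -1
 *   size_t ret_index;
 *   float out_dist_sqr;
 *   nanoflann::KNNResultSet<float> resultSet(1);
 *   resultSet.init(&ret_index, &out_dist_sqr);
 *   tree.findNeighbors(resultSet, query, nanoflann::SearchParams());
 * \endcode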
*/ const std::vector<index_container_t> &getAllIndices() const { return index; } private: /** finds position of least significant unset bit */ int First0Bit(IndexType num) { int pos = 0; while (num & 1) { num = num >> 1; pos++; } return pos; } /** Creates multiple empty trees to handle dynamic support */ void init() { typedef KDTreeSingleIndexDynamicAdaptor_<Distance, DatasetAdaptor, DIM> my_kd_tree_t; std::vector<my_kd_tree_t> index_( treeCount, my_kd_tree_t(dim /*dim*/, dataset, treeIndex, index_params)); index = index_; } public: Distance distance; /** * KDTree constructor * * Refer to docs in README.md or online in * https://github.com/jlblancoc/nanoflann * * The KD-Tree point dimension (the length of each point in the dataset, e.g. 3 * for 3D points) is determined by means of: * - The \a DIM template parameter if >0 (highest priority) * - Otherwise, the \a dimensionality parameter of this constructor. * * @param inputData Dataset with the input features * @param params Basically, the maximum leaf node size */ KDTreeSingleIndexDynamicAdaptor(const int dimensionality, const DatasetAdaptor &inputData, const KDTreeSingleIndexAdaptorParams &params = KDTreeSingleIndexAdaptorParams(), const size_t maximumPointCount = 1000000000U) : dataset(inputData), index_params(params), distance(inputData) { treeCount = static_cast<size_t>(std::log2(maximumPointCount)); pointCount = 0U; dim = dimensionality; treeIndex.clear(); if (DIM > 0) dim = DIM; m_leaf_max_size = params.leaf_max_size; init(); const size_t num_initial_points = dataset.kdtree_get_point_count(); if (num_initial_points > 0) { addPoints(0, num_initial_points - 1); } } /** Deleted copy constructor*/ KDTreeSingleIndexDynamicAdaptor( const KDTreeSingleIndexDynamicAdaptor<Distance, DatasetAdaptor, DIM, IndexType> &) = delete; /** Add points to the set; inserts all points from [start, end] */ void addPoints(IndexType start, IndexType end) { size_t count = end - start + 1; treeIndex.resize(treeIndex.size() + count); for (IndexType idx = start; idx <= end; idx++) { int pos = First0Bit(pointCount); index[pos].vind.clear(); treeIndex[pointCount] = pos; for (int i = 0; i < pos; i++) { for (int j = 0; j < static_cast<int>(index[i].vind.size()); j++) { index[pos].vind.push_back(index[i].vind[j]); if (treeIndex[index[i].vind[j]] != -1) treeIndex[index[i].vind[j]] = pos; } index[i].vind.clear(); index[i].freeIndex(index[i]); } index[pos].vind.push_back(idx); index[pos].buildIndex(); pointCount++; } } /** Remove a point from the set (Lazy Deletion) */ void removePoint(size_t idx) { if (idx >= pointCount) return; treeIndex[idx] = -1; } /** * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored * inside the result object. * * Params: * result = the result object in which the indices of the * nearest-neighbors are stored vec = the vector for which to search the * nearest neighbors * * \tparam RESULTSET Should be any ResultSet<DistanceType, IndexType> * \return True if the requested neighbors could be found. * \sa knnSearch, radiusSearch */ template <typename RESULTSET> bool findNeighbors(RESULTSET &result, const ElementType *vec, const SearchParams &searchParams) const { for (size_t i = 0; i < treeCount; i++) { index[i].findNeighbors(result, &vec[0], searchParams); } return result.full(); } }; /** An L2-metric KD-tree adaptor for working with data directly stored in an * Eigen Matrix, without duplicating the data storage. Each row in the matrix * represents a point in the state space. * * Example of usage: * \code * Eigen::Matrix<num_t, Eigen::Dynamic, Eigen::Dynamic> mat; * // Fill out "mat"...
* * typedef KDTreeEigenMatrixAdaptor< Eigen::Matrix > * my_kd_tree_t; const int max_leaf = 10; my_kd_tree_t mat_index(mat, max_leaf * ); mat_index.index->buildIndex(); mat_index.index->... \endcode * * \tparam DIM If set to >0, it specifies a compile-time fixed dimensionality * for the points in the data set, allowing more compiler optimizations. \tparam * Distance The distance metric to use: nanoflann::metric_L1, * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. */ template struct KDTreeEigenMatrixAdaptor { typedef KDTreeEigenMatrixAdaptor self_t; typedef typename MatrixType::Scalar num_t; typedef typename MatrixType::Index IndexType; typedef typename Distance::template traits::distance_t metric_t; typedef KDTreeSingleIndexAdaptor index_t; index_t *index; //! The kd-tree index for the user to call its methods as //! usual with any other FLANN index. /// Constructor: takes a const ref to the matrix object with the data points KDTreeEigenMatrixAdaptor(const size_t dimensionality, const std::reference_wrapper &mat, const int leaf_max_size = 10) : m_data_matrix(mat) { const auto dims = mat.get().cols(); if (size_t(dims) != dimensionality) throw std::runtime_error( "Error: 'dimensionality' must match column count in data matrix"); if (DIM > 0 && int(dims) != DIM) throw std::runtime_error( "Data set dimensionality does not match the 'DIM' template argument"); index = new index_t(static_cast(dims), *this /* adaptor */, nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size)); index->buildIndex(); } public: /** Deleted copy constructor */ KDTreeEigenMatrixAdaptor(const self_t &) = delete; ~KDTreeEigenMatrixAdaptor() { delete index; } const std::reference_wrapper m_data_matrix; /** Query for the \a num_closest closest points to a given point (entered as * query_point[0:dim-1]). Note that this is a short-cut method for * index->findNeighbors(). The user can also call index->... methods as * desired. \note nChecks_IGNORED is ignored but kept for compatibility with * the original FLANN interface. */ inline void query(const num_t *query_point, const size_t num_closest, IndexType *out_indices, num_t *out_distances_sq, const int /* nChecks_IGNORED */ = 10) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_indices, out_distances_sq); index->findNeighbors(resultSet, query_point, nanoflann::SearchParams()); } /** @name Interface expected by KDTreeSingleIndexAdaptor * @{ */ const self_t &derived() const { return *this; } self_t &derived() { return *this; } // Must return the number of data points inline size_t kdtree_get_point_count() const { return m_data_matrix.get().rows(); } // Returns the dim'th component of the idx'th point in the class: inline num_t kdtree_get_pt(const IndexType idx, size_t dim) const { return m_data_matrix.get().coeff(idx, IndexType(dim)); } // Optional bounding-box computation: return false to default to a standard // bbox computation loop. // Return true if the BBOX was already computed by the class and returned in // "bb" so it can be avoided to redo it again. Look at bb.size() to find out // the expected dimensionality (e.g. 
2 or 3 for point clouds) template <class BBOX> bool kdtree_get_bbox(BBOX & /*bb*/) const { return false; } /** @} */ }; // end of KDTreeEigenMatrixAdaptor /** @} */ /** @} */ // end of grouping } // namespace nanoflann #endif /* NANOFLANN_HPP_ */ ================================================ FILE: thirdparty/kpconv/kernels/kernel_points.py ================================================ # # # 0=================================0 # | Kernel Point Convolutions | # 0=================================0 # # # ---------------------------------------------------------------------------------------------------------------------- # # Functions handling the disposition of kernel points. # # ---------------------------------------------------------------------------------------------------------------------- # # Hugues THOMAS - 11/06/2018 # import time import numpy as np import matplotlib.pyplot as plt # needed by the verbose plotting paths below from os import makedirs from os.path import join, exists # from lib.ply import read_ply, write_ply from kpconv.lib.ply import read_ply, write_ply # ------------------------------------------------------------------------------------------ # # Functions # \***************/ # # def create_3D_rotations(axis, angle): """ Create rotation matrices from a list of axes and angles. Code from wikipedia on quaternions :param axis: float32[N, 3] :param angle: float32[N,] :return: float32[N, 3, 3] """ t1 = np.cos(angle) t2 = 1 - t1 t3 = axis[:, 0] * axis[:, 0] t6 = t2 * axis[:, 0] t7 = t6 * axis[:, 1] t8 = np.sin(angle) t9 = t8 * axis[:, 2] t11 = t6 * axis[:, 2] t12 = t8 * axis[:, 1] t15 = axis[:, 1] * axis[:, 1] t19 = t2 * axis[:, 1] * axis[:, 2] t20 = t8 * axis[:, 0] t24 = axis[:, 2] * axis[:, 2] R = np.stack([t1 + t2 * t3, t7 - t9, t11 + t12, t7 + t9, t1 + t2 * t15, t19 - t20, t11 - t12, t19 + t20, t1 + t2 * t24], axis=1) return np.reshape(R, (-1, 3, 3)) def spherical_Lloyd(radius, num_cells, dimension=3, fixed='center', approximation='monte-carlo', approx_n=5000, max_iter=500, momentum=0.9, verbose=0): """ Creation of kernel points via Lloyd's algorithm. We use an approximation of the algorithm and compute the Voronoi cell centers with a discretization of space (the exact formula is not trivial when parts of the sphere bound the cells). :param radius: Radius of the kernels :param num_cells: Number of cells (kernel points) in the Voronoi diagram. :param dimension: dimension of the space :param fixed: fix position of certain kernel points ('none', 'center' or 'verticals') :param approximation: Approximation method for Lloyd's algorithm ('discretization', 'monte-carlo') :param approx_n: Number of points used for the approximation. :param max_iter: Maximum number of iterations for the algorithm.
:param momentum: Momentum of the low pass filter smoothing kernel point positions :param verbose: display option :return: points [num_cells, dimension] """ ####################### # Parameters definition ####################### # Radius used for optimization (points are rescaled afterwards) radius0 = 1.0 ####################### # Kernel initialization ####################### # Random kernel points (Uniform distribution in a sphere) kernel_points = np.zeros((0, dimension)) while kernel_points.shape[0] < num_cells: new_points = np.random.rand(num_cells, dimension) * 2 * radius0 - radius0 kernel_points = np.vstack((kernel_points, new_points)) d2 = np.sum(np.power(kernel_points, 2), axis=1) kernel_points = kernel_points[np.logical_and(d2 < radius0 ** 2, (0.9 * radius0) ** 2 < d2), :] kernel_points = kernel_points[:num_cells, :].reshape((num_cells, -1)) # Optional fixing if fixed == 'center': kernel_points[0, :] *= 0 if fixed == 'verticals': kernel_points[:3, :] *= 0 kernel_points[1, -1] += 2 * radius0 / 3 kernel_points[2, -1] -= 2 * radius0 / 3 ############################## # Approximation initialization ############################## # Initialize figure if verbose > 1: fig = plt.figure() # Initialize discretization if this method is chosen if approximation == 'discretization': side_n = int(np.floor(approx_n ** (1. / dimension))) dl = 2 * radius0 / side_n coords = np.arange(-radius0 + dl/2, radius0, dl) if dimension == 2: x, y = np.meshgrid(coords, coords) X = np.vstack((np.ravel(x), np.ravel(y))).T elif dimension == 3: x, y, z = np.meshgrid(coords, coords, coords) X = np.vstack((np.ravel(x), np.ravel(y), np.ravel(z))).T elif dimension == 4: x, y, z, t = np.meshgrid(coords, coords, coords, coords) X = np.vstack((np.ravel(x), np.ravel(y), np.ravel(z), np.ravel(t))).T else: raise ValueError('Unsupported dimension (max is 4)') elif approximation == 'monte-carlo': X = np.zeros((0, dimension)) else: raise ValueError('Wrong approximation method chosen: "{:s}"'.format(approximation)) # Only points inside the sphere are used d2 = np.sum(np.power(X, 2), axis=1) X = X[d2 < radius0 * radius0, :] ##################### # Kernel optimization ##################### # Warning if at least one kernel point has no cell warning = False # moving vectors of kernel points saved to detect convergence max_moves = np.zeros((0,)) for iter in range(max_iter): # In the case of monte-carlo, renew the sampled points if approximation == 'monte-carlo': X = np.random.rand(approx_n, dimension) * 2 * radius0 - radius0 d2 = np.sum(np.power(X, 2), axis=1) X = X[d2 < radius0 * radius0, :] # Get the distances matrix [n_approx, K, dim] differences = np.expand_dims(X, 1) - kernel_points sq_distances = np.sum(np.square(differences), axis=2) # Compute cell centers cell_inds = np.argmin(sq_distances, axis=1) centers = [] for c in range(num_cells): bool_c = (cell_inds == c) num_c = np.sum(bool_c.astype(np.int32)) if num_c > 0: centers.append(np.sum(X[bool_c, :], axis=0) / num_c) else: warning = True centers.append(kernel_points[c]) # Update kernel points with a low-pass filter to smooth the Monte Carlo estimate centers = np.vstack(centers) moves = (1 - momentum) * (centers - kernel_points) kernel_points += moves # Check moves for convergence max_moves = np.append(max_moves, np.max(np.linalg.norm(moves, axis=1))) # Optional fixing if fixed == 'center': kernel_points[0, :] *= 0 if fixed == 'verticals': kernel_points[0, :] *= 0 kernel_points[:3, :-1] *= 0 if verbose: print('iter {:5d} / max move = {:f}'.format(iter,
np.max(np.linalg.norm(moves, axis=1)))) if warning: print('WARNING: at least one point has no cell') if verbose > 1: plt.clf() plt.scatter(X[:, 0], X[:, 1], c=cell_inds, s=20.0, marker='.', cmap=plt.get_cmap('tab20')) #plt.scatter(kernel_points[:, 0], kernel_points[:, 1], c=np.arange(num_cells), s=100.0, # marker='+', cmap=plt.get_cmap('tab20')) plt.plot(kernel_points[:, 0], kernel_points[:, 1], 'k+') circle = plt.Circle((0, 0), radius0, color='r', fill=False) fig.axes[0].add_artist(circle) fig.axes[0].set_xlim((-radius0 * 1.1, radius0 * 1.1)) fig.axes[0].set_ylim((-radius0 * 1.1, radius0 * 1.1)) fig.axes[0].set_aspect('equal') plt.draw() plt.pause(0.001) plt.show(block=False) ################### # User verification ################### # Show the convergence to ask user if this kernel is correct if verbose: if dimension == 2: fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[10.4, 4.8]) ax1.plot(max_moves) ax2.scatter(X[:, 0], X[:, 1], c=cell_inds, s=20.0, marker='.', cmap=plt.get_cmap('tab20')) # plt.scatter(kernel_points[:, 0], kernel_points[:, 1], c=np.arange(num_cells), s=100.0, # marker='+', cmap=plt.get_cmap('tab20')) ax2.plot(kernel_points[:, 0], kernel_points[:, 1], 'k+') circle = plt.Circle((0, 0), radius0, color='r', fill=False) ax2.add_artist(circle) ax2.set_xlim((-radius0 * 1.1, radius0 * 1.1)) ax2.set_ylim((-radius0 * 1.1, radius0 * 1.1)) ax2.set_aspect('equal') plt.title('Check if kernel is correct.') plt.draw() plt.show() if dimension > 2: plt.figure() plt.plot(max_moves) plt.title('Check if kernel is correct.') plt.show() # Rescale kernels with real radius return kernel_points * radius def kernel_point_optimization_debug(radius, num_points, num_kernels=1, dimension=3, fixed='center', ratio=0.66, verbose=0): """ Creation of kernel points via optimization of potentials.
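    Kernel points repel each other with an inverse-square potential and are pulled back toward the center by a quadratic one; points follow the clipped negative gradient until the gradient norms stabilize (a one-line summary of the update loop below).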
:param radius: Radius of the kernels :param num_points: number of points composing each kernel :param num_kernels: number of wanted kernels :param dimension: dimension of the space :param fixed: fix position of certain kernel points ('none', 'center' or 'verticals') :param ratio: ratio of the radius where you want the kernel points to be placed :param verbose: display option :return: points [num_kernels, num_points, dimension] """ ####################### # Parameters definition ####################### # Radius used for optimization (points are rescaled afterwards) radius0 = 1 diameter0 = 2 # Factor multiplying gradients for moving points (~learning rate) moving_factor = 1e-2 continuous_moving_decay = 0.9995 # Gradient threshold to stop optimization thresh = 1e-5 # Gradient clipping value clip = 0.05 * radius0 ####################### # Kernel initialization ####################### # import pdb # pdb.set_trace() # Random kernel points kernel_points = np.random.rand(num_kernels * num_points - 1, dimension) * diameter0 - radius0 while (kernel_points.shape[0] < num_kernels * num_points): new_points = np.random.rand(num_kernels * num_points - 1, dimension) * diameter0 - radius0 kernel_points = np.vstack((kernel_points, new_points)) d2 = np.sum(np.power(kernel_points, 2), axis=1) kernel_points = kernel_points[d2 < 0.5 * radius0 * radius0, :] kernel_points = kernel_points[:num_kernels * num_points, :].reshape((num_kernels, num_points, -1)) # Optional fixing if fixed == 'center': kernel_points[:, 0, :] *= 0 if fixed == 'verticals': kernel_points[:, :3, :] *= 0 kernel_points[:, 1, -1] += 2 * radius0 / 3 kernel_points[:, 2, -1] -= 2 * radius0 / 3 ##################### # Kernel optimization ##################### # Initialize figure if verbose > 1: fig = plt.figure() saved_gradient_norms = np.zeros((10000, num_kernels)) old_gradient_norms = np.zeros((num_kernels, num_points)) for iter in range(10000): # Compute gradients # ***************** # Derivative of the sum of potentials of all points A = np.expand_dims(kernel_points, axis=2) B = np.expand_dims(kernel_points, axis=1) interd2 = np.sum(np.power(A - B, 2), axis=-1) inter_grads = (A - B) / (np.power(np.expand_dims(interd2, -1), 3/2) + 1e-6) inter_grads = np.sum(inter_grads, axis=1) # Derivative of the radius potential circle_grads = 10*kernel_points # All gradients gradients = inter_grads + circle_grads if fixed == 'verticals': gradients[:, 1:3, :-1] = 0 # Stop condition # ************** # Compute norm of gradients gradients_norms = np.sqrt(np.sum(np.power(gradients, 2), axis=-1)) saved_gradient_norms[iter, :] = np.max(gradients_norms, axis=1) # Stop when the gradient norms of all moving points are stable (small change between iterations) if fixed == 'center' and np.max(np.abs(old_gradient_norms[:, 1:] - gradients_norms[:, 1:])) < thresh: break elif fixed == 'verticals' and np.max(np.abs(old_gradient_norms[:, 3:] - gradients_norms[:, 3:])) < thresh: break elif np.max(np.abs(old_gradient_norms - gradients_norms)) < thresh: break old_gradient_norms = gradients_norms # Move points # *********** # Clip gradient to get moving dists moving_dists = np.minimum(moving_factor * gradients_norms, clip) # Fix central point if fixed == 'center': moving_dists[:, 0] = 0 if fixed == 'verticals': moving_dists[:, 0] = 0 # Move points kernel_points -= np.expand_dims(moving_dists, -1) * gradients / np.expand_dims(gradients_norms + 1e-6, -1) if verbose: print('iter {:5d} / max grad = {:f}'.format(iter, np.max(gradients_norms[:, 3:]))) if verbose > 1: plt.clf() plt.plot(kernel_points[0, :, 0], kernel_points[0,
:, 1], '.') circle = plt.Circle((0, 0), radius, color='r', fill=False) fig.axes[0].add_artist(circle) fig.axes[0].set_xlim((-radius*1.1, radius*1.1)) fig.axes[0].set_ylim((-radius*1.1, radius*1.1)) fig.axes[0].set_aspect('equal') plt.draw() plt.pause(0.001) plt.show(block=False) print(moving_factor) # moving factor decay moving_factor *= continuous_moving_decay # Rescale radius to fit the wanted ratio of radius r = np.sqrt(np.sum(np.power(kernel_points, 2), axis=-1)) kernel_points *= ratio / np.mean(r[:, 1:]) # Rescale kernels with real radius return kernel_points * radius, saved_gradient_norms def load_kernels(radius, num_kpoints, dimension, fixed, lloyd=False): # Kernel directory kernel_dir = 'kernels/dispositions' if not exists(kernel_dir): makedirs(kernel_dir) # Too many points: switch to Lloyd's algorithm if num_kpoints > 30: lloyd = True # Kernel_file kernel_file = join(kernel_dir, 'k_{:03d}_{:s}_{:d}D.ply'.format(num_kpoints, fixed, dimension)) # Check if already done if not exists(kernel_file): if lloyd: # Create kernels kernel_points = spherical_Lloyd(1.0, num_kpoints, dimension=dimension, fixed=fixed, verbose=0) else: # Create kernels kernel_points, grad_norms = kernel_point_optimization_debug(1.0, num_kpoints, num_kernels=100, dimension=dimension, fixed=fixed, verbose=0) # Find best candidate best_k = np.argmin(grad_norms[-1, :]) # Save points kernel_points = kernel_points[best_k, :, :] write_ply(kernel_file, kernel_points, ['x', 'y', 'z']) else: data = read_ply(kernel_file) kernel_points = np.vstack((data['x'], data['y'], data['z'])).T # Random rotations for the kernel # N.B. 4D random rotations not supported yet R = np.eye(dimension) theta = np.random.rand() * 2 * np.pi if dimension == 2: if fixed != 'vertical': c, s = np.cos(theta), np.sin(theta) R = np.array([[c, -s], [s, c]], dtype=np.float32) elif dimension == 3: if fixed != 'vertical': c, s = np.cos(theta), np.sin(theta) R = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]], dtype=np.float32) else: phi = (np.random.rand() - 0.5) * np.pi # Create the first vector in Cartesian coordinates u = np.array([np.cos(theta) * np.cos(phi), np.sin(theta) * np.cos(phi), np.sin(phi)]) # Choose a random rotation angle alpha = np.random.rand() * 2 * np.pi # Create the rotation matrix with this vector and angle R = create_3D_rotations(np.reshape(u, (1, -1)), np.reshape(alpha, (1, -1)))[0] R = R.astype(np.float32) # Add a small noise kernel_points = kernel_points + np.random.normal(scale=0.01, size=kernel_points.shape) # Scale kernels kernel_points = radius * kernel_points # Rotate kernels kernel_points = np.matmul(kernel_points, R) return kernel_points.astype(np.float32) ================================================ FILE: thirdparty/kpconv/kpconv_blocks.py ================================================ # # # 0=================================0 # | Kernel Point Convolutions | # 0=================================0 # # # ---------------------------------------------------------------------------------------------------------------------- # # Define network blocks # # ---------------------------------------------------------------------------------------------------------------------- # # Hugues THOMAS - 06/03/2020 import time import math import torch import torch.nn as nn from torch.nn.parameter import Parameter from torch.nn.init import kaiming_uniform_ # from kernels.kernel_points import load_kernels from kpconv.kernels.kernel_points import load_kernels # from lib.ply import write_ply from kpconv.lib.ply import write_ply def gather(x, idx, method=2):
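    # A quick shape example (illustrative values): x [N, D], idx [n1, n2] -> output [n1, n2, D],
    # i.e. the same values as x[idx]; methods 1 and 2 rebuild the result with torch.gather,
    # which gives a cheaper backward pass than advanced indexing.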
""" implementation of a custom gather operation for faster backwards. :param x: input with shape [N, D_1, ... D_d] :param idx: indexing with shape [n_1, ..., n_m] :param method: Choice of the method :return: x[idx] with shape [n_1, ..., n_m, D_1, ... D_d] """ if method == 0: return x[idx] elif method == 1: x = x.unsqueeze(1) x = x.expand((-1, idx.shape[-1], -1)) idx = idx.unsqueeze(2) idx = idx.expand((-1, -1, x.shape[-1])) return x.gather(0, idx) elif method == 2: for i, ni in enumerate(idx.size()[1:]): x = x.unsqueeze(i+1) new_s = list(x.size()) new_s[i+1] = ni x = x.expand(new_s) n = len(idx.size()) for i, di in enumerate(x.size()[n:]): idx = idx.unsqueeze(i+n) new_s = list(idx.size()) new_s[i+n] = di idx = idx.expand(new_s) return x.gather(0, idx) else: raise ValueError('Unkown method') def radius_gaussian(sq_r, sig, eps=1e-9): """ Compute a radius gaussian (gaussian of distance) :param sq_r: input radiuses [dn, ..., d1, d0] :param sig: extents of gaussians [d1, d0] or [d0] or float :return: gaussian of sq_r [dn, ..., d1, d0] """ return torch.exp(-sq_r / (2 * sig**2 + eps)) def closest_pool(x, inds): """ Pools features from the closest neighbors. WARNING: this function assumes the neighbors are ordered. :param x: [n1, d] features matrix :param inds: [n2, max_num] Only the first column is used for pooling :return: [n2, d] pooled features matrix """ # Add a last row with minimum features for shadow pools x = torch.cat((x, torch.zeros_like(x[:1, :])), 0) # Get features for each pooling location [n2, d] return gather(x, inds[:, 0]) def max_pool(x, inds): """ Pools features with the maximum values. :param x: [n1, d] features matrix :param inds: [n2, max_num] pooling indices :return: [n2, d] pooled features matrix """ # Add a last row with minimum features for shadow pools x = torch.cat((x, torch.zeros_like(x[:1, :])), 0) # Get all features for each pooling location [n2, max_num, d] pool_features = gather(x, inds) # Pool the maximum [n2, d] max_features, _ = torch.max(pool_features, 1) return max_features def global_average(x, batch_lengths): """ Block performing a global average over batch pooling :param x: [N, D] input features :param batch_lengths: [B] list of batch lengths :return: [B, D] averaged features """ # Loop over the clouds of the batch averaged_features = [] i0 = 0 for b_i, length in enumerate(batch_lengths): # Average features for each batch cloud averaged_features.append(torch.mean(x[i0:i0 + length], dim=0)) # Increment for next cloud i0 += length # Average features in each batch return torch.stack(averaged_features) # ---------------------------------------------------------------------------------------------------------------------- # # KPConv class # \******************/ # class KPConv(nn.Module): def __init__(self, kernel_size, p_dim, in_channels, out_channels, KP_extent, radius, fixed_kernel_points='center', KP_influence='linear', aggregation_mode='sum', deformable=False, modulated=False): """ Initialize parameters for KPConvDeformable. :param kernel_size: Number of kernel points. :param p_dim: dimension of the point space. :param in_channels: dimension of input features. :param out_channels: dimension of output features. :param KP_extent: influence radius of each kernel point. :param radius: radius used for kernel point init. Even for deformable, use the config.conv_radius :param fixed_kernel_points: fix position of certain kernel points ('none', 'center' or 'verticals'). :param KP_influence: influence function of the kernel points ('constant', 'linear', 'gaussian'). 
:param aggregation_mode: choose to sum influences, or only keep the closest ('closest', 'sum'). :param deformable: choose deformable or not :param modulated: choose if kernel weights are modulated in addition to deformed """ super(KPConv, self).__init__() # Save parameters self.K = kernel_size self.p_dim = p_dim self.in_channels = in_channels self.out_channels = out_channels self.radius = radius self.KP_extent = KP_extent self.fixed_kernel_points = fixed_kernel_points self.KP_influence = KP_influence self.aggregation_mode = aggregation_mode self.deformable = deformable self.modulated = modulated # Running variable containing deformed KP distance to input points. (used in regularization loss) self.min_d2 = None self.deformed_KP = None self.offset_features = None # Initialize weights self.weights = Parameter(torch.zeros((self.K, in_channels, out_channels), dtype=torch.float32), requires_grad=True) # Initiate weights for offsets if deformable: if modulated: self.offset_dim = (self.p_dim + 1) * self.K else: self.offset_dim = self.p_dim * self.K self.offset_conv = KPConv(self.K, self.p_dim, self.in_channels, self.offset_dim, KP_extent, radius, fixed_kernel_points=fixed_kernel_points, KP_influence=KP_influence, aggregation_mode=aggregation_mode) self.offset_bias = Parameter(torch.zeros(self.offset_dim, dtype=torch.float32), requires_grad=True) else: self.offset_dim = None self.offset_conv = None self.offset_bias = None # Reset parameters self.reset_parameters() # Initialize kernel points self.kernel_points = self.init_KP() return def reset_parameters(self): kaiming_uniform_(self.weights, a=math.sqrt(5)) if self.deformable: nn.init.zeros_(self.offset_bias) return def init_KP(self): """ Initialize the kernel point positions in a sphere :return: the tensor of kernel points """ # Create one kernel disposition (as numpy array). 
Choose the KP distance to center thanks to the KP extent K_points_numpy = load_kernels(self.radius, self.K, dimension=self.p_dim, fixed=self.fixed_kernel_points) return Parameter(torch.tensor(K_points_numpy, dtype=torch.float32), requires_grad=False) def forward(self, q_pts, s_pts, neighb_inds, x): ################### # Offset generation ################### if self.deformable: # Get offsets with a KPConv that only takes part of the features self.offset_features = self.offset_conv(q_pts, s_pts, neighb_inds, x) + self.offset_bias if self.modulated: # Get offset (in normalized scale) from features unscaled_offsets = self.offset_features[:, :self.p_dim * self.K] unscaled_offsets = unscaled_offsets.view(-1, self.K, self.p_dim) # Get modulations modulations = 2 * torch.sigmoid(self.offset_features[:, self.p_dim * self.K:]) else: # Get offset (in normalized scale) from features unscaled_offsets = self.offset_features.view(-1, self.K, self.p_dim) # No modulations modulations = None # Rescale offset for this layer offsets = unscaled_offsets * self.KP_extent else: offsets = None modulations = None ###################### # Deformed convolution ###################### # Add a fake point in the last row for shadow neighbors s_pts = torch.cat((s_pts, torch.zeros_like(s_pts[:1, :]) + 1e6), 0) # Get neighbor points [n_points, n_neighbors, dim] neighbors = s_pts[neighb_inds, :] # Center every neighborhood neighbors = neighbors - q_pts.unsqueeze(1) # Apply offsets to kernel points [n_points, n_kpoints, dim] if self.deformable: self.deformed_KP = offsets + self.kernel_points deformed_K_points = self.deformed_KP.unsqueeze(1) else: deformed_K_points = self.kernel_points # Get all difference matrices [n_points, n_neighbors, n_kpoints, dim] neighbors.unsqueeze_(2) differences = neighbors - deformed_K_points # Get the square distances [n_points, n_neighbors, n_kpoints] sq_distances = torch.sum(differences ** 2, dim=3) # Optimization by ignoring points outside a deformed KP range if self.deformable: # Save distances for loss self.min_d2, _ = torch.min(sq_distances, dim=1) # Boolean of the neighbors in range of a kernel point [n_points, n_neighbors] in_range = torch.any(sq_distances < self.KP_extent ** 2, dim=2).type(torch.int32) # New value of max neighbors new_max_neighb = torch.max(torch.sum(in_range, dim=1)) # For each row of neighbors, indices of the ones that are in range [n_points, new_max_neighb] neighb_row_bool, neighb_row_inds = torch.topk(in_range, new_max_neighb.item(), dim=1) # Gather new neighbor indices [n_points, new_max_neighb] new_neighb_inds = neighb_inds.gather(1, neighb_row_inds, sparse_grad=False) # Gather new distances to KP [n_points, new_max_neighb, n_kpoints] neighb_row_inds.unsqueeze_(2) neighb_row_inds = neighb_row_inds.expand(-1, -1, self.K) sq_distances = sq_distances.gather(1, neighb_row_inds, sparse_grad=False) # New shadow neighbors have to point to the last shadow point new_neighb_inds *= neighb_row_bool new_neighb_inds -= (neighb_row_bool.type(torch.int64) - 1) * int(s_pts.shape[0] - 1) else: new_neighb_inds = neighb_inds # Get Kernel point influences [n_points, n_kpoints, n_neighbors] if self.KP_influence == 'constant': # Every point get an influence of 1. all_weights = torch.ones_like(sq_distances) all_weights = torch.transpose(all_weights, 1, 2) elif self.KP_influence == 'linear': # Influence decrease linearly with the distance, and get to zero when d = KP_extent. 
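# i.e. w = max(0, 1 - d / KP_extent), evaluated below for every (neighbor, kernel point) pair: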
all_weights = torch.clamp(1 - torch.sqrt(sq_distances) / self.KP_extent, min=0.0) all_weights = torch.transpose(all_weights, 1, 2) elif self.KP_influence == 'gaussian': # Influence in gaussian of the distance. sigma = self.KP_extent * 0.3 all_weights = radius_gaussian(sq_distances, sigma) all_weights = torch.transpose(all_weights, 1, 2) else: raise ValueError('Unknown influence function type (config.KP_influence)') # In case of closest mode, only the closest KP can influence each point if self.aggregation_mode == 'closest': neighbors_1nn = torch.argmin(sq_distances, dim=2) all_weights *= torch.transpose(nn.functional.one_hot(neighbors_1nn, self.K), 1, 2) elif self.aggregation_mode != 'sum': raise ValueError("Unknown convolution mode. Should be 'closest' or 'sum'") # Add a zero feature for shadow neighbors x = torch.cat((x, torch.zeros_like(x[:1, :])), 0) # Get the features of each neighborhood [n_points, n_neighbors, in_fdim] neighb_x = gather(x, new_neighb_inds) # Apply distance weights [n_points, n_kpoints, in_fdim] weighted_features = torch.matmul(all_weights, neighb_x) # Apply modulations if self.deformable and self.modulated: weighted_features *= modulations.unsqueeze(2) # Apply network weights [n_kpoints, n_points, out_fdim] weighted_features = weighted_features.permute((1, 0, 2)) kernel_outputs = torch.matmul(weighted_features, self.weights) # Convolution sum [n_points, out_fdim] # return torch.sum(kernel_outputs, dim=0) output_features = torch.sum(kernel_outputs, dim=0, keepdim=False) # normalization term. neighbor_features_sum = torch.sum(neighb_x, dim=-1) neighbor_num = torch.sum(torch.gt(neighbor_features_sum, 0.0), dim=-1) neighbor_num = torch.max(neighbor_num, torch.ones_like(neighbor_num)) output_features = output_features / neighbor_num.unsqueeze(1) return output_features def __repr__(self): return 'KPConv(radius: {:.2f}, extent: {:.2f}, in_feat: {:d}, out_feat: {:d})'.format(self.radius, self.KP_extent, self.in_channels, self.out_channels) # ---------------------------------------------------------------------------------------------------------------------- # # Complex blocks # \********************/ # def block_decider(block_name, radius, in_dim, out_dim, layer_ind, config): if block_name == 'unary': return UnaryBlock(in_dim, out_dim, config.use_batch_norm, config.batch_norm_momentum) if block_name == 'last_unary': return LastUnaryBlock(in_dim, config.final_feats_dim+2, config.use_batch_norm, config.batch_norm_momentum) if block_name == 'last_unary_v2': return LastUnaryBlock(in_dim, config.final_feats_dim+config.keypoint_num+1, config.use_batch_norm, config.batch_norm_momentum) # if block_name == 'last_unary_pose': # return LastUnaryBlock(in_dim, 7+1, config.use_batch_norm, config.batch_norm_momentum) elif block_name in ['simple', 'simple_deformable', 'simple_invariant', 'simple_equivariant', 'simple_strided', 'simple_deformable_strided', 'simple_invariant_strided', 'simple_equivariant_strided']: return SimpleBlock(block_name, in_dim, out_dim, radius, layer_ind, config) elif block_name in ['resnetb', 'resnetb_invariant', 'resnetb_equivariant', 'resnetb_deformable', 'resnetb_strided', 'resnetb_deformable_strided', 'resnetb_equivariant_strided', 'resnetb_invariant_strided']: return ResnetBottleneckBlock(block_name, in_dim, out_dim, radius, layer_ind, config) elif block_name == 'max_pool' or block_name == 'max_pool_wide': return MaxPoolBlock(layer_ind) elif block_name == 'global_average': return GlobalAverageBlock() elif block_name == 'nearest_upsample': return 
NearestUpsampleBlock(layer_ind) else: raise ValueError('Unknown block name in the architecture definition : ' + block_name) class BatchNormBlock(nn.Module): def __init__(self, in_dim, use_bn, bn_momentum): """ Initialize a batch normalization block. If network does not use batch normalization, replace with biases. :param in_dim: dimension input features :param use_bn: boolean indicating if we use Batch Norm :param bn_momentum: Batch norm momentum """ super(BatchNormBlock, self).__init__() self.bn_momentum = bn_momentum self.use_bn = use_bn self.in_dim = in_dim if self.use_bn: #self.batch_norm = nn.BatchNorm1d(in_dim, momentum=bn_momentum) self.batch_norm = nn.InstanceNorm1d(in_dim, momentum=bn_momentum) else: self.bias = Parameter(torch.zeros(in_dim, dtype=torch.float32), requires_grad=True) return def reset_parameters(self): nn.init.zeros_(self.bias) def forward(self, x): if self.use_bn: x = x.unsqueeze(2) x = x.transpose(0, 2) x = self.batch_norm(x) x = x.transpose(0, 2) return x.squeeze() else: return x + self.bias def __repr__(self): return 'BatchNormBlock(in_feat: {:d}, momentum: {:.3f}, only_bias: {:s})'.format(self.in_dim, self.bn_momentum, str(not self.use_bn)) class UnaryBlock(nn.Module): def __init__(self, in_dim, out_dim, use_bn, bn_momentum, no_relu=False): """ Initialize a standard unary block with its ReLU and BatchNorm. :param in_dim: dimension input features :param out_dim: dimension input features :param use_bn: boolean indicating if we use Batch Norm :param bn_momentum: Batch norm momentum """ super(UnaryBlock, self).__init__() self.bn_momentum = bn_momentum self.use_bn = use_bn self.no_relu = no_relu self.in_dim = in_dim self.out_dim = out_dim self.mlp = nn.Linear(in_dim, out_dim, bias=False) self.batch_norm = BatchNormBlock(out_dim, self.use_bn, self.bn_momentum) if not no_relu: self.leaky_relu = nn.LeakyReLU(0.1) return def forward(self, x, batch=None): x = self.mlp(x) x = self.batch_norm(x) if not self.no_relu: x = self.leaky_relu(x) return x def __repr__(self): return 'UnaryBlock(in_feat: {:d}, out_feat: {:d}, BN: {:s}, ReLU: {:s})'.format(self.in_dim, self.out_dim, str(self.use_bn), str(not self.no_relu)) class LastUnaryBlock(nn.Module): def __init__(self, in_dim, out_dim, use_bn, bn_momentum, no_relu=False): """ Initialize a standard last_unary block without BN, ReLU. :param in_dim: dimension input features :param out_dim: dimension input features :param use_bn: boolean indicating if we use Batch Norm :param bn_momentum: Batch norm momentum """ super(LastUnaryBlock, self).__init__() self.in_dim = in_dim self.out_dim = out_dim self.mlp = nn.Linear(in_dim, out_dim, bias=False) return def forward(self, x, batch=None): x = self.mlp(x) return x def __repr__(self): return 'LastUnaryBlock(in_feat: {:d}, out_feat: {:d})'.format(self.in_dim, self.out_dim) class SimpleBlock(nn.Module): def __init__(self, block_name, in_dim, out_dim, radius, layer_ind, config): """ Initialize a simple convolution block with its ReLU and BatchNorm. 
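        A single KPConv producing out_dim // 2 features, followed by normalization and a LeakyReLU; the 'strided' variants pool toward the next, coarser point layer (see forward below).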
:param in_dim: dimension of input features :param out_dim: dimension of output features :param radius: current radius of convolution :param config: parameters """ super(SimpleBlock, self).__init__() # get KP_extent from current radius current_extent = radius * config.KP_extent / config.conv_radius # Get other parameters self.bn_momentum = config.batch_norm_momentum self.use_bn = config.use_batch_norm self.layer_ind = layer_ind self.block_name = block_name self.in_dim = in_dim self.out_dim = out_dim # Define the KPConv class self.KPConv = KPConv(config.num_kernel_points, config.in_points_dim, in_dim, out_dim // 2, current_extent, radius, fixed_kernel_points=config.fixed_kernel_points, KP_influence=config.KP_influence, aggregation_mode=config.aggregation_mode, deformable='deform' in block_name, modulated=config.modulated) # Other operations self.batch_norm = BatchNormBlock(out_dim // 2, self.use_bn, self.bn_momentum) self.leaky_relu = nn.LeakyReLU(0.1) return def forward(self, x, batch): if 'strided' in self.block_name: q_pts = batch['points'][self.layer_ind + 1] s_pts = batch['points'][self.layer_ind] neighb_inds = batch['pools'][self.layer_ind] else: q_pts = batch['points'][self.layer_ind] s_pts = batch['points'][self.layer_ind] neighb_inds = batch['neighbors'][self.layer_ind] x = self.KPConv(q_pts, s_pts, neighb_inds, x) return self.leaky_relu(self.batch_norm(x)) class ResnetBottleneckBlock(nn.Module): def __init__(self, block_name, in_dim, out_dim, radius, layer_ind, config): """ Initialize a resnet bottleneck block. :param in_dim: dimension of input features :param out_dim: dimension of output features :param radius: current radius of convolution :param config: parameters """ super(ResnetBottleneckBlock, self).__init__() # get KP_extent from current radius current_extent = radius * config.KP_extent / config.conv_radius # Get other parameters self.bn_momentum = config.batch_norm_momentum self.use_bn = config.use_batch_norm self.block_name = block_name self.layer_ind = layer_ind self.in_dim = in_dim self.out_dim = out_dim # First downscaling mlp if in_dim != out_dim // 4: self.unary1 = UnaryBlock(in_dim, out_dim // 4, self.use_bn, self.bn_momentum) else: self.unary1 = nn.Identity() # KPConv block self.KPConv = KPConv(config.num_kernel_points, config.in_points_dim, out_dim // 4, out_dim // 4, current_extent, radius, fixed_kernel_points=config.fixed_kernel_points, KP_influence=config.KP_influence, aggregation_mode=config.aggregation_mode, deformable='deform' in block_name, modulated=config.modulated) self.batch_norm_conv = BatchNormBlock(out_dim // 4, self.use_bn, self.bn_momentum) # Second upscaling mlp self.unary2 = UnaryBlock(out_dim // 4, out_dim, self.use_bn, self.bn_momentum, no_relu=True) # Optional shortcut mlp if in_dim != out_dim: self.unary_shortcut = UnaryBlock(in_dim, out_dim, self.use_bn, self.bn_momentum, no_relu=True) else: self.unary_shortcut = nn.Identity() # Other operations self.leaky_relu = nn.LeakyReLU(0.1) return def forward(self, features, batch): if 'strided' in self.block_name: q_pts = batch['points'][self.layer_ind + 1] s_pts = batch['points'][self.layer_ind] neighb_inds = batch['pools'][self.layer_ind] else: q_pts = batch['points'][self.layer_ind] s_pts = batch['points'][self.layer_ind] neighb_inds = batch['neighbors'][self.layer_ind] # First downscaling mlp x = self.unary1(features) # Convolution x = self.KPConv(q_pts, s_pts, neighb_inds, x) x = self.leaky_relu(self.batch_norm_conv(x)) # Second upscaling mlp x = self.unary2(x) # Shortcut if 'strided' in self.block_name:
class GlobalAverageBlock(nn.Module):

    def __init__(self):
        """
        Initialize a global average block.
        """
        super(GlobalAverageBlock, self).__init__()
        return

    def forward(self, x, batch):
        return global_average(x, batch['stack_lengths'][-1])


class NearestUpsampleBlock(nn.Module):

    def __init__(self, layer_ind):
        """
        Initialize a nearest upsampling block.
        """
        super(NearestUpsampleBlock, self).__init__()
        self.layer_ind = layer_ind
        return

    def forward(self, x, batch):
        return closest_pool(x, batch['upsamples'][self.layer_ind - 1])

    def __repr__(self):
        return 'NearestUpsampleBlock(layer: {:d} -> {:d})'.format(self.layer_ind, self.layer_ind - 1)


class MaxPoolBlock(nn.Module):

    def __init__(self, layer_ind):
        """
        Initialize a max pooling block.
        """
        super(MaxPoolBlock, self).__init__()
        self.layer_ind = layer_ind
        return

    def forward(self, x, batch):
        return max_pool(x, batch['pools'][self.layer_ind + 1])



================================================
FILE: thirdparty/kpconv/lib/__init__.py
================================================



================================================
FILE: thirdparty/kpconv/lib/ply.py
================================================

#
#
#      0===============================0
#      |    PLY files reader/writer    |
#      0===============================0
#
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      function to read/write .ply files
#
# ----------------------------------------------------------------------------------------------------------------------
#
#      Hugues THOMAS - 10/02/2017
#
# ----------------------------------------------------------------------------------------------------------------------
#
#          Imports and global variables
#      \**********************************/
#

# Basic libs
import numpy as np
import sys

# Define PLY types
ply_dtypes = dict([
    (b'int8', 'i1'),
    (b'char', 'i1'),
    (b'uint8', 'u1'),
    (b'uchar', 'u1'),
    (b'int16', 'i2'),
    (b'short', 'i2'),
    (b'uint16', 'u2'),
    (b'ushort', 'u2'),
    (b'int32', 'i4'),
    (b'int', 'i4'),
    (b'uint32', 'u4'),
    (b'uint', 'u4'),
    (b'float32', 'f4'),
    (b'float', 'f4'),
    (b'float64', 'f8'),
    (b'double', 'f8')
])

# Numpy reader format
valid_formats = {'ascii': '', 'binary_big_endian': '>',
                 'binary_little_endian': '<'}

# ----------------------------------------------------------------------------------------------------------------------
#
#           Functions
#       \***************/
#

def parse_header(plyfile, ext):
    # Variables
    line = []
    properties = []
    num_points = None

    while b'end_header' not in line and line != b'':
        line = plyfile.readline()

        if b'element' in line:
            line = line.split()
            num_points = int(line[2])

        elif b'property' in line:
            line = line.split()
            properties.append((line[2].decode(), ext + ply_dtypes[line[1]]))

    return num_points, properties
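# Illustrative note (assumption, not from the original file): for a header
# such as
#
#   element vertex 5
#   property float x
#   property float y
#   property float z
#
# parse_header returns num_points=5 and, for a little-endian binary file
# (ext='<'), properties=[('x', '<f4'), ('y', '<f4'), ('z', '<f4')], which
# np.fromfile accepts directly as a structured dtype.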
def parse_mesh_header(plyfile, ext):
    # Variables
    line = []
    vertex_properties = []
    num_points = None
    num_faces = None
    current_element = None

    while b'end_header' not in line and line != b'':
        line = plyfile.readline()

        # Find point element
        if b'element vertex' in line:
            current_element = 'vertex'
            line = line.split()
            num_points = int(line[2])

        elif b'element face' in line:
            current_element = 'face'
            line = line.split()
            num_faces = int(line[2])

        elif b'property' in line:
            if current_element == 'vertex':
                line = line.split()
                vertex_properties.append((line[2].decode(), ext + ply_dtypes[line[1]]))
            elif current_element == 'face':
                # only the standard vertex-index list property is supported for faces
                if not line.startswith(b'property list uchar int'):
                    raise ValueError('Unsupported faces property : ' + str(line))

    return num_points, num_faces, vertex_properties


def read_ply(filename, triangular_mesh=False):
    """
    Read ".ply" files

    Parameters
    ----------
    filename : string
        the name of the file to read.

    Returns
    -------
    result : array
        data stored in the file

    Examples
    --------
    Store data in file

    >>> points = np.random.rand(5, 3)
    >>> values = np.random.randint(2, size=5)
    >>> write_ply('example.ply', [points, values], ['x', 'y', 'z', 'values'])

    Read the file

    >>> data = read_ply('example.ply')
    >>> values = data['values']
    array([0, 0, 1, 1, 0])
    >>> points = np.vstack((data['x'], data['y'], data['z'])).T
    array([[ 0.466  0.595  0.324]
           [ 0.538  0.407  0.654]
           [ 0.850  0.018  0.988]
           [ 0.395  0.394  0.363]
           [ 0.873  0.996  0.092]])
    """

    with open(filename, 'rb') as plyfile:

        # Check if the file starts with ply
        if b'ply' not in plyfile.readline():
            raise ValueError('The file does not start with the word ply')

        # get binary_little/big or ascii
        fmt = plyfile.readline().split()[1].decode()
        if fmt == "ascii":
            raise ValueError('The file is not binary')

        # get extension for building the numpy dtypes
        ext = valid_formats[fmt]

        # PointCloud reader vs mesh reader
        if triangular_mesh:

            # Parse header
            num_points, num_faces, properties = parse_mesh_header(plyfile, ext)

            # Get point data
            vertex_data = np.fromfile(plyfile, dtype=properties, count=num_points)

            # Get face data
            face_properties = [('k', ext + 'u1'),
                               ('v1', ext + 'i4'),
                               ('v2', ext + 'i4'),
                               ('v3', ext + 'i4')]
            faces_data = np.fromfile(plyfile, dtype=face_properties, count=num_faces)

            # Return vertex data and concatenated faces
            faces = np.vstack((faces_data['v1'], faces_data['v2'], faces_data['v3'])).T
            data = [vertex_data, faces]

        else:

            # Parse header
            num_points, properties = parse_header(plyfile, ext)

            # Get data
            data = np.fromfile(plyfile, dtype=properties, count=num_points)

    return data


def header_properties(field_list, field_names):

    # List of lines to write
    lines = []

    # First line describing element vertex
    lines.append('element vertex %d' % field_list[0].shape[0])

    # Properties lines
    i = 0
    for fields in field_list:
        for field in fields.T:
            lines.append('property %s %s' % (field.dtype.name, field_names[i]))
            i += 1

    return lines


def write_ply(filename, field_list, field_names, triangular_faces=None):
    """
    Write ".ply" files

    Parameters
    ----------
    filename : string
        the name of the file to which the data is saved. A '.ply' extension will be appended to
        the file name if it does not already have one.

    field_list : list, tuple, numpy array
        the fields to be saved in the ply file. Either a numpy array, a list of numpy arrays or a
        tuple of numpy arrays. Each 1D numpy array and each column of 2D numpy arrays are
        considered as one field.

    field_names : list
        the name of each fields as a list of strings. Has to be the same length as the number of
        fields.

    Examples
    --------
    >>> points = np.random.rand(10, 3)
    >>> write_ply('example1.ply', points, ['x', 'y', 'z'])

    >>> values = np.random.randint(2, size=10)
    >>> write_ply('example2.ply', [points, values], ['x', 'y', 'z', 'values'])

    >>> colors = np.random.randint(255, size=(10,3), dtype=np.uint8)
    >>> field_names = ['x', 'y', 'z', 'red', 'green', 'blue', 'values']
    >>> write_ply('example3.ply', [points, colors, values], field_names)
    """

    # Format list input to the right form
    field_list = list(field_list) if (type(field_list) == list or type(field_list) == tuple) else list((field_list,))
    for i, field in enumerate(field_list):
        if field.ndim < 2:
            field_list[i] = field.reshape(-1, 1)
        if field.ndim > 2:
            print('fields have more than 2 dimensions')
            return False

    # check all fields have the same number of data
    n_points = [field.shape[0] for field in field_list]
    if not np.all(np.equal(n_points, n_points[0])):
        print('wrong field dimensions')
        return False

    # Check if field_names and field_list have same nb of column
    n_fields = np.sum([field.shape[1] for field in field_list])
    if (n_fields != len(field_names)):
        print('wrong number of field names')
        return False

    # Add extension if not there
    if not filename.endswith('.ply'):
        filename += '.ply'

    # open in text mode to write the header
    with open(filename, 'w') as plyfile:

        # First magical word
        header = ['ply']

        # Encoding format
        header.append('format binary_' + sys.byteorder + '_endian 1.0')

        # Points properties description
        header.extend(header_properties(field_list, field_names))

        # Add faces if needed
        if triangular_faces is not None:
            header.append('element face {:d}'.format(triangular_faces.shape[0]))
            header.append('property list uchar int vertex_indices')

        # End of header
        header.append('end_header')

        # Write all lines
        for line in header:
            plyfile.write("%s\n" % line)

    # open in binary/append to use tofile
    with open(filename, 'ab') as plyfile:

        # Create a structured array
        i = 0
        type_list = []
        for fields in field_list:
            for field in fields.T:
                type_list += [(field_names[i], field.dtype.str)]
                i += 1
        data = np.empty(field_list[0].shape[0], dtype=type_list)
        i = 0
        for fields in field_list:
            for field in fields.T:
                data[field_names[i]] = field
                i += 1

        data.tofile(plyfile)

        if triangular_faces is not None:
            triangular_faces = triangular_faces.astype(np.int32)
            type_list = [('k', 'uint8')] + [(str(ind), 'int32') for ind in range(3)]
            data = np.empty(triangular_faces.shape[0], dtype=type_list)
            data['k'] = np.full((triangular_faces.shape[0],), 3, dtype=np.uint8)
            data['0'] = triangular_faces[:, 0]
            data['1'] = triangular_faces[:, 1]
            data['2'] = triangular_faces[:, 2]
            data.tofile(plyfile)

    return True
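# Hedged round-trip sketch (illustrative, not part of the original file),
# using write_ply/read_ply defined above; the file name and field names are
# assumptions for illustration:
#
#   points = np.random.rand(5, 3).astype(np.float32)
#   labels = np.random.randint(2, size=5).astype(np.int32)
#   write_ply('cloud.ply', [points, labels], ['x', 'y', 'z', 'label'])
#   data = read_ply('cloud.ply')
#   xyz = np.vstack((data['x'], data['y'], data['z'])).T   # back to (5, 3)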
def describe_element(name, df):
    """
    Takes the columns of the dataframe and builds a ply-like description

    Parameters
    ----------
    name: str
    df: pandas DataFrame

    Returns
    -------
    element: list[str]
    """
    property_formats = {'f': 'float', 'u': 'uchar', 'i': 'int'}
    element = ['element ' + name + ' ' + str(len(df))]

    if name == 'face':
        element.append("property list uchar int points_indices")
    else:
        for i in range(len(df.columns)):
            # get first letter of dtype to infer format
            f = property_formats[str(df.dtypes[i])[0]]
            element.append('property ' + f + ' ' + df.columns.values[i])

    return element



================================================
FILE: thirdparty/kpconv/lib/timer.py
================================================

import time


class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0.0
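        # running sum of squares, kept so update() can maintain a variance estimate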
self.sq_sum = 0.0 self.count = 0 def update(self, val, n=1): self.val = val self.sum += val * n self.count += n self.avg = self.sum / self.count self.sq_sum += val ** 2 * n self.var = self.sq_sum / self.count - self.avg ** 2 class Timer(object): """A simple timer.""" def __init__(self): self.total_time = 0. self.calls = 0 self.start_time = 0. self.diff = 0. self.avg = 0. def reset(self): self.total_time = 0 self.calls = 0 self.start_time = 0 self.diff = 0 self.avg = 0 def tic(self): # using time.time instead of time.clock because time time.clock # does not normalize for multithreading self.start_time = time.time() def toc(self, average=True): self.diff = time.time() - self.start_time self.total_time += self.diff self.calls += 1 self.avg = self.total_time / self.calls if average: return self.avg else: return self.diff ================================================ FILE: thirdparty/kpconv/lib/utils.py ================================================ """ General utility functions Author: Shengyu Huang Last modified: 30.11.2020 """ import os,re,sys,json,yaml,random, argparse, torch, pickle import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import numpy as np from scipy.spatial.transform import Rotation from sklearn.neighbors import NearestNeighbors from scipy.spatial.distance import minkowski _EPS = 1e-7 # To prevent division by zero class Logger: def __init__(self, path): self.path = path self.fw = open(self.path+'/log','a') def write(self, text): self.fw.write(text) self.fw.flush() def close(self): self.fw.close() def save_obj(obj, path ): """ save a dictionary to a pickle file """ with open(path, 'wb') as f: pickle.dump(obj, f) def load_obj(path): """ read a dictionary from a pickle file """ with open(path, 'rb') as f: return pickle.load(f) def load_config(path): """ Loads config file: Args: path (str): path to the config file Returns: config (dict): dictionary of the configuration parameters """ with open(path,'r') as f: cfg = yaml.safe_load(f) return cfg def setup_seed(seed): """ fix random seed for deterministic training """ torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) np.random.seed(seed) random.seed(seed) torch.backends.cudnn.deterministic = True def square_distance(src, dst, normalised = False): """ Calculate Euclid distance between each two points. Args: src: source points, [B, N, C] dst: target points, [B, M, C] Returns: dist: per-point square distance, [B, N, M] """ # print(src.shape, dst.shape, '!!!',flush=True) B, N, _ = src.shape _, M, _ = dst.shape dist = -2 * torch.matmul(src, dst.permute(0, 2, 1)) if(normalised): dist += 2 else: dist += torch.sum(src ** 2, dim=-1)[:, :, None] dist += torch.sum(dst ** 2, dim=-1)[:, None, :] dist = torch.clamp(dist, min=1e-12, max=None) return dist def validate_gradient(model): """ Confirm all the gradients are non-nan and non-inf """ for name, param in model.named_parameters(): if param.grad is not None: if torch.any(torch.isnan(param.grad)): return False if torch.any(torch.isinf(param.grad)): return False return True def natural_key(string_): """ Sort strings by numbers in the name """ return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)] ================================================ FILE: thirdparty/nn/_ext.c ================================================ #define _CFFI_ /* We try to define Py_LIMITED_API before including Python.h. Mess: we can only define it if Py_DEBUG, Py_TRACE_REFS and Py_REF_DEBUG are not defined. 
This is a best-effort approximation: we can learn about Py_DEBUG from pyconfig.h, but it is unclear if the same works for the other two macros. Py_DEBUG implies them, but not the other way around. Issue #350 is still open: on Windows, the code here causes it to link with PYTHON36.DLL (for example) instead of PYTHON3.DLL. A fix was attempted in 164e526a5515 and 14ce6985e1c3, but reverted: virtualenv does not make PYTHON3.DLL available, and so the "correctly" compiled version would not run inside a virtualenv. We will re-apply the fix after virtualenv has been fixed for some time. For explanation, see issue #355. For a workaround if you want PYTHON3.DLL and don't worry about virtualenv, see issue #350. See also 'py_limited_api' in setuptools_ext.py. */ #if !defined(_CFFI_USE_EMBEDDING) && !defined(Py_LIMITED_API) # include # if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) # define Py_LIMITED_API # endif #endif #include #ifdef __cplusplus extern "C" { #endif #include /* This part is from file 'cffi/parse_c_type.h'. It is copied at the beginning of C sources generated by CFFI's ffi.set_source(). */ typedef void *_cffi_opcode_t; #define _CFFI_OP(opcode, arg) (_cffi_opcode_t)(opcode | (((uintptr_t)(arg)) << 8)) #define _CFFI_GETOP(cffi_opcode) ((unsigned char)(uintptr_t)cffi_opcode) #define _CFFI_GETARG(cffi_opcode) (((intptr_t)cffi_opcode) >> 8) #define _CFFI_OP_PRIMITIVE 1 #define _CFFI_OP_POINTER 3 #define _CFFI_OP_ARRAY 5 #define _CFFI_OP_OPEN_ARRAY 7 #define _CFFI_OP_STRUCT_UNION 9 #define _CFFI_OP_ENUM 11 #define _CFFI_OP_FUNCTION 13 #define _CFFI_OP_FUNCTION_END 15 #define _CFFI_OP_NOOP 17 #define _CFFI_OP_BITFIELD 19 #define _CFFI_OP_TYPENAME 21 #define _CFFI_OP_CPYTHON_BLTN_V 23 // varargs #define _CFFI_OP_CPYTHON_BLTN_N 25 // noargs #define _CFFI_OP_CPYTHON_BLTN_O 27 // O (i.e. 
a single arg) #define _CFFI_OP_CONSTANT 29 #define _CFFI_OP_CONSTANT_INT 31 #define _CFFI_OP_GLOBAL_VAR 33 #define _CFFI_OP_DLOPEN_FUNC 35 #define _CFFI_OP_DLOPEN_CONST 37 #define _CFFI_OP_GLOBAL_VAR_F 39 #define _CFFI_OP_EXTERN_PYTHON 41 #define _CFFI_PRIM_VOID 0 #define _CFFI_PRIM_BOOL 1 #define _CFFI_PRIM_CHAR 2 #define _CFFI_PRIM_SCHAR 3 #define _CFFI_PRIM_UCHAR 4 #define _CFFI_PRIM_SHORT 5 #define _CFFI_PRIM_USHORT 6 #define _CFFI_PRIM_INT 7 #define _CFFI_PRIM_UINT 8 #define _CFFI_PRIM_LONG 9 #define _CFFI_PRIM_ULONG 10 #define _CFFI_PRIM_LONGLONG 11 #define _CFFI_PRIM_ULONGLONG 12 #define _CFFI_PRIM_FLOAT 13 #define _CFFI_PRIM_DOUBLE 14 #define _CFFI_PRIM_LONGDOUBLE 15 #define _CFFI_PRIM_WCHAR 16 #define _CFFI_PRIM_INT8 17 #define _CFFI_PRIM_UINT8 18 #define _CFFI_PRIM_INT16 19 #define _CFFI_PRIM_UINT16 20 #define _CFFI_PRIM_INT32 21 #define _CFFI_PRIM_UINT32 22 #define _CFFI_PRIM_INT64 23 #define _CFFI_PRIM_UINT64 24 #define _CFFI_PRIM_INTPTR 25 #define _CFFI_PRIM_UINTPTR 26 #define _CFFI_PRIM_PTRDIFF 27 #define _CFFI_PRIM_SIZE 28 #define _CFFI_PRIM_SSIZE 29 #define _CFFI_PRIM_INT_LEAST8 30 #define _CFFI_PRIM_UINT_LEAST8 31 #define _CFFI_PRIM_INT_LEAST16 32 #define _CFFI_PRIM_UINT_LEAST16 33 #define _CFFI_PRIM_INT_LEAST32 34 #define _CFFI_PRIM_UINT_LEAST32 35 #define _CFFI_PRIM_INT_LEAST64 36 #define _CFFI_PRIM_UINT_LEAST64 37 #define _CFFI_PRIM_INT_FAST8 38 #define _CFFI_PRIM_UINT_FAST8 39 #define _CFFI_PRIM_INT_FAST16 40 #define _CFFI_PRIM_UINT_FAST16 41 #define _CFFI_PRIM_INT_FAST32 42 #define _CFFI_PRIM_UINT_FAST32 43 #define _CFFI_PRIM_INT_FAST64 44 #define _CFFI_PRIM_UINT_FAST64 45 #define _CFFI_PRIM_INTMAX 46 #define _CFFI_PRIM_UINTMAX 47 #define _CFFI_PRIM_FLOATCOMPLEX 48 #define _CFFI_PRIM_DOUBLECOMPLEX 49 #define _CFFI_PRIM_CHAR16 50 #define _CFFI_PRIM_CHAR32 51 #define _CFFI__NUM_PRIM 52 #define _CFFI__UNKNOWN_PRIM (-1) #define _CFFI__UNKNOWN_FLOAT_PRIM (-2) #define _CFFI__UNKNOWN_LONG_DOUBLE (-3) #define _CFFI__IO_FILE_STRUCT (-1) struct _cffi_global_s { const char *name; void *address; _cffi_opcode_t type_op; void *size_or_direct_fn; // OP_GLOBAL_VAR: size, or 0 if unknown // OP_CPYTHON_BLTN_*: addr of direct function }; struct _cffi_getconst_s { unsigned long long value; const struct _cffi_type_context_s *ctx; int gindex; }; struct _cffi_struct_union_s { const char *name; int type_index; // -> _cffi_types, on a OP_STRUCT_UNION int flags; // _CFFI_F_* flags below size_t size; int alignment; int first_field_index; // -> _cffi_fields array int num_fields; }; #define _CFFI_F_UNION 0x01 // is a union, not a struct #define _CFFI_F_CHECK_FIELDS 0x02 // complain if fields are not in the // "standard layout" or if some are missing #define _CFFI_F_PACKED 0x04 // for CHECK_FIELDS, assume a packed struct #define _CFFI_F_EXTERNAL 0x08 // in some other ffi.include() #define _CFFI_F_OPAQUE 0x10 // opaque struct _cffi_field_s { const char *name; size_t field_offset; size_t field_size; _cffi_opcode_t field_type_op; }; struct _cffi_enum_s { const char *name; int type_index; // -> _cffi_types, on a OP_ENUM int type_prim; // _CFFI_PRIM_xxx const char *enumerators; // comma-delimited string }; struct _cffi_typename_s { const char *name; int type_index; /* if opaque, points to a possibly artificial OP_STRUCT which is itself opaque */ }; struct _cffi_type_context_s { _cffi_opcode_t *types; const struct _cffi_global_s *globals; const struct _cffi_field_s *fields; const struct _cffi_struct_union_s *struct_unions; const struct _cffi_enum_s *enums; const struct _cffi_typename_s *typenames; int 
num_globals; int num_struct_unions; int num_enums; int num_typenames; const char *const *includes; int num_types; int flags; /* future extension */ }; struct _cffi_parse_info_s { const struct _cffi_type_context_s *ctx; _cffi_opcode_t *output; unsigned int output_size; size_t error_location; const char *error_message; }; struct _cffi_externpy_s { const char *name; size_t size_of_result; void *reserved1, *reserved2; }; #ifdef _CFFI_INTERNAL static int parse_c_type(struct _cffi_parse_info_s *info, const char *input); static int search_in_globals(const struct _cffi_type_context_s *ctx, const char *search, size_t search_len); static int search_in_struct_unions(const struct _cffi_type_context_s *ctx, const char *search, size_t search_len); #endif /* this block of #ifs should be kept exactly identical between c/_cffi_backend.c, cffi/vengine_cpy.py, cffi/vengine_gen.py and cffi/_cffi_include.h */ #if defined(_MSC_VER) # include /* for alloca() */ # if _MSC_VER < 1600 /* MSVC < 2010 */ typedef __int8 int8_t; typedef __int16 int16_t; typedef __int32 int32_t; typedef __int64 int64_t; typedef unsigned __int8 uint8_t; typedef unsigned __int16 uint16_t; typedef unsigned __int32 uint32_t; typedef unsigned __int64 uint64_t; typedef __int8 int_least8_t; typedef __int16 int_least16_t; typedef __int32 int_least32_t; typedef __int64 int_least64_t; typedef unsigned __int8 uint_least8_t; typedef unsigned __int16 uint_least16_t; typedef unsigned __int32 uint_least32_t; typedef unsigned __int64 uint_least64_t; typedef __int8 int_fast8_t; typedef __int16 int_fast16_t; typedef __int32 int_fast32_t; typedef __int64 int_fast64_t; typedef unsigned __int8 uint_fast8_t; typedef unsigned __int16 uint_fast16_t; typedef unsigned __int32 uint_fast32_t; typedef unsigned __int64 uint_fast64_t; typedef __int64 intmax_t; typedef unsigned __int64 uintmax_t; # else # include # endif # if _MSC_VER < 1800 /* MSVC < 2013 */ # ifndef __cplusplus typedef unsigned char _Bool; # endif # endif #else # include # if (defined (__SVR4) && defined (__sun)) || defined(_AIX) || defined(__hpux) # include # endif #endif #ifdef __GNUC__ # define _CFFI_UNUSED_FN __attribute__((unused)) #else # define _CFFI_UNUSED_FN /* nothing */ #endif #ifdef __cplusplus # ifndef _Bool typedef bool _Bool; /* semi-hackish: C++ has no _Bool; bool is builtin */ # endif #endif /********** CPython-specific section **********/ #ifndef PYPY_VERSION #if PY_MAJOR_VERSION >= 3 # define PyInt_FromLong PyLong_FromLong #endif #define _cffi_from_c_double PyFloat_FromDouble #define _cffi_from_c_float PyFloat_FromDouble #define _cffi_from_c_long PyInt_FromLong #define _cffi_from_c_ulong PyLong_FromUnsignedLong #define _cffi_from_c_longlong PyLong_FromLongLong #define _cffi_from_c_ulonglong PyLong_FromUnsignedLongLong #define _cffi_from_c__Bool PyBool_FromLong #define _cffi_to_c_double PyFloat_AsDouble #define _cffi_to_c_float PyFloat_AsDouble #define _cffi_from_c_int(x, type) \ (((type)-1) > 0 ? /* unsigned */ \ (sizeof(type) < sizeof(long) ? \ PyInt_FromLong((long)x) : \ sizeof(type) == sizeof(long) ? \ PyLong_FromUnsignedLong((unsigned long)x) : \ PyLong_FromUnsignedLongLong((unsigned long long)x)) : \ (sizeof(type) <= sizeof(long) ? \ PyInt_FromLong((long)x) : \ PyLong_FromLongLong((long long)x))) #define _cffi_to_c_int(o, type) \ ((type)( \ sizeof(type) == 1 ? (((type)-1) > 0 ? (type)_cffi_to_c_u8(o) \ : (type)_cffi_to_c_i8(o)) : \ sizeof(type) == 2 ? (((type)-1) > 0 ? (type)_cffi_to_c_u16(o) \ : (type)_cffi_to_c_i16(o)) : \ sizeof(type) == 4 ? (((type)-1) > 0 ? 
(type)_cffi_to_c_u32(o) \ : (type)_cffi_to_c_i32(o)) : \ sizeof(type) == 8 ? (((type)-1) > 0 ? (type)_cffi_to_c_u64(o) \ : (type)_cffi_to_c_i64(o)) : \ (Py_FatalError("unsupported size for type " #type), (type)0))) #define _cffi_to_c_i8 \ ((int(*)(PyObject *))_cffi_exports[1]) #define _cffi_to_c_u8 \ ((int(*)(PyObject *))_cffi_exports[2]) #define _cffi_to_c_i16 \ ((int(*)(PyObject *))_cffi_exports[3]) #define _cffi_to_c_u16 \ ((int(*)(PyObject *))_cffi_exports[4]) #define _cffi_to_c_i32 \ ((int(*)(PyObject *))_cffi_exports[5]) #define _cffi_to_c_u32 \ ((unsigned int(*)(PyObject *))_cffi_exports[6]) #define _cffi_to_c_i64 \ ((long long(*)(PyObject *))_cffi_exports[7]) #define _cffi_to_c_u64 \ ((unsigned long long(*)(PyObject *))_cffi_exports[8]) #define _cffi_to_c_char \ ((int(*)(PyObject *))_cffi_exports[9]) #define _cffi_from_c_pointer \ ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[10]) #define _cffi_to_c_pointer \ ((char *(*)(PyObject *, struct _cffi_ctypedescr *))_cffi_exports[11]) #define _cffi_get_struct_layout \ not used any more #define _cffi_restore_errno \ ((void(*)(void))_cffi_exports[13]) #define _cffi_save_errno \ ((void(*)(void))_cffi_exports[14]) #define _cffi_from_c_char \ ((PyObject *(*)(char))_cffi_exports[15]) #define _cffi_from_c_deref \ ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[16]) #define _cffi_to_c \ ((int(*)(char *, struct _cffi_ctypedescr *, PyObject *))_cffi_exports[17]) #define _cffi_from_c_struct \ ((PyObject *(*)(char *, struct _cffi_ctypedescr *))_cffi_exports[18]) #define _cffi_to_c_wchar_t \ ((_cffi_wchar_t(*)(PyObject *))_cffi_exports[19]) #define _cffi_from_c_wchar_t \ ((PyObject *(*)(_cffi_wchar_t))_cffi_exports[20]) #define _cffi_to_c_long_double \ ((long double(*)(PyObject *))_cffi_exports[21]) #define _cffi_to_c__Bool \ ((_Bool(*)(PyObject *))_cffi_exports[22]) #define _cffi_prepare_pointer_call_argument \ ((Py_ssize_t(*)(struct _cffi_ctypedescr *, \ PyObject *, char **))_cffi_exports[23]) #define _cffi_convert_array_from_object \ ((int(*)(char *, struct _cffi_ctypedescr *, PyObject *))_cffi_exports[24]) #define _CFFI_CPIDX 25 #define _cffi_call_python \ ((void(*)(struct _cffi_externpy_s *, char *))_cffi_exports[_CFFI_CPIDX]) #define _cffi_to_c_wchar3216_t \ ((int(*)(PyObject *))_cffi_exports[26]) #define _cffi_from_c_wchar3216_t \ ((PyObject *(*)(int))_cffi_exports[27]) #define _CFFI_NUM_EXPORTS 28 struct _cffi_ctypedescr; static void *_cffi_exports[_CFFI_NUM_EXPORTS]; #define _cffi_type(index) ( \ assert((((uintptr_t)_cffi_types[index]) & 1) == 0), \ (struct _cffi_ctypedescr *)_cffi_types[index]) static PyObject *_cffi_init(const char *module_name, Py_ssize_t version, const struct _cffi_type_context_s *ctx) { PyObject *module, *o_arg, *new_module; void *raw[] = { (void *)module_name, (void *)version, (void *)_cffi_exports, (void *)ctx, }; module = PyImport_ImportModule("_cffi_backend"); if (module == NULL) goto failure; o_arg = PyLong_FromVoidPtr((void *)raw); if (o_arg == NULL) goto failure; new_module = PyObject_CallMethod( module, (char *)"_init_cffi_1_0_external_module", (char *)"O", o_arg); Py_DECREF(o_arg); Py_DECREF(module); return new_module; failure: Py_XDECREF(module); return NULL; } #ifdef HAVE_WCHAR_H typedef wchar_t _cffi_wchar_t; #else typedef uint16_t _cffi_wchar_t; /* same random pick as _cffi_backend.c */ #endif _CFFI_UNUSED_FN static uint16_t _cffi_to_c_char16_t(PyObject *o) { if (sizeof(_cffi_wchar_t) == 2) return (uint16_t)_cffi_to_c_wchar_t(o); else return 
(uint16_t)_cffi_to_c_wchar3216_t(o); } _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x) { if (sizeof(_cffi_wchar_t) == 2) return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else return _cffi_from_c_wchar3216_t((int)x); } _CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o) { if (sizeof(_cffi_wchar_t) == 4) return (int)_cffi_to_c_wchar_t(o); else return (int)_cffi_to_c_wchar3216_t(o); } _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x) { if (sizeof(_cffi_wchar_t) == 4) return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else return _cffi_from_c_wchar3216_t(x); } /********** end CPython-specific section **********/ #else _CFFI_UNUSED_FN static void (*_cffi_call_python_org)(struct _cffi_externpy_s *, char *); # define _cffi_call_python _cffi_call_python_org #endif #define _cffi_array_len(array) (sizeof(array) / sizeof((array)[0])) #define _cffi_prim_int(size, sign) \ ((size) == 1 ? ((sign) ? _CFFI_PRIM_INT8 : _CFFI_PRIM_UINT8) : \ (size) == 2 ? ((sign) ? _CFFI_PRIM_INT16 : _CFFI_PRIM_UINT16) : \ (size) == 4 ? ((sign) ? _CFFI_PRIM_INT32 : _CFFI_PRIM_UINT32) : \ (size) == 8 ? ((sign) ? _CFFI_PRIM_INT64 : _CFFI_PRIM_UINT64) : \ _CFFI__UNKNOWN_PRIM) #define _cffi_prim_float(size) \ ((size) == sizeof(float) ? _CFFI_PRIM_FLOAT : \ (size) == sizeof(double) ? _CFFI_PRIM_DOUBLE : \ (size) == sizeof(long double) ? _CFFI__UNKNOWN_LONG_DOUBLE : \ _CFFI__UNKNOWN_FLOAT_PRIM) #define _cffi_check_int(got, got_nonpos, expected) \ ((got_nonpos) == (expected <= 0) && \ (got) == (unsigned long long)expected) #ifdef MS_WIN32 # define _cffi_stdcall __stdcall #else # define _cffi_stdcall /* nothing */ #endif #ifdef __cplusplus } #endif /************************************************************/ #include "src/ext.h" /************************************************************/ static void *_cffi_types[] = { /* 0 */ _CFFI_OP(_CFFI_OP_FUNCTION, 11), // void()(float *, float *, int *, int, int, int, int, int) /* 1 */ _CFFI_OP(_CFFI_OP_POINTER, 10), // float * /* 2 */ _CFFI_OP(_CFFI_OP_NOOP, 1), /* 3 */ _CFFI_OP(_CFFI_OP_POINTER, 4), // int * /* 4 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), // int /* 5 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), /* 6 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), /* 7 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), /* 8 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 7), /* 9 */ _CFFI_OP(_CFFI_OP_FUNCTION_END, 0), /* 10 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 13), // float /* 11 */ _CFFI_OP(_CFFI_OP_PRIMITIVE, 0), // void }; static void _cffi_d_findNearestPointIdxLauncher(float * x0, float * x1, int * x2, int x3, int x4, int x5, int x6, int x7) { findNearestPointIdxLauncher(x0, x1, x2, x3, x4, x5, x6, x7); } #ifndef PYPY_VERSION static PyObject * _cffi_f_findNearestPointIdxLauncher(PyObject *self, PyObject *args) { float * x0; float * x1; int * x2; int x3; int x4; int x5; int x6; int x7; Py_ssize_t datasize; PyObject *arg0; PyObject *arg1; PyObject *arg2; PyObject *arg3; PyObject *arg4; PyObject *arg5; PyObject *arg6; PyObject *arg7; if (!PyArg_UnpackTuple(args, "findNearestPointIdxLauncher", 8, 8, &arg0, &arg1, &arg2, &arg3, &arg4, &arg5, &arg6, &arg7)) return NULL; datasize = _cffi_prepare_pointer_call_argument( _cffi_type(1), arg0, (char **)&x0); if (datasize != 0) { if (datasize < 0) return NULL; x0 = (float *)alloca((size_t)datasize); memset((void *)x0, 0, (size_t)datasize); if (_cffi_convert_array_from_object((char *)x0, _cffi_type(1), arg0) < 0) return NULL; } datasize = _cffi_prepare_pointer_call_argument( _cffi_type(1), arg1, (char **)&x1); if (datasize != 0) { if (datasize < 0) return NULL; x1 = (float 
*)alloca((size_t)datasize); memset((void *)x1, 0, (size_t)datasize); if (_cffi_convert_array_from_object((char *)x1, _cffi_type(1), arg1) < 0) return NULL; } datasize = _cffi_prepare_pointer_call_argument( _cffi_type(3), arg2, (char **)&x2); if (datasize != 0) { if (datasize < 0) return NULL; x2 = (int *)alloca((size_t)datasize); memset((void *)x2, 0, (size_t)datasize); if (_cffi_convert_array_from_object((char *)x2, _cffi_type(3), arg2) < 0) return NULL; } x3 = _cffi_to_c_int(arg3, int); if (x3 == (int)-1 && PyErr_Occurred()) return NULL; x4 = _cffi_to_c_int(arg4, int); if (x4 == (int)-1 && PyErr_Occurred()) return NULL; x5 = _cffi_to_c_int(arg5, int); if (x5 == (int)-1 && PyErr_Occurred()) return NULL; x6 = _cffi_to_c_int(arg6, int); if (x6 == (int)-1 && PyErr_Occurred()) return NULL; x7 = _cffi_to_c_int(arg7, int); if (x7 == (int)-1 && PyErr_Occurred()) return NULL; Py_BEGIN_ALLOW_THREADS _cffi_restore_errno(); { findNearestPointIdxLauncher(x0, x1, x2, x3, x4, x5, x6, x7); } _cffi_save_errno(); Py_END_ALLOW_THREADS (void)self; /* unused */ Py_INCREF(Py_None); return Py_None; } #else # define _cffi_f_findNearestPointIdxLauncher _cffi_d_findNearestPointIdxLauncher #endif static const struct _cffi_global_s _cffi_globals[] = { { "findNearestPointIdxLauncher", (void *)_cffi_f_findNearestPointIdxLauncher, _CFFI_OP(_CFFI_OP_CPYTHON_BLTN_V, 0), (void *)_cffi_d_findNearestPointIdxLauncher }, }; static const struct _cffi_type_context_s _cffi_type_context = { _cffi_types, _cffi_globals, NULL, /* no fields */ NULL, /* no struct_unions */ NULL, /* no enums */ NULL, /* no typenames */ 1, /* num_globals */ 0, /* num_struct_unions */ 0, /* num_enums */ 0, /* num_typenames */ NULL, /* no includes */ 12, /* num_types */ 0, /* flags */ }; #ifdef __GNUC__ # pragma GCC visibility push(default) /* for -fvisibility= */ #endif #ifdef PYPY_VERSION PyMODINIT_FUNC _cffi_pypyinit__ext(const void *p[]) { p[0] = (const void *)0x2601; p[1] = &_cffi_type_context; #if PY_MAJOR_VERSION >= 3 return NULL; #endif } # ifdef _MSC_VER PyMODINIT_FUNC # if PY_MAJOR_VERSION >= 3 PyInit__ext(void) { return NULL; } # else init_ext(void) { } # endif # endif #elif PY_MAJOR_VERSION >= 3 PyMODINIT_FUNC PyInit__ext(void) { return _cffi_init("_ext", 0x2601, &_cffi_type_context); } #else PyMODINIT_FUNC init_ext(void) { _cffi_init("_ext", 0x2601, &_cffi_type_context); } #endif #ifdef __GNUC__ # pragma GCC visibility pop #endif ================================================ FILE: thirdparty/nn/nn_utils.py ================================================ # from lib.csrc.nn._ext import lib, ffi from thirdparty.nn._ext import lib, ffi import numpy as np def find_nearest_point_idx(ref_pts, que_pts): assert(ref_pts.shape[1] == que_pts.shape[1] and 1 < que_pts.shape[1] <= 3) pn1 = ref_pts.shape[0] pn2 = que_pts.shape[0] dim = ref_pts.shape[1] ref_pts = np.ascontiguousarray(ref_pts[None,:,:], np.float32) que_pts = np.ascontiguousarray(que_pts[None,:,:], np.float32) idxs = np.zeros([1, pn2], np.int32) ref_pts_ptr = ffi.cast('float *', ref_pts.ctypes.data) que_pts_ptr = ffi.cast('float *', que_pts.ctypes.data) idxs_ptr = ffi.cast('int *', idxs.ctypes.data) lib.findNearestPointIdxLauncher(ref_pts_ptr, que_pts_ptr, idxs_ptr, 1, pn1, pn2, dim, 0) return idxs[0] ================================================ FILE: thirdparty/nn/setup.py ================================================ import os cuda_include=os.path.join(os.environ.get('CUDA_HOME'), 'include') os.system('nvcc src/nearest_neighborhood.cu -c -o src/nearest_neighborhood.cu.o -x cu 
-Xcompiler -fPIC -O2 -arch=sm_52 -I {}'.format(cuda_include))

from cffi import FFI
ffibuilder = FFI()

with open(os.path.join(os.path.dirname(__file__), "src/ext.h")) as f:
    ffibuilder.cdef(f.read())

ffibuilder.set_source(
    "_ext",
    """
    #include "src/ext.h"
    """,
    extra_objects=['src/nearest_neighborhood.cu.o',
                   os.path.join(os.environ.get('CUDA_HOME'), 'lib64/libcudart.so')],
    libraries=['stdc++']
)

if __name__ == "__main__":
    ffibuilder.compile(verbose=True)
    os.system("rm src/*.o")
    os.system("rm *.o")



================================================
FILE: thirdparty/nn/src/ext.h
================================================

void findNearestPointIdxLauncher(
    float* ref_pts,   // [b,pn1,dim]
    float* que_pts,   // [b,pn2,dim]
    int* idxs,        // [b,pn2]
    int b, int pn1, int pn2, int dim,
    int exclude_self
);



================================================
FILE: thirdparty/nn/src/nearest_neighborhood.cu
================================================

#include
#include
#include
#include
#include
#include

#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

int infTwoExp(int val)
{
    int inf=1;
    while(val>inf) inf<<=1;
    return inf;
}

void getGPULayout(
    int dim0,int dim1,int dim2,
    int* bdim0,int* bdim1,int* bdim2,
    int* tdim0,int* tdim1,int* tdim2
)
{
    (*tdim2)=64;
    if(dim2<(*tdim2)) (*tdim2)=infTwoExp(dim2);
    (*bdim2)=dim2/(*tdim2);
    if(dim2%(*tdim2)>0) (*bdim2)++;

    (*tdim1)=1024/(*tdim2);
    if(dim1<(*tdim1)) (*tdim1)=infTwoExp(dim1);
    (*bdim1)=dim1/(*tdim1);
    if(dim1%(*tdim1)>0) (*bdim1)++;

    (*tdim0)=1024/((*tdim1)*(*tdim2));
    if(dim0<(*tdim0)) (*tdim0)=infTwoExp(dim0);
    (*bdim0)=dim0/(*tdim0);
    if(dim0%(*tdim0)>0) (*bdim0)++;
}

__global__ void findNearestPoint3DIdxKernel(
    float* ref_pts, // [b,pn1,3]
    float* que_pts, // [b,pn2,3]
    int* idxs,      // [b,pn2]
    int b, int pn1, int pn2,
    int exclude_self
)
{
    int bi = threadIdx.x + blockIdx.x*blockDim.x;
    int p2i = threadIdx.y + blockIdx.y*blockDim.y;
    if(p2i>=pn2||bi>=b) return;

    float x2=que_pts[bi*pn2*3+p2i*3];
    float y2=que_pts[bi*pn2*3+p2i*3+1];
    float z2=que_pts[bi*pn2*3+p2i*3+2];

    float min_dist=FLT_MAX;
    int min_idx=0;
    for(int p1i=0;p1i=pn2||bi>=b) return;
    float x2=que_pts[bi*pn2*2+p2i*2];
    float y2=que_pts[bi*pn2*2+p2i*2+1];
    float min_dist=FLT_MAX;
    int min_idx=0;
    for(int p1i=0;p1i>>(ref_pts_dev,que_pts_dev,idxs_dev,b,pn1,pn2,exclude_self);
    else findNearestPoint2DIdxKernel<<>>(ref_pts_dev,que_pts_dev,idxs_dev,b,pn1,pn2,exclude_self);

    gpuErrchk(cudaGetLastError())
    gpuErrchk(cudaMemcpy(idxs,idxs_dev,b*pn2*sizeof(int),cudaMemcpyDeviceToHost))
    gpuErrchk(cudaFree(ref_pts_dev))
    gpuErrchk(cudaFree(que_pts_dev))
    gpuErrchk(cudaFree(idxs_dev))
}

#ifdef __cplusplus
}
#endif



================================================
FILE: thirdparty/raft/corr.py
================================================

import torch
import torch.nn.functional as F

from .utils.utils import bilinear_sampler, coords_grid

try:
    import alt_cuda_corr
except ImportError:
    # alt_cuda_corr is not compiled
    pass


class CorrBlock:
    def __init__(self, fmap1, fmap2, num_levels=4, radius=4, downsample_rate=1):
        self.num_levels = num_levels
        self.radius = radius
        self.corr_pyramid = []

        # all pairs correlation
        corr = CorrBlock.corr(fmap1, fmap2)

        if downsample_rate > 1:
            batch, h1, w1, dim, h2, w2 = corr.shape
            # max-pool the correlation volume over (h2, w2), then over (h1, w1),
            # working on 4D views so the functional pooling is applicable
            corr = F.max_pool2d(corr.reshape(batch * h1 * w1, dim, h2, w2),
                                downsample_rate, stride=downsample_rate)
            h2, w2 = h2 // downsample_rate, w2 // downsample_rate
            corr = corr.reshape(batch, h1, w1, dim, h2 * w2).permute(0, 4, 3, 1, 2)
            corr = F.max_pool2d(corr.reshape(batch * h2 * w2, dim, h1, w1),
                                downsample_rate, stride=downsample_rate)
            h1, w1 = h1 // downsample_rate, w1 // downsample_rate
            corr = corr.reshape(batch, h2 * w2, dim, h1, w1).permute(0, 3, 4, 2, 1)
            corr = corr.reshape(batch, h1, w1, dim, h2, w2)

        batch, h1, w1, dim, h2, w2 = corr.shape
        corr = corr.reshape(batch * h1 * w1, dim, h2, w2)

        self.corr_pyramid.append(corr)
        for i in range(self.num_levels - 1):
            corr = F.avg_pool2d(corr, 2, stride=2)
            self.corr_pyramid.append(corr)
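    # Hedged usage sketch (illustrative, not part of the original file; the
    # coords_grid(batch, ht, wd) signature is assumed from the RAFT utilities
    # in this package):
    #
    #   fmap1 = torch.randn(1, 64, 32, 40)     # features of image 1
    #   fmap2 = torch.randn(1, 64, 32, 40)     # features of image 2
    #   corr_fn = CorrBlock(fmap1, fmap2, num_levels=4, radius=4)
    #   coords = coords_grid(1, 32, 40)        # [1, 2, H, W] query locations
    #   corr = corr_fn(coords)                 # [1, 4*(2*4+1)**2, 32, 40]
    #
    # Each pyramid level is sampled in a (2*radius+1)^2 window around the
    # (scaled) query coordinates with bilinear_sampler, then concatenated.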
    def __call__(self, coords):
        r = self.radius
        coords = coords.permute(0, 2, 3, 1)
        batch, h1, w1, _ = coords.shape

        out_pyramid = []
        for i in range(self.num_levels):
            corr = self.corr_pyramid[i]
            dx = torch.linspace(-r, r, 2*r+1)
            dy = torch.linspace(-r, r, 2*r+1)
            delta = torch.stack(torch.meshgrid(dy, dx), axis=-1).to(coords.device)

            centroid_lvl = coords.reshape(batch*h1*w1, 1, 1, 2) / 2**i
            delta_lvl = delta.view(1, 2*r+1, 2*r+1, 2)
            coords_lvl = centroid_lvl + delta_lvl

            corr = bilinear_sampler(corr, coords_lvl)
            corr = corr.view(batch, h1, w1, -1)
            out_pyramid.append(corr)

        out = torch.cat(out_pyramid, dim=-1)
        return out.permute(0, 3, 1, 2).contiguous().float()

    @staticmethod
    def corr(fmap1, fmap2):
        batch, dim, ht, wd = fmap1.shape
        fmap1 = fmap1.view(batch, dim, ht*wd)
        fmap2 = fmap2.view(batch, dim, ht*wd)

        corr = torch.matmul(fmap1.transpose(1, 2), fmap2)
        corr = corr.view(batch, ht, wd, 1, ht, wd)
        return corr / torch.sqrt(torch.tensor(dim).float())


class AlternateCorrBlock:
    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
        self.num_levels = num_levels
        self.radius = radius

        self.pyramid = [(fmap1, fmap2)]
        for i in range(self.num_levels):
            fmap1 = F.avg_pool2d(fmap1, 2, stride=2)
            fmap2 = F.avg_pool2d(fmap2, 2, stride=2)
            self.pyramid.append((fmap1, fmap2))

    def __call__(self, coords):
        coords = coords.permute(0, 2, 3, 1)
        B, H, W, _ = coords.shape
        dim = self.pyramid[0][0].shape[1]

        corr_list = []
        for i in range(self.num_levels):
            r = self.radius
            fmap1_i = self.pyramid[0][0].permute(0, 2, 3, 1).contiguous()
            fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1).contiguous()

            coords_i = (coords / 2**i).reshape(B, 1, H, W, 2).contiguous()
            corr, = alt_cuda_corr.forward(fmap1_i, fmap2_i, coords_i, r)
            corr_list.append(corr.squeeze(1))

        corr = torch.stack(corr_list, dim=1)
        corr = corr.reshape(B, -1, H, W)
        return corr / torch.sqrt(torch.tensor(dim).float())



================================================
FILE: thirdparty/raft/extractor.py
================================================

import torch
import torch.nn as nn
import torch.nn.functional as F


class ResidualBlock(nn.Module):
    def __init__(self, in_planes, planes, norm_fn='group', stride=1):
        super(ResidualBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.relu = nn.ReLU(inplace=True)

        num_groups = planes // 8

        if norm_fn == 'group':
            self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
            self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
            if not stride == 1:
                self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)

        elif norm_fn == 'batch':
            self.norm1 = nn.BatchNorm2d(planes)
            self.norm2 = nn.BatchNorm2d(planes)
            if not stride == 1:
                self.norm3 = nn.BatchNorm2d(planes)

        elif norm_fn == 'instance':
            self.norm1 = nn.InstanceNorm2d(planes)
            self.norm2 = nn.InstanceNorm2d(planes)
            if not stride == 1:
                self.norm3 = nn.InstanceNorm2d(planes)

        elif norm_fn == 'none':
            self.norm1 = nn.Sequential()
            self.norm2 = nn.Sequential()
            if not stride == 1:
self.norm3 = nn.Sequential() if stride == 1: self.downsample = None else: self.downsample = nn.Sequential( nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3) def forward(self, x): y = x y = self.relu(self.norm1(self.conv1(y))) y = self.relu(self.norm2(self.conv2(y))) if self.downsample is not None: x = self.downsample(x) return self.relu(x+y) class BottleneckBlock(nn.Module): def __init__(self, in_planes, planes, norm_fn='group', stride=1): super(BottleneckBlock, self).__init__() self.conv1 = nn.Conv2d(in_planes, planes//4, kernel_size=1, padding=0) self.conv2 = nn.Conv2d(planes//4, planes//4, kernel_size=3, padding=1, stride=stride) self.conv3 = nn.Conv2d(planes//4, planes, kernel_size=1, padding=0) self.relu = nn.ReLU(inplace=True) num_groups = planes // 8 if norm_fn == 'group': self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4) self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4) self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) if not stride == 1: self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) elif norm_fn == 'batch': self.norm1 = nn.BatchNorm2d(planes//4) self.norm2 = nn.BatchNorm2d(planes//4) self.norm3 = nn.BatchNorm2d(planes) if not stride == 1: self.norm4 = nn.BatchNorm2d(planes) elif norm_fn == 'instance': self.norm1 = nn.InstanceNorm2d(planes//4) self.norm2 = nn.InstanceNorm2d(planes//4) self.norm3 = nn.InstanceNorm2d(planes) if not stride == 1: self.norm4 = nn.InstanceNorm2d(planes) elif norm_fn == 'none': self.norm1 = nn.Sequential() self.norm2 = nn.Sequential() self.norm3 = nn.Sequential() if not stride == 1: self.norm4 = nn.Sequential() if stride == 1: self.downsample = None else: self.downsample = nn.Sequential( nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4) def forward(self, x): y = x y = self.relu(self.norm1(self.conv1(y))) y = self.relu(self.norm2(self.conv2(y))) y = self.relu(self.norm3(self.conv3(y))) if self.downsample is not None: x = self.downsample(x) return self.relu(x+y) class BasicEncoder(nn.Module): def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0, input_dim=3, with_decoder=False, decoder_dim=0): super(BasicEncoder, self).__init__() self.norm_fn = norm_fn if self.norm_fn == 'group': self.norm1 = nn.GroupNorm(num_groups=8, num_channels=64) elif self.norm_fn == 'batch': self.norm1 = nn.BatchNorm2d(64) elif self.norm_fn == 'instance': self.norm1 = nn.InstanceNorm2d(64) elif self.norm_fn == 'none': self.norm1 = nn.Sequential() # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) self.conv1 = nn.Conv2d(input_dim, 64, kernel_size=7, stride=2, padding=3) self.relu1 = nn.ReLU(inplace=True) self.in_planes = 64 self.layer1 = self._make_layer(64, stride=1) self.layer2 = self._make_layer(96, stride=2) self.layer3 = self._make_layer(128, stride=2) # output convolution self.conv2 = nn.Conv2d(128, output_dim, kernel_size=1) self.dropout = None if dropout > 0: self.dropout = nn.Dropout2d(p=dropout) self.with_decoder = with_decoder if with_decoder: self.dec_layer3 = nn.Sequential( nn.Conv2d(128, 96, 3, 1, 1, bias=False), nn.BatchNorm2d(96), self.relu1, nn.UpsamplingBilinear2d(scale_factor=2), ) self.dec_layer2 = nn.Sequential( nn.Conv2d(96+96, 64, 3, 1, 1, bias=False), nn.BatchNorm2d(64), self.relu1, nn.UpsamplingBilinear2d(scale_factor=2), ) self.dec_layer1 = nn.Sequential( nn.Conv2d(64+64, 64, 3, 1, 1, bias=False), nn.BatchNorm2d(64), self.relu1, nn.UpsamplingBilinear2d(scale_factor=2), ) self.reg_layer = 
nn.Conv2d(64, decoder_dim, 1, 1, 0, bias=False) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)): if m.weight is not None: nn.init.constant_(m.weight, 1) if m.bias is not None: nn.init.constant_(m.bias, 0) def _make_layer(self, dim, stride=1): layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride) layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1) layers = (layer1, layer2) self.in_planes = dim return nn.Sequential(*layers) def forward(self, x): ups=[] # if input is list, combine batch dimension is_list = isinstance(x, tuple) or isinstance(x, list) if is_list: batch_dim = x[0].shape[0] x = torch.cat(x, dim=0) x = self.conv1(x) x = self.norm1(x) x = self.relu1(x) ups.append(x) #added x = self.layer1(x) x = self.layer2(x) ups.append(x) #added x = self.layer3(x) if self.with_decoder: dec_x=self.dec_layer3(x) dec_x=self.dec_layer2(torch.cat( [dec_x, ups[-1]], dim=1 ) ) dec_x=self.dec_layer1(torch.cat([dec_x, ups[-2]], dim=1 ) ) dec_x = self.reg_layer(dec_x) x = self.conv2(x) if self.training and self.dropout is not None: x = self.dropout(x) if is_list: x = torch.split(x, [batch_dim, batch_dim], dim=0) if self.with_decoder: dec_x = torch.split(dec_x, [batch_dim, batch_dim], dim=0) if self.with_decoder: return x, dec_x else: return x class BasicEncoder_dx4(nn.Module): def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0): super(BasicEncoder_dx4, self).__init__() self.norm_fn = norm_fn if self.norm_fn == 'group': self.norm1 = nn.GroupNorm(num_groups=8, num_channels=64) elif self.norm_fn == 'batch': self.norm1 = nn.BatchNorm2d(64) elif self.norm_fn == 'instance': self.norm1 = nn.InstanceNorm2d(64) elif self.norm_fn == 'none': self.norm1 = nn.Sequential() # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=3) self.relu1 = nn.ReLU(inplace=True) self.in_planes = 64 self.layer1 = self._make_layer(64, stride=1) self.layer2 = self._make_layer(96, stride=2) self.layer3 = self._make_layer(128, stride=2) # output convolution self.conv2 = nn.Conv2d(128, output_dim, kernel_size=1) self.dropout = None if dropout > 0: self.dropout = nn.Dropout2d(p=dropout) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)): if m.weight is not None: nn.init.constant_(m.weight, 1) if m.bias is not None: nn.init.constant_(m.bias, 0) def _make_layer(self, dim, stride=1): layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride) layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1) layers = (layer1, layer2) self.in_planes = dim return nn.Sequential(*layers) def forward(self, x): # if input is list, combine batch dimension is_list = isinstance(x, tuple) or isinstance(x, list) if is_list: batch_dim = x[0].shape[0] x = torch.cat(x, dim=0) x = self.conv1(x) x = self.norm1(x) x = self.relu1(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.conv2(x) if self.training and self.dropout is not None: x = self.dropout(x) if is_list: x = torch.split(x, [batch_dim, batch_dim], dim=0) return x class SmallEncoder(nn.Module): def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0): super(SmallEncoder, self).__init__() self.norm_fn = norm_fn if self.norm_fn == 'group': self.norm1 = nn.GroupNorm(num_groups=8, 
num_channels=32) elif self.norm_fn == 'batch': self.norm1 = nn.BatchNorm2d(32) elif self.norm_fn == 'instance': self.norm1 = nn.InstanceNorm2d(32) elif self.norm_fn == 'none': self.norm1 = nn.Sequential() self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3) self.relu1 = nn.ReLU(inplace=True) self.in_planes = 32 self.layer1 = self._make_layer(32, stride=1) self.layer2 = self._make_layer(64, stride=2) self.layer3 = self._make_layer(96, stride=2) self.dropout = None if dropout > 0: self.dropout = nn.Dropout2d(p=dropout) self.conv2 = nn.Conv2d(96, output_dim, kernel_size=1) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)): if m.weight is not None: nn.init.constant_(m.weight, 1) if m.bias is not None: nn.init.constant_(m.bias, 0) def _make_layer(self, dim, stride=1): layer1 = BottleneckBlock(self.in_planes, dim, self.norm_fn, stride=stride) layer2 = BottleneckBlock(dim, dim, self.norm_fn, stride=1) layers = (layer1, layer2) self.in_planes = dim return nn.Sequential(*layers) def forward(self, x): # if input is list, combine batch dimension is_list = isinstance(x, tuple) or isinstance(x, list) if is_list: batch_dim = x[0].shape[0] x = torch.cat(x, dim=0) x = self.conv1(x) x = self.norm1(x) x = self.relu1(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.conv2(x) if self.training and self.dropout is not None: x = self.dropout(x) if is_list: x = torch.split(x, [batch_dim, batch_dim], dim=0) return x class SmallEncoder_dx4(nn.Module): def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0): super(SmallEncoder_dx4, self).__init__() self.norm_fn = norm_fn if self.norm_fn == 'group': self.norm1 = nn.GroupNorm(num_groups=8, num_channels=32) elif self.norm_fn == 'batch': self.norm1 = nn.BatchNorm2d(32) elif self.norm_fn == 'instance': self.norm1 = nn.InstanceNorm2d(32) elif self.norm_fn == 'none': self.norm1 = nn.Sequential() # self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3) self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=1, padding=3) self.relu1 = nn.ReLU(inplace=True) self.in_planes = 32 self.layer1 = self._make_layer(32, stride=1) self.layer2 = self._make_layer(64, stride=2) self.layer3 = self._make_layer(96, stride=2) self.dropout = None if dropout > 0: self.dropout = nn.Dropout2d(p=dropout) self.conv2 = nn.Conv2d(96, output_dim, kernel_size=1) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)): if m.weight is not None: nn.init.constant_(m.weight, 1) if m.bias is not None: nn.init.constant_(m.bias, 0) def _make_layer(self, dim, stride=1): layer1 = BottleneckBlock(self.in_planes, dim, self.norm_fn, stride=stride) layer2 = BottleneckBlock(dim, dim, self.norm_fn, stride=1) layers = (layer1, layer2) self.in_planes = dim return nn.Sequential(*layers) def forward(self, x): # if input is list, combine batch dimension is_list = isinstance(x, tuple) or isinstance(x, list) if is_list: batch_dim = x[0].shape[0] x = torch.cat(x, dim=0) x = self.conv1(x) x = self.norm1(x) x = self.relu1(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.conv2(x) if self.training and self.dropout is not None: x = self.dropout(x) if is_list: x = torch.split(x, [batch_dim, batch_dim], dim=0) return x ================================================ FILE: 
thirdparty/raft/update.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F class FlowHead(nn.Module): def __init__(self, input_dim=128, hidden_dim=256): super(FlowHead, self).__init__() self.conv1 = nn.Conv2d(input_dim, hidden_dim, 3, padding=1) self.conv2 = nn.Conv2d(hidden_dim, 2, 3, padding=1) self.relu = nn.ReLU(inplace=True) def forward(self, x): return self.conv2(self.relu(self.conv1(x))) class ConvGRU(nn.Module): def __init__(self, hidden_dim=128, input_dim=192+128): super(ConvGRU, self).__init__() self.convz = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1) self.convr = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1) self.convq = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1) def forward(self, h, x): hx = torch.cat([h, x], dim=1) z = torch.sigmoid(self.convz(hx)) r = torch.sigmoid(self.convr(hx)) q = torch.tanh(self.convq(torch.cat([r*h, x], dim=1))) h = (1-z) * h + z * q return h class SepConvGRU(nn.Module): def __init__(self, hidden_dim=128, input_dim=192+128): super(SepConvGRU, self).__init__() self.convz1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2)) self.convr1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2)) self.convq1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2)) self.convz2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0)) self.convr2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0)) self.convq2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0)) def forward(self, h, x): # horizontal hx = torch.cat([h, x], dim=1) z = torch.sigmoid(self.convz1(hx)) r = torch.sigmoid(self.convr1(hx)) q = torch.tanh(self.convq1(torch.cat([r*h, x], dim=1))) h = (1-z) * h + z * q # vertical hx = torch.cat([h, x], dim=1) z = torch.sigmoid(self.convz2(hx)) r = torch.sigmoid(self.convr2(hx)) q = torch.tanh(self.convq2(torch.cat([r*h, x], dim=1))) h = (1-z) * h + z * q return h class SmallMotionEncoder(nn.Module): def __init__(self, args): super(SmallMotionEncoder, self).__init__() cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2 self.convc1 = nn.Conv2d(cor_planes, 96, 1, padding=0) self.convf1 = nn.Conv2d(2, 64, 7, padding=3) self.convf2 = nn.Conv2d(64, 32, 3, padding=1) self.conv = nn.Conv2d(128, 80, 3, padding=1) def forward(self, flow, corr): cor = F.relu(self.convc1(corr)) flo = F.relu(self.convf1(flow)) flo = F.relu(self.convf2(flo)) cor_flo = torch.cat([cor, flo], dim=1) out = F.relu(self.conv(cor_flo)) return torch.cat([out, flow], dim=1) class BasicMotionEncoder(nn.Module): def __init__(self, args): super(BasicMotionEncoder, self).__init__() cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2 self.convc1 = nn.Conv2d(cor_planes, 256, 1, padding=0) self.convc2 = nn.Conv2d(256, 192, 3, padding=1) self.convf1 = nn.Conv2d(2, 128, 7, padding=3) self.convf2 = nn.Conv2d(128, 64, 3, padding=1) self.conv = nn.Conv2d(64+192, 128-2, 3, padding=1) def forward(self, flow, corr): cor = F.relu(self.convc1(corr)) cor = F.relu(self.convc2(cor)) flo = F.relu(self.convf1(flow)) flo = F.relu(self.convf2(flo)) cor_flo = torch.cat([cor, flo], dim=1) out = F.relu(self.conv(cor_flo)) return torch.cat([out, flow], dim=1) class BasicMotionEncoderGeo(nn.Module): def __init__(self, args): super(BasicMotionEncoderGeo, self).__init__() cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2 self.convc1 = nn.Conv2d(cor_planes, 256, 1, padding=0) self.convc2 = nn.Conv2d(256, 192, 3, 
padding=1) self.convc1_geo = nn.Conv2d(cor_planes, 256, 1, padding=0) self.convc2_geo = nn.Conv2d(256, 192, 3, padding=1) self.convf1 = nn.Conv2d(2, 128, 7, padding=3) self.convf2 = nn.Conv2d(128, 64, 3, padding=1) self.conv = nn.Conv2d(64+192, 128-2, 3, padding=1) def forward(self, flow, corr, corr_geo): cor = F.relu(self.convc1(corr)) cor = F.relu(self.convc2(cor)) cor_geo = F.relu(self.convc1_geo(corr_geo)) cor_geo = F.relu(self.convc2_geo(cor_geo)) # cor_geo = F.leaky_relu(self.convc1_geo(corr_geo)) # cor_geo = F.leaky_relu(self.convc2_geo(cor_geo)) flo = F.relu(self.convf1(flow)) flo = F.relu(self.convf2(flo)) # cor_flo = torch.cat([cor, flo], dim=1) cor_flo = torch.cat([cor+cor_geo, flo], dim=1) out = F.relu(self.conv(cor_flo)) return torch.cat([out, flow], dim=1) class SmallUpdateBlock(nn.Module): def __init__(self, args, hidden_dim=96): super(SmallUpdateBlock, self).__init__() self.encoder = SmallMotionEncoder(args) self.gru = ConvGRU(hidden_dim=hidden_dim, input_dim=82+64) self.flow_head = FlowHead(hidden_dim, hidden_dim=128) def forward(self, net, inp, corr, flow): motion_features = self.encoder(flow, corr) inp = torch.cat([inp, motion_features], dim=1) net = self.gru(net, inp) delta_flow = self.flow_head(net) return net, None, delta_flow class SmallUpdateBlockUpMask(nn.Module): def __init__(self, args, hidden_dim=96): super(SmallUpdateBlockUpMask, self).__init__() self.encoder = SmallMotionEncoder(args) self.gru = ConvGRU(hidden_dim=hidden_dim, input_dim=82+64) self.flow_head = FlowHead(hidden_dim, hidden_dim=128) self.mask = nn.Sequential( nn.Conv2d(96, 192, 3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(192, 64*9, 1, padding=0)) def forward(self, net, inp, corr, flow): motion_features = self.encoder(flow, corr) inp = torch.cat([inp, motion_features], dim=1) net = self.gru(net, inp) delta_flow = self.flow_head(net) mask = .25 * self.mask(net) # return net, None, delta_flow return net, mask, delta_flow class BasicUpdateBlock(nn.Module): def __init__(self, args, hidden_dim=128, input_dim=128, downsample_scale=8): super(BasicUpdateBlock, self).__init__() self.args = args self.encoder = BasicMotionEncoder(args) self.gru = SepConvGRU(hidden_dim=hidden_dim, input_dim=128+hidden_dim) self.flow_head = FlowHead(hidden_dim, hidden_dim=256) self.mask = nn.Sequential( nn.Conv2d(128, 256, 3, padding=1), nn.ReLU(inplace=True), # nn.Conv2d(256, 64*9, 1, padding=0)) nn.Conv2d(256, downsample_scale*downsample_scale*9, 1, padding=0)) def forward(self, net, inp, corr, flow, upsample=True): motion_features = self.encoder(flow, corr) inp = torch.cat([inp, motion_features], dim=1) net = self.gru(net, inp) delta_flow = self.flow_head(net) # scale mask to balence gradients mask = .25 * self.mask(net) return net, mask, delta_flow class BasicUpdateBlockGeo(nn.Module): #add geo corr input def __init__(self, args, hidden_dim=128, input_dim=128, downsample_scale=8): super(BasicUpdateBlockGeo, self).__init__() self.args = args self.encoder = BasicMotionEncoderGeo(args) self.gru = SepConvGRU(hidden_dim=hidden_dim, input_dim=128+hidden_dim) self.flow_head = FlowHead(hidden_dim, hidden_dim=256) self.mask = nn.Sequential( nn.Conv2d(128, 256, 3, padding=1), nn.ReLU(inplace=True), # nn.Conv2d(256, 64*9, 1, padding=0)) nn.Conv2d(256, downsample_scale*downsample_scale*9, 1, padding=0)) def forward(self, net, inp, corr, geo_corr, flow, upsample=True): motion_features = self.encoder(flow, corr, geo_corr) inp = torch.cat([inp, motion_features], dim=1) net = self.gru(net, inp) delta_flow = self.flow_head(net) # scale 
================================================ FILE: thirdparty/raft/utils/__init__.py ================================================ ================================================ FILE: thirdparty/raft/utils/augmentor.py ================================================ import numpy as np import random import math from PIL import Image import cv2 cv2.setNumThreads(0) cv2.ocl.setUseOpenCL(False) import torch from torchvision.transforms import ColorJitter import torch.nn.functional as F class FlowAugmentor: def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=True): # spatial augmentation params self.crop_size = crop_size self.min_scale = min_scale self.max_scale = max_scale self.spatial_aug_prob = 0.8 self.stretch_prob = 0.8 self.max_stretch = 0.2 # flip augmentation params self.do_flip = do_flip self.h_flip_prob = 0.5 self.v_flip_prob = 0.1 # photometric augmentation params self.photo_aug = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.5/3.14) self.asymmetric_color_aug_prob = 0.2 self.eraser_aug_prob = 0.5 def color_transform(self, img1, img2): """ Photometric augmentation """ # asymmetric if np.random.rand() < self.asymmetric_color_aug_prob: img1 = np.array(self.photo_aug(Image.fromarray(img1)), dtype=np.uint8) img2 = np.array(self.photo_aug(Image.fromarray(img2)), dtype=np.uint8) # symmetric else: image_stack = np.concatenate([img1, img2], axis=0) image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8) img1, img2 = np.split(image_stack, 2, axis=0) return img1, img2 def eraser_transform(self, img1, img2, bounds=[50, 100]): """ Occlusion augmentation """ ht, wd = img1.shape[:2] if np.random.rand() < self.eraser_aug_prob: mean_color = np.mean(img2.reshape(-1, 3), axis=0) for _ in range(np.random.randint(1, 3)): x0 = np.random.randint(0, wd) y0 = np.random.randint(0, ht) dx = np.random.randint(bounds[0], bounds[1]) dy = np.random.randint(bounds[0], bounds[1]) img2[y0:y0+dy, x0:x0+dx, :] = mean_color return img1, img2 def spatial_transform(self, img1, img2, flow): # randomly sample scale ht, wd = img1.shape[:2] min_scale = np.maximum( (self.crop_size[0] + 8) / float(ht), (self.crop_size[1] + 8) / float(wd)) scale = 2 ** np.random.uniform(self.min_scale, self.max_scale) scale_x = scale scale_y = scale if np.random.rand() < self.stretch_prob: scale_x *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch) scale_y *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch) scale_x = np.clip(scale_x, min_scale, None) scale_y = np.clip(scale_y, min_scale, None) if np.random.rand() < self.spatial_aug_prob: # rescale the images img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) flow = cv2.resize(flow, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) flow = flow * [scale_x, scale_y] if self.do_flip: if np.random.rand() < self.h_flip_prob: # h-flip img1 = img1[:, ::-1] img2 = img2[:, ::-1] flow = flow[:, ::-1] * [-1.0, 1.0] if np.random.rand() < self.v_flip_prob: # v-flip img1 = img1[::-1, :] img2 = img2[::-1, :] flow = flow[::-1, :] * [1.0, -1.0] y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0]) x0 = np.random.randint(0, img1.shape[1] - self.crop_size[1]) img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] flow = 
flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] return img1, img2, flow def __call__(self, img1, img2, flow): img1, img2 = self.color_transform(img1, img2) img1, img2 = self.eraser_transform(img1, img2) img1, img2, flow = self.spatial_transform(img1, img2, flow) img1 = np.ascontiguousarray(img1) img2 = np.ascontiguousarray(img2) flow = np.ascontiguousarray(flow) return img1, img2, flow class SparseFlowAugmentor: def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=False): # spatial augmentation params self.crop_size = crop_size self.min_scale = min_scale self.max_scale = max_scale self.spatial_aug_prob = 0.8 self.stretch_prob = 0.8 self.max_stretch = 0.2 # flip augmentation params self.do_flip = do_flip self.h_flip_prob = 0.5 self.v_flip_prob = 0.1 # photometric augmentation params self.photo_aug = ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3/3.14) self.asymmetric_color_aug_prob = 0.2 self.eraser_aug_prob = 0.5 def color_transform(self, img1, img2): image_stack = np.concatenate([img1, img2], axis=0) image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8) img1, img2 = np.split(image_stack, 2, axis=0) return img1, img2 def eraser_transform(self, img1, img2): ht, wd = img1.shape[:2] if np.random.rand() < self.eraser_aug_prob: mean_color = np.mean(img2.reshape(-1, 3), axis=0) for _ in range(np.random.randint(1, 3)): x0 = np.random.randint(0, wd) y0 = np.random.randint(0, ht) dx = np.random.randint(50, 100) dy = np.random.randint(50, 100) img2[y0:y0+dy, x0:x0+dx, :] = mean_color return img1, img2 def resize_sparse_flow_map(self, flow, valid, fx=1.0, fy=1.0): ht, wd = flow.shape[:2] coords = np.meshgrid(np.arange(wd), np.arange(ht)) coords = np.stack(coords, axis=-1) coords = coords.reshape(-1, 2).astype(np.float32) flow = flow.reshape(-1, 2).astype(np.float32) valid = valid.reshape(-1).astype(np.float32) coords0 = coords[valid>=1] flow0 = flow[valid>=1] ht1 = int(round(ht * fy)) wd1 = int(round(wd * fx)) coords1 = coords0 * [fx, fy] flow1 = flow0 * [fx, fy] xx = np.round(coords1[:,0]).astype(np.int32) yy = np.round(coords1[:,1]).astype(np.int32) v = (xx > 0) & (xx < wd1) & (yy > 0) & (yy < ht1) xx = xx[v] yy = yy[v] flow1 = flow1[v] flow_img = np.zeros([ht1, wd1, 2], dtype=np.float32) valid_img = np.zeros([ht1, wd1], dtype=np.int32) flow_img[yy, xx] = flow1 valid_img[yy, xx] = 1 return flow_img, valid_img def spatial_transform(self, img1, img2, flow, valid): # randomly sample scale ht, wd = img1.shape[:2] min_scale = np.maximum( (self.crop_size[0] + 1) / float(ht), (self.crop_size[1] + 1) / float(wd)) scale = 2 ** np.random.uniform(self.min_scale, self.max_scale) scale_x = np.clip(scale, min_scale, None) scale_y = np.clip(scale, min_scale, None) if np.random.rand() < self.spatial_aug_prob: # rescale the images img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) flow, valid = self.resize_sparse_flow_map(flow, valid, fx=scale_x, fy=scale_y) if self.do_flip: if np.random.rand() < 0.5: # h-flip img1 = img1[:, ::-1] img2 = img2[:, ::-1] flow = flow[:, ::-1] * [-1.0, 1.0] valid = valid[:, ::-1] margin_y = 20 margin_x = 50 y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0] + margin_y) x0 = np.random.randint(-margin_x, img1.shape[1] - self.crop_size[1] + margin_x) y0 = np.clip(y0, 0, img1.shape[0] - self.crop_size[0]) x0 = np.clip(x0, 0, img1.shape[1] - self.crop_size[1]) img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] valid = valid[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] return img1, img2, flow, valid def __call__(self, img1, img2, flow, valid): img1, img2 = self.color_transform(img1, img2) img1, img2 = self.eraser_transform(img1, img2) img1, img2, flow, valid = self.spatial_transform(img1, img2, flow, valid) img1 = np.ascontiguousarray(img1) img2 = np.ascontiguousarray(img2) flow = np.ascontiguousarray(flow) valid = np.ascontiguousarray(valid) return img1, img2, flow, valid
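# A short usage sketch for the two augmentors above, under assumed inputs:
# uint8 RGB frames and a dense float32 flow field, plus a validity mask for the
# sparse variant. Shapes and values are illustrative, not repository data.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    img1 = rng.integers(0, 256, (480, 640, 3), dtype=np.uint8)
    img2 = rng.integers(0, 256, (480, 640, 3), dtype=np.uint8)
    flow = rng.standard_normal((480, 640, 2)).astype(np.float32)

    aug = FlowAugmentor(crop_size=(368, 496))
    img1_a, img2_a, flow_a = aug(img1, img2, flow)
    # crops are fixed-size; the photometric/eraser/spatial steps are stochastic
    assert img1_a.shape == (368, 496, 3) and flow_a.shape == (368, 496, 2)

    valid = np.ones((480, 640), dtype=np.float32)
    sparse_aug = SparseFlowAugmentor(crop_size=(368, 496))
    img1_s, img2_s, flow_s, valid_s = sparse_aug(img1, img2, flow, valid)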
================================================ FILE: thirdparty/raft/utils/flow_viz.py ================================================ # Flow visualization code used from https://github.com/tomrunia/OpticalFlow_Visualization # MIT License # # Copyright (c) 2018 Tom Runia # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to conditions. # # Author: Tom Runia # Date Created: 2018-08-03 import numpy as np def make_colorwheel(): """ Generates a color wheel for optical flow visualization as presented in: Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007) URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf Code follows the original C++ source code of Daniel Scharstein. Code follows the Matlab source code of Deqing Sun. Returns: np.ndarray: Color wheel """ RY = 15 YG = 6 GC = 4 CB = 11 BM = 13 MR = 6 ncols = RY + YG + GC + CB + BM + MR colorwheel = np.zeros((ncols, 3)) col = 0 # RY colorwheel[0:RY, 0] = 255 colorwheel[0:RY, 1] = np.floor(255*np.arange(0,RY)/RY) col = col+RY # YG colorwheel[col:col+YG, 0] = 255 - np.floor(255*np.arange(0,YG)/YG) colorwheel[col:col+YG, 1] = 255 col = col+YG # GC colorwheel[col:col+GC, 1] = 255 colorwheel[col:col+GC, 2] = np.floor(255*np.arange(0,GC)/GC) col = col+GC # CB colorwheel[col:col+CB, 1] = 255 - np.floor(255*np.arange(CB)/CB) colorwheel[col:col+CB, 2] = 255 col = col+CB # BM colorwheel[col:col+BM, 2] = 255 colorwheel[col:col+BM, 0] = np.floor(255*np.arange(0,BM)/BM) col = col+BM # MR colorwheel[col:col+MR, 2] = 255 - np.floor(255*np.arange(MR)/MR) colorwheel[col:col+MR, 0] = 255 return colorwheel def flow_uv_to_colors(u, v, convert_to_bgr=False): """ Applies the flow color wheel to (possibly clipped) flow components u and v. According to the C++ source code of Daniel Scharstein According to the Matlab source code of Deqing Sun Args: u (np.ndarray): Input horizontal flow of shape [H,W] v (np.ndarray): Input vertical flow of shape [H,W] convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
Returns: np.ndarray: Flow visualization image of shape [H,W,3] """ flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8) colorwheel = make_colorwheel() # shape [55x3] ncols = colorwheel.shape[0] rad = np.sqrt(np.square(u) + np.square(v)) a = np.arctan2(-v, -u)/np.pi fk = (a+1) / 2*(ncols-1) k0 = np.floor(fk).astype(np.int32) k1 = k0 + 1 k1[k1 == ncols] = 0 f = fk - k0 for i in range(colorwheel.shape[1]): tmp = colorwheel[:,i] col0 = tmp[k0] / 255.0 col1 = tmp[k1] / 255.0 col = (1-f)*col0 + f*col1 idx = (rad <= 1) col[idx] = 1 - rad[idx] * (1-col[idx]) col[~idx] = col[~idx] * 0.75 # out of range # Note the 2-i => BGR instead of RGB ch_idx = 2-i if convert_to_bgr else i flow_image[:,:,ch_idx] = np.floor(255 * col) return flow_image def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False): """ Expects a two dimensional flow image of shape. Args: flow_uv (np.ndarray): Flow UV image of shape [H,W,2] clip_flow (float, optional): Clip maximum of flow values. Defaults to None. convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False. Returns: np.ndarray: Flow visualization image of shape [H,W,3] """ assert flow_uv.ndim == 3, 'input flow must have three dimensions' assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]' if clip_flow is not None: flow_uv = np.clip(flow_uv, 0, clip_flow) u = flow_uv[:,:,0] v = flow_uv[:,:,1] rad = np.sqrt(np.square(u) + np.square(v)) rad_max = np.max(rad) epsilon = 1e-5 u = u / (rad_max + epsilon) v = v / (rad_max + epsilon) return flow_uv_to_colors(u, v, convert_to_bgr) ================================================ FILE: thirdparty/raft/utils/frame_utils.py ================================================ import numpy as np from PIL import Image from os.path import * import re import cv2 cv2.setNumThreads(0) cv2.ocl.setUseOpenCL(False) TAG_CHAR = np.array([202021.25], np.float32) def readFlow(fn): """ Read .flo file in Middlebury format""" # Code adapted from: # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy # WARNING: this will work on little-endian architectures (eg Intel x86) only! # print 'fn = %s'%(fn) with open(fn, 'rb') as f: magic = np.fromfile(f, np.float32, count=1) if 202021.25 != magic: print('Magic number incorrect. Invalid .flo file') return None else: w = np.fromfile(f, np.int32, count=1) h = np.fromfile(f, np.int32, count=1) # print 'Reading %d x %d flo file\n' % (w, h) data = np.fromfile(f, np.float32, count=2*int(w)*int(h)) # Reshape data into 3D array (columns, rows, bands) # The reshape here is for visualization, the original code is (w,h,2) return np.resize(data, (int(h), int(w), 2)) def readPFM(file): file = open(file, 'rb') color = None width = None height = None scale = None endian = None header = file.readline().rstrip() if header == b'PF': color = True elif header == b'Pf': color = False else: raise Exception('Not a PFM file.') dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline()) if dim_match: width, height = map(int, dim_match.groups()) else: raise Exception('Malformed PFM header.') scale = float(file.readline().rstrip()) if scale < 0: # little-endian endian = '<' scale = -scale else: endian = '>' # big-endian data = np.fromfile(file, endian + 'f') shape = (height, width, 3) if color else (height, width) data = np.reshape(data, shape) data = np.flipud(data) return data def writeFlow(filename,uv,v=None): """ Write optical flow to file. If v is None, uv is assumed to contain both u and v channels, stacked in depth. 
Original code by Deqing Sun, adapted from Daniel Scharstein. """ nBands = 2 if v is None: assert(uv.ndim == 3) assert(uv.shape[2] == 2) u = uv[:,:,0] v = uv[:,:,1] else: u = uv assert(u.shape == v.shape) height,width = u.shape f = open(filename,'wb') # write the header f.write(TAG_CHAR) np.array(width).astype(np.int32).tofile(f) np.array(height).astype(np.int32).tofile(f) # arrange into matrix form tmp = np.zeros((height, width*nBands)) tmp[:,np.arange(width)*2] = u tmp[:,np.arange(width)*2 + 1] = v tmp.astype(np.float32).tofile(f) f.close() def readFlowKITTI(filename): flow = cv2.imread(filename, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR) flow = flow[:,:,::-1].astype(np.float32) flow, valid = flow[:, :, :2], flow[:, :, 2] flow = (flow - 2**15) / 64.0 return flow, valid def readDispKITTI(filename): disp = cv2.imread(filename, cv2.IMREAD_ANYDEPTH) / 256.0 valid = disp > 0.0 flow = np.stack([-disp, np.zeros_like(disp)], -1) return flow, valid def writeFlowKITTI(filename, uv): uv = 64.0 * uv + 2**15 valid = np.ones([uv.shape[0], uv.shape[1], 1]) uv = np.concatenate([uv, valid], axis=-1).astype(np.uint16) cv2.imwrite(filename, uv[..., ::-1]) def read_gen(file_name, pil=False): ext = splitext(file_name)[-1] if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg': return Image.open(file_name) elif ext == '.bin' or ext == '.raw': return np.load(file_name) elif ext == '.flo': return readFlow(file_name).astype(np.float32) elif ext == '.pfm': flow = readPFM(file_name).astype(np.float32) if len(flow.shape) == 2: return flow else: return flow[:, :, :-1] return [] ================================================ FILE: thirdparty/raft/utils/utils.py ================================================ import torch import torch.nn.functional as F import numpy as np from scipy import interpolate class InputPadder: """ Pads images such that dimensions are divisible by 8 """ def __init__(self, dims, mode='sintel'): self.ht, self.wd = dims[-2:] pad_ht = (((self.ht // 8) + 1) * 8 - self.ht) % 8 pad_wd = (((self.wd // 8) + 1) * 8 - self.wd) % 8 if mode == 'sintel': self._pad = [pad_wd//2, pad_wd - pad_wd//2, pad_ht//2, pad_ht - pad_ht//2] else: self._pad = [pad_wd//2, pad_wd - pad_wd//2, 0, pad_ht] def pad(self, *inputs): return [F.pad(x, self._pad, mode='replicate') for x in inputs] def unpad(self,x): ht, wd = x.shape[-2:] c = [self._pad[2], ht-self._pad[3], self._pad[0], wd-self._pad[1]] return x[..., c[0]:c[1], c[2]:c[3]] def forward_interpolate(flow): flow = flow.detach().cpu().numpy() dx, dy = flow[0], flow[1] ht, wd = dx.shape x0, y0 = np.meshgrid(np.arange(wd), np.arange(ht)) x1 = x0 + dx y1 = y0 + dy x1 = x1.reshape(-1) y1 = y1.reshape(-1) dx = dx.reshape(-1) dy = dy.reshape(-1) valid = (x1 > 0) & (x1 < wd) & (y1 > 0) & (y1 < ht) x1 = x1[valid] y1 = y1[valid] dx = dx[valid] dy = dy[valid] flow_x = interpolate.griddata( (x1, y1), dx, (x0, y0), method='nearest', fill_value=0) flow_y = interpolate.griddata( (x1, y1), dy, (x0, y0), method='nearest', fill_value=0) flow = np.stack([flow_x, flow_y], axis=0) return torch.from_numpy(flow).float() def bilinear_sampler(img, coords, mode='bilinear', mask=False): """ Wrapper for grid_sample, uses pixel coordinates """ H, W = img.shape[-2:] xgrid, ygrid = coords.split([1,1], dim=-1) xgrid = 2*xgrid/(W-1) - 1 ygrid = 2*ygrid/(H-1) - 1 grid = torch.cat([xgrid, ygrid], dim=-1) img = F.grid_sample(img, grid, align_corners=True) if mask: mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1) return img, mask.float() return img def coords_grid(batch, ht, wd): 
coords = torch.meshgrid(torch.arange(ht), torch.arange(wd)) coords = torch.stack(coords[::-1], dim=0).float() return coords[None].repeat(batch, 1, 1, 1) def upflow8(flow, mode='bilinear'): new_size = (8 * flow.shape[2], 8 * flow.shape[3]) return 8 * F.interpolate(flow, size=new_size, mode=mode, align_corners=True) def upflow(flow, mode='bilinear', scale=8): new_size = (scale * flow.shape[2], scale * flow.shape[3]) return scale * F.interpolate(flow, size=new_size, mode=mode, align_corners=True)
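# An illustrative walk-through of the helpers above under assumed shapes (not
# repository code): pad a frame so H and W are divisible by 8, build a
# pixel-coordinate grid, sample a feature map at those coordinates, and
# upsample a coarse flow field back to the padded resolution.
if __name__ == "__main__":
    img = torch.randn(1, 3, 436, 1024)           # Sintel-sized frame
    padder = InputPadder(img.shape)               # pads 436 -> 440 rows
    img_p, = padder.pad(img)
    assert img_p.shape[-2] % 8 == 0 and img_p.shape[-1] % 8 == 0

    feat = torch.randn(1, 64, 55, 128)            # 1/8-resolution features
    coords = coords_grid(1, 55, 128)              # (B, 2, H, W) pixel coords
    sampled = bilinear_sampler(feat, coords.permute(0, 2, 3, 1))  # identity sampling

    flow_coarse = torch.randn(1, 2, 55, 128)      # flow at 1/8 resolution
    flow_full = upflow8(flow_coarse)              # (1, 2, 440, 1024), values x8
    flow_full = padder.unpad(flow_full)           # back to (1, 2, 436, 1024)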
================================================ FILE: thirdparty/vsd/inout.py ================================================ # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) # Center for Machine Perception, Czech Technical University in Prague import struct import itertools import numpy as np import imageio def load_depth(path): d = imageio.imread(path) d = d.astype(np.float32) return d def load_ply(path): """ Loads a 3D mesh model from a PLY file. :param path: Path to a PLY file. :return: The loaded model given by a dictionary with items: 'pts' (nx3 ndarray), 'normals' (nx3 ndarray), 'colors' (nx3 ndarray), 'faces' (mx3 ndarray) - the latter three are optional. """ f = open(path, 'rb') n_pts = 0 n_faces = 0 face_n_corners = 3 # Only triangular faces are supported pt_props = [] face_props = [] is_binary = False header_vertex_section = False header_face_section = False # Read header while True: line = f.readline().decode('utf8').rstrip('\n').rstrip('\r') # Strip the newline character(s) if line.startswith('element vertex'): n_pts = int(line.split()[-1]) header_vertex_section = True header_face_section = False elif line.startswith('element face'): n_faces = int(line.split()[-1]) header_vertex_section = False header_face_section = True elif line.startswith('element'): # Some other element header_vertex_section = False header_face_section = False elif line.startswith('property') and header_vertex_section: # (name of the property, data type) pt_props.append((line.split()[-1], line.split()[-2])) elif line.startswith('property list') and header_face_section: elems = line.split() if elems[-1] == 'vertex_indices': # (name of the property, data type) face_props.append(('n_corners', elems[2])) for i in range(face_n_corners): face_props.append(('ind_' + str(i), elems[3])) else: print(('Warning: Not supported face property: ' + elems[-1])) elif line.startswith('format'): if 'binary' in line: is_binary = True elif line.startswith('end_header'): break # Prepare data structures model = {} model['pts'] = np.zeros((n_pts, 3), np.float64) if n_faces > 0: model['faces'] = np.zeros((n_faces, face_n_corners), np.float64) pt_props_names = [p[0] for p in pt_props] is_normal = False if {'nx', 'ny', 'nz'}.issubset(set(pt_props_names)): is_normal = True model['normals'] = np.zeros((n_pts, 3), np.float64) is_color = False if {'red', 'green', 'blue'}.issubset(set(pt_props_names)): is_color = True model['colors'] = np.zeros((n_pts, 3), np.float64) is_texture = False if {'texture_u', 'texture_v'}.issubset(set(pt_props_names)): is_texture = True model['texture_uv'] = np.zeros((n_pts, 2), np.float64) formats = { # For binary format 'float': ('f', 4), 'double': ('d', 8), 'int': ('i', 4), 'uchar': ('B', 1) } # Load vertices for pt_id in range(n_pts): prop_vals = {} load_props = ['x', 'y', 'z', 'nx', 'ny', 'nz', 'red', 'green', 'blue', 'texture_u', 'texture_v'] if is_binary: for prop in pt_props: format = formats[prop[1]] val = struct.unpack(format[0], f.read(format[1]))[0] if prop[0] in load_props: prop_vals[prop[0]] = val else: elems = f.readline().decode('utf8').rstrip('\n').rstrip('\r').split() for prop_id, prop in enumerate(pt_props): if prop[0] in load_props: prop_vals[prop[0]] = elems[prop_id] model['pts'][pt_id, 0] = float(prop_vals['x']) model['pts'][pt_id, 1] = float(prop_vals['y']) model['pts'][pt_id, 2] = float(prop_vals['z']) if is_normal: model['normals'][pt_id, 0] = float(prop_vals['nx']) model['normals'][pt_id, 1] = float(prop_vals['ny']) model['normals'][pt_id, 2] = float(prop_vals['nz']) if is_color: model['colors'][pt_id, 0] = float(prop_vals['red']) model['colors'][pt_id, 1] = float(prop_vals['green']) model['colors'][pt_id, 2] = float(prop_vals['blue']) if is_texture: model['texture_uv'][pt_id, 0] = float(prop_vals['texture_u']) model['texture_uv'][pt_id, 1] = float(prop_vals['texture_v']) # Load faces for face_id in range(n_faces): prop_vals = {} if is_binary: for prop in face_props: format = formats[prop[1]] val = struct.unpack(format[0], f.read(format[1]))[0] if prop[0] == 'n_corners': if val != face_n_corners: print('Error: Only triangular faces are supported.') print(('Number of face corners: ' + str(val))) exit(-1) else: prop_vals[prop[0]] = val else: elems = f.readline().decode('utf8').rstrip('\n').rstrip('\r').split() for prop_id, prop in enumerate(face_props): if prop[0] == 'n_corners': if int(elems[prop_id]) != face_n_corners: print('Error: Only triangular faces are supported.') print(('Number of face corners: ' + str(int(elems[prop_id])))) exit(-1) else: prop_vals[prop[0]] = elems[prop_id] model['faces'][face_id, 0] = int(prop_vals['ind_0']) model['faces'][face_id, 1] = int(prop_vals['ind_1']) model['faces'][face_id, 2] = int(prop_vals['ind_2']) f.close() return model ================================================ FILE: tools/eval.py ================================================ #CERTIFICATED import torch import numpy as np import tensorboard from pathlib import Path import json import random import re import torch.backends.cudnn as cudnn import torch.multiprocessing as mp import time import fire import torch.distributed as dist import os from collections import defaultdict import ast import flow_vis import copy from utils.progress_bar import ProgressBar from utils.log_tool import SimpleModelLog from data.preprocess import merge_batch, get_dataloader, get_dataloader_deepim # merge_second_batch_multigpu from utils.config_io import merge_cfg, save_cfg import torchplus from builder import ( dataset_builder, input_reader_builder, lr_scheduler_builder, optimizer_builder, rnnpose_builder ) from utils.distributed_utils import dist_init, average_gradients, DistModule, ParallelWrapper, DistributedSequatialSampler, DistributedGivenIterationSampler, DistributedGivenIterationSamplerEpoch from utils.util import modify_parameter_name_with_map from utils.eval_metric import * from config.default import get_cfg GLOBAL_GPUS_PER_DEVICE = 1 GLOBAL_STEP = 0 RANK=-1 WORLD_SIZE=-1 def load_example_to_device(example, device=None) -> dict: example_torch = {} for k, v in example.items(): if k in ['idx', 'class_name']: example_torch[k]=v continue if type(v) == list: example_torch[k] = [item.to(device=device) for item in v] else: example_torch[k] = v.to(device=device) return example_torch def build_network(model_cfg, measure_time=False, testing=False): net = rnnpose_builder.build( model_cfg, measure_time=measure_time, testing=testing) return net def _worker_init_fn(worker_id): global GLOBAL_STEP time_seed = GLOBAL_STEP np.random.seed(time_seed + worker_id) print(f"WORKER {worker_id} seed:", np.random.get_state()[1][0])
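# Illustrative helper (never called; keys and shapes are assumptions) sketching
# the batch layout load_example_to_device expects: tensors and lists of tensors
# are moved to the target device, while the metadata keys 'idx' and
# 'class_name' are passed through unchanged.
def _example_load_example_to_device():
    example = {
        "image": torch.randn(1, 3, 480, 640),
        "model_points": [torch.randn(1024, 3), torch.randn(512, 3)],
        "class_name": ["cat"],
        "idx": ["000123"],
    }
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    example_torch = load_example_to_device(example, device=device)
    assert example_torch["class_name"] == ["cat"]  # metadata left untouched
    return example_torch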
def freeze_params(params: dict, include: str = None, exclude: str = None): assert isinstance(params, dict) include_re = None if include is not None: include_re = re.compile(include) exclude_re = None if exclude is not None: exclude_re = re.compile(exclude) remain_params = [] for k, p in params.items(): if include_re is not None: if include_re.match(k) is not None: continue if exclude_re is not None: if exclude_re.match(k) is None: continue remain_params.append(p) return remain_params def freeze_params_v2(params: dict, include: str = None, exclude: str = None): assert isinstance(params, dict) include_re = None if include is not None: include_re = re.compile(include) exclude_re = None if exclude is not None: exclude_re = re.compile(exclude) for k, p in params.items(): if include_re is not None: if include_re.match(k) is not None: p.requires_grad = False if exclude_re is not None: if exclude_re.match(k) is None: p.requires_grad = False def filter_param_dict(state_dict: dict, include: str = None, exclude: str = None): assert isinstance(state_dict, dict) include_re = None if include is not None: include_re = re.compile(include) exclude_re = None if exclude is not None: exclude_re = re.compile(exclude) res_dict = {} for k, p in state_dict.items(): if include_re is not None: if include_re.match(k) is None: continue if exclude_re is not None: if exclude_re.match(k) is not None: continue res_dict[k] = p return res_dict
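# Illustrative helper (never called; parameter names are hypothetical) showing
# how the regex filters above behave during partial checkpoint loading:
# `include` keeps only matching keys, `exclude` then drops matching ones.
# Note that re.match anchors each pattern at the start of the parameter name.
def _example_filter_param_dict():
    state = {
        "backbone.conv1.weight": torch.zeros(1),
        "backbone.bn1.weight": torch.zeros(1),
        "pose_head.fc.weight": torch.zeros(1),
    }
    kept = filter_param_dict(state, include="backbone", exclude=r".*\.bn.*")
    assert sorted(kept) == ["backbone.conv1.weight"]
    return kept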
def chk_rank(rank_, use_dist=False): if not use_dist: return True global RANK if RANK<0: RANK=dist.get_rank() cur_rank = RANK#dist.get_rank() # self.world_size = dist.get_world_size() return cur_rank == rank_ def get_rank(use_dist=False): if not use_dist: return 0 else: # return dist.get_rank() global RANK if RANK<0: RANK=dist.get_rank() return RANK def get_world(use_dist): if not use_dist: return 1 else: global WORLD_SIZE if WORLD_SIZE<0: WORLD_SIZE=dist.get_world_size() return WORLD_SIZE #dist.get_world_size() def get_ngpus_per_node(): global GLOBAL_GPUS_PER_DEVICE return GLOBAL_GPUS_PER_DEVICE def multi_proc_train( config_path, model_dir, use_apex, world_size, result_path=None, create_folder=False, display_step=50, summary_step=5, pretrained_path=None, pretrained_include=None, pretrained_exclude=None, pretrained_param_map=None, freeze_include=None, freeze_exclude=None, measure_time=False, resume=False, use_dist=False, gpus_per_node=1, start_gpu_id=0, optim_eval=False, seed=7, dist_port="23335", force_resume_step=None, batch_size=None, apex_opt_level='O0' ): params = { "config_path": config_path, "model_dir": model_dir, "use_apex": use_apex, "result_path": result_path, "create_folder": create_folder, "display_step": display_step, "summary_step": summary_step, "pretrained_path": pretrained_path, "pretrained_include": pretrained_include, "pretrained_exclude": pretrained_exclude, "pretrained_param_map": pretrained_param_map, "freeze_include": freeze_include, "freeze_exclude": freeze_exclude, # "multi_gpu": multi_gpu, "measure_time": measure_time, "resume": resume, "use_dist": use_dist, "gpus_per_node": gpus_per_node, "optim_eval": optim_eval, "seed": seed, "dist_port": dist_port, "world_size": world_size, "force_resume_step":force_resume_step, "batch_size": batch_size, "apex_opt_level":apex_opt_level } from types import SimpleNamespace params = SimpleNamespace(**params) os.environ["CUDA_VISIBLE_DEVICES"] = ','.join( str(x) for x in range(start_gpu_id, start_gpu_id+gpus_per_node)) print(f"CUDA_VISIBLE_DEVICES={os.environ['CUDA_VISIBLE_DEVICES']}" ) mp.spawn(train_worker, nprocs=gpus_per_node, args=( params,) ) def train_worker(rank, params): global RANK, WORLD_SIZE RANK = rank WORLD_SIZE=params.world_size eval(config_path=params.config_path, model_dir=params.model_dir, use_apex=params.use_apex, result_path=params.result_path, create_folder=params.create_folder, display_step=params.display_step, pretrained_path=params.pretrained_path, pretrained_include=params.pretrained_include, pretrained_exclude=params.pretrained_exclude, pretrained_param_map=params.pretrained_param_map, freeze_include=params.freeze_include, freeze_exclude=params.freeze_exclude, measure_time=params.measure_time, resume=params.resume, use_dist=params.use_dist, dist_port=params.dist_port, gpus_per_node=params.gpus_per_node, optim_eval=params.optim_eval, seed=params.seed, force_resume_step=params.force_resume_step, batch_size = params.batch_size, apex_opt_level=params.apex_opt_level ) def eval( config_path, model_dir, use_apex, result_path=None, create_folder=False, display_step=50, summary_step=5, pretrained_path=None, pretrained_include=None, pretrained_exclude=None, pretrained_param_map=None, freeze_include=None, freeze_exclude=None, multi_gpu=False, measure_time=False, resume=False, use_dist=False, dist_port="23335", gpus_per_node=1, optim_eval=False, seed=7, force_resume_step=None, batch_size=None, apex_opt_level='O0', verbose=False ): """Evaluate an RNNPose model specified by a config file. """ print("force_resume_step:", force_resume_step) print("torch.cuda.is_available()=", torch.cuda.is_available()) print("torch.version.cuda=",torch.version.cuda) dist_url=f"tcp://127.0.0.1:{dist_port}" print(f"dist_url={dist_url}", flush=True) global RANK, WORLD_SIZE # RANK, WORLD_SIZE=rank, world_size if RANK<0: RANK=0 if WORLD_SIZE<0: WORLD_SIZE=1 global GLOBAL_GPUS_PER_DEVICE GLOBAL_GPUS_PER_DEVICE = gpus_per_node ######################################## initialize the distributed env ######################################### if use_dist: if use_apex: dist.init_process_group( backend="nccl", init_method=dist_url, world_size=get_world(use_dist), rank=get_rank(use_dist)) else: # rank, world_size = dist_init(str(dist_port)) dist.init_process_group( backend="nccl", init_method=dist_url, world_size=get_world(use_dist), rank=get_rank(use_dist)) print(get_rank(use_dist)%GLOBAL_GPUS_PER_DEVICE, flush=True) #set cuda device number torch.cuda.set_device(get_rank(use_dist)%GLOBAL_GPUS_PER_DEVICE) ############################################ create folders ############################################ print(f"Set seed={seed}", flush=True) random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed(seed) model_dir = str(Path(model_dir).resolve()) model_dir = Path(model_dir) if chk_rank(0, use_dist): if not resume and model_dir.exists(): raise ValueError("model dir exists and you don't specify resume.") model_dir.mkdir(parents=True, exist_ok=True) if result_path is None: result_path = model_dir / 'results' config_file_bkp = "pipeline.config" ############################################# read config proto ############################################ config = merge_cfg( [config_path], intersection=True) if chk_rank(0, use_dist): print(json.dumps(config, indent=4)) if chk_rank(0, use_dist): # save_cfg([default_config_path, custom_config_path], save_cfg([config_path, config_path], str(model_dir / config_file_bkp)) #update the global config object get_cfg().merge(config.get("BASIC",{}),"BASIC" ) input_cfg = 
config.train_input_reader eval_input_cfg = config.eval_input_reader model_cfg = config.model train_cfg = config.train_config optimizer_cfg = train_cfg.optimizer loss_scale = train_cfg.loss_scale_factor ############################################# Update default options ############################################ if batch_size is not None: input_cfg.batch_size = batch_size eval_input_cfg.batch_size = batch_size print(input_cfg.batch_size) ############################################# build network, optimizer etc. ############################################ #dummy dataset dataset_tmp = input_reader_builder.build( eval_input_cfg, training=False, ) model_cfg.obj_seqs = copy.copy(dataset_tmp._dataset.infos["seqs"]) net = build_network(model_cfg, measure_time) # .to(device) net.cuda() fastai_optimizer = optimizer_builder.build( optimizer_cfg, net, mixed=False, loss_scale=loss_scale) print("# parameters:", len(list(net.parameters()))) ############################################# load pretrained model ############################################ if pretrained_path is not None: model_dict = net.state_dict() pretrained_dict = torch.load(pretrained_path, map_location='cpu') if verbose: print("Pretrained keys:", pretrained_dict.keys()) print("Model keys:", model_dict.keys()) pretrained_dict = filter_param_dict( pretrained_dict, pretrained_include, pretrained_exclude) pretrained_dict = modify_parameter_name_with_map( pretrained_dict, ast.literal_eval(str(pretrained_param_map))) new_pretrained_dict = {} for k, v in pretrained_dict.items(): if k in model_dict and v.shape == model_dict[k].shape: new_pretrained_dict[k] = v else: print("Fail to load:", k ) model_dict.update(new_pretrained_dict) net.load_state_dict(model_dict) freeze_params_v2(dict(net.named_parameters()), freeze_include, freeze_exclude) net.clear_global_step() # net.clear_metrics() del pretrained_dict else: ############################################# try to resume from the latest chkpt ############################################ torchplus.train.try_restore_latest_checkpoints(model_dir, [net]) # torchplus.train.try_restore_latest_checkpoints(model_dir, # [fastai_optimizer]) ######################################## parallel the network ######################################### if use_dist: if use_apex: import apex net, amp_optimizer = apex.amp.initialize(net.cuda( ), fastai_optimizer, opt_level="O0", keep_batchnorm_fp32=None, loss_scale=None) net_parallel = apex.parallel.DistributedDataParallel(net) else: # net_parallel = ParallelWrapper(net.cuda(), 'dist') # amp_optimizer = fastai_optimizer amp_optimizer = optimizer_builder.build( optimizer_cfg, net, mixed=False, loss_scale=loss_scale) net_parallel = torch.nn.parallel.DistributedDataParallel(net, device_ids=[get_rank(use_dist)], output_device=get_rank(use_dist) ,find_unused_parameters=True) else: net_parallel = net.cuda() ############################################# build lr_scheduler ############################################ lr_scheduler = lr_scheduler_builder.build(optimizer_cfg, amp_optimizer, train_cfg.steps) float_dtype = torch.float32 ######################################## build dataloaders ######################################### if use_dist: num_gpu = 1 collate_fn = merge_batch else: raise NotImplementedError print(f"MULTI-GPU: using {num_gpu} GPU(s)") ###################### # PREPARE INPUT ###################### dataset = input_reader_builder.build( input_cfg, training=True, ) eval_dataset = input_reader_builder.build( eval_input_cfg, training=False, ) if use_dist: 
train_sampler = DistributedGivenIterationSamplerEpoch( dataset, train_cfg.steps, input_cfg.batch_size, last_iter=net.get_global_step()-1, review_cycle=-1) # train_sampler=DistributedSequatialSampler(dataset) shuffle = False eval_sampler = DistributedSequatialSampler(eval_dataset) else: train_sampler = None eval_sampler = None eval_train_sampler = None shuffle = True dataloader, neighborhood_limits=get_dataloader_deepim(dataset=dataset, kpconv_config=model_cfg.descriptor_net.keypoints_detector_3d, batch_size=input_cfg.batch_size * num_gpu, shuffle=shuffle, num_workers= 2 ,#input_cfg.preprocess.num_workers * num_gpu, sampler=train_sampler ) eval_dataloader, _ =get_dataloader_deepim(dataset=eval_dataset, kpconv_config=model_cfg.descriptor_net.keypoints_detector_3d, batch_size=eval_input_cfg.batch_size, shuffle=False, num_workers= 2, #eval_input_cfg.preprocess.num_workers, sampler=eval_sampler, neighborhood_limits=neighborhood_limits ) ######################################################################################### # TRAINING ########################################################################################## model_logging = SimpleModelLog(model_dir, disable=get_rank(use_dist) != 0) model_logging.open() start_step = net.get_global_step() total_step = train_cfg.steps t = time.time() steps_per_eval = train_cfg.steps_per_eval amp_optimizer.zero_grad() step = start_step epoch = 0 net_parallel.eval() classes=list(set(eval_dataset.dataset.infos['seqs'])) evaluator = dict([(c,LineMODEvaluator(f"{c}",result_path) ) for c in classes ] ) ang_errs=[] trans_errs=[] try: for example in eval_dataloader: global GLOBAL_STEP GLOBAL_STEP = step lr_scheduler.step(net.get_global_step()) example_torch=load_example_to_device(example, device=torch.device("cuda")) batch_size = example["image"].shape[0] with torch.no_grad(): # t1=time.time() ret_dict = net_parallel(example_torch) # print("time:", time.time()-t1) eval_results=evaluator[example['class_name'][0]].evaluate_rnnpose(ret_dict, example_torch ) ang_errs.append(eval_results['ang_err']) trans_errs.append(eval_results['trans_err']) if step%10 ==0: model_logging.log_metrics({ "ang_err": float(ang_errs[-1]), "trans_err": float(trans_errs[-1]), }, GLOBAL_STEP) model_logging.log_images( { "pc_proj_vis": eval_results['pc_proj_vis'].transpose([2,0,1])[None], # HWC->NCHW "pc_proj_vis_pred": eval_results['pc_proj_vis_pred'].transpose([2,0,1])[None], # HWC->NCHW "syn_img": torch.cat(ret_dict["syn_img"], dim=0).detach().cpu(), # "image": example["image"].cpu(), # "image_f": (example["image"].cpu()+ret_dict["syn_img"][0].detach().cpu())/2, }, GLOBAL_STEP, prefix="") net.update_global_step() metrics = defaultdict(dict) GLOBAL_STEP = net.get_global_step() step += 1 for k in evaluator: print(f"###############Evaluation results of class {k}###############") evaluator[k].summarize() except Exception as e: model_logging.log_text(str(e), step) raise e finally: model_logging.close() if __name__ == '__main__': fire.Fire() ================================================ FILE: tools/generate_data_info_deepim_0_orig.py ================================================ import os import numpy as np import copy import pickle import fire import glob import re from data.linemod import linemod_config def parse_pose_file(file): with open(file) as f: lines = [line.strip() for line in f.readlines()] poses = [] for line in lines: poses.append( np.array([np.float32(l) for l in line.split()], dtype=np.float32).reshape((3, 4)) ) return poses def parse_calib_file(file): info = {} with 
open(file) as f: lines = [line.strip() for line in f.readlines()] for i, l in enumerate(lines): nums = np.array([np.float32(x) for x in l.split(' ')[1:]], dtype=np.float32) if i < 4: info[f"calib/P{i}"] = nums.reshape((3, 4)) else: info[f"calib/Tr_velo_to_cam"] = nums.reshape((3, 4)) return info # def create_data_info(data_root, saving_path, is_test_data=False): # def create_data_info(data_root, saving_path, data_type='train'): # def create_data_info(data_root, saving_path, training_data_ratio=0.8, shuffle=True, ): def create_data_info(data_root, saving_path, with_assertion=True): """[summary] info structure: { 0:[ { "index": idx, "lidar_bin_path": lidar_bin_paths[idx], "RT": poses[idx], "K": poses[idx], }, { "index": idx, "lidar_bin_path": lidar_bin_paths[idx], "RT": poses[idx], "K": poses[idx], }, } ... ], 1:[ ] ... } """ idx2class = { 1: "ape", 2: "benchvise", # 3: 'bowl', 4: "camera", 5: "can", 6: "cat", # 7: 'cup', 8: "driller", 9: "duck", 10: "eggbox", 11: "glue", 12: "holepuncher", 13: "iron", 14: "lamp", 15: "phone", } class2idx = dict([[idx2class[k],k ] for k in idx2class.keys() ]) seqs=class2idx.keys() observed_dir = os.path.join('', 'data/observed') gt_observed_dir = os.path.join('', 'data/gt_observed') rendered_dir = os.path.join('', 'data/rendered') set_split_dir = os.path.join('','image_set/observed') # max_items_per_seq=10000#8000#100#10000#2000 # create training data res = {} for seq in seqs: res[seq] = [] rgb_orig_dir = os.path.join(observed_dir, f"{class2idx[seq]:02d}") rgb_noisy_rendered_dir = os.path.join(rendered_dir, seq ) depth_orig_dir= os.path.join(observed_dir, f"{class2idx[seq]:02d}") depth_rendered_dir= os.path.join(gt_observed_dir, seq) depth_noisy_rendered_dir= os.path.join(rendered_dir, seq) gt_pose_dir = os.path.join(gt_observed_dir, seq) noisy_pose_dir = os.path.join(rendered_dir, seq) label_dir = os.path.join(observed_dir, f"{class2idx[seq]:02d}" ) rgb_orig_paths = glob.glob(r'{}/*color.png'.format(rgb_orig_dir) ) train_split_file=os.path.join(data_root,set_split_dir, f"{seq}_train.txt") with open(train_split_file, 'r') as f: train_split = f.readlines() train_split = [ int(t.split('/')[-1] ) for t in train_split] rgb_orig_paths.sort(key=lambda s: int(re.split( '\.|_|-' ,os.path.basename(s))[0]) ) NUM_RENDERED=10 for idx in train_split: #original data paths gt_pose=np.loadtxt(os.path.join(data_root,gt_pose_dir, f"{idx:06d}-pose.txt"), skiprows=1).reshape(3,4) rgb_orig = os.path.join(rgb_orig_dir, f"{idx:06d}-color.png" ) depth_orig=os.path.join(depth_orig_dir, f"{idx:06d}-depth.png" ) depth_rendered = os.path.join(depth_rendered_dir, f"{idx:06d}-depth.png") label_orig=os.path.join(label_dir, f"{idx:06d}-label.png" ) #rendered data paths rgb_noisy_rendered = [os.path.join(rgb_noisy_rendered_dir, f"{idx:06d}_{i}-color.png" ) for i in range(NUM_RENDERED) ] depth_noisy_rendered = [os.path.join(depth_noisy_rendered_dir, f"{idx:06d}_{i}-depth.png" ) for i in range(NUM_RENDERED) ] pose_noisy_rendered = [os.path.join(data_root, noisy_pose_dir, f"{idx:06d}_{i}-pose.txt" ) for i in range(NUM_RENDERED) ] pose_noisy_rendered = [np.loadtxt(p, skiprows=1).reshape(3,4) for p in pose_noisy_rendered ] #generate data pairs for noisy_data_idx in range(NUM_RENDERED): if with_assertion: assert os.path.exists(os.path.join(data_root, rgb_orig) ), os.path.join(data_root, rgb_orig) assert os.path.exists(os.path.join(data_root, depth_orig) ), os.path.join(data_root, depth_orig) assert os.path.exists(os.path.join(data_root, label_orig) ), os.path.join(data_root, label_orig) assert 
os.path.exists(os.path.join(data_root, rgb_noisy_rendered[noisy_data_idx]) ), os.path.join(data_root, rgb_noisy_rendered[noisy_data_idx]) assert os.path.exists(os.path.join(data_root, depth_noisy_rendered[noisy_data_idx] ) ), os.path.join(data_root, depth_noisy_rendered[noisy_data_idx] ) info = { "index": idx, # "rgb_orig_path": rgb_orig, "rgb_observed_path": rgb_orig, "depth_observed_path": depth_orig, "depth_gt_observed_path": depth_rendered, "gt_pose": gt_pose, "rgb_noisy_rendered": rgb_noisy_rendered[noisy_data_idx], "depth_noisy_rendered": depth_noisy_rendered[noisy_data_idx], "pose_noisy_rendered": pose_noisy_rendered[noisy_data_idx], "model_points_path": f"{seq}.bin", #legacy "RT": gt_pose, "K": linemod_config.linemod_K, } res[seq].append(info) print(info['rgb_observed_path'], info['rgb_noisy_rendered']) train_saving_path=saving_path+'.train' with open(train_saving_path, 'wb+') as f: print("Total data amount:", np.sum([len(res[r]) for r in res])) pickle.dump(res, f) # eval_saving_path=saving_path+'.eval' # with open(eval_saving_path, 'wb+') as f: # print("Total data amount:", np.sum([len(test_res[r]) for r in test_res])) # pickle.dump(test_res, f) if __name__ == '__main__': fire.Fire() ================================================ FILE: tools/generate_data_info_deepim_1_syn.py ================================================ import os import numpy as np import copy import pickle import fire import glob import re from data.linemod import linemod_config def parse_pose_file(file): with open(file) as f: lines = [line.strip() for line in f.readlines()] poses = [] for line in lines: poses.append( np.array([np.float32(l) for l in line.split()], dtype=np.float32).reshape((3, 4)) ) return poses def parse_calib_file(file): info = {} with open(file) as f: lines = [line.strip() for line in f.readlines()] for i, l in enumerate(lines): nums = np.array([np.float32(x) for x in l.split(' ')[1:]], dtype=np.float32) if i < 4: info[f"calib/P{i}"] = nums.reshape((3, 4)) else: info[f"calib/Tr_velo_to_cam"] = nums.reshape((3, 4)) return info def create_data_info(data_root, saving_path, with_assertion=False ): """[summary] info structure: { 0:[ { "index": idx, "lidar_bin_path": lidar_bin_paths[idx], "RT": poses[idx], "K": poses[idx], }, { "index": idx, "lidar_bin_path": lidar_bin_paths[idx], "RT": poses[idx], "K": poses[idx], }, } ... ], 1:[ ] ... 
} """ idx2class = { 1: "ape", 2: "benchvise", # 3: 'bowl', 4: "camera", 5: "can", 6: "cat", # 7: 'cup', 8: "driller", 9: "duck", 10: "eggbox", 11: "glue", 12: "holepuncher", 13: "iron", 14: "lamp", 15: "phone", } class2idx = dict([[idx2class[k],k ] for k in idx2class.keys() ]) seqs=class2idx.keys() observed_dir = os.path.join('', 'data/observed') gt_observed_dir = os.path.join('', 'data/gt_observed') rendered_dir = os.path.join('', 'data/rendered') set_split_dir = os.path.join('','image_set/observed') # create training data res = {} test_res = {} for seq in seqs: res[seq] = [] # rgb_orig_dir = os.path.join(observed_dir, f"{class2idx[seq]:02d}") rgb_orig_dir = os.path.join(observed_dir, seq) rgb_noisy_rendered_dir = os.path.join(rendered_dir, seq ) # depth_orig_dir= os.path.join(observed_dir, f"{class2idx[seq]:02d}") depth_orig_dir= os.path.join(observed_dir, seq) # depth_renderd_dir= os.path.join(gt_observed_dir, seq) depth_rendered_dir= os.path.join(gt_observed_dir, seq) depth_noisy_rendered_dir= os.path.join(rendered_dir, seq) gt_pose_dir = os.path.join(gt_observed_dir, seq) noisy_pose_dir = os.path.join(rendered_dir, seq) label_dir = os.path.join(observed_dir, seq) rgb_orig_paths = glob.glob(r'{}/*color.png'.format(rgb_orig_dir) ) # train_split_file=os.path.join(data_root,set_split_dir, f"{seq}_train.txt") train_split_file=os.path.join(data_root,set_split_dir, f"LM6d_data_syn_train_observed_{seq}.txt") with open(train_split_file, 'r') as f: train_split = f.readlines() train_split = [ int(t.split('/')[-1] ) for t in train_split] rgb_orig_paths.sort(key=lambda s: int(re.split( '\.|_|-' ,os.path.basename(s))[0]) ) # data_num=len(image_paths[:max_items_per_seq] ) # if shuffle: # permute=np.random.permutation(data_num) # else: # permute = np.arange(data_num) # train_split=permute[:int(data_num*training_data_ratio)] # eval_split= permute[int(data_num*training_data_ratio):] # for idx in range(len(image_paths[:max_items_per_seq])): NUM_RENDERED=1 for idx in train_split: #original data paths gt_pose=np.loadtxt(os.path.join(data_root,gt_pose_dir, f"{idx:06d}-pose.txt"), skiprows=1).reshape(3,4) rgb_orig = os.path.join(rgb_orig_dir, f"{idx:06d}-color.png" ) depth_orig=os.path.join(depth_orig_dir, f"{idx:06d}-depth.png" ) label_orig=os.path.join(label_dir, f"{idx:06d}-label.png" ) depth_rendered = os.path.join(depth_rendered_dir, f"{idx:06d}-depth.png") #rendered data paths rgb_noisy_rendered = [os.path.join(rgb_noisy_rendered_dir, f"{seq}_{idx:06d}_{i}-color.png" ) for i in range(NUM_RENDERED) ] depth_noisy_rendered = [os.path.join(depth_noisy_rendered_dir, f"{seq}_{idx:06d}_{i}-depth.png" ) for i in range(NUM_RENDERED) ] pose_noisy_rendered = [os.path.join(data_root, noisy_pose_dir, f"{seq}_{idx:06d}_{i}-pose.txt" ) for i in range(NUM_RENDERED) ] pose_noisy_rendered = [np.loadtxt(p, skiprows=1).reshape(3,4) for p in pose_noisy_rendered ] #generate data pairs for noisy_data_idx in range(NUM_RENDERED): if with_assertion: assert os.path.exists(os.path.join(data_root, rgb_orig) ), os.path.join(data_root, rgb_orig) assert os.path.exists(os.path.join(data_root, depth_orig) ), os.path.join(data_root, depth_orig) assert os.path.exists(os.path.join(data_root, label_orig) ), os.path.join(data_root, label_orig) assert os.path.exists(os.path.join(data_root, rgb_noisy_rendered[noisy_data_idx]) ), os.path.join(data_root, rgb_noisy_rendered[noisy_data_idx]) assert os.path.exists(os.path.join(data_root, depth_noisy_rendered[noisy_data_idx] ) ), os.path.join(data_root, depth_noisy_rendered[noisy_data_idx] ) 
assert os.path.exists(os.path.join(data_root, depth_rendered) ), os.path.join(data_root, depth_rendered) # assert os.path.exists(os.path.join(data_root, pose_noisy_rendered[noisy_data_idx] ) ), os.path.join(data_root, pose_noisy_rendered[noisy_data_idx] ) info = { "index": idx, # "rgb_orig_path": rgb_orig, "rgb_observed_path": rgb_orig, "depth_observed_path": depth_orig, "depth_gt_observed_path": depth_rendered, "gt_pose": gt_pose, "rgb_noisy_rendered": rgb_noisy_rendered[noisy_data_idx], "depth_noisy_rendered": depth_noisy_rendered[noisy_data_idx], "pose_noisy_rendered": pose_noisy_rendered[noisy_data_idx], "model_points_path": f"{seq}.bin", #legacy "RT": gt_pose, "K": linemod_config.linemod_K, } print(info['rgb_observed_path'], info['rgb_noisy_rendered']) res[seq].append(info) train_saving_path=saving_path+'.train' with open(train_saving_path, 'wb+') as f: print("Total data amount:", np.sum([len(res[r]) for r in res])) pickle.dump(res, f) # eval_saving_path=saving_path+'.eval' # with open(eval_saving_path, 'wb+') as f: # print("Total data amount:", np.sum([len(test_res[r]) for r in test_res])) # pickle.dump(test_res, f) if __name__ == '__main__': fire.Fire() ================================================ FILE: tools/generate_data_info_deepim_2_posecnnval.py ================================================ import os import numpy as np import copy import pickle import fire import glob import re from data.linemod import linemod_config import scipy.io as sio def parse_pose_file(file): with open(file) as f: lines = [line.strip() for line in f.readlines()] poses = [] for line in lines: poses.append( np.array([np.float32(l) for l in line.split()], dtype=np.float32).reshape((3, 4)) ) return poses def parse_calib_file(file): info = {} with open(file) as f: lines = [line.strip() for line in f.readlines()] for i, l in enumerate(lines): nums = np.array([np.float32(x) for x in l.split(' ')[1:]], dtype=np.float32) if i < 4: info[f"calib/P{i}"] = nums.reshape((3, 4)) else: info[f"calib/Tr_velo_to_cam"] = nums.reshape((3, 4)) return info def create_data_info(data_root, saving_path, with_assertion=True): """[summary] info structure: { 0:[ { "index": idx, "lidar_bin_path": lidar_bin_paths[idx], "RT": poses[idx], "K": poses[idx], }, { "index": idx, "lidar_bin_path": lidar_bin_paths[idx], "RT": poses[idx], "K": poses[idx], }, } ... ], 1:[ ] ... 
} """ # seqs=['cat', 'ape', 'camera', 'duck', 'glue', 'iron', 'phone','benchvise', 'can', 'driller', 'eggbox', 'holepuncher', 'lamp'] idx2class = { 1: "ape", 2: "benchvise", # 3: 'bowl', 4: "camera", 5: "can", 6: "cat", # 7: 'cup', 8: "driller", 9: "duck", 10: "eggbox", 11: "glue", 12: "holepuncher", 13: "iron", 14: "lamp", 15: "phone", } class2idx = dict([[idx2class[k],k ] for k in idx2class.keys() ]) seqs=class2idx.keys() observed_dir = os.path.join('', 'data/observed') gt_observed_dir = os.path.join('', 'data/gt_observed') # rendered_dir = os.path.join('', 'data/rendered') rendered_dir = os.path.join('', 'data/rendered_val_PoseCNN') set_split_dir = os.path.join('','image_set/observed') # max_items_per_seq=10000#8000#100#10000#2000 # create training data res = {} for seq in seqs: res[seq] = [] rgb_orig_dir = os.path.join(observed_dir, f"{class2idx[seq]:02d}") rgb_noisy_rendered_dir = os.path.join(rendered_dir, f"{class2idx[seq]:02d}", seq ) depth_orig_dir= os.path.join(observed_dir, f"{class2idx[seq]:02d}") depth_rendered_dir= os.path.join(gt_observed_dir, seq) depth_noisy_rendered_dir= os.path.join(rendered_dir, f"{class2idx[seq]:02d}", seq) gt_pose_dir = os.path.join(gt_observed_dir, seq) noisy_pose_dir = os.path.join(rendered_dir, f"{class2idx[seq]:02d}" ,seq) label_dir = os.path.join(observed_dir, f"{class2idx[seq]:02d}" ) meta_dir = os.path.join(observed_dir, f"{class2idx[seq]:02d}" ) rgb_orig_paths = glob.glob(r'{}/*color.png'.format(rgb_orig_dir) ) test_split_file=os.path.join(data_root,set_split_dir, f"{seq}_test.txt") with open(test_split_file, 'r') as f: test_split = f.readlines() test_split = [ int(t.split('/')[-1] ) for t in test_split] rgb_orig_paths.sort(key=lambda s: int(re.split( '\.|_|-' ,os.path.basename(s))[0]) ) # for idx in range(len(image_paths[:max_items_per_seq])): NUM_RENDERED=1 for idx in test_split: #original data paths gt_pose=np.loadtxt(os.path.join(data_root,gt_pose_dir, f"{idx:06d}-pose.txt"), skiprows=1).reshape(3,4) rgb_orig = os.path.join(rgb_orig_dir, f"{idx:06d}-color.png" ) depth_orig=os.path.join(depth_orig_dir, f"{idx:06d}-depth.png" ) depth_rendered = os.path.join(depth_rendered_dir, f"{idx:06d}-depth.png") label_orig=os.path.join(label_dir, f"{idx:06d}-label.png" ) meta_path=os.path.join(meta_dir, f"{idx:06d}-meta.mat") #rendered data paths rgb_noisy_rendered = [os.path.join(rgb_noisy_rendered_dir, f"{seq}_{idx:06d}_{i}-color.png" ) for i in range(NUM_RENDERED) ] depth_noisy_rendered = [os.path.join(depth_noisy_rendered_dir, f"{seq}_{idx:06d}_{i}-depth.png" ) for i in range(NUM_RENDERED) ] pose_noisy_rendered = [os.path.join(data_root, noisy_pose_dir, f"{seq}_{idx:06d}_{i}-pose.txt" ) for i in range(NUM_RENDERED) ] pose_noisy_rendered = [np.loadtxt(p, skiprows=1).reshape(3,4) for p in pose_noisy_rendered ] # meta_data=sio.loadmat(meta_path) # assert meta_data["boxes"].shape[0] == 1 #generate data pairs for noisy_data_idx in range(NUM_RENDERED): if with_assertion: assert os.path.exists(os.path.join(data_root, rgb_orig) ), os.path.join(data_root, rgb_orig) assert os.path.exists(os.path.join(data_root, depth_orig) ), os.path.join(data_root, depth_orig) assert os.path.exists(os.path.join(data_root, label_orig) ), os.path.join(data_root, label_orig) assert os.path.exists(os.path.join(data_root, rgb_noisy_rendered[noisy_data_idx]) ), os.path.join(data_root, rgb_noisy_rendered[noisy_data_idx]) assert os.path.exists(os.path.join(data_root, depth_noisy_rendered[noisy_data_idx] ) ), os.path.join(data_root, depth_noisy_rendered[noisy_data_idx] ) info = { 
"index": idx, # "rgb_orig_path": rgb_orig, "rgb_observed_path": rgb_orig, "depth_observed_path": depth_orig, "depth_gt_observed_path": depth_rendered, "gt_pose": gt_pose, "rgb_noisy_rendered": rgb_noisy_rendered[noisy_data_idx], "depth_noisy_rendered": depth_noisy_rendered[noisy_data_idx], "pose_noisy_rendered": pose_noisy_rendered[noisy_data_idx], "model_points_path": f"{seq}.bin", #legacy "RT": gt_pose, "K": linemod_config.linemod_K, # "boxes": meta_data["boxes"] } res[seq].append(info) print(info['rgb_observed_path'], info['rgb_noisy_rendered']) # train_saving_path=saving_path+'.train' train_saving_path=saving_path+'.eval' with open(train_saving_path, 'wb+') as f: print("Total data amount:", np.sum([len(res[r]) for r in res])) pickle.dump(res, f) # eval_saving_path=saving_path+'.eval' # with open(eval_saving_path, 'wb+') as f: # print("Total data amount:", np.sum([len(test_res[r]) for r in test_res])) # pickle.dump(test_res, f) if __name__ == '__main__': fire.Fire() ================================================ FILE: tools/generate_data_info_v2_deepim.py ================================================ #The version compatible with deepim import os import numpy as np import copy import pickle import fire import glob import re def parse_pose_file(file): with open(file) as f: lines = [line.strip() for line in f.readlines()] poses = [] for line in lines: poses.append( np.array([np.float32(l) for l in line.split()], dtype=np.float32).reshape((3, 4)) ) return poses def parse_calib_file(file): info = {} with open(file) as f: lines = [line.strip() for line in f.readlines()] for i, l in enumerate(lines): nums = np.array([np.float32(x) for x in l.split(' ')[1:]], dtype=np.float32) if i < 4: info[f"calib/P{i}"] = nums.reshape((3, 4)) else: info[f"calib/Tr_velo_to_cam"] = nums.reshape((3, 4)) return info # def create_data_info(data_root, saving_path, is_test_data=False): # def create_data_info(data_root, saving_path, data_type='train'): def create_data_info(data_root, saving_path, training_data_ratio=0.8, shuffle=True, ): """[summary] info structure: { 0:[ { "index": idx, "lidar_bin_path": lidar_bin_paths[idx], "RT": poses[idx], "K": poses[idx], }, { "index": idx, "lidar_bin_path": lidar_bin_paths[idx], "RT": poses[idx], "K": poses[idx], }, } ... ], 1:[ ] ... 
} """ image_dir = os.path.join(data_root, ) pose_dir = os.path.join(data_root ) depth_dir = os.path.join(data_root) # blender_to_bop_pose=np.load("/DATA/yxu/LINEMOD/metricpose/blender2bop_RT.npy", allow_pickle=True).flat[0] blender_to_bop_pose=np.load(f"{os.path.dirname(os.path.abspath(__file__)) }/../EXPDATA/init_poses/pose_conversion/blender2bop_RT.npy", allow_pickle=True).flat[0] # seqs=['cat'] seqs=['cat', 'ape', 'cam', 'duck', 'glue', 'iron', 'phone','benchvise', 'can', 'driller', 'eggbox', 'holepuncher', 'lamp'] print(seqs) max_items_per_seq=10000 # create training data res = {} eval_res = {} for seq in seqs: res[seq] = [] eval_res[seq]=[] image_path_dir = os.path.join(image_dir, seq) depth_path_dir = os.path.join(depth_dir, seq) pose_path_dir = os.path.join(pose_dir, seq) # image_paths = os.listdir(lidar_bin_dir) image_paths = glob.glob(r'{}/*.jpg'.format(image_path_dir) ) depth_paths = glob.glob(r'{}/*depth*.npy'.format(depth_path_dir) ) pose_paths = glob.glob(r'{}/*RT.pkl'.format(pose_path_dir) ) #for compatibility if len(pose_paths) ==0: pose_paths = glob.glob(r'{}/*params.pkl'.format(pose_path_dir) ) # image_paths.sort(key=lambda s: int(os.path.basename(s).split('.')[0]) ) image_paths.sort(key=lambda s: int(re.split( '\.|_' ,os.path.basename(s))[0]) ) depth_paths.sort(key=lambda s: int(os.path.basename(s).split('_')[0])) pose_paths.sort(key=lambda s: int(os.path.basename(s).split('_')[0])) data_num=len(image_paths[:max_items_per_seq] ) if shuffle: permute=np.random.permutation(data_num) else: permute = np.arange(data_num) train_split=permute[:int(data_num*training_data_ratio)] eval_split= permute[int(data_num*training_data_ratio):] # for idx in range(len(image_paths[:max_items_per_seq])): for idx in train_split: # print(image_paths[idx], depth_paths[idx]) with open(pose_paths[idx],'rb') as f: pose = pickle.load(f) # pose['K'] = np.array([[572.4114, 0., 325.2611], # [0., 573.57043, 242.04899], # [0., 0., 1.]]) if seq=='cam': bl2bo = blender_to_bop_pose['camera'] else: bl2bo = blender_to_bop_pose[seq] pose["RT"][:3,:3] = pose["RT"][:3,:3]@bl2bo[:3,:3].T # pose["RT"][:3,3:] = -pose["RT"][:3,:3]@bl2bo[:3,:3].T @bl2bo[:3,3:] + pose["RT"][:3,3:] pose["RT"][:3,3:] = -pose["RT"][:3,:3] @bl2bo[:3,3:] + pose["RT"][:3,3:] info = { "index": idx, # "image_path": image_paths[idx].replace(image_dir+'/',''), # "depth_path": depth_paths[idx].replace(depth_dir+'/',''), "rgb_observed_path": image_paths[idx].replace(image_dir,'./'), "depth_gt_observed_path": depth_paths[idx].replace(depth_dir,'./'), "rgb_noisy_rendered": None, "depth_noisy_rendered": None, "pose_noisy_rendered": None, "model_points_path": f"{seq}.bin", # "RT": pose["RT"], "gt_pose": pose["RT"], "K": pose["K"], "bbox":pose.get('bbox', None) } print(info['rgb_observed_path'], info['depth_gt_observed_path'], image_dir)#, bl2bo[:3,:3]) res[seq].append(info) train_saving_path=saving_path+'.train' # eval_saving_path=saving_path+'.eval' with open(train_saving_path, 'wb+') as f: print("Total data amount:", np.sum([len(res[r]) for r in res])) pickle.dump(res, f) # with open(eval_saving_path, 'wb+') as f: # print("Total data amount:", np.sum([len(eval_res[r]) for r in eval_res])) # pickle.dump(eval_res, f) if __name__ == '__main__': fire.Fire() ================================================ FILE: tools/train.py ================================================ import numpy as np import torch from pathlib import Path import json import random import re import torch.backends.cudnn as cudnn import torch.multiprocessing as mp import time import 
fire import torch.distributed as dist import os from collections import defaultdict import kornia import flow_vis import copy import ast from utils.progress_bar import ProgressBar from utils.log_tool import SimpleModelLog from data.preprocess import merge_batch, get_dataloader_deepim # merge_second_batch_multigpu from utils.config_io import merge_cfg, save_cfg import torchplus from builder import ( dataset_builder, input_reader_builder, lr_scheduler_builder, optimizer_builder, rnnpose_builder ) from utils.distributed_utils import dist_init, average_gradients, DistModule, ParallelWrapper, DistributedSequatialSampler, DistributedGivenIterationSampler, DistributedGivenIterationSamplerEpoch # from utils.visualize import vis_pointclouds_cv2, vis_2d_keypoints_cv2 from utils.util import modify_parameter_name_with_map from config.default import get_cfg from data.ycb.basic import bop_ycb_class2idx GLOBAL_GPUS_PER_DEVICE = 1 # None GLOBAL_STEP = 0 RANK=-1 WORLD_SIZE=-1 def load_example_to_device(example, device=None) -> dict: # global GLOBAL_GPUS_PER_DEVICE # device = device % GLOBAL_GPUS_PER_DEVICE or torch.device("cuda:0") # example_torch = defaultdict(list) example_torch = {} for k, v in example.items(): if k in ['class_name', 'idx'] or example[k] is None: example_torch[k] = v continue if type(v) == list: example_torch[k] = [item.to(device=device) for item in v] else: example_torch[k] = v.to(device=device) return example_torch def build_network(model_cfg, measure_time=False, testing=False): net = rnnpose_builder.build( model_cfg, measure_time=measure_time, testing=testing) return net def _worker_init_fn(worker_id): global GLOBAL_STEP time_seed = GLOBAL_STEP np.random.seed(time_seed + worker_id) print(f"WORKER {worker_id} seed:", np.random.get_state()[1][0]) def freeze_params(params: dict, include: str = None, exclude: str = None): assert isinstance(params, dict) include_re = None if include is not None: include_re = re.compile(include) exclude_re = None if exclude is not None: exclude_re = re.compile(exclude) remain_params = [] for k, p in params.items(): if include_re is not None: if include_re.match(k) is not None: continue if exclude_re is not None: if exclude_re.match(k) is None: continue remain_params.append(p) return remain_params def freeze_params_v2(params: dict, include: str = None, exclude: str = None): assert isinstance(params, dict) include_re = None if include is not None: include_re = re.compile(include) exclude_re = None if exclude is not None: exclude_re = re.compile(exclude) for k, p in params.items(): if include_re is not None: if include_re.match(k) is not None: p.requires_grad = False if exclude_re is not None: if exclude_re.match(k) is None: p.requires_grad = False def filter_param_dict(state_dict: dict, include: str = None, exclude: str = None): assert isinstance(state_dict, dict) include_re = None if include is not None: include_re = re.compile(include) exclude_re = None if exclude is not None: exclude_re = re.compile(exclude) res_dict = {} for k, p in state_dict.items(): if include_re is not None: if include_re.match(k) is None: continue if exclude_re is not None: if exclude_re.match(k) is not None: continue res_dict[k] = p return res_dict def chk_rank(rank_, use_dist=False): if not use_dist: return True global RANK if RANK<0: RANK=dist.get_rank() cur_rank = RANK#dist.get_rank() # self.world_size = dist.get_world_size() return cur_rank == rank_ def get_rank(use_dist=False): if not use_dist: return 0 else: # return dist.get_rank() global RANK if RANK<0: RANK=dist.get_rank() 
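# --- Illustrative usage (added for exposition, not part of the original training
# flow): filter_param_dict keeps only state-dict entries whose keys match the
# `include` regex and drops those matching `exclude`. The key names below are
# invented for the demo; `re` and `torch` are already imported above.
def _demo_filter_param_dict():
    fake_state = {
        "backbone.conv1.weight": torch.zeros(1),
        "backbone.conv2.weight": torch.zeros(1),
        "head.fc.weight": torch.zeros(1),
    }
    # keep backbone.* entries, then drop anything containing "conv2"
    kept = filter_param_dict(fake_state, include=r"backbone\..*", exclude=r".*conv2.*")
    assert sorted(kept.keys()) == ["backbone.conv1.weight"]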
return RANK def get_world(use_dist): if not use_dist: return 1 else: global WORLD_SIZE if WORLD_SIZE<0: WORLD_SIZE=dist.get_world_size() return WORLD_SIZE #dist.get_world_size() def get_ngpus_per_node(): global GLOBAL_GPUS_PER_DEVICE return GLOBAL_GPUS_PER_DEVICE def get_logger(): logger_name = "main-logger" logger = logging.getLogger(logger_name) logger.setLevel(logging.INFO) handler = logging.StreamHandler() fmt = "[%(asctime)s %(levelname)s %(filename)s line %(lineno)d %(process)d] %(message)s" handler.setFormatter(logging.Formatter(fmt)) logger.addHandler(handler) return logger def multi_proc_train( config_path, model_dir, use_apex, world_size, result_path=None, create_folder=False, display_step=50, summary_step=5, pretrained_path=None, pretrained_include=None, pretrained_exclude=None, pretrained_param_map=None, freeze_include=None, freeze_exclude=None, measure_time=False, resume=False, use_dist=False, gpus_per_node=1, start_gpu_id=0, optim_eval=False, seed=7, dist_port="23335", force_resume_step=None, batch_size=None, apex_opt_level='O0' ): params = { "config_path": config_path, "model_dir": model_dir, "use_apex": use_apex, "result_path": result_path, "create_folder": create_folder, "display_step": display_step, "summary_step": summary_step, "pretrained_path": pretrained_path, "pretrained_include": pretrained_include, "pretrained_exclude": pretrained_exclude, "pretrained_param_map": pretrained_param_map, "freeze_include": freeze_include, "freeze_exclude": freeze_exclude, # "multi_gpu": multi_gpu, "measure_time": measure_time, "resume": resume, "use_dist": use_dist, "gpus_per_node": gpus_per_node, "optim_eval": optim_eval, "seed": seed, "dist_port": dist_port, "world_size": world_size, "force_resume_step":force_resume_step, "batch_size": batch_size, "apex_opt_level":apex_opt_level, "start_gpu_id": start_gpu_id } from types import SimpleNamespace params = SimpleNamespace(**params) os.environ["CUDA_VISIBLE_DEVICES"] = ','.join( str(x) for x in range(start_gpu_id, start_gpu_id+gpus_per_node)) print(f"CUDA_VISIBLE_DEVICES={os.environ['CUDA_VISIBLE_DEVICES']}" ) mp.spawn(train_worker, nprocs=gpus_per_node, args=( params,) ) def train_worker(rank, params): global RANK, WORLD_SIZE RANK = rank WORLD_SIZE=params.world_size # import os # os.environ["CUDA_VISIBLE_DEVICES"] = str(params.start_gpu_id+rank) print(f"CUDA_VISIBLE_DEVICES={os.environ['CUDA_VISIBLE_DEVICES']}") torch.cuda.set_device(rank%params.gpus_per_node) train(config_path=params.config_path, model_dir=params.model_dir, use_apex=params.use_apex, result_path=params.result_path, create_folder=params.create_folder, display_step=params.display_step, pretrained_path=params.pretrained_path, pretrained_include=params.pretrained_include, pretrained_exclude=params.pretrained_exclude, pretrained_param_map=params.pretrained_param_map, freeze_include=params.freeze_include, freeze_exclude=params.freeze_exclude, # multi_gpu=params.multi_gpu, measure_time=params.measure_time, resume=params.resume, use_dist=params.use_dist, dist_port=params.dist_port, gpus_per_node=params.gpus_per_node, optim_eval=params.optim_eval, seed=params.seed, force_resume_step=params.force_resume_step, batch_size = params.batch_size, apex_opt_level=params.apex_opt_level, gpu_id=params.start_gpu_id+rank, ) def train( config_path, model_dir, use_apex, result_path=None, create_folder=False, display_step=50, summary_step=5, pretrained_path=None, pretrained_include=None, pretrained_exclude=None, pretrained_param_map=None, freeze_include=None, freeze_exclude=None, 
multi_gpu=False, measure_time=False, resume=False, use_dist=False, dist_port="23335", gpus_per_node=1, optim_eval=False, seed=7, force_resume_step=None, batch_size=None, apex_opt_level='O0', gpu_id=None ): """train a VoxelNet model specified by a config file. """ print("force_resume_step:", force_resume_step) print("torch.cuda.is_available()=", torch.cuda.is_available()) print("torch.version.cuda=",torch.version.cuda) dist_url=f"tcp://127.0.0.1:{dist_port}" print(f"dist_url={dist_url}", flush=True) global RANK, WORLD_SIZE # RANK, WORLD_SIZE=rank, world_size if RANK<0: RANK=0 if WORLD_SIZE<0: WORLD_SIZE=1 global GLOBAL_GPUS_PER_DEVICE GLOBAL_GPUS_PER_DEVICE = gpus_per_node #fix the seeds print(f"Set seed={seed}", flush=True) random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed(seed) ######################################## initialize the distributed env ######################################### if use_dist: # torch.cuda.set_device(get_rank(use_dist)) if use_apex: dist.init_process_group( backend="nccl", init_method=dist_url, world_size=get_world(use_dist), rank=get_rank(use_dist)) else: # rank, world_size = dist_init(str(dist_port)) dist.init_process_group( backend="nccl", init_method=dist_url, world_size=get_world(use_dist), rank=get_rank(use_dist)) ############################################ create folders ############################################ model_dir = str(Path(model_dir).resolve()) model_dir = Path(model_dir) if chk_rank(0, use_dist): if not resume and model_dir.exists(): raise ValueError("model dir exists and you don't specify resume.") print("Warning: model dir exists and you don't specify resume.") model_dir.mkdir(parents=True, exist_ok=True) if result_path is None: result_path = model_dir / 'results' config_file_bkp = "pipeline.config" ############################################# read config proto ############################################ config = merge_cfg( [config_path], intersection=True) if chk_rank(0, use_dist): print(json.dumps(config, indent=4)) if chk_rank(0, use_dist): # save_cfg([default_config_path, custom_config_path], save_cfg([config_path, config_path], str(model_dir / config_file_bkp)) #update the global config object get_cfg().merge(config.get("BASIC",{}),"BASIC" ) input_cfg = config.train_input_reader eval_input_cfg = config.eval_input_reader model_cfg = config.model train_cfg = config.train_config optimizer_cfg = train_cfg.optimizer loss_scale = train_cfg.loss_scale_factor ############################################# Update default options ############################################ if batch_size is not None: input_cfg.batch_size = batch_size print(input_cfg.batch_size) ############################################# build network, optimizer etc. 
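    # --- Illustrative launch command (added for exposition; the paths are
    # placeholders, not values shipped with the repository). Because this module
    # exposes its functions through fire.Fire() at the bottom of the file, a
    # distributed run might look like:
    #
    #   python tools/train.py multi_proc_train \
    #       --config_path=<your_config>.yml \
    #       --model_dir=<output_dir> \
    #       --use_apex=False --use_dist=True \
    #       --world_size=1 --gpus_per_node=1 --dist_port=23335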
############################################ #dummy dataset to get obj_seqs dataset_tmp = input_reader_builder.build( input_cfg, training=True, ) model_cfg.obj_seqs = copy.copy(dataset_tmp._dataset.infos["seqs"]) print(model_cfg.obj_seqs, type(model_cfg.obj_seqs), flush=True) model_cfg.gpu_id=gpu_id net = build_network(model_cfg, measure_time) #.to(device) net.cuda() fastai_optimizer = optimizer_builder.build( optimizer_cfg, net, mixed=False, loss_scale=loss_scale) print("num parameters:", len(list(net.parameters()))) ############################################# load pretrained model ############################################ if pretrained_path is not None: model_dict = net.state_dict() pretrained_dict = torch.load(pretrained_path, map_location='cpu') print("Pretrained keys:", pretrained_dict.keys()) print("Model keys:", model_dict.keys()) pretrained_dict = filter_param_dict( pretrained_dict, pretrained_include, pretrained_exclude) pretrained_dict = modify_parameter_name_with_map( pretrained_dict, ast.literal_eval(str(pretrained_param_map))) new_pretrained_dict = {} for k, v in pretrained_dict.items(): if k in model_dict and v.shape == model_dict[k].shape: new_pretrained_dict[k] = v print("Load pretrained parameters:") for k, v in new_pretrained_dict.items(): print(k, v.shape) model_dict.update(new_pretrained_dict) net.load_state_dict(model_dict) freeze_params_v2(dict(net.named_parameters()), freeze_include, freeze_exclude) net.clear_global_step() # net.clear_metrics() del pretrained_dict ############################################# try to resume from the latest chkpt ############################################ torchplus.train.try_restore_latest_checkpoints(model_dir, [net]) torchplus.train.try_restore_latest_checkpoints(model_dir, [fastai_optimizer]) ######################################## parallel the network ######################################### if use_dist: if use_apex: import apex net = apex.parallel.convert_syncbn_model(net) net, amp_optimizer = apex.amp.initialize(net.cuda( ), fastai_optimizer, opt_level="O0", keep_batchnorm_fp32=None, loss_scale=None) net_parallel = apex.parallel.DistributedDataParallel(net) else: amp_optimizer = optimizer_builder.build( optimizer_cfg, net, mixed=False, loss_scale=loss_scale) net_parallel = torch.nn.parallel.DistributedDataParallel(net, device_ids=[get_rank(use_dist)], output_device=get_rank(use_dist) ,find_unused_parameters=True) else: net_parallel = net.cuda() ############################################# build lr_scheduler ############################################ lr_scheduler = lr_scheduler_builder.build(optimizer_cfg, amp_optimizer, train_cfg.steps) if 0: # TODO: float_dtype = torch.float16 else: float_dtype = torch.float32 ######################################## build dataloaders ######################################### if use_dist: num_gpu = 1 collate_fn = merge_batch else: raise NotImplementedError print(f"MULTI-GPU: use {num_gpu} gpu") ###################### # PREPARE INPUT ###################### dataset = input_reader_builder.build( input_cfg, training=True, ) eval_dataset = input_reader_builder.build( eval_input_cfg, training=False, ) if use_dist: train_sampler = DistributedGivenIterationSamplerEpoch( dataset, train_cfg.steps, input_cfg.batch_size, last_iter=net.get_global_step()-1, review_cycle=-1) shuffle = False eval_sampler = DistributedSequatialSampler(eval_dataset) else: train_sampler = None eval_sampler = None eval_train_sampler = None shuffle = True dataloader, 
neighborhood_limits=get_dataloader_deepim(dataset=dataset, kpconv_config=model_cfg.descriptor_net.keypoints_detector_3d, batch_size=input_cfg.batch_size * num_gpu, shuffle=shuffle, num_workers=input_cfg.preprocess.num_workers * num_gpu, sampler=train_sampler ) eval_dataloader, _ =get_dataloader_deepim(dataset=eval_dataset, kpconv_config=model_cfg.descriptor_net.keypoints_detector_3d, batch_size=eval_input_cfg.batch_size, shuffle=False, num_workers=eval_input_cfg.preprocess.num_workers, sampler=eval_sampler, neighborhood_limits=neighborhood_limits ) ########################################################################################## # TRAINING ########################################################################################## model_logging = SimpleModelLog(model_dir, disable=get_rank(use_dist) != 0) model_logging.open() start_step = net.get_global_step() total_step = train_cfg.steps t = time.time() steps_per_eval = train_cfg.steps_per_eval amp_optimizer.zero_grad() step_times = [] step = start_step epoch = 0 net_parallel.train() try: while True: if use_dist: epoch = (net.get_global_step() * input_cfg.batch_size) // len(dataloader) dataloader.sampler.set_epoch(epoch) else: epoch += 1 for example in dataloader: global GLOBAL_STEP GLOBAL_STEP = step lr_scheduler.step(net.get_global_step()) example_torch=load_example_to_device(example, device=torch.device("cuda")) batch_size = example["image"].shape[0] ret_dict = net_parallel(example_torch) loss = ret_dict["loss"].mean() # /get_world(use_dist) recall = ret_dict['recall'].mean() reduced_loss = loss.data.clone() / get_world(use_dist) reduced_recall = recall.data.clone() / get_world(use_dist) if use_dist: dist.all_reduce_multigpu( [reduced_loss]) dist.all_reduce_multigpu( [reduced_recall]) amp_optimizer.zero_grad() if use_apex: with apex.amp.scale_loss(loss, amp_optimizer) as scaled_loss: scaled_loss.backward() else: loss = loss/get_world(use_dist) loss.backward() if use_dist: average_gradients(net_parallel) torch.nn.utils.clip_grad_norm_(net.parameters(), 10.0) amp_optimizer.step() net.update_global_step() step_time = (time.time() - t) step_times.append(step_time) t = time.time() metrics = defaultdict(dict) GLOBAL_STEP = net.get_global_step() if chk_rank(0, use_dist) and GLOBAL_STEP % display_step == 0: print(f'Model directory: {str(model_dir)}') if measure_time: for name, val in net.get_avg_time_dict().items(): print(f"avg {name} time = {val * 1000:.3f} ms") metrics["runtime"] = { "step": GLOBAL_STEP, "steptime": np.mean(step_times), } metrics["loss"]["loss"] = float( reduced_loss.detach().cpu().numpy()) # metrics["loss"]["translation_loss"] = float( metrics["recall"] = float(reduced_recall.detach().cpu().numpy()) metrics['learning_rate'] = amp_optimizer.lr metrics['reproj_loss'] = float(ret_dict['reproj_loss'].median().detach().cpu().numpy()) metrics['loss_3d_proj'] = float(ret_dict['loss_3d_proj'].median().detach().cpu().numpy()) # metrics['chamfer_loss'] = float(ret_dict['chamfer_loss'].median().detach().cpu().numpy()) if hasattr(net.motion_net, "sigma"): metrics['sigma'] = float(net.motion_net.sigma[0].detach().cpu().numpy()) metrics['epoch'] = epoch model_logging.log_metrics(metrics, GLOBAL_STEP) if isinstance(ret_dict["flow"], (list, tuple) ): ret_dict["flow"] = ret_dict["flow"][-1] flow=flow_vis.flow_to_color(ret_dict["flow"][0].squeeze().permute(1,2,0).detach().cpu().numpy(), convert_to_bgr=False) model_logging.log_images( { "image": example["image"].cpu(), "flow": flow.transpose(2,0,1)[None], "weight": 
ret_dict["weight"].squeeze(1).mean(1,keepdims=True).detach().cpu(), "syn_img": torch.cat(ret_dict["syn_img"], dim=0)[:,:3].detach().cpu(), "syn_depth": (ret_dict["syn_depth"][-1]/ret_dict["syn_depth"][-1].max()).detach().cpu(), "valid_mask": ret_dict["valid_mask"].detach().squeeze(1).permute(0,3,1,2).cpu(), "ren_mask": example["ren_mask"][:,None].cpu() }, GLOBAL_STEP, prefix="") if optim_eval and GLOBAL_STEP < total_step/2 and GLOBAL_STEP > train_cfg.steps_per_eval*2: steps_per_eval = 2*train_cfg.steps_per_eval else: steps_per_eval = train_cfg.steps_per_eval if GLOBAL_STEP % steps_per_eval == 0: if chk_rank(0, use_dist): # logging torchplus.train.save_models(model_dir, [net, amp_optimizer], net.get_global_step()) eval_once(net, eval_dataset=eval_dataset, eval_dataloader=eval_dataloader, eval_input_cfg=eval_input_cfg, result_path=result_path, global_step=GLOBAL_STEP, model_logging=model_logging, metrics=metrics, float_dtype=float_dtype, use_dist=use_dist, prefix='eval_') net.train() step += 1 if step >= total_step: break if step >= total_step: break except Exception as e: model_logging.log_text(str(e), step) raise e finally: model_logging.close() torchplus.train.save_models_cpu(model_dir, [net, amp_optimizer], net.get_global_step()) def eval_once(net, eval_dataset, eval_dataloader, eval_input_cfg, result_path, global_step, model_logging, metrics, float_dtype, use_dist, prefix='eval_'): from utils.eval_metric import LineMODEvaluator#, YCBEvaluator net.eval() result_path_step = result_path / \ f"step_{global_step}" if chk_rank(0, use_dist): result_path_step.mkdir(parents=True, exist_ok=True) model_logging.log_text("#################################", global_step) model_logging.log_text("# EVAL", global_step) model_logging.log_text("#################################", global_step) model_logging.log_text( "Generate output labels...", global_step) prog_bar = ProgressBar() prog_bar.start(len(eval_dataloader)) # RES={ # # "recall":[] # "recall":[], # "3d_proj_error":[], # "2d_proj_error":[], # } t = 0 detections = [] #construct evaluator classes=list(set(eval_dataset.dataset.infos['seqs'])) if classes[0] in bop_ycb_class2idx.keys(): evaluator = dict([(c,YCBEvaluator(f"{c}",result_path) ) for c in classes ] ) else: evaluator = dict([(c,LineMODEvaluator(f"{c}",result_path) ) for c in classes ] ) # cnt = 0 for i, example in enumerate(eval_dataloader): example_torch=load_example_to_device(example, device=torch.device("cuda")) batch_size = example["image"].shape[0] device = example_torch["image"].device with torch.no_grad(): ret_dict = net(example_torch) # RES['recall'].append(recall.detach()) # RES['3d_proj_error'].append(ret_dict['loss_3d_proj'].mean().detach() ) # RES['2d_proj_error'].append(ret_dict['reproj_loss'].mean().detach() ) #do evaluation evaluator[example['class_name'][0]].evaluate_rnnpose(ret_dict, example) if chk_rank(0, use_dist) and i % 10 == 0: prog_bar.print_bar(finished_size=10) eval_res={} for k in evaluator: eval_res[k]=evaluator[k].summarize() if use_dist: # chk_rank(0, use_dist): #gather and summarize evaluation results for k in eval_res: eval_res[k]['seq_len'] = torch.tensor(eval_res[k]['seq_len'], device=device) gather_list = [torch.zeros_like(eval_res[k]['seq_len']) for i in range(get_world(use_dist))] dist.all_gather(gather_list, eval_res[k]['seq_len']) seq_len = torch.stack(gather_list, dim=-1).sum() for kk in eval_res[k]: if kk =='seq_len': continue eval_res[k][kk] = torch.tensor(eval_res[k][kk], device=device) eval_res[k][kk] = eval_res[k][kk]*eval_res[k]['seq_len'] # 
calculate the sum gather_list = [torch.zeros_like(eval_res[k][kk] ) for i in range(get_world(use_dist))] dist.all_gather(gather_list, eval_res[k][kk]) eval_res[k][kk] = torch.stack(gather_list, dim=-1).sum()/seq_len #divide with the whole length to get the mean value if chk_rank(0, use_dist): for k in eval_res: for kk in eval_res[k]: if kk == 'seq_len': metrics[f'{k}_{kk}'] = float(seq_len.cpu().numpy() ) metrics[f'{k}_{kk}'] = float(eval_res[k][kk].cpu().numpy()) if chk_rank(0, use_dist): model_logging.log_metrics(metrics, global_step) # del RES del eval_res net.train() if __name__ == '__main__': fire.Fire() ================================================ FILE: tools/transform_data_format.py ================================================ import numpy as np import cv2 import pickle import fire import os import argparse linemod_K = np.array([[572.4114, 0., 325.2611], [0., 573.57043, 242.04899], [0., 0., 1.]]) blender_K = np.array([[700., 0., 320.], [0., 700., 240.], [0., 0., 1.]]) def range_to_depth(mask, range, K): ''' Transform the range image to depth image ''' f=K[0,0] cx=K[0,2] cy=K[1,2] ys_, xs_=np.nonzero(mask) rngs=range[ys_,xs_] xs,ys=np.asarray(xs_,np.float32)+0.5,np.asarray(ys_,np.float32)+0.5 Zs=f*rngs/( f**2 + (cx-xs)**2 + (cy-ys)**2 )**0.5 depth = np.zeros_like(range) depth[ys_,xs_] = Zs return depth def crop(image, depth, mask, K_old, margin_ratio=0.1, output_size=128 ): ''' image: HxWx3 mask: HxW K_old: 3x3 ''' H,W, _ = image.shape mask=mask.astype('uint8')*255 _x,_y,_w,_h = cv2.boundingRect(mask) center=[_x+_w/2, _y+_h/2] L=int (max(_w,_h)* (1+2*margin_ratio)) x=max(0, int(center[0]- L/2) ) y=max(0, int(center[1]- L/2) ) crop=image[y:y+L, x:x+L] depth_crop=depth[y:y+L, x:x+L] w=h=L # actual crop size #automatically handle the "out of range" problem patch=np.zeros([h,w,3], dtype=image.dtype) depth_patch=np.ones([h,w], dtype=depth.dtype) try: xp = 0 yp = 0 patch[xp : xp+crop.shape[0], yp:yp+crop.shape[1] ] = crop depth_patch[xp : xp+crop.shape[0], yp:yp+crop.shape[1] ] = depth_crop except: import pdb pdb.set_trace() patch=cv2.resize(patch, (output_size,output_size), interpolation=cv2.INTER_LINEAR ) depth_patch=cv2.resize(depth_patch, (output_size,output_size), interpolation=cv2.INTER_NEAREST ) #update the intrinsic parameters K_new=np.zeros_like(K_old) scale=output_size/L K_new[0,2] = (K_old[0,2]-x)*scale K_new[1,2] = (K_old[1,2]-y)*scale K_new[0,0] = K_old[0,0]*scale K_new[1,1] = K_old[1,1]*scale K_new[2,2] = 1 return patch, depth_patch, K_new class DataFormatter(object): def __init__(self, data_type, data_info_path, crop_param=None ): assert data_type in ['LM_SYN_PVNET', "LM_SYN_PVNET_LMK",'LM_FUSE_PVNET','LM_FUSE_SINGLE_PVNET' ] self.data_type=data_type self.crop_param=crop_param with open(data_info_path, 'rb') as f: self.data_info=pickle.load(f) pass def process(self, data_root,depth_root, save_root): if self.data_type == "LM_SYN_PVNET": self._proc_LM_SYN_PVNET(self.data_info, data_root, save_root) elif self.data_type=='LM_SYN_PVNET_LMK': self._proc_LM_SYN_PVNET_LMK(self.data_info, data_root, save_root) elif self.data_type=='LM_FUSE_PVNET': self._proc_LM_FUSE_PVNET(self.data_info, data_root,depth_root, save_root) elif self.data_type=='LM_FUSE_SINGLE_PVNET': self._proc_LM_FUSE_SINGLE_PVNET(self.data_info, data_root,depth_root, save_root) else: raise NotImplementedError def _proc_LM_SYN_PVNET(self, data_info, data_root, save_root): for seq in data_info: for idx in range(len(data_info[seq]) ): # info = { # "index": idx, # "image_path": 
image_paths[idx].replace(image_path_dir+'/',''), # "depth_path": depth_paths[idx].replace(depth_path_dir+'/',''), # "RT": pose["RT"], # "K": pose["K"], # } info = data_info[seq][idx] # image=cv2.imread( os.path.join(data_root, seq, info['image_path']) ) # depth=np.load(os.path.join(data_root, seq, info['depth_path'])) image=cv2.imread( os.path.join(data_root, info['image_path']) ) depth=np.load(os.path.join(data_root, info['depth_path'])) # K_old = info["K"] K_old = blender_K.copy() # maximum depth value = 1, which indicates the invalid regions hs,ws=np.nonzero(depth<1) hmin,hmax=np.min(hs),np.max(hs) wmin,wmax=np.min(ws),np.max(ws) bbox= [hmin, wmin, hmax, wmax] mask=depth<1 #transform the range map to depth map depth = range_to_depth(depth<1, depth*2, K_old) if self.crop_param is not None: image, depth, K_new=crop(image, depth, mask, K_old, margin_ratio=self.crop_param['margin_ratio'], output_size=self.crop_param['output_size'] ) else: K_new = K_old print(info['image_path'], info['depth_path']) patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}.jpg") depth_patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_depth.npy") pose_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_params.pkl") if not os.path.exists(os.path.join(save_root, seq)): os.makedirs(os.path.join(save_root, seq)) #save cv2.imwrite(patch_save_path,image) np.save(depth_patch_save_path, depth) with open(pose_save_path, 'wb+') as f: pickle.dump({ "RT": info["RT"], "K": K_new, "bbox": bbox },f) def _proc_LM_SYN_PVNET_LMK(self, data_info, data_root, save_root): #cam intrinsic is LM for seq in data_info: for idx in range(len(data_info[seq]) ): # info = { # "index": idx, # "image_path": image_paths[idx].replace(image_path_dir+'/',''), # "depth_path": depth_paths[idx].replace(depth_path_dir+'/',''), # "RT": pose["RT"], # "K": pose["K"], # } info = data_info[seq][idx] image=cv2.imread( os.path.join(data_root, info['image_path']) ) depth=np.load(os.path.join(data_root, info['depth_path'])) with open( os.path.join(data_root, info['image_path'].replace(".jpg", "_RT.pkl")), 'rb' ) as f: old_params=pickle.load(f) # K_old = info["K"] K_old = old_params["K"] #linemod_K.copy() # maximum depth value = 1, which indicates the invalid regions hs,ws=np.nonzero(depth<1) hmin,hmax=np.min(hs),np.max(hs) wmin,wmax=np.min(ws),np.max(ws) bbox= [hmin, wmin, hmax, wmax] mask=depth<1 #transform the range map to depth map depth = range_to_depth(depth<1, depth*2, K_old) if self.crop_param is not None: image, depth, K_new=crop(image, depth, mask, K_old, margin_ratio=self.crop_param['margin_ratio'], output_size=self.crop_param['output_size'] ) else: K_new = K_old print(info['image_path'], info['depth_path'], "...") patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}.jpg") depth_patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_depth.npy") pose_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_params.pkl") if not os.path.exists(os.path.join(save_root, seq)): os.makedirs(os.path.join(save_root, seq)) #save cv2.imwrite(patch_save_path,image) np.save(depth_patch_save_path, depth) with open(pose_save_path, 'wb+') as f: pickle.dump({ "RT": old_params["RT"] ,#info["RT"], "K": K_new, "bbox": bbox },f) def _proc_LM_FUSE_PVNET(self, data_info, data_root, depth_root, save_root): # The class name list used during the fusing process, which is used to find the respective mask index linemod_cls_names=['ape','cam','cat','duck','glue','iron','phone', 
'benchvise','can','driller','eggbox','holepuncher','lamp'] for seq in data_info: seq_idx = linemod_cls_names.index(seq) for idx in range(len(data_info[seq]) ): # info = { # "index": idx, # "image_path": image_paths[idx].replace(image_path_dir+'/',''), # "depth_path": depth_paths[idx].replace(depth_path_dir+'/',''), # "RT": pose["RT"], # "K": pose["K"], # } info = data_info[seq][idx] # if info['image_path'] =='cat/2744.jpg': # info = data_info[seq][idx+1] with open(os.path.join(data_root, info['image_path']).split('.jpg')[0].replace(seq,'')+'_info.pkl', 'rb' ) as f: fuse_info = pickle.load(f ) image=cv2.imread( os.path.join(data_root, info['image_path']).split('.jpg')[0].replace(seq,'')+'_rgb.jpg' ) try: depth_idx = fuse_info[2][seq_idx]['img_idx'] except: import pdb pdb.set_trace() rendered_depth=np.load( os.path.dirname(os.path.join(depth_root, info['image_path'] ))+ f'/{depth_idx}_depth.png.npy' ) fuse_mask=cv2.imread( os.path.join(data_root, info['image_path']).split('.jpg')[0].replace(seq,'')+'_mask.png', ) fuse_mask = fuse_mask[...,0]==(seq_idx+1) # fuse mask id starts from 1 # """ #may have bug hs,ws=np.nonzero(rendered_depth<1) hmin,hmax=np.min(hs),np.max(hs) wmin,wmax=np.min(ws),np.max(ws) # """ bbox= [hmin+fuse_info[0][seq_idx][0], wmin+fuse_info[0][seq_idx][1], hmax+fuse_info[0][seq_idx][0], wmax+fuse_info[0][seq_idx][1]] depth = np.ones_like(rendered_depth) try: depth[hmin+fuse_info[0][seq_idx][0]: fuse_info[0][seq_idx][0]+hmax+1, wmin+fuse_info[0][seq_idx][1]: wmax+fuse_info[0][seq_idx][1] +1] = rendered_depth[hmin:hmax+1, wmin:wmax+1] except: print(info['image_path'],"failed!") continue """ #TODO: temp fix, may fail patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_pat.jpg") depth_patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_pat_depth.npy") pose_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_RT.pkl") cv2.imwrite(patch_save_path, patch) np.save(depth_patch_save_path, depth_patch) with open(pose_save_path, 'wb+') as f: pickle.dump({ # "RT": fuse_info[1][seq_idx], #info["RT"], "RT": fuse_info_old[1][seq_idx], #info["RT"], "K": K_new },f) # import pdb # pdb.set_trace() # instance_inds.append(len(instance_inds)) continue """ # fuse_info_old = copy.deepcopy(fuse_info) # K_old = info["K"] K_old = linemod_K.copy() K_old[0,2] = (K_old[0,2]+fuse_info[0][seq_idx][1]) K_old[1,2] = (K_old[1,2]+fuse_info[0][seq_idx][0]) # maximum depth value = 1, which indicates the invalid regions mask=depth<1 # transform the range map to depth map depth = range_to_depth(mask, depth*2, K_old) # depth = depth* fuse_mask + (1-fuse_mask) # use 1's to indicate the invalid depths # depth = depth* fuse_mask # use 0's to indicate the invalid depths depth = depth # keep all the depths including occluded ones if self.crop_param is not None: # image, depth, K_new=crop(image, depth, mask, K_old, margin_ratio=0.1, output_size=128 ) image, depth, K_new=crop(image, depth, mask, K_old, margin_ratio=self.crop_param['margin_ratio'], output_size=self.crop_param['output_size'] ) else: K_new = K_old print(info['image_path'], info['depth_path'], bbox) patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}.jpg") depth_patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_depth.npy") # pose_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_RT.pkl") pose_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_params.pkl") mask_visb_save_path = os.path.join(save_root, seq, f"{info['index']:05d}_mask_visb.png") if not 
os.path.exists(os.path.join(save_root, seq)): os.makedirs(os.path.join(save_root, seq)) #save cv2.imwrite(patch_save_path,image) cv2.imwrite(mask_visb_save_path, fuse_mask*255 ) np.save(depth_patch_save_path, depth) with open(pose_save_path, 'wb+') as f: pickle.dump({ "RT": fuse_info[1][seq_idx], #info["RT"], "K": K_new, "bbox": bbox },f) def _proc_LM_FUSE_SINGLE_PVNET(self, data_info, data_root, depth_root, save_root): # The class name list used during the fusing process, which is used to find the respective mask index linemod_cls_names=['ape','cam','cat','duck','glue','iron','phone', 'benchvise','can','driller','eggbox','holepuncher','lamp'] for seq in data_info: # seq_idx = linemod_cls_names.index(seq) seq_idx = 0 for idx in range(len(data_info[seq]) ): # info = { # "index": idx, # "image_path": image_paths[idx].replace(image_path_dir+'/',''), # "depth_path": depth_paths[idx].replace(depth_path_dir+'/',''), # "RT": pose["RT"], # "K": pose["K"], # } info = data_info[seq][idx] # if info['image_path'] =='cat/2744.jpg': # info = data_info[seq][idx+1] with open(os.path.join(data_root, info['image_path'].split('.jpg')[0]+'_info.pkl' ), 'rb' ) as f: fuse_info = pickle.load(f ) # image=cv2.imread( os.path.join(data_root, seq, info['image_path'].split('.jpg')[0].replace(seq,'')+'_rgb.jpg' ) ) image=cv2.imread( os.path.join(data_root, info['image_path'].split('.jpg')[0]+'_rgb.jpg' ) ) try: depth_idx = fuse_info[2][seq_idx]['img_idx'] except: import pdb pdb.set_trace() rendered_depth=np.load( os.path.dirname(os.path.join(depth_root, info['image_path'] ))+ f'/{depth_idx}_depth.png.npy' ) fuse_mask=cv2.imread( os.path.join(data_root, info['image_path']).split('.jpg')[0]+'_mask.png', ) fuse_mask = fuse_mask[...,0]==(seq_idx+1) # fuse mask id starts from 1 # """ #may have bug hs,ws=np.nonzero(rendered_depth<1) hmin,hmax=np.min(hs),np.max(hs) wmin,wmax=np.min(ws),np.max(ws) # """ bbox= [hmin+fuse_info[0][seq_idx][0], wmin+fuse_info[0][seq_idx][1], hmax+fuse_info[0][seq_idx][0], wmax+fuse_info[0][seq_idx][1]] depth = np.ones_like(rendered_depth) try: depth[hmin+fuse_info[0][seq_idx][0]: fuse_info[0][seq_idx][0]+hmax+1, wmin+fuse_info[0][seq_idx][1]: wmax+fuse_info[0][seq_idx][1] +1] = rendered_depth[hmin:hmax+1, wmin:wmax+1] except: import pdb pdb.set_trace() print(info['image_path'],"failed!") continue """ #TODO: temp fix, may fail patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_pat.jpg") depth_patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_pat_depth.npy") pose_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_RT.pkl") cv2.imwrite(patch_save_path, patch) np.save(depth_patch_save_path, depth_patch) with open(pose_save_path, 'wb+') as f: pickle.dump({ # "RT": fuse_info[1][seq_idx], #info["RT"], "RT": fuse_info_old[1][seq_idx], #info["RT"], "K": K_new },f) # import pdb # pdb.set_trace() # instance_inds.append(len(instance_inds)) continue """ # fuse_info_old = copy.deepcopy(fuse_info) # K_old = info["K"] K_old = linemod_K.copy() K_old[0,2] = (K_old[0,2]+fuse_info[0][seq_idx][1]) K_old[1,2] = (K_old[1,2]+fuse_info[0][seq_idx][0]) # maximum depth value = 1, which indicates the invalid regions mask=depth<1 # transform the range map to depth map depth = range_to_depth(mask, depth*2, K_old) # depth = depth* fuse_mask + (1-fuse_mask) # use 1's to indicate the invalid depths depth = depth* fuse_mask # use 0's to indicate the invalid depths if self.crop_param is not None: # image, depth, K_new=crop(image, depth, mask, K_old, margin_ratio=0.1, output_size=128 ) 
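                # --- Worked example (added for exposition; the numbers are made up):
                # if the square crop starts at (x, y) = (100, 80) with side L = 256
                # and output_size = 128, then scale = 128 / 256 = 0.5, so the
                # principal point moves to cx' = (cx - 100) * 0.5 and the focal
                # length becomes fx' = fx * 0.5 (likewise for cy, fy). Depth values
                # themselves are unchanged: cropping and resizing only re-sample
                # the image plane, they do not move the camera.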
image, depth, K_new=crop(image, depth, mask, K_old, margin_ratio=self.crop_param['margin_ratio'], output_size=self.crop_param['output_size'] ) else: K_new = K_old print(info['image_path'], info['depth_path'], bbox) patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}.jpg") depth_patch_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_depth.npy") # pose_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_RT.pkl") pose_save_path=os.path.join(save_root, seq, f"{info['index']:05d}_params.pkl") if not os.path.exists(os.path.join(save_root, seq)): os.makedirs(os.path.join(save_root, seq)) #save cv2.imwrite(patch_save_path,image) np.save(depth_patch_save_path, depth) with open(pose_save_path, 'wb+') as f: pickle.dump({ "RT": fuse_info[1][seq_idx], #info["RT"], "K": K_new, "bbox": bbox },f) def run(data_type,data_info_path, image_root, depth_root, save_dir, crop_param=None): df = DataFormatter(data_type,data_info_path) df.process(image_root, depth_root,save_dir) if __name__=='__main__': fire.Fire(run) ================================================ FILE: torchplus/__init__.py ================================================ from . import train from . import nn from . import metrics from . import tools from .tools import change_default_args from torchplus.ops.array_ops import scatter_nd, gather_nd, roll ================================================ FILE: torchplus/metrics.py ================================================ import numpy as np import torch import torch.nn.functional as F from torch import nn class Scalar(nn.Module): def __init__(self): super().__init__() self.register_buffer('total', torch.FloatTensor([0.0])) self.register_buffer('count', torch.FloatTensor([0.0])) def forward(self, scalar): if not scalar.eq(0.0): self.count += 1 self.total += scalar.data.float() return self.value.cpu() @property def value(self): return self.total / self.count def clear(self): self.total.zero_() self.count.zero_() class Accuracy(nn.Module): def __init__(self, dim=1, ignore_idx=-1, threshold=0.5, encode_background_as_zeros=True): super().__init__() self.register_buffer('total', torch.FloatTensor([0.0])) self.register_buffer('count', torch.FloatTensor([0.0])) self._ignore_idx = ignore_idx self._dim = dim self._threshold = threshold self._encode_background_as_zeros = encode_background_as_zeros def forward(self, labels, preds, weights=None): # labels: [N, ...] # preds: [N, C, ...] 
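        # --- Illustrative decoding (added comment, not original code): with
        # encode_background_as_zeros=True the C channels score only foreground
        # classes, so for one location with C=2 and threshold=0.5,
        #   preds = [-2.0, 3.0] -> sigmoid = [0.12, 0.95]; argmax + 1 = 2 and
        # 0.95 > 0.5, so class 2 is predicted; had no score exceeded the
        # threshold, the prediction would fall back to background label 0.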
if self._encode_background_as_zeros: scores = torch.sigmoid(preds) labels_pred = torch.max(preds, dim=self._dim)[1] + 1 pred_labels = torch.where((scores > self._threshold).any(self._dim), labels_pred, torch.tensor(0).type_as(labels_pred)) else: pred_labels = torch.max(preds, dim=self._dim)[1] N, *Ds = labels.shape labels = labels.view(N, int(np.prod(Ds))) pred_labels = pred_labels.view(N, int(np.prod(Ds))) if weights is None: weights = (labels != self._ignore_idx).float() else: weights = weights.float() num_examples = torch.sum(weights) num_examples = torch.clamp(num_examples, min=1.0).float() total = torch.sum((pred_labels == labels.long()).float()) self.count += num_examples self.total += total return self.value.cpu() # return (total / num_examples.data).cpu() @property def value(self): return self.total / self.count def clear(self): self.total.zero_() self.count.zero_() class Precision(nn.Module): def __init__(self, dim=1, ignore_idx=-1, threshold=0.5): super().__init__() self.register_buffer('total', torch.FloatTensor([0.0])) self.register_buffer('count', torch.FloatTensor([0.0])) self._ignore_idx = ignore_idx self._dim = dim self._threshold = threshold def forward(self, labels, preds, weights=None): # labels: [N, ...] # preds: [N, C, ...] if preds.shape[self._dim] == 1: # BCE pred_labels = (torch.sigmoid(preds) > self._threshold).long().squeeze(self._dim) else: assert preds.shape[ self._dim] == 2, "precision only support 2 class" pred_labels = torch.max(preds, dim=self._dim)[1] N, *Ds = labels.shape labels = labels.view(N, int(np.prod(Ds))) pred_labels = pred_labels.view(N, int(np.prod(Ds))) if weights is None: weights = (labels != self._ignore_idx).float() else: weights = weights.float() pred_trues = pred_labels > 0 pred_falses = pred_labels == 0 trues = labels > 0 falses = labels == 0 true_positives = (weights * (trues & pred_trues).float()).sum() true_negatives = (weights * (falses & pred_falses).float()).sum() false_positives = (weights * (falses & pred_trues).float()).sum() false_negatives = (weights * (trues & pred_falses).float()).sum() count = true_positives + false_positives # print(count, true_positives) if count > 0: self.count += count self.total += true_positives return self.value.cpu() # return (total / num_examples.data).cpu() @property def value(self): return self.total / self.count def clear(self): self.total.zero_() self.count.zero_() class Recall(nn.Module): def __init__(self, dim=1, ignore_idx=-1, threshold=0.5): super().__init__() self.register_buffer('total', torch.FloatTensor([0.0])) self.register_buffer('count', torch.FloatTensor([0.0])) self._ignore_idx = ignore_idx self._dim = dim self._threshold = threshold def forward(self, labels, preds, weights=None): # labels: [N, ...] # preds: [N, C, ...] 
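        # --- Illustrative (added comment, not original code): in the
        # single-logit branch below (preds.shape[dim] == 1, i.e. BCE), a raw
        # logit of 0.8 gives sigmoid(0.8) ~= 0.69 > 0.5, so the location is
        # predicted positive; recall then counts it against labels == 1.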
if preds.shape[self._dim] == 1: # BCE pred_labels = (torch.sigmoid(preds) > self._threshold).long().squeeze(self._dim) else: assert preds.shape[ self._dim] == 2, "precision only support 2 class" pred_labels = torch.max(preds, dim=self._dim)[1] N, *Ds = labels.shape labels = labels.view(N, int(np.prod(Ds))) pred_labels = pred_labels.view(N, int(np.prod(Ds))) if weights is None: weights = (labels != self._ignore_idx).float() else: weights = weights.float() pred_trues = pred_labels == 1 pred_falses = pred_labels == 0 trues = labels == 1 falses = labels == 0 true_positives = (weights * (trues & pred_trues).float()).sum() true_negatives = (weights * (falses & pred_falses).float()).sum() false_positives = (weights * (falses & pred_trues).float()).sum() false_negatives = (weights * (trues & pred_falses).float()).sum() count = true_positives + false_negatives if count > 0: self.count += count self.total += true_positives return self.value.cpu() # return (total / num_examples.data).cpu() @property def value(self): return self.total / self.count def clear(self): self.total.zero_() self.count.zero_() def _calc_binary_metrics(labels, scores, weights=None, ignore_idx=-1, threshold=0.5): pred_labels = (scores > threshold).long() N, *Ds = labels.shape labels = labels.view(N, int(np.prod(Ds))) pred_labels = pred_labels.view(N, int(np.prod(Ds))) pred_trues = pred_labels > 0 pred_falses = pred_labels == 0 trues = labels > 0 falses = labels == 0 true_positives = (weights * (trues & pred_trues).float()).sum() true_negatives = (weights * (falses & pred_falses).float()).sum() false_positives = (weights * (falses & pred_trues).float()).sum() false_negatives = (weights * (trues & pred_falses).float()).sum() return true_positives, true_negatives, false_positives, false_negatives class PrecisionRecall(nn.Module): def __init__(self, dim=1, ignore_idx=-1, thresholds=0.5, use_sigmoid_score=False, encode_background_as_zeros=True): super().__init__() if not isinstance(thresholds, (list, tuple)): thresholds = [thresholds] self.register_buffer('prec_total', torch.FloatTensor(len(thresholds)).zero_()) self.register_buffer('prec_count', torch.FloatTensor(len(thresholds)).zero_()) self.register_buffer('rec_total', torch.FloatTensor(len(thresholds)).zero_()) self.register_buffer('rec_count', torch.FloatTensor(len(thresholds)).zero_()) self._ignore_idx = ignore_idx self._dim = dim self._thresholds = thresholds self._use_sigmoid_score = use_sigmoid_score self._encode_background_as_zeros = encode_background_as_zeros def forward(self, labels, preds, weights=None): # labels: [N, ...] # preds: [N, ..., C] if self._encode_background_as_zeros: # this don't support softmax assert self._use_sigmoid_score is True total_scores = torch.sigmoid(preds) # scores, label_preds = torch.max(total_scores, dim=1) else: if self._use_sigmoid_score: total_scores = torch.sigmoid(preds)[..., 1:] else: total_scores = F.softmax(preds, dim=-1)[..., 1:] """ if preds.shape[self._dim] == 1: # BCE scores = torch.sigmoid(preds) else: # assert preds.shape[ # self._dim] == 2, "precision only support 2 class" # TODO: add support for [N, C, ...] format. 
# TODO: add multiclass support if self._use_sigmoid_score: scores = torch.sigmoid(preds)[:, ..., 1:].sum(-1) else: scores = F.softmax(preds, dim=self._dim)[:, ..., 1:].sum(-1) """ scores = torch.max(total_scores, dim=-1)[0] if weights is None: weights = (labels != self._ignore_idx).float() else: weights = weights.float() for i, thresh in enumerate(self._thresholds): tp, tn, fp, fn = _calc_binary_metrics(labels, scores, weights, self._ignore_idx, thresh) rec_count = tp + fn prec_count = tp + fp if rec_count > 0: self.rec_count[i] += rec_count self.rec_total[i] += tp if prec_count > 0: self.prec_count[i] += prec_count self.prec_total[i] += tp return self.value # return (total / num_examples.data).cpu() @property def value(self): prec_count = torch.clamp(self.prec_count, min=1.0) rec_count = torch.clamp(self.rec_count, min=1.0) return ((self.prec_total / prec_count).cpu(), (self.rec_total / rec_count).cpu()) @property def thresholds(self): return self._thresholds def clear(self): self.rec_count.zero_() self.prec_count.zero_() self.prec_total.zero_() self.rec_total.zero_() ================================================ FILE: torchplus/nn/__init__.py ================================================ from torchplus.nn.functional import one_hot from torchplus.nn.modules.common import Empty, Sequential from torchplus.nn.modules.normalization import GroupNorm ================================================ FILE: torchplus/nn/functional.py ================================================ import torch def one_hot(tensor, depth, dim=-1, on_value=1.0, dtype=torch.float32): tensor_onehot = torch.zeros( *list(tensor.shape), depth, dtype=dtype, device=tensor.device) tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) return tensor_onehot ================================================ FILE: torchplus/nn/modules/__init__.py ================================================ ================================================ FILE: torchplus/nn/modules/common.py ================================================ import sys from collections import OrderedDict import torch from torch.nn import functional as F class Empty(torch.nn.Module): def __init__(self, *args, **kwargs): super(Empty, self).__init__() self.weight = torch.zeros([1, ]) # dummy varaible def forward(self, *args, **kwargs): if len(args) == 1: return args[0] elif len(args) == 0: return None return args class Sequential(torch.nn.Module): r"""A sequential container. Modules will be added to it in the order they are passed in the constructor. Alternatively, an ordered dict of modules can also be passed in. 
    To make it easier to understand, given is a small example::

        # Example of using Sequential
        model = Sequential(
                  nn.Conv2d(1,20,5),
                  nn.ReLU(),
                  nn.Conv2d(20,64,5),
                  nn.ReLU()
                )

        # Example of using Sequential with OrderedDict
        model = Sequential(OrderedDict([
                  ('conv1', nn.Conv2d(1,20,5)),
                  ('relu1', nn.ReLU()),
                  ('conv2', nn.Conv2d(20,64,5)),
                  ('relu2', nn.ReLU())
                ]))

        # Example of using Sequential with kwargs (python 3.6+)
        model = Sequential(
                  conv1=nn.Conv2d(1,20,5),
                  relu1=nn.ReLU(),
                  conv2=nn.Conv2d(20,64,5),
                  relu2=nn.ReLU()
                )
    """

    def __init__(self, *args, **kwargs):
        super(Sequential, self).__init__()
        if len(args) == 1 and isinstance(args[0], OrderedDict):
            for key, module in args[0].items():
                self.add_module(key, module)
        else:
            for idx, module in enumerate(args):
                self.add_module(str(idx), module)
        for name, module in kwargs.items():
            if sys.version_info < (3, 6):
                raise ValueError("kwargs only supported in py36+")
            if name in self._modules:
                raise ValueError("name exists.")
            self.add_module(name, module)

    def __getitem__(self, idx):
        if not (-len(self) <= idx < len(self)):
            raise IndexError('index {} is out of range'.format(idx))
        if idx < 0:
            idx += len(self)
        it = iter(self._modules.values())
        for i in range(idx):
            next(it)
        return next(it)

    def __len__(self):
        return len(self._modules)

    def add(self, module, name=None):
        if name is None:
            name = str(len(self._modules))
            if name in self._modules:
                raise KeyError("name exists")
        self.add_module(name, module)

    def forward(self, input):
        # i = 0
        for module in self._modules.values():
            # print(i)
            input = module(input)
            # i += 1
        return input

================================================ FILE: torchplus/nn/modules/normalization.py ================================================
import torch


class GroupNorm(torch.nn.GroupNorm):
    def __init__(self, num_channels, num_groups, eps=1e-5, affine=True):
        super().__init__(
            num_groups=num_groups,
            num_channels=num_channels,
            eps=eps,
            affine=affine)

================================================ FILE: torchplus/ops/__init__.py ================================================

================================================ FILE: torchplus/ops/array_ops.py ================================================
import ctypes
import math
import time

import torch
from typing import Optional


def scatter_nd(indices, updates, shape):
    """PyTorch edition of TensorFlow's scatter_nd. This function contains no
    exception-handling code, so use it carefully: repeated indices are not
    supported; only one of the conflicting writes takes effect, and the
    repeated-add accumulation that TensorFlow provides is not implemented
    here.
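
    Example (illustrative; shapes chosen just for the demo):
        >>> import torch
        >>> indices = torch.tensor([[0], [2]])
        >>> updates = torch.tensor([1.0, 2.0])
        >>> scatter_nd(indices, updates, [4])
        tensor([1., 0., 2., 0.])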
""" ret = torch.zeros(*shape, dtype=updates.dtype, device=updates.device) ndim = indices.shape[-1] output_shape = list(indices.shape[:-1]) + shape[indices.shape[-1]:] flatted_indices = indices.view(-1, ndim) slices = [flatted_indices[:, i] for i in range(ndim)] slices += [Ellipsis] ret[slices] = updates.view(*output_shape) return ret def gather_nd(params, indices): # this function has a limit that MAX_ADVINDEX_CALC_DIMS=5 ndim = indices.shape[-1] output_shape = list(indices.shape[:-1]) + list(params.shape[indices.shape[-1]:]) flatted_indices = indices.view(-1, ndim) slices = [flatted_indices[:, i] for i in range(ndim)] slices += [Ellipsis] return params[slices].view(*output_shape) def roll(x: torch.Tensor, shift: int, dim: int = -1, fill_pad: Optional[int] = None): """ shift<0: left roll shift>0: right roll """ device = x.device if 0 == shift: return x elif shift < 0: shift = -shift gap = x.index_select(dim, torch.arange(shift, device=device)) if fill_pad is not None: gap = fill_pad * torch.ones_like(gap, device=device) return torch.cat([x.index_select(dim, torch.arange(shift, x.size(dim), device=device)), gap], dim=dim) else: shift = x.size(dim) - shift gap = x.index_select(dim, torch.arange(shift, x.size(dim), device=device)) if fill_pad is not None: gap = fill_pad * torch.ones_like(gap, device=device) return torch.cat([gap, x.index_select(dim, torch.arange(shift, device=device))], dim=dim) ================================================ FILE: torchplus/tools.py ================================================ import functools import inspect import sys from collections import OrderedDict import numba import numpy as np import torch def get_pos_to_kw_map(func): pos_to_kw = {} fsig = inspect.signature(func) pos = 0 for name, info in fsig.parameters.items(): if info.kind is info.POSITIONAL_OR_KEYWORD: pos_to_kw[pos] = name pos += 1 return pos_to_kw def get_kw_to_default_map(func): kw_to_default = {} fsig = inspect.signature(func) for name, info in fsig.parameters.items(): if info.kind is info.POSITIONAL_OR_KEYWORD: if info.default is not info.empty: kw_to_default[name] = info.default return kw_to_default # def change_default_args(**kwargs): # def layer_wrapper(layer_class): # class DefaultArgLayer(layer_class): # def __init__(self, *args, **kw): # pos_to_kw = get_pos_to_kw_map(layer_class.__init__) # kw_to_pos = {kw: pos for pos, kw in pos_to_kw.items()} # for key, val in kwargs.items(): # if key not in kw and kw_to_pos[key] > len(args): # kw[key] = val # super().__init__(*args, **kw) # return DefaultArgLayer # return layer_wrapper def change_default_args(**kwargs): def layer_wrapper(layer_class): class DefaultArgLayer(layer_class): def __init__(self, *args, **kw): pos_to_kw = get_pos_to_kw_map(layer_class.__init__) kw_to_pos = {kw: pos for pos, kw in pos_to_kw.items()} for key, val in kwargs.items(): if key not in kw and kw_to_pos[key] > len(args): kw[key] = val super(DefaultArgLayer,self).__init__(*args, **kw) return DefaultArgLayer return layer_wrapper def torch_to_np_dtype(ttype): type_map = { torch.float16: np.dtype(np.float16), torch.float32: np.dtype(np.float32), torch.float16: np.dtype(np.float64), torch.int32: np.dtype(np.int32), torch.int64: np.dtype(np.int64), torch.uint8: np.dtype(np.uint8), } return type_map[ttype] ================================================ FILE: torchplus/train/__init__.py ================================================ from torchplus.train.checkpoint import (latest_checkpoint, restore, restore_latest_checkpoints, restore_models, save, save_models, 
try_restore_latest_checkpoints, save_models_cpu ) from torchplus.train.common import create_folder from torchplus.train.optim import MixedPrecisionWrapper ================================================ FILE: torchplus/train/checkpoint.py ================================================ import json import logging import os import signal from pathlib import Path import torch class DelayedKeyboardInterrupt(object): def __enter__(self): self.signal_received = False self.old_handler = signal.signal(signal.SIGINT, self.handler) def handler(self, sig, frame): self.signal_received = (sig, frame) logging.debug('SIGINT received. Delaying KeyboardInterrupt.') def __exit__(self, type, value, traceback): signal.signal(signal.SIGINT, self.old_handler) if self.signal_received: self.old_handler(*self.signal_received) def latest_checkpoint(model_dir, model_name): """return path of latest checkpoint in a model_dir Args: model_dir: string, indicate your model dir(save ckpts, summarys, logs, etc). model_name: name of your model. we find ckpts by name Returns: path: None if isn't exist or latest checkpoint path. """ ckpt_info_path = Path(model_dir) / "checkpoints.json" if not ckpt_info_path.is_file(): return None with open(ckpt_info_path, 'r') as f: ckpt_dict = json.loads(f.read()) if model_name not in ckpt_dict['latest_ckpt']: return None latest_ckpt = ckpt_dict['latest_ckpt'][model_name] ckpt_file_name = Path(model_dir) / latest_ckpt if not ckpt_file_name.is_file(): return None return str(ckpt_file_name) def _ordered_unique(seq): seen = set() return [x for x in seq if not (x in seen or seen.add(x))] def save(model_dir, model, model_name, global_step, max_to_keep=8, keep_latest=True): """save a model into model_dir. Args: model_dir: string, indicate your model dir(save ckpts, summarys, logs, etc). model: torch.nn.Module instance. model_name: name of your model. we find ckpts by name global_step: int, indicate current global step. max_to_keep: int, maximum checkpoints to keep. keep_latest: bool, if True and there are too much ckpts, will delete oldest ckpt. else will delete ckpt which has smallest global step. Returns: path: None if isn't exist or latest checkpoint path. 
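    The bookkeeping file checkpoints.json written by this function has the
    following shape (illustrative contents; "net" is a made-up model name,
    filenames follow the "{model_name}-{global_step}.tckpt" pattern used below):
        {
          "latest_ckpt": {"net": "net-1000.tckpt"},
          "all_ckpts": {"net": ["net-500.tckpt", "net-1000.tckpt"]}
        }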
""" # prevent save incomplete checkpoint due to key interrupt with DelayedKeyboardInterrupt(): ckpt_info_path = Path(model_dir) / "checkpoints.json" ckpt_filename = "{}-{}.tckpt".format(model_name, global_step) ckpt_path = Path(model_dir) / ckpt_filename if not ckpt_info_path.is_file(): ckpt_info_dict = {'latest_ckpt': {}, 'all_ckpts': {}} else: with open(ckpt_info_path, 'r') as f: ckpt_info_dict = json.loads(f.read()) ckpt_info_dict['latest_ckpt'][model_name] = ckpt_filename if model_name in ckpt_info_dict['all_ckpts']: ckpt_info_dict['all_ckpts'][model_name].append(ckpt_filename) else: ckpt_info_dict['all_ckpts'][model_name] = [ckpt_filename] all_ckpts = ckpt_info_dict['all_ckpts'][model_name] torch.save(model.state_dict(), ckpt_path) # check ckpt in all_ckpts is exist, if not, delete it from all_ckpts all_ckpts_checked = [] for ckpt in all_ckpts: ckpt_path_uncheck = Path(model_dir) / ckpt if ckpt_path_uncheck.is_file(): all_ckpts_checked.append(str(ckpt_path_uncheck)) all_ckpts = all_ckpts_checked if len(all_ckpts) > max_to_keep: if keep_latest: ckpt_to_delete = all_ckpts.pop(0) else: # delete smallest step def get_step(name): return int( name.split('.')[0].split('-')[1]) min_step = min([get_step(name) for name in all_ckpts]) ckpt_to_delete = "{}-{}.tckpt".format(model_name, min_step) all_ckpts.remove(ckpt_to_delete) os.remove(str(Path(model_dir) / ckpt_to_delete)) all_ckpts_filename = _ordered_unique([Path(f).name for f in all_ckpts]) ckpt_info_dict['all_ckpts'][model_name] = all_ckpts_filename with open(ckpt_info_path, 'w') as f: f.write(json.dumps(ckpt_info_dict, indent=2)) def restore(ckpt_path, model, map_func=None, map_location='cpu'): if not Path(ckpt_path).is_file(): raise ValueError("checkpoint {} not exist.".format(ckpt_path)) state_dict = torch.load(ckpt_path, map_location=map_location) if map_func is not None: # map_func(state_dict) state_dict = map_func(state_dict) #modified @01/01/2020 model.load_state_dict(state_dict) print("Restoring parameters from {}".format(ckpt_path)) def _check_model_names(models): model_names = [] for model in models: if not hasattr(model, "name"): raise ValueError("models must have name attr") model_names.append(model.name) if len(model_names) != len(set(model_names)): raise ValueError("models must have unique name: {}".format( ", ".join(model_names))) def _get_name_to_model_map(models): if isinstance(models, dict): name_to_model = {name: m for name, m in models.items()} else: _check_model_names(models) name_to_model = {m.name: m for m in models} return name_to_model def try_restore_latest_checkpoints(model_dir, models, map_func=None, map_location='cpu'): name_to_model = _get_name_to_model_map(models) for name, model in name_to_model.items(): latest_ckpt = latest_checkpoint(model_dir, name) if latest_ckpt is not None: restore(latest_ckpt, model, map_func, map_location) def restore_latest_checkpoints(model_dir, models, map_func=None, map_location='cpu'): name_to_model = _get_name_to_model_map(models) for name, model in name_to_model.items(): latest_ckpt = latest_checkpoint(model_dir, name) if latest_ckpt is not None: restore(latest_ckpt, model, map_func,map_location) else: raise ValueError("model {}\'s ckpt isn't exist".format(name)) def restore_models(model_dir, models, global_step, map_func=None, map_location='cpu'): name_to_model = _get_name_to_model_map(models) for name, model in name_to_model.items(): ckpt_filename = "{}-{}.tckpt".format(name, global_step) ckpt_path = model_dir + "/" + ckpt_filename restore(ckpt_path, model, map_func, 

def save_models(model_dir, models, global_step, max_to_keep=15,
                keep_latest=True):
    with DelayedKeyboardInterrupt():
        name_to_model = _get_name_to_model_map(models)
        for name, model in name_to_model.items():
            save(model_dir, model, name, global_step, max_to_keep,
                 keep_latest)


def gpu_to_cpu(models):
    # models is [net] or [net, optimizer]; also move the optimizer state
    # tensors to CPU so the whole checkpoint can be saved from host memory
    if len(models) == 1:
        models[0].cpu()
        return
    for state in models[1].state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cpu()
    models[0].cpu()


def cpu_to_gpu(models):
    if len(models) == 1:
        models[0].cuda()
        return
    for state in models[1].state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.cuda()
    models[0].cuda()


def save_models_cpu(model_dir, models, global_step, max_to_keep=15,
                    keep_latest=True):
    with DelayedKeyboardInterrupt():
        name_to_model = _get_name_to_model_map(models)
        gpu_to_cpu(models)
        for name, model in name_to_model.items():
            save(model_dir, model, name, global_step, max_to_keep,
                 keep_latest)
        cpu_to_gpu(models)



================================================
FILE: torchplus/train/common.py
================================================
import datetime
import os
import shutil


def create_folder(prefix, add_time=True, add_str=None, delete=False):
    additional_str = ''
    if delete is True:
        if os.path.exists(prefix):
            shutil.rmtree(prefix)
        os.makedirs(prefix)
    folder = prefix
    if add_time is True:
        # additional_str has a form such as '170903_220351'
        additional_str += datetime.datetime.now().strftime("%y%m%d_%H%M%S")
        if add_str is not None:
            folder += '/' + additional_str + '_' + add_str
        else:
            folder += '/' + additional_str
    if delete is True:
        if os.path.exists(folder):
            shutil.rmtree(folder)
    os.makedirs(folder)
    return folder



================================================
FILE: torchplus/train/fastai_optim.py
================================================
from collections import defaultdict
from collections.abc import Iterable  # moved out of `collections` in py3.10
from copy import deepcopy
from itertools import chain

import torch
from torch import nn
from torch._utils import _unflatten_dense_tensors
from torch.autograd import Variable
from torch.nn.utils import parameters_to_vector

bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, )


def split_bn_bias(layer_groups):
    "Split the layers in `layer_groups` into batchnorm (`bn_types`) and non-batchnorm groups."
    split_groups = []
    for l in layer_groups:
        l1, l2 = [], []
        for c in l.children():
            if isinstance(c, bn_types):
                l2.append(c)
            else:
                l1.append(c)
        split_groups += [nn.Sequential(*l1), nn.Sequential(*l2)]
    return split_groups


def get_master(layer_groups, flat_master: bool = False):
    "Return two lists, one for the model parameters in FP16 and one for the master parameters in FP32."
    split_groups = split_bn_bias(layer_groups)
    model_params = [[
        param for param in lg.parameters() if param.requires_grad
    ] for lg in split_groups]
    if flat_master:
        master_params = []
        for lg in model_params:
            if len(lg) != 0:
                mp = parameters_to_vector([param.data.float() for param in lg])
                mp = torch.nn.Parameter(mp, requires_grad=True)
                if mp.grad is None:
                    mp.grad = mp.new(*mp.size())
                master_params.append([mp])
            else:
                master_params.append([])
        return model_params, master_params
    else:
        master_params = [[param.clone().float().detach() for param in lg]
                         for lg in model_params]
        for mp in master_params:
            for param in mp:
                param.requires_grad = True
        return model_params, master_params


def model_g2master_g(model_params, master_params,
                     flat_master: bool = False) -> None:
    "Copy the `model_params` gradients to `master_params` for the optimizer step."
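    # A minimal sketch of how these helpers compose (illustrative only;
    # `criterion`, `x`, `y`, `optimizer` and `loss_scale` are assumed names,
    # not defined in this file):
    #
    #   model_params, master_params = get_master(layer_groups)
    #   loss = criterion(model(x), y) * loss_scale   # scale so FP16 grads stay finite
    #   loss.backward()
    #   model_g2master_g(model_params, master_params)  # FP16 grads -> FP32 master
    #   for group in master_params:
    #       for p in group:
    #           p.grad.div_(loss_scale)              # unscale before the step
    #   optimizer.step()                             # update FP32 master weights
    #   master2model(model_params, master_params)    # copy back into the FP16 model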
if flat_master: for model_group, master_group in zip(model_params, master_params): if len(master_group) != 0: master_group[0].grad.data.copy_( parameters_to_vector( [p.grad.data.float() for p in model_group])) else: for model_group, master_group in zip(model_params, master_params): for model, master in zip(model_group, master_group): if model.grad is not None: if master.grad is None: master.grad = master.data.new(*master.data.size()) master.grad.data.copy_(model.grad.data) else: master.grad = None def master2model(model_params, master_params, flat_master: bool = False) -> None: "Copy `master_params` to `model_params`." if flat_master: for model_group, master_group in zip(model_params, master_params): if len(model_group) != 0: for model, master in zip( model_group, _unflatten_dense_tensors(master_group[0].data, model_group)): model.data.copy_(master) else: for model_group, master_group in zip(model_params, master_params): for model, master in zip(model_group, master_group): model.data.copy_(master.data) def listify(p=None, q=None): "Make `p` listy and the same length as `q`." if p is None: p = [] elif isinstance(p, str): p = [p] elif not isinstance(p, Iterable): p = [p] n = q if type(q) == int else len(p) if q is None else len(q) if len(p) == 1: p = p * n assert len(p) == n, f'List len mismatch ({len(p)} vs {n})' return list(p) def trainable_params(m: nn.Module): "Return list of trainable params in `m`." res = filter(lambda p: p.requires_grad, m.parameters()) return res def is_tuple(x) -> bool: return isinstance(x, tuple) # copy from fastai. class OptimWrapper(torch.optim.Optimizer): "Basic wrapper around `opt` to simplify hyper-parameters changes." def __init__(self, opt, wd, true_wd: bool = False, bn_wd: bool = True): # super().__init__(opt.param_groups, dict()) self.opt, self.true_wd, self.bn_wd = opt, true_wd, bn_wd self.opt_keys = list(self.opt.param_groups[0].keys()) self.opt_keys.remove('params') self.read_defaults() self.wd = wd self.param_segs=[] @classmethod def create(cls, opt_func, lr, layer_groups, **kwargs): "Create an `optim.Optimizer` from `opt_func` with `lr`. Set lr on `layer_groups`." # param_segs=[] if len(layer_groups)==1: split_groups = split_bn_bias(layer_groups) #non-bn and bns else: split_groups = [] for lg in layer_groups: split_groups+=split_bn_bias(lg) # param_segs.append(len(lg )) # # buf = layer_groups[0] # for i in range(1,len(layer_groups)): # buf+=layer_groups[i] # layer_groups =buf # nn.Sequential(*buf) opt = opt_func([{ 'params': trainable_params(l), 'lr': 0 } for l in split_groups]) # import pdb # pdb.set_trace() opt = cls(opt, **kwargs) opt.lr, opt.opt_func = listify(lr, layer_groups), opt_func # opt.param_segs=param_segs return opt def new(self, layer_groups): "Create a new `OptimWrapper` from `self` with another `layer_groups` but the same hyper-parameters." opt_func = getattr(self, 'opt_func', self.opt.__class__) split_groups = split_bn_bias(layer_groups) opt = opt_func([{ 'params': trainable_params(l), 'lr': 0 } for l in split_groups]) return self.create( opt_func, self.lr, layer_groups, wd=self.wd, true_wd=self.true_wd, bn_wd=self.bn_wd) def __repr__(self) -> str: return f'OptimWrapper over {repr(self.opt)}.\nTrue weight decay: {self.true_wd}' # Pytorch optimizer methods def step(self) -> None: "Set weight decay and step optimizer." 
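        # Decoupled ("true") weight decay, as in AdamW: rather than folding
        # wd * w into the gradient, shrink the weights directly before the
        # inner optimizer steps. Hand-computed example (illustrative values):
        # with lr=0.1 and wd=0.01, a weight w=1.0 becomes
        # w * (1 - wd * lr) = 1.0 * (1 - 0.001) = 0.999 prior to the step.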
# weight decay outside of optimizer step (AdamW) if self.true_wd: for lr, wd, pg1, pg2 in zip(self._lr, self._wd, self.opt.param_groups[::2], self.opt.param_groups[1::2]): for p in pg1['params']: p.data.mul_(1 - wd * lr) if self.bn_wd: for p in pg2['params']: p.data.mul_(1 - wd * lr) self.set_val('weight_decay', listify(0, self._wd)) self.opt.step() def zero_grad(self) -> None: "Clear optimizer gradients." self.opt.zero_grad() # Passthrough to the inner opt. def __getstate__(self): return self.opt.__getstate__() def __setstate__(self, state): return self.opt.__setstate__(state) def state_dict(self): return self.opt.state_dict() def load_state_dict(self, state_dict): return self.opt.load_state_dict(state_dict) def add_param_group(self, param_group): return self.opt.add_param_group(param_group) def clear(self): "Reset the state of the inner optimizer." sd = self.state_dict() sd['state'] = {} self.load_state_dict(sd) @property def param_groups(self): return self.opt.param_groups @property def defaults(self): return self.opt.defaults @property def state(self): return self.opt.state # Hyperparameters as properties @property def lr(self) -> float: return self._lr[-1] @lr.setter def lr(self, val: float) -> None: self._lr = self.set_val('lr', listify(val, self._lr)) @property def mom(self) -> float: return self._mom[-1] @mom.setter def mom(self, val: float) -> None: if 'momentum' in self.opt_keys: self.set_val('momentum', listify(val, self._mom)) elif 'betas' in self.opt_keys: self.set_val('betas', (listify(val, self._mom), self._beta)) self._mom = listify(val, self._mom) @property def beta(self) -> float: return None if self._beta is None else self._beta[-1] @beta.setter def beta(self, val: float) -> None: "Set beta (or alpha as makes sense for given optimizer)." if val is None: return if 'betas' in self.opt_keys: self.set_val('betas', (self._mom, listify(val, self._beta))) elif 'alpha' in self.opt_keys: self.set_val('alpha', listify(val, self._beta)) self._beta = listify(val, self._beta) @property def wd(self) -> float: return self._wd[-1] @wd.setter def wd(self, val: float) -> None: "Set weight decay." if not self.true_wd: self.set_val( 'weight_decay', listify(val, self._wd), bn_groups=self.bn_wd) self._wd = listify(val, self._wd) # Helper functions def read_defaults(self) -> None: "Read the values inside the optimizer for the hyper-parameters." self._beta = None if 'lr' in self.opt_keys: self._lr = self.read_val('lr') if 'momentum' in self.opt_keys: self._mom = self.read_val('momentum') if 'alpha' in self.opt_keys: self._beta = self.read_val('alpha') if 'betas' in self.opt_keys: self._mom, self._beta = self.read_val('betas') if 'weight_decay' in self.opt_keys: self._wd = self.read_val('weight_decay') def set_val(self, key: str, val, bn_groups: bool = True): "Set `val` inside the optimizer dictionary at `key`." if is_tuple(val): val = [(v1, v2) for v1, v2 in zip(*val)] for v, pg1, pg2 in zip(val, self.opt.param_groups[::2], self.opt.param_groups[1::2]): pg1[key] = v if bn_groups: pg2[key] = v return val def read_val(self, key: str): "Read a hyperparameter `key` in the optimizer dictionary." val = [pg[key] for pg in self.opt.param_groups[::2]] if is_tuple(val[0]): val = [o[0] for o in val], [o[1] for o in val] return val class FastAIMixedOptim(OptimWrapper): @classmethod def create(cls, opt_func, lr, layer_groups, model, flat_master=False, loss_scale=512.0, **kwargs): "Create an `optim.Optimizer` from `opt_func` with `lr`. Set lr on `layer_groups`." 
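        # Illustrative usage (a sketch; `net`, `loss` and the hyper-parameter
        # values are assumptions, not defined in this file):
        #
        #   opt = FastAIMixedOptim.create(
        #       torch.optim.Adam, lr=1e-3, layer_groups=[net],
        #       model=net, loss_scale=512.0, wd=1e-2)
        #   (loss * opt.loss_scale).backward()
        #   opt.step()   # unscales grads, steps in FP32, copies back to FP16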
opt = OptimWrapper.create(opt_func, lr, layer_groups, **kwargs) opt.model_params, opt.master_params = get_master( layer_groups, flat_master) opt.flat_master = flat_master opt.loss_scale = loss_scale opt.model = model # Changes the optimizer so that the optimization step is done in FP32. # opt = self.learn.opt mom, wd, beta = opt.mom, opt.wd, opt.beta lrs = [lr for lr in opt._lr for _ in range(2)] opt_params = [{ 'params': mp, 'lr': lr } for mp, lr in zip(opt.master_params, lrs)] opt.opt = opt_func(opt_params) opt.mom, opt.wd, opt.beta = mom, wd, beta return opt def step(self): model_g2master_g(self.model_params, self.master_params, self.flat_master) for group in self.master_params: for param in group: param.grad.div_(self.loss_scale) super(FastAIMixedOptim, self).step() self.model.zero_grad() # Update the params from master to model. master2model(self.model_params, self.master_params, self.flat_master) ================================================ FILE: torchplus/train/learning_schedules.py ================================================ """PyTorch edition of TensorFlow learning schedule in tensorflow object detection API. """ import numpy as np from torch.optim.optimizer import Optimizer class _LRSchedulerStep(object): def __init__(self, optimizer, last_step=-1): if not isinstance(optimizer, Optimizer): raise TypeError('{} is not an Optimizer'.format( type(optimizer).__name__)) self.optimizer = optimizer if last_step == -1: for group in optimizer.param_groups: group.setdefault('initial_lr', group['lr']) else: for i, group in enumerate(optimizer.param_groups): if 'initial_lr' not in group: raise KeyError( "param 'initial_lr' is not specified " "in param_groups[{}] when resuming an optimizer". format(i)) self.base_lrs = list( map(lambda group: group['initial_lr'], optimizer.param_groups)) self.step(last_step + 1) self.last_step = last_step """ def get_lr(self): raise NotImplementedError """ def get_lr(self): ret = [self._get_lr_per_group(base_lr) for base_lr in self.base_lrs] return ret def _get_lr_per_group(self, base_lr): raise NotImplementedError def step(self, step=None): if step is None: step = self.last_step + 1 self.last_step = step for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): param_group['lr'] = lr class Constant(_LRSchedulerStep): def __init__(self, optimizer, last_step=-1): super().__init__(optimizer, last_step) def _get_lr_per_group(self, base_lr): return base_lr class ManualStepping(_LRSchedulerStep): """Pytorch edition of manual_stepping in tensorflow. DON'T SUPPORT PARAM GROUPS. 
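
    Example (illustrative values): with boundaries=[10000, 20000] and
    rates=[1e-3, 1e-4, 1e-5], steps 0-10000 run at lr 1e-3, steps
    10001-20000 at 1e-4, and all later steps at 1e-5.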
""" def __init__(self, optimizer, boundaries, rates, last_step=-1): self._boundaries = boundaries self._num_boundaries = len(boundaries) self._learning_rates = rates if any([b < 0 for b in boundaries]) or any( [not isinstance(b, int) for b in boundaries]): raise ValueError('boundaries must be a list of positive integers') if any( [bnext <= b for bnext, b in zip(boundaries[1:], boundaries[:-1])]): raise ValueError( 'Entries in boundaries must be strictly increasing.') if any([not isinstance(r, float) for r in rates]): raise ValueError('Learning rates must be floats') if len(rates) != len(boundaries) + 1: raise ValueError('Number of provided learning rates must exceed ' 'number of boundary points by exactly 1.') super().__init__(optimizer, last_step) def _get_lr_per_group(self, base_lr): step = self.last_step ret = None for i, bound in enumerate(self._boundaries): if step > bound: ret = self._learning_rates[i + 1] if ret is not None: return ret return self._learning_rates[0] class ExponentialDecayWithBurnin(_LRSchedulerStep): """Pytorch edition of manual_stepping in tensorflow. """ def __init__(self, optimizer, learning_rate_decay_steps, learning_rate_decay_factor, burnin_learning_rate, burnin_steps, last_step=-1): self._decay_steps = learning_rate_decay_steps self._decay_factor = learning_rate_decay_factor self._burnin_learning_rate = burnin_learning_rate self._burnin_steps = burnin_steps super().__init__(optimizer, last_step) def _get_lr_per_group(self, base_lr): if self._burnin_learning_rate == 0: burnin_learning_rate = base_lr step = self.last_step post_burnin_learning_rate = (base_lr * self._decay_factor ^ (step // self._decay_steps)) if step < self._burnin_steps: return burnin_learning_rate else: return post_burnin_learning_rate class ExponentialDecay(_LRSchedulerStep): def __init__(self, optimizer, learning_rate_decay_steps, learning_rate_decay_factor, staircase=True, last_step=-1): self._decay_steps = learning_rate_decay_steps self._decay_factor = learning_rate_decay_factor self._staircase = staircase super().__init__(optimizer, last_step) def _get_lr_per_group(self, base_lr): step = self.last_step if self._staircase: post_burnin_learning_rate = base_lr * pow(self._decay_factor, (step // self._decay_steps)) else: post_burnin_learning_rate = base_lr * pow(self._decay_factor, (step / self._decay_steps)) return post_burnin_learning_rate class CosineDecayWithWarmup(_LRSchedulerStep): def __init__(self, optimizer, total_steps, warmup_learning_rate, warmup_steps, last_step=-1): if total_steps < warmup_steps: raise ValueError('total_steps must be larger or equal to ' 'warmup_steps.') self._total_steps = total_steps self._warmup_learning_rate = warmup_learning_rate self._warmup_steps = warmup_steps super().__init__(optimizer, last_step) def _get_lr_per_group(self, base_lr): if base_lr < self._warmup_learning_rate: raise ValueError('learning_rate_base must be larger ' 'or equal to warmup_learning_rate.') step = self.last_step learning_rate = 0.5 * base_lr * ( 1 + np.cos(np.pi * (float(step) - self._warmup_steps ) / float(self._total_steps - self._warmup_steps))) if self._warmup_steps > 0: slope = (base_lr - self._warmup_learning_rate) / self._warmup_steps pre_cosine_learning_rate = slope * float( step) + self._warmup_learning_rate if step < self._warmup_steps: return pre_cosine_learning_rate else: return learning_rate class OneCycle(_LRSchedulerStep): def __init__(self, optimizer, total_steps, lr_max, moms, div_factor=25, pct_start=0.3, last_step=-1): if total_steps < warmup_steps: raise 
ValueError('total_steps must be larger or equal to ' 'warmup_steps.') self._total_steps = total_steps self._lr_max = lr_max self._moms = moms self._warmup_learning_rate = warmup_learning_rate self._warmup_steps = warmup_steps super().__init__(optimizer, last_step) def _get_lr_per_group(self, base_lr): if base_lr < self._warmup_learning_rate: raise ValueError('learning_rate_base must be larger ' 'or equal to warmup_learning_rate.') step = self.last_step learning_rate = 0.5 * base_lr * ( 1 + np.cos(np.pi * (float(step) - self._warmup_steps ) / float(self._total_steps - self._warmup_steps))) if self._warmup_steps > 0: slope = (base_lr - self._warmup_learning_rate) / self._warmup_steps pre_cosine_learning_rate = slope * float( step) + self._warmup_learning_rate if step < self._warmup_steps: return pre_cosine_learning_rate else: return learning_rate ================================================ FILE: torchplus/train/learning_schedules_fastai.py ================================================ import numpy as np import math from functools import partial import torch class LRSchedulerStep(object): def __init__(self, fai_optimizer, total_step, lr_phases, mom_phases): self.optimizer = fai_optimizer self.total_step = total_step self.lr_phases = [] for i, (start, lambda_func) in enumerate(lr_phases): if len(self.lr_phases) != 0: assert self.lr_phases[-1][0] < int(start * total_step) if isinstance(lambda_func, str): lambda_func = eval(lambda_func) if i < len(lr_phases) - 1: self.lr_phases.append((int(start * total_step), int(lr_phases[i + 1][0] * total_step), lambda_func)) else: self.lr_phases.append((int(start * total_step), total_step, lambda_func)) assert self.lr_phases[0][0] == 0 self.mom_phases = [] for i, (start, lambda_func) in enumerate(mom_phases): if len(self.mom_phases) != 0: assert self.mom_phases[-1][0] < int(start * total_step) if isinstance(lambda_func, str): lambda_func = eval(lambda_func) if i < len(mom_phases) - 1: self.mom_phases.append((int(start * total_step), int(mom_phases[i + 1][0] * total_step), lambda_func)) else: self.mom_phases.append((int(start * total_step), total_step, lambda_func)) if len(mom_phases) > 0: assert self.mom_phases[0][0] == 0 def step(self, step): lrs = [] moms = [] for start, end, func in self.lr_phases: if step >= start: # func: lr decay function lrs.append(func((step - start) / (end - start))) if len(lrs) > 0: self.optimizer.lr = lrs[-1] # import pdb # pdb.set_trace() # print(self.optimizer.lr,'!!!!') for start, end, func in self.mom_phases: if step >= start: moms.append(func((step - start) / (end - start))) self.optimizer.mom = func((step - start) / (end - start)) if len(moms) > 0: self.optimizer.mom = moms[-1] @property def learning_rate(self): return self.optimizer.lr def annealing_cos(start, end, pct): # print(pct, start, end) "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0." 
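    # Worked example (hand-computed, illustrative): with start=0.1, end=0.0
    # and pct=0.5, cos(pi * 0.5) + 1 = 1, so the result is
    # 0.0 + (0.1 - 0.0) / 2 * 1 = 0.05, i.e. halfway through the anneal
    # the lr is halved.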
cos_out = np.cos(np.pi * pct) + 1 return end + (start - end) / 2 * cos_out class OneCycle(LRSchedulerStep): def __init__(self, fai_optimizer, total_step, lr_max, moms, div_factor, pct_start): self.lr_max = lr_max self.moms = moms self.div_factor = div_factor self.pct_start = pct_start a1 = int(total_step * self.pct_start) a2 = total_step - a1 low_lr = self.lr_max / self.div_factor lr_phases = ((0, partial(annealing_cos, low_lr, self.lr_max)), (self.pct_start, partial(annealing_cos, self.lr_max, low_lr / 1e4)) ) mom_phases = ((0, partial(annealing_cos, *self.moms)), (self.pct_start, partial(annealing_cos, *self.moms[::-1]))) fai_optimizer.lr, fai_optimizer.mom = low_lr, self.moms[0] super().__init__(fai_optimizer, total_step, lr_phases, mom_phases) class ExponentialDecayWarmup(LRSchedulerStep): def __init__(self, fai_optimizer, total_step, initial_learning_rate, decay_length, decay_factor, div_factor=1, pct_start=0, staircase=True): """ Args: decay_length: must in (0, 1) """ assert decay_length > 0 assert decay_length < 1 self._decay_steps_unified = decay_length self._decay_factor = decay_factor self._staircase = staircase self.div_factor = div_factor self.pct_start = pct_start step = pct_start*total_step # 0 stage = 1 lr_phases = [ (0, partial(annealing_cos, initial_learning_rate/div_factor, initial_learning_rate))] if staircase: while step <= total_step: func = lambda p, _d=initial_learning_rate * stage: _d lr_phases.append((step / total_step, func)) stage *= decay_factor step += int(decay_length * total_step) else: def func(p): return pow(decay_factor, (p / decay_length)) lr_phases.append((pct_start, func)) # lr_phases.append((step/total_step, func)) super().__init__(fai_optimizer, total_step, lr_phases, []) class ExponentialDecay(LRSchedulerStep): def __init__(self, fai_optimizer, total_step, initial_learning_rate, decay_length, decay_factor, staircase=True): """ Args: decay_length: must in (0, 1) """ assert decay_length > 0 assert decay_length < 1 self._decay_steps_unified = decay_length self._decay_factor = decay_factor self._staircase = staircase step = 0 stage = 1 lr_phases = [] if staircase: while step <= total_step: func = lambda p, _d=initial_learning_rate * stage: _d lr_phases.append((step / total_step, func)) stage *= decay_factor step += int(decay_length * total_step) else: def func(p): return pow(decay_factor, (p / decay_length)) lr_phases.append((0, func)) super().__init__(fai_optimizer, total_step, lr_phases, []) class ManualStepping(LRSchedulerStep): def __init__(self, fai_optimizer, total_step, boundaries, rates): assert all([b > 0 and b < 1 for b in boundaries]) assert len(boundaries) + 1 == len(rates) boundaries.insert(0, 0.0) lr_phases = [] for start, rate in zip(boundaries, rates): def func(p, _d=rate): return _d lr_phases.append((start, func)) super().__init__(fai_optimizer, total_step, lr_phases, []) class FakeOptim: def __init__(self): self.lr = 0 self.mom = 0 if __name__ == "__main__": import matplotlib.pyplot as plt opt = FakeOptim() # 3e-3, wd=0.4, div_factor=10 # schd = OneCycle(opt, 100, 3e-3, (0.95, 0.85), 10.0, 0.4) schd = ExponentialDecay(opt, 100, 3e-4, 0.1, 0.8, staircase=True) schd = ManualStepping(opt, 100, [0.8, 0.9], [0.001, 0.0001, 0.00005]) lrs = [] moms = [] for i in range(100): schd.step(i) lrs.append(opt.lr) moms.append(opt.mom) plt.plot(lrs) # plt.plot(moms) # plt.show() # plt.plot(moms) plt.show() ================================================ FILE: torchplus/train/optim.py ================================================ from collections 
import defaultdict
from collections.abc import Iterable  # moved out of `collections` in py3.10
import torch
from copy import deepcopy
from itertools import chain
from torch.autograd import Variable

required = object()


def param_fp32_copy(params):
    param_copy = [
        param.clone().type(torch.cuda.FloatTensor).detach()
        for param in params
    ]
    for param in param_copy:
        param.requires_grad = True
    return param_copy


def set_grad(params, params_with_grad, scale=1.0):
    for param, param_w_grad in zip(params, params_with_grad):
        if param.grad is None:
            param.grad = torch.nn.Parameter(
                param.data.new().resize_(*param.data.size()))
        grad = param_w_grad.grad.data
        if scale is not None:
            grad /= scale
        if torch.isnan(grad).any() or torch.isinf(grad).any():
            return True  # invalid grad
        param.grad.data.copy_(grad)
    return False


class MixedPrecisionWrapper(object):
    """Mixed-precision optimizer wrapper.

    Arguments:
        optimizer (torch.optim.Optimizer): an instance of
            :class:`torch.optim.Optimizer`
        scale (float): a scalar for grad scaling.
        auto_scale (bool): whether to enable automatic loss scaling. The
            algorithm is described in
            http://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html
    """

    def __init__(self, optimizer, scale=None, auto_scale=True,
                 inc_factor=2.0, dec_factor=0.5, num_iters_be_stable=500):
        # if not isinstance(optimizer, torch.optim.Optimizer):
        #     raise ValueError("must provide a torch.optim.Optimizer")
        self.optimizer = optimizer
        if hasattr(self.optimizer, 'name'):
            self.name = self.optimizer.name  # for ckpt system
        param_groups_copy = []
        for i, group in enumerate(optimizer.param_groups):
            group_copy = {n: v for n, v in group.items() if n != 'params'}
            group_copy['params'] = param_fp32_copy(group['params'])
            param_groups_copy.append(group_copy)
        # switch param_groups, may be dangerous
        self.param_groups = optimizer.param_groups
        optimizer.param_groups = param_groups_copy
        self.grad_scale = scale
        self.auto_scale = auto_scale
        self.inc_factor = inc_factor
        self.dec_factor = dec_factor
        self.stable_iter_count = 0
        self.num_iters_be_stable = num_iters_be_stable

    def __getstate__(self):
        return self.optimizer.__getstate__()

    def __setstate__(self, state):
        return self.optimizer.__setstate__(state)

    def __repr__(self):
        return self.optimizer.__repr__()

    def state_dict(self):
        return self.optimizer.state_dict()

    def load_state_dict(self, state_dict):
        return self.optimizer.load_state_dict(state_dict)

    def zero_grad(self):
        return self.optimizer.zero_grad()

    def step(self, closure=None):
        for g, g_copy in zip(self.param_groups, self.optimizer.param_groups):
            invalid = set_grad(g_copy['params'], g['params'], self.grad_scale)
            if invalid:
                if self.grad_scale is None or self.auto_scale is False:
                    raise ValueError(
                        "nan/inf detected but auto_scale disabled.")
                self.grad_scale *= self.dec_factor
                print('scale decay to {}'.format(self.grad_scale))
                return
        if self.auto_scale is True:
            self.stable_iter_count += 1
            if self.stable_iter_count > self.num_iters_be_stable:
                if self.grad_scale is not None:
                    self.grad_scale *= self.inc_factor
                self.stable_iter_count = 0
        if closure is None:
            self.optimizer.step()
        else:
            self.optimizer.step(closure)
        for g, g_copy in zip(self.param_groups, self.optimizer.param_groups):
            for p_copy, p in zip(g_copy['params'], g['params']):
                p.data.copy_(p_copy.data)



================================================
FILE: utils/__init__.py
================================================



================================================
FILE: utils/config_io.py
================================================
from easydict import EasyDict as edict
import os
import shutil
import yaml


def
mkdir_if_not_exists(path): """Make a directory if it does not exist. Args: path: directory to create """ if not os.path.exists(path): os.makedirs(path) def read_yaml(filename): """Load yaml file as a dictionary item Args: filename (str): .yaml file path Returns: cfg (dict): configuration """ if filename is not None: with open(filename, 'r') as f: return yaml.load(f, Loader=yaml.FullLoader) else: return {} def copy_file(src_file, tgt_file): """Copy a file Args: src_file (str): source file tgt_file (str): target file """ shutil.copyfile(src_file, tgt_file) def update_dict(dict1, dict2, intersection=False): """update dict1 according to dict2 Args: dict1 (dict): reference dictionary dict2 (dict): new dictionary return dict1 (dict): updated reference dictionary """ for item in dict2: # if dict1.get(item, -1) != -1: if item in dict1: if isinstance(dict1[item], dict): dict1[item] = update_dict(dict1[item], dict2[item], intersection) else: dict1[item] = dict2[item] else: if not intersection: dict1[item] = dict2[item] else: raise ValueError(f"Key '{item}' is in the second dict but not in the first dict!") #inverse check for item in dict1: if item not in dict2: print(f"Warning: key {item} is not given and will use the default values") return dict1 def merge_cfg(cfg_files, intersection=False): """merge default configuration and custom configuration Args: cfg_files (str): configuration file paths [default, custom] Returns: cfg (edict): merged EasyDict """ edict_items = [] # cfg = {} cfg = read_yaml(cfg_files[0]) # for f in cfg_files: for f in cfg_files[1:]: # if f is not None: if os.path.exists(f): cfg = update_dict(cfg, read_yaml(f), intersection) else: raise ValueError(f"File {f} does not exist." ) return edict(cfg) def write_cfg(default, custom, f, level_cnt=0): """write configuration to file Args: default (dict): default configuration dictionary custom (dict): custom configuration dictionary file (TextIOWrapper) """ offset_len = 100 for item in default: if isinstance(default[item], dict): if custom.get(item, -1) == -1: custom[item] = {} line = " "*level_cnt + item + ": " offset = offset_len - len(line) line += " "*offset + " # |" f.writelines(line + "\n") write_cfg(default[item], custom[item], f, level_cnt+1) else: line = " " * level_cnt + item + ": " if custom.get(item, -1) == -1: if default[item] is not None: line += str(default[item]) offset = offset_len - len(line) line += " "*offset + " # | " else: if custom[item] is not None: line += str(custom[item]) offset = offset_len - len(line) line += " "*offset + " # | " if custom[item] != default[item]: line += str(default[item]) f.writelines(line) f.writelines("\n") def save_cfg(cfg_files, file_path): """Save configuration file Args: cfg_files (str): configuration file paths [default, custom] Returns: cfg (edict): merged EasyDict """ # read configurations default = read_yaml(cfg_files[0]) custom = read_yaml(cfg_files[1]) # create file to be written f = open(file_path, 'w') # write header line line = "# " + "-"*20 + " Setup " + "-"*74 line += "|" + "-"*10 + " Default " + "-"*20 + "\n" f.writelines(line) # write configurations write_cfg(default, custom, f) f.close() ================================================ FILE: utils/distributed_utils.py ================================================ from torch.utils.data import Sampler import math import os import pdb import torch import torch.distributed as dist from torch.nn import Module import multiprocessing as mp import numpy as np class ParallelWrapper(Module): def __init__(self, net, 
parallel_mode='none'): super(ParallelWrapper, self).__init__() assert parallel_mode in ['dist', 'data_parallel', 'none'] self.parallel_mode = parallel_mode if parallel_mode == 'none': self.net = net self.module = net elif parallel_mode == 'dist': self.net = DistModule(net) self.module = self.net.module else: self.net = torch.nn.DataParallel(net) self.module = self.net.module def forward(self, *inputs, **kwargs): return self.net.forward(*inputs, **kwargs) def train(self, mode=True): super(ParallelWrapper, self).train(mode) self.net.train(mode) class DistModule(Module): def __init__(self, module): super(DistModule, self).__init__() self.module = module broadcast_params(self.module) def forward(self, *inputs, **kwargs): return self.module(*inputs, **kwargs) def train(self, mode=True): super(DistModule, self).train(mode) self.module.train(mode) def gradients_multiply(model, multiplier=1): for param in model.parameters(): if param.requires_grad and param.grad is not None: param.grad.data *= multiplier def average_gradients(model): """ average gradients """ # for n, param, in model.named_parameters(): # if 'dynamic_sigma' in n: # print(param.requires_grad, param.grad.data, param.data) # if param.requires_grad and param.grad is not None: # dist.all_reduce(param.grad.data) for param in model.parameters(): if param.requires_grad and param.grad is not None: dist.all_reduce(param.grad.data) # param.grad.data *= multiplier def broadcast_params(model): """ broadcast model parameters """ for p in model.state_dict().values(): dist.broadcast(p, 0) def dist_init(port): # os.environ["OMP_NUM_THREADS"] = "1" if mp.get_start_method(allow_none=True) != 'spawn': mp.set_start_method('spawn', force=True) proc_id = int(os.environ['SLURM_PROCID']) ntasks = int(os.environ['SLURM_NTASKS']) node_list = os.environ['SLURM_NODELIST'] num_gpus = torch.cuda.device_count() torch.cuda.set_device(proc_id % num_gpus) if '[' in node_list: beg = node_list.find('[') pos1 = node_list.find('-', beg) if pos1 < 0: pos1 = 1000 pos2 = node_list.find(',', beg) if pos2 < 0: pos2 = 1000 node_list = node_list[:min(pos1, pos2)].replace('[', '') #added by dy # addr = node_list[8:].replace('-', '.') addr = node_list.replace('-', '.') if ',' in addr: addr = addr.split(',')[0] addr = addr[8:] # addr = ','.join([ad[8:] for ad in addrs ]) print(addr) os.environ['MASTER_PORT'] = port os.environ['MASTER_ADDR'] = addr os.environ['WORLD_SIZE'] = str(ntasks) os.environ['RANK'] = str(proc_id) dist.init_process_group(backend='nccl') rank = dist.get_rank() world_size = dist.get_world_size() return rank, world_size # from . import Sampler class DistributedSequatialSampler(Sampler): """Sampler that restricts data loading to a subset of the dataset. It is especially useful in conjunction with :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each process can pass a DistributedSampler instance as a DataLoader sampler, and load a subset of the original dataset that is exclusive to it. .. note:: Dataset is assumed to be of constant size. Arguments: dataset: Dataset used for sampling. num_replicas (optional): Number of processes participating in distributed training. rank (optional): Rank of the current process within num_replicas. 
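
    Example (illustrative; assumes torch.distributed is initialized):
        sampler = DistributedSequatialSampler(dataset)
        loader = torch.utils.data.DataLoader(dataset, batch_size=8,
                                             sampler=sampler)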
""" def __init__(self, dataset, num_replicas=None, rank=None): if num_replicas is None: if not dist.is_available(): raise RuntimeError( "Requires distributed package to be available") num_replicas = dist.get_world_size() if rank is None: if not dist.is_available(): raise RuntimeError( "Requires distributed package to be available") rank = dist.get_rank() self.dataset = dataset self.num_replicas = num_replicas self.rank = rank self.epoch = 0 self.num_samples = int( math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) self.total_size = self.num_samples * self.num_replicas def __iter__(self): # deterministically shuffle based on epoch # g = torch.Generator() # g.manual_seed(self.epoch) # indices = torch.randperm(len(self.dataset), generator=g).tolist() indices = list(range(len(self.dataset))) # add extra samples to make it evenly divisible indices += indices[:(self.total_size - len(indices))] assert len(indices) == self.total_size # subsample indices = indices[self.rank:self.total_size:self.num_replicas] assert len(indices) == self.num_samples return iter(indices) def __len__(self): return self.num_samples def set_epoch(self, epoch): self.epoch = epoch class DistributedGivenIterationSampler(Sampler): def __init__(self, dataset, total_iter, batch_size, world_size=None, rank=None, last_iter=-1): if world_size is None: world_size = dist.get_world_size() # link.get_world_size() if rank is None: rank = dist.get_rank() # link.get_rank() assert rank < world_size self.dataset = dataset self.total_iter = total_iter self.batch_size = batch_size self.world_size = world_size self.rank = rank self.last_iter = last_iter self.total_size = self.total_iter*self.batch_size self.indices = self.gen_new_list() self.call = 0 def __iter__(self): if self.call == 0: self.call = 1 return iter(self.indices[(self.last_iter+1)*self.batch_size:]) else: return iter(self.indices[(self.last_iter+1)*self.batch_size:]) raise RuntimeError( "this sampler is not designed to be called more than once!!") def gen_new_list(self): # each process shuffle all list with same seed, and pick one piece according to rank # np.random.seed(0) np.random.seed(7) all_size = self.total_size * self.world_size indices = np.arange(len(self.dataset)) indices = indices[:all_size] num_repeat = (all_size-1) // indices.shape[0] + 1 indices = np.tile(indices, num_repeat) indices = indices[:all_size] np.random.shuffle(indices) beg = self.total_size * self.rank indices = indices[beg:beg+self.total_size] assert len(indices) == self.total_size return indices def __len__(self): # note here we do not take last iter into consideration, since __len__ # should only be used for displaying, the correct remaining size is # handled by dataloader # return self.total_size - (self.last_iter+1)*self.batch_size return self.total_size def set_epoch(self, epoch): pass class DistributedGivenIterationSamplerEpoch(Sampler): def __init__(self, dataset, total_iter, batch_size, world_size=None, rank=None, last_iter=-1, review_cycle=-1): if world_size is None: world_size = dist.get_world_size() # link.get_world_size() if rank is None: rank = dist.get_rank() # link.get_rank() assert rank < world_size self.dataset = dataset self.total_iter = total_iter self.batch_size = batch_size self.world_size = world_size self.rank = rank self.last_iter = last_iter self.total_size = self.total_iter*self.batch_size self.review_cycle = review_cycle # in unit of epoch self.indices = self.gen_new_list() self.call = 0 def __iter__(self): if self.call == 0: self.call = 1 return 
iter(self.indices[(self.last_iter+1)*self.batch_size:]) else: return iter(self.indices[(self.last_iter+1)*self.batch_size:]) # raise RuntimeError( # "this sampler is not designed to be called more than once!!") def gen_new_list(self): # each process shuffle all list with same seed, and pick one piece according to rank # np.random.seed(0) np.random.seed(7) all_size = self.total_size * self.world_size indices = np.arange(len(self.dataset)) indices = indices[:all_size] num_repeat = (all_size-1) // indices.shape[0] + 1 # indices = np.tile(indices, num_repeat) indices = np.concatenate([np.random.permutation(indices) for i in range(num_repeat) ] ) seeds = np.arange(indices.size).reshape(indices.shape) if self.review_cycle>0: assert (1/self.review_cycle)%1==0 # review_freq = 1/1/self.review_cycle h = len(indices) // int(self.review_cycle*len(self.dataset)) # print(indices.shape,'???!!!', indices[:h*int(self.review_cycle*len(self.dataset) ) ].shape) indices = indices[:h*int(self.review_cycle*len(self.dataset) ) ].reshape([h,-1] ) seeds = seeds[:h*int(self.review_cycle*len(self.dataset) ) ].reshape([h,-1] ) indices = np.concatenate([indices, indices], axis=1).reshape(-1) seeds = np.concatenate([seeds, seeds], axis=1).reshape(-1) indices = indices[:all_size] seeds = seeds[:all_size] # np.random.shuffle(indices) beg = self.total_size * self.rank indices = indices[beg:beg+self.total_size] seeds = seeds[beg:beg+self.total_size] assert len(indices) == self.total_size # return indices return list(zip(list(indices), list(seeds))) def __len__(self): # note here we do not take last iter into consideration, since __len__ # should only be used for displaying, the correct remaining size is # handled by dataloader # return self.total_size - (self.last_iter+1)*self.batch_size return self.total_size def set_epoch(self, epoch): pass ================================================ FILE: utils/eval_metric.py ================================================ import os import numpy as np from plyfile import PlyData # from utils import icp_utils from data.linemod import linemod_config from thirdparty.vsd import inout from thirdparty.nn import nn_utils from utils.img_utils import read_depth from thirdparty.kpconv.lib.utils import square_distance from utils.geometric import rotation_angle from utils.visualize import * # from thirdparty.fps import fps_utils import torch import open3d as o3d from transforms3d.quaternions import mat2quat, quat2mat, qmult # import data.bop_ycb.ycb_config as ycb_config #import bop_ycb_class2idx, model_info def get_ply_model(model_path, scale=1): ply = PlyData.read(model_path) data = ply.elements[0].data x = data['x']*scale y = data['y']*scale z = data['z']*scale model = np.stack([x, y, z], axis=-1) return model def project(xyz, K, RT): """ xyz: [N, 3] K: [3, 3] RT: [3, 4] """ xyz = np.dot(xyz, RT[:, :3].T) + RT[:, 3:].T xyz = np.dot(xyz, K.T) xy = xyz[:, :2] / xyz[:, 2:] return xy def find_nearest_point_idx(ref_pts, que_pts): assert(ref_pts.shape[1] == que_pts.shape[1] and 1 < que_pts.shape[1] <= 3) pn1 = ref_pts.shape[0] pn2 = que_pts.shape[0] dim = ref_pts.shape[1] ref_pts = np.ascontiguousarray(ref_pts[None, :, :], np.float32) que_pts = np.ascontiguousarray(que_pts[None, :, :], np.float32) idxs = np.zeros([1, pn2], np.int32) ref_pts_ptr = ffi.cast('float *', ref_pts.ctypes.data) que_pts_ptr = ffi.cast('float *', que_pts.ctypes.data) idxs_ptr = ffi.cast('int *', idxs.ctypes.data) lib.findNearestPointIdxLauncher( ref_pts_ptr, que_pts_ptr, idxs_ptr, 1, pn1, pn2, dim, 0) return idxs[0] class 
LineMODEvaluator: def __init__(self, class_name, result_dir, icp_refine=False): # self.result_dir = os.path.join(result_dir, cfg.test.dataset) self.result_dir = os.path.join(result_dir, "LINEMOD") os.system('mkdir -p {}'.format(self.result_dir)) # data_root = args['data_root'] # cls = cfg.cls_type self.class_name = class_name self.icp_refine = icp_refine # model_path = os.path.join(os.path.dirname(os.path.abspath( # __file__)), '../EXPDATA/LINEMOD', class_name, class_name + '.ply') model_path = os.path.join(os.path.dirname(os.path.abspath( __file__)), '../EXPDATA/LM6d_converted/models', class_name, class_name + '.ply') # self.model = pvnet_data_utils.get_ply_model(model_path) self.model = get_ply_model(model_path) self.diameter = linemod_config.diameters[class_name] / 100 self.proj2d = [] self.add = [] self.adds = [] #force sym self.add2 = [] self.add5 = [] self.cmd5 = [] self.icp_proj2d = [] self.icp_add = [] self.icp_cmd5 = [] self.mask_ap = [] self.pose_preds=[] self.height = 480 self.width = 640 model = inout.load_ply(model_path) model['pts'] = model['pts'] * 1000 self.icp_refiner = icp_utils.ICPRefiner( model, (self.width, self.height)) if icp_refine else None def projection_2d(self, pose_pred, pose_targets, K, icp=False, threshold=5): model_2d_pred = project(self.model, K, pose_pred) model_2d_targets = project(self.model, K, pose_targets) proj_mean_diff = np.mean(np.linalg.norm( model_2d_pred - model_2d_targets, axis=-1)) if icp: self.icp_proj2d.append(proj_mean_diff < threshold) else: self.proj2d.append(proj_mean_diff < threshold) def projection_2d_sym(self, pose_pred, pose_targets, K, threshold=5): model_2d_pred = project(self.model, K, pose_pred) model_2d_targets = project(self.model, K, pose_targets) proj_mean_diff=np.mean(find_nearest_point_distance(model_2d_pred,model_2d_targets)) self.proj_mean_diffs.append(proj_mean_diff) self.projection_2d_recorder.append(proj_mean_diff < threshold) def add2_metric(self, pose_pred, pose_targets, icp=False, syn=False, percentage=0.02): diameter = self.diameter * percentage model_pred = np.dot(self.model, pose_pred[:, :3].T) + pose_pred[:, 3] model_targets = np.dot( self.model, pose_targets[:, :3].T) + pose_targets[:, 3] if syn: idxs = nn_utils.find_nearest_point_idx(model_pred, model_targets) # idxs = find_nearest_point_idx(model_pred, model_targets) mean_dist = np.mean(np.linalg.norm( model_pred[idxs] - model_targets, 2, 1)) else: mean_dist = np.mean(np.linalg.norm( model_pred - model_targets, axis=-1)) if icp: self.icp_add.append(mean_dist < diameter) else: self.add2.append(mean_dist < diameter) def add5_metric(self, pose_pred, pose_targets, icp=False, syn=False, percentage=0.05): diameter = self.diameter * percentage model_pred = np.dot(self.model, pose_pred[:, :3].T) + pose_pred[:, 3] model_targets = np.dot( self.model, pose_targets[:, :3].T) + pose_targets[:, 3] if syn: idxs = nn_utils.find_nearest_point_idx(model_pred, model_targets) # idxs = find_nearest_point_idx(model_pred, model_targets) mean_dist = np.mean(np.linalg.norm( model_pred[idxs] - model_targets, 2, 1)) else: mean_dist = np.mean(np.linalg.norm( model_pred - model_targets, axis=-1)) if icp: self.icp_add.append(mean_dist < diameter) else: self.add5.append(mean_dist < diameter) def add_metric(self, pose_pred, pose_targets, icp=False, syn=False, percentage=0.1): diameter = self.diameter * percentage model_pred = np.dot(self.model, pose_pred[:, :3].T) + pose_pred[:, 3] model_targets = np.dot( self.model, pose_targets[:, :3].T) + pose_targets[:, 3] if syn: idxs = 
nn_utils.find_nearest_point_idx(model_pred, model_targets) # idxs = find_nearest_point_idx(model_pred, model_targets) mean_dist = np.mean(np.linalg.norm( model_pred[idxs] - model_targets, 2, 1)) else: mean_dist = np.mean(np.linalg.norm( model_pred - model_targets, axis=-1)) if icp: self.icp_add.append(mean_dist < diameter) else: self.add.append(mean_dist < diameter) def cm_degree_5_metric(self, pose_pred, pose_targets, icp=False): translation_distance = np.linalg.norm( pose_pred[:, 3] - pose_targets[:, 3]) * 100 rotation_diff = np.dot(pose_pred[:, :3], pose_targets[:, :3].T) trace = np.trace(rotation_diff) trace = trace if trace <= 3 else 3 angular_distance = np.rad2deg(np.arccos((trace - 1.) / 2.)) if icp: self.icp_cmd5.append(translation_distance < 5 and angular_distance < 5) else: self.cmd5.append(translation_distance < 5 and angular_distance < 5) def mask_iou(self, output, batch): mask_pred = torch.argmax(output['seg'], dim=1)[ 0].detach().cpu().numpy() mask_gt = batch['mask'][0].detach().cpu().numpy() iou = (mask_pred & mask_gt).sum() / (mask_pred | mask_gt).sum() self.mask_ap.append(iou > 0.7) def icp_refine(self, pose_pred, anno, output, K): depth = read_depth(anno['depth_path']) mask = torch.argmax(output['seg'], dim=1)[0].detach().cpu().numpy() if pose_pred[2, 3] <= 0: return pose_pred depth[mask != 1] = 0 pose_pred_tmp = pose_pred.copy() pose_pred_tmp[:3, 3] = pose_pred_tmp[:3, 3] * 1000 R_refined, t_refined = self.icp_refiner.refine( depth, pose_pred_tmp[:3, :3], pose_pred_tmp[:3, 3], K.copy(), depth_only=True, max_mean_dist_factor=5.0) R_refined, _ = self.icp_refiner.refine( depth, R_refined, t_refined, K.copy(), no_depth=True) pose_pred = np.hstack((R_refined, t_refined.reshape((3, 1)) / 1000)) return pose_pred def icp_refine_(self, pose, anno, output): depth = read_depth(anno['depth_path']).astype(np.uint16) mask = torch.argmax(output['seg'], dim=1)[0].detach().cpu().numpy() mask = mask.astype(np.int32) pose = pose.astype(np.float32) poses = np.zeros([1, 7], dtype=np.float32) poses[0, :4] = mat2quat(pose[:, :3]) poses[0, 4:] = pose[:, 3] poses_new = np.zeros([1, 7], dtype=np.float32) poses_icp = np.zeros([1, 7], dtype=np.float32) fx = 572.41140 fy = 573.57043 px = 325.26110 py = 242.04899 zfar = 6.0 znear = 0.25 factor = 1000.0 error_threshold = 0.01 rois = np.zeros([1, 6], dtype=np.float32) rois[:, :] = 1 self.icp_refiner.solveICP(mask, depth, self.height, self.width, fx, fy, px, py, znear, zfar, factor, rois.shape[0], rois, poses, poses_new, poses_icp, error_threshold ) pose_icp = np.zeros([3, 4], dtype=np.float32) pose_icp[:, :3] = quat2mat(poses_icp[0, :4]) pose_icp[:, 3] = poses_icp[0, 4:] return pose_icp def summarize(self): proj2d = np.mean(self.proj2d) add = np.mean(self.add) # adds = np.mean(self.adds) add2 = np.mean(self.add2) add5 = np.mean(self.add5) cmd5 = np.mean(self.cmd5) ap = np.mean(self.mask_ap) seq_len=len(self.add) print('2d projections metric: {}'.format(proj2d)) print('ADD metric: {}'.format(add)) print('ADD2 metric: {}'.format(add2)) print('ADD5 metric: {}'.format(add5)) # print('ADDS metric: {}'.format(adds)) print('5 cm 5 degree metric: {}'.format(cmd5)) print('mask ap70: {}'.format(ap)) print('seq_len: {}'.format(seq_len)) # if cfg.test.icp: if self.icp_refine: print('2d projections metric after icp: {}'.format( np.mean(self.icp_proj2d))) print('ADD metric after icp: {}'.format(np.mean(self.icp_add))) print('5 cm 5 degree metric after icp: {}'.format( np.mean(self.icp_cmd5))) self.proj2d = [] self.add = [] self.add2 = [] self.add5 = [] # self.adds = 
[] self.cmd5 = [] self.mask_ap = [] self.icp_proj2d = [] self.icp_add = [] self.icp_cmd5 = [] #save pose predictions if len(self.pose_preds)> 0: np.save(f"{self.class_name}_pose_preds.npy",self.pose_preds) self.pose_preds=[] return {'proj2d': proj2d, 'add': add, 'add2': add2, 'add5': add5,'cmd5': cmd5, 'ap': ap, "seq_len": seq_len} def evaluate_rnnpose(self, preds_dict, example): # sample_correspondence_pairs=False, direct_align=False, use_cnnpose=True): len_src_f = example['stack_lengths'][0][0] # lifted_points = example['lifted_points'].squeeze(0) assert len( example['lifted_points']) == 1, "TODO: support bs>1" lifted_points = example['lifted_points'][0].squeeze(0) model_points = example['original_model_points'][:len_src_f] K = example["K"].cpu().numpy().squeeze() R_pred = preds_dict['Ti_pred'].G[:,0, :3,:3].squeeze().detach().cpu().numpy() t_pred = preds_dict['Ti_pred'].G[:,0, :3,3:].squeeze(0).detach().cpu().numpy() pose_pred= preds_dict['Ti_pred'].G[:,0, :3].squeeze().detach().cpu().numpy() # print(example['POSECNN_RT'].dtype, example['rendered_RT'].dtype, flush=True) # R_pred = example['POSECNN_RT'][:,:3,:3].squeeze().detach().cpu().numpy() # t_pred = example['POSECNN_RT'][:,:3,3:].squeeze(0).detach().cpu().numpy() # pose_pred= example['POSECNN_RT'][:, :3].squeeze().detach().cpu().numpy() pose_gt = example['original_RT'].squeeze()[:3].cpu().numpy() ang_err = rotation_angle(pose_gt[:3, :3], R_pred) trans_err = np.linalg.norm(t_pred-pose_gt[:3, -1:]) # 3x1 if self.class_name in ['eggbox', 'glue']: self.add_metric(pose_pred, pose_gt, syn=True) self.add2_metric(pose_pred, pose_gt, syn=True) self.add5_metric(pose_pred, pose_gt, syn=True) else: self.add_metric(pose_pred, pose_gt) self.add2_metric(pose_pred, pose_gt) self.add5_metric(pose_pred, pose_gt) self.projection_2d(pose_pred, pose_gt, K=linemod_config.linemod_K) self.cm_degree_5_metric(pose_pred, pose_gt) # self.mask_iou(output, batch) # vis pc_proj_vis = vis_pointclouds_cv2((pose_gt[:3, :3]@model_points.cpu().numpy( ).T+pose_gt[:3, -1:]).T, example["K"].cpu().numpy().squeeze(), [480,640]) pc_proj_vis_pred = vis_pointclouds_cv2((pose_pred[:3, :3]@model_points.cpu().numpy( ).T+pose_pred[:3, -1:]).T, example["K"].cpu().numpy().squeeze(), [ 480, 640]) return { "ang_err": ang_err, "trans_err": trans_err, "pnp_inliers": -1,#len(inliers), "pc_proj_vis": pc_proj_vis, "pc_proj_vis_pred": pc_proj_vis_pred, "keypoints_2d_vis": np.zeros_like(pc_proj_vis_pred) #keypoints_2d_vis } # class YCBEvaluator: # def __init__(self, class_name, result_dir, icp_refine=False): # self.result_dir = os.path.join(result_dir, "LINEMOD") # os.system('mkdir -p {}'.format(self.result_dir)) # self.class_name = class_name # self.icp_refine = icp_refine # model_path = os.path.join(os.path.dirname(os.path.abspath( # __file__)), '../EXPDATA/BOP_YCB/models', f'obj_{ycb_config.bop_ycb_class2idx[class_name]:06d}.ply' ) # self.model = get_ply_model(model_path, scale=0.001) # # self.diameter = linemod_config.diameters[class_name] / 100 # self.diameter = ycb_config.model_info[ str(ycb_config.bop_ycb_class2idx[class_name]) ]["diameter"]*0.001 # in mm # / 1000 # self.proj2d = [] # self.add = [] # self.adds=[] # self.cmd5 = [] # self.add_dist=[] # self.adds_dist=[] # self.icp_proj2d = [] # self.icp_add = [] # self.icp_cmd5 = [] # self.mask_ap = [] # self.pose_preds=[] # self.height = 480 # self.width = 640 # model = inout.load_ply(model_path) # model['pts'] = model['pts'] * 1000 # # self.icp_refiner = icp_utils.ICPRefiner(model, (self.width, self.height)) if cfg.test.icp else 
None # self.icp_refiner = icp_utils.ICPRefiner( # model, (self.width, self.height)) if icp_refine else None # self.direct_align_module = DirectAlignment(None) # # if cfg.test.icp: # # self.icp_refiner = ext_.Synthesizer(os.path.realpath(model_path)) # # self.icp_refiner.setup(self.width, self.height) # def projection_2d(self, pose_pred, pose_targets, K, icp=False, threshold=5): # model_2d_pred = project(self.model, K, pose_pred) # model_2d_targets = project(self.model, K, pose_targets) # proj_mean_diff = np.mean(np.linalg.norm( # model_2d_pred - model_2d_targets, axis=-1)) # if icp: # self.icp_proj2d.append(proj_mean_diff < threshold) # else: # self.proj2d.append(proj_mean_diff < threshold) # def projection_2d_sym(self, pose_pred, pose_targets, K, threshold=5): # model_2d_pred = project(self.model, K, pose_pred) # model_2d_targets = project(self.model, K, pose_targets) # proj_mean_diff=np.mean(find_nearest_point_distance(model_2d_pred,model_2d_targets)) # self.proj_mean_diffs.append(proj_mean_diff) # self.projection_2d_recorder.append(proj_mean_diff < threshold) # def add_metric(self, pose_pred, pose_targets, icp=False, syn=False, percentage=0.1): # diameter = self.diameter * percentage # model_pred = np.dot(self.model, pose_pred[:, :3].T) + pose_pred[:, 3] # model_targets = np.dot( # self.model, pose_targets[:, :3].T) + pose_targets[:, 3] # if syn: # idxs = nn_utils.find_nearest_point_idx(model_pred, model_targets) # # idxs = find_nearest_point_idx(model_pred, model_targets) # mean_dist = np.mean(np.linalg.norm( # model_pred[idxs] - model_targets, 2, 1)) # else: # mean_dist = np.mean(np.linalg.norm( # model_pred - model_targets, axis=-1)) # self.add_dist.append(mean_dist) # if icp: # self.icp_add.append(mean_dist < diameter) # else: # self.add.append(mean_dist < diameter) # def auc_add(self, max_thresh=0.1): # add_dist = np.array(self.add_dist) # interval=0.001 # acc=0 # for k in range(int(max_thresh/interval)): # acc+= interval* np.sum( ((k+1)*interval)>=add_dist)/ add_dist.shape[0] # return acc/max_thresh # def auc_adds(self, max_thresh=0.1): # add_dist = np.array(self.adds_dist) # interval=0.001 # acc=0 # for k in range(int(max_thresh/interval)): # acc+= interval* np.sum( ((k+1)*interval)>=add_dist )/ add_dist.shape[0] # return acc/max_thresh # def adds_metric(self, pose_pred, pose_targets, icp=False, syn=False, percentage=0.1): # diameter = self.diameter * percentage # model_pred = np.dot(self.model, pose_pred[:, :3].T) + pose_pred[:, 3] # model_targets = np.dot( # self.model, pose_targets[:, :3].T) + pose_targets[:, 3] # if syn: # idxs = nn_utils.find_nearest_point_idx(model_pred, model_targets) # # idxs = find_nearest_point_idx(model_pred, model_targets) # mean_dist = np.mean(np.linalg.norm( # model_pred[idxs] - model_targets, 2, 1)) # else: # mean_dist = np.mean(np.linalg.norm( # model_pred - model_targets, axis=-1)) # self.adds_dist.append(mean_dist) # if icp: # self.icp_add.append(mean_dist < diameter) # else: # self.adds.append(mean_dist < diameter) # def cm_degree_5_metric(self, pose_pred, pose_targets, icp=False): # translation_distance = np.linalg.norm( # pose_pred[:, 3] - pose_targets[:, 3]) * 100 # rotation_diff = np.dot(pose_pred[:, :3], pose_targets[:, :3].T) # trace = np.trace(rotation_diff) # trace = trace if trace <= 3 else 3 # angular_distance = np.rad2deg(np.arccos((trace - 1.) 
/ 2.)) # if icp: # self.icp_cmd5.append(translation_distance < # 5 and angular_distance < 5) # else: # self.cmd5.append(translation_distance < 5 and angular_distance < 5) # def mask_iou(self, output, batch): # mask_pred = torch.argmax(output['seg'], dim=1)[ # 0].detach().cpu().numpy() # mask_gt = batch['mask'][0].detach().cpu().numpy() # iou = (mask_pred & mask_gt).sum() / (mask_pred | mask_gt).sum() # self.mask_ap.append(iou > 0.7) # def icp_refine(self, pose_pred, anno, output, K): # depth = read_depth(anno['depth_path']) # mask = torch.argmax(output['seg'], dim=1)[0].detach().cpu().numpy() # if pose_pred[2, 3] <= 0: # return pose_pred # depth[mask != 1] = 0 # pose_pred_tmp = pose_pred.copy() # pose_pred_tmp[:3, 3] = pose_pred_tmp[:3, 3] * 1000 # R_refined, t_refined = self.icp_refiner.refine( # depth, pose_pred_tmp[:3, :3], pose_pred_tmp[:3, 3], K.copy(), depth_only=True, max_mean_dist_factor=5.0) # R_refined, _ = self.icp_refiner.refine( # depth, R_refined, t_refined, K.copy(), no_depth=True) # pose_pred = np.hstack((R_refined, t_refined.reshape((3, 1)) / 1000)) # return pose_pred # def icp_refine_(self, pose, anno, output): # depth = read_depth(anno['depth_path']).astype(np.uint16) # mask = torch.argmax(output['seg'], dim=1)[0].detach().cpu().numpy() # mask = mask.astype(np.int32) # pose = pose.astype(np.float32) # poses = np.zeros([1, 7], dtype=np.float32) # poses[0, :4] = mat2quat(pose[:, :3]) # poses[0, 4:] = pose[:, 3] # poses_new = np.zeros([1, 7], dtype=np.float32) # poses_icp = np.zeros([1, 7], dtype=np.float32) # fx = 572.41140 # fy = 573.57043 # px = 325.26110 # py = 242.04899 # zfar = 6.0 # znear = 0.25 # factor = 1000.0 # error_threshold = 0.01 # rois = np.zeros([1, 6], dtype=np.float32) # rois[:, :] = 1 # self.icp_refiner.solveICP(mask, depth, # self.height, self.width, # fx, fy, px, py, # znear, zfar, # factor, # rois.shape[0], rois, # poses, poses_new, poses_icp, # error_threshold # ) # pose_icp = np.zeros([3, 4], dtype=np.float32) # pose_icp[:, :3] = quat2mat(poses_icp[0, :4]) # pose_icp[:, 3] = poses_icp[0, 4:] # return pose_icp # def summarize(self): # proj2d = np.mean(self.proj2d) # add = np.mean(self.add) # adds = np.mean(self.adds) # cmd5 = np.mean(self.cmd5) # ap = np.mean(self.mask_ap) # try: # auc_add=self.auc_add(max_thresh=0.1) # except: # auc_add=0 # try: # auc_adds=self.auc_adds(max_thresh=0.1) # except: # auc_adds=0 # seq_len=len(self.add) # print('2d projections metric: {}'.format(proj2d)) # print('ADD metric: {}'.format(add)) # print('AUC ADD metric: {}'.format(auc_add)) # print('ADDS metric: {}'.format(adds)) # print('AUC ADDS metric: {}'.format(auc_adds)) # print('5 cm 5 degree metric: {}'.format(cmd5)) # print('mask ap70: {}'.format(ap)) # print('seq_len: {}'.format(seq_len)) # # if cfg.test.icp: # if self.icp_refine: # print('2d projections metric after icp: {}'.format( # np.mean(self.icp_proj2d))) # print('ADD metric after icp: {}'.format(np.mean(self.icp_add))) # print('5 cm 5 degree metric after icp: {}'.format( # np.mean(self.icp_cmd5))) # self.proj2d = [] # self.add = [] # self.adds = [] # self.cmd5 = [] # self.mask_ap = [] # self.icp_proj2d = [] # self.icp_add = [] # self.icp_cmd5 = [] # self.add_dist = [] # self.adds_dist = [] # #save pose predictions # if len(self.pose_preds)> 0: # np.save(f"{self.class_name}_pose_preds.npy",self.pose_preds) # self.pose_preds=[] # return {'proj2d': proj2d, 'add': add, 'adds': adds,'cmd5': cmd5, 'ap': ap, "seq_len": seq_len} # def evaluate_flowpose(self, preds_dict, example, sample_correspondence_pairs=False, 
direct_align=False, use_cnnpose=True): # len_src_f = example['stack_lengths'][0][0] # # lifted_points = example['lifted_points'].squeeze(0) # assert len( example['lifted_points']) == 1, "TODO: support bs>1" # lifted_points = example['lifted_points'][0].squeeze(0) # model_points = example['original_model_points'][:len_src_f] # K = example["K"].cpu().numpy().squeeze() # if not use_cnnpose: # use pnp # len_src_f = example['stack_lengths'][0][0] # descriptors_2d = preds_dict['descriptors_2d'] # descriptors_3d = preds_dict['descriptors_3d'][:len_src_f] # mask = cv2.erode(( example['depth'].detach().cpu().numpy().squeeze()>0).astype(np.uint8)*255, kernel=np.ones([3,3], np.uint8),iterations = 1) # mask=torch.tensor(mask, device=descriptors_3d.device) # ys_, xs_ = torch.nonzero(mask, as_tuple=True) # # ys_, xs_ = torch.nonzero(example['depth'].squeeze(), as_tuple=True) # descriptors_2d = descriptors_2d[:, :, # ys_, xs_].squeeze().permute([1, 0]) # img_coods = torch.stack([xs_, ys_], dim=-1) # if sample_correspondence_pairs: # correspondences_2d3d = example['correspondences_2d3d'].squeeze() # if(correspondences_2d3d.size(0) > 256): # choice = np.random.permutation( # correspondences_2d3d.size(0))[:256] # correspondences_2d3d = correspondences_2d3d[choice] # src_idx = correspondences_2d3d[:, 0] # tgt_idx = correspondences_2d3d[:, 1] # else: # # src_idx = np.random.permutation(len(xs_))[:256] # # src_idx = np.random.permutation(len(xs_))[:256] # _, idx = fps_utils.farthest_point_sampling_withidx(np.stack([xs_.cpu().numpy( # ), ys_.cpu().numpy(), np.zeros_like(xs_.cpu().numpy())], axis=-1), 256, False) # # _, idx = fps_utils.farthest_point_sampling_withidx(np.stack([xs_.cpu().numpy( # # ), ys_.cpu().numpy(), np.zeros_like(xs_.cpu().numpy())], axis=-1), 1024, False) # # src_idx = np.arange(len(xs_))[::len(xs_)//256] # src_idx = np.arange(len(xs_))[idx] # tgt_idx = np.arange(len(model_points)) # src_pcd, tgt_pcd = lifted_points[src_idx], model_points[tgt_idx] # img_coods = img_coods[src_idx] # src_feats, tgt_feats = descriptors_2d[src_idx], descriptors_3d[tgt_idx] # feats_dist = torch.sqrt(square_distance( # src_feats[None, :, :], tgt_feats[None, :, :], normalised=True)).squeeze(0) # _, sel_idx = torch.min(feats_dist, -1) # K = example["K"].cpu().numpy().squeeze() # try: # # retval, R_pred,t_pred, inliers =cv2.solvePnPRansac(tgt_pcd[sel_idx].cpu().numpy(), img_coods.cpu().numpy().astype(np.float32), K,distCoeffs=np.zeros(4),reprojectionError=1) # retval, R_pred, t_pred, inliers = cv2.solvePnPRansac(tgt_pcd[sel_idx].cpu().numpy(), img_coods.cpu( # ).numpy().astype(np.float32), K, distCoeffs=np.zeros(4), reprojectionError=1, iterationsCount=1000) # if inliers is None: # raise ValueError # except: # # try: # print("PNP RANSAC reprojectionError threshold =3") # retval, R_pred, t_pred, inliers = cv2.solvePnPRansac(tgt_pcd[sel_idx].cpu().numpy(), img_coods.cpu( # ).numpy().astype(np.float32), K, distCoeffs=np.zeros(4), reprojectionError=3, iterationsCount=1000) # R_pred, _ = cv2.Rodrigues(R_pred) # pose_pred = np.concatenate([R_pred, t_pred], axis=-1) # else: # K = example["K"].cpu().numpy().squeeze() # R_pred = preds_dict['Ti_pred'].G[:,0, :3,:3].squeeze().detach().cpu().numpy() # t_pred = preds_dict['Ti_pred'].G[:,0, :3,3:].squeeze(0).detach().cpu().numpy() # pose_pred= preds_dict['Ti_pred'].G[:,0, :3].squeeze().detach().cpu().numpy() # # print(example['POSECNN_RT'].dtype, example['rendered_RT'].dtype, flush=True) # # R_pred = example['POSECNN_RT'][:,:3,:3].squeeze().detach().cpu().numpy() # # t_pred = 
example['POSECNN_RT'][:,:3,3:].squeeze(0).detach().cpu().numpy() # # pose_pred= example['POSECNN_RT'][:, :3].squeeze().detach().cpu().numpy() # # pose_gt = example['RT'].squeeze()[:3].cpu().numpy() # pose_gt = example['original_RT'].squeeze()[:3].cpu().numpy() # ang_err = rotation_angle(pose_gt[:3, :3], R_pred) # trans_err = np.linalg.norm(t_pred-pose_gt[:3, -1:]) # 3x1 # if self.class_name in ['024_bowl', '036_wood_block', '051_large_clamp', '052_extra_large_clamp', '061_foam_brick']: # self.add_metric(pose_pred, pose_gt, syn=True) # else: # self.add_metric(pose_pred, pose_gt) # self.adds_metric(pose_pred, pose_gt, syn=True) # self.projection_2d(pose_pred, pose_gt, K=linemod_config.linemod_K) # self.cm_degree_5_metric(pose_pred, pose_gt) # # self.mask_iou(output, batch) # # vis # pc_proj_vis = vis_pointclouds_cv2((pose_gt[:3, :3]@model_points.cpu().numpy( # ).T+pose_gt[:3, -1:]).T, example["K"].cpu().numpy().squeeze(), [480,640]) # pc_proj_vis_pred = vis_pointclouds_cv2((pose_pred[:3, :3]@model_points.cpu().numpy( # ).T+pose_pred[:3, -1:]).T, example["K"].cpu().numpy().squeeze(), [ 480, 640]) # if trans_err > 0.5: # print("translation err>0.5") # # cv2.imwrite(f'tmp/{len(self.add)}.png', # # keypoints_2d_vis[..., ::-1]*255,) # # torch.save( example, f'tmp/{len(self.add)}.pt' ) # return { # "ang_err": ang_err, # "trans_err": trans_err, # "pnp_inliers": -1,#len(inliers), # "pc_proj_vis": pc_proj_vis, # "pc_proj_vis_pred": pc_proj_vis_pred, # "keypoints_2d_vis": np.zeros_like(pc_proj_vis_pred) #keypoints_2d_vis # } ================================================ FILE: utils/furthest_point_sample.py ================================================ import numpy as np from scipy import spatial def fragmentation_fps(vertices, num_frags): """Fragmentation by the furthest point sampling algorithm. The fragment centers are found by iterative selection of the vertex from vertices that is furthest from the already selected vertices. The algorithm starts with the centroid of the object model which is then discarded from the final set of fragment centers. A fragment is defined by a set of points on the object model that are the closest to the fragment center. Args: vertices: [num_vertices, 3] ndarray with 3D vertices of the object model. num_frags: Number of fragments to define. Returns: [num_frags, 3] ndarray with fragment centers and [num_vertices] ndarray storing for each vertex the ID of the assigned fragment. """ # Start with the origin of the model coordinate system. frag_centers = [np.array([0., 0., 0.])] # Calculate distances to the center from all the vertices. nn_index = spatial.cKDTree(frag_centers) nn_dists, _ = nn_index.query(vertices, k=1) center_inds=[] for _ in range(num_frags): # Select the furthest vertex as the next center. new_center_ind = np.argmax(nn_dists) new_center = vertices[new_center_ind] frag_centers.append(vertices[new_center_ind]) center_inds.append(new_center_ind) # Update the distances to the nearest center. nn_dists[new_center_ind] = -1 nn_dists = np.minimum( nn_dists, np.linalg.norm(vertices - new_center, axis=1)) # Remove the origin. frag_centers.pop(0) frag_centers = np.array(frag_centers) # Assign vertices to the fragments. # TODO: This information can be maintained during the FPS algorithm. 
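# A minimal sketch of the bookkeeping the TODO above suggests: keep the
# vertex-to-fragment assignment up to date inside the FPS loop itself, making
# the final cKDTree query unnecessary. Hypothetical helper, not used anywhere
# in this repo; it reuses this file's imports and assumes `vertices` is an
# [N, 3] float ndarray.
def fragmentation_fps_incremental(vertices, num_frags):
    nn_dists = np.linalg.norm(vertices, axis=1)  # distances to the origin seed
    frag_ids = np.full(len(vertices), -1, dtype=np.int64)
    center_inds = []
    for frag_id in range(num_frags):
        new_center_ind = int(np.argmax(nn_dists))
        center_inds.append(new_center_ind)
        dists = np.linalg.norm(vertices - vertices[new_center_ind], axis=1)
        # Re-assign exactly those vertices that are closer to the new center
        # than to their current one, then update the nearest-center distances.
        frag_ids[dists < nn_dists] = frag_id
        nn_dists = np.minimum(nn_dists, dists)
    return vertices[center_inds], np.array(center_inds), frag_ids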
nn_index = spatial.cKDTree(frag_centers) _, vertex_frag_ids = nn_index.query(vertices, k=1) # return frag_centers, vertex_frag_ids return frag_centers, np.array(center_inds), vertex_frag_ids if __name__=="__main__": #test pass ================================================ FILE: utils/geometric.py ================================================ import numpy as np def range_to_depth(mask, range, K): ''' Transform the range image to depth image ''' f=K[0,0] cx=K[0,2] cy=K[1,2] ys_, xs_=np.nonzero(mask) rngs=range[ys_,xs_] # xs,ys=np.asarray(xs,np.float32),np.asarray(ys,np.float32) xs,ys=np.asarray(xs_,np.float32)+0.5,np.asarray(ys_,np.float32)+0.5 Zs=f*rngs/( f**2 + (cx-xs)**2 + (cy-ys)**2 )**0.5 depth = np.zeros_like(range) depth[ys_,xs_] = Zs return depth def mask_depth_to_point_cloud(mask,depth,K): ''' lift the depth under the mask to 3D point clouds ''' ys, xs=np.nonzero(mask) dpts=depth[ys,xs] # xs,ys=np.asarray(xs,np.float32),np.asarray(ys,np.float32) xs,ys=np.asarray(xs,np.float32)+0.5,np.asarray(ys,np.float32)+0.5 xys=np.concatenate([xs[:,None],ys[:,None]],1) xys*=dpts[:,None] xyds=np.concatenate([xys,dpts[:,None]],1) pts=np.matmul(xyds,np.linalg.inv(K).transpose()) return pts.astype(np.float32), np.stack([xs,ys], axis=-1 ) def chordal_distance(R1,R2): return np.sqrt(np.sum((R1-R2)*(R1-R2))) def rotation_angle(R1, R2): return 2*np.arcsin( chordal_distance(R1,R2)/np.sqrt(8) ) def render_pointcloud(pc, T, K, render_image_size): """ Args: T: (B,3,4) or (B,4,4) K: (B,3,3) render_image_size (tuple): (h,w) near (float, optional): Defaults to 0.1. far (int, optional): Defaults to 6. mode: 'bilinear' or 'neareast' """ B=T.shape[0] # T = self.cam_opencv2pytch3d.to(device=T.device)@T ## X_cam = X_world R + t # R = T[...,:3,:3].transpose(-1,-2) R = T[...,:3,:3].transpose( [0,2,1] ) t = T[...,:3,3] #render depths # vert_depths= (self.verts@R+t).squeeze(0)[...,2:] X_cam= (pc@R+t)#.squeeze(0) x=X_cam@K.transpose([0,2,1]) #BxNx3 depth = x[...,-1] x = x/x[...,-1:] out = np.zeros([1,1, *render_image_size], dtype=R.dtype) out[:, :, np.round(x[0, :, 1]).astype(np.int64).clip(0, out.shape[2]-1), np.round(x[0, :, 0]).astype(np.int64).clip(0, out.shape[3]-1)] = depth return out #1x1xHxW ================================================ FILE: utils/img_utils.py ================================================ import torch from matplotlib import cm import matplotlib.pyplot as plt import matplotlib.patches as patches import numpy as np import cv2 from PIL import Image def read_depth(path): if (path[-3:] == 'dpt'): with open(path) as f: h,w = np.fromfile(f,dtype=np.uint32,count=2) data = np.fromfile(f,dtype=np.uint16,count=w*h) depth = data.reshape((h,w)) else: depth = np.asarray(Image.open(path)).copy() return depth def unnormalize_img(img, mean, std, in_gpu=True): """ img: [3, h, w] """ img = img.detach().cpu().clone() # img = img / 255. img *= torch.tensor(std).view(3, 1, 1) img += torch.tensor(mean).view(3, 1, 1) min_v = torch.min(img) img = (img - min_v) / (torch.max(img) - min_v) return img def draw_seg_th(seg, num_cls=-1): """ seg: [h, w] """ r = seg.clone() g = seg.clone() b = seg.clone() num_cls = len(colors) if num_cls == -1 else num_cls seg_colors = 1 - colors[:, 0, 0] for l in range(num_cls): inds = (seg == l) r[inds] = int(seg_colors[l][0]) g[inds] = int(seg_colors[l][1]) b[inds] = int(seg_colors[l][2]) seg = torch.stack([r, g, b], dim=0).float() / 255. 
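    # seg is now a [3, h, w] float tensor with the per-class colors scaled to [0, 1]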
return seg def draw_seg_prob_th(seg_prob): """ seg_prob: [num_cls, h, w] """ num_cls = seg_prob.shape[0] seg = torch.argmax(seg_prob, dim=0).long() return draw_seg_th(seg, num_cls) def draw_vertex_th(vertex): """ vertex: [h, w] """ min_ver = torch.min(vertex) max_ver = torch.max(vertex) vertex = (vertex - min_ver) / (max_ver - min_ver) vertex = cmap(vertex.detach().cpu().numpy())[..., :3] return torch.tensor(vertex).permute(2, 0, 1) def visualize_coco_bbox(img, boxes): """ img: [h, w, 3] boxes: [n, 4], [[x, y, x_max, y_max]] """ _, ax = plt.subplots(1) ax.imshow(img) n = len(boxes) for ni in range(n): x, y, x_max, y_max = boxes[ni] ax.add_patch(patches.Polygon(xy=[[x, y], [x, y_max], [x_max, y_max], [x_max, y]], fill=False, linewidth=1, edgecolor='r')) plt.show() def visualize_heatmap(img, hm): """ img: [h, w, 3] hm: [c, h, w] """ hm = np.max(hm, axis=0) h, w = hm.shape[:2] img = cv2.resize(img, dsize=(w, h), interpolation=cv2.INTER_LINEAR) hm = np.array([255, 255, 255]) - (hm.reshape(h, w, 1) * colors[0]).astype(np.uint8) ratio = 0.5 blend = (img * ratio + hm * (1 - ratio)).astype(np.uint8) _, (ax1, ax2) = plt.subplots(1, 2) ax1.imshow(img) ax2.imshow(blend) plt.show() def visualize_coco_img_mask(img, mask): _, (ax1, ax2) = plt.subplots(1, 2) ax1.imshow(img) ax2.imshow(mask) plt.show() def visualize_color_aug(orig_img, aug_img): _, (ax1, ax2) = plt.subplots(1, 2) ax1.imshow(orig_img[:, :, [2, 1, 0]]) ax2.imshow(aug_img[:, :, [2, 1, 0]]) plt.show() def visualize_coco_ann(coco, img, ann): plt.imshow(img) coco.showAnns(ann) plt.show() def bgr_to_rgb(img): return img[:, :, [2, 1, 0]] cmap = cm.get_cmap() color_list = np.array( [ 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494, 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078, 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000, 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667, 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000, 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000, 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000, 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500, 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667, 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333, 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000, 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333, 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000, 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000, 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167, 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286, 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714, 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000, 0.50, 0.5, 0 ] ).astype(np.float32) colors = color_list.reshape((-1, 3)) * 255 colors = np.array(colors, dtype=np.uint8).reshape(len(colors), 1, 1, 3) ================================================ FILE: utils/log_tool.py 
================================================ import numpy as np from tensorboardX import SummaryWriter import json from pathlib import Path import logging def _flat_nested_json_dict(json_dict, flatted, sep=".", start=""): for k, v in json_dict.items(): if isinstance(v, dict): _flat_nested_json_dict(v, flatted, sep, start + sep + str(k)) else: flatted[start + sep + str(k)] = v def flat_nested_json_dict(json_dict, sep=".") -> dict: """flat a nested json-like dict. this function make shadow copy. """ flatted = {} for k, v in json_dict.items(): if isinstance(v, dict): _flat_nested_json_dict(v, flatted, sep, str(k)) else: flatted[str(k)] = v return flatted def metric_to_str(metrics, sep='.'): flatted_metrics = flat_nested_json_dict(metrics, sep) metrics_str_list = [] for k, v in flatted_metrics.items(): if isinstance(v, float): metrics_str_list.append(f"{k}={v:.5}") elif isinstance(v, (list, tuple)): if v and isinstance(v[0], float): v_str = ', '.join([f"{e:.5}" for e in v]) metrics_str_list.append(f"{k}=[{v_str}]") else: metrics_str_list.append(f"{k}={v}") else: metrics_str_list.append(f"{k}={v}") return ', '.join(metrics_str_list) class SimpleModelLog: """For simple log. generate 4 kinds of log: 1. simple log.txt, all metric dicts are flattened to produce readable results. 2. TensorBoard scalars and texts 3. multi-line json file log.json.lst 4. tensorboard_scalars.json, all scalars are stored in this file in tensorboard json format. """ def __init__(self, model_dir, disable=False): self.model_dir = Path(model_dir) self.log_file = None self.log_mjson_file = None self.summary_writter = None self.metrics = [] self._text_current_gstep = -1 self._tb_texts = [] self.disable = disable logging.basicConfig(level = logging.INFO,format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s') self.logger = logging.getLogger(__name__) def open(self): if self.disable: return self model_dir = self.model_dir assert model_dir.exists() summary_dir = model_dir / 'summary' summary_dir.mkdir(parents=True, exist_ok=True) log_mjson_file_path = model_dir / f'log.json.lst' if log_mjson_file_path.exists(): with open(log_mjson_file_path, 'r') as f: for line in f.readlines(): self.metrics.append(json.loads(line)) log_file_path = model_dir / f'log.txt' self.log_mjson_file = open(log_mjson_file_path, 'a') self.log_file = open(log_file_path, 'a') self.summary_writter = SummaryWriter(str(summary_dir)) return self def close(self): if self.disable: return assert self.summary_writter is not None self.log_mjson_file.close() self.log_file.close() tb_json_path = str(self.model_dir / "tensorboard_scalars.json") self.summary_writter.export_scalars_to_json(tb_json_path) self.summary_writter.close() self.log_mjson_file = None self.log_file = None self.summary_writter = None def log_text(self, text, step, tag="regular log"): if self.disable: return """This function only add text to log.txt and tensorboard texts """ print(text,flush=True) print(text, file=self.log_file,flush=True) if step > self._text_current_gstep and self._text_current_gstep != -1: total_text = '\n'.join(self._tb_texts) self.summary_writter.add_text(tag, total_text, global_step=step) self._tb_texts = [] self._text_current_gstep = step else: self._tb_texts.append(text) if self._text_current_gstep == -1: self._text_current_gstep = step def log_metrics(self, metrics: dict, step): if self.disable: return flatted_summarys = flat_nested_json_dict(metrics, "/") for k, v in flatted_summarys.items(): if isinstance(v, (list, tuple)): if any([isinstance(e, str) for e in 
v]): continue v_dict = {str(i): e for i, e in enumerate(v)} for k1, v1 in v_dict.items(): self.summary_writter.add_scalar(k + "/" + k1, v1, step) else: if isinstance(v, str): continue self.summary_writter.add_scalar(k, v, step) log_str = metric_to_str(metrics) # print(log_str, flush=True) self.logger.info(log_str) print(log_str, file=self.log_file, flush=True) print(json.dumps(metrics), file=self.log_mjson_file, flush=True) def log_images(self, images: dict, step, prefix=''): if self.disable: return for k, v in images.items(): self.summary_writter.add_images(prefix+str(k), v, step) print(f"Summarize images {k}",flush=True) def log_histograms(self, vals: dict, step, prefix=''): if self.disable: return for k, v in vals.items(): self.summary_writter.add_histogram(prefix+str(k), v, step) print(f"Summarize histograms {k}",flush=True) # def log_distributions(self, vals: dict, step, prefix=''): # if self.disable: # return # for k, v in vals.items(): # self.summary_writter.histogram(prefix+str(k), v, step) # print(f"Summarize histograms {k}",flush=True) ================================================ FILE: utils/pose_utils.py ================================================ """ Copyright (C) 2018 NVIDIA Corporation. All rights reserved. Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). """ import torch from torch.nn import Module from torch.autograd import Variable from torch.nn.functional import pad import numpy as np import scipy.linalg as slin import math import transforms3d.quaternions as txq import transforms3d.euler as txe # see for formulas: # https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-801-machine-vision-fall-2004/readings/quaternions.pdf # and "Quaternion and Rotation" - Yan-Bin Jia, September 18, 2016 #from IPython.core.debugger import set_trace # PYTORCH def pose_padding(P): """ Padding 3x4 SE3 to 4x4 Args: P ([type]): [description] dim ([type]): [description] """ assert (P.shape[-2] == 3 and P.shape[-1] == 4) or (P.shape[-2] == 2 and P.shape[-1] == 3) pad = torch.zeros_like(P[...,:1,:] ) pad[...,-1]=1 return torch.cat([P,pad], dim=-2) def vdot(v1, v2): """ Dot product along the dim=1 :param v1: N x d :param v2: N x d :return: N x 1 """ out = torch.mul(v1, v2) out = torch.sum(out, 1) return out def normalize(x, p=2, dim=0): """ Divides a tensor along a certain dim by the Lp norm :param x: :param p: Lp norm :param dim: Dimension to normalize along :return: """ xn = x.norm(p=p, dim=dim) x = x / xn.unsqueeze(dim=dim) return x def qmult(q1, q2): """ Multiply 2 quaternions :param q1: Tensor N x 4 :param q2: Tensor N x 4 :return: quaternion product, Tensor N x 4 """ q1s, q1v = q1[:, :1], q1[:, 1:] q2s, q2v = q2[:, :1], q2[:, 1:] qs = q1s*q2s - vdot(q1v, q2v) qv = q1v.mul(q2s.expand_as(q1v)) + q2v.mul(q1s.expand_as(q2v)) +\ torch.cross(q1v, q2v, dim=1) q = torch.cat((qs, qv), dim=1) # normalize q = normalize(q, dim=1) return q def qinv(q): """ Inverts quaternions :param q: N x 4 :return: q*: N x 4 """ q_inv = torch.cat((q[:, :1], -q[:, 1:]), dim=1) return q_inv def qexp_t(q): """ Applies exponential map to log quaternion :param q: N x 3 :return: N x 4 """ n = torch.norm(q, p=2, dim=1, keepdim=True) n = torch.clamp(n, min=1e-8) q = q * torch.sin(n) q = q / n q = torch.cat((torch.cos(n), q), dim=1) return q def qlog_t(q): """ Applies the log map to a quaternion :param q: N x 4 :return: N x 3 """ n = torch.norm(q[:, 1:], p=2, dim=1, keepdim=True) n = torch.clamp(n, min=1e-8) q = q[:, 1:] * 
torch.acos(torch.clamp(q[:, :1], min=-1.0, max=1.0)) q = q / n return q def qexp_t_safe(q): """ Applies exponential map to log quaternion (safe implementation that does not maintain gradient flow) :param q: N x 3 :return: N x 4 """ q = torch.from_numpy(np.asarray([qexp(qq) for qq in q.numpy()], dtype=np.float32)) return q def qlog_t_safe(q): """ Applies the log map to a quaternion (safe implementation that does not maintain gradient flow) :param q: N x 4 :return: N x 3 """ q = torch.from_numpy(np.asarray([qlog(qq) for qq in q.numpy()], dtype=np.float32)) return q def rotate_vec_by_q(t, q): """ rotates vector t by quaternion q :param t: vector, Tensor N x 3 :param q: quaternion, Tensor N x 4 :return: t rotated by q: t' = t + 2*qs*(qv x t) + 2*qv x (qv x r) """ qs, qv = q[:, :1], q[:, 1:] b = torch.cross(qv, t, dim=1) c = 2 * torch.cross(qv, b, dim=1) b = 2 * b.mul(qs.expand_as(b)) tq = t + b + c return tq def compose_pose_quaternion(p1, p2): """ pyTorch implementation :param p1: input pose, Tensor N x 7 :param p2: pose to apply, Tensor N x 7 :return: output pose, Tensor N x 7 all poses are translation + quaternion #!comments: first apply p2 and then p1 !! """ p1t, p1q = p1[:, :3], p1[:, 3:] p2t, p2q = p2[:, :3], p2[:, 3:] q = qmult(p1q, p2q) t = p1t + rotate_vec_by_q(p2t, p1q) return torch.cat((t, q), dim=1) def invert_pose_quaternion(p): """ inverts the pose :param p: pose, Tensor N x 7 :return: inverted pose """ t, q = p[:, :3], p[:, 3:] q_inv = qinv(q) tinv = -rotate_vec_by_q(t, q_inv) return torch.cat((tinv, q_inv), dim=1) def calc_vo(p0, p1): """ calculates VO (in the p0 frame) from 2 poses :param p0: N x 7 :param p1: N x 7 """ # assert p0.shape==p1.shape return compose_pose_quaternion(invert_pose_quaternion(p0), p1) def calc_vo_logq(p0, p1): """ VO (in the p0 frame) (logq) :param p0: N x 6 :param p1: N x 6 :return: N-1 x 6 """ q0 = qexp_t(p0[:, 3:]) q1 = qexp_t(p1[:, 3:]) vos = calc_vo(torch.cat((p0[:, :3], q0), dim=1), torch.cat((p1[:, :3], q1), dim=1)) vos_q = qlog_t(vos[:, 3:]) return torch.cat((vos[:, :3], vos_q), dim=1) def calc_vo_relative(p0, p1): """ calculates VO (in the world frame) from 2 poses :param p0: N x 7 :param p1: N x 7 """ vos_t = p1[:, :3] - p0[:, :3] vos_q = qmult(qinv(p0[:, 3:]), p1[:, 3:]) return torch.cat((vos_t, vos_q), dim=1) def calc_vo_relative_logq(p0, p1): """ Calculates VO (in the world frame) from 2 poses (log q) :param p0: N x 6 :param p1: N x 6 :return: """ q0 = qexp_t(p0[:, 3:]) q1 = qexp_t(p1[:, 3:]) vos = calc_vo_relative(torch.cat((p0[:, :3], q0), dim=1), torch.cat((p1[:, :3], q1), dim=1)) vos_q = qlog_t(vos[:, 3:]) return torch.cat((vos[:, :3], vos_q), dim=1) def calc_vo_relative_logq_safe(p0, p1): """ Calculates VO (in the world frame) from 2 poses (log q) through numpy fns :param p0: N x 6 :param p1: N x 6 :return: """ vos_t = p1[:, :3] - p0[:, :3] q0 = qexp_t_safe(p0[:, 3:]) q1 = qexp_t_safe(p1[:, 3:]) vos_q = qmult(qinv(q0), q1) vos_q = qlog_t_safe(vos_q) return torch.cat((vos_t, vos_q), dim=1) def calc_vo_logq_safe(p0, p1): """ VO in the p0 frame using numpy fns :param p0: :param p1: :return: """ vos_t = p1[:, :3] - p0[:, :3] q0 = qexp_t_safe(p0[:, 3:]) q1 = qexp_t_safe(p1[:, 3:]) vos_t = rotate_vec_by_q(vos_t, qinv(q0)) vos_q = qmult(qinv(q0), q1) vos_q = qlog_t_safe(vos_q) return torch.cat((vos_t, vos_q), dim=1) def calc_vos_simple(poses): """ calculate the VOs, from a list of consecutive poses :param poses: N x T x 7 :return: N x (T-1) x 7 """ vos = [] for p in poses: pvos = [p[i+1].unsqueeze(0) - p[i].unsqueeze(0) for i in 
range(len(p)-1)] vos.append(torch.cat(pvos, dim=0)) vos = torch.stack(vos, dim=0) return vos def calc_vos(poses): """ calculate the VOs, from a list of consecutive poses (in the p0 frame) :param poses: N x T x 7 :return: N x (T-1) x 7 """ vos = [] for p in poses: pvos = [calc_vo_logq(p[i].unsqueeze(0), p[i+1].unsqueeze(0)) for i in range(len(p)-1)] vos.append(torch.cat(pvos, dim=0)) vos = torch.stack(vos, dim=0) return vos def calc_vos_relative(poses): """ calculate the VOs, from a list of consecutive poses (in the world frame) :param poses: N x T x 7 :return: N x (T-1) x 7 """ vos = [] for p in poses: pvos = [calc_vo_relative_logq(p[i].unsqueeze(0), p[i+1].unsqueeze(0)) for i in range(len(p)-1)] vos.append(torch.cat(pvos, dim=0)) vos = torch.stack(vos, dim=0) return vos def calc_vos_safe(poses): """ calculate the VOs, from a list of consecutive poses :param poses: N x T x 7 :return: N x (T-1) x 7 """ vos = [] for p in poses: pvos = [calc_vo_logq_safe(p[i].unsqueeze(0), p[i+1].unsqueeze(0)) for i in range(len(p)-1)] vos.append(torch.cat(pvos, dim=0)) vos = torch.stack(vos, dim=0) return vos def calc_vos_safe_fc(poses): """ calculate the VOs, from a list of consecutive poses (fully connected) :param poses: N x T x 7 :return: N x TC2 x 7 """ vos = [] for p in poses: pvos = [] for i in range(p.size(0)): for j in range(i+1, p.size(0)): pvos.append(calc_vo_logq_safe( p[i].unsqueeze(0), p[j].unsqueeze(0))) vos.append(torch.cat(pvos, dim=0)) vos = torch.stack(vos, dim=0) return vos # NUMPY def qlog(q): """ Applies logarithm map to q :param q: (4,) :return: (3,) """ if all(q[1:] == 0): q = np.zeros(3) else: q = np.arccos(q[0]) * q[1:] / np.linalg.norm(q[1:]) return q def qexp(q): """ Applies the exponential map to q :param q: (3,) :return: (4,) """ n = np.linalg.norm(q) q = np.hstack((np.cos(n), np.sinc(n/np.pi)*q)) return q def process_poses(poses_in, mean_t, std_t, align_R, align_t, align_s): """ processes the 1x12 raw pose from dataset by aligning and then normalizing :param poses_in: N x 12 :param mean_t: 3 :param std_t: 3 :param align_R: 3 x 3 :param align_t: 3 :param align_s: 1 :return: processed poses (translation + quaternion) N x 7 """ poses_out = np.zeros((len(poses_in), 6)) poses_out[:, 0:3] = poses_in[:, [3, 7, 11]] # align for i in range(len(poses_out)): R = poses_in[i].reshape((3, 4))[:3, :3] q = txq.mat2quat(np.dot(align_R, R)) q *= np.sign(q[0]) # constrain to hemisphere q = qlog(q) poses_out[i, 3:] = q t = poses_out[i, :3] - align_t poses_out[i, :3] = align_s * \ np.dot(align_R, t[:, np.newaxis]).squeeze() # normalize translation poses_out[:, :3] -= mean_t poses_out[:, :3] /= std_t return poses_out def log_quaternion_angular_error(q1, q2): return quaternion_angular_error(qexp(q1), qexp(q2)) def quaternion_angular_error(q1, q2): """ angular error between two quaternions :param q1: (4, ) :param q2: (4, ) :return: """ d = abs(np.dot(q1, q2)) d = min(1.0, max(-1.0, d)) theta = 2 * np.arccos(d) * 180 / np.pi return theta def skew(x): """ returns skew symmetric matrix from vector :param x: 3 x 1 :return: """ s = np.asarray([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]]) return s def dpq_q(p): """ returns the jacobian of quaternion product pq w.r.t. q :param p: 4 x 1 :return: 4 x 4 """ J = np.zeros((4, 4)) J[0, 0] = p[0] J[0, 1:] = -p[1:].squeeze() J[1:, 0] = p[1:].squeeze() J[1:, 1:] = p[0] * np.eye(3) + skew(p[1:]) return J def dpsq_q(p): """ returns the jacobian of quaternion product (p*)q w.r.t. 
q :param p: 4 x 1 :return: 4 x 4 """ J = np.zeros((4, 4)) J[0, 0] = p[0] J[0, 1:] = -p[1:].squeeze() J[1:, 0] = -p[1:].squeeze() J[1:, 1:] = p[0] * np.eye(3) - skew(p[1:]) return J def dpsq_p(q): """ returns the jacobian of quaternion product (p*)q w.r.t. p :param q: 4 x 1 :return: 4 x 4 """ J = np.zeros((4, 4)) J[0, 0] = q[0] J[0, 1:] = q[1:].squeeze() J[1:, 0] = q[1:].squeeze() J[1:, 1:] = -q[0] * np.eye(3) + skew(q[1:]) return J def dqstq_q(q, t): """ jacobian of q* t q w.r.t. q :param q: 4 x 1 :param t: 3 x 1 :return: 3 x 4 """ J = np.zeros((3, 4)) J[:, :1] = q[0]*t - np.cross(q[1:], t, axis=0) J[:, 1:] = -np.dot(t, q[1:].T) + np.dot(t.T, q[1:])*np.eye(3) + \ np.dot(q[1:], t.T) + q[0]*skew(t) J *= 2 return J def dqstq_t(q): """ jacobian of q* t q w.r.t. t :param q: 4 x 1 :return: 3 x 3 """ J = (q[0]*q[0] - np.dot(q[1:].T, q[1:])) * np.eye(3) + 2*np.dot(q[1:], q[1:].T) -\ 2*q[0]*skew(q[1:]) return J def m_rot(x): """ returns Jacobian of exponential map w.r.t. manifold increment :param x: part of state vector affected by increment, 4 x 1 :return: 4 x 3 """ # jacobian of full q wrt qm (quaternion update on manifold), # evaluated at qv = (0, 0, 0) # full q is derived using either the exponential map or q0 = sqrt(1-qm^2) jm = np.vstack((np.zeros((1, 3)), np.eye(3))) # 4 x 3 m = np.dot(dpq_q(p=x), jm) return m class PoseGraph: def __init__(self): """ implements pose graph optimization from "Hybrid Hessians for Optimization of Pose Graphs" - Y. LeCun et al and "A Tutorial on Graph-Based SLAM" - W. Burgard et al """ self.N = 0 self.z = np.zeros((0, 0)) def jacobian(self, L_ax, L_aq, L_rx, L_rq): # 6 because updates for rotation are on manifold J = np.zeros((0, 6*self.N)) # unary constraints for i in range(self.N): # translation constraint jt = np.zeros((3, J.shape[1])) jt[:, 6*i: 6*i+3] = np.eye(3) J = np.vstack((J, np.dot(L_ax, jt))) # rotation constraint jr = np.zeros((4, J.shape[1])) jr[:, 6*i+3: 6*i+6] = m_rot(x=self.z[7*i+3: 7*i+7]) J = np.vstack((J, np.dot(L_aq, jr))) # pairwise constraints for i in range(self.N-1): # translation constraint jt = np.zeros((3, J.shape[1])) dt = dqstq_t(q=self.z[7*i+3: 7*i+7]) # dt = np.eye(3) jt[:, 6*i: 6*i+3] = -dt jt[:, 6*(i+1): 6*(i+1)+3] = dt # m = m_rot(x=self.z[7*i+3 : 7*i+7]) # a = dqstq_q(q=self.z[7*i+3 : 7*i+7], # t=self.z[7*(i+1) : 7*(i+1)+3]-self.z[7*i : 7*i+3]) # jt[:, 6*i+3 : 6*i+6] = np.dot(a, m) J = np.vstack((J, np.dot(L_rx, jt))) # rotation constraint jr = np.zeros((4, J.shape[1])) m = m_rot(x=self.z[7*i+3: 7*i+7]) a = dpsq_p(q=self.z[7*(i+1)+3: 7*(i+1)+7]) jr[:, 6*i+3: 6*i+6] = np.dot(a, m) m = m_rot(x=self.z[7*(i+1)+3: 7*(i+1)+7]) b = dpsq_q(p=self.z[7*i+3: 7*i+7]) jr[:, 6*(i+1)+3: 6*(i+1)+6] = np.dot(b, m) J = np.vstack((J, np.dot(L_rq, jr))) return J def residuals(self, poses, vos, L_ax, L_aq, L_rx, L_rq): """ computes the residuals :param poses: N x 7 :param vos: (N-1) x 7 :param L_ax: 3 x 3 :param L_aq: 4 x 4 :param L_rx: 3 x 3 :param L_rq: 4 x 4 :return: """ r = np.zeros((0, 1)) # unary residuals L = np.zeros((7, 7)) L[:3, :3] = L_ax L[3:, 3:] = L_aq for i in range(self.N): rr = self.z[7*i: 7*(i+1)] - np.reshape(poses[i], (-1, 1)) r = np.vstack((r, np.dot(L, rr))) # pairwise residuals for i in range(self.N-1): # translation residual v = self.z[7*(i+1):7*(i+1)+3, 0]-self.z[7*i:7*i+3, 0] q = txq.qinverse(self.z[7*i+3:7*i+7, 0]) rt = txq.rotate_vector(v, q) rt = rt[:, np.newaxis] - vos[i, :3].reshape((-1, 1)) # rt = self.z[7*(i+1) : 7*(i+1)+3] - self.z[7*i : 7*i+3] - \ # vos[i, :3].reshape((-1, 1)) r = np.vstack((r, np.dot(L_rx, rt))) 
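            # The translation residual above expresses t_{i+1} - t_i in the
            # frame of pose i (rotation by the inverse quaternion) before
            # subtracting the measured VO translation; the rotation residual
            # below compares the relative quaternion q_i^{-1} * q_{i+1}
            # against the measured VO rotation as a plain 4-vector difference.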
# rotation residual q0 = self.z[7*i+3: 7*i+7].squeeze() q1 = self.z[7*(i+1)+3: 7*(i+1)+7].squeeze() qvo = txq.qmult(txq.qinverse(q0), q1).reshape((-1, 1)) rq = qvo - vos[i, 3:].reshape((-1, 1)) r = np.vstack((r, np.dot(L_rq, rq))) return r def update_on_manifold(self, x): """ Updates the state vector on manifold :param x: manifold increment, column vector :return: """ for i in range(self.N): # update translation t = x[6*i: 6*i+3] self.z[7*i: 7*i+3] += t # update rotation qm = x[6*i+3: 6*i+6] # quaternion on the manifold dq = np.zeros(4) # method in Burgard paper # dq[1:] = qm.squeeze() # dq[0] = math.sqrt(1 - sum(np.square(qm))) # incremental quaternion # method of exponential map n = np.linalg.norm(qm) dq[0] = math.cos(n) dq[1:] = np.sinc(n/np.pi) * qm.squeeze() q = self.z[7*i+3: 7*i+7].squeeze() q = txq.qmult(q, dq).reshape((-1, 1)) self.z[7*i+3: 7*i+7] = q def optimize(self, poses, vos, sax=1, saq=1, srx=1, srq=1, n_iters=10): """ run PGO, with init = poses :param poses: :param vos: :param sax: sigma for absolute translation :param saq: sigma for absolute rotation :param srx: sigma for relative translation :param srq: sigma for relative rotation :param n_iters: :return: """ self.N = len(poses) # init state vector with the predicted poses self.z = np.reshape(poses.copy(), (-1, 1)) # construct the information matrices L_ax = np.linalg.cholesky(np.eye(3) / sax) L_aq = np.linalg.cholesky(np.eye(4) / saq) L_rx = np.linalg.cholesky(np.eye(3) / srx) L_rq = np.linalg.cholesky(np.eye(4) / srq) for n_iter in range(n_iters): J = self.jacobian(L_ax.T, L_aq.T, L_rx.T, L_rq.T) r = self.residuals(poses.copy(), vos.copy(), L_ax.T, L_aq.T, L_rx.T, L_rq.T) H = np.dot(J.T, J) # hessian b = np.dot(J.T, r) # residuals # solve Hx = -b for x R = slin.cholesky(H) # H = R' R y = slin.solve_triangular(R.T, -b) x = slin.solve_triangular(R, y) self.update_on_manifold(x) return self.z.reshape((-1, 7)) class PoseGraphFC: def __init__(self): """ implements pose graph optimization from "Hybrid Hessians for Optimization of Pose Graphs" - Y. LeCun et al and "A Tutorial on Graph-Based SLAM" - W. 
Burgard et al fully connected version """ self.N = 0 self.z = np.zeros((0, 0)) def jacobian(self, L_ax, L_aq, L_rx, L_rq): # 6 because updates for rotation are on manifold J = np.zeros((0, 6*self.N)) # unary constraints for i in range(self.N): # translation constraint jt = np.zeros((3, J.shape[1])) jt[:, 6*i: 6*i+3] = np.eye(3) J = np.vstack((J, np.dot(L_ax, jt))) # rotation constraint jr = np.zeros((4, J.shape[1])) jr[:, 6*i+3: 6*i+6] = m_rot(x=self.z[7*i+3: 7*i+7]) J = np.vstack((J, np.dot(L_aq, jr))) # pairwise constraints for i in range(self.N): for j in range(i+1, self.N): # translation constraint jt = np.zeros((3, J.shape[1])) dt = dqstq_t(q=self.z[7*i+3: 7*i+7]) # dt = np.eye(3) jt[:, 6*i: 6*i+3] = -dt jt[:, 6*j: 6*j+3] = dt # m = m_rot(x=self.z[7*i+3 : 7*i+7]) # a = dqstq_q(q=self.z[7*i+3 : 7*i+7], # t=self.z[7*(i+1) : 7*(i+1)+3]-self.z[7*i : 7*i+3]) # jt[:, 6*i+3 : 6*i+6] = np.dot(a, m) J = np.vstack((J, np.dot(L_rx, jt))) # rotation constraint jr = np.zeros((4, J.shape[1])) m = m_rot(x=self.z[7*i+3: 7*i+7]) a = dpsq_p(q=self.z[7*j+3: 7*j+7]) jr[:, 6*i+3: 6*i+6] = np.dot(a, m) m = m_rot(x=self.z[7*j+3: 7*j+7]) b = dpsq_q(p=self.z[7*i+3: 7*i+7]) jr[:, 6*j+3: 6*j+6] = np.dot(b, m) J = np.vstack((J, np.dot(L_rq, jr))) return J def residuals(self, poses, vos, L_ax, L_aq, L_rx, L_rq): """ computes the residuals :param poses: N x 7 :param vos: (N-1) x 7 :param L_ax: 3 x 3 :param L_aq: 4 x 4 :param L_rx: 3 x 3 :param L_rq: 4 x 4 :return: """ r = np.zeros((0, 1)) # unary residuals L = np.zeros((7, 7)) L[:3, :3] = L_ax L[3:, 3:] = L_aq for i in range(self.N): rr = self.z[7*i: 7*(i+1)] - np.reshape(poses[i], (-1, 1)) r = np.vstack((r, np.dot(L, rr))) # pairwise residuals k = 0 for i in range(self.N): for j in range(i+1, self.N): # translation residual v = self.z[7*j:7*j+3, 0]-self.z[7*i:7*i+3, 0] q = txq.qinverse(self.z[7*i+3:7*i+7, 0]) rt = txq.rotate_vector(v, q) rt = rt[:, np.newaxis] - vos[k, :3].reshape((-1, 1)) # rt = self.z[7*(i+1) : 7*(i+1)+3] - self.z[7*i : 7*i+3] - \ # vos[i, :3].reshape((-1, 1)) r = np.vstack((r, np.dot(L_rx, rt))) # rotation residual q0 = self.z[7*i+3: 7*i+7].squeeze() q1 = self.z[7*j+3: 7*j+7].squeeze() qvo = txq.qmult(txq.qinverse(q0), q1).reshape((-1, 1)) rq = qvo - vos[k, 3:].reshape((-1, 1)) r = np.vstack((r, np.dot(L_rq, rq))) k += 1 return r def update_on_manifold(self, x): """ Updates the state vector on manifold :param x: manifold increment, column vector :return: """ for i in range(self.N): # update translation t = x[6*i: 6*i+3] self.z[7*i: 7*i+3] += t # update rotation qm = x[6*i+3: 6*i+6] # quaternion on the manifold dq = np.zeros(4) # method in Burgard paper # dq[1:] = qm.squeeze() # dq[0] = math.sqrt(1 - sum(np.square(qm))) # incremental quaternion # method of exponential map n = np.linalg.norm(qm) dq[0] = math.cos(n) dq[1:] = np.sinc(n/np.pi) * qm.squeeze() q = self.z[7*i+3: 7*i+7].squeeze() q = txq.qmult(q, dq).reshape((-1, 1)) self.z[7*i+3: 7*i+7] = q def optimize(self, poses, vos, sax=1, saq=1, srx=1, srq=1, n_iters=10): """ run PGO, with init = poses :param poses: :param vos: :param sax: sigma for absolute translation :param saq: sigma for absolute rotation :param srx: sigma for relative translation :param srq: sigma for relative rotation :param n_iters: :return: """ self.N = len(poses) # init state vector with the predicted poses self.z = np.reshape(poses.copy(), (-1, 1)) # construct the information matrices L_ax = np.linalg.cholesky(np.eye(3) / sax) L_aq = np.linalg.cholesky(np.eye(4) / saq) L_rx = np.linalg.cholesky(np.eye(3) / srx) L_rq = 
np.linalg.cholesky(np.eye(4) / srq)
        for n_iter in range(n_iters):
            J = self.jacobian(L_ax.T, L_aq.T, L_rx.T, L_rq.T)
            r = self.residuals(poses.copy(), vos.copy(), L_ax.T, L_aq.T,
                               L_rx.T, L_rq.T)
            H = np.dot(J.T, J)  # hessian
            b = np.dot(J.T, r)  # residuals
            # solve Hx = -b for x
            R = slin.cholesky(H)  # H = R' R
            y = slin.solve_triangular(R.T, -b)
            x = slin.solve_triangular(R, y)
            self.update_on_manifold(x)
        return self.z.reshape((-1, 7))

def optimize_poses(pred_poses, vos=None, fc_vos=False, target_poses=None,
                   sax=1, saq=1, srx=1, srq=1):
    """
    optimizes poses using either the VOs or the target poses (calculates VOs
    from them)
    :param pred_poses: N x 7
    :param vos: (N-1) x 7
    :param fc_vos: whether to use relative transforms between all frames in a
    fully connected manner, not just consecutive frames
    :param target_poses: N x 7
    :param sax: covariance of pose translation (1 number)
    :param saq: covariance of pose rotation (1 number)
    :param srx: covariance of VO translation (1 number)
    :param srq: covariance of VO rotation (1 number)
    :return:
    """
    pgo = PoseGraphFC() if fc_vos else PoseGraph()
    if vos is None:
        if target_poses is not None:
            # calculate the VOs (in the pred_poses frame)
            vos = np.zeros((len(target_poses)-1, 7))
            for i in range(len(vos)):
                vos[i, :3] = target_poses[i+1, :3] - target_poses[i, :3]
                q0 = target_poses[i, 3:]
                q1 = target_poses[i+1, 3:]
                vos[i, 3:] = txq.qmult(txq.qinverse(q0), q1)
        else:
            print('Specify either VO or target poses')
            return None
    optim_poses = pgo.optimize(poses=pred_poses, vos=vos, sax=sax, saq=saq,
                               srx=srx, srq=srq)
    return optim_poses

def align_3d_pts(x1, x2):
    """Align two sets of 3d points using the method of Horn (closed-form).

    Find optimal s, R, t, such that s*R*(x1-t) = x2

    Input:
    x1 -- first trajectory (3xn)
    x2 -- second trajectory (3xn)

    Output:
    R -- rotation matrix (3x3)
    t -- translation vector (3x1)
    s -- scale (1x1)

    written by Jinwei Gu
    """
    x1c = x1.mean(1, keepdims=True)
    x2c = x2.mean(1, keepdims=True)
    x1_zerocentered = x1 - x1c
    x2_zerocentered = x2 - x2c
    W = np.zeros((3, 3))
    r1 = 0
    r2 = 0
    for i in range(x1.shape[1]):
        a = x1_zerocentered[:, i]
        b = x2_zerocentered[:, i]
        W += np.outer(b, a)
        r1 += np.dot(a.T, a)
        r2 += np.dot(b.T, b)
    # np.asscalar was removed from NumPy; float() is the drop-in replacement
    s = float(np.sqrt(r2/r1))
    U, d, Vh = np.linalg.svd(W)
    S = np.eye(3)
    if np.linalg.det(np.dot(U, Vh)) < 0:
        S[2, 2] = -1
    R = np.dot(U, np.dot(S, Vh))
    t = x1c - (1/s) * np.dot(R.transpose(), x2c)
    # ---- align ----
    #x2a = s * np.dot(R, x1-t)
    #error = x2a - x2
    return R, t, s

def align_2d_pts(x1, x2):
    """Align two sets of 2d points using the method of Horn (closed-form).

    Find optimal s, R, t, such that s*R*(x1-t) = x2

    Input:
    x1 -- first trajectory (2xn)
    x2 -- second trajectory (2xn)

    Output:
    R -- rotation matrix (2x2)
    t -- translation vector (2x1)
    s -- scale (1x1)

    written by Jinwei Gu
    """
    x1c = x1.mean(1, keepdims=True)
    x2c = x2.mean(1, keepdims=True)
    x1_zerocentered = x1 - x1c
    x2_zerocentered = x2 - x2c
    W = np.zeros((2, 2))
    r1 = 0
    r2 = 0
    for i in range(x1.shape[1]):
        a = x1_zerocentered[:, i]
        b = x2_zerocentered[:, i]
        W += np.outer(b, a)
        r1 += np.dot(a.T, a)
        r2 += np.dot(b.T, b)
    s = float(np.sqrt(r2/r1))
    U, d, Vh = np.linalg.svd(W)
    S = np.eye(2)
    if np.linalg.det(np.dot(U, Vh)) < 0:
        S[1, 1] = -1
    R = np.dot(U, np.dot(S, Vh))
    t = x1c - (1/s) * np.dot(R.transpose(), x2c)
    # ---- align ----
    #x2a = s * np.dot(R, x1-t)
    #error = x2a - x2
    return R, t, s

def align_3d_pts_noscale(x1, x2):
    """Align two sets of 3d points using the method of Horn (closed-form).
Find optimal s, R, t, such that s*R*(x1-t) = x2 Input: x1 -- first trajectory (3xn) x2 -- second trajectory (3xn) Output: R -- rotation matrix (3x3) t -- translation vector (3x1) written by Jinwei Gu """ x1c = x1.mean(1, keepdims=True) x2c = x2.mean(1, keepdims=True) x1_zerocentered = x1 - x1c x2_zerocentered = x2 - x2c W = np.zeros((3, 3)) r1 = 0 r2 = 0 for i in range(x1.shape[1]): a = x1_zerocentered[:, i] b = x2_zerocentered[:, i] W += np.outer(b, a) r1 += np.dot(a.T, a) r2 += np.dot(b.T, b) #s = np.asscalar(np.sqrt(r2/r1)) s = 1 U, d, Vh = np.linalg.svd(W) S = np.eye(3) if np.linalg.det(np.dot(U, Vh)) < 0: S[2, 2] = -1 R = np.dot(U, np.dot(S, Vh)) t = x1c - np.dot(R.transpose(), x2c) # ---- align ---- #x2a = s * np.dot(R, x1-t) #error = x2a - x2 return R, t, s def align_2d_pts_noscale(x1, x2): """Align two sets of 3d points using the method of Horn (closed-form). Find optimal s, R, t, such that s*R*(x1-t) = x2 Input: x1 -- first trajectory (2xn) x2 -- second trajectory (2xn) Output: R -- rotation matrix (2x2) t -- translation vector (2x1) s -- scale (1x1) written by Jinwei Gu """ x1c = x1.mean(1, keepdims=True) x2c = x2.mean(1, keepdims=True) x1_zerocentered = x1 - x1c x2_zerocentered = x2 - x2c W = np.zeros((2, 2)) r1 = 0 r2 = 0 for i in range(x1.shape[1]): a = x1_zerocentered[:, i] b = x2_zerocentered[:, i] W += np.outer(b, a) r1 += np.dot(a.T, a) r2 += np.dot(b.T, b) #s = np.asscalar(np.sqrt(r2/r1)) s = 1 U, d, Vh = np.linalg.svd(W) S = np.eye(2) if np.linalg.det(np.dot(U, Vh)) < 0: S[1, 1] = -1 R = np.dot(U, np.dot(S, Vh)) t = x1c - (1/s) * np.dot(R.transpose(), x2c) # ---- align ---- #x2a = s * np.dot(R, x1-t) #error = x2a - x2 return R, t, s def align_camera_poses(o1, o2, R1, R2, use_rotation_constraint=True): """Align two sets of camera poses (R1,o1/R2,o2) using the method of Horn (closed-form). Find optimal s, R, t, such that s*R*(o1-t) = o2 (1) R*R1 = R2 (2) where R1/R2 are the camera-to-world matrices, o1/o2 are the center of the cameras. Input: o1 -- camera centers (3xn) o2 -- camera centers (3xn) R1 -- camera poses (camera-to-world matrices) (nx3x3) R2 -- camera poses (camera-to-world matrices) (nx3x3) use_rotation_constraint -- if False, uses only Eq(1) to solve. Output: R -- rotation matrix (3x3) t -- translation vector (3x1) s -- scale (1x1) Note, when use_rotation_constraint=False, it is the same problem as above, i.e., to align two sets of 3D points. When use_rotation_constraint=True, we note Eq(2) is the same equation as Eq(1), after we zero-center and remove the scale. So, we can use the same approach (SVD). 
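    In practice, each column of R1[i]/R2[i] is simply accumulated into the same
    covariance matrix W as an extra, already zero-centered point correspondence.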
written by Jinwei Gu """ if not use_rotation_constraint: return align_3d_pts(o1, o2) o1c = o1.mean(1, keepdims=True) o2c = o2.mean(1, keepdims=True) o1_zerocentered = o1 - o1c o2_zerocentered = o2 - o2c W = np.zeros((3, 3)) r1 = 0 r2 = 0 for i in range(o1.shape[1]): a = o1_zerocentered[:, i] b = o2_zerocentered[:, i] W += np.outer(b, a) r1 += np.dot(a.T, a) r2 += np.dot(b.T, b) s = np.asscalar(np.sqrt(r2/r1)) # add rotation constraints for i in range(o1.shape[1]): d1 = np.squeeze(R1[i, :, :]) d2 = np.squeeze(R2[i, :, :]) for c in range(3): a = d1[:, c] b = d2[:, c] W += np.outer(b, a) U, d, Vh = np.linalg.svd(W) S = np.eye(3) if np.linalg.det(np.dot(U, Vh)) < 0: S[2, 2] = -1 R = np.dot(U, np.dot(S, Vh)) t = o1c - (1/s) * np.dot(R.transpose(), o2c) # ---- align ---- #o2a = s * np.dot(R, o1-t) #R2a = np.dot(R, R1) return R, t, s def test_align_3d_pts(): import transforms3d.euler as txe N = 10 x1 = np.random.rand(3, N) noise = np.random.rand(3, N)*0.01 s = np.random.rand() t = np.random.rand(3, 1) R = txe.euler2mat(np.random.rand(), np.random.rand(), np.random.rand()) R = R[:3, :3] x2 = s*np.dot(R, x1-t) + noise Re, te, se = align_3d_pts(x1, x2) print('scale ', s, se) print('rotation matrx ', R, Re) print('translation ', t, te) def test_align_camera_poses(): import transforms3d.euler as txe N = 10 o1 = np.random.rand(3, N) noise = np.random.rand(3, N)*0.01 s = np.random.rand() t = np.random.rand(3, 1) R = txe.euler2mat(np.random.rand(), np.random.rand(), np.random.rand()) R = R[:3, :3] o2 = s*np.dot(R, o1-t) + noise R1 = np.zeros((N, 3, 3)) R2 = np.zeros((N, 3, 3)) for i in range(N): Ri = txe.euler2mat( np.random.rand(), np.random.rand(), np.random.rand()) R1[i, :, :] = Ri[:3, :3] R2[i, :, :] = np.dot(R, Ri[:3, :3]) Re1, te1, se1 = align_camera_poses(o1, o2, R1, R2, False) Re2, te2, se2 = align_camera_poses(o1, o2, R1, R2, True) print('scale ', s, se1, se2) print('rotation matrx ', R, Re1, Re2) print('translation ', t, te1, te2) def pgo_test_poses(): """ generates test poses and vos for the various PGO implementations :return: """ poses = np.zeros((3, 7)) for i in range(poses.shape[0]): poses[i, :3] = i angle = math.radians(10*i) R = txe.euler2mat(angle, angle, angle) q = txq.mat2quat(R) poses[i, 3:] = q vos = np.zeros((poses.shape[0]-1, 7)) for i in range(vos.shape[0]): vos[i, 0] = 1.5 vos[i, 1] = 0.5 vos[i, 2] = 1.0 R = txe.euler2mat(math.radians(15), math.radians(10), math.radians(5)) q = txq.mat2quat(R) vos[i, 3:] = q return poses, vos def pgo_test_poses1(): poses = np.zeros((3, 7)) R = txe.euler2mat(0, 0, np.deg2rad(45)) q = txq.mat2quat(R) poses[:, 3:] = q for i in range(len(poses)): poses[i, :3] = np.asarray([i, i, 0]) pt = np.zeros((len(poses), 6)) pt[:, :3] = poses[:, :3] for i, p in enumerate(poses): pt[i, 3:] = qlog(p[3:]) pt = torch.from_numpy(pt.astype(np.float32)) vost = calc_vos_safe_fc(pt.unsqueeze(0))[0].numpy() vos = np.zeros((len(vost), 7)) vos[:, :3] = vost[:, :3] for i, p in enumerate(vost): vos[i, 3:] = qexp(p[3:]) # perturbation vos[0, 0] = np.sqrt(2) - 0.5 vos[1, 0] = np.sqrt(2) - 0.5 return poses, vos def print_poses(poses): print('translations') print(poses[:, :3]) print('euler') for i in range(poses.shape[0]): a = txe.mat2euler(txq.quat2mat(poses[i, 3:])) print([np.rad2deg(aa) for aa in a]) def test_pgo(): """ Tests the full pose graph optimization implementation :return: bool """ pred_poses, vos = pgo_test_poses1() print('pred poses') print_poses(pred_poses) print('vos') print_poses(vos) pgo = PoseGraph() optimized_poses = pgo.optimize(pred_poses, vos) 
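    # pgo.optimize() above runs n_iters Gauss-Newton steps (H = J^T J, solved
    # via Cholesky factorization) and returns the refined N x 7 (t, q) states.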
print('optimized') print_poses(optimized_poses) def test_pose_utils(): """ Tests the pose utils :return: """ TEST_COMPOSE = True TEST_INV = True def ra(_): return np.random.uniform(0, 2*math.pi) if TEST_COMPOSE: print('Testing pose composing...') R1 = txe.euler2mat(ra(1), ra(1), ra(1)) t1 = np.random.rand(3) R2 = txe.euler2mat(ra(1), ra(1), ra(1)) t2 = np.random.rand(3) # homogeneous matrix method R = np.dot(R1, R2) t = t1 + np.dot(R1, t2) print('From homogeneous matrices, t = ') print(t) print('R = ') print(R) # quaternion method q1 = txq.mat2quat(R1) q2 = txq.mat2quat(R2) p1 = torch.cat((torch.from_numpy(t1), torch.from_numpy(q1))) p2 = torch.cat((torch.from_numpy(t2), torch.from_numpy(q2))) p = compose_pose_quaternion( torch.unsqueeze(p1, 0), torch.unsqueeze(p2, 0)) t = p[:, :3].numpy().squeeze() q = p[:, 3:].numpy().squeeze() print('From quaternions, t = ') print(t) print('R = ') print(txe.quat2mat(q)) if TEST_INV: print('Testing pose inversion...') R = txe.euler2mat(ra(1), ra(1), ra(1)) t = np.random.rand(3) T = np.eye(4) T[:3, :3] = R T[:3, -1] = t q = txq.mat2quat(R) p = torch.cat((torch.from_numpy(t), torch.from_numpy(q))) pinv = invert_pose_quaternion(torch.unsqueeze(p, 0)) tinv, qinv = pinv[:, :3], pinv[:, 3:] Rinv = txq.quat2mat(qinv.numpy().squeeze()) Tinv = np.eye(4) Tinv[:3, :3] = Rinv Tinv[:3, -1] = tinv.numpy().squeeze() print('T * T^(-1) = ') print(np.dot(T, Tinv)) def test_q_error(): def ra(_): return np.random.uniform(0, 2*math.pi) # rotation along x axis a1 = ra(1) a2 = ra(1) q1 = txq.mat2quat(txe.euler2mat(a1, 0, 0)) q2 = txq.mat2quat(txe.euler2mat(a2, 0, 0)) a1 = np.rad2deg(a1) a2 = np.rad2deg(a2) print('Angles: {:f}, {:f}, difference = {:f}'.format(a1, a2, a1-a2)) print('Error: {:f}'.format(quaternion_angular_error(q1, q2))) def test_log_q_error(): def ra(_): return np.random.uniform(0, 2*math.pi) # rotation along x axis a1 = ra(1) a2 = ra(1) q1 = txq.mat2quat(txe.euler2mat(0, a1, 0)) q2 = txq.mat2quat(txe.euler2mat(0, a2, 0)) # apply log map q1 = np.arccos(q1[0]) * q1[1:] / np.linalg.norm(q1[1:]) q2 = np.arccos(q2[0]) * q2[1:] / np.linalg.norm(q2[1:]) a1 = np.rad2deg(a1) a2 = np.rad2deg(a2) print('Angles: {:f}, {:f}, difference = {:f}'.format(a1, a2, a1-a2)) print('Error: {:f}'.format(log_quaternion_angular_error(q1, q2))) if __name__ == '__main__': test_pgo() # test_dumb_pgo() # test_align_camera_poses() # test_q_error() # test_log_q_error() ================================================ FILE: utils/pose_utils_np.py ================================================ """ Copyright (C) 2018 NVIDIA Corporation. All rights reserved. Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
""" import torch from torch.nn import Module from torch.autograd import Variable from torch.nn.functional import pad import numpy as np import scipy.linalg as slin import math import transforms3d.quaternions as txq import transforms3d.euler as txe import quaternion import bisect # see for formulas: # https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-801-machine-vision-fall-2004/readings/quaternions.pdf # and "Quaternion and Rotation" - Yan-Bin Jia, September 18, 2016 # from IPython.core.debugger import set_trace # PYTORCH def tq2RT(poses, square=False): """ :param poses: N x 7, (t,q) :return: (N,3,4) """ N,_ = poses.shape T = poses[:,:3] q = quaternion.from_float_array(poses[:,3:]) R = quaternion.as_rotation_matrix(q) #Nx3x3 RT = np.concatenate([R,T[...,None]], axis=-1) #Nx3x4 if square: padding = np.zeros([N,1,4]) padding[:,:,-1] = 1 RT = np.concatenate([RT,padding], axis=1) #Nx4x4 return RT def RT2tq(poses, square=False): """ !!NOT TESETED!! :param poses: N x 3 x 4, (R|T) :return: (N, 7) """ N,_,_ = poses.shape R = poses[:,:,:3] T = poses[:,:,3:] # Nx3x1 q = quaternion.as_float_array(quaternion.from_rotation_matrix(R)) #Nx4 t= T.squeeze(-1) tq = np.concatenate([t,q], axis=-1) return tq def pose_interp(poses, timestamps_in, timestamps_out, r_interp='slerp'): """ :param poses: N x 7, (t,q) :param timestamps: (N,) :param t: (K,) :return: (K,) """ # assert t_interp in ['linear', 'spline'] assert r_interp in ['slerp', 'squad'] assert len(poses)>1 assert len(poses) == len(timestamps_in) input_ts = poses[:,:3] input_rs= poses[:,3:] #quaternions timestamps_in = np.array(timestamps_in) #sort the inputs inds = np.argsort(timestamps_in) poses = poses[inds] timestamps_in = timestamps_in[inds] if r_interp == 'squad': input_rs_ = quaternion.from_float_array(input_rs) output_rs = quaternion.squad( input_rs, timestamps_in, timestamps_out) output_rs = quaternion.as_float_array(output_rs) elif r_interp == 'slerp': output_rs = [] for t in timestamps_out: input_rs_ = quaternion.from_float_array(input_rs) idx = bisect.bisect_left(timestamps_in) output_r = quaternion.slerp(input_rs_[idx],input_rs_[idx+1], timestamps_in[idx], timestamps_in[idx+1],t ) output_r = quaternion.as_float_array(output_r) output_rs.append(output_r) output_ts = [] for t in timestamps_out: idx = bisect_left.bisect_left(timestamps_in) if idx>=len(timestamps_in)-1: idx -= 1 t1 = timestamps_in[idx] t2 = timestamps_in[idx+1] output_t = ((t-t1)*input_ts[idx+1] + (t2-t) *input_ts[idx]) / (t2-t1) output_ts.append(output_t) output_ts =np.concatenate(output_ts, axis=0 ) output_rs =np.concatenate(output_rs, axis=0 ) new_pose = np.concatenate([output_ts, output_rs], axis=1) return new_pose def vdot(v1, v2): """ Dot product along the dim=1 :param v1: N x d :param v2: N x d :return: N x 1 """ # out = torch.mul(v1, v2) out = v1 * v2 out = np.sum(out, axis=1) return out def normalize(x, p=2, dim=0, eps=1e-6): """ Divides a tensor along a certain dim by the Lp norm :param x: :param p: Lp norm :param dim: Dimension to normalize along :return: """ # xn=x.norm(p = p, dim = dim) xn = np.linalg.norm(x, ord=p, axis=dim, keepdims=True) # x=x / xn.unsqueeze(dim = dim) x = x / (xn+eps) # x *= np.sign(x[0]) #added on 22/4/2020 return x def qmult(q1, q2): """ Multiply 2 quaternions :param q1: Tensor N x 4 :param q2: Tensor N x 4 :return: quaternion product, Tensor N x 4 """ q1s, q1v = q1[:, :1], q1[:, 1:] q2s, q2v = q2[:, :1], q2[:, 1:] qs = q1s*q2s - vdot(q1v, q2v) # qv=q1v.mul(q2s.expand_as(q1v)) + q2v.mul(q1s.expand_as(q2v)) + # 
torch.cross(q1v, q2v, dim = 1) qv = q1v*q2s + q2v*q1s + np.cross(q1v, q2v, axis=1) q = np.concatenate((qs, qv), axis=1) # normalize q = normalize(q, dim=1) return q def qinv(q): """ Inverts quaternions :param q: N x 4 :return: q*: N x 4 """ # q_inv = torch.cat((q[:, :1], -q[:, 1:]), dim=1) q_inv = np.concatenate((q[:, :1], -q[:, 1:]), axis=1) return q_inv def qexp_t(q): """ Applies exponential map to log quaternion :param q: N x 3 :return: N x 4 """ n = torch.norm(q, p=2, dim=1, keepdim=True) n = torch.clamp(n, min=1e-8) q = q * torch.sin(n) q = q / n q = torch.cat((torch.cos(n), q), dim=1) return q def qlog_t(q): """ Applies the log map to a quaternion :param q: N x 4 :return: N x 3 """ n = torch.norm(q[:, 1:], p=2, dim=1, keepdim=True) n = torch.clamp(n, min=1e-8) q = q[:, 1:] * torch.acos(torch.clamp(q[:, :1], min=-1.0, max=1.0)) q = q / n return q def qexp_t_safe(q): """ Applies exponential map to log quaternion (safe implementation that does not maintain gradient flow) :param q: N x 3 :return: N x 4 """ q = torch.from_numpy(np.asarray([qexp(qq) for qq in q.numpy()], dtype=np.float32)) return q def qlog_t_safe(q): """ Applies the log map to a quaternion (safe implementation that does not maintain gradient flow) :param q: N x 4 :return: N x 3 """ q = torch.from_numpy(np.asarray([qlog(qq) for qq in q.numpy()], dtype=np.float32)) return q def rotate_vec_by_q(t, q): """ rotates vector t by quaternion q :param t: vector, Tensor N x 3 :param q: quaternion, Tensor N x 4 :return: t rotated by q: t' = t + 2*qs*(qv x t) + 2*qv x (qv x r) """ qs, qv = q[:, :1], q[:, 1:] # b = torch.cross(qv, t, dim=1) b = np.cross(qv, t, axis=1) # c = 2 * torch.cross(qv, b, dim=1) c = 2 * np.cross(qv, b, axis=1) # b = 2 * b.mul(qs.expand_as(b)) b = 2 * b*qs tq = t + b + c return tq def compose_pose_quaternion(p1, p2): """ pyTorch implementation :param p1: input pose, Tensor N x 7 :param p2: pose to apply, Tensor N x 7 :return: output pose, Tensor N x 7 all poses are translation + quaternion #!comments: first apply p2 and then p1 !! 
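    equivalently, with T(p) the 4x4 homogeneous matrix of a pose p, the
    result satisfies T(out) = T(p1) @ T(p2)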
""" p1t, p1q = p1[:, :3], p1[:, 3:] p2t, p2q = p2[:, :3], p2[:, 3:] q = qmult(p1q, p2q) t = p1t + rotate_vec_by_q(p2t, p1q) return np.concatenate((t, q), axis=1) def invert_pose_quaternion(p): """ inverts the pose :param p: pose, Tensor N x 7 :return: inverted pose """ t, q = p[:, :3], p[:, 3:] q_inv = qinv(q) tinv = -rotate_vec_by_q(t, q_inv) return np.concatenate((tinv, q_inv), axis=1) def calc_vo(p0, p1): """ calculates VO (in the p0 frame) from 2 poses :param p0: N x 7 :param p1: N x 7 """ # assert p0.shape==p1.shape return compose_pose_quaternion(invert_pose_quaternion(p0), p1) def calc_vo_logq(p0, p1): """ VO (in the p0 frame) (logq) :param p0: N x 6 :param p1: N x 6 :return: N-1 x 6 """ q0 = qexp_t(p0[:, 3:]) q1 = qexp_t(p1[:, 3:]) vos = calc_vo(torch.cat((p0[:, :3], q0), dim=1), torch.cat((p1[:, :3], q1), dim=1)) vos_q = qlog_t(vos[:, 3:]) return torch.cat((vos[:, :3], vos_q), dim=1) def calc_vo_relative(p0, p1): """ calculates VO (in the world frame) from 2 poses :param p0: N x 7 :param p1: N x 7 """ vos_t = p1[:, :3] - p0[:, :3] vos_q = qmult(qinv(p0[:, 3:]), p1[:, 3:]) return np.concatenate((vos_t, vos_q), axis=1) def calc_vo_relative_logq(p0, p1): """ Calculates VO (in the world frame) from 2 poses (log q) :param p0: N x 6 :param p1: N x 6 :return: """ q0 = qexp_t(p0[:, 3:]) q1 = qexp_t(p1[:, 3:]) vos = calc_vo_relative(torch.cat((p0[:, :3], q0), dim=1), torch.cat((p1[:, :3], q1), dim=1)) vos_q = qlog_t(vos[:, 3:]) return torch.cat((vos[:, :3], vos_q), dim=1) def calc_vo_relative_logq_safe(p0, p1): """ Calculates VO (in the world frame) from 2 poses (log q) through numpy fns :param p0: N x 6 :param p1: N x 6 :return: """ vos_t = p1[:, :3] - p0[:, :3] q0 = qexp_t_safe(p0[:, 3:]) q1 = qexp_t_safe(p1[:, 3:]) vos_q = qmult(qinv(q0), q1) vos_q = qlog_t_safe(vos_q) return torch.cat((vos_t, vos_q), dim=1) def calc_vo_logq_safe(p0, p1): """ VO in the p0 frame using numpy fns :param p0: :param p1: :return: """ vos_t = p1[:, :3] - p0[:, :3] q0 = qexp_t_safe(p0[:, 3:]) q1 = qexp_t_safe(p1[:, 3:]) vos_t = rotate_vec_by_q(vos_t, qinv(q0)) vos_q = qmult(qinv(q0), q1) vos_q = qlog_t_safe(vos_q) return torch.cat((vos_t, vos_q), dim=1) def calc_vos_simple(poses): """ calculate the VOs, from a list of consecutive poses :param poses: N x T x 7 :return: N x (T-1) x 7 """ vos = [] for p in poses: pvos = [p[i+1].unsqueeze(0) - p[i].unsqueeze(0) for i in range(len(p)-1)] vos.append(torch.cat(pvos, dim=0)) vos = torch.stack(vos, dim=0) return vos def calc_vos(poses): """ calculate the VOs, from a list of consecutive poses (in the p0 frame) :param poses: N x T x 7 :return: N x (T-1) x 7 """ vos = [] for p in poses: pvos = [calc_vo_logq(p[i].unsqueeze(0), p[i+1].unsqueeze(0)) for i in range(len(p)-1)] vos.append(torch.cat(pvos, dim=0)) vos = torch.stack(vos, dim=0) return vos def calc_vos_relative(poses): """ calculate the VOs, from a list of consecutive poses (in the world frame) :param poses: N x T x 7 :return: N x (T-1) x 7 """ vos = [] for p in poses: pvos = [calc_vo_relative_logq(p[i].unsqueeze(0), p[i+1].unsqueeze(0)) for i in range(len(p)-1)] vos.append(torch.cat(pvos, dim=0)) vos = torch.stack(vos, dim=0) return vos def calc_vos_safe(poses): """ calculate the VOs, from a list of consecutive poses :param poses: N x T x 7 :return: N x (T-1) x 7 """ vos = [] for p in poses: pvos = [calc_vo_logq_safe(p[i].unsqueeze(0), p[i+1].unsqueeze(0)) for i in range(len(p)-1)] vos.append(torch.cat(pvos, dim=0)) vos = torch.stack(vos, dim=0) return vos def calc_vos_safe_fc(poses): """ calculate the VOs, from a 
list of consecutive poses (fully connected) :param poses: N x T x 7 :return: N x (T choose 2) x 7 """ vos = [] for p in poses: pvos = [] for i in range(p.size(0)): for j in range(i+1, p.size(0)): pvos.append(calc_vo_logq_safe( p[i].unsqueeze(0), p[j].unsqueeze(0))) vos.append(torch.cat(pvos, dim=0)) vos = torch.stack(vos, dim=0) return vos # NUMPY def qlog(q): """ Applies logarithm map to q :param q: (4,) :return: (3,) """ if all(q[1:] == 0): q = np.zeros(3) else: q = np.arccos(q[0]) * q[1:] / np.linalg.norm(q[1:]) return q def qexp(q): """ Applies the exponential map to q :param q: (3,) :return: (4,) """ n = np.linalg.norm(q) q = np.hstack((np.cos(n), np.sinc(n/np.pi)*q)) return q def process_poses(poses_in, mean_t, std_t, align_R, align_t, align_s): """ processes the 1x12 raw pose from dataset by aligning and then normalizing :param poses_in: N x 12 :param mean_t: 3 :param std_t: 3 :param align_R: 3 x 3 :param align_t: 3 :param align_s: 1 :return: processed poses (translation + log quaternion) N x 6 """ poses_out = np.zeros((len(poses_in), 6)) poses_out[:, 0:3] = poses_in[:, [3, 7, 11]] # align for i in range(len(poses_out)): R = poses_in[i].reshape((3, 4))[:3, :3] q = txq.mat2quat(np.dot(align_R, R)) q *= np.sign(q[0]) # constrain to hemisphere q = qlog(q) poses_out[i, 3:] = q t = poses_out[i, :3] - align_t poses_out[i, :3] = align_s * \ np.dot(align_R, t[:, np.newaxis]).squeeze() # normalize translation poses_out[:, :3] -= mean_t poses_out[:, :3] /= std_t return poses_out def log_quaternion_angular_error(q1, q2): return quaternion_angular_error(qexp(q1), qexp(q2)) def quaternion_angular_error(q1, q2): """ angular error between two quaternions :param q1: (4, ) :param q2: (4, ) :return: """ d = abs(np.dot(q1, q2)) d = min(1.0, max(-1.0, d)) theta = 2 * np.arccos(d) * 180 / np.pi return theta def skew(x): """ returns skew symmetric matrix from vector :param x: 3 x 1 :return: """ s = np.asarray([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]]) return s def dpq_q(p): """ returns the jacobian of quaternion product pq w.r.t. q :param p: 4 x 1 :return: 4 x 4 """ J = np.zeros((4, 4)) J[0, 0] = p[0] J[0, 1:] = -p[1:].squeeze() J[1:, 0] = p[1:].squeeze() J[1:, 1:] = p[0] * np.eye(3) + skew(p[1:]) return J def dpsq_q(p): """ returns the jacobian of quaternion product (p*)q w.r.t. q :param p: 4 x 1 :return: 4 x 4 """ J = np.zeros((4, 4)) J[0, 0] = p[0] J[0, 1:] = -p[1:].squeeze() J[1:, 0] = -p[1:].squeeze() J[1:, 1:] = p[0] * np.eye(3) - skew(p[1:]) return J def dpsq_p(q): """ returns the jacobian of quaternion product (p*)q w.r.t. p :param q: 4 x 1 :return: 4 x 4 """ J = np.zeros((4, 4)) J[0, 0] = q[0] J[0, 1:] = q[1:].squeeze() J[1:, 0] = q[1:].squeeze() J[1:, 1:] = -q[0] * np.eye(3) + skew(q[1:]) return J def dqstq_q(q, t): """ jacobian of q* t q w.r.t. q :param q: 4 x 1 :param t: 3 x 1 :return: 3 x 4 """ J = np.zeros((3, 4)) J[:, :1] = q[0]*t - np.cross(q[1:], t, axis=0) J[:, 1:] = -np.dot(t, q[1:].T) + np.dot(t.T, q[1:])*np.eye(3) + \ np.dot(q[1:], t.T) + q[0]*skew(t) J *= 2 return J def dqstq_t(q): """ jacobian of q* t q w.r.t. t :param q: 4 x 1 :return: 3 x 3 """ J = (q[0]*q[0] - np.dot(q[1:].T, q[1:])) * np.eye(3) + 2*np.dot(q[1:], q[1:].T) -\ 2*q[0]*skew(q[1:]) return J def m_rot(x): """ returns Jacobian of exponential map w.r.t.
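# --- Editor's note (illustrative, not from the repository): qexp and qlog
# above are mutual inverses on the hemisphere of unit quaternions with
# non-negative scalar part, which is why process_poses applies
# q *= np.sign(q[0]) before taking the log. A quick round-trip check:
import numpy as np
q = np.array([np.cos(0.3), 0.0, np.sin(0.3), 0.0])  # unit quaternion, w >= 0
assert np.allclose(qexp(qlog(q)), q, atol=1e-6)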
manifold increment :param x: part of state vector affected by increment, 4 x 1 :return: 4 x 3 """ # jacobian of full q wrt qm (quaternion update on manifold), # evaluated at qv = (0, 0, 0) # full q is derived using either the exponential map or q0 = sqrt(1-qm^2) jm = np.vstack((np.zeros((1, 3)), np.eye(3))) # 4 x 3 m = np.dot(dpq_q(p=x), jm) return m class PoseGraph: def __init__(self): """ implements pose graph optimization from "Hybrid Hessians for Optimization of Pose Graphs" - Y. LeCun et al and "A Tutorial on Graph-Based SLAM" - W. Burgard et al """ self.N = 0 self.z = np.zeros((0, 0)) def jacobian(self, L_ax, L_aq, L_rx, L_rq): # 6 because updates for rotation are on manifold J = np.zeros((0, 6*self.N)) # unary constraints for i in range(self.N): # translation constraint jt = np.zeros((3, J.shape[1])) jt[:, 6*i: 6*i+3] = np.eye(3) J = np.vstack((J, np.dot(L_ax, jt))) # rotation constraint jr = np.zeros((4, J.shape[1])) jr[:, 6*i+3: 6*i+6] = m_rot(x=self.z[7*i+3: 7*i+7]) J = np.vstack((J, np.dot(L_aq, jr))) # pairwise constraints for i in range(self.N-1): # translation constraint jt = np.zeros((3, J.shape[1])) dt = dqstq_t(q=self.z[7*i+3: 7*i+7]) # dt = np.eye(3) jt[:, 6*i: 6*i+3] = -dt jt[:, 6*(i+1): 6*(i+1)+3] = dt # m = m_rot(x=self.z[7*i+3 : 7*i+7]) # a = dqstq_q(q=self.z[7*i+3 : 7*i+7], # t=self.z[7*(i+1) : 7*(i+1)+3]-self.z[7*i : 7*i+3]) # jt[:, 6*i+3 : 6*i+6] = np.dot(a, m) J = np.vstack((J, np.dot(L_rx, jt))) # rotation constraint jr = np.zeros((4, J.shape[1])) m = m_rot(x=self.z[7*i+3: 7*i+7]) a = dpsq_p(q=self.z[7*(i+1)+3: 7*(i+1)+7]) jr[:, 6*i+3: 6*i+6] = np.dot(a, m) m = m_rot(x=self.z[7*(i+1)+3: 7*(i+1)+7]) b = dpsq_q(p=self.z[7*i+3: 7*i+7]) jr[:, 6*(i+1)+3: 6*(i+1)+6] = np.dot(b, m) J = np.vstack((J, np.dot(L_rq, jr))) return J def residuals(self, poses, vos, L_ax, L_aq, L_rx, L_rq): """ computes the residuals :param poses: N x 7 :param vos: (N-1) x 7 :param L_ax: 3 x 3 :param L_aq: 4 x 4 :param L_rx: 3 x 3 :param L_rq: 4 x 4 :return: """ r = np.zeros((0, 1)) # unary residuals L = np.zeros((7, 7)) L[:3, :3] = L_ax L[3:, 3:] = L_aq for i in range(self.N): rr = self.z[7*i: 7*(i+1)] - np.reshape(poses[i], (-1, 1)) r = np.vstack((r, np.dot(L, rr))) # pairwise residuals for i in range(self.N-1): # translation residual v = self.z[7*(i+1):7*(i+1)+3, 0]-self.z[7*i:7*i+3, 0] q = txq.qinverse(self.z[7*i+3:7*i+7, 0]) rt = txq.rotate_vector(v, q) rt = rt[:, np.newaxis] - vos[i, :3].reshape((-1, 1)) # rt = self.z[7*(i+1) : 7*(i+1)+3] - self.z[7*i : 7*i+3] - \ # vos[i, :3].reshape((-1, 1)) r = np.vstack((r, np.dot(L_rx, rt))) # rotation residual q0 = self.z[7*i+3: 7*i+7].squeeze() q1 = self.z[7*(i+1)+3: 7*(i+1)+7].squeeze() qvo = txq.qmult(txq.qinverse(q0), q1).reshape((-1, 1)) rq = qvo - vos[i, 3:].reshape((-1, 1)) r = np.vstack((r, np.dot(L_rq, rq))) return r def update_on_manifold(self, x): """ Updates the state vector on manifold :param x: manifold increment, column vector :return: """ for i in range(self.N): # update translation t = x[6*i: 6*i+3] self.z[7*i: 7*i+3] += t # update rotation qm = x[6*i+3: 6*i+6] # quaternion on the manifold dq = np.zeros(4) # method in Burgard paper # dq[1:] = qm.squeeze() # dq[0] = math.sqrt(1 - sum(np.square(qm))) # incremental quaternion # method of exponential map n = np.linalg.norm(qm) dq[0] = math.cos(n) dq[1:] = np.sinc(n/np.pi) * qm.squeeze() q = self.z[7*i+3: 7*i+7].squeeze() q = txq.qmult(q, dq).reshape((-1, 1)) self.z[7*i+3: 7*i+7] = q def optimize(self, poses, vos, sax=1, saq=1, srx=1, srq=1, n_iters=10): """ run PGO, with init = poses 
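# --- Editor's note (illustrative): the pose graph stores 7 values per pose
# but updates rotations through a 3-vector increment qm on the manifold.
# The exponential map dq = (cos||qm||, sin(||qm||) * qm / ||qm||) used in
# update_on_manifold always yields a unit quaternion; np.sinc(n/np.pi)
# evaluates to sin(n)/n.
import numpy as np
qm = np.array([0.1, -0.2, 0.05])
n = np.linalg.norm(qm)
dq = np.hstack((np.cos(n), np.sinc(n / np.pi) * qm))
assert np.isclose(np.linalg.norm(dq), 1.0)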
:param poses: :param vos: :param sax: sigma for absolute translation :param saq: sigma for absolute rotation :param srx: sigma for relative translation :param srq: sigma for relative rotation :param n_iters: :return: """ self.N = len(poses) # init state vector with the predicted poses self.z = np.reshape(poses.copy(), (-1, 1)) # construct the information matrices L_ax = np.linalg.cholesky(np.eye(3) / sax) L_aq = np.linalg.cholesky(np.eye(4) / saq) L_rx = np.linalg.cholesky(np.eye(3) / srx) L_rq = np.linalg.cholesky(np.eye(4) / srq) for n_iter in range(n_iters): J = self.jacobian(L_ax.T, L_aq.T, L_rx.T, L_rq.T) r = self.residuals(poses.copy(), vos.copy(), L_ax.T, L_aq.T, L_rx.T, L_rq.T) H = np.dot(J.T, J) # hessian b = np.dot(J.T, r) # residuals # solve Hx = -b for x R = slin.cholesky(H) # H = R' R y = slin.solve_triangular(R.T, -b) x = slin.solve_triangular(R, y) self.update_on_manifold(x) return self.z.reshape((-1, 7)) class PoseGraphFC: def __init__(self): """ implements pose graph optimization from "Hybrid Hessians for Optimization of Pose Graphs" - Y. LeCun et al and "A Tutorial on Graph-Based SLAM" - W. Burgard et al fully connected version """ self.N = 0 self.z = np.zeros((0, 0)) def jacobian(self, L_ax, L_aq, L_rx, L_rq): # 6 because updates for rotation are on manifold J = np.zeros((0, 6*self.N)) # unary constraints for i in range(self.N): # translation constraint jt = np.zeros((3, J.shape[1])) jt[:, 6*i: 6*i+3] = np.eye(3) J = np.vstack((J, np.dot(L_ax, jt))) # rotation constraint jr = np.zeros((4, J.shape[1])) jr[:, 6*i+3: 6*i+6] = m_rot(x=self.z[7*i+3: 7*i+7]) J = np.vstack((J, np.dot(L_aq, jr))) # pairwise constraints for i in range(self.N): for j in range(i+1, self.N): # translation constraint jt = np.zeros((3, J.shape[1])) dt = dqstq_t(q=self.z[7*i+3: 7*i+7]) # dt = np.eye(3) jt[:, 6*i: 6*i+3] = -dt jt[:, 6*j: 6*j+3] = dt # m = m_rot(x=self.z[7*i+3 : 7*i+7]) # a = dqstq_q(q=self.z[7*i+3 : 7*i+7], # t=self.z[7*(i+1) : 7*(i+1)+3]-self.z[7*i : 7*i+3]) # jt[:, 6*i+3 : 6*i+6] = np.dot(a, m) J = np.vstack((J, np.dot(L_rx, jt))) # rotation constraint jr = np.zeros((4, J.shape[1])) m = m_rot(x=self.z[7*i+3: 7*i+7]) a = dpsq_p(q=self.z[7*j+3: 7*j+7]) jr[:, 6*i+3: 6*i+6] = np.dot(a, m) m = m_rot(x=self.z[7*j+3: 7*j+7]) b = dpsq_q(p=self.z[7*i+3: 7*i+7]) jr[:, 6*j+3: 6*j+6] = np.dot(b, m) J = np.vstack((J, np.dot(L_rq, jr))) return J def residuals(self, poses, vos, L_ax, L_aq, L_rx, L_rq): """ computes the residuals :param poses: N x 7 :param vos: (N-1) x 7 :param L_ax: 3 x 3 :param L_aq: 4 x 4 :param L_rx: 3 x 3 :param L_rq: 4 x 4 :return: """ r = np.zeros((0, 1)) # unary residuals L = np.zeros((7, 7)) L[:3, :3] = L_ax L[3:, 3:] = L_aq for i in range(self.N): rr = self.z[7*i: 7*(i+1)] - np.reshape(poses[i], (-1, 1)) r = np.vstack((r, np.dot(L, rr))) # pairwise residuals k = 0 for i in range(self.N): for j in range(i+1, self.N): # translation residual v = self.z[7*j:7*j+3, 0]-self.z[7*i:7*i+3, 0] q = txq.qinverse(self.z[7*i+3:7*i+7, 0]) rt = txq.rotate_vector(v, q) rt = rt[:, np.newaxis] - vos[k, :3].reshape((-1, 1)) # rt = self.z[7*(i+1) : 7*(i+1)+3] - self.z[7*i : 7*i+3] - \ # vos[i, :3].reshape((-1, 1)) r = np.vstack((r, np.dot(L_rx, rt))) # rotation residual q0 = self.z[7*i+3: 7*i+7].squeeze() q1 = self.z[7*j+3: 7*j+7].squeeze() qvo = txq.qmult(txq.qinverse(q0), q1).reshape((-1, 1)) rq = qvo - vos[k, 3:].reshape((-1, 1)) r = np.vstack((r, np.dot(L_rq, rq))) k += 1 return r def update_on_manifold(self, x): """ Updates the state vector on manifold :param x: manifold increment, column 
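# --- Editor's sketch (standalone, with placeholder matrices): the
# Gauss-Newton step inside optimize() solves H x = -b, with H = J^T J and
# b = J^T r, via a Cholesky factorization H = R^T R and two triangular solves.
import numpy as np
import scipy.linalg as slin
J = np.random.rand(12, 6)
r = np.random.rand(12, 1)
H = J.T @ J + 1e-9 * np.eye(6)  # tiny damping keeps H positive definite
b = J.T @ r
R = slin.cholesky(H)            # upper triangular, H = R^T R
y = slin.solve_triangular(R.T, -b, lower=True)
x = slin.solve_triangular(R, y)
assert np.allclose(H @ x, -b)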
vector :return: """ for i in range(self.N): # update translation t = x[6*i: 6*i+3] self.z[7*i: 7*i+3] += t # update rotation qm = x[6*i+3: 6*i+6] # quaternion on the manifold dq = np.zeros(4) # method in Burgard paper # dq[1:] = qm.squeeze() # dq[0] = math.sqrt(1 - sum(np.square(qm))) # incremental quaternion # method of exponential map n = np.linalg.norm(qm) dq[0] = math.cos(n) dq[1:] = np.sinc(n/np.pi) * qm.squeeze() q = self.z[7*i+3: 7*i+7].squeeze() q = txq.qmult(q, dq).reshape((-1, 1)) self.z[7*i+3: 7*i+7] = q def optimize(self, poses, vos, sax=1, saq=1, srx=1, srq=1, n_iters=10): """ run PGO, with init = poses :param poses: :param vos: :param sax: sigma for absolute translation :param saq: sigma for absolute rotation :param srx: sigma for relative translation :param srq: sigma for relative rotation :param n_iters: :return: """ self.N = len(poses) # init state vector with the predicted poses self.z = np.reshape(poses.copy(), (-1, 1)) # construct the information matrices L_ax = np.linalg.cholesky(np.eye(3) / sax) L_aq = np.linalg.cholesky(np.eye(4) / saq) L_rx = np.linalg.cholesky(np.eye(3) / srx) L_rq = np.linalg.cholesky(np.eye(4) / srq) for n_iter in range(n_iters): J = self.jacobian(L_ax.T, L_aq.T, L_rx.T, L_rq.T) r = self.residuals(poses.copy(), vos.copy(), L_ax.T, L_aq.T, L_rx.T, L_rq.T) H = np.dot(J.T, J) # hessian b = np.dot(J.T, r) # residuals # solve Hx = -b for x R = slin.cholesky(H) # H = R' R y = slin.solve_triangular(R.T, -b) x = slin.solve_triangular(R, y) self.update_on_manifold(x) return self.z.reshape((-1, 7)) def optimize_poses(pred_poses, vos=None, fc_vos=False, target_poses=None, sax=1, saq=1, srx=1, srq=1): """ optimizes poses using either the VOs or the target poses (calculates VOs from them) :param pred_poses: N x 7 :param vos: (N-1) x 7 :param fc_vos: whether to use relative transforms between all frames in a fully connected manner, not just consecutive frames :param target_poses: N x 7 :param: sax: covariance of pose translation (1 number) :param: saq: covariance of pose rotation (1 number) :param: srx: covariance of VO translation (1 number) :param: srq: covariance of VO rotation (1 number) :return: """ pgo = PoseGraphFC() if fc_vos else PoseGraph() if vos is None: if target_poses is not None: # calculate the VOs (in the pred_poses frame) vos = np.zeros((len(target_poses)-1, 7)) for i in range(len(vos)): vos[i, :3] = target_poses[i+1, :3] - target_poses[i, :3] q0 = target_poses[i, 3:] q1 = target_poses[i+1, 3:] vos[i, 3:] = txq.qmult(txq.qinverse(q0), q1) else: print('Specify either VO or target poses') return None optim_poses = pgo.optimize(poses=pred_poses, vos=vos, sax=sax, saq=saq, srx=srx, srq=srq) return optim_poses def align_3d_pts(x1, x2): """Align two sets of 3d points using the method of Horn (closed-form). 
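# --- Editor's sketch (hypothetical call; pose values and default sigmas are
# illustrative): optimize_poses can derive the VOs from target poses when
# none are supplied, then runs PGO starting from the predictions.
import numpy as np
pred = np.zeros((3, 7)); pred[:, 3] = 1.0; pred[:, 0] = [0.0, 1.1, 1.9]  # noisy x-spacing
targ = np.zeros((3, 7)); targ[:, 3] = 1.0; targ[:, 0] = [0.0, 1.0, 2.0]  # target x-spacing
refined = optimize_poses(pred_poses=pred, vos=None, target_poses=targ)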
Find optimal s, R, t, such that s*R*(x1-t) = x2 Input: x1 -- first trajectory (3xn) x2 -- second trajectory (3xn) Output: R -- rotation matrix (3x3) t -- translation vector (3x1) s -- scale (1x1) written by Jinwei Gu """ x1c = x1.mean(1, keepdims=True) x2c = x2.mean(1, keepdims=True) x1_zerocentered = x1 - x1c x2_zerocentered = x2 - x2c W = np.zeros((3, 3)) r1 = 0 r2 = 0 for i in range(x1.shape[1]): a = x1_zerocentered[:, i] b = x2_zerocentered[:, i] W += np.outer(b, a) r1 += np.dot(a.T, a) r2 += np.dot(b.T, b) s = np.asscalar(np.sqrt(r2/r1)) U, d, Vh = np.linalg.svd(W) S = np.eye(3) if np.linalg.det(np.dot(U, Vh)) < 0: S[2, 2] = -1 R = np.dot(U, np.dot(S, Vh)) t = x1c - (1/s) * np.dot(R.transpose(), x2c) # ---- align ---- # x2a = s * np.dot(R, x1-t) # error = x2a - x2 return R, t, s def align_2d_pts(x1, x2): """Align two sets of 3d points using the method of Horn (closed-form). Find optimal s, R, t, such that s*R*(x1-t) = x2 Input: x1 -- first trajectory (2xn) x2 -- second trajectory (2xn) Output: R -- rotation matrix (2x2) t -- translation vector (2x1) s -- scale (1x1) written by Jinwei Gu """ x1c = x1.mean(1, keepdims=True) x2c = x2.mean(1, keepdims=True) x1_zerocentered = x1 - x1c x2_zerocentered = x2 - x2c W = np.zeros((2, 2)) r1 = 0 r2 = 0 for i in range(x1.shape[1]): a = x1_zerocentered[:, i] b = x2_zerocentered[:, i] W += np.outer(b, a) r1 += np.dot(a.T, a) r2 += np.dot(b.T, b) s = np.asscalar(np.sqrt(r2/r1)) U, d, Vh = np.linalg.svd(W) S = np.eye(2) if np.linalg.det(np.dot(U, Vh)) < 0: S[1, 1] = -1 R = np.dot(U, np.dot(S, Vh)) t = x1c - (1/s) * np.dot(R.transpose(), x2c) # ---- align ---- # x2a = s * np.dot(R, x1-t) # error = x2a - x2 return R, t, s def align_3d_pts_noscale(x1, x2): """Align two sets of 3d points using the method of Horn (closed-form). Find optimal s, R, t, such that s*R*(x1-t) = x2 Input: x1 -- first trajectory (3xn) x2 -- second trajectory (3xn) Output: R -- rotation matrix (3x3) t -- translation vector (3x1) written by Jinwei Gu """ x1c = x1.mean(1, keepdims=True) x2c = x2.mean(1, keepdims=True) x1_zerocentered = x1 - x1c x2_zerocentered = x2 - x2c W = np.zeros((3, 3)) r1 = 0 r2 = 0 for i in range(x1.shape[1]): a = x1_zerocentered[:, i] b = x2_zerocentered[:, i] W += np.outer(b, a) r1 += np.dot(a.T, a) r2 += np.dot(b.T, b) # s = np.asscalar(np.sqrt(r2/r1)) s = 1 U, d, Vh = np.linalg.svd(W) S = np.eye(3) if np.linalg.det(np.dot(U, Vh)) < 0: S[2, 2] = -1 R = np.dot(U, np.dot(S, Vh)) t = x1c - np.dot(R.transpose(), x2c) # ---- align ---- # x2a = s * np.dot(R, x1-t) # error = x2a - x2 return R, t, s def align_2d_pts_noscale(x1, x2): """Align two sets of 3d points using the method of Horn (closed-form). 
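# --- Editor's note (illustrative): every Horn-style helper above follows the
# convention s * R * (x1 - t) = x2 for 3xN (or 2xN) corresponding point sets.
import numpy as np
x1 = np.random.rand(3, 10)
x2 = x1.copy()                   # trivially aligned point sets
R, t, s = align_3d_pts(x1, x2)
x2_est = s * np.dot(R, x1 - t)   # reproduces x2 up to numerical noise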
Find optimal s, R, t, such that s*R*(x1-t) = x2 Input: x1 -- first trajectory (2xn) x2 -- second trajectory (2xn) Output: R -- rotation matrix (2x2) t -- translation vector (2x1) s -- scale (1x1) written by Jinwei Gu """ x1c = x1.mean(1, keepdims=True) x2c = x2.mean(1, keepdims=True) x1_zerocentered = x1 - x1c x2_zerocentered = x2 - x2c W = np.zeros((2, 2)) r1 = 0 r2 = 0 for i in range(x1.shape[1]): a = x1_zerocentered[:, i] b = x2_zerocentered[:, i] W += np.outer(b, a) r1 += np.dot(a.T, a) r2 += np.dot(b.T, b) # s = np.asscalar(np.sqrt(r2/r1)) s = 1 U, d, Vh = np.linalg.svd(W) S = np.eye(2) if np.linalg.det(np.dot(U, Vh)) < 0: S[1, 1] = -1 R = np.dot(U, np.dot(S, Vh)) t = x1c - (1/s) * np.dot(R.transpose(), x2c) # ---- align ---- # x2a = s * np.dot(R, x1-t) # error = x2a - x2 return R, t, s def align_camera_poses(o1, o2, R1, R2, use_rotation_constraint=True): """Align two sets of camera poses (R1,o1/R2,o2) using the method of Horn (closed-form). Find optimal s, R, t, such that s*R*(o1-t) = o2 (1) R*R1 = R2 (2) where R1/R2 are the camera-to-world matrices, o1/o2 are the center of the cameras. Input: o1 -- camera centers (3xn) o2 -- camera centers (3xn) R1 -- camera poses (camera-to-world matrices) (nx3x3) R2 -- camera poses (camera-to-world matrices) (nx3x3) use_rotation_constraint -- if False, uses only Eq(1) to solve. Output: R -- rotation matrix (3x3) t -- translation vector (3x1) s -- scale (1x1) Note, when use_rotation_constraint=False, it is the same problem as above, i.e., to align two sets of 3D points. When use_rotation_constraint=True, we note Eq(2) is the same equation as Eq(1), after we zero-center and remove the scale. So, we can use the same approach (SVD). written by Jinwei Gu """ if not use_rotation_constraint: return align_3d_pts(o1, o2) o1c = o1.mean(1, keepdims=True) o2c = o2.mean(1, keepdims=True) o1_zerocentered = o1 - o1c o2_zerocentered = o2 - o2c W = np.zeros((3, 3)) r1 = 0 r2 = 0 for i in range(o1.shape[1]): a = o1_zerocentered[:, i] b = o2_zerocentered[:, i] W += np.outer(b, a) r1 += np.dot(a.T, a) r2 += np.dot(b.T, b) s = np.asscalar(np.sqrt(r2/r1)) # add rotation constraints for i in range(o1.shape[1]): d1 = np.squeeze(R1[i, :, :]) d2 = np.squeeze(R2[i, :, :]) for c in range(3): a = d1[:, c] b = d2[:, c] W += np.outer(b, a) U, d, Vh = np.linalg.svd(W) S = np.eye(3) if np.linalg.det(np.dot(U, Vh)) < 0: S[2, 2] = -1 R = np.dot(U, np.dot(S, Vh)) t = o1c - (1/s) * np.dot(R.transpose(), o2c) # ---- align ---- # o2a = s * np.dot(R, o1-t) # R2a = np.dot(R, R1) return R, t, s def test_align_3d_pts(): import transforms3d.euler as txe N = 10 x1 = np.random.rand(3, N) noise = np.random.rand(3, N)*0.01 s = np.random.rand() t = np.random.rand(3, 1) R = txe.euler2mat(np.random.rand(), np.random.rand(), np.random.rand()) R = R[:3, :3] x2 = s*np.dot(R, x1-t) + noise Re, te, se = align_3d_pts(x1, x2) print('scale ', s, se) print('rotation matrx ', R, Re) print('translation ', t, te) def test_align_camera_poses(): import transforms3d.euler as txe N = 10 o1 = np.random.rand(3, N) noise = np.random.rand(3, N)*0.01 s = np.random.rand() t = np.random.rand(3, 1) R = txe.euler2mat(np.random.rand(), np.random.rand(), np.random.rand()) R = R[:3, :3] o2 = s*np.dot(R, o1-t) + noise R1 = np.zeros((N, 3, 3)) R2 = np.zeros((N, 3, 3)) for i in range(N): Ri = txe.euler2mat( np.random.rand(), np.random.rand(), np.random.rand()) R1[i, :, :] = Ri[:3, :3] R2[i, :, :] = np.dot(R, Ri[:3, :3]) Re1, te1, se1 = align_camera_poses(o1, o2, R1, R2, False) Re2, te2, se2 = align_camera_poses(o1, o2, R1, R2, 
True) print('scale ', s, se1, se2) print('rotation matrx ', R, Re1, Re2) print('translation ', t, te1, te2) def pgo_test_poses(): """ generates test poses and vos for the various PGO implementations :return: """ poses = np.zeros((3, 7)) for i in range(poses.shape[0]): poses[i, :3] = i angle = math.radians(10*i) R = txe.euler2mat(angle, angle, angle) q = txq.mat2quat(R) poses[i, 3:] = q vos = np.zeros((poses.shape[0]-1, 7)) for i in range(vos.shape[0]): vos[i, 0] = 1.5 vos[i, 1] = 0.5 vos[i, 2] = 1.0 R = txe.euler2mat(math.radians(15), math.radians(10), math.radians(5)) q = txq.mat2quat(R) vos[i, 3:] = q return poses, vos def pgo_test_poses1(): poses = np.zeros((3, 7)) R = txe.euler2mat(0, 0, np.deg2rad(45)) q = txq.mat2quat(R) poses[:, 3:] = q for i in range(len(poses)): poses[i, :3] = np.asarray([i, i, 0]) pt = np.zeros((len(poses), 6)) pt[:, :3] = poses[:, :3] for i, p in enumerate(poses): pt[i, 3:] = qlog(p[3:]) pt = torch.from_numpy(pt.astype(np.float32)) vost = calc_vos_safe_fc(pt.unsqueeze(0))[0].numpy() vos = np.zeros((len(vost), 7)) vos[:, :3] = vost[:, :3] for i, p in enumerate(vost): vos[i, 3:] = qexp(p[3:]) # perturbation vos[0, 0] = np.sqrt(2) - 0.5 vos[1, 0] = np.sqrt(2) - 0.5 return poses, vos def print_poses(poses): print('translations') print(poses[:, :3]) print('euler') for i in range(poses.shape[0]): a = txe.mat2euler(txq.quat2mat(poses[i, 3:])) print([np.rad2deg(aa) for aa in a]) def test_pgo(): """ Tests the full pose graph optimization implementation :return: bool """ pred_poses, vos = pgo_test_poses1() print('pred poses') print_poses(pred_poses) print('vos') print_poses(vos) pgo = PoseGraph() optimized_poses = pgo.optimize(pred_poses, vos) print('optimized') print_poses(optimized_poses) def test_pose_utils(): """ Tests the pose utils :return: """ TEST_COMPOSE = True TEST_INV = True def ra(_): return np.random.uniform(0, 2*math.pi) if TEST_COMPOSE: print('Testing pose composing...') R1 = txe.euler2mat(ra(1), ra(1), ra(1)) t1 = np.random.rand(3) R2 = txe.euler2mat(ra(1), ra(1), ra(1)) t2 = np.random.rand(3) # homogeneous matrix method R = np.dot(R1, R2) t = t1 + np.dot(R1, t2) print('From homogeneous matrices, t = ') print(t) print('R = ') print(R) # quaternion method q1 = txq.mat2quat(R1) q2 = txq.mat2quat(R2) p1 = torch.cat((torch.from_numpy(t1), torch.from_numpy(q1))) p2 = torch.cat((torch.from_numpy(t2), torch.from_numpy(q2))) p = compose_pose_quaternion( torch.unsqueeze(p1, 0), torch.unsqueeze(p2, 0)) t = p[:, :3].numpy().squeeze() q = p[:, 3:].numpy().squeeze() print('From quaternions, t = ') print(t) print('R = ') print(txe.quat2mat(q)) if TEST_INV: print('Testing pose inversion...') R = txe.euler2mat(ra(1), ra(1), ra(1)) t = np.random.rand(3) T = np.eye(4) T[:3, :3] = R T[:3, -1] = t q = txq.mat2quat(R) p = torch.cat((torch.from_numpy(t), torch.from_numpy(q))) pinv = invert_pose_quaternion(torch.unsqueeze(p, 0)) tinv, qinv = pinv[:, :3], pinv[:, 3:] Rinv = txq.quat2mat(qinv.numpy().squeeze()) Tinv = np.eye(4) Tinv[:3, :3] = Rinv Tinv[:3, -1] = tinv.numpy().squeeze() print('T * T^(-1) = ') print(np.dot(T, Tinv)) def test_q_error(): def ra(_): return np.random.uniform(0, 2*math.pi) # rotation along x axis a1 = ra(1) a2 = ra(1) q1 = txq.mat2quat(txe.euler2mat(a1, 0, 0)) q2 = txq.mat2quat(txe.euler2mat(a2, 0, 0)) a1 = np.rad2deg(a1) a2 = np.rad2deg(a2) print('Angles: {:f}, {:f}, difference = {:f}'.format(a1, a2, a1-a2)) print('Error: {:f}'.format(quaternion_angular_error(q1, q2))) def test_log_q_error(): def ra(_): return np.random.uniform(0, 2*math.pi) # rotation 
along x axis a1 = ra(1) a2 = ra(1) q1 = txq.mat2quat(txe.euler2mat(0, a1, 0)) q2 = txq.mat2quat(txe.euler2mat(0, a2, 0)) # apply log map q1 = np.arccos(q1[0]) * q1[1:] / np.linalg.norm(q1[1:]) q2 = np.arccos(q2[0]) * q2[1:] / np.linalg.norm(q2[1:]) a1 = np.rad2deg(a1) a2 = np.rad2deg(a2) print('Angles: {:f}, {:f}, difference = {:f}'.format(a1, a2, a1-a2)) print('Error: {:f}'.format(log_quaternion_angular_error(q1, q2))) if __name__ == '__main__': test_pgo() # test_dumb_pgo() # test_align_camera_poses() # test_q_error() # test_log_q_error() ================================================ FILE: utils/progress_bar.py ================================================ import contextlib import enum import math import time import numpy as np def progress_str(val, *args, width=20, with_ptg=True): val = max(0., min(val, 1.)) assert width > 1 pos = round(width * val) - 1 if with_ptg is True: log = '[{}%]'.format(max_point_str(val * 100.0, 4)) log += '[' for i in range(width): if i < pos: log += '=' elif i == pos: log += '>' else: log += '.' log += ']' for arg in args: log += '[{}]'.format(arg) return log def second_to_time_str(second, omit_hours_if_possible=True): second = int(second) m, s = divmod(second, 60) h, m = divmod(m, 60) if omit_hours_if_possible: if h == 0: return '{:02d}:{:02d}'.format(m, s) return '{:02d}:{:02d}:{:02d}'.format(h, m, s) def progress_bar_iter(task_list, width=20, with_ptg=True, step_time_average=50, name=None): total_step = len(task_list) step_times = [] start_time = 0.0 name = '' if name is None else f"[{name}]" for i, task in enumerate(task_list): t = time.time() yield task step_times.append(time.time() - t) start_time += step_times[-1] start_time_str = second_to_time_str(start_time) average_step_time = np.mean(step_times[-step_time_average:]) + 1e-6 speed_str = "{:.2f}it/s".format(1 / average_step_time) remain_time = (total_step - i) * average_step_time remain_time_str = second_to_time_str(remain_time) time_str = start_time_str + '>' + remain_time_str prog_str = progress_str( (i + 1) / total_step, speed_str, time_str, width=width, with_ptg=with_ptg) print(name + prog_str + ' ', end='\r', flush=True) print("") list_bar = progress_bar_iter def enumerate_bar(task_list, width=20, with_ptg=True, step_time_average=50, name=None): total_step = len(task_list) step_times = [] start_time = 0.0 name = '' if name is None else f"[{name}]" for i, task in enumerate(task_list): t = time.time() yield i, task step_times.append(time.time() - t) start_time += step_times[-1] start_time_str = second_to_time_str(start_time) average_step_time = np.mean(step_times[-step_time_average:]) + 1e-6 speed_str = "{:.2f}it/s".format(1 / average_step_time) remain_time = (total_step - i) * average_step_time remain_time_str = second_to_time_str(remain_time) time_str = start_time_str + '>' + remain_time_str prog_str = progress_str( (i + 1) / total_step, speed_str, time_str, width=width, with_ptg=with_ptg) print(name + prog_str + ' ', end='\r', flush=True) print("") def max_point_str(val, max_point): positive = bool(val >= 0.0) val = np.abs(val) if val == 0: point = 1 else: point = max(int(np.log10(val)), 0) + 1 fmt = "{:." 
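# --- Editor's sketch (hypothetical usage): both iterator helpers above wrap a
# sequence and print an in-place bar with speed and ETA; `name` prefixes it.
import time
for i, item in enumerate_bar(list(range(50)), name='eval'):
    time.sleep(0.01)  # stand-in for real per-item work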
+ str(max(max_point - point, 0)) + "f}" if positive is True: return fmt.format(val) else: return fmt.format(-val) class Unit(enum.Enum): Iter = 'iter' Byte = 'byte' def convert_size(size_bytes): # from https://stackoverflow.com/questions/5194057/better-way-to-convert-file-sizes-in-python if size_bytes == 0: return "0B" size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") i = int(math.floor(math.log(size_bytes, 1024))) p = math.pow(1024, i) s = round(size_bytes / p, 2) return s, size_name[i] class ProgressBar: def __init__(self, width=20, with_ptg=True, step_time_average=50, speed_unit=Unit.Iter): self._width = width self._with_ptg = with_ptg self._step_time_average = step_time_average self._step_times = [] self._start_time = 0.0 self._total_size = None self._speed_unit = speed_unit def start(self, total_size): self._start = True self._step_times = [] self._finished_sizes = [] self._time_elapsed = 0.0 self._current_time = time.time() self._total_size = total_size self._progress = 0 def print_bar(self, finished_size=1, pre_string=None, post_string=None): self._step_times.append(time.time() - self._current_time) self._finished_sizes.append(finished_size) self._time_elapsed += self._step_times[-1] start_time_str = second_to_time_str(self._time_elapsed) time_per_size = np.array(self._step_times[-self._step_time_average:]) time_per_size /= np.array( self._finished_sizes[-self._step_time_average:]) average_step_time = np.mean(time_per_size) + 1e-6 if self._speed_unit == Unit.Iter: speed_str = "{:.2f}it/s".format(1 / average_step_time) elif self._speed_unit == Unit.Byte: size, size_unit = convert_size(1 / average_step_time) speed_str = "{:.2f}{}/s".format(size, size_unit) else: raise ValueError("unknown speed unit") remain_time = (self._total_size - self._progress) * average_step_time remain_time_str = second_to_time_str(remain_time) time_str = start_time_str + '>' + remain_time_str prog_str = progress_str( (self._progress + 1) / self._total_size, speed_str, time_str, width=self._width, with_ptg=self._with_ptg) self._progress += finished_size if pre_string is not None: prog_str = pre_string + prog_str if post_string is not None: prog_str += post_string if self._progress >= self._total_size: print(prog_str + ' ',flush=True) else: print(prog_str + ' ', end='\r', flush=True) self._current_time = time.time() ================================================ FILE: utils/rand_utils.py ================================================ import numpy as np def truncated_normal(u, sigma, min, max, shape=None): """ Generate data following truncated normal distribution Args: u ([type]): mean sigma ([type]): var=sigma^2 min ([type]): lower bound of the truncating range max ([type]): higher bound of the truncating range shape ([type], optional): [description]. Defaults to None. 
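# --- Editor's sketch (hypothetical usage): with speed_unit=Unit.Byte the
# ProgressBar reports throughput in human-readable sizes via convert_size.
pb = ProgressBar(speed_unit=Unit.Byte)
pb.start(total_size=1024 * 1024)
for _ in range(16):
    pb.print_bar(finished_size=64 * 1024)  # 64 KB per step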
""" val = min-1 while valmax: #iterative sampling until the first qualified data emerge if shape is not None: val = sigma*np.random.randn(shape)+u else: val = sigma*np.random.randn()+u assert val != min-1 return val ================================================ FILE: utils/singleton.py ================================================ # import h5py class Singleton(type): _instances = {} def __call__(cls, *args, **kwargs): if cls not in cls._instances: cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) return cls._instances[cls] ================================================ FILE: utils/timer.py ================================================ import time from contextlib import contextmanager @contextmanager def simple_timer(name=''): t = time.time() yield print(f"{name} exec time: {time.time() - t}") def singleton(class_): instances = {} def getinstance(*args, **kwargs): if class_ not in instances: instances[class_] = class_(*args, **kwargs) return instances[class_] return getinstance @singleton class timming(object): def __init__(self): super().__init__() self.items={} def start(self, item_name): if item_name not in self.items: self.items[item_name]={ "end_cnt": 0, "avg": 0, "start_t": time.time(), "is_finished": False } else: assert self.items[item_name]["is_finished"]==True self.items[item_name].update( { # "start_cnt": self.items[item_name]["start_cnt"]+1, "end_cnt": self.items[item_name]["end_cnt"], #unchanged "avg": self.items[item_name]["avg"], #unchanged "start_t": time.time(), "is_finished": False } ) def end(self, item_name): assert item_name in self.items t=time.time() interval=t-self.items[item_name]['start_t'] self.items[item_name].update( { "end_cnt": self.items[item_name]["end_cnt"]+1, "avg": (self.items[item_name]["avg"]*self.items[item_name]["end_cnt"]+interval)/(self.items[item_name]["end_cnt"]+1) , "is_finished": True } ) def summarize(self): for k in self.items: print(f"Average time of {k} = {self.items[k]['avg']*1000} ms", f"(averaged on {self.items[k]['end_cnt']} testing cycles.") ================================================ FILE: utils/util.py ================================================ import torch import numpy as np import collections def freeze_params(params: dict, include: str = None, exclude: str = None): assert isinstance(params, dict) include_re = None if include is not None: include_re = re.compile(include) exclude_re = None if exclude is not None: exclude_re = re.compile(exclude) remain_params = [] for k, p in params.items(): if include_re is not None: if include_re.match(k) is not None: continue if exclude_re is not None: if exclude_re.match(k) is None: continue remain_params.append(p) return remain_params def freeze_params_v2(params: dict, include: str = None, exclude: str = None): assert isinstance(params, dict) include_re = None if include is not None: include_re = re.compile(include) exclude_re = None if exclude is not None: exclude_re = re.compile(exclude) for k, p in params.items(): if include_re is not None: if include_re.match(k) is not None: p.requires_grad = False if exclude_re is not None: if exclude_re.match(k) is None: p.requires_grad = False def filter_param_dict(state_dict: dict, include: str = None, exclude: str = None): assert isinstance(state_dict, dict) include_re = None if include is not None: include_re = re.compile(include) exclude_re = None if exclude is not None: exclude_re = re.compile(exclude) res_dict = {} for k, p in state_dict.items(): if include_re is not None: if include_re.match(k) is None: 
continue if exclude_re is not None: if exclude_re.match(k) is not None: continue res_dict[k] = p return res_dict def modify_parameter_name_with_map(state_dict, parameteter_name_map=None): if parameteter_name_map is None: return state_dict for old, new in parameteter_name_map: for key in list(state_dict.keys()): if old in key: new_key = key.replace(old, new) state_dict[new_key] = state_dict.pop(key) return state_dict def load_pretrained_model_map_func(state_dict, parameteter_name_map=None, include: str = None, exclude: str = None): state_dict = filter_param_dict(state_dict, include, exclude) state_dict = modify_parameter_name_with_map(state_dict, parameteter_name_map) return state_dict def list_recursive_op(input_list, op): assert isinstance(input_list, list) for i, v in enumerate(input_list): if isinstance(v, list): input_list[i] = list_recursive_op(v, op) elif isinstance(v, dict): input_list[i] = dict_recursive_op(v, op) else: input_list[i] = op(v) return input_list def dict_recursive_op(input_dict, op): assert isinstance(input_dict, dict) for k, v in input_dict.items(): if isinstance(v, dict): input_dict[k] = dict_recursive_op(v, op) elif isinstance(v, (list, tuple)): input_dict[k] = list_recursive_op(v, op) else: input_dict[k] = op(v) return input_dict ================================================ FILE: utils/visualize.py ================================================ import numpy as np import cv2 import copy import matplotlib.pyplot as plt import matplotlib.patches as patches # NOTE: vis_pose_box below also calls a project(points_3d, K, RT) helper that this file does not define; it is expected to be provided elsewhere in the repository. def vis_pointclouds_cv2(pc, K, win_size, init_transform=None, color=None, img=None): ''' pc: input point cloud of shape Nx3 K: camera intrinsic of shape 3x3 win_size: visualization window size (H, W) ''' x = (K@pc.T).T # Nx3 x = x/x[:, -1:] x = x.astype(np.int32) x[:, 0] = np.where((x[:, 0] < 0) | (x[:, 0] >= win_size[1]), np.zeros_like(x[:, 0]), x[:, 0]) x[:, 1] = np.where((x[:, 1] < 0) | (x[:, 1] >= win_size[0]), np.zeros_like(x[:, 1]), x[:, 1]) if img is None: img = np.zeros(list(win_size)+[3], dtype=np.uint8) if color is None: color = [255, 255, 0] img[x[:, 1], x[:, 0]] = color img[x[pc[:, -1] < 0][:, 1], x[pc[:, -1] < 0][:, 0]] = [255, 0, 0] return img def vis_2d_keypoints_cv2(img, keypoints, color=None): ''' img: input image of shape HxWx3 keypoints: Nx2, (x, y) ''' keypoints = np.around(keypoints).astype(np.int32) img = copy.copy(img) if color is None: color = [255, 255, 0] img[keypoints[:, 1], keypoints[:, 0]] = color return img def get_model_corners(model): min_x, max_x = np.min(model[:, 0]), np.max(model[:, 0]) min_y, max_y = np.min(model[:, 1]), np.max(model[:, 1]) min_z, max_z = np.min(model[:, 2]), np.max(model[:, 2]) corners_3d = np.array([ [min_x, min_y, min_z], [min_x, min_y, max_z], [min_x, max_y, min_z], [min_x, max_y, max_z], [max_x, min_y, min_z], [max_x, min_y, max_z], [max_x, max_y, min_z], [max_x, max_y, max_z], ]) return corners_3d def vis_pose_box(RT, K, model, background=None, fig=None, ax=None, title='', label='', x_label='x', y_label='y', color='g', dot='-'): if fig is None: if background is not None: # dpi = float(matplotlib.rcParams['figure.dpi']) dpi = 100 fig = plt.figure(figsize=[s/dpi for s in background.shape[:2]], dpi=dpi) else: fig = plt.figure() if ax is None: ax = fig.gca() # ax.set_axis_off() corner_3d = get_model_corners(model) corner_2d = project(corner_3d, K, RT) if background is not None: ax.imshow(background) ax.add_patch(patches.Polygon( xy=corner_2d[[0, 1, 3, 2, 0, 4, 6, 2]], fill=False, linewidth=1, edgecolor=color)) ax.add_patch(patches.Polygon( xy=corner_2d[[5, 4, 6, 7, 5, 1, 3, 7]], fill=False, linewidth=1,
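# --- Editor's sketch (hypothetical usage): dict_recursive_op applies `op` to
# every leaf of a nested dict/list structure, e.g. to move a whole batch of
# tensors to the GPU in one call.
import torch
batch = {'img': torch.zeros(1, 3, 8, 8), 'meta': {'K': torch.eye(3)}}
batch = dict_recursive_op(
    batch,
    lambda v: v.cuda() if torch.cuda.is_available() and isinstance(v, torch.Tensor) else v)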
edgecolor=color)) # line,=ax.plot(x, y, dot, color=color, linewidth=1) # line.set_label(label) # ax.set_title(title) # ax.set_xlabel(x_label) # ax.set_ylabel(y_label) # ax.axis('equal') # if label !='': # ax.legend() return fig, ax ================================================ FILE: weights/gru_update.pth ================================================ [File too large to display: 11.9 MB]