Repository: DecaYale/RNNPose
Branch: main
Commit: ff223f3cb6bf
Files: 126
Total size: 12.7 MB
Directory structure:
gitextract_p7a82okx/
├── .gitignore
├── LICENSE.md
├── README.md
├── builder/
│ ├── __init__.py
│ ├── dataset_builder.py
│ ├── input_reader_builder.py
│ ├── losses_builder.py
│ ├── lr_scheduler_builder.py
│ ├── optimizer_builder.py
│ └── rnnpose_builder.py
├── config/
│ ├── default.py
│ └── linemod/
│ ├── copy.sh
│ ├── copy_occ.sh
│ ├── template_fw0.5.yml
│ └── template_fw0.5_occ.yml
├── data/
│ ├── __init__.py
│ ├── dataset.py
│ ├── linemod/
│ │ └── linemod_config.py
│ ├── linemod_dataset.py
│ ├── preprocess.py
│ ├── transforms.py
│ └── ycb/
│ └── basic.py
├── doc/
│ └── prepare_data.md
├── docker/
│ ├── Dockerfile
│ └── freeze.yml
├── geometry/
│ ├── __init__.py
│ ├── cholesky.py
│ ├── diff_render.py
│ ├── diff_render_optim.py
│ ├── einsum.py
│ ├── intrinsics.py
│ ├── projective_ops.py
│ ├── se3.py
│ └── transformation.py
├── model/
│ ├── CFNet.py
│ ├── HybridNet.py
│ ├── PoseRefiner.py
│ ├── RNNPose.py
│ ├── descriptor2D.py
│ ├── descriptor3D.py
│ └── losses.py
├── scripts/
│ ├── compile_3rdparty.sh
│ ├── eval.sh
│ ├── eval_lmocc.sh
│ ├── run_dataformatter.sh
│ ├── run_datainfo_generation.sh
│ └── train.sh
├── thirdparty/
│ ├── kpconv/
│ │ ├── __init__.py
│ │ ├── cpp_wrappers/
│ │ │ ├── compile_wrappers.sh
│ │ │ ├── cpp_neighbors/
│ │ │ │ ├── build.bat
│ │ │ │ ├── neighbors/
│ │ │ │ │ ├── neighbors.cpp
│ │ │ │ │ └── neighbors.h
│ │ │ │ ├── setup.py
│ │ │ │ └── wrapper.cpp
│ │ │ ├── cpp_subsampling/
│ │ │ │ ├── build.bat
│ │ │ │ ├── grid_subsampling/
│ │ │ │ │ ├── grid_subsampling.cpp
│ │ │ │ │ └── grid_subsampling.h
│ │ │ │ ├── setup.py
│ │ │ │ └── wrapper.cpp
│ │ │ └── cpp_utils/
│ │ │ ├── cloud/
│ │ │ │ ├── cloud.cpp
│ │ │ │ └── cloud.h
│ │ │ └── nanoflann/
│ │ │ └── nanoflann.hpp
│ │ ├── kernels/
│ │ │ ├── dispositions/
│ │ │ │ └── k_015_center_3D.ply
│ │ │ └── kernel_points.py
│ │ ├── kpconv_blocks.py
│ │ └── lib/
│ │ ├── __init__.py
│ │ ├── ply.py
│ │ ├── timer.py
│ │ └── utils.py
│ ├── nn/
│ │ ├── _ext.c
│ │ ├── nn_utils.py
│ │ ├── setup.py
│ │ └── src/
│ │ ├── ext.h
│ │ └── nearest_neighborhood.cu
│ ├── raft/
│ │ ├── corr.py
│ │ ├── extractor.py
│ │ ├── update.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── augmentor.py
│ │ ├── flow_viz.py
│ │ ├── frame_utils.py
│ │ └── utils.py
│ └── vsd/
│ └── inout.py
├── tools/
│ ├── eval.py
│ ├── generate_data_info_deepim_0_orig.py
│ ├── generate_data_info_deepim_1_syn.py
│ ├── generate_data_info_deepim_2_posecnnval.py
│ ├── generate_data_info_v2_deepim.py
│ ├── train.py
│ └── transform_data_format.py
├── torchplus/
│ ├── __init__.py
│ ├── metrics.py
│ ├── nn/
│ │ ├── __init__.py
│ │ ├── functional.py
│ │ └── modules/
│ │ ├── __init__.py
│ │ ├── common.py
│ │ └── normalization.py
│ ├── ops/
│ │ ├── __init__.py
│ │ └── array_ops.py
│ ├── tools.py
│ └── train/
│ ├── __init__.py
│ ├── checkpoint.py
│ ├── common.py
│ ├── fastai_optim.py
│ ├── learning_schedules.py
│ ├── learning_schedules_fastai.py
│ └── optim.py
├── utils/
│ ├── __init__.py
│ ├── config_io.py
│ ├── distributed_utils.py
│ ├── eval_metric.py
│ ├── furthest_point_sample.py
│ ├── geometric.py
│ ├── img_utils.py
│ ├── log_tool.py
│ ├── pose_utils.py
│ ├── pose_utils_np.py
│ ├── progress_bar.py
│ ├── rand_utils.py
│ ├── singleton.py
│ ├── timer.py
│ ├── util.py
│ └── visualize.py
└── weights/
├── gru_update.pth
├── img_fea_enc.pth
└── superpoint_v1.pth
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
**/*.old
**/*.bak
.DS_Store
# Created by https://www.toptal.com/developers/gitignore/api/vscode,python,jupyternotebooks
# Edit at https://www.toptal.com/developers/gitignore?templates=vscode,python,jupyternotebooks
### JupyterNotebooks ###
# gitignore template for Jupyter Notebooks
# website: http://jupyter.org/
.ipynb_checkpoints
*/.ipynb_checkpoints/*
# IPython
profile_default/
ipython_config.py
# Remove previous ipynb_checkpoints
# git rm -r .ipynb_checkpoints/
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
#lib/
#lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
pytestdebug.log
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
doc/_build/
# PyBuilder
target/
# Jupyter Notebook
# IPython
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
pythonenv*
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# profiling data
.prof
### vscode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# End of https://www.toptal.com/developers/gitignore/api/vscode,python,jupyternotebooks
================================================
FILE: LICENSE.md
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# RNNPose: Recurrent 6-DoF Object Pose Refinement with Robust Correspondence Field Estimation and Pose Optimization
[Yan Xu](https://decayale.github.io/), [Kwan-Yee Lin](https://kwanyeelin.github.io/), [Guofeng Zhang](http://www.cad.zju.edu.cn/home/gfzhang/), [Xiaogang Wang](https://www.ee.cuhk.edu.hk/en-gb/people/academic-staff/professors/prof-xiaogang-wang), [Hongsheng Li](http://www.ee.cuhk.edu.hk/~hsli/).
*Conference on Computer Vision and Pattern Recognition (CVPR), 2022.*
[[Paper]](https://scholar.google.com/scholar?hl=zh-CN&as_sdt=0%2C5&q=RNNPose%3A+Recurrent+6-DoF+Object+Pose+Refinement+with+Robust+Correspondence+Field+Estimation+and+Pose+Optimization&btnG=)
## 1. Framework
The basic pipeline of our proposed RNNPose. (a) Before refinement, a reference image is rendered according to the object's initial pose (shown in a fused view).
(b) Our RNN-based framework recurrently refines the object pose based on the estimated correspondence field between the reference and target images. The pose is optimized to be consistent with the reliable correspondence estimates highlighted by the similarity score map (built from learned 3D-2D descriptors) via differentiable LM optimization. (c) The output refined pose.
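The sketch below is only a rough, assumed illustration of this recurrent refinement loop written in plain Python; `render_fn`, `flow_fn`, `weight_fn` and `lm_step_fn` are hypothetical callables standing in for the renderer, the correspondence-field estimator, the similarity scoring and the differentiable LM pose update, not functions from this repository.
```
def recurrent_refine(pose_init, target_image, render_fn, flow_fn, weight_fn,
                     lm_step_fn, num_iters=4):
    pose = pose_init
    for _ in range(num_iters):
        reference = render_fn(pose)                   # (a) render the object at the current pose
        flow = flow_fn(reference, target_image)       # (b) correspondence field: reference -> target
        weights = weight_fn(reference, target_image)  # similarity scores from 3D-2D descriptors
        pose = lm_step_fn(pose, flow, weights)        # differentiable LM pose update
    return pose                                       # (c) the refined pose
```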
## 2. Pose Estimation with Occlusions and Erroneous Pose Initializations
### Estimated Poses and Intermediate System Outputs from Different Recurrent Iterations.

### Pose Estimates with Erroneous Pose Initializations
Visualization of our pose estimates (first row) on the Occlusion LINEMOD dataset and the similarity score maps (second row) used to downweight unreliable correspondences during pose optimization.
For pose visualization, the white boxes represent the erroneous initial poses, the red boxes show the poses estimated by our algorithm, and the ground-truth boxes are in blue. Here, the initial poses for pose refinement originally come from PVNet but are perturbed with significant disturbances for robustness testing.
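To make the similarity-based downweighting concrete, here is a minimal, self-contained NumPy sketch of one weighted Levenberg-Marquardt update on 3D-2D reprojection residuals. It is an assumed, simplified stand-in for the differentiable LM optimization in RNNPose (which operates on dense correspondence fields in PyTorch); the function names and the [rotation, translation] increment parameterization are ours.
```
import numpy as np

def skew(v):
    x, y, z = v
    return np.array([[0., -z, y], [z, 0., -x], [-y, x, 0.]])

def expm_so3(omega):
    # Rodrigues' formula: rotation matrix for an axis-angle increment.
    theta = np.linalg.norm(omega)
    if theta < 1e-12:
        return np.eye(3)
    A = skew(omega / theta)
    return np.eye(3) + np.sin(theta) * A + (1. - np.cos(theta)) * (A @ A)

def weighted_lm_step(X, u_obs, w, R, t, K, lmbda=1e-4):
    """One weighted LM update of pose (R, t) from model points X (N,3),
    target-image correspondences u_obs (N,2) and similarity weights w (N,)."""
    Xc = X @ R.T + t                              # model points in the camera frame
    uvw = Xc @ K.T
    u_pred = uvw[:, :2] / uvw[:, 2:3]             # projected pixel coordinates
    r = (u_pred - u_obs).reshape(-1)              # reprojection residuals, (2N,)
    fx, fy = K[0, 0], K[1, 1]
    J = np.zeros((2 * len(X), 6))                 # Jacobian w.r.t. increment [omega, v]
    for i, (x, y, z) in enumerate(Xc):
        Jp = np.array([[fx / z, 0., -fx * x / z**2],
                       [0., fy / z, -fy * y / z**2]])   # d(pixel) / d(camera point)
        Jx = np.hstack([-skew((x, y, z)), np.eye(3)])   # d(camera point) / d[omega, v]
        J[2 * i:2 * i + 2] = Jp @ Jx
    W = np.repeat(w, 2)                           # low similarity scores shrink their residuals
    H = J.T @ (W[:, None] * J) + lmbda * np.eye(6)      # damped (LM) normal equations
    delta = np.linalg.solve(H, -J.T @ (W * r))
    R_new = expm_so3(delta[:3]) @ R               # apply the pose increment
    t_new = expm_so3(delta[:3]) @ t + delta[3:]
    return R_new, t_new
```
Iterating a few such steps with weights taken from the similarity score map lets reliable correspondences dominate the pose update while occluded or ambiguous regions contribute little.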
## 3. Installation
### Install the Docker
A Dockerfile is provided to help with the environment setup.
You need to install [docker](https://docs.docker.com/get-docker/) and [nvidia-docker2](https://github.com/NVIDIA/nvidia-docker) first, then build the docker image and start a container with the following commands:
```
cd RNNPose/docker
sudo docker build -t rnnpose .
sudo docker run -it --runtime=nvidia --ipc=host --volume="HOST_VOLUME_YOU_WANT_TO_MAP:DOCKER_VOLUME" -e DISPLAY=$DISPLAY -e QT_X11_NO_MITSHM=1 rnnpose bash
```
If you are not familiar with [docker](https://docs.docker.com/get-docker/), you can also install the dependencies manually by following the provided Dockerfile.
### Compile the Dependencies
```
cd RNNPose/scripts
bash compile_3rdparty.sh
```
## 4. Data Preparation
We follow [DeepIM](https://github.com/liyi14/mx-DeepIM) and [PVNet](https://github.com/zju3dv/pvnet-rendering) to preprocess the training data for LINEMOD.
You could follow the steps [here](doc/prepare_data.md) for data preparation.
## 5. Test with the Pretrained Models
We train our model on a mixture of real and synthetic data from the LINEMOD dataset.
The trained models on the LINEMOD dataset have been uploaded to the [OneDrive](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/ESPTVyUryHdGl65fRAxN51gBBayJJb9NpCqWA-tY2CFKJQ?e=R9bcLW).
You can download them
and put them into the *weights/* directory for testing.
An example bash script is provided below for reference.
```
export PYTHONPATH="$PROJECT_ROOT_PATH:$PYTHONPATH"
export PYTHONPATH="$PROJECT_ROOT_PATH/thirdparty:$PYTHONPATH"
seq=cat
gpu=1
start_gpu_id=0
model_dir=$PROJECT_ROOT_PATH/outputs/"$seq"   # set this to your own output directory
mkdir -p $model_dir
train_file=$PROJECT_ROOT_PATH/tools/eval.py
config_path=$PROJECT_ROOT_PATH/config/linemod/"$seq"_fw0.5.yml
pretrain=$PROJECT_ROOT_PATH/weights/trained_models/"$seq".tckpt
python -u $train_file multi_proc_train \
--config_path $config_path \
--model_dir $model_dir/results \
--use_dist True \
--dist_port 10000 \
--gpus_per_node $gpu \
--optim_eval True \
--use_apex True \
--world_size $gpu \
--start_gpu_id $start_gpu_id \
--pretrained_path $pretrain
```
Note that you need to set PROJECT_ROOT_PATH, i.e., the absolute path of the project folder *RNNPose*, and modify the respective data paths in the configuration files to the locations of the downloaded data before executing the commands. You can also refer to the commands below for evaluation with our provided scripts.
### Evaluation on LINEMOD
```
bash scripts/eval.sh
```
### Evaluation on LINEMOD OCCLUSION
```
bash scripts/eval_lmocc.sh
```
## Training from Scratch
An example training script is provided.
```
bash scripts/train.sh
```
## 6. Citation
If you find our code useful, please cite our papers.
```
@inproceedings{xu2022rnnpose,
  title={RNNPose: Recurrent 6-DoF Object Pose Refinement with Robust Correspondence Field Estimation and Pose Optimization},
  author={Xu, Yan and Lin, Kwan-Yee and Zhang, Guofeng and Wang, Xiaogang and Li, Hongsheng},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year={2022}
}
@article{xu2024rnnpose,
  title={RNNPose: 6-DoF Object Pose Estimation via Recurrent Correspondence Field Estimation and Pose Optimization},
  author={Xu, Yan and Lin, Kwan-Yee and Zhang, Guofeng and Wang, Xiaogang and Li, Hongsheng},
  journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
  year={2024},
  publisher={IEEE}
}
```
## 7. Acknowledgement
The skeleton of this code is borrowed from [RSLO](https://github.com/DecaYale/RSLO). We would also like to thank the public codebases [PVNet](https://github.com/zju3dv/pvnet), [RAFT](https://github.com/princeton-vl/RAFT), [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork) and [DeepV2D](https://github.com/princeton-vl/DeepV2D).
================================================
FILE: builder/__init__.py
================================================
================================================
FILE: builder/dataset_builder.py
================================================
from data.dataset import get_dataset_class
import numpy as np
from functools import partial
from data.preprocess import preprocess, preprocess_deepim, patch_crop
def build(input_reader_config,
training,
):
prep_cfg = input_reader_config.preprocess
dataset_cfg = input_reader_config.dataset
cfg = input_reader_config
dataset_cls = get_dataset_class(dataset_cfg.dataset_class_name)
if 0:# 'DeepIM' in dataset_cfg.dataset_class_name:
# patch_cropper = partial(patch_crop, margin_ratio=0.2, output_size=256 )
patch_cropper = None
prep_func = partial(preprocess_deepim,
max_points=dataset_cfg.max_points,
correspondence_radius=prep_cfg.correspondence_radius_threshold,
patch_cropper=patch_cropper,
image_scale=prep_cfg.get('image_scale', 1),
)
else:
prep_func = partial(preprocess,
max_points=dataset_cfg.max_points,
correspondence_radius=prep_cfg.correspondence_radius_threshold,
image_scale=prep_cfg.get('image_scale', 1),
crop_param=prep_cfg.get('crop_param', None),
kp_3d_param=prep_cfg.get('kp_3d_param', {"kp_num":30} ),
use_coords_as_3d_feat=prep_cfg.get('use_coords_as_3d_feat', False)
)
dataset = dataset_cls(
info_path=dataset_cfg.info_path,
root_path=dataset_cfg.root_path,
model_point_dim=dataset_cfg.model_point_dim,
is_train=training,
prep_func=prep_func,
seq_names=dataset_cfg.get('seq_names', None),
cfg=dataset_cfg
)
return dataset
================================================
FILE: builder/input_reader_builder.py
================================================
from torch.utils.data import Dataset
from builder import dataset_builder
class DatasetWrapper(Dataset):
""" convert our dataset to Dataset class in pytorch.
"""
def __init__(self, dataset):
self._dataset = dataset
def __len__(self):
return len(self._dataset)
def __getitem__(self, idx):
return self._dataset[idx]
@property
def dataset(self):
return self._dataset
def build(input_reader_config,
training,
) -> DatasetWrapper:
dataset = dataset_builder.build(
input_reader_config,
training,
)
dataset = DatasetWrapper(dataset)
return dataset
================================================
FILE: builder/losses_builder.py
================================================
from model import losses
def build(loss_config):
criterions = {}
criterions["metric_loss"] =losses.MetricLoss(configs=loss_config.metric_loss,)
criterions["pose_loss"] = losses.PointAlignmentLoss(loss_weight=1)
return criterions
================================================
FILE: builder/lr_scheduler_builder.py
================================================
from torchplus.train import learning_schedules_fastai as lsf
import torch
import numpy as np
def build(optimizer_config, optimizer, total_step):
optimizer_type = list(optimizer_config.keys())[0]
if optimizer_type == 'rms_prop_optimizer':
config = optimizer_config.rms_prop_optimizer
lr_scheduler = _create_learning_rate_scheduler(
config.learning_rate, optimizer, total_step=total_step)
if optimizer_type == 'momentum_optimizer':
config = optimizer_config.momentum_optimizer
lr_scheduler = _create_learning_rate_scheduler(
config.learning_rate, optimizer, total_step=total_step)
if optimizer_type == 'adam_optimizer':
config = optimizer_config.adam_optimizer
lr_scheduler = _create_learning_rate_scheduler(
config.learning_rate, optimizer, total_step=total_step)
return lr_scheduler
def _create_learning_rate_scheduler(learning_rate_config, optimizer, total_step):
"""Create optimizer learning rate scheduler based on config.
Args:
        learning_rate_config: a learning-rate configuration node (dict-like).
    Returns:
        A learning rate scheduler.
Raises:
ValueError: when using an unsupported input data type.
"""
lr_scheduler = None
# learning_rate_type = learning_rate_config.WhichOneof('learning_rate')
learning_rate_type = list(learning_rate_config.keys())[0]
if learning_rate_type == 'multi_phase':
config = learning_rate_config.multi_phase
lr_phases = []
mom_phases = []
for phase_cfg in config.phases:
lr_phases.append((phase_cfg.start, phase_cfg.lambda_func))
mom_phases.append(
(phase_cfg.start, phase_cfg.momentum_lambda_func))
lr_scheduler = lsf.LRSchedulerStep(
optimizer, total_step, lr_phases, mom_phases)
if learning_rate_type == 'one_cycle':
config = learning_rate_config.one_cycle
if len(config.lr_maxs)>1:
assert(len(config.lr_maxs)==4 )
lr_max=[]
# for i in range(len(config.lr_maxs)):
# lr_max += [config.lr_maxs[i]]*optimizer.param_segs[i]
lr_max = np.array(list(config.lr_maxs) )
else:
lr_max = config.lr_max
lr_scheduler = lsf.OneCycle(
optimizer, total_step, lr_max, list(config.moms), config.div_factor, config.pct_start)
if learning_rate_type == 'exponential_decay':
config = learning_rate_config.exponential_decay
lr_scheduler = lsf.ExponentialDecay(
optimizer, total_step, config.initial_learning_rate, config.decay_length, config.decay_factor, config.staircase)
if learning_rate_type == 'exponential_decay_warmup':
config = learning_rate_config.exponential_decay_warmup
lr_scheduler = lsf.ExponentialDecayWarmup(
optimizer, total_step, config.initial_learning_rate, config.decay_length, config.decay_factor, config.div_factor,
config.pct_start, config.staircase)
if learning_rate_type == 'manual_stepping':
config = learning_rate_config.manual_stepping
lr_scheduler = lsf.ManualStepping(
optimizer, total_step, list(config.boundaries), list(config.rates))
if lr_scheduler is None:
raise ValueError('Learning_rate %s not supported.' %
learning_rate_type)
return lr_scheduler
================================================
FILE: builder/optimizer_builder.py
================================================
from torchplus.train import learning_schedules
from torchplus.train import optim
import torch
from torch import nn
from torchplus.train.fastai_optim import OptimWrapper, FastAIMixedOptim
from functools import partial
def children(m: nn.Module):
"Get children of `m`."
return list(m.children())
def num_children(m: nn.Module) -> int:
"Get number of children modules in `m`."
return len(children(m))
# return a flat list of the smallest (leaf) sub-modules
def flatten_model(m):
if m is None:
return []
return sum(
map(flatten_model, m.children()), []) if num_children(m) else [m]
# def get_layer_groups(m): return [nn.Sequential(*flatten_model(m))]
def get_layer_groups(m): return [nn.ModuleList(flatten_model(m))]
def get_voxeLO_net_layer_groups(net):
vfe_grp = get_layer_groups(net.voxel_feature_extractor)#[0]
mfe_grp = get_layer_groups(net.middle_feature_extractor)#[0]
op_grp = get_layer_groups(net.odom_predictor)#[0]
# other_grp = get_layer_groups(net._rotation_loss) + \
# get_layer_groups(net._translation_loss) \
# + get_layer_groups(net._pyramid_rotation_loss) \
# + get_layer_groups(net._pyramid_translation_loss) \
# + get_layer_groups(net._consistency_loss)\
other_grp = get_layer_groups(nn.Sequential(net._rotation_loss,
net._translation_loss,
net._pyramid_rotation_loss,
net._pyramid_translation_loss,
net._consistency_loss,
))
return [vfe_grp, mfe_grp, op_grp,other_grp]
def build(optimizer_config, net, name=None, mixed=False, loss_scale=512.0):
optimizer_type = list(optimizer_config.keys())[0]
print("Optimizer:", optimizer_type)
optimizer=None
if optimizer_type == 'rms_prop_optimizer':
config=optimizer_config.rms_prop_optimizer
optimizer_func=partial(
torch.optim.RMSprop,
alpha=config.decay,
momentum=config.momentum_optimizer_value,
eps=config.epsilon)
    if optimizer_type == 'momentum_optimizer':
        config=optimizer_config.momentum_optimizer
        # torch.optim.SGD takes no epsilon argument, so only the momentum value is forwarded.
        optimizer_func=partial(
            torch.optim.SGD,
            momentum=config.momentum_optimizer_value)
if optimizer_type == 'adam_optimizer':
config=optimizer_config.adam_optimizer
if optimizer_config.fixed_weight_decay:
optimizer_func=partial(
torch.optim.Adam, betas=(0.9, 0.99), amsgrad=config.amsgrad)
else:
# regular adam
optimizer_func=partial(
torch.optim.Adam, amsgrad=config.amsgrad)
optimizer=OptimWrapper.create(
optimizer_func,
3e-3,
get_layer_groups(net),
# get_voxeLO_net_layer_groups(net),
wd=config.weight_decay,
true_wd=optimizer_config.fixed_weight_decay,
bn_wd=True)
print(hasattr(optimizer, "_amp_stash"), '_amp_stash')
if optimizer is None:
raise ValueError('Optimizer %s not supported.' % optimizer_type)
if optimizer_config.use_moving_average:
raise ValueError('torch don\'t support moving average')
if name is None:
# assign a name to optimizer for checkpoint system
optimizer.name=optimizer_type
else:
optimizer.name=name
return optimizer
================================================
FILE: builder/rnnpose_builder.py
================================================
from builder import losses_builder
from model.RNNPose import get_posenet_class
import model.RNNPose
def build(model_cfg,
measure_time=False, testing=False):
"""build second pytorch instance.
"""
criterions=losses_builder.build(model_cfg.loss)
net = get_posenet_class(model_cfg.network_class_name)(
criterions=criterions,
opt=model_cfg)
return net
================================================
FILE: config/default.py
================================================
from yacs.config import CfgNode as CN
from utils.singleton import Singleton
import os
import numpy as np
def _merge_a_into_b(a, b):
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
# if type(a) is not dict:
if not isinstance(a, (dict)):
return
for k, v in a.items():
# a must specify keys that are in b
if not k in b:
raise KeyError('{} is not a valid config key'.format(k))
# the types must match, too
old_type = type(b[k])
if old_type is not type(v):
if isinstance(b[k], np.ndarray):
v = np.array(v, dtype=b[k].dtype)
else:
raise ValueError(('Type mismatch ({} vs. {}) '
'for config key: {}').format(type(b[k]),
type(v), k))
# recursively merge dicts
# if type(v) is dict:
if isinstance(v, (dict)):
try:
_merge_a_into_b(a[k], b[k])
except:
print('Error under config key: {}'.format(k))
raise
else:
b[k] = v
class Config(metaclass=Singleton):
def __init__(self):
############## ↓ Basic ↓ ##############
self.ROOT_CN = CN()
self.ROOT_CN.BASIC = CN()
self.ROOT_CN.BASIC.input_size=[480,640] #h,w
self.ROOT_CN.BASIC.crop_size=[320,320] #h,w
self.ROOT_CN.BASIC.zoom_crop_size=[320,320] #h,w
self.ROOT_CN.BASIC.render_image_size=[320,320]#h,w
self.ROOT_CN.BASIC.patch_num=64#h,w
############## ↓ LM OPTIM ↓ ##############
self.ROOT_CN.LM=CN()
self.ROOT_CN.LM.LM_LMBDA= 0.0001
self.ROOT_CN.LM.EP_LMBDA=100
############## ↓ data ↓ ##############
self.ROOT_CN.DATA=CN()
self.ROOT_CN.DATA.OBJ_ROOT="" #h,w
self.ROOT_CN.DATA.VOC_ROOT=f"{os.path.dirname(os.path.abspath(__file__)) }/../EXPDATA/" #h,w
def __get_item__(self, key):
return self.ROOT_CN.__getitem__(key)
def merge(self, config_dict, sub_key=None):
if sub_key is not None:
_merge_a_into_b(config_dict, self.ROOT_CN[sub_key])
else:
_merge_a_into_b(config_dict, self.ROOT_CN)
############## ↓ Model ↓ ##############
# _CN.model = CN()
# _CN.model.input_size=[480,640]
# _CN.model.crop_size=[320,320]
# def get_cfg_defaults():
def get_cfg(Node=None):
"""Get a yacs CfgNode object with default values for my_project."""
# Return a clone so that the defaults will not be altered
# This is for the "local variable" use pattern
# return _CN.clone()
if Node is None:
return Config()
else:
return Config().__get_item__(Node)
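# Usage sketch (an illustrative assumption of how this module is consumed, mirroring the
# access pattern in data/linemod_dataset.py):
#
#   from config.default import get_cfg
#   cfg = get_cfg()                                   # process-wide singleton Config
#   cfg.merge({"zoom_crop_size": [240, 240]}, sub_key="BASIC")  # e.g. overrides parsed from a YAML file
#   voc_root = get_cfg("DATA").VOC_ROOT               # read a sub-node directly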
================================================
FILE: config/linemod/copy.sh
================================================
declare -a arr=("glue" "ape" "cat" "phone" "eggbox" "benchvise" "lamp" "camera" "can" "driller" "duck" "holepuncher" "iron" )
#create training scripts
for seq in "${arr[@]}"
do
echo "$seq"
cat template_fw0.5.yml > "$seq"_fw0.5.yml
sed -i "s/SEQ_NAME/$seq/g" "$seq"_fw0.5.yml
done
arraylength=${#arr[@]}
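# Usage sketch (assumed invocation): run from config/linemod/ to generate one config per
# object, e.g. cat_fw0.5.yml with SEQ_NAME replaced by "cat":
#   cd config/linemod && bash copy.sh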
================================================
FILE: config/linemod/copy_occ.sh
================================================
declare -a arr=("glue" "ape" "cat" "phone" "eggbox" "benchvise" "lamp" "camera" "can" "driller" "duck" "holepuncher" "iron" )
#create training scripts
for seq in "${arr[@]}"
do
echo "$seq"
cat template_fw0.5_occ.yml > "$seq"_fw0.5_occ.yml
sed -i "s/SEQ_NAME/$seq/g" "$seq"_fw0.5_occ.yml
done
================================================
FILE: config/linemod/template_fw0.5.yml
================================================
vars:
input_h: &input_h
320
input_w: &input_w
320
batch_size: &batch_size
1
descriptor_dim: &descriptor_dim
32
correspondence_radius_threshold: &correspondence_radius_threshold
0.01 #0.04
seq_name: &seq_name
["SEQ_NAME"]
BASIC:
zoom_crop_size: [240,240]
model:
input_h: *input_h
input_w: *input_w
batch_size: *batch_size
seq_len: 2
network_class_name: RNNPose
descriptor_net:
module_class_name: HybridFeaNet
keypoints_detector_2d:
input_dim: 3
descriptor_dim: *descriptor_dim
remove_borders: 4
normalize_output: True
keypoints_detector_3d:
#KPCONV configurations
num_layers: 4
KP_extent: 2.0
batch_norm_momentum: 0.02
use_batch_norm: true
in_points_dim: 3
fixed_kernel_points: 'center' #['center', 'verticals', 'none']
KP_influence: 'linear'
aggregation_mode: 'sum' #['closest', 'sum']
modulated: false
first_subsampling_dl: 0.025
conv_radius: 2.5
deform_radius: 5
in_features_dim: 1 #3
first_feats_dim: 128
num_kernel_points: 15
final_feats_dim: *descriptor_dim #256 #32
normalize_output: True
gnn_feats_dim: 128 #256
context_fea_extractor_3d:
#KPCONV configurations
num_layers: 4
KP_extent: 2.0
batch_norm_momentum: 0.02
use_batch_norm: true
in_points_dim: 3
fixed_kernel_points: 'center' #['center', 'verticals', 'none']
KP_influence: 'linear'
aggregation_mode: 'sum' #['closest', 'sum']
modulated: false
first_subsampling_dl: 0.025
conv_radius: 2.5
deform_radius: 5
in_features_dim: 1 #3
first_feats_dim: 128
num_kernel_points: 15
final_feats_dim: 256 #*descriptor_dim #256 #32
normalize_output: False
gnn_feats_dim: 128 #256
motion_net:
IS_CALIBRATED: True
RESCALE_IMAGES: False
ITER_COUNT: 4
RENDER_ITER_COUNT: 3 #2 #1 #3
TRAIN_RESIDUAL_WEIGHT: 0 #0.1
TRAIN_FLOW_WEIGHT: 0.5 #0.1 #1
TRAIN_REPROJ_WEIGHT: 0
OPTIM_ITER_COUNT: 1
FLOW_NET: 'raft'
SYN_OBSERVED: False
ONLINE_CROP: True
raft:
small: False #True
fea_net: "default"
mixed_precision: True
# pretrained_model: "/mnt/workspace/datasets/weights/models/raft-small.pth"
pretrained_model: "/mnt/workspace/datasets/weights/models/raft-chairs.pth"
input_dim: 3
iters: 1
loss:
metric_loss:
type: "normal"
pos_radius: *correspondence_radius_threshold # the radius used to find the positive correspondences
safe_radius: 0.02 #0.13
pos_margin: 0.1
neg_margin: 1.4
max_points: 256
matchability_radius: 0.06
weight: 0.001
saliency_loss:
loss_weight: 1
reg_weight: 0.01
geometric_loss:
loss_weight: 1
reg_weight: 0.5 #0.1
train_config:
optimizer:
adam_optimizer:
learning_rate:
one_cycle:
lr_maxs: []
lr_max: 0.0001 #
moms: [0.95, 0.85]
div_factor: 10.0
pct_start: 0.01 #0.05
amsgrad: false
weight_decay: 0.0001
fixed_weight_decay: true
use_moving_average: false
steps: 200000
steps_per_eval: 10000
loss_scale_factor: -1
clear_metrics_every_epoch: true
train_input_reader:
dataset:
dataset_class_name: "LinemodDeepIMSynRealV2"
info_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/deepim/linemod_orig_deepim.info.train", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/deepim/linemod_syn_deepim.info.train",
"/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/linemod_fusesformatted_all10k_deepim.info.train",
]
root_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LM6d_converted/LM6d_refine", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LM6d_converted/LM6d_refine_syn",
"/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LINEMOD/fuse_formatted/"
]
model_point_dim: 3
max_points: 20000
seq_names: *seq_name
batch_size: *batch_size
preprocess:
correspondence_radius_threshold: *correspondence_radius_threshold
num_workers: 3
image_scale: 1
crop_param:
rand_crop: false
margin_ratio: 0.85
output_size: *input_h
crop_with_init_pose: True
eval_input_reader:
dataset:
dataset_class_name: "LinemodDeepIMSynRealV2"
info_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/linemod_posecnn.info.eval" ]
root_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LM6d_converted/LM6d_refine" ]
model_point_dim: 3
max_points: 20000
seq_names: *seq_name
batch_size: *batch_size
preprocess:
correspondence_radius_threshold: *correspondence_radius_threshold
num_workers: 3
image_scale: 1
crop_param:
rand_crop: false
margin_ratio: 0.85 #0.5
output_size: *input_h #
crop_with_init_pose: True
================================================
FILE: config/linemod/template_fw0.5_occ.yml
================================================
vars:
input_h: &input_h
320
input_w: &input_w
320
batch_size: &batch_size
1
descriptor_dim: &descriptor_dim
32
correspondence_radius_threshold: &correspondence_radius_threshold
0.01 #0.04
seq_name: &seq_name
["SEQ_NAME"]
BASIC:
zoom_crop_size: [240,240]
model:
input_h: *input_h
input_w: *input_w
batch_size: *batch_size
seq_len: 2
network_class_name: RNNPose
descriptor_net:
module_class_name: HybridFeaNet
keypoints_detector_2d:
input_dim: 3
descriptor_dim: *descriptor_dim
remove_borders: 4
normalize_output: True
keypoints_detector_3d:
#KPCONV configurations
num_layers: 4
KP_extent: 2.0
batch_norm_momentum: 0.02
use_batch_norm: true
in_points_dim: 3
fixed_kernel_points: 'center' #['center', 'verticals', 'none']
KP_influence: 'linear'
aggregation_mode: 'sum' #['closest', 'sum']
modulated: false
first_subsampling_dl: 0.025
conv_radius: 2.5
deform_radius: 5
in_features_dim: 1 #3
first_feats_dim: 128
num_kernel_points: 15
final_feats_dim: *descriptor_dim #256 #32
normalize_output: True
gnn_feats_dim: 128 #256
context_fea_extractor_3d:
#KPCONV configurations
num_layers: 4
KP_extent: 2.0
batch_norm_momentum: 0.02
use_batch_norm: true
in_points_dim: 3
fixed_kernel_points: 'center' #['center', 'verticals', 'none']
KP_influence: 'linear'
aggregation_mode: 'sum' #['closest', 'sum']
modulated: false
first_subsampling_dl: 0.025
conv_radius: 2.5
deform_radius: 5
in_features_dim: 1 #3
first_feats_dim: 128
num_kernel_points: 15
final_feats_dim: 256 #*descriptor_dim #256 #32
normalize_output: False
gnn_feats_dim: 128 #256
motion_net:
IS_CALIBRATED: True
RESCALE_IMAGES: False
ITER_COUNT: 4
RENDER_ITER_COUNT: 3 #2 #1 #3
TRAIN_RESIDUAL_WEIGHT: 0 #0.1
TRAIN_FLOW_WEIGHT: 0.5 #0.1 #1
TRAIN_REPROJ_WEIGHT: 0
OPTIM_ITER_COUNT: 1
FLOW_NET: 'raft'
SYN_OBSERVED: False
ONLINE_CROP: True
raft:
small: False #True
fea_net: "default"
mixed_precision: True
# pretrained_model: "/mnt/workspace/datasets/weights/models/raft-small.pth"
pretrained_model: "/mnt/workspace/datasets/weights/models/raft-chairs.pth"
input_dim: 3
iters: 1
loss:
metric_loss:
type: "normal"
pos_radius: *correspondence_radius_threshold # the radius used to find the positive correspondences
safe_radius: 0.02 #0.13
pos_margin: 0.1
neg_margin: 1.4
max_points: 256
matchability_radius: 0.06
weight: 0.001
saliency_loss:
loss_weight: 1
reg_weight: 0.01
geometric_loss:
loss_weight: 1
reg_weight: 0.5 #0.1
train_config:
optimizer:
adam_optimizer:
learning_rate:
one_cycle:
lr_maxs: []
lr_max: 0.0001 #
moms: [0.95, 0.85]
div_factor: 10.0
pct_start: 0.01 #0.05
amsgrad: false
weight_decay: 0.0001
fixed_weight_decay: true
use_moving_average: false
steps: 200000
steps_per_eval: 10000
loss_scale_factor: -1
clear_metrics_every_epoch: true
train_input_reader:
dataset:
dataset_class_name: "LinemodDeepIMSynRealV2"
info_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/deepim/linemod_orig_deepim.info.train", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/deepim/linemod_syn_deepim.info.train",
"/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/linemod_fusesformatted_all10k_deepim.info.train",
]
root_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LM6d_converted/LM6d_refine", "/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LM6d_converted/LM6d_refine_syn",
"/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/LINEMOD/fuse_formatted/"
]
model_point_dim: 3
max_points: 20000
seq_names: *seq_name
batch_size: *batch_size
preprocess:
correspondence_radius_threshold: *correspondence_radius_threshold
num_workers: 3
image_scale: 1
crop_param:
rand_crop: false
margin_ratio: 0.85
output_size: *input_h
crop_with_init_pose: True
eval_input_reader:
dataset:
dataset_class_name: "LinemodDeepIMSynRealV2"
info_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/data_info/deepim/linemod_bop_lmocc_pvnetdr.info.eval"]
root_path: ["/home/RNNPose/Projects/Works/RNNPose_release/EXPDATA/lmo"]
init_post_type: "PVNET_LINEMOD_OCC"
model_point_dim: 3
max_points: 20000
seq_names: *seq_name
batch_size: *batch_size
preprocess:
correspondence_radius_threshold: *correspondence_radius_threshold
num_workers: 3
image_scale: 1
crop_param:
rand_crop: false
margin_ratio: 0.85 #0.5
output_size: *input_h #
crop_with_init_pose: True
================================================
FILE: data/__init__.py
================================================
from . import dataset
from . import linemod_dataset
================================================
FILE: data/dataset.py
================================================
import pathlib
import pickle
import time
from functools import partial
import numpy as np
REGISTERED_DATASET_CLASSES = {}
def register_dataset(cls, name=None):
global REGISTERED_DATASET_CLASSES
if name is None:
name = cls.__name__
assert name not in REGISTERED_DATASET_CLASSES, f"exist class: {REGISTERED_DATASET_CLASSES}"
REGISTERED_DATASET_CLASSES[name] = cls
return cls
def get_dataset_class(name):
global REGISTERED_DATASET_CLASSES
assert name in REGISTERED_DATASET_CLASSES, f"available class: {REGISTERED_DATASET_CLASSES}"
return REGISTERED_DATASET_CLASSES[name]
class Dataset(object):
NumPointFeatures = -1
def __getitem__(self, index):
raise NotImplementedError
def __len__(self):
raise NotImplementedError
def _read_data(self, query):
raise NotImplementedError
def evaluation(self, dt_annos, output_dir):
"""Dataset must provide a evaluation function to evaluate model."""
raise NotImplementedError
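# Usage sketch (illustrative; `MyDataset` is a hypothetical class, not part of this file):
# `register_dataset` records a class in REGISTERED_DATASET_CLASSES under its class name, and
# builder/dataset_builder.py later retrieves it via the `dataset_class_name` string from the config:
#
#   @register_dataset
#   class MyDataset(Dataset):
#       def __getitem__(self, index): ...
#       def __len__(self): ...
#
#   dataset_cls = get_dataset_class("MyDataset")   # returns the MyDataset class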
================================================
FILE: data/linemod/linemod_config.py
================================================
import numpy as np
diameters = {
'cat': 15.2633,
'ape': 9.74298,
'benchvise': 28.6908,
'bowl': 17.1185,
'cam': 17.1593,
'camera': 17.1593,
'can': 19.3416,
'cup': 12.5961,
'driller': 25.9425,
'duck': 10.7131,
'eggbox': 17.6364,
'glue': 16.4857,
'holepuncher': 14.8204,
'iron': 30.3153,
'lamp': 28.5155,
'phone': 20.8394
}
linemod_cls_names = ['ape', 'cam', 'cat', 'duck', 'glue', 'iron', 'phone', 'benchvise', 'can', 'driller', 'eggbox', 'holepuncher', 'lamp']
linemod_K = np.array([[572.4114, 0., 325.2611],
[0., 573.57043, 242.04899],
[0., 0., 1.]])
blender_K = np.array([[700., 0., 320.],
[0., 700., 240.],
[0., 0., 1.]])
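# Illustrative note (assumption): `diameters` stores per-object model diameters in centimeters,
# as commonly used for the ADD(-S) 0.1d threshold. `linemod_K` holds the real-camera intrinsics and
# `blender_K` the synthetic (Blender) render intrinsics; a camera-frame point X = [x, y, z] projects to
#   u, v, w = linemod_K @ X;  pixel = (u / w, v / w)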
================================================
FILE: data/linemod_dataset.py
================================================
import numpy as np
import random
import os
from data.dataset import Dataset, register_dataset
import pickle
import PIL
import cv2
import torch
import time
import scipy
from utils.geometric import range_to_depth, render_pointcloud
from .transforms import make_transforms
from thirdparty.kpconv.lib.utils import square_distance
from utils.geometric import rotation_angle
# from utils.visualize import *
from transforms3d.quaternions import mat2quat, quat2mat, qmult
from transforms3d.euler import mat2euler, euler2mat, euler2quat, quat2euler
import math
from config.default import get_cfg
CURRENT_DIR=os.path.dirname(os.path.abspath(__file__))
try:
from pytorch3d.io import load_obj, load_ply
except:
print("Warning: error occurs when importing pytorch3d ")
pass
def se3_q2m(se3_q):
assert se3_q.size == 7
se3_mx = np.zeros((3, 4))
# quat = se3_q[0:4] / LA.norm(se3_q[0:4])
quat = se3_q[:4]
R = quat2mat(quat)
se3_mx[:, :3] = R
se3_mx[:, 3] = se3_q[4:]
return se3_mx
def info_convertor(info,):
"""
    Transform the per-sequence LINEMOD info dict into a merged {seqs, seq_lengths, data} structure.
"""
seqs = info.keys() #['cat']#
seq_lengths = [len(info[i]) for i in seqs]
data = []
for seq in seqs:
print(seq)
data.append(info[seq])
new_infos = {
"seqs": list(seqs),
"seq_lengths": seq_lengths,
"data": data
}
return new_infos
def resize(im, target_size, max_size, stride=0, interpolation=cv2.INTER_LINEAR):
"""
only resize input image to target size and return scale
:param im: BGR image input by opencv
:param target_size: one dimensional size (the short side)
:param max_size: one dimensional max size (the long side)
:param stride: if given, pad the image to designated stride
:param interpolation: if given, using given interpolation method to resize image
:return:
"""
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
im_scale = float(target_size) / float(im_size_min)
# prevent bigger axis from being more than max_size:
if np.round(im_scale * im_size_max) > max_size:
im_scale = float(max_size) / float(im_size_max)
im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=interpolation)
if stride == 0:
return im, im_scale
else:
# pad to product of stride
im_height = int(np.ceil(im.shape[0] / float(stride)) * stride)
im_width = int(np.ceil(im.shape[1] / float(stride)) * stride)
im_channel = im.shape[2]
padded_im = np.zeros((im_height, im_width, im_channel))
padded_im[: im.shape[0], : im.shape[1], :] = im
return padded_im, im_scale
def sample_poses(pose_tgt):
SYN_STD_ROTATION = 15
SYN_STD_TRANSLATION = 0.01
ANGLE_MAX=45
pose_src = pose_tgt.copy()
num = pose_tgt.shape[0]
for i in range(num):
euler = mat2euler(pose_tgt[i, :3, :3])
euler += SYN_STD_ROTATION * np.random.randn(3) * math.pi / 180.0
pose_src[i, :3, :3] = euler2mat(euler[0], euler[1], euler[2])
pose_src[i, 0, 3] = pose_tgt[i, 0, 3]+ SYN_STD_TRANSLATION * np.random.randn(1)
pose_src[i, 1, 3] = pose_tgt[i, 1, 3] + SYN_STD_TRANSLATION * np.random.randn(1)
pose_src[i, 2, 3] = pose_tgt[i, 2, 3] + 5 * SYN_STD_TRANSLATION * np.random.randn(1)
r_dist = np.arccos((np.trace(pose_src[i, :3,:3].transpose(-1,-2) @ pose_tgt[i, :3,:3]) - 1 )/2)/math.pi*180
while r_dist > ANGLE_MAX:#or not (16 < center_x < (640 - 16) and 16 < center_y < (480 - 16)):
# print("r_dist > ANGLE_MAX, resampling...")
print("Too large angular differences. Resample the pose...")
euler = mat2euler(pose_tgt[i, :3, :3])
euler += SYN_STD_ROTATION * np.random.randn(3) * math.pi / 180.0
pose_src[i, :3, :3] = euler2mat(euler[0], euler[1], euler[2])
pose_src[i, 0, 3] = pose_tgt[i, 0, 3]+ SYN_STD_TRANSLATION * np.random.randn(1)
pose_src[i, 1, 3] = pose_tgt[i, 1, 3] + SYN_STD_TRANSLATION * np.random.randn(1)
pose_src[i, 2, 3] = pose_tgt[i, 2, 3] + 5 * SYN_STD_TRANSLATION * np.random.randn(1)
            r_dist = np.arccos((np.trace(pose_src[i, :3,:3].transpose(-1,-2) @ pose_tgt[i, :3,:3]) - 1 )/2)/math.pi*180  # radians -> degrees
return pose_src.squeeze()
@register_dataset
class LinemodDeepIMSynRealV2(Dataset):
# use deepim 3d model for geometric feature extraction, mingle the synthetic and real data
def __init__(self, root_path,
info_path, model_point_dim,
is_train,
prep_func=None,
seq_names=None,
cfg={}
):
super().__init__()
assert info_path is not None
assert isinstance(root_path, (tuple, list)) and isinstance(info_path, (tuple, list))
assert len(root_path) == len(info_path)
print("Info:",info_path)
# assert split in ['train', 'val', 'test']
self.is_train = is_train
self.VOC_ROOT = get_cfg('DATA').VOC_ROOT#"/DATA/yxu/LINEMOD_DEEPIM/"
infos=[]
for ipath in info_path:
with open(ipath, 'rb') as f:
info = pickle.load(f)
if seq_names is not None:
for k in list(info.keys()):
if k not in seq_names:
del info[k]
infos.append( info_convertor(info) )
#merge multiple infos
self.infos = infos[0]
self.infos['dataset_idx'] = [0]*len(self.infos['seqs'])
for i, info in enumerate(infos[1:]):
for k in self.infos:
if k == 'dataset_idx':
self.infos[k].extend([i+1]*len(info['seqs']))
else:
self.infos[k].extend(info[k])
self.root_paths = root_path
self.model_point_dim = model_point_dim
# self.max_points=max_points#30000
self.prep_func=prep_func
# self.rgb_transformer = None #make_transforms(None, is_train=is_train)
self.rgb_transformer = make_transforms(None, is_train=is_train)
print("dataset size:",self.__len__())
self.init_pose_type = cfg.get("init_post_type", "POSECNN_LINEMOD" )
# self.init_pose_type = cfg.get("init_post_type", "PVNET_LINEMOD_OCC" )
# self.init_pose_type = cfg.get("init_post_type", "PVNET_LINEMOD" )
print("INIT_POSE_TYPE:", self.init_pose_type)
#Load posecnn results
if not self.is_train:
with open(f"{CURRENT_DIR}/../EXPDATA/init_poses/linemod_posecnn_results.pkl", 'rb') as f:
self.pose_cnn_results_test_posecnn=pickle.load(f)
try:
if self.init_pose_type == "POSECNN_LINEMOD":
#load posecnn results
self.pose_cnn_results_test=self.pose_cnn_results_test_posecnn
elif self.init_pose_type =="PVNET_LINEMOD":
self.pose_cnn_results_test=np.load(f"{CURRENT_DIR}/../EXPDATA/init_poses/pvnet/pvnet_linemod_test.npy", allow_pickle=True).flat[0]
elif self.init_pose_type =="PVNET_LINEMOD_OCC":
self.pose_cnn_results_test=np.load(f"{CURRENT_DIR}/../EXPDATA/init_poses/pvnet/pvnet_linemodocc_test.npy", allow_pickle=True).flat[0]
else:
raise NotImplementedError
except:
print("Loading posecnn results failed!")
self.pose_cnn_results_test=None
try:
# self.blender_to_bop_pose=np.load(f"{CURRENT_DIR}/../EXPDATA/init_poses/metricpose/blender2bop_RT.npy", allow_pickle=True).flat[0]
self.blender_to_bop_pose=np.load(f"{CURRENT_DIR}/../EXPDATA/init_poses/pose_conversion/blender2bop_RT.npy", allow_pickle=True).flat[0]
except:
print("Loading pose conversion matrix failed!")
self.blender_to_bop_pose=None
else:
self.pose_cnn_results_test=None
self.blender_to_bop_pose=None
def load_random_background(self, im_observed, mask):
VOC_root = os.path.join(self.VOC_ROOT, "VOCdevkit/VOC2012")
VOC_image_set_dir = os.path.join(VOC_root, "ImageSets/Main")
VOC_bg_list_path = os.path.join(VOC_image_set_dir, "diningtable_trainval.txt")
with open(VOC_bg_list_path, "r") as f:
VOC_bg_list = [
line.strip("\r\n").split()[0] for line in f.readlines() if line.strip("\r\n").split()[1] == "1"
]
height, width, channel = im_observed.shape
target_size = min(height, width)
max_size = max(height, width)
observed_hw_ratio = float(height) / float(width)
k = random.randint(0, len(VOC_bg_list) - 1)
bg_idx = VOC_bg_list[k]
bg_path = os.path.join(VOC_root, "JPEGImages/{}.jpg".format(bg_idx))
bg_image = cv2.imread(bg_path, cv2.IMREAD_COLOR)[...,::-1] #RGB
bg_h, bg_w, bg_c = bg_image.shape
bg_image_resize = np.zeros((height, width, channel), dtype="uint8")
if (float(height) / float(width) < 1 and float(bg_h) / float(bg_w) < 1) or (
float(height) / float(width) >= 1 and float(bg_h) / float(bg_w) >= 1
):
if bg_h >= bg_w:
bg_h_new = int(np.ceil(bg_w * observed_hw_ratio))
if bg_h_new < bg_h:
bg_image_crop = bg_image[0:bg_h_new, 0:bg_w, :]
else:
bg_image_crop = bg_image
else:
bg_w_new = int(np.ceil(bg_h / observed_hw_ratio))
if bg_w_new < bg_w:
bg_image_crop = bg_image[0:bg_h, 0:bg_w_new, :]
else:
bg_image_crop = bg_image
else:
if bg_h >= bg_w:
bg_h_new = int(np.ceil(bg_w * observed_hw_ratio))
bg_image_crop = bg_image[0:bg_h_new, 0:bg_w, :]
else: # bg_h < bg_w
bg_w_new = int(np.ceil(bg_h / observed_hw_ratio))
print(bg_w_new)
bg_image_crop = bg_image[0:bg_h, 0:bg_w_new, :]
bg_image_resize_0, _ = resize(bg_image_crop, target_size, max_size)
h, w, c = bg_image_resize_0.shape
bg_image_resize[0:h, 0:w, :] = bg_image_resize_0
# add background to image_observed
res_image = bg_image_resize.copy()
res_image[mask>0]=im_observed[mask>0]
# im_observed = res_image
return res_image
def _read_data(self, idx):
"""
info structure:
{
'cat':[
{
"index": idx,
"model_path": str,
"rgb_path": str,
"depth_path": str,
"RT": np.array([3,4]),
"K": np.array([3,3]),
},
{
"index": idx,
"model_path": str,
"rgb_path": str,
"depth_path": str,
"RT": np.array([3,4]),
"K": np.array([3,3]),
}
...
],
'dog':[
]
...
}
"""
if isinstance(idx, (tuple, list)):
idx, seed = idx
else:
seed = None
seq_lengths = np.array(self.infos['seq_lengths'])
seq_lengths_cum = np.cumsum(seq_lengths)
seq_lengths_cum = np.insert(seq_lengths_cum, 0, 0) # insert a dummy 0
seq_idx = np.nonzero(seq_lengths_cum > idx)[0][0]-1
frame_idx = idx - seq_lengths_cum[seq_idx]
info = self.infos["data"][seq_idx]
dataset_idx = self.infos["dataset_idx"][seq_idx]
model_points_path = os.path.join(f'{os.path.dirname(__file__)}/../EXPDATA/LM6d_converted/models/{self.infos["seqs"][seq_idx]}/textured.obj' ) # TODO: need check
rgb_path = os.path.join(self.root_paths[dataset_idx], info[frame_idx]['rgb_observed_path'])
depth_path = os.path.join(self.root_paths[dataset_idx], info[frame_idx]['depth_gt_observed_path'])
if info[frame_idx].get('rgb_noisy_rendered', None) is not None:
rgb_noisy_rendered_path = os.path.join(self.root_paths[dataset_idx], info[frame_idx]['rgb_noisy_rendered'])
else:
rgb_noisy_rendered_path = None
if info[frame_idx].get('depth_noisy_rendered', None) is not None:
depth_noisy_rendered_path = os.path.join(self.root_paths[dataset_idx], info[frame_idx]['depth_noisy_rendered'])
else:
depth_noisy_rendered_path = None
if info[frame_idx].get('pose_noisy_rendered', None) is not None:
rendered_RT = info[frame_idx]['pose_noisy_rendered'].astype(np.float32)
# else:
elif self.is_train:
rendered_RT = sample_poses( info[frame_idx]['gt_pose'].astype(np.float32)[None] )
K = info[frame_idx]['K'].astype(np.float32)
RT = info[frame_idx]['gt_pose'].astype(np.float32) #[R,t]
# evaluation
if not self.is_train:
if self.pose_cnn_results_test is not None:
class_name=self.infos["seqs"][seq_idx]
if self.init_pose_type == "PVNET_LINEMOD":
try:
posecnn_RT = self.pose_cnn_results_test[class_name][frame_idx] # if self.pose_cnn_results_test is not None else np.zeros_like(RT)
#Transformations are needed as the pvnet has a different coordinate system.
posecnn_RT[:3,:3] = posecnn_RT[:3,:3]@self.blender_to_bop_pose[class_name][:3,:3].T
posecnn_RT[:3,3:] = -posecnn_RT[:3,:3] @self.blender_to_bop_pose[class_name][:3,3:] + posecnn_RT[:3,3:]
except:
print("Warning: frame_idx is out of the range of self.pose_cnn_results_test!", flush=True)
posecnn_RT= se3_q2m(self.pose_cnn_results_test_posecnn[class_name][frame_idx]['pose']) #np.zeros_like(RT)
elif self.init_pose_type =="POSECNN_LINEMOD":
posecnn_RT= se3_q2m(self.pose_cnn_results_test_posecnn[class_name][frame_idx]['pose'])
elif self.init_pose_type == "PVNET_LINEMOD_OCC":
try:
posecnn_RT = self.pose_cnn_results_test[class_name][frame_idx].copy()# if self.pose_cnn_results_test is not None else np.zeros_like(RT)
#Transformations are needed as the pvnet has a different coordinate system.
posecnn_RT[:3,:3] = posecnn_RT[:3,:3]@self.blender_to_bop_pose[class_name][:3,:3].T
posecnn_RT[:3,3:] = -posecnn_RT[:3,:3] @self.blender_to_bop_pose[class_name][:3,3:] + posecnn_RT[:3,3:]
except:
# print(frame_idx)
raise
else:
raise NotImplementedError
rendered_RT = posecnn_RT
else:
print("Warning: fail to load cnn poses!", flush=True)
posecnn_RT = np.zeros_like(RT)
else:
posecnn_RT = np.zeros_like(RT)
#add noise--for testing purpose only, should always be disabled in normal cases
# rot_std=0; trans_std=0.04; ang_max=1000;
# print(f"Add pose noises rot_std={rot_std}, trans_std={trans_std}", flush=True)
# rendered_RT=sample_poses(rendered_RT[None], rot_std=rot_std, trans_std=trans_std, ang_max=ang_max)
# Regularize the matrix to be a valid rotation
rendered_RT[:3,:3] = rendered_RT[:3,:3]@ np.linalg.inv(scipy.linalg.sqrtm(rendered_RT[:3,:3].T@rendered_RT[:3,:3]))
# model_points = np.fromfile(
# str(model_points_path), dtype=np.float32, count=-1).reshape([-1, self.model_point_dim]) # N x model_point_dim
model_points, _,_ = load_obj(str(model_points_path) )
model_points = model_points.numpy()
visb = model_points[:,-1:] # N x model_point_dim
model_point_features=np.ones_like(model_points[:,:1]).astype(np.float32)
rgb = np.asarray(PIL.Image.open(rgb_path))
if depth_path.endswith('.npy'):
depth = np.load(depth_path) # blender
else:
depth = cv2.imread(depth_path, -1) /1000.
if self.is_train and "LM6d_refine_syn" in self.root_paths[dataset_idx]: #synthetic data
rgb = self.load_random_background(rgb, mask=(depth>0)[...,None].repeat(rgb.shape[-1], axis=-1) )
rgb_rendered = np.asarray(PIL.Image.open(rgb_noisy_rendered_path)) if rgb_noisy_rendered_path is not None else None
depth_rendered = np.asarray(PIL.Image.open(depth_noisy_rendered_path))/1000 if depth_noisy_rendered_path is not None else None #TODO: need check
ren_mask = render_pointcloud(model_points, rendered_RT[None],K=K[None], render_image_size=rgb.shape[:2] ).squeeze()>0
# depth = range_to_depth(depth<1, depth*2, K)
return {
"class_name": self.infos["seqs"][seq_idx],
"idx": idx,
"model_points": model_points,
"visibility": visb,
"model_point_features":model_point_features,
"image": rgb,
"depth": depth,
"mask": depth>0,
"rendered_image": rgb_rendered,
"rendered_depth": depth_rendered,
"K": K,
"RT": RT,
"rendered_RT": rendered_RT.astype(np.float32),
"ren_mask":ren_mask,
"POSECNN_RT": posecnn_RT.astype(np.float32), #for test, TODO
"scale": 1 # model_scale * scale = depth_scale
}
def __getitem__(self, idx):
data=self._read_data(idx)
try:
data_p=self.prep_func(data, rand_rgb_transformer=self.rgb_transformer, find_2d3d_correspondence=self.is_train )
except Exception as e:
if e.args[0] in ["Too few correspondences are found!"] :
if isinstance(idx, (tuple, list)):
# idx, seed = idx
idx = [(idx[0]+1)%self.__len__(), idx[1]]
else:
idx = (idx+1) %self.__len__()
data_p= self.__getitem__(idx )
else:
raise
return data_p
def __len__(self):
return np.sum(self.infos['seq_lengths'])
================================================
FILE: data/preprocess.py
================================================
import open3d as o3d
import copy
import os
import pathlib
import pickle
import time
from collections import defaultdict
from functools import partial
import cv2
import numpy as np
import quaternion
from skimage import io as imgio
from utils.timer import simple_timer
import matplotlib.pyplot as plt
from collections.abc import Iterable
import torch
import torch.nn.functional as F
import thirdparty.kpconv.cpp_wrappers.cpp_subsampling.grid_subsampling as cpp_subsampling
import thirdparty.kpconv.cpp_wrappers.cpp_neighbors.radius_neighbors as cpp_neighbors
from thirdparty.kpconv.lib.timer import Timer
from utils.geometric import range_to_depth, mask_depth_to_point_cloud
from utils.furthest_point_sample import fragmentation_fps
from utils.rand_utils import truncated_normal
def merge_batch(batch_list):
# [batch][key][seq]->example[key][seq][batch]
# Or [batch][key]->example[key][batch]
example_merged = defaultdict(list)
for example in batch_list: # batch dim
for k, v in example.items(): # key dim
# assert isinstance(v, list)
if isinstance(v, list):
seq_len = len(v)
if k not in example_merged:
example_merged[k] = [[] for i in range(seq_len)]
for i, vi in enumerate(v): # seq dim
example_merged[k][i].append(vi)
else:
example_merged[k].append(v)
ret = {}
for key, elems in example_merged.items():
if key in ['model_points', "original_model_points", 'visibility']:
# concat the points of lengths (N1, N2, ...) into a single array of length (N1+N2+...)
ret[key] = np.concatenate(elems, axis=0)
# record the point numbers for original batches
ret['batched_model_point_lengths'] = np.array(
[len(p) for p in elems], dtype=np.int32)
elif key in ['rand_model_points', ]:
# concat the points of lengths (N1, N2, ...) into a single array of length (N1+N2+...)
ret[key] = np.concatenate(elems, axis=0)
# record the point numbers for original batches
ret['batched_rand_model_point_lengths'] = np.array(
[len(p) for p in elems], dtype=np.int32)
elif key in ['model_point_features']:
ret[key] = np.concatenate(elems, axis=0)
# ['odometry/tq','odometry/RT','odometry/invRT' ]:
elif key in ['image', 'depth', 'K', 'RT', 'original_RT' ,'rand_RT', 'correspondences_2d3d', 'scale', 'POSECNN_RT','rendered_image', 'rendered_depth', 'rendered_RT', '3d_keypoint_inds', '3d_keypoints', 'mask', 'ren_mask']: #'depth_coords2d','lifted_points',
try:
ret[key] = np.stack(elems, axis=0)
except:
print(key, flush=True)
raise
elif key == 'metrics':
ret[key] = elems
else:
ret[key] = []
for e in elems:
ret[key].append(e)
return ret
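# Illustrative example (not in the original code): scalar-valued keys such as 'K' are stacked
# along a new batch dimension, while a sequence-valued key 'foo' is regrouped from
# [batch][seq] to [seq][batch]:
#   merge_batch([{'K': k0, 'foo': [a0, b0]}, {'K': k1, 'foo': [a1, b1]}])
#   -> {'K': np.stack([k0, k1]), 'foo': [[a0, a1], [b0, b1]]}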
def get_correspondences(src_pcd, tgt_pcd, search_voxel_size, K=None, trans=None):
if trans is not None:
src_pcd.transform(trans)
pcd_tree = o3d.geometry.KDTreeFlann(tgt_pcd)
correspondences = []
for i, point in enumerate(src_pcd.points):
[count, idx, _] = pcd_tree.search_radius_vector_3d(
point, search_voxel_size)
if K is not None:
idx = idx[:K]
for j in idx:
correspondences.append([i, j])
correspondences = np.array(correspondences)
# correspondences = torch.from_numpy(correspondences)
return correspondences
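# Usage sketch (assumption, values are illustrative): pair lifted depth points with model
# points that lie within a small radius, keeping at most 5 matches per source point:
#   corr = get_correspondences(to_pcd(depth_pts), to_pcd(model_points), search_voxel_size=0.02, K=5)
#   # corr is an (M, 2) integer array of [source_index, target_index] pairs.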
def to_pcd(xyz):
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(xyz)
return pcd
def to_tsfm(rot, trans):
tsfm = np.eye(4)
tsfm[:3, :3] = rot
tsfm[:3, 3] = trans.flatten()
return tsfm
def CameraIntrinsicUpdate(old_K, aug_param):
'''
old_K: array of shape (..., 3, 3), the old camera intrinsic parameters
aug_param: dict, the data augmentation parameters
'''
aug_type = aug_param['aug_type']
assert aug_type in ['crop', 'scale', 'flip']
new_K = np.copy(old_K)
if aug_type == 'crop':
cx, cy = aug_param['crop/left_top_corner'] # x,y
new_K[..., 0, 2] = new_K[..., 0, 2] - cx
new_K[..., 1, 2] = new_K[..., 1, 2] - cy
elif aug_type == 'scale':
s_x, s_y = aug_param['scale/scale']
new_K[..., 0, 0] = s_x * new_K[..., 0, 0]
new_K[..., 1, 1] = s_y * new_K[..., 1, 1]
new_K[..., 0, 2] = s_x * new_K[..., 0, 2]
new_K[..., 1, 2] = s_y * new_K[..., 1, 2]
elif aug_type == 'flip':
w = aug_param['flip/width']
# h = aug_param['flip/heigh']
new_K[..., 0, 2] = w - new_K[..., 0, 2] # px' = w-px
# new_K[...,1,2] = h- new_K[...,1,2]
new_K[..., 0, 0] = - new_K[..., 0, 0] # fx' = -fx
return new_K
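# Illustrative example (not in the original code): a crop with the left-top corner at
# (10, 20) only shifts the principal point:
#   K_new = CameraIntrinsicUpdate(K, {"aug_type": "crop", "crop/left_top_corner": (10, 20)})
#   # K_new[..., 0, 2] == K[..., 0, 2] - 10 and K_new[..., 1, 2] == K[..., 1, 2] - 20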
def crop_transform(images, depths, Ks, crop_param, ):
assert(len(images) == len(depths) == len(Ks))
crop_type = crop_param["crop_type"]
assert(crop_type in ["fixed", "center", "random"])
crop_size = crop_param["crop_size"]
iheight, iwidth = images[0].shape[:2]
if crop_type == "fixed":
lt_corner = crop_param["lt_corner"]
op = transforms.Crop(
lt_corner[0], lt_corner[1], crop_size[0], crop_size[1])
elif crop_type == "center":
op = transforms.CenterCrop(crop_size)
ci, cj, _, _ = op.get_params(images[0], crop_size)
lt_corner = ci, cj
elif crop_type == "random":
op = transforms.RandomCrop((iheight, iwidth), crop_size)
lt_corner = op.i, op.j
for i, _ in enumerate(images):
images[i] = op(images[i])
depths[i] = op(depths[i])
Ks[i] = CameraIntrinsicUpdate(Ks[i],
{"aug_type": "crop", "crop/left_top_corner": (lt_corner[1], lt_corner[0])})
return images, depths, Ks
# def patch_crop(image, depth, mask, K_old, margin_ratio=0.2, output_size=128, offset_ratio=(0,0),bbox=None, mask_depth=True):
def patch_crop(image, depth, mask, K_old, margin_ratio=0.2, output_size=128, offset_ratio=(0,0),bbox=None, mask_depth=False):
'''
image: HxWx3
depth: HxW
mask: HxW
K_old: 3x3
offset_ratio: (offset_h, offset_w), relative offset of the crop center w.r.t. the bbox size
'''
H, W, _ = image.shape
mask = mask.astype('uint8')*255
if bbox is None:
_x, _y, _w, _h = cv2.boundingRect(mask)
else:
_x, _y, _w, _h = bbox[1], bbox[0], bbox[3]-bbox[1], bbox[2]-bbox[0]
# center = [_x+_w/2, _y+_h/2]
center = [_x+_w/2+offset_ratio[1]*_w, _y+_h/2+offset_ratio[0]*_h ]
L = int(max(_w, _h) * (1+2*margin_ratio))
if L <= 0:
# TODO: empty or degenerate mask; fall back to a default crop size
print(mask.sum(), depth.sum(), '!!!', flush=True)
L = 128
x = max(0, int(center[0] - L/2))
y = max(0, int(center[1] - L/2))
crop = image[y:y+L, x:x+L]
# only keep the ROI depth
if mask_depth:
depth[mask < 1] = 0 # removed by dy at 0810
depth_crop = depth[y:y+L, x:x+L]
mask_crop = mask[y:y+L, x:x+L]
# w=h=int ((1+2*margin_ratio)*L) # actual crop size
w = h = L # actual crop size
# automatically handle the "out of range" problem
patch = np.zeros([h, w, 3], dtype=image.dtype)
# depth_patch = np.ones([h, w], dtype=depth.dtype)
depth_patch = np.zeros([h, w], dtype=depth.dtype)
mask_patch = np.zeros([h, w], dtype=depth.dtype)
try:
xp = 0
yp = 0
patch[xp: xp+crop.shape[0], yp:yp+crop.shape[1]] = crop
depth_patch[xp: xp+crop.shape[0], yp:yp+crop.shape[1]] = depth_crop
mask_patch[xp: xp+crop.shape[0], yp:yp+crop.shape[1]] = mask_crop
except:
import pdb
pdb.set_trace()
patch = cv2.resize(patch, (output_size, output_size),
interpolation=cv2.INTER_LINEAR)
depth_patch = cv2.resize(
depth_patch, (output_size, output_size), interpolation=cv2.INTER_NEAREST)
mask_patch = cv2.resize(
mask_patch, (output_size, output_size), interpolation=cv2.INTER_NEAREST)
# update the intrinsic parameters
K_new = np.zeros_like(K_old)
scale = output_size/L
K_new[0, 2] = (K_old[0, 2]-x)*scale
K_new[1, 2] = (K_old[1, 2]-y)*scale
K_new[0, 0] = K_old[0, 0]*scale
K_new[1, 1] = K_old[1, 1]*scale
K_new[2, 2] = 1
# return patch, depth_patch, K_new
return patch, depth_patch, mask_patch, K_new
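# Note on the intrinsics update above (illustrative): cropping at (x, y) and resizing the
# L x L patch to output_size is equivalent to shifting the principal point by (-x, -y) and
# then scaling by s = output_size / L:
#   fx' = s * fx,  fy' = s * fy,  cx' = s * (cx - x),  cy' = s * (cy - y)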
def preprocess_deepim(
input_dict,
max_points,
correspondence_radius,
normalize_model=True,
rand_transform_model=False, # False,#True,
rand_rgb_transformer=None,
image_scale=None,
patch_cropper=None, # func patch_crop(...)
):
output_dict = copy.deepcopy(input_dict)
################################### process 3D point clouds ###################################
if (output_dict['model_points'].shape[0] > max_points):
# if(output_dict['model_points'].shape[0] > 20000):
idx = np.random.permutation(
output_dict['model_points'].shape[0])[:max_points]
print(idx, output_dict['model_points'].shape, flush=True)
output_dict['model_points'] = output_dict['model_points'][idx]
output_dict['model_point_features'] = output_dict['model_point_features'][idx]
output_dict['original_RT'] = copy.deepcopy(output_dict['RT'])
if normalize_model:
points = output_dict['model_points']
mean = points.mean(axis=0)
scope = points.max(axis=0)-points.min(axis=0)
points_normalize = (points-mean)/scope.max()
# points_normalize.tofile(bin_save_path)
# modify the extrinsic parameters
output_dict['RT'][:, 3:] = output_dict['RT'][:, :3] @ mean[:,
None] + output_dict['RT'][:, 3:] # 3x3 @ 3x1 + 3x1
# input_dict['RT'][:,:3] *=scope.max()
output_dict['scale'] = scope.max()
output_dict['original_model_points'] = output_dict['model_points']
output_dict['model_points'] = points_normalize
if rand_transform_model:
points = output_dict['model_points']
rand_quat = np.random.randn(1, 4)
rand_quat = rand_quat/np.linalg.norm(rand_quat, axis=-1)
rand_rot = quaternion.as_rotation_matrix(
quaternion.from_float_array(rand_quat)).squeeze() # 3x3
output_dict['rand_model_points'] = (
rand_rot@ points.T).T.astype(np.float32)
output_dict['rand_RT'] = copy.deepcopy(output_dict['RT'])
# output_dict['RT'][:,:3]@rand_rot.T
output_dict['rand_RT'][:, :3] = rand_rot
output_dict['rand_RT'][:, 3] = 0
################################### process 2D images ###################################
# carve out image patches
if patch_cropper is not None:
ref_mask = output_dict['depth'] > 0
output_dict['image'], output_dict['depth'], output_dict['K'] = patch_cropper(
output_dict['image'], output_dict['depth'], ref_mask, output_dict['K'])
output_dict['rendered_image'], output_dict['rendered_depth'], _ = patch_cropper(
output_dict['rendered_image'], output_dict['rendered_depth'], ref_mask, output_dict['K'].copy() )
# rescale image
if image_scale is not None:
output_dict['image'] = cv2.resize(output_dict['image'],
(output_dict['image'].shape[1]*image_scale,
output_dict['image'].shape[0]*image_scale),
interpolation=cv2.INTER_AREA)
output_dict['depth'] = cv2.resize(output_dict['depth'],
(output_dict['depth'].shape[1]*image_scale,
output_dict['depth'].shape[0]*image_scale),
interpolation=cv2.INTER_NEAREST)
output_dict['K'][:2] = output_dict['K'][:2]*image_scale
# lift depth
depth = output_dict['depth'].squeeze() # H,W
depth_pts, depth_coords2d = mask_depth_to_point_cloud(
depth != 0, depth, output_dict['K'])
depth_pts = (output_dict['RT'][:, :3].T@(depth_pts.T - output_dict['RT']
[:, 3:])).T / output_dict['scale'] # transformed to the model frame
# find 2d-3d correspondences
tsfm = np.eye(4)
tsfm[:3] = output_dict['RT']
model_pcd = output_dict['model_points']
correspondences_2d3d = get_correspondences(
to_pcd(depth_pts), to_pcd(model_pcd), correspondence_radius, K=5)
if len(correspondences_2d3d.shape) < 2 or len(correspondences_2d3d) < 10:
print(depth_pts.shape, model_pcd.shape)
print("correspondences_2d3d.shape:",
correspondences_2d3d.shape, flush=True)
# raise ValueError("Too few correspondences are found!")
raise Exception("Too few correspondences are found!")
output_dict['depth_coords2d'] = depth_coords2d
output_dict['lifted_points'] = depth_pts
# output_dict['correspondences_2d3d'] = np.zeros(1)#correspondences_2d3d
output_dict['correspondences_2d3d'] = correspondences_2d3d
if rand_rgb_transformer is not None:
output_dict['image'], _, _ = rand_rgb_transformer(output_dict['image'])
# TO TENSOR
output_dict['image'] = (output_dict['image'].astype(
np.float32)/255.0).transpose([2, 0, 1]) # .mean(axis=0, keepdims=True) # 1,H,W
output_dict['depth'] = output_dict['depth'].astype(np.float32)[
None] # 1,H,W
return output_dict
def preprocess(
input_dict,
max_points,
correspondence_radius,
normalize_model=True,
rand_transform_model=False,
rand_rgb_transformer=None,
image_scale=None,
crop_param=None,
kp_3d_param=None,
use_coords_as_3d_feat=False,
find_2d3d_correspondence=True,
):
output_dict = copy.deepcopy(input_dict)
################################### process 3D point clouds ###################################
if use_coords_as_3d_feat:
output_dict['model_point_features'] = output_dict['model_points'][:,:3]
if (output_dict['model_points'].shape[0] > max_points):
# if(output_dict['model_points'].shape[0] > 20000):
idx = np.random.permutation(
output_dict['model_points'].shape[0])[:max_points]
print(idx, output_dict['model_points'].shape, flush=True)
output_dict['model_points'] = output_dict['model_points'][idx]
output_dict['model_point_features'] = output_dict['model_point_features'][idx]
output_dict['original_RT'] = copy.deepcopy(output_dict['RT'])
output_dict['original_model_points'] = output_dict['model_points']
if normalize_model:
points = output_dict['model_points']
mean = points.mean(axis=0)
scope = points.max(axis=0)-points.min(axis=0)
points_normalize = (points-mean)/scope.max()
# modify the extrinsic parameters
output_dict['RT'][:, 3:] = output_dict['RT'][:, :3] @ mean[:,
None] + output_dict['RT'][:, 3:] # 3x3 @ 3x1 + 3x1
output_dict['scale'] = scope.max()
output_dict['model_points'] = points_normalize
if rand_transform_model:
points = output_dict['model_points']
rand_quat = np.random.randn(1, 4)
rand_quat = rand_quat/np.linalg.norm(rand_quat, axis=-1)
rand_rot = quaternion.as_rotation_matrix(
quaternion.from_float_array(rand_quat)).squeeze() # 3x3
output_dict['rand_model_points'] = (
rand_rot@ points.T).T.astype(np.float32)
output_dict['rand_RT'] = copy.deepcopy(output_dict['RT'])
output_dict['rand_RT'][:, :3] = rand_rot
output_dict['rand_RT'][:, 3] = 0
################################### process 2D images ###################################
#crop image
if crop_param is not None:# and output_dict['mask'].sum()>0:
#without random cropping
if not crop_param.rand_crop:
if crop_param.get("crop_with_init_pose", False):
# bbox= output_dict.get('bbox', None)
bbox=None
output_dict['image'], output_dict['depth'], output_dict['mask'], output_dict['K'] = patch_crop(output_dict['image'], output_dict['depth'], mask=output_dict['ren_mask'],
K_old=output_dict['K'], margin_ratio=crop_param.margin_ratio, output_size=crop_param.output_size, bbox=bbox
)
elif crop_param.get("crop_with_rand_bbox_shift", True):
bbox= output_dict.get('bbox', None)
# offset_ratio= [truncated_normal(0,0.5,-1,1)*crop_param.max_rand_offset_ratio, truncated_normal(0,0.5,-1,1)*crop_param.max_rand_offset_ratio]
offset_ratio= [truncated_normal(0,0.33,-1,1)*1, truncated_normal(0,0.33,-1,1)*1]
output_dict['image'], output_dict['depth'], output_dict['mask'], output_dict['K'] = patch_crop(output_dict['image'], output_dict['depth'], mask=output_dict['mask'],
K_old=output_dict['K'], margin_ratio=crop_param.margin_ratio, output_size=crop_param.output_size, offset_ratio=offset_ratio, bbox=output_dict.get('bbox', None)
)
else:
bbox= output_dict.get('bbox', None)
output_dict['image'], output_dict['depth'], output_dict['mask'], output_dict['K'] = patch_crop(output_dict['image'], output_dict['depth'], mask=output_dict['mask'],
K_old=output_dict['K'], margin_ratio=crop_param.margin_ratio, output_size=crop_param.output_size, bbox=output_dict.get('bbox', None)
)
else:
margin_ratio= truncated_normal(0.5, 0.5, 0, 1) *crop_param.max_rand_margin_ratio
offset_ratio= [truncated_normal(0,0.5,-1,1)*crop_param.max_rand_offset_ratio, truncated_normal(0,0.5,-1,1)*crop_param.max_rand_offset_ratio]
output_dict['image'], output_dict['depth'], output_dict['mask'], output_dict['K'] = patch_crop(output_dict['image'], output_dict['depth'], mask=output_dict['mask'],
K_old=output_dict['K'], margin_ratio=margin_ratio, output_size=crop_param.output_size, offset_ratio=offset_ratio, bbox=output_dict.get('bbox', None)
)
# rescale image
if image_scale is not None:
output_dict['image'] = cv2.resize(output_dict['image'],
(output_dict['image'].shape[1]*image_scale,
output_dict['image'].shape[0]*image_scale),
interpolation=cv2.INTER_AREA)
output_dict['depth'] = cv2.resize(output_dict['depth'],
(output_dict['depth'].shape[1]*image_scale,
output_dict['depth'].shape[0]*image_scale),
interpolation=cv2.INTER_NEAREST)
output_dict['K'][:2] = output_dict['K'][:2]*image_scale
# lift depths
depth = output_dict['depth'].squeeze() # H,W
depth_pts, depth_coords2d = mask_depth_to_point_cloud(
depth != 0, depth, output_dict['K'])
depth_pts = (output_dict['RT'][:, :3].T@(depth_pts.T - output_dict['RT']
[:, 3:])).T / output_dict['scale'] # transformed to the model frame
# find 2d-3d correspondences
if find_2d3d_correspondence:
tsfm = np.eye(4)
tsfm[:3] = output_dict['RT']
model_pcd = output_dict['model_points']
correspondences_2d3d = get_correspondences(
to_pcd(depth_pts), to_pcd(model_pcd), correspondence_radius, K=5)
if len(correspondences_2d3d.shape) < 2 or len(correspondences_2d3d) < 10:# or ( "mask" in output_dict and output_dict['mask'].sum()<10 ) :
print(depth_pts.shape, model_pcd.shape)
print("correspondences_2d3d.shape:",
correspondences_2d3d.shape, flush=True)
raise Exception("Too few correspondences are found!")
output_dict['depth_coords2d'] = depth_coords2d
output_dict['lifted_points'] = depth_pts
output_dict['correspondences_2d3d'] = correspondences_2d3d
else:
output_dict['depth_coords2d'] = depth_coords2d
output_dict['lifted_points'] = depth_pts
output_dict['correspondences_2d3d'] = np.zeros([10,2], dtype=np.int64)
if rand_rgb_transformer is not None:
output_dict['image'], _, _ = rand_rgb_transformer(output_dict['image'])
# TO TENSORs
output_dict['image'] = (output_dict['image'].astype(
np.float32)/255.0).transpose([2, 0, 1]) # .mean(axis=0, keepdims=True) # 1,H,W
output_dict['depth'] = output_dict['depth'].astype(np.float32)[
None] # 1,H,W
return output_dict
def batch_grid_subsampling_kpconv(points, batches_len, features=None, labels=None, sampleDl=0.1, max_p=0, verbose=0, random_grid_orient=True):
"""
CPP wrapper for a grid subsampling (method = barycenter for points and features)
"""
if (features is None) and (labels is None):
s_points, s_len = cpp_subsampling.subsample_batch(points,
batches_len,
sampleDl=sampleDl,
max_p=max_p,
verbose=verbose)
return torch.from_numpy(s_points), torch.from_numpy(s_len)
elif (labels is None):
s_points, s_len, s_features = cpp_subsampling.subsample_batch(points,
batches_len,
features=features,
sampleDl=sampleDl,
max_p=max_p,
verbose=verbose)
return torch.from_numpy(s_points), torch.from_numpy(s_len), torch.from_numpy(s_features)
elif (features is None):
s_points, s_len, s_labels = cpp_subsampling.subsample_batch(points,
batches_len,
classes=labels,
sampleDl=sampleDl,
max_p=max_p,
verbose=verbose)
return torch.from_numpy(s_points), torch.from_numpy(s_len), torch.from_numpy(s_labels)
else:
s_points, s_len, s_features, s_labels = cpp_subsampling.subsample_batch(points,
batches_len,
features=features,
classes=labels,
sampleDl=sampleDl,
max_p=max_p,
verbose=verbose)
return torch.from_numpy(s_points), torch.from_numpy(s_len), torch.from_numpy(s_features), torch.from_numpy(s_labels)
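# Minimal usage sketch (assumption, shapes are illustrative): subsample two stacked clouds
# with a 5 cm voxel size; the returned lengths give the per-cloud point counts after subsampling.
#   points = np.random.rand(3000, 3).astype(np.float32)
#   lengths = np.array([1000, 2000], dtype=np.int32)
#   s_points, s_lengths = batch_grid_subsampling_kpconv(points, lengths, sampleDl=0.05)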
def batch_neighbors_kpconv(queries, supports, q_batches, s_batches, radius, max_neighbors):
"""
Computes neighbors for a batch of queries and supports, apply radius search
:param queries: (N1, 3) the query points
:param supports: (N2, 3) the support points
:param q_batches: (B) the list of lengths of batch elements in queries
:param s_batches: (B)the list of lengths of batch elements in supports
:param radius: float32
:return: neighbors indices
"""
neighbors = cpp_neighbors.batch_query(
queries, supports, q_batches, s_batches, radius=radius)
# print("neighbors.shape" , neighbors.shape, queries.shape,flush=True)
if max_neighbors > 0:
return torch.from_numpy(neighbors[:, :max_neighbors])
else:
return torch.from_numpy(neighbors)
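# Minimal usage sketch (assumption): radius search of each point against the same cloud,
# truncated to the calibrated neighborhood limit; rows shorter than max_neighbors are
# typically padded with an out-of-range index.
#   neigh = batch_neighbors_kpconv(points, points, lengths, lengths, radius=0.1, max_neighbors=30)
#   # neigh: (N, <=30) LongTensor of support indices per query point.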
def collate_fn_descriptor(list_data, config, neighborhood_limits):
ret = merge_batch(list_data)
batched_points = torch.from_numpy(ret['model_points'])
batched_lengths = torch.from_numpy(ret['batched_model_point_lengths'])
batched_features = torch.from_numpy(ret['model_point_features'])
if ret.get('rand_model_points', None) is not None:
batched_rand_points = torch.from_numpy(ret['rand_model_points'])
batched_rand_lengths = torch.from_numpy(
ret['batched_rand_model_point_lengths'])
batched_points = torch.cat(
[batched_points, batched_rand_points], dim=0)
batched_lengths = torch.cat(
[batched_lengths, batched_rand_lengths], dim=0)
batched_features = torch.cat(
[batched_features, batched_features], dim=0)
# Starting radius of convolutions
r_normal = config.first_subsampling_dl * config.conv_radius
# Starting layer
layer_blocks = []
layer = 0
# Lists of inputs
input_points = []
input_neighbors = []
input_pools = []
input_upsamples = []
input_batches_len = []
timer = Timer()
for block_i, block in enumerate(config.architecture):
# Stop when meeting a global pooling or upsampling
if 'global' in block or 'upsample' in block:
break
# Get all blocks of the layer
if not ('pool' in block or 'strided' in block):
layer_blocks += [block]
if block_i < len(config.architecture) - 1 and not ('upsample' in config.architecture[block_i + 1]):
continue
# Convolution neighbors indices
# *****************************
if layer_blocks:
# Convolutions are done in this layer, compute the neighbors with the appropriate radius
if np.any(['deformable' in blck for blck in layer_blocks[:-1]]):
r = r_normal * config.deform_radius / config.conv_radius
else:
r = r_normal
conv_i = batch_neighbors_kpconv(
batched_points, batched_points, batched_lengths, batched_lengths, r, neighborhood_limits[layer])
else:
# This layer only performs pooling, no neighbors required
conv_i = torch.zeros((0, 1), dtype=torch.int64)
# Pooling neighbors indices
# *************************
if 'pool' in block or 'strided' in block:
# New subsampling length
dl = 2 * r_normal / config.conv_radius
# Subsampled points
pool_p, pool_b = batch_grid_subsampling_kpconv(
batched_points, batched_lengths, sampleDl=dl)
# Radius of pooled neighbors
if 'deformable' in block:
r = r_normal * config.deform_radius / config.conv_radius
else:
r = r_normal
# Subsample indices
pool_i = batch_neighbors_kpconv(
pool_p, batched_points, pool_b, batched_lengths, r, neighborhood_limits[layer])
# Upsample indices (with the radius of the next layer to keep wanted density)
up_i = batch_neighbors_kpconv(
batched_points, pool_p, batched_lengths, pool_b, 2 * r, neighborhood_limits[layer])
else:
# No pooling at the end of this layer, no pooling indices required
pool_i = torch.zeros((0, 1), dtype=torch.int64)
pool_p = torch.zeros((0, 3), dtype=torch.float32)
pool_b = torch.zeros((0,), dtype=torch.int64)
up_i = torch.zeros((0, 1), dtype=torch.int64)
# Updating input lists
input_points += [batched_points.float()]
input_neighbors += [conv_i.long()]
input_pools += [pool_i.long()]
input_upsamples += [up_i.long()]
input_batches_len += [batched_lengths]
# New points for next layer
batched_points = pool_p
batched_lengths = pool_b
# Update radius and reset blocks
r_normal *= 2
layer += 1
layer_blocks = []
###############
# Return inputs
###############
dict_inputs = {
"idx": ret["idx"],
'model_points': input_points,
'visibility': torch.from_numpy(ret['visibility']),
'neighbors': input_neighbors,
'pools': input_pools,
'upsamples': input_upsamples,
'model_point_features': batched_features.float(),
'stack_lengths': input_batches_len,
'image': torch.from_numpy(ret['image']),
'depth': torch.from_numpy(ret['depth']),
'mask': torch.from_numpy(ret['mask']),
'ren_mask': torch.from_numpy(ret['ren_mask']),
'K': torch.from_numpy(ret['K']),
'RT': torch.from_numpy(ret['RT']),
'original_RT': torch.from_numpy(ret['original_RT']),
'POSECNN_RT': torch.from_numpy(ret.get('POSECNN_RT', np.zeros_like(ret['RT']) ) ),
'rand_RT': torch.from_numpy(ret.get('rand_RT', np.zeros_like(ret['RT']))),
# "lifted_points": torch.from_numpy(ret['lifted_points']),
"lifted_points": [torch.from_numpy(d) for d in ret['lifted_points'] ] ,
# 'depth_coords2d': torch.from_numpy(ret['depth_coords2d']),
'depth_coords2d': [torch.from_numpy(d) for d in ret['depth_coords2d']],
"correspondences_2d3d": torch.from_numpy(ret['correspondences_2d3d']),
"original_model_points": torch.from_numpy(ret['original_model_points']),
"class_name": ret['class_name'],
"3d_keypoint_inds": torch.from_numpy(ret['3d_keypoint_inds']),
"3d_keypoints": torch.from_numpy(ret['3d_keypoints'] )
}
return dict_inputs
def collate_fn_descriptor_deepim(list_data, config, neighborhood_limits):
ret = merge_batch(list_data)
batched_points = torch.from_numpy(ret['model_points'])
batched_lengths = torch.from_numpy(ret['batched_model_point_lengths'])
batched_features = torch.from_numpy(ret['model_point_features'])
if ret.get('rand_model_points', None) is not None:
# torch.from_numpy(np.concatenate(batched_points_list, axis=0))
batched_rand_points = torch.from_numpy(ret['rand_model_points'])
# torch.from_numpy(np.concatenate(batched_points_list, axis=0))
batched_rand_lengths = torch.from_numpy(
ret['batched_rand_model_point_lengths'])
batched_points = torch.cat(
[batched_points, batched_rand_points], dim=0)
batched_lengths = torch.cat(
[batched_lengths, batched_rand_lengths], dim=0)
batched_features = torch.cat(
[batched_features, batched_features], dim=0)
# Starting radius of convolutions
r_normal = config.first_subsampling_dl * config.conv_radius
# Starting layer
layer_blocks = []
layer = 0
# Lists of inputs
input_points = []
input_neighbors = []
input_pools = []
input_upsamples = []
input_batches_len = []
timer = Timer()
for block_i, block in enumerate(config.architecture):
# timer.tic()
# Stop when meeting a global pooling or upsampling
if 'global' in block or 'upsample' in block:
break
# Get all blocks of the layer
if not ('pool' in block or 'strided' in block):
layer_blocks += [block]
if block_i < len(config.architecture) - 1 and not ('upsample' in config.architecture[block_i + 1]):
continue
# Convolution neighbors indices
# *****************************
if layer_blocks:
# Convolutions are done in this layer, compute the neighbors with the appropriate radius
if np.any(['deformable' in blck for blck in layer_blocks[:-1]]):
r = r_normal * config.deform_radius / config.conv_radius
else:
r = r_normal
conv_i = batch_neighbors_kpconv(
batched_points, batched_points, batched_lengths, batched_lengths, r, neighborhood_limits[layer])
else:
# This layer only performs pooling, no neighbors required
conv_i = torch.zeros((0, 1), dtype=torch.int64)
# Pooling neighbors indices
# *************************
# If end of layer is a pooling operation
if 'pool' in block or 'strided' in block:
# New subsampling length
dl = 2 * r_normal / config.conv_radius
# Subsampled points
pool_p, pool_b = batch_grid_subsampling_kpconv(
batched_points, batched_lengths, sampleDl=dl)
# Radius of pooled neighbors
if 'deformable' in block:
r = r_normal * config.deform_radius / config.conv_radius
else:
r = r_normal
# Subsample indices
pool_i = batch_neighbors_kpconv(
pool_p, batched_points, pool_b, batched_lengths, r, neighborhood_limits[layer])
# Upsample indices (with the radius of the next layer to keep wanted density)
up_i = batch_neighbors_kpconv(
batched_points, pool_p, batched_lengths, pool_b, 2 * r, neighborhood_limits[layer])
else:
# No pooling at the end of this layer, no pooling indices required
pool_i = torch.zeros((0, 1), dtype=torch.int64)
pool_p = torch.zeros((0, 3), dtype=torch.float32)
pool_b = torch.zeros((0,), dtype=torch.int64)
up_i = torch.zeros((0, 1), dtype=torch.int64)
# Updating input lists
input_points += [batched_points.float()]
input_neighbors += [conv_i.long()]
input_pools += [pool_i.long()]
input_upsamples += [up_i.long()]
input_batches_len += [batched_lengths]
# New points for next layer
batched_points = pool_p
batched_lengths = pool_b
# Update radius and reset blocks
r_normal *= 2
layer += 1
layer_blocks = []
# timer.toc()
###############
# Return inputs
###############
dict_inputs = {
"idx": ret["idx"],
'model_points': input_points,
'visibility': torch.from_numpy(ret['visibility']),
'neighbors': input_neighbors,
'pools': input_pools,
'upsamples': input_upsamples,
'model_point_features': batched_features.float(),
'stack_lengths': input_batches_len,
'image': torch.from_numpy(ret['image']),
'depth': torch.from_numpy(ret['depth']),
"ren_mask": torch.from_numpy(ret['ren_mask']),
'K': torch.from_numpy(ret['K']),
'RT': torch.from_numpy(ret['RT']),
'original_RT': torch.from_numpy(ret['original_RT']),
'rendered_RT': torch.from_numpy(ret['rendered_RT']) if ret.get('rendered_RT', None) is not None else None ,
'POSECNN_RT': torch.from_numpy(ret.get('POSECNN_RT', np.zeros_like(ret['RT']) ) ),
# TODO
'rand_RT': torch.from_numpy(ret.get('rand_RT', np.zeros_like(ret['RT']))),
# "lifted_points": torch.from_numpy(ret['lifted_points']),
"lifted_points": [torch.from_numpy(d) for d in ret['lifted_points'] ] ,
# 'depth_coords2d': torch.from_numpy(ret['depth_coords2d']),
'depth_coords2d': [torch.from_numpy(d) for d in ret['depth_coords2d']],
"correspondences_2d3d": torch.from_numpy(ret['correspondences_2d3d']),
"original_model_points": torch.from_numpy(ret['original_model_points']),
"class_name": ret['class_name'],
}
return dict_inputs
def calibrate_neighbors(dataset, config, collate_fn, keep_ratio=0.8, samples_threshold=2000):
timer = Timer()
last_display = timer.total_time
# From config parameter, compute higher bound of neighbors number in a neighborhood
hist_n = int(np.ceil(4 / 3 * np.pi * (config.deform_radius + 1) ** 3))
neighb_hists = np.zeros((config.num_layers, hist_n), dtype=np.int32)
# Get histogram of neighborhood sizes over at most 1 epoch.
for i in range(len(dataset)):
timer.tic()
batched_input = collate_fn(
[dataset[i]], config, neighborhood_limits=[hist_n] * 5)
# update histogram
counts = [torch.sum(neighb_mat < neighb_mat.shape[0], dim=1).numpy()
for neighb_mat in batched_input['neighbors']]
hists = [np.bincount(c, minlength=hist_n)[:hist_n] for c in counts]
neighb_hists += np.vstack(hists)
timer.toc()
if timer.total_time - last_display > 0.1:
last_display = timer.total_time
print(f"Calib Neighbors {i:08d}: timings {timer.total_time:4.2f}s")
if np.min(np.sum(neighb_hists, axis=1)) > samples_threshold:
break
cumsum = np.cumsum(neighb_hists.T, axis=0)
percentiles = np.sum(cumsum < (keep_ratio * cumsum[hist_n - 1, :]), axis=0)
neighborhood_limits = percentiles
print('\n')
return neighborhood_limits
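# Typical usage sketch (assumption): calibrate the per-layer neighbor limits once on the
# training set, then reuse them when building data loaders.
#   limits = calibrate_neighbors(train_dataset, kpconv_config, collate_fn_descriptor)
#   train_loader, _ = get_dataloader(train_dataset, kpconv_config, neighborhood_limits=limits)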
def get_dataloader(dataset, kpconv_config, batch_size=1, num_workers=4, shuffle=True, sampler=None, neighborhood_limits=None):
if neighborhood_limits is None:
# neighborhood_limits = calibrate_neighbors(dataset, dataset.config, collate_fn=collate_fn_descriptor)
neighborhood_limits = calibrate_neighbors(
dataset, kpconv_config, collate_fn=collate_fn_descriptor)
print("neighborhood:", neighborhood_limits)
dataloader = torch.utils.data.DataLoader(
dataset,
batch_size=batch_size,
shuffle=shuffle,
num_workers=num_workers,
# https://discuss.pytorch.org/t/supplying-arguments-to-collate-fn/25754/4
collate_fn=partial(collate_fn_descriptor, config=kpconv_config,
neighborhood_limits=neighborhood_limits),
sampler=sampler,
drop_last=False
)
return dataloader, neighborhood_limits
def get_dataloader_deepim(dataset, kpconv_config, batch_size=1, num_workers=4, shuffle=True, sampler=None, neighborhood_limits=None):
if neighborhood_limits is None:
neighborhood_limits = calibrate_neighbors(
dataset, kpconv_config, collate_fn=collate_fn_descriptor_deepim)
print("Neighborhood:", neighborhood_limits)
dataloader = torch.utils.data.DataLoader(
dataset,
batch_size=batch_size,
shuffle=shuffle,
num_workers=num_workers,
# https://discuss.pytorch.org/t/supplying-arguments-to-collate-fn/25754/4
collate_fn=partial(collate_fn_descriptor_deepim, config=kpconv_config,
neighborhood_limits=neighborhood_limits),
sampler=sampler,
drop_last=False
)
return dataloader, neighborhood_limits
if __name__ == '__main__':
pass
================================================
FILE: data/transforms.py
================================================
import numpy as np
import random
import torch
import torchvision
from torchvision.transforms import functional as F
import cv2
from PIL import Image
class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, img, kpts=None, mask=None):
for t in self.transforms:
img, kpts, mask = t(img, kpts, mask)
return img, kpts, mask
def __repr__(self):
format_string = self.__class__.__name__ + "("
for t in self.transforms:
format_string += "\n"
format_string += " {0}".format(t)
format_string += "\n)"
return format_string
class ToTensor(object):
def __call__(self, img, kpts, mask):
return np.asarray(img).astype(np.float32) / 255., kpts, mask
class Normalize(object):
def __init__(self, mean, std, to_bgr=True):
self.mean = mean
self.std = std
self.to_bgr = to_bgr
def __call__(self, img, kpts, mask):
img -= self.mean
img /= self.std
if self.to_bgr:
img = img.transpose(2, 0, 1).astype(np.float32)
return img, kpts, mask
class ColorJitter(object):
def __init__(self,
brightness=None,
contrast=None,
saturation=None,
hue=None,
):
self.color_jitter = torchvision.transforms.ColorJitter(
brightness=brightness,
contrast=contrast,
saturation=saturation,
hue=hue,)
def __call__(self, image, kpts, mask):
image = np.asarray(self.color_jitter(Image.fromarray(np.ascontiguousarray(image, np.uint8))))
return image, kpts, mask
class RandomBlur(object):
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, image, kpts, mask):
if random.random() < self.prob:
sigma = np.random.choice([3, 5, 7, 9])
image = cv2.GaussianBlur(image, (sigma, sigma), 0)
return image, kpts, mask
def make_transforms(cfg, is_train):
if is_train is True:
transform = Compose(
[
RandomBlur(0.5),
ColorJitter(0.1, 0.1, 0.05, 0.05),
# ToTensor(),
# Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
)
else:
transform = Compose(
[
# ToTensor(),
# Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
)
return transform
================================================
FILE: data/ycb/basic.py
================================================
import mmcv
bop_ycb_idx2class={
1: '002_master_chef_can',
2: '003_cracker_box',
3: '004_sugar_box',
4: '005_tomato_soup_can',
5: '006_mustard_bottle',
6: '007_tuna_fish_can',
7: '008_pudding_box',
8: '009_gelatin_box',
9: '010_potted_meat_can',
10: '011_banana',
11: '019_pitcher_base',
12: '021_bleach_cleanser',
13: '024_bowl',
14: '025_mug',
15: '035_power_drill',
16: '036_wood_block',
17: '037_scissors',
18: '040_large_marker',
19: '051_large_clamp',
20: '052_extra_large_clamp',
21: '061_foam_brick',
}
bop_ycb_class2idx = dict([[bop_ycb_idx2class[k],k ] for k in bop_ycb_idx2class.keys() ])
================================================
FILE: doc/prepare_data.md
================================================
# Data Preparation Tips
All the related data for data preparation can be downloaded [here](https://mycuhk-my.sharepoint.com/:f:/g/personal/1155139432_link_cuhk_edu_hk/EoXnZ96Tuy9PpYlZCvDN8vUBPdP1lP-PWQWiZH2KtIQoaQ?e=lpE472). You could download them first and then follow the instructions below for data preparation.
## Download Datasets
First, the following datasets need to be downloaded and extracted to the folder *EXPDATA/*:
[LINEMOD](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/EYFaYrk0kcdBgC6WMtLJqP0B9Ar0_Nff9qhI2Cs95qDbdA?e=yYxexC)
[LINEMOD_OCC_TEST](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/EUKcRnwyy9RGu2ASwA3QDXsBnMRrFP-U4X4Eqq-g_MhmIQ?e=hv6H2s)
## Synthetic Data Generation
The preprocessed data following [DeepIM](https://github.com/liyi14/mx-DeepIM) and [PVNet](https://github.com/zju3dv/pvnet-rendering) can be downloaded from [LM6d_converted](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/EYFaYrk0kcdBgC6WMtLJqP0B9Ar0_Nff9qhI2Cs95qDbdA) and [raw_data](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/ESSFXi_7qs1AgNmty7_9y4AB8ffFsGJWOC3ikgD5BIeXHQ?e=qOmvds).
After downloading, put the downloaded files into the folder *EXPDATA/* (located in the repository's root directory).
To create occluded objects during training, we follow [PVNet](https://github.com/zju3dv/pvnet-rendering) and randomly generate occlusions.
Then run the following script to transform the data into the format expected by our dataloader.
```
bash scripts/run_dataformatter.sh
```
The command above will automatically save the formatted data into *EXPDATA/*.
## Download the Object CAD Models
You also need to download the [object models](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/EQScZuLrkPNPmN4eO3kePaUBjOe92EvbKb7kGJk2vKz-bA?e=8McAdh) and put the extracted folder *models* into *./EXPDATA/LM6d_converted/*.
## Download Background Images
[Pascal VOC 2012](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar) needs to be downloaded to the folder *EXPDATA/*. These images are needed for random background generation during training.
## Download Initial Poses
The initial poses estimated by PoseCNN and PVNet can be downloaded from [here](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155139432_link_cuhk_edu_hk/EQh5y0M_zHVMnbVszjEviCUBNAX_22MFN26Msa48XlJ5MQ?e=rfhT7k).
The initial pose folder should also be put into the folder *EXPDATA/*.
## Generate the Information Files
Run the following script to generate the info files, which are put into the folder *EXPDATA/data_info/*:
```
bash scripts/run_datainfo_generation.sh
```
After the data preparation, the expected directory structure should be:
```
./EXPDATA
|──LM6d_converted
| |──LM6d_refine
| |──LM6d_refine_syn
| └──models
|──LINEMOD
| └──fuse_formatted
|──lmo
|──VOCdevkit
|──raw_data
|──init_poses
└──data_info
```
================================================
FILE: docker/Dockerfile
================================================
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
RUN apt-key del 7fa2af80
RUN apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
RUN rm /etc/apt/sources.list.d/cuda.list
RUN rm /etc/apt/sources.list.d/nvidia-ml.list
# Dependencies for glvnd and X11.
RUN apt-get update
RUN apt-get install -y -qq --no-install-recommends \
libglvnd0 \
libgl1 \
libglx0 \
libegl1 \
libxext6 \
libx11-6 \
&& rm -rf /var/lib/apt/lists/*
# Env vars for the nvidia-container-runtime.
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES graphics,utility,compute
#env vars for cuda
ENV CUDA_HOME /usr/local/cuda
#install miniconda
RUN apt-get update --fix-missing && \
apt-get install -y wget bzip2 ca-certificates curl git && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN wget --quiet https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -O ~/miniconda.sh && \
/bin/bash ~/miniconda.sh -b -p /opt/miniconda3 && \
rm ~/miniconda.sh && \
/opt/miniconda3/bin/conda clean -tipsy && \
ln -s /opt/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
echo ". /opt/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
echo "conda activate base" >> ~/.bashrc && \
echo "conda deactivate && conda activate py37" >> ~/.bashrc
#https://blog.csdn.net/Mao_Jonah/article/details/89502380
COPY freeze.yml freeze.yml
RUN /opt/miniconda3/bin/conda env create -n py37 -f freeze.yml
# WORKDIR /tmp/
# COPY config.jupyter.tar config.jupyter.tar
# RUN tar -xvf config.jupyter.tar -C /root/
#install apex
ENV TORCH_CUDA_ARCH_LIST "6.0 6.2 7.0 7.2"
# make sure we don't overwrite some existing directory called "apex"
WORKDIR /tmp/unique_for_apex
# uninstall Apex if present, twice to make absolutely sure :)
RUN /opt/miniconda3/envs/py37/bin/pip3 uninstall -y apex || :
RUN /opt/miniconda3/envs/py37/bin/pip3 uninstall -y apex || :
# SHA is something the user can touch to force recreation of this Docker layer,
# and therefore force cloning of the latest version of Apex
RUN SHA=ToUcHMe git clone https://github.com/NVIDIA/apex.git
WORKDIR /tmp/unique_for_apex/apex
RUN /opt/miniconda3/envs/py37/bin/pip3 install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
#install pytorch3d
# RUN /opt/miniconda3/envs/py37/bin/pip install pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py37_cu102_pyt171/download.html
# RUN /opt/miniconda3/envs/py37/bin/pip install "git+https://github.com/facebookresearch/pytorch3d.git"
# RUN /opt/miniconda3/bin/conda install pytorch3d==0.5.0 -c pytorch3d -n py37
#other pkgs
RUN apt-get update \
&& apt-get install -y -qq --no-install-recommends \
cmake build-essential vim xvfb unzip tmux psmisc \
libx11-dev libassimp-dev \
mesa-common-dev freeglut3-dev \
rsync \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
#create some directories
RUN mkdir -p /home/RNNPose
EXPOSE 8887 8888 8889 10000 10001 10002
WORKDIR /home/RNNPose
================================================
FILE: docker/freeze.yml
================================================
name: py37_tmp
channels:
- pytorch
- pytorch3d
- open3d-admin
- bottler
- iopath
- fvcore
- conda-forge
- defaults
dependencies:
- pytorch3d=0.5.0
- _libgcc_mutex=0.1=main
- _openmp_mutex=4.5=1_gnu
- anyio=2.2.0=py37h06a4308_1
- argon2-cffi=20.1.0=py37h27cfd23_1
- async_generator=1.10=py37h28b3542_0
- attrs=21.2.0=pyhd3eb1b0_0
- babel=2.9.1=pyhd3eb1b0_0
- backcall=0.2.0=pyhd3eb1b0_0
- blas=1.0=mkl
- bleach=3.3.0=pyhd3eb1b0_0
- brotlipy=0.7.0=py37h27cfd23_1003
- ca-certificates=2021.5.30=ha878542_0
- certifi=2021.5.30=py37h89c1867_0
- cffi=1.14.5=py37h261ae71_0
- charset-normalizer=2.0.4=pyhd3eb1b0_0
- cryptography=3.4.7=py37hd23ed53_0
- cudatoolkit=10.2.89=h8f6ccaa_8
- cycler=0.10.0=py37_0
- dbus=1.13.18=hb2f20db_0
- defusedxml=0.7.1=pyhd3eb1b0_0
- entrypoints=0.3=py37_0
- expat=2.4.1=h2531618_2
- fontconfig=2.13.1=h6c09931_0
- freetype=2.10.4=h5ab3b9f_0
- fvcore=0.1.5.post20210825=py37
- glib=2.68.2=h36276a3_0
- gst-plugins-base=1.14.0=h8213a91_2
- gstreamer=1.14.0=h28cd5cc_2
- icu=58.2=he6710b0_3
- idna=3.2=pyhd3eb1b0_0
- importlib-metadata=3.10.0=py37h06a4308_0
- importlib_metadata=3.10.0=hd3eb1b0_0
- intel-openmp=2021.2.0=h06a4308_610
- iopath=0.1.9=py37
- ipykernel=5.3.4=py37h5ca1d4c_0
- ipython=7.22.0=py37hb070fc8_0
- ipython_genutils=0.2.0=pyhd3eb1b0_1
- ipywidgets=7.6.3=pyhd3eb1b0_1
- jedi=0.17.0=py37_0
- jinja2=3.0.0=pyhd3eb1b0_0
- joblib=1.0.1=pyhd3eb1b0_0
- jpeg=9b=h024ee3a_2
- json5=0.9.6=pyhd3eb1b0_0
- jsonschema=3.2.0=py_2
- jupyter=1.0.0=py37h89c1867_6
- jupyter_client=6.1.12=pyhd3eb1b0_0
- jupyter_console=6.4.0=pyhd8ed1ab_0
- jupyter_core=4.7.1=py37h06a4308_0
- jupyter_server=1.4.1=py37h06a4308_0
- jupyterlab=3.0.16=pyhd8ed1ab_0
- jupyterlab_pygments=0.1.2=py_0
- jupyterlab_server=2.7.1=pyhd3eb1b0_0
- jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
- kiwisolver=1.3.1=py37h2531618_0
- kornia=0.5.3=pyhd8ed1ab_0
- lcms2=2.12=h3be6417_0
- ld_impl_linux-64=2.35.1=h7274673_9
- libffi=3.3=he6710b0_2
- libgcc-ng=9.3.0=h5101ec6_17
- libgfortran-ng=7.5.0=ha8ba4b0_17
- libgfortran4=7.5.0=ha8ba4b0_17
- libgomp=9.3.0=h5101ec6_17
- libpng=1.6.37=hbc83047_0
- libsodium=1.0.18=h7b6447c_0
- libstdcxx-ng=9.3.0=hd4cf53a_17
- libtiff=4.2.0=h85742a9_0
- libuuid=1.0.3=h1bed415_2
- libuv=1.40.0=h7b6447c_0
- libwebp-base=1.2.0=h27cfd23_0
- libxcb=1.14=h7b6447c_0
- libxml2=2.9.10=hb55368b_3
- lz4-c=1.9.3=h2531618_0
- markupsafe=2.0.1=py37h27cfd23_0
- matplotlib=3.3.4=py37h06a4308_0
- matplotlib-base=3.3.4=py37h62a2d02_0
- mistune=0.8.4=py37h14c3975_1001
- mkl=2021.2.0=h06a4308_296
- mkl-service=2.3.0=py37h27cfd23_1
- mkl_fft=1.3.0=py37h42c9631_2
- mkl_random=1.2.1=py37ha9443f7_2
- nbclassic=0.2.6=pyhd3eb1b0_0
- nbclient=0.5.3=pyhd3eb1b0_0
- nbconvert=6.0.7=py37_0
- nbformat=5.1.3=pyhd3eb1b0_0
- ncurses=6.2=he6710b0_1
- nest-asyncio=1.5.1=pyhd3eb1b0_0
- ninja=1.10.2=hff7bd54_1
- notebook=6.4.0=py37h06a4308_0
- numpy=1.20.2=py37h2d18471_0
- numpy-base=1.20.2=py37hfae3a4d_0
- nvidiacub=1.10.0=0
- olefile=0.46=py37_0
- open3d=0.13.0=py37_0
- openssl=1.1.1k=h7f98852_0
- packaging=20.9=pyhd3eb1b0_0
- pandas=1.2.4=py37h2531618_0
- pandoc=2.12=h06a4308_0
- pandocfilters=1.4.3=py37h06a4308_1
- parso=0.8.2=pyhd3eb1b0_0
- pcre=8.44=he6710b0_0
- pexpect=4.8.0=pyhd3eb1b0_3
- pickleshare=0.7.5=pyhd3eb1b0_1003
- pillow=8.2.0=py37he98fc37_0
- pip=21.1.2=py37h06a4308_0
- plyfile=0.7.4=pyhd8ed1ab_0
- portalocker=2.3.0=py37h06a4308_0
- prometheus_client=0.11.0=pyhd3eb1b0_0
- prompt-toolkit=3.0.17=pyh06a4308_0
- prompt_toolkit=3.0.17=hd3eb1b0_0
- ptyprocess=0.7.0=pyhd3eb1b0_2
- pycparser=2.20=py_2
- pygments=2.9.0=pyhd3eb1b0_0
- pyopenssl=20.0.1=pyhd3eb1b0_1
- pyparsing=2.4.7=pyhd3eb1b0_0
- pyqt=5.9.2=py37h05f1152_2
- pyrsistent=0.17.3=py37h7b6447c_0
- pysocks=1.7.1=py37_1
- python=3.7.10=h12debd9_4
- python-dateutil=2.8.1=pyhd3eb1b0_0
- python_abi=3.7=1_cp37m
- pytorch=1.7.1=py3.7_cuda10.2.89_cudnn7.6.5_0
- pytz=2021.1=pyhd3eb1b0_0
- pyzmq=20.0.0=py37h2531618_1
- qt=5.9.7=h5867ecd_1
- qtconsole=5.1.1=pyhd8ed1ab_0
- qtpy=1.10.0=pyhd8ed1ab_0
- readline=8.1=h27cfd23_0
- requests=2.26.0=pyhd3eb1b0_0
- scikit-learn=0.24.2=py37ha9443f7_0
- scipy=1.6.2=py37had2a1c9_1
- send2trash=1.5.0=pyhd3eb1b0_1
- setuptools=52.0.0=py37h06a4308_0
- sip=4.19.8=py37hf484d3e_0
- six=1.16.0=pyhd3eb1b0_0
- sniffio=1.2.0=py37h06a4308_1
- sqlite=3.35.4=hdfb4753_0
- tabulate=0.8.9=py37h06a4308_0
- terminado=0.9.4=py37h06a4308_0
- testpath=0.4.4=pyhd3eb1b0_0
- threadpoolctl=2.1.0=pyh5ca1d4c_0
- tk=8.6.10=hbc83047_0
- torchvision=0.8.2=py37_cu102
- tornado=6.1=py37h27cfd23_0
- traitlets=5.0.5=pyhd3eb1b0_0
- typing_extensions=3.7.4.3=pyha847dfd_0
- wcwidth=0.2.5=py_0
- webencodings=0.5.1=py37_1
- wheel=0.36.2=pyhd3eb1b0_0
- widgetsnbextension=3.5.1=py37_0
- xz=5.2.5=h7b6447c_0
- yacs=0.1.6=py_0
- yaml=0.2.5=h7b6447c_0
- zeromq=4.3.4=h2531618_0
- zipp=3.4.1=pyhd3eb1b0_0
- zlib=1.2.11=h7b6447c_3
- zstd=1.4.9=haebb681_0
- pip:
- absl-py==0.13.0
- addict==2.4.0
- anykeystore==0.2
- cachetools==4.2.2
- cryptacular==1.5.5
- cython==0.29.24
- decorator==4.4.2
- easydict==1.9
- einops==0.3.0
- fire==0.4.0
- flow-vis==0.1
- freetype-py==2.2.0
- future==0.18.2
- glumpy==1.2.0
- google-auth==1.31.0
- google-auth-oauthlib==0.4.4
- greenlet==1.1.0
- grpcio==1.38.0
- hupper==1.10.3
- imageio==2.9.0
- llvmlite==0.36.0
- loguru==0.5.3
- markdown==3.3.4
- networkx==2.5.1
- numba==0.53.1
- numpy-quaternion==2021.6.9.13.34.11
- oauthlib==3.1.1
- opencv-python==4.5.2.54
- pastedeploy==2.1.1
- pbkdf2==1.3
- plaster==1.0
- plaster-pastedeploy==0.7
- protobuf==3.17.3
- pyasn1==0.4.8
- pyasn1-modules==0.2.8
- pyassimp==4.1.3
- pyglet==1.5.17
- pyopengl==3.1.5
- pyopengl-accelerate==3.1.5
- pyramid==2.0
- pyramid-mailer==0.15.1
- python3-openid==3.2.0
- pywavelets==1.1.1
- pyyaml==5.4.1
- repoze-sendmail==4.4.1
- requests-oauthlib==1.3.0
- rsa==4.7.2
- scikit-image==0.18.1
- sqlalchemy==1.4.18
- tensorboard==2.5.0
- tensorboard-data-server==0.6.1
- tensorboard-plugin-wit==1.8.0
- tensorboardx==2.2
- termcolor==1.1.0
- tifffile==2021.6.14
- tqdm==4.61.1
- transaction==3.0.1
- transforms3d==0.3.1
- translationstring==1.4
- triangle==20200424
- urllib3==1.26.5
- velruse==1.1.1
- venusian==3.0.0
- vispy==0.6.6
- webob==1.8.7
- werkzeug==2.0.1
- zope-deprecation==4.4.0
- zope-interface==5.4.0
- mmcv
prefix: /opt/miniconda3/envs/py37_tmp
================================================
FILE: geometry/__init__.py
================================================
================================================
FILE: geometry/cholesky.py
================================================
# import tensorflow as tf
import torch #as tf
import numpy as np
# from utils.einsum import einsum
from torch import einsum
class _cholesky_solve(torch.autograd.Function):
@staticmethod
def forward(ctx, H, b):
chol = torch.cholesky(H)
xx = torch.cholesky_solve(b, chol)
ctx.save_for_backward(chol, xx)
return xx
# see OptNet: https://arxiv.org/pdf/1703.00443.pdf
@staticmethod
def backward(ctx, dx):
chol, xx = ctx.saved_tensors
dz = torch.cholesky_solve(dx, chol)
xs = torch.squeeze(xx, -1)
zs = torch.squeeze(dz, -1)
dH = -einsum('...i,...j->...ij', xs, zs)
return dH, dz
def cholesky_solve(H, b):
return _cholesky_solve.apply(H,b)
def solve(H, b, max_update=1.0):
""" Solves the linear system Hx = b, H > 0"""
# small system, solve on cpu
H = H.to(dtype=torch.float64)
b = b.to(dtype=torch.float64)
b = torch.unsqueeze(b, -1)
x = cholesky_solve(H, b)
# replace NaNs and clip large updates
bad_values = torch.isnan(x)
x = torch.where(bad_values, torch.zeros_like(x), x)
x = torch.clamp(x, -max_update, max_update)
x = torch.squeeze(x, -1)
x = x.to(dtype=torch.float32)
return x
def __test__():
import numpy as np
np.random.seed(0)
M=np.random.uniform(size=(3,3))
H=torch.tensor(M@M.transpose(-1,-2), requires_grad=True )
b=torch.tensor(np.random.uniform(size=(3,) ), requires_grad=True )
x= solve(H,b )
x.backward(torch.ones_like(x) )
print(f"H={H}, b={b}, x={x}, grad={H.grad, b.grad}")
if __name__=="__main__":
__test__()
================================================
FILE: geometry/diff_render.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy
from pytorch3d.renderer import (
OpenGLPerspectiveCameras, look_at_view_transform, look_at_rotation,
RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,
camera_position_from_spherical_angles, HardPhongShader, PointLights,FoVPerspectiveCameras, PerspectiveCameras, SoftPhongShader, Materials
)
try:
from pytorch3d.structures import Meshes, Textures
use_textures = True
except:
from pytorch3d.structures import Meshes
from pytorch3d.renderer import TexturesVertex
from pytorch3d.renderer import TexturesVertex as Textures
use_textures = False
import pytorch3d.renderer.mesh.utils as utils
from pytorch3d.io import load_obj, load_ply, load_objs_as_meshes
from pytorch3d.renderer.mesh.rasterizer import Fragments
from plyfile import PlyData
from utils.furthest_point_sample import fragmentation_fps
import time
def rasterize(R, T, meshes, rasterizer, blur_radius=0):
# It will automatically update the camera settings -> R, T in rasterizer.camera
fragments = rasterizer(meshes, R=R, T=T)
# Copied from the PyTorch3D source code; try this if it is necessary for gradient descent
if blur_radius > 0.0:
clipped_bary_coords = utils._clip_barycentric_coordinates(
fragments.bary_coords
)
clipped_zbuf = utils._interpolate_zbuf(
fragments.pix_to_face, clipped_bary_coords, meshes
)
fragments = Fragments(
bary_coords=clipped_bary_coords,
zbuf=clipped_zbuf,
dists=fragments.dists,
pix_to_face=fragments.pix_to_face,
)
return fragments
def set_bary_coords_to_nearest(bary_coords_):
ori_shape = bary_coords_.shape
exr = bary_coords_ * (bary_coords_ < 0)
bary_coords_ = bary_coords_.view(-1, bary_coords_.shape[-1])
arg_max_idx = bary_coords_.argmax(1)
return torch.zeros_like(bary_coords_).scatter(1, arg_max_idx.unsqueeze(1), 1.0).view(*ori_shape) + exr
class MeshRendererWithDepth(nn.Module):
def __init__(self, rasterizer, shader):
super().__init__()
self.rasterizer = rasterizer
self.shader = shader
def to(self, device):
# Rasterizer and shader have submodules which are not of type nn.Module
self.rasterizer.to(device)
self.shader.to(device)
return self
def forward(self, meshes_world, **kwargs) -> torch.Tensor:
fragments = self.rasterizer(meshes_world, **kwargs)
images = self.shader(fragments, meshes_world, **kwargs)
return images, fragments.zbuf
class DiffRender(nn.Module):
def __init__(self, mesh_path, render_texture=False):
super().__init__()
# self.mesh = mesh
if mesh_path.endswith('.ply'):
verts, faces = load_ply(mesh_path)
self.mesh = Meshes(verts=[verts], faces=[faces])
elif mesh_path.endswith('.obj'):
verts, faces,_ = load_obj(mesh_path)
faces=faces.verts_idx
self.mesh=load_objs_as_meshes([mesh_path])
self.verts = verts
self.faces = faces
self.cam_opencv2pytch3d = torch.tensor(
[[-1,0,0,0],
[0,-1,0, 0],
[0,0, 1, 0],
[0,0, 0, 1]], dtype=torch.float32
)
self.render_texture = render_texture
#get patch infos
self.pat_centers, self.pat_center_inds, self.vert_frag_ids= fragmentation_fps(verts.detach().cpu().numpy(), 64)
self.pat_centers = torch.from_numpy(self.pat_centers)
self.pat_center_inds = torch.from_numpy(self.pat_center_inds)
self.vert_frag_ids = torch.from_numpy(self.vert_frag_ids)[...,None] #Nx1
def to(self, *args, **kwargs):
if 'device' in kwargs.keys():
device = kwargs['device']
else:
device = args[0]
super().to(device)
self.mesh = self.mesh.to(device)
self.verts = self.verts.to(device)
self.faces = self.faces.to(device)
self.pat_centers = self.pat_centers.to(device)
self.pat_center_inds = self.pat_center_inds.to(device)
self.vert_frag_ids = self.vert_frag_ids.to(device)
return self
def get_patch_center_depths(self, T, K):
#no need to pre-transform, as here we do not use pytorch3d rendering
# T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
#render depths
X_cam= (self.pat_centers@R+t) #BxKx3
depth= X_cam[...,2:] #BxKx1
x=X_cam@K.transpose(-1,-2) #BxNx3
x = x/x[...,-1:]
img_coords= x[...,:2]
return depth, img_coords
# Calculate interpolated maps -> [n, c, h, w]
# face_memory.shape: [n_face, 3, c]
@staticmethod
def forward_interpolate(R, t, meshes, face_memory, rasterizer, blur_radius=0, mode='bilinear', return_depth=True):
fragments = rasterize(R, t, meshes, rasterizer, blur_radius=blur_radius)
# [n, h, w, 1, d]
if mode == 'nearest':
out_map = utils.interpolate_face_attributes(fragments.pix_to_face, set_bary_coords_to_nearest(fragments.bary_coords), face_memory)
else:
out_map = utils.interpolate_face_attributes(fragments.pix_to_face, fragments.bary_coords, face_memory)
out_map = out_map.squeeze(dim=3)
out_map = out_map.transpose(3, 2).transpose(2, 1)
if return_depth:
return out_map, fragments.zbuf.permute(0,3,1,2) # depth
else:
return out_map
def render_mesh(self, T, K, render_image_size, near=0.1, far=6, lights=(1,1,-1) ):
B=T.shape[0]
device = T.device
T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1),
principal_point=K[:,:2,2], R=R, T=t, image_size=[render_image_size]*B, in_ndc=False, device=device)
lights = PointLights(device=device, location=[lights])
raster_settings = RasterizationSettings(
image_size=render_image_size,
blur_radius=0.0,
faces_per_pixel=1,
bin_size=None, #0
perspective_correct=True
)
materials = Materials(
device=device,
# specular_color=[[0.0, 1.0, 0.0]],
shininess=0
)
renderer = MeshRendererWithDepth(
rasterizer=MeshRasterizer(
cameras=cameras,
raster_settings=raster_settings
),
shader=SoftPhongShader(
device=device,
cameras=cameras,
lights=lights,
blend_params=BlendParams(1e-4, 1e-4, (0, 0, 0))
)
)
image,depth =renderer(self.mesh, lights=lights, materials=materials)
return image.permute(0,3,1,2)[:,:3], depth.permute(0,3,1,2) # to BCHW
def render_offset_map(self, T, K, render_image_size, near=0.1, far=6):
yy, xx = torch.meshgrid(torch.arange(render_image_size[0], device=T.device), torch.arange(render_image_size[1], device=T.device) )
# xx = xx.to(dtype=torch.float32)
# yy = yy.to(dtype=torch.float32)
coords_grid = torch.stack( [ xx.to(dtype=torch.float32), yy.to(dtype=torch.float32)], dim=-1 )
#no need to pre-transform, as here we do not use pytorch3d rendering
# T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
#render depths
X_cam= (self.pat_centers@R+t)
x=X_cam@K.transpose(-1,-2) #BxNx3
x = x/x[...,-1:]
offset = x[...,None,None,:2] - coords_grid #BxNx1x1x2-HxWx2
return offset.permute(0,1,4,2,3) #BxNx2xHxW
def forward(self, vert_attribute, T, K, render_image_size, near=0.1, far=6, mode='bilinear') :
"""
Args:
vert_attribute: (N,C)
T: (B,3,4) or (B,4,4)
K: (B,3,3)
render_image_size (tuple): (h,w)
near (float, optional): Defaults to 0.1.
far (int, optional): Defaults to 6.
"""
if vert_attribute is None:
return self.render_mesh(T, K, render_image_size, near=0.1, far=6 )
if self.render_texture:
ren_tex=self.render_mesh(T, K, render_image_size, near=0.1, far=6 )
B=T.shape[0]
face_attribute = vert_attribute[self.faces.long()]
device = T.device
T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
# t = -(R@T[...,:3,3:]).squeeze(-1)
cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1),
principal_point=K[:,:2,2], image_size=[render_image_size]*B, in_ndc=False, device=device)
raster_settings = RasterizationSettings(
image_size=render_image_size,
blur_radius=0.0,
faces_per_pixel=1,
bin_size=None, #0
perspective_correct=True
)
rasterizer = MeshRasterizer(
cameras=cameras,
raster_settings=raster_settings
)
out_map, out_depth=self.forward_interpolate(R, t, self.mesh, face_attribute, rasterizer, blur_radius=0, mode=mode)
if not self.render_texture:
return out_map, out_depth
else:
return torch.cat([ren_tex[0], out_map ], dim=1), out_depth
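# Illustrative usage sketch (the mesh path, feature width and poses below are placeholders,
# not values from this repository):
#   renderer = DiffRender("path/to/model.ply").to(device="cuda")
#   fea = torch.rand(renderer.verts.shape[0], 32, device="cuda")        # per-vertex features (N, C)
#   fea_map, depth = renderer(fea, T, K, render_image_size=(480, 640))  # (B, C, H, W), (B, 1, H, W)
# Here T is a (B, 4, 4) object-to-camera pose in the OpenCV convention (converted internally
# via cam_opencv2pytch3d) and K a (B, 3, 3) intrinsic matrix.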
def render_depth(self, T, K, render_image_size, near=0.1, far=6, mode='nearest'):
"""
Args:
T: (B,3,4) or (B,4,4)
K: (B,3,3)
render_image_size (tuple): (h,w)
near (float, optional): Defaults to 0.1.
far (int, optional): Defaults to 6.
mode: 'bilinear' or 'nearest'
"""
B=T.shape[0]
device = T.device
T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1),
principal_point=K[:,:2,2], image_size=[render_image_size]*B, in_ndc=False, device=device)
raster_settings = RasterizationSettings(
image_size=render_image_size,
blur_radius=0.0,
faces_per_pixel=1,
bin_size=0
)
rasterizer = MeshRasterizer(
cameras=cameras,
raster_settings=raster_settings
)
#render depths
vert_depths= (self.verts@R+t).squeeze(0)[...,2:]
face_depths = vert_depths[self.faces.long()]
out_depth=self.forward_interpolate(R, t, self.mesh, face_depths, rasterizer, blur_radius=0, mode='nearest', return_depth=False)
return out_depth
class DiffRendererWrapper(nn.Module):
def __init__(self, obj_paths, device="cuda", render_texture=False ):
super().__init__()
self.renderers = []
for obj_path in obj_paths:
self.renderers.append(
DiffRender(obj_path, render_texture).to(device=device)
)
self.renderers=nn.ModuleList(self.renderers)
self.cls2idx=None #could be updated outside
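# cls2idx maps a class name to the index of its renderer in self.renderers, e.g.
# (hypothetical) {"ape": 0, "cat": 1} when obj_paths contains the ape and cat models in that
# order; it must be assigned by the caller before the per-class methods below are used.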
def get_patch_center_depths(self, model_names, T, K):
depths= []
image_coords= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
depth, img_coord = self.renderers[model_idx].get_patch_center_depths(T[b:b+1], K )
depths.append(depth)
image_coords.append(img_coord)
return torch.cat(depths, dim=0), torch.cat(image_coords, dim=0)
def render_offset_map(self, model_names, T, K, render_image_size, near=0.1, far=6):
offsets= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
offset = self.renderers[model_idx].render_offset_map(T[b:b+1], K[b:b+1], render_image_size, near, far )
offsets.append(offset)
return torch.cat(offsets, dim=0)
def render_pat_id(self, model_names, T, K, render_image_size, near=0.1, far=6):
pat_ids= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
pat_id,_ = self.renderers[model_idx].forward(self.renderers[model_idx].vert_frag_ids.float()+1,T[b:b+1], K[b:b+1], render_image_size, near, far, 'nearest' )
pat_ids.append(pat_id-1) # undo the +1 offset above: valid pixels keep their fragment id, background pixels become -1
return torch.cat(pat_ids, dim=0)
def render_depth(self, model_names, T, K, render_image_size, near=0.1, far=6):
depth_outputs= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
depth = self.renderers[model_idx].render_depth( T[b:b+1], K[b:b+1], render_image_size, near, far, 'nearest' )
depth_outputs.append(depth)
return torch.cat(depth_outputs, dim=0)
def forward(self, model_names, vert_attribute, T, K, render_image_size, near=0.1, far=6):
map_outputs= []
depth_outputs= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
feamap, depth= self.renderers[model_idx]( vert_attribute[b], T[b:b+1], K[b:b+1], render_image_size, near, far )
map_outputs.append(feamap)
depth_outputs.append(depth)
return torch.cat(map_outputs, dim=0) , torch.cat(depth_outputs, dim=0)
================================================
FILE: geometry/diff_render_optim.py
================================================
## Speed-optimized variant: the rasterization result (fragments) is shared among the different rendering passes
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy
from pytorch3d.renderer import (
OpenGLPerspectiveCameras, look_at_view_transform, look_at_rotation,
RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,
camera_position_from_spherical_angles, HardPhongShader, PointLights,FoVPerspectiveCameras, PerspectiveCameras, SoftPhongShader, Materials
)
try:
from pytorch3d.structures import Meshes, Textures
use_textures = True
except:
from pytorch3d.structures import Meshes
from pytorch3d.renderer import TexturesVertex
from pytorch3d.renderer import TexturesVertex as Textures
use_textures = False
import pytorch3d.renderer.mesh.utils as utils
from pytorch3d.io import load_obj, load_ply, load_objs_as_meshes
from pytorch3d.renderer.mesh.rasterizer import Fragments
from plyfile import PlyData
from utils.furthest_point_sample import fragmentation_fps
def rasterize(R, T, meshes, rasterizer, blur_radius=0):
# It will automatically update the camera settings -> R, T in rasterizer.camera
fragments = rasterizer(meshes, R=R, T=T)
# Copied from the PyTorch3D source code; check whether this clipping is needed for gradient descent
if blur_radius > 0.0:
clipped_bary_coords = utils._clip_barycentric_coordinates(
fragments.bary_coords
)
clipped_zbuf = utils._interpolate_zbuf(
fragments.pix_to_face, clipped_bary_coords, meshes
)
fragments = Fragments(
bary_coords=clipped_bary_coords,
zbuf=clipped_zbuf,
dists=fragments.dists,
pix_to_face=fragments.pix_to_face,
)
return fragments
def set_bary_coords_to_nearest(bary_coords_):
ori_shape = bary_coords_.shape
exr = bary_coords_ * (bary_coords_ < 0)
bary_coords_ = bary_coords_.view(-1, bary_coords_.shape[-1])
arg_max_idx = bary_coords_.argmax(1)
return torch.zeros_like(bary_coords_).scatter(1, arg_max_idx.unsqueeze(1), 1.0).view(*ori_shape) + exr
class MeshRendererWithDepth(nn.Module):
def __init__(self, rasterizer, shader):
super().__init__()
self.rasterizer = rasterizer
self.shader = shader
def to(self, device):
# Rasterizer and shader have submodules which are not of type nn.Module
self.rasterizer.to(device)
self.shader.to(device)
return self
def forward(self, meshes_world, **kwargs) -> torch.Tensor:
fragments = self.rasterizer(meshes_world, **kwargs)
images = self.shader(fragments, meshes_world, **kwargs)
return images, fragments.zbuf
class MeshRendererWithDepth_v2(nn.Module):
def __init__(self, rasterizer, shader):
super().__init__()
self.rasterizer = rasterizer
self.shader = shader
# def to(self, device):
def to(self, *args, **kwargs):
if 'device' in kwargs.keys():
device = kwargs['device']
else:
device = args[0]
super().to(device)
# Rasterizer and shader have submodules which are not of type nn.Module
self.rasterizer.to(device)
self.shader.to(device)
return self
def forward(self, meshes_world, **kwargs) -> torch.Tensor:
if 'fragments' not in kwargs.keys() or kwargs['fragments'] is None: # share the fragment results across passes for speed, as rasterization takes most of the time
if 'fragments' in kwargs:
del kwargs['fragments']
fragments = self.rasterizer(meshes_world, **kwargs)
else:
fragments = kwargs['fragments']
del kwargs['fragments']
images = self.shader(fragments, meshes_world, **kwargs)
return images, fragments.zbuf
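# The optional `fragments` keyword lets a rasterization computed elsewhere be reused: when it
# is provided, only shading runs. In DiffRender.forward below, the Fragments returned by
# forward_interpolate are handed to render_mesh(..., fragments=...) so the texture pass does
# not rasterize the same view a second time.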
class DiffRender(nn.Module):
def __init__(self, mesh_path, render_texture=False):
super().__init__()
# self.mesh = mesh
if mesh_path.endswith('.ply'):
verts, faces = load_ply(mesh_path)
self.mesh = Meshes(verts=[verts], faces=[faces])
elif mesh_path.endswith('.obj'):
verts, faces,_ = load_obj(mesh_path)
# import pdb; pdb.set_trace()
faces=faces.verts_idx
self.mesh=load_objs_as_meshes([mesh_path])
# self.mesh = Meshes(verts=verts, faces=faces, textures=None)
self.verts = verts
self.faces = faces
# self.mesh = Meshes(verts=[verts], faces=[faces])
# self.feature=feature
self.cam_opencv2pytch3d = torch.tensor(
[[-1,0,0,0],
[0,-1,0, 0],
[0,0, 1, 0],
[0,0, 0, 1]], dtype=torch.float32
)
self.render_texture = render_texture
#get patch infos
self.pat_centers, self.pat_center_inds, self.vert_frag_ids= fragmentation_fps(verts.detach().cpu().numpy(), 64)
self.pat_centers = torch.from_numpy(self.pat_centers)
self.pat_center_inds = torch.from_numpy(self.pat_center_inds)
self.vert_frag_ids = torch.from_numpy(self.vert_frag_ids)[...,None] #Nx1
def to(self, *args, **kwargs):
if 'device' in kwargs.keys():
device = kwargs['device']
else:
device = args[0]
super().to(device)
# self.rasterizer.cameras = self.rasterizer.cameras.to(device)
# self.face_memory = self.face_memory.to(device)
self.mesh = self.mesh.to(device)
self.verts = self.verts.to(device)
self.faces = self.faces.to(device)
self.pat_centers = self.pat_centers.to(device)
self.pat_center_inds = self.pat_center_inds.to(device)
self.vert_frag_ids = self.vert_frag_ids.to(device)
# self.cam_opencv2pytch3d = self.cam_opencv2pytch3d.to(device=device)
return self
def get_patch_center_depths(self, T, K):
#no need to pre-transform, as here we do not use pytorch3d rendering
# T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
#render depths
X_cam= (self.pat_centers@R+t) #BxKx3
depth= X_cam[...,2:] #BxKx1
x=X_cam@K.transpose(-1,-2) #BxNx3
x = x/x[...,-1:]
img_coords= x[...,:2]
return depth, img_coords
# Calculate interpolated maps -> [n, c, h, w]
# face_memory.shape: [n_face, 3, c]
@staticmethod
def forward_interpolate(R, t, meshes, face_memory, rasterizer, blur_radius=0, mode='bilinear', return_depth=True):
fragments = rasterize(R, t, meshes, rasterizer, blur_radius=blur_radius)
# [n, h, w, 1, d]
if mode == 'nearest':
out_map = utils.interpolate_face_attributes(fragments.pix_to_face, set_bary_coords_to_nearest(fragments.bary_coords), face_memory)
else:
out_map = utils.interpolate_face_attributes(fragments.pix_to_face, fragments.bary_coords, face_memory)
out_map = out_map.squeeze(dim=3)
out_map = out_map.transpose(3, 2).transpose(2, 1)
if return_depth:
return out_map, fragments.zbuf.permute(0,3,1,2), fragments # depth
else:
return out_map, fragments
def render_mesh(self, T, K, render_image_size, near=0.1, far=6, lights=(1,1,-1), fragments=None ):
B=T.shape[0]
# face_attribute = vert_attribute[self.faces.long()]
device = T.device
T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1),
principal_point=K[:,:2,2], R=R, T=t, image_size=[render_image_size]*B, in_ndc=False, device=device)
lights = PointLights(device=device, location=[lights])
raster_settings = RasterizationSettings(
image_size=render_image_size,
blur_radius=0.0,
faces_per_pixel=1, #5,
bin_size=None, #0
perspective_correct=True
)
materials = Materials(
device=device,
# specular_color=[[0.0, 1.0, 0.0]],
shininess=0
)
# renderer = MeshRendererWithDepth(
renderer = MeshRendererWithDepth_v2(
rasterizer=MeshRasterizer(
cameras=cameras,
raster_settings=raster_settings
),
shader=SoftPhongShader(
# shader=SoftGouraudShader(
device=device,
cameras=cameras,
lights=lights,
blend_params=BlendParams(1e-4, 1e-4, (0, 0, 0))
)
)
image,depth =renderer(self.mesh, lights=lights, materials=materials, fragments=fragments)
return image.permute(0,3,1,2)[:,:3], depth.permute(0,3,1,2) # to BCHW
def render_offset_map(self, T, K, render_image_size, near=0.1, far=6):
yy, xx = torch.meshgrid(torch.arange(render_image_size[0], device=T.device), torch.arange(render_image_size[1], device=T.device) )
# xx = xx.to(dtype=torch.float32)
# yy = yy.to(dtype=torch.float32)
coords_grid = torch.stack( [ xx.to(dtype=torch.float32), yy.to(dtype=torch.float32)], dim=-1 )
#no need to pre-transform, as here we do not use pytorch3d rendering
# T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
#render depths
X_cam= (self.pat_centers@R+t)#.squeeze(0)[...,2:]
x=X_cam@K.transpose(-1,-2) #BxNx3
x = x/x[...,-1:]
offset = x[...,None,None,:2] - coords_grid #BxNx1x1x2-HxWx2
return offset.permute(0,1,4,2,3) #BxNx2xHxW
# def forward(self, face_attribute, T, K, render_image_size, near=0.1, far=6):
def forward(self, vert_attribute, T, K, render_image_size, near=0.1, far=6, render_texture=None, mode='bilinear') :
"""
Args:
vert_attribute: (N,C)
T: (B,3,4) or (B,4,4)
K: (B,3,3)
render_image_size (tuple): (h,w)
near (float, optional): Defaults to 0.1.
far (int, optional): Defaults to 6.
"""
# use default rendering settings
if render_texture is None:
render_texture= self.render_texture
if vert_attribute is None:
# only render the rgb image
return self.render_mesh(T, K, render_image_size, near=0.1, far=6 )
B=T.shape[0]
face_attribute = vert_attribute[self.faces.long()]
device = T.device
T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
# t = -(R@T[...,:3,3:]).squeeze(-1)
cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1),
principal_point=K[:,:2,2], image_size=[render_image_size]*B, in_ndc=False, device=device)
raster_settings = RasterizationSettings(
image_size=render_image_size,
blur_radius=0.0,
faces_per_pixel=1,
bin_size=None, #0
perspective_correct=True
)
rasterizer = MeshRasterizer(
cameras=cameras,
raster_settings=raster_settings
)
# forward_interpolate(R, T, meshes, face_memory, rasterizer, blur_radius=0, mode='bilinear')
out_map, out_depth, fragments=self.forward_interpolate(R, t, self.mesh, face_attribute, rasterizer, blur_radius=0, mode=mode)
if not render_texture:
return out_map, out_depth
else:
ren_tex=self.render_mesh(T, K, render_image_size, near=0.1, far=6, fragments=fragments )
#The first 3 channels contain the rendered textures
return torch.cat([ren_tex[0], out_map ], dim=1), out_depth
def render_depth(self, T, K, render_image_size, near=0.1, far=6, mode='nearest'):
"""
Args:
T: (B,3,4) or (B,4,4)
K: (B,3,3)
render_image_size (tuple): (h,w)
near (float, optional): Defaults to 0.1.
far (int, optional): Defaults to 6.
mode: 'bilinear' or 'nearest'
"""
B=T.shape[0]
device = T.device
T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
cameras = PerspectiveCameras(focal_length= torch.stack([K[:,0,0], K[:,1,1] ], dim=-1),
principal_point=K[:,:2,2], image_size=[render_image_size]*B, in_ndc=False, device=device)
raster_settings = RasterizationSettings(
image_size=render_image_size,
blur_radius=0.0,
faces_per_pixel=1,
bin_size=0
)
rasterizer = MeshRasterizer(
cameras=cameras,
raster_settings=raster_settings
)
#render depths
vert_depths= (self.verts@R+t).squeeze(0)[...,2:]
face_depths = vert_depths[self.faces.long()]
out_depth, _ =self.forward_interpolate(R, t, self.mesh, face_depths, rasterizer, blur_radius=0, mode='nearest', return_depth=False)
return out_depth
def render_pointcloud(self, T, K, render_image_size, near=0.1, far=6):
"""
Args:
T: (B,3,4) or (B,4,4)
K: (B,3,3)
render_image_size (tuple): (h,w)
near (float, optional): Defaults to 0.1.
far (int, optional): Defaults to 6.
"""
B=T.shape[0]
device = T.device
# T = self.cam_opencv2pytch3d.to(device=T.device)@T
## X_cam = X_world R + t
R = T[...,:3,:3].transpose(-1,-2)
t = T[...,:3,3]
#render depths
# vert_depths= (self.verts@R+t).squeeze(0)[...,2:]
X_cam= (self.verts@R+t)#.squeeze(0)
x=X_cam@K.transpose(-1,-2) #BxNx3
depth = x[...,-1]
x = x/x[...,-1:]
out = torch.zeros([1,1, *render_image_size], dtype=R.dtype, device=R.device)
out[:, :,
torch.round(x[0, :, 1]).long().clamp(0, out.shape[2]-1),
torch.round(x[0, :, 0]).long().clamp(0, out.shape[3]-1)] = depth
return out #1x1xHxW
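# Splats the camera-frame depth (Z) of every mesh vertex into a 1x1xHxW map at its rounded,
# clamped pixel location. No z-buffering is performed, so overlapping vertices simply
# overwrite each other and off-screen vertices land on the image border.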
class DiffRendererWrapper(nn.Module):
def __init__(self, obj_paths, device="cuda", render_texture=False ):
super().__init__()
self.renderers = []
for obj_path in obj_paths:
self.renderers.append(
DiffRender(obj_path, render_texture).to(device=device)
)
self.renderers=nn.ModuleList(self.renderers)
self.cls2idx=None #updated outside
def get_patch_center_depths(self, model_names, T, K):
depths= []
image_coords= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
depth, img_coord = self.renderers[model_idx].get_patch_center_depths(T[b:b+1], K )
depths.append(depth)
image_coords.append(img_coord)
return torch.cat(depths, dim=0), torch.cat(image_coords, dim=0)
def render_offset_map(self, model_names, T, K, render_image_size, near=0.1, far=6):
offsets= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
offset = self.renderers[model_idx].render_offset_map(T[b:b+1], K[b:b+1], render_image_size, near, far )
offsets.append(offset)
return torch.cat(offsets, dim=0)
def render_pat_id(self, model_names, T, K, render_image_size, near=0.1, far=6):
pat_ids= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
# face_pat_id = self.renderers[model_idx].vert_frag_ids[self.renderers[model_idx].faces.long()]
pat_id,_ = self.renderers[model_idx].forward(self.renderers[model_idx].vert_frag_ids.float()+1,T[b:b+1], K[b:b+1], render_image_size, near, far, 'nearest' )
pat_ids.append(pat_id-1) # undo the +1 offset above: valid pixels keep their fragment id, background pixels become -1
return torch.cat(pat_ids, dim=0)
def render_depth(self, model_names, T, K, render_image_size, near=0.1, far=6):
depth_outputs= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
depth = self.renderers[model_idx].render_depth( T[b:b+1], K[b:b+1], render_image_size, near, far, 'nearest' )
depth_outputs.append(depth)
return torch.cat(depth_outputs, dim=0)
def render_mesh(self, model_names, T, K, render_image_size, near=0.1, far=6):
outputs= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
img= self.renderers[model_idx].render_mesh( T[b:b+1], K[b:b+1], render_image_size, near, far, )[0]
outputs.append(img)
return torch.cat(outputs, dim=0)
def render_pointcloud(self, model_names, T, K, render_image_size, near=0.1, far=6):
outputs= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
depth = self.renderers[model_idx].render_pointcloud( T[b:b+1], K[b:b+1], render_image_size, near, far )
outputs.append(depth)
return torch.cat(outputs, dim=0)
def forward(self, model_names, vert_attribute, T, K, render_image_size, near=0.1, far=6, render_tex=False):
map_outputs= []
depth_outputs= []
for b,_ in enumerate(model_names):
model_idx = self.cls2idx[model_names[b]]
feamap, depth= self.renderers[model_idx]( vert_attribute[b], T[b:b+1], K[b:b+1], render_image_size, near, far, render_texture=render_tex )
map_outputs.append(feamap)
depth_outputs.append(depth)
return torch.cat(map_outputs, dim=0) , torch.cat(depth_outputs, dim=0)
================================================
FILE: geometry/einsum.py
================================================
# import tensorflow as torch
import torch as torch
import numpy as np
import re
import string
def einsum(equation, *inputs):
equation = equation.replace(' ', '')
# input_shapes = [x.get_shape() for x in list(inputs)]
input_shapes = [x.shape for x in list(inputs)]
match = re.match('^([a-zA-Z,.]+)(->[a-zA-Z.]*)?$', equation)
if not match:
raise ValueError('Indices have incorrect format: %s' % equation)
input_axis_labels = match.group(1).split(',')
output_axis_labels = match.group(2)[2:] if match.group(2) else None
if len(input_shapes) != len(input_axis_labels):
raise ValueError('Got %d arguments for equation "%s", expecting %d' %
(len(input_shapes), equation, len(input_axis_labels)))
# Resolve Ellipsis
# Assign axes labels for unspecified dimensions in inputs. Labels taken
# from unused labels. Follow numpy einsum broadcasting conventions for
# tensors of different length and unlabeled output.
ellipsis_axes = ''
if '...' in equation:
unused = ''.join([c for c in string.ascii_lowercase
if c not in ''.join(input_axis_labels)])
for i, ax in enumerate(input_axis_labels):
if '...' in ax:
parts = ax.split('...')
if len(parts) != 2:
raise ValueError('Unable to resolve ellipsis. Excess number found.')
# n = input_shapes[i].ndims - len(''.join(parts))
n = len(input_shapes[i]) - len(''.join(parts))
if n < 0:
raise ValueError('Ellipses lengths do not match.')
if len(unused) < n:
raise ValueError(
'Unable to resolve ellipsis, too many distinct labels.')
replace_axes = unused[-n:] if n > 0 else ''
input_axis_labels[i] = input_axis_labels[i].replace('...',
replace_axes)
if len(replace_axes) > len(ellipsis_axes):
ellipsis_axes = replace_axes
equation = equation.replace('...', ellipsis_axes)
out = torch.einsum(equation, *inputs)
# torch.add_to_collection("checkpoints", out)
return out
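# Example: with A of shape (2, 5, 3, 4) and B of shape (2, 5, 4, 6),
#   einsum('...ij,...jk->...ik', A, B)
# resolves '...' to unused letters, producing an equation such as 'yzij,yzjk->yzik', and
# returns a (2, 5, 3, 6) tensor via torch.einsum.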
================================================
FILE: geometry/intrinsics.py
================================================
import torch
import numpy as np
# from utils.einsum import einsum
from .einsum import einsum
def intrinsics_vec_to_matrix(kvec):
fx, fy, cx, cy = torch.unbind(kvec, dim=-1)
z = torch.zeros_like(fx)
o = torch.ones_like(fx)
K = torch.stack([fx, z, cx, z, fy, cy, z, z, o], dim=-1)
K = torch.reshape(K, list(kvec.shape)[:-1] + [3,3])
return K
def intrinsics_matrix_to_vec(kmat):
fx = kmat[..., 0, 0]
fy = kmat[..., 1, 1]
cx = kmat[..., 0, 2]
cy = kmat[..., 1, 2]
return torch.stack([fx, fy, cx, cy], dim=-1)
def update_intrinsics(intrinsics, delta_focal):
kvec = intrinsics_matrix_to_vec(intrinsics)
fx, fy, cx, cy = torch.unbind(kvec, dim=-1)
df = torch.squeeze(delta_focal, -1)
# update the focal lengths
fx = torch.exp(df) * fx
fy = torch.exp(df) * fy
kvec = torch.stack([fx, fy, cx, cy], dim=-1)
kmat = intrinsics_vec_to_matrix(kvec)
return kmat
def rescale_depth(depth, downscale=4):
depth = depth[:,None]
new_shape = [depth.shape[-2]//downscale, depth.shape[-1]//downscale]
depth = torch.nn.functional.interpolate(depth, new_shape, mode='nearest')
return torch.squeeze(depth, dim=1)
def rescale_depth_and_intrinsics(depth, intrinsics, downscale=4):
sc = torch.tensor([1.0/downscale, 1.0/downscale, 1.0], dtype=torch.float32, device=depth.device)
intrinsics = einsum('...ij,i->...ij', intrinsics, sc)
depth = rescale_depth(depth, downscale=downscale)
return depth, intrinsics
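# E.g. with downscale=4, the first two rows of K are scaled by 1/4 (so fx, fy, cx, cy are all
# divided by 4), matching the nearest-neighbour downsampling applied to the depth map.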
def rescale_depths_and_intrinsics(depth, intrinsics, downscale=4):
batch, frames, height, width = [depth.shape[i] for i in range(4)]
depth = torch.reshape(depth, [batch*frames, height, width])
depth, intrinsics = rescale_depth_and_intrinsics(depth, intrinsics, downscale)
depth = torch.reshape(depth,
[batch, frames]+list(depth.shape)[1:])
return depth, intrinsics
================================================
FILE: geometry/projective_ops.py
================================================
import numpy as np
import torch
# from utils.einsum import einsum
from torch import einsum
# MIN_DEPTH = 0.1
MIN_DEPTH = 0.01
def normalize_coords_grid(coords):
""" normalize the coordinates to [-1,1]
Args:
coords: BxKxHxWx2
"""
coords=coords.clone()
B,K,H,W,_ = coords.shape
coords[...,0] = 2*coords[...,0]/(W-1)-1
coords[...,1] = 2*coords[...,1]/(H-1)-1
return coords
def coords_grid(ref, homogeneous=True):
""" grid of pixel coordinates """
shape = ref.shape
yy, xx = torch.meshgrid(torch.arange(shape[-2], device=ref.device), torch.arange(shape[-1], device=ref.device) )
xx = xx.to(dtype=torch.float32)
yy = yy.to(dtype=torch.float32)
if homogeneous:
coords = torch.stack([xx, yy, torch.ones_like(xx)], dim=-1)
else:
coords = torch.stack([xx, yy], dim=-1)
new_shape = [1]*len(shape[:-2]) + list(shape[-2:]) + [-1]
coords = torch.reshape(coords, new_shape)
tile = list(shape[:-2])+ [1,1,1]
coords = coords.repeat(tile)
return coords # BxKxHxWx3 if homogeneous else BxKxHxWx2
def extract_and_reshape_intrinsics(intrinsics, shape=None):
""" Extracts (fx, fy, cx, cy) from intrinsics matrix """
fx = intrinsics[:, 0, 0]
fy = intrinsics[:, 1, 1]
cx = intrinsics[:, 0, 2]
cy = intrinsics[:, 1, 2]
if shape is not None:
batch = list(fx.shape[:1])
fillr = [1]*len(shape[1:])
k_shape = batch+fillr
fx = torch.reshape(fx, k_shape)
fy = torch.reshape(fy, k_shape)
cx = torch.reshape(cx, k_shape)
cy = torch.reshape(cy, k_shape)
return (fx, fy, cx, cy)
def backproject(depth, intrinsics, jacobian=False, depth_coords=None):
""" backproject depth map to point cloud """
#depth_coords: (BxKxHxWx2)
if depth_coords is None:
coords = coords_grid(depth, homogeneous=True)
x, y, _ = torch.unbind(coords, axis=-1)
else:
x, y = torch.unbind(depth_coords, axis=-1)
x_shape = x.shape
fx, fy, cx, cy = extract_and_reshape_intrinsics(intrinsics, x_shape) #Bx1x1x1
Z = depth #BxKxHxW
X = Z * (x - cx) / fx
Y = Z * (y - cy) / fy
points = torch.stack([X, Y, Z], axis=-1)
if jacobian:
o = torch.zeros_like(Z) # used to fill in zeros
# jacobian w.r.t (fx, fy) , of shape BxKxHxWx4x1
jacobian_intrinsics = torch.stack([
torch.stack([-X / fx], dim=-1),
torch.stack([-Y / fy], dim=-1),
torch.stack([o], dim=-1),
torch.stack([o], dim=-1)], axis=-2)
return points, jacobian_intrinsics
return points
# return points, coords
def project(points, intrinsics, jacobian=False):
""" project point cloud onto image """
X, Y, Z = torch.unbind(points, axis=-1)
Z = torch.clamp(Z, min=MIN_DEPTH)
x_shape = X.shape
fx, fy, cx, cy = extract_and_reshape_intrinsics(intrinsics, x_shape)
x = fx * (X / Z) + cx
y = fy * (Y / Z) + cy
coords = torch.stack([x, y], axis=-1)
if jacobian:
o = torch.zeros_like(x) # used to fill in zeros
zinv1 = torch.where(Z <= MIN_DEPTH+.01, torch.zeros_like(Z), 1.0 / Z)
zinv2 = torch.where(Z <= MIN_DEPTH+.01, torch.zeros_like(Z), 1.0 / Z**2)
# jacobian w.r.t (X, Y, Z)
jacobian_points = torch.stack([
torch.stack([fx * zinv1, o, -fx * X * zinv2], axis=-1),
torch.stack([o, fy * zinv1, -fy * Y * zinv2], axis=-1)], axis=-2)
# jacobian w.r.t (fx, fy)
jacobian_intrinsics = torch.stack([
torch.stack([X * zinv1], axis=-1),
torch.stack([Y * zinv1], axis=-1),], axis=-2)
return coords, (jacobian_points, jacobian_intrinsics)
return coords
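# Pinhole model used above: x = fx * X / Z + cx and y = fy * Y / Z + cy, hence
#   dx/dX = fx/Z, dx/dZ = -fx * X / Z**2, dy/dY = fy/Z, dy/dZ = -fy * Y / Z**2,
# which is what jacobian_points stacks (zeros elsewhere); points with Z <= MIN_DEPTH + 0.01
# get a zeroed Jacobian through zinv1/zinv2.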
================================================
FILE: geometry/se3.py
================================================
"""
SO3 and SE3 operations, exponentials and logarithms adapted from Sophus
"""
import numpy as np
import torch
from .einsum import einsum
MIN_THETA = 1e-4
def matdotv(A,b):
return torch.squeeze(torch.matmul(A, torch.unsqueeze(b, -1)), -1)
def hat(a):
a1, a2, a3 = torch.split(a, [1,1,1], dim=-1)
zz = torch.zeros_like(a1)
ax = torch.stack([
torch.cat([zz,-a3,a2], dim=-1),
torch.cat([a3,zz,-a1], dim=-1),
torch.cat([-a2,a1,zz], dim=-1)
], dim=-2)
return ax
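# hat([a1, a2, a3]) is the skew-symmetric matrix
#   [[  0, -a3,  a2],
#    [ a3,   0, -a1],
#    [-a2,  a1,   0]]
# so that hat(a) @ b equals the cross product a x b.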
### quaternion functions ###
def quaternion_rotate_point(q, pt, eq=None):
if eq is None:
w, vec = torch.split(q, [1, 3], axis=-1)
uv = 2*matdotv(hat(vec), pt)
return pt + w*uv + matdotv(hat(vec), uv)
else:
w, vec = torch.split(q, [1, 3], axis=-1)
uv1 = 2*einsum(eq, hat(w*vec), pt)
uv2 = 2*einsum(eq, hat(vec), pt)
return pt + uv1 + einsum(eq, hat(vec), uv2)
def quaternion_rotate_matrix(q, mat, eq=None):
if eq is None:
w, vec = torch.split(q, [1, 3], axis=-1)
uv = 2*torch.matmul(hat(vec), mat)
return mat + w*uv + torch.matmul(hat(vec), uv)
else:
w, vec = torch.split(q, [1, 3], axis=-1)
uv1 = 2*einsum(eq, hat(w*vec), mat)
uv2 = 2*einsum(eq, hat(vec), mat)
return mat + uv1 + einsum(eq, hat(vec), uv2)
def quaternion_inverse(q):
return q * torch.tensor([1, -1, -1, -1], dtype=q.dtype, device=q.device) # conjugate: negate the vector part
def quaternion_multiply(a, b):
aw, ax, ay, az = torch.split(a, [1,1,1,1], axis=-1)
bw, bx, by, bz = torch.split(b, [1,1,1,1], axis=-1)
q = torch.concat([
aw * bw - ax * bx - ay * by - az * bz,
aw * bx + ax * bw + ay * bz - az * by,
aw * by + ay * bw + az * bx - ax * bz,
aw * bz + az * bw + ax * by - ay * bx,
], axis=-1)
return q
def quaternion_to_matrix(q):
w, x, y, z = torch.split(q, [1,1,1,1], axis=-1)
r11 = 1 - 2 * y**2 - 2 * z**2
r12 = 2 * x * y - 2 * w * z
r13 = 2 * z * x + 2 * w * y
r21 = 2 * x * y + 2 * w * z
r22 = 1 - 2 * x**2 - 2 * z**2
r23 = 2 * y * z - 2 * w * x
r31 = 2 * z * x - 2 * w * y
r32 = 2 * y * z + 2 * w * x
r33 = 1 - 2 * x**2 - 2 * y**2
R = torch.stack([
torch.concat([r11,r12,r13], axis=-1),
torch.concat([r21,r22,r23], axis=-1),
torch.concat([r31,r32,r33], axis=-1)
], axis=-2)
return R
def rotation_matrix_to_quaternion(R):
Rxx, Ryx, Rzx = R[...,0,0], R[...,0,1], R[...,0,2]
Rxy, Ryy, Rzy = R[...,1,0], R[...,1,1], R[...,1,2]
Rxz, Ryz, Rzz = R[...,2,0], R[...,2,1], R[...,2,2]
zz = torch.zeros_like(Rxx)
k1 = torch.stack([Rxx-Ryy-Rzz, zz, zz, zz], axis=-1)
k2 = torch.stack([Ryx+Rxy, Ryy-Rxx-Rzz, zz, zz], axis=-1)
k3 = torch.stack([Rzx+Rxz, Rzy+Ryz, Rzz-Rxx-Ryy,zz], axis=-1)
k4 = torch.stack([Ryz-Rzy, Rzx-Rxz, Rxy-Ryx, Rxx+Ryy+Rzz], axis=-1)
K = torch.stack([k1, k2, k3, k4], axis=-2)
eigvals, eigvecs = torch.linalg.eigh(K)
x, y, z, w = torch.split(eigvecs[...,-1], [1,1,1,1], axis=-1)
qvec = torch.concat([w, x, y, z], axis=-1)
qvec = qvec / torch.sqrt(torch.sum(qvec**2, dim=-1, keepdim=True))
return qvec * torch.sign(w)
def so3_expm_and_theta(omega):
""" omega in \so3 """
theta_sq = torch.sum(omega**2, dim=-1)
theta = torch.sqrt(theta_sq)
half_theta = 0.5*theta
### small ###
imag_factor = torch.where(theta>MIN_THETA,
torch.sin(half_theta) / (theta + 1e-12),
0.5 - (1.0/48.0)*theta_sq + (1.0/3840.0)*theta_sq*theta_sq)
real_factor = torch.where(theta>MIN_THETA, torch.cos(half_theta),
1.0 - (1.0/8.0)*theta_sq + (1.0/384.0)*theta_sq*theta_sq)
qw = real_factor
qx = imag_factor * omega[...,0]
qy = imag_factor * omega[...,1]
qz = imag_factor * omega[...,2]
quat = torch.stack([qw, qx, qy, qz], axis=-1)
return quat, theta
def so3_logm_and_theta(so3):
w, vec = torch.split(so3, [1,3], axis=-1)
squared_n = torch.sum(vec**2, dim=-1, keepdim=True)
n = torch.sqrt(squared_n)
two_atan_nbyw_by_n = torch.where(n SE(3), works for arbitrary batch dimensions
- Note: gradient is overridden with _se3_matrix_expm_grad, which approximates
gradient for small upsilon_omega
"""
eps=1e-12
inp_shape = upsilon_omega.shape
out_shape = list(inp_shape)[:-1]+[4,4]
upsilon_omega = torch.reshape(upsilon_omega, [-1, 6])
batch = upsilon_omega.shape[0]
v, w = torch.split(upsilon_omega, [3, 3], dim=-1)
theta_sq = torch.sum(w**2, dim=1 )
theta_sq = torch.reshape(theta_sq, [-1, 1, 1])
theta = torch.sqrt(theta_sq)
theta_po4 = theta_sq * theta_sq
wx = hat(w)
wx_sq = torch.matmul(wx, wx)
I = torch.eye(3, dtype=upsilon_omega.dtype, device=upsilon_omega.device).repeat([batch,1,1])
### taylor approximations ###
R1 = I + (1.0 - (1.0/6.0)*theta_sq + (1.0/120.0)*theta_po4) * wx + \
(0.5 - (1.0/12.0)*theta_sq + (1.0/720.0)*theta_po4) * wx_sq
V1 = I + (0.5 - (1.0/24.0)*theta_sq + (1.0/720.0)*theta_po4)*wx + \
((1.0/6.0) - (1.0/120.0)*theta_sq + (1.0/5040.0)*theta_po4)*wx_sq
### exact values ###
R2 = I + (torch.sin(theta) / (theta+eps)) * wx +\
((1 - torch.cos(theta)) / (theta_sq+eps)) * wx_sq
V2 = I + ((1 - torch.cos(theta)) / (theta_sq + eps)) * wx + \
((theta - torch.sin(theta))/(theta_sq*theta + eps)) * wx_sq
# print(theta.shape, R1.shape, R2.shape, ">>>", flush=True)
# R = torch.where(theta[:, 0, 0] MIN_DEPTH) & (pt_new[..., -1] > MIN_DEPTH)
# vmask = torch.cast(vmask, torch.float32)[..., torch.newaxis]
# vmask = vmask.to(dtype=torch.float32)[..., None, :,:] #BxKx1xHxW
vmask = vmask.to(dtype=torch.float32)[..., :, :, None] # BxKx1xHxW
return coords, vmask
return coords
def induced_flow(self, depth, intrinsics, valid_mask=False):
coords0 = pops.coords_grid(depth, homogeneous=False)
if valid_mask:
coords1, vmask = self.transform(
depth, intrinsics, valid_mask=valid_mask)
return coords1 - coords0, vmask
coords1 = self.transform(depth, intrinsics, valid_mask=valid_mask)
return coords1 - coords0
def depth_change(self, depth, intrinsics):
pt = pops.backproject(depth, intrinsics)
pt_new = self.__call__(pt)
return pt_new[..., -1] - pt[..., -1]
def identity(self):
""" Push identity transformation to start of collection """
batch, frames = self.shape()
if self.internal == 'matrix':
# I = torch.eye(4, batch_shape=[batch, 1])
I = torch.eye(4, dtype=self.G.dtype, device=self.G.device).repeat(
[batch, 1, 1, 1])
# return self.__class__(matrix=I, internal=self.internal, eq=self.eq)
return self.__class__(matrix=I, internal=self.internal, eq=self.eq)
else:
raise NotImplementedError
class SE3Sequence(SE3):
""" Stores collection of SE3 objects """
def __init__(self, upsilon=None, matrix=None, so3=None, translation=None, eq= "aijk,ai...k->ai...j",internal=DEFAULT_INTERNAL):
super().__init__(
upsilon, matrix, so3, translation, internal=internal, eq=eq)
# self.eq = "aijk,ai...k->ai...j"
def __call__(self, pt, inds=None, jacobian=False):
if self.internal == 'matrix':
return super().__call__(pt, jacobian=jacobian)
else:
raise NotImplementedError
def gather(self, inds):
if self.internal == 'matrix':
G = torch.index_select(self.G, index=inds, dim=1)
return SE3Sequence(matrix=G, internal=self.internal)
else:
raise NotImplementedError
# def append_identity(self):
# """ Push identity transformation to start of collection """
# batch, frames = self.shape()
# if self.internal == 'matrix':
# # I = torch.eye(4, batch_shape=[batch, 1])
# I = torch.eye(4, dtype=self.G.dtype, device=self.G.device).repeat(
# [batch, 1, 1, 1])
# G = torch.cat([I, self.G], dim=1)
# return SE3Sequence(matrix=G, internal=self.internal)
# else:
# raise NotImplementedError
def reprojction_optim(self,
target,
weight,
depth,
intrinsics,
num_iters=2,
depth_img_coords=None
):
target = clip_dangerous_gradients(target).to(dtype=torch.float64)
weight = clip_dangerous_gradients(weight).to(dtype=torch.float64)
X0 = pops.backproject(depth, intrinsics, depth_coords=depth_img_coords)
w = weight[..., None]
lm_lmbda = get_cfg("LM").LM_LMBDA
ep_lmbda = get_cfg("LM").EP_LMBDA
T = self.copy(stop_gradients=False)
for i in range(num_iters):
### compute the jacobians of the transformation ###
X1, jtran = T(X0, jacobian=True)
x1, (jproj, jkvec) = pops.project(X1, intrinsics, jacobian=True)
v = (X0[..., -1] > MIN_DEPTH) & (X1[..., -1] > MIN_DEPTH)
# v = v.to(dtype=torch.float32)[..., None, None]
v = v.to(dtype=torch.float64)[..., None, None]
### weighted gauss-newton update ###
J = einsum('...ij,...jk->...ik', jproj.to(dtype=torch.float64), jtran.to(dtype=torch.float64 ))
H = einsum('ai...j,ai...k->aijk', v*w*J, J)
b = einsum('ai...j,ai...->aij', v*w*J, target-x1)
### add dampening and apply increment ###
H += ep_lmbda*torch.eye(6, dtype=H.dtype, device=H.device) + lm_lmbda*H*torch.eye(6,dtype=H.dtype, device=H.device)
try:
delta_upsilon = cholesky_solve(H, b)
except:
# print(w.shape,v.shape, w.mean(), v.mean(),H,b, '!!!!')
raise
T = T.increment(delta_upsilon)
# update
if self.internal == 'matrix':
self.G = T.matrix()
T = SE3Sequence(
matrix=T.matrix(), internal=self.internal)
else:
raise NotImplementedError
return T
def transform(self, depth, intrinsics, valid_mask=False, return3d=False):
return super().transform(depth, intrinsics, valid_mask, return3d)
================================================
FILE: model/CFNet.py
================================================
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from thirdparty.raft.update import BasicUpdateBlock
from thirdparty.raft.extractor import BasicEncoder
from thirdparty.raft.corr import CorrBlock, AlternateCorrBlock
from thirdparty.raft.utils.utils import bilinear_sampler, coords_grid, upflow
try:
autocast = torch.cuda.amp.autocast
except:
# dummy autocast for PyTorch < 1.6
class autocast:
def __init__(self, enabled):
pass
def __enter__(self):
pass
def __exit__(self, *args):
pass
class ImageFeaEncoder(nn.Module):
def __init__(self, input_dim=3, output_dim=256):
super().__init__()
self.fnet = BasicEncoder(output_dim=output_dim, norm_fn='instance', dropout=False, input_dim=input_dim)
if 1:#self.args.pretrained_model is not None:
print("Loading the weights of RAFT...")
import os
self.load_state_dict(
# torch.load(self.args.pretrained_model, map_location='cpu'), strict=False
torch.load( f"{os.path.dirname(os.path.abspath(__file__)) }/../weights/img_fea_enc.pth", map_location='cpu'), strict=True
)
else:
print("ImageFeaEncoder will be trained from scratch...")
def forward(self, image1, image2):
image1 = 2 * (image1 / 255.0) - 1.0
image2 = 2 * (image2 / 255.0) - 1.0
image1 = image1.contiguous()
image2 = image2.contiguous()
with autocast(enabled=True):
fmap1, fmap2 = self.fnet([image1, image2])
return fmap1, fmap2
class GRU_CFUpdator(nn.Module):
def __init__(self, args):
super().__init__()
self.args = args
input_dim = args.get("input_dim", 3)
self.hidden_dim = hdim = 128
self.context_dim = cdim = 128
args.corr_levels = 4
args.corr_radius = 4
if 'alternate_corr' not in self.args:
self.args.alternate_corr = False
self.update_block = BasicUpdateBlock(self.args, hidden_dim=hdim)
if self.args.pretrained_model is not None:
print("Loading the weights of RAFT...")
import os
self.load_state_dict(
# torch.load(self.args.pretrained_model, map_location='cpu'), strict=False
torch.load( f"{os.path.dirname(os.path.abspath(__file__)) }/../weights/gru_update.pth", map_location='cpu'), strict=True
)
else:
print("GRU_CFUpdator will be trained from scratch...")
def freeze_bn(self):
for m in self.modules():
if isinstance(m, nn.BatchNorm2d):
m.eval()
def initialize_flow(self, img, downsample_rate=8):
""" Flow is represented as difference between two coordinate grids flow = coords1 - coords0"""
N, C, H, W = img.shape
coords0 = coords_grid(N, H//downsample_rate, W//downsample_rate).to(img.device)
coords1 = coords_grid(N, H//downsample_rate, W//downsample_rate).to(img.device)
# optical flow computed as difference: flow = coords1 - coords0
return coords0, coords1
def upsample_flow(self, flow, mask, upsample_scale=8):
""" Upsample flow field [H/8, W/8, 2] -> [H, W, 2] using convex combination """
N, _, H, W = flow.shape
mask = mask.view(N, 1, 9, upsample_scale, upsample_scale, H, W)
mask = torch.softmax(mask, dim=2)
up_flow = F.unfold(upsample_scale * flow, [3,3], padding=1)
up_flow = up_flow.view(N, 2, 9, 1, 1, H, W)
up_flow = torch.sum(mask * up_flow, dim=2)
up_flow = up_flow.permute(0, 1, 4, 2, 5, 3)
return up_flow.reshape(N, 2, upsample_scale*H, upsample_scale*W)
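# Convex upsampling as in RAFT: every fine-resolution pixel is a softmax-weighted combination
# of its 3x3 coarse-resolution neighbours, and the flow vectors are multiplied by
# `upsample_scale` so that they are expressed in fine-resolution pixel units.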
def forward(self, fmap1, fmap2, iters=1, flow_init=None, upsample=True, test_mode=False, context_fea=None, update_corr_fn=True):
""" Estimate optical flow between pair of frames """
hdim = self.hidden_dim
cdim = self.context_dim
if update_corr_fn: # needs careful handling outside
# run the feature network
self.fmap1 = fmap1.float()
self.fmap2 = fmap2.float()
if self.args.alternate_corr:
self.corr_fn = AlternateCorrBlock(self.fmap1, self.fmap2, radius=self.args.corr_radius)
else:
self.corr_fn = CorrBlock(self.fmap1, self.fmap2, radius=self.args.corr_radius)
if update_corr_fn:
# run the context network
with autocast(enabled=self.args.mixed_precision):
assert context_fea is not None
ds = context_fea.shape[-1]//self.fmap1.shape[-1]
cnet = F.interpolate(context_fea, scale_factor=1/ds, mode='bilinear', align_corners=True)
self.net, self.inp = torch.split(cnet, [hdim, cdim], dim=1)
self.net = torch.tanh(self.net)
self.inp = torch.relu(self.inp)
# coords0, coords1 = self.initialize_flow(image1)
coords0, coords1 = self.initialize_flow(flow_init)
if flow_init is not None:
ds = flow_init.shape[-1]//coords0.shape[-1]
if ds !=1:
flow_init /=ds
flow_init = F.interpolate(flow_init, scale_factor=1/ds, mode='bilinear', align_corners=True)
coords1 = coords1 + flow_init
flow_predictions = []
for itr in range(iters):
coords1 = coords1.detach()
corr = self.corr_fn(coords1) # index correlation volume
flow = coords1 - coords0
with autocast(enabled=self.args.mixed_precision):
# net, up_mask, delta_flow = self.update_block(net, inp, corr, flow)
self.net, up_mask, delta_flow = self.update_block(self.net, self.inp, corr, flow)
# F(t+1) = F(t) + \Delta(t)
coords1 = coords1 + delta_flow
# upsample predictions
if up_mask is None:
flow_up = upflow(coords1 - coords0, scale=image1.shape[2]//coords0.shape[2],)
else:
if self.args.fea_net in ["bigdx4"]:
flow_up = self.upsample_flow(coords1 - coords0, up_mask, upsample_scale=4)
else:
flow_up = self.upsample_flow(coords1 - coords0, up_mask)
flow_predictions.append(flow_up)
if test_mode:
return coords1 - coords0, flow_up
return flow_predictions
================================================
FILE: model/HybridNet.py
================================================
import torch
import torch.nn as nn
from thirdparty.kpconv.kpconv_blocks import *
import torch.nn.functional as F
import numpy as np
from thirdparty.kpconv.lib.utils import square_distance
from model.descriptor2D import SuperPoint2D
from model.descriptor3D import KPSuperpoint3Dv2
REGISTERED_HYBRID_NET_CLASSES={}
def register_hybrid_net(cls, name=None):
global REGISTERED_HYBRID_NET_CLASSES
if name is None:
name = cls.__name__
assert name not in REGISTERED_HYBRID_NET_CLASSES, f"exist class: {REGISTERED_HYBRID_NET_CLASSES}"
REGISTERED_HYBRID_NET_CLASSES[name] = cls
return cls
def get_hybrid_net(name):
global REGISTERED_HYBRID_NET_CLASSES
assert name in REGISTERED_HYBRID_NET_CLASSES, f"available class: {REGISTERED_HYBRID_NET_CLASSES}"
return REGISTERED_HYBRID_NET_CLASSES[name]
class ContextFeatureNet(nn.Module):
def __init__(self, config):
super().__init__()
self.context_fea_extractor_3d= KPSuperpoint3Dv2(config['context_fea_extractor_3d'] )
def forward(self, batch):
# x = batch['features'].clone().detach()
# assert len(batch['stack_lengths'][-1])==1, "Only support bs=1 for now"
len_src_c = batch['stack_lengths'][-1][0]
pcd_c = batch['model_points'][-1]
pcd_c = pcd_c[:len_src_c]
image=batch['image']
############### encode 3d and 2d features ###############
batch3d={
'points': batch['model_points'],
'neighbors': batch['neighbors'],
'pools': batch['pools'],
'upsamples': batch['upsamples'],
'features': batch['model_point_features'],
'stack_lengths': batch['stack_lengths'],
}
ctx_descriptors_3d = self.context_fea_extractor_3d(batch3d)
return {
"ctx_fea_3d":ctx_descriptors_3d,
}
@register_hybrid_net
class HybridDescNet(nn.Module):
# independent 2D and 3D networks
def __init__(self, config):
super().__init__()
self.corr_fea_extractor_2d= SuperPoint2D(config['keypoints_detector_2d'] )
self.corr_fea_extractor_3d= KPSuperpoint3Dv2(config['keypoints_detector_3d'] )
self.descriptors_3d = {}
def forward(self, batch):
assert len(set(batch['class_name']))==1, "A batch should contain data of the same class."
class_name = batch['class_name'][0]
len_src_c = batch['stack_lengths'][-1][0]
pcd_c = batch['model_points'][-1]
pcd_c = pcd_c[:len_src_c]#, pcd_c[len_src_c:]
image=batch['image']
############### encode 3d and 2d features ###############
batch3d={
'points': batch['model_points'],
'neighbors': batch['neighbors'],
'pools': batch['pools'],
'upsamples': batch['upsamples'],
'features': batch['model_point_features'],
'stack_lengths': batch['stack_lengths'],
}
if self.training:
self.descriptors_3d[class_name] = self.corr_fea_extractor_3d(batch3d)
else:
if class_name not in self.descriptors_3d:
self.descriptors_3d[class_name] = self.corr_fea_extractor_3d(batch3d)
descriptors_2d = self.corr_fea_extractor_2d(image)['descriptors']
return {
"descriptors_2d":descriptors_2d,
"descriptors_3d":self.descriptors_3d[class_name],
"scores_saliency_3d": None,
"scores_overlap_3d":None,
}
================================================
FILE: model/PoseRefiner.py
================================================
import os
import time
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributed as dist
import numpy as np
from easydict import EasyDict as edict
from functools import partial
from geometry.transformation import *
from geometry.intrinsics import *
from geometry.projective_ops import coords_grid, normalize_coords_grid
from model.CFNet import GRU_CFUpdator , ImageFeaEncoder
from utils.pose_utils import pose_padding
from config.default import get_cfg
EPS = 1e-5
MIN_DEPTH = 0.1
MAX_ERROR = 100.0
# exclude extremely large displacements
MAX_FLOW = 400
def raft_sequence_flow_loss(flow_preds, flow_gt, valid, gamma=0.8, max_flow=MAX_FLOW):
""" Loss function defined over sequence of flow predictions """
n_predictions = len(flow_preds)
flow_loss = 0.0
# exclude invalid pixels and extremely large displacements
mag = torch.sum(flow_gt**2, dim=1).sqrt()
valid = (valid >= 0.5) & (mag < max_flow)
for i in range(n_predictions):
i_weight = gamma**(n_predictions - i - 1)
i_loss = (flow_preds[i] - flow_gt).abs()
flow_loss += i_weight * (valid[:, None] * i_loss).mean()
epe = torch.sum((flow_preds[-1] - flow_gt)**2, dim=1).sqrt()
epe = epe.view(-1)[valid.view(-1)]
metrics = {
'epe': epe.mean().item(),
'1px': (epe < 1).float().mean().item(),
'3px': (epe < 3).float().mean().item(),
'5px': (epe < 5).float().mean().item(),
}
return flow_loss, metrics
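# Sequence weighting as in RAFT: prediction i gets weight gamma**(n_predictions - i - 1).
# For example, with gamma=0.8 and three predictions the weights are (0.64, 0.8, 1.0), so the
# last (most refined) flow estimate dominates the loss.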
class PoseRefiner(nn.Module):
def __init__(self, cfg,
reuse=False,
schedule=None,
use_regressor=True,
is_calibrated=True,
bn_is_training=False,
is_training=True,
renderer=None,
):
super().__init__()
self.legacy=True
self.cfg = cfg
self.reuse = reuse
self.sigma=nn.ParameterList( [nn.Parameter(torch.ones(1)*1 )] )
self.with_corr_weight = self.cfg.get("with_corr_weight", True)
if not self.with_corr_weight:
print("Warning: the correlation weighting is disabled.")
self.is_calibrated = cfg.IS_CALIBRATED
if not is_calibrated:
self.is_calibrated = is_calibrated
self.is_training = is_training
self.use_regressor = use_regressor
self.residual_pose_history = []
self.Ti_history =[]
self.coords_history = []
self.residual_history = []
self.inds_history = []
# self.weights_history = []
self.flow_history = []
self.intrinsics_history = []
self.summaries = []
if self.cfg.FLOW_NET=='raft':
self.image_fea_enc= ImageFeaEncoder()
self.cf_net = GRU_CFUpdator(self.cfg.raft)
else:
raise NotImplementedError
self.renderer = renderer
def _clear(self,):
self.residual_pose_history = []
self.Ti_history =[]
self.coords_history = []
self.residual_history = []
self.inds_history = []
# self.weights_history = []
self.flow_history = []
self.intrinsics_history = []
self.summaries = []
def __len__(self):
return len(self.residual_pose_history)
def render(self, params, render_tex=False):
""" render a batch of images given the intrinsic and extrinsic params
Args:
params.K: np.array, of shape Bx3x3
params.camera_extrinsics: np.array, of shape Bx3x4
Returns:
[type]: [description]
"""
bs=params.K.shape[0]
colors=[]
depths=[]
color,depth= self.renderer( params.obj_cls, params.vert_attribute, T=params.camera_extrinsics, K=params.K,
render_image_size=params.render_image_size, near=0.1, far=6, render_tex=render_tex)
#set the invalid values to zeros
depth[depth==-1] = 0
return {
# 1x3xHxW
"syn_img": color,
"syn_depth": depth.detach(), # 1x1xHxW
}
def get_affine_transformation(self, mask, crop_center, with_intrinsic_transform=False, output_size=None, margin_ratio=0.4):
B,_,H,W = mask.shape
ratio = float(H) / float(W)
affine_matrices = []
intrinsic_matrices = []
for b in range(B):
zoom_c_x, zoom_c_y = crop_center[b] #crop_center
ys, xs = np.nonzero(mask[b][0].detach().cpu().numpy() )
if len(ys)>0 and len(xs)>0:
obj_imgn_start_x = xs.min()
obj_imgn_start_y = ys.min()
obj_imgn_end_x = xs.max()
obj_imgn_end_y = ys.max()
else:
obj_imgn_start_x=0
obj_imgn_start_y=0
obj_imgn_end_x=0
obj_imgn_end_y=0
# mask region
left_dist = zoom_c_x - obj_imgn_start_x
right_dist = obj_imgn_end_x - zoom_c_x
up_dist = zoom_c_y - obj_imgn_start_y
down_dist = obj_imgn_end_y - zoom_c_y
# crop_height = np.max([ratio * right_dist, ratio * left_dist, up_dist, down_dist]) * 2 * 1.4
crop_height = np.max([ratio * right_dist, ratio * left_dist, up_dist, down_dist]) * 2 * (1+margin_ratio)
crop_width = crop_height / ratio
# affine transformation for PyTorch
x1 = (zoom_c_x - crop_width / 2) * 2 / W - 1
x2 = (zoom_c_x + crop_width / 2) * 2 / W - 1
y1 = (zoom_c_y - crop_height / 2) * 2 / H - 1
y2 = (zoom_c_y + crop_height / 2) * 2 / H - 1
pts1 = np.float32([[x1, y1], [x1, y2], [x2, y1]])
pts2 = np.float32([[-1, -1], [-1, 1], [1, -1]])
affine_matrix = torch.tensor(cv2.getAffineTransform(pts2, pts1), device=mask.device, dtype=torch.float32)
affine_matrices.append(affine_matrix)
if with_intrinsic_transform:
# affine transformation for PyTorch
x1 = (zoom_c_x - crop_width / 2)
x2 = (zoom_c_x + crop_width / 2)
y1 = (zoom_c_y - crop_height / 2)
y2 = (zoom_c_y + crop_height / 2)
pts1 = np.float32([[x1, y1], [x1, y2], [x2, y1]])
# pts2 = np.float32([[0, 0], [0, H-1], [W-1, 0]])
pts2 = np.float32([[0, 0], [0, output_size[0]-1], [output_size[1]-1, 0]])
# pts2 = np.float32([[0, 0], [0, 1], [1, 0]])
intrinsic_matrix = torch.tensor(cv2.getAffineTransform(pts2, pts1), device=mask.device, dtype=torch.float32)
intrinsic_matrices.append(intrinsic_matrix)
if with_intrinsic_transform:
return torch.stack(affine_matrices, dim=0), torch.stack(intrinsic_matrices, dim=0)
else:
return torch.stack(affine_matrices, dim=0)
def gen_zoom_crop_grids(self, fg_mask, K, T, output_size, model_center=[0,0,0], margin_ratio=0.4):
##Get the projected model center in image (assuming the model is zero-centered, which should be reconsidered!)
crop_center=K@T[:,:3,3:]
crop_center = crop_center[:,:2]/crop_center[:,2:3]
##calculate affine transformation parameters
affine_matrices, crop_intrinsic_transform=self.get_affine_transformation(fg_mask, crop_center=crop_center.detach().cpu().numpy(), with_intrinsic_transform=True, output_size=(output_size[-2], output_size[-1]),margin_ratio=margin_ratio )
grids = F.affine_grid(affine_matrices, torch.Size(output_size) )
##Get cropped intrinsic_transform
intrinsics_crop= torch.inverse(pose_padding(crop_intrinsic_transform) )@K
return grids, intrinsics_crop
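# The crop is centred on the projected model centre K @ t (assuming a roughly zero-centred
# model, as noted above). `grids` can be passed to F.grid_sample to zoom the real image onto
# the object region, while `intrinsics_crop` is the intrinsic matrix of the corresponding
# virtual zoomed-in camera, so renderings produced with it stay aligned with the cropped image.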
# def forward(self, image, Ts, intrinsics, fea_3d=None, inds=None, Tj_gt=None, obj_cls=None, geofea_3d=None, geofea_2d=None):
def forward(self, image, Ts, intrinsics, fea_3d=None, Tj_gt=None, obj_cls=None, geofea_3d=None, geofea_2d=None):
#clear the history data
self._clear()
cfg = self.cfg
RENDER_IMAGE_SIZE = get_cfg("BASIC").render_image_size
ZOOM_CROP_SIZE=get_cfg("BASIC").zoom_crop_size
if cfg.RESCALE_IMAGES:
image = 2 * (image / 255.0) - 1.0
Tij_gt=[]
syn_imgs=[]
syn_depths=[]
Ti = Ts
Tij = Ti.copy().identity()
for ren_iter in range(cfg.RENDER_ITER_COUNT):
# update rendering params
Ti = Tij*Ti # accumulate Ti
Tij.identity_() #set Tij to the identity transformation at the beginning of each ren_iter
if self.legacy:
Tij = Ti*Ti.inv() #set Tij to the identity transformation at the beginning of each ren_iter
render_params = edict({
"K": intrinsics.detach(),
"camera_extrinsics": Ti.matrix().detach().squeeze(1),
"obj_cls": obj_cls,
"render_image_size": RANDER_IMAGE_SIZE,
})
pc_depth = self.renderer.render_pointcloud(obj_cls, T=render_params.camera_extrinsics, K=render_params.K,
render_image_size=render_params.render_image_size)
if self.cfg.ONLINE_CROP:
#get the foreground mask
fg_mask = pc_depth>0
B,C,_,_= pc_depth.size()
############### Get zoom parameters ###############
grids, intrinsics_crop = self.gen_zoom_crop_grids(fg_mask, render_params.K, render_params.camera_extrinsics, output_size=[B,C, *ZOOM_CROP_SIZE], model_center=None)
############### Render reference images ###############
# Concatenate the 3D context feature "fea_3d" and the 3D descriptor "geofea_3d" for feature rendering
if geofea_3d is not None:
fea_3d_cat = torch.cat([fea_3d, geofea_3d ], dim=-1) # BxNxC
else:
fea_3d_cat = fea_3d
render_params.vert_attribute = fea_3d_cat #fea_3d
render_params.K = intrinsics_crop.detach()
render_params.render_image_size = ZOOM_CROP_SIZE
ren_res = self.render(render_params, render_tex=True)
if geofea_3d is not None:
syn_img, cfea, geofea1 = torch.split(ren_res['syn_img'],[3, fea_3d.shape[-1], geofea_3d.shape[-1] ] ,dim=1)
syn_depth = ren_res['syn_depth']
else:
syn_img, cfea = torch.split(ren_res['syn_img'], [3, fea_3d.shape[-1] ] ,dim=1)
geofea1=None
syn_depth = ren_res['syn_depth']
cfea = cfea*0.1 # balance the learning rate with the scale 0.1
## Crop and zoom images
syn_image_crop = syn_img
image_crop= F.grid_sample(image, grids)
cfea_crop= cfea
if geofea1 is not None and geofea_2d is not None:
# geofea1_crop = F.grid_sample(geofea1, grids)
geofea1_crop = geofea1
geofea2_crop = F.grid_sample(geofea_2d, grids)
#Render again to get more accurate depth for supervisions in losses
#TODO: could be merged into the rendering process above. -> Done
if self.legacy:#self.training:
depth_render_params=edict({
"K": intrinsics_crop.detach(),
"camera_extrinsics": Ti.matrix().detach().squeeze(1),
"obj_cls": obj_cls,
"render_image_size": ZOOM_CROP_SIZE,
})
syn_depth=self.renderer.render_depth(obj_cls, T=depth_render_params.camera_extrinsics, K=depth_render_params.K,
render_image_size=depth_render_params.render_image_size, near=0.1, far=6)
#for visualization only
syn_imgs.append(syn_image_crop)
syn_imgs.append(image_crop)
# encode image features
feats1, feats2=self.image_fea_enc(syn_image_crop, image_crop)
# depths = torch.index_select(syn_depth, index=ii, dim=1) + EPS
depths = syn_depth+EPS
for i in range(cfg.ITER_COUNT):
# save for loss calculation
self.intrinsics_history.append(intrinsics_crop)
syn_depths.append(syn_depth)
Tij = Tij.copy(stop_gradients=True)
intrinsics_crop = intrinsics_crop.detach()
#Get the projection in frame j of visible model points in frame i with the current relative pose estimation Tij
reproj_coords, vmask = Tij.transform(
depths, intrinsics_crop, valid_mask=True)
uniform_grids = coords_grid(depths)
flow_init = torch.einsum( "...ijk->...kij", reproj_coords-uniform_grids[..., :2] ) * (depths>EPS)
flow = self.cf_net(feats1, feats2, flow_init=flow_init.squeeze(1), context_fea=cfea_crop, update_corr_fn=i==0)
self.flow_history.append(flow)
# Get the correspondences in frame j for each point in frame i, based on the current flow estimates
if isinstance (flow, (list, tuple)): # flow net may return a list of flow maps
correspondence_target = torch.einsum("...ijk->...jki", flow[-1]) + uniform_grids[..., :2]
else:
correspondence_target = torch.einsum("...ijk->...jki", flow) + uniform_grids[..., :2]
# Optimize for the pose by minimizing errors between the constructed correspondence field
# (with the currently estimated pose) and the estimated correspondence field
if self.with_corr_weight and geofea1 is not None and geofea_2d is not None:
geofea2_crop_warpped = F.grid_sample(geofea2_crop, normalize_coords_grid(correspondence_target).squeeze(1) )
corr_weight= torch.sum(geofea1_crop*geofea2_crop_warpped, dim=1,keepdim=True).permute(0,2,3,1)[:,None] #insert frame axis
corr_weight = torch.exp(-torch.abs(1-corr_weight)/self.sigma[0]) * (syn_depth>0)[...,None].float()
else:
corr_weight = weight
Tij = Tij.reprojction_optim(
correspondence_target, corr_weight, depths, intrinsics_crop, num_iters=cfg.OPTIM_ITER_COUNT )
reproj_coords, vmask1 = Tij.transform(
depths, intrinsics_crop, valid_mask=True)
# For the loss calculation later
self.residual_pose_history.append(Tij)
self.Ti_history.append(Ti.copy(stop_gradients=True) )
Tij_gt.append( (Tj_gt*Ti.inv()).copy(stop_gradients=True) )
self.residual_history.append(
vmask*vmask1*(reproj_coords-correspondence_target)) # BxKxHxWx3
# The final update of Ti
Ti = Tij*Ti
return {
"Tij": Tij,
"Ti_pred": Ti,
"intrinsics": intrinsics,
"flow": self.flow_history[0],
"vmask": syn_depth>0,
"weight": torch.einsum("...ijk->...kij", corr_weight),
"syn_depth": syn_depths, #ren_res['syn_depth'],
"syn_img": syn_imgs+[image_crop, cfea_crop[:,:3]*10,geofea1[:,:3], geofea2_crop[:,:3]],
"Tij_gt" : Tij_gt
}
def compute_loss(self, Tij_gts, depths, intrinsics, loss='l1', log_error=True, loss3d=None, ):
total_loss = 0.0
for i in range(len(self.residual_pose_history)):
intrinsics = self.intrinsics_history[i]
depth, intrinsics = rescale_depths_and_intrinsics(depths[i], intrinsics, downscale=1)
Tij = self.residual_pose_history[i]
Gij = Tij_gts[i]
# intrinsics_pred = intrinsics
zstar = depth + EPS
flow_pred, valid_mask_pred = Tij.induced_flow(
zstar, intrinsics, valid_mask=True)
flow_star, valid_mask_star = Gij.induced_flow(
zstar, intrinsics, valid_mask=True)
valid_mask = valid_mask_pred * valid_mask_star
#3D alignment loss
loss_3d_proj = 0
if loss3d is not None:
Tj_pred=Tij*self.Ti_history[i]
Tj_gt=Gij*self.Ti_history[i]
loss_3d_proj = loss3d(
R_pred=Tj_pred.G[:, 0, :3, :3], t_pred=Tj_pred.G[:, 0, :3, 3], R_tgt=Tj_gt.G[:, 0, :3, :3], t_tgt=Tj_gt.G[:, 0, :3, 3])
# flow loss
if isinstance( self.flow_history[0], (list, tuple)):
#squeeze the frame dimension
self.flow_history[i] = [self.flow_history[i][f].squeeze(1) for f in range(len(self.flow_history[i])) ]
flow_mask = valid_mask.squeeze(1).squeeze(-1)
loss_flow,_ = raft_sequence_flow_loss(self.flow_history[i], flow_gt=torch.einsum("...ijk->...kij", flow_star.squeeze(1)), valid= flow_mask, gamma=0.8, max_flow=MAX_FLOW)
else:
raise NotImplementedError
# reprojection loss
reproj_diff = valid_mask * \
torch.clamp(
torch.abs(flow_pred - flow_star), -MAX_ERROR, MAX_ERROR)
reproj_loss = torch.mean(reproj_diff)
total_loss +=self.cfg.get("TRAIN_PCALIGN_WEIGHT", 1)*loss_3d_proj+ self.cfg.TRAIN_FLOW_WEIGHT* loss_flow + self.cfg.TRAIN_REPROJ_WEIGHT*reproj_loss
# clear the intermediate values
self._clear()
return {
"total_loss": total_loss,
"reproj_loss": reproj_loss,
"flow_loss":loss_flow,
"loss_3d_proj": loss_3d_proj,
"valid_mask": valid_mask,
"Tij": Tj_pred.G,
"Gij": Tj_gt.G
}
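# Per refinement iteration, the total loss above combines three terms with configurable weights:
#   total_loss += TRAIN_PCALIGN_WEIGHT * loss_3d_proj   (3D point-alignment term)
#              +  TRAIN_FLOW_WEIGHT    * loss_flow      (RAFT-style sequence flow loss)
#              +  TRAIN_REPROJ_WEIGHT  * reproj_loss    (clamped reprojection residual)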
================================================
FILE: model/RNNPose.py
================================================
#
import time
import torch
from torch import nn
from torch.nn import functional as F
import torch.distributed as dist
import apex
import numpy as np
import os
from easydict import EasyDict as edict
from transforms3d.euler import mat2euler, euler2mat, euler2quat, quat2euler
from functools import partial
from model.HybridNet import HybridDescNet,ContextFeatureNet
from thirdparty.kpconv.lib.utils import square_distance
from model.PoseRefiner import PoseRefiner
from utils.pose_utils import pose_padding
from geometry.transformation import SE3Sequence
from geometry.diff_render_optim import DiffRendererWrapper
from config.default import get_cfg
from utils.util import dict_recursive_op
# from model.RNNPose import register_posenet
REGISTERED_NETWORK_CLASSES = {}
def register_posenet(cls, name=None):
global REGISTERED_NETWORK_CLASSES
if name is None:
name = cls.__name__
assert name not in REGISTERED_NETWORK_CLASSES, f"class '{name}' already registered: {REGISTERED_NETWORK_CLASSES}"
REGISTERED_NETWORK_CLASSES[name] = cls
return cls
def get_posenet_class(name):
global REGISTERED_NETWORK_CLASSES
assert name in REGISTERED_NETWORK_CLASSES, f"available classes: {REGISTERED_NETWORK_CLASSES}"
return REGISTERED_NETWORK_CLASSES[name]
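# Minimal usage sketch (the config/criterion object names below are illustrative): networks register
# themselves via the decorator and are looked up by name when the model is built, e.g.
#   net_cls = get_posenet_class("RNNPose")
#   net = net_cls(criterions=criterions, opt=model_cfg)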
@register_posenet
class RNNPose(nn.Module):
def __init__(self,
criterions,
opt,
name="RNNPose",
**kwargs):
super().__init__()
self.name = name
self.opt = opt
self.hybrid_desc_net=HybridDescNet(opt.descriptor_net)
self.ctx_fea_net = ContextFeatureNet(opt.descriptor_net)
self.ctx_fea = {}
self.render_params = edict({
"width": opt.input_w, # 128,
"height": opt.input_h, # 128,
"gpu_id": opt.get('gpu_id', 0),
"obj_seqs": opt.obj_seqs
})
renderer, diff_renderer= self._render_init(self.render_params)
self.diff_renderer = diff_renderer
self.motion_net = PoseRefiner(
opt.motion_net, bn_is_training=self.training, is_training=self.training,
renderer=diff_renderer
)
self.contrastive_loss = criterions.get(
"metric_loss", None)
self.pose_loss = criterions.get("pose_loss", None)
self.register_buffer("global_step", torch.LongTensor(1).zero_())
def update_global_step(self):
self.global_step += 1
def get_global_step(self):
return int(self.global_step.cpu().numpy()[0])
def clear_global_step(self):
self.global_step.zero_()
def sample_poses(self, pose_tgt):
SYN_STD_ROTATION = 15
SYN_STD_TRANSLATION = 0.01
ANGLE_MAX=45
pose_src = pose_tgt.copy()
num = pose_tgt.shape[0]
for i in range(num):
euler = mat2euler(pose_tgt[i, :3, :3])
euler += SYN_STD_ROTATION * np.random.randn(3) * np.pi / 180.0
pose_src[i, :3, :3] = euler2mat(euler[0], euler[1], euler[2])
pose_src[i, 0, 3] = pose_tgt[i, 0, 3]+ SYN_STD_TRANSLATION * np.random.randn(1)
pose_src[i, 1, 3] = pose_tgt[i, 1, 3] + SYN_STD_TRANSLATION * np.random.randn(1)
pose_src[i, 2, 3] = pose_tgt[i, 2, 3] + 5 * SYN_STD_TRANSLATION * np.random.randn(1)
r_dist = np.arccos((np.trace(pose_src[i, :3,:3].transpose(-1,-2) @ pose_tgt[i, :3,:3]) - 1 )/2)/np.pi*180
while r_dist > ANGLE_MAX:#or not (16 < center_x < (640 - 16) and 16 < center_y < (480 - 16)):
print("r_dist > ANGLE_MAX, resampling...")
euler = mat2euler(pose_tgt[i, :3, :3])
euler += SYN_STD_ROTATION * np.random.randn(3) * np.pi / 180.0
pose_src[i, :3, :3] = euler2mat(euler[0], euler[1], euler[2])
pose_src[i, 0, 3] = pose_tgt[i, 0, 3]+ SYN_STD_TRANSLATION * np.random.randn(1)
pose_src[i, 1, 3] = pose_tgt[i, 1, 3] + SYN_STD_TRANSLATION * np.random.randn(1)
pose_src[i, 2, 3] = pose_tgt[i, 2, 3] + 5 * SYN_STD_TRANSLATION * np.random.randn(1)
r_dist = np.arccos((np.trace(pose_src[i, :3,:3].transpose(-1,-2) @ pose_tgt[i, :3,:3]) - 1 )/2)/np.pi*180
return pose_src
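# r_dist above is the geodesic angle between the sampled and target rotations,
#   r_dist = arccos((trace(R_src^T @ R_tgt) - 1) / 2) * 180 / pi   [degrees],
# so poses are resampled until the perturbation stays within ANGLE_MAX degrees of the target.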
def _render_init(self, config):
# from data.ycb.basic import bop_ycb_class2idx
print("config.gpu_id:", config.gpu_id)
obj_paths = []
tex_paths = []
# build cls2idx table for the renderer
cls2idx = {}
LM_SEQ=["ape", "benchvise", "camera","cam", "can", "cat", "driller", "duck", "eggbox", "glue", "holepuncher", "iron", "lamp", "phone"]
# YCB_SEQ=bop_ycb_class2idx.keys()
for i, seq in enumerate(set(config.obj_seqs)):
if seq in LM_SEQ:
obj_path = f'{os.path.dirname(__file__)}/../EXPDATA/LM6d_converted/models/{seq}/textured.obj'
tex_path = f'{os.path.dirname(__file__)}/../EXPDATA/LM6d_converted/models/{seq}/texture_map.png'
assert os.path.exists(obj_path), f"'{obj_path}' does not exist!"
assert os.path.exists(tex_path), f"'{tex_path}' does not exist!"
obj_paths.append(obj_path)
tex_paths.append(tex_path)
cls2idx[seq] = i
else:
raise NotImplementedError
renderer=None
diff_renderer = DiffRendererWrapper(obj_paths)
diff_renderer.cls2idx = cls2idx
return renderer, diff_renderer
def forward(self, sample):
assert len(set(sample['class_name']))==1, "A batch should contain data of the same class."
class_name = sample['class_name'][0]
#encode 3d-2d descriptors
preds_dict=self.hybrid_desc_net(sample)
len_src_f = sample['stack_lengths'][0][0]
geofea_3d = preds_dict.get('descriptors_3d', None)
geofea_2d = preds_dict.get('descriptors_2d', None)
#encode 3D context features
if self.training:
self.ctx_fea[class_name]=self.ctx_fea_net(sample)
else:
if class_name not in self.ctx_fea:
self.ctx_fea[class_name]=self.ctx_fea_net(sample)
preds_dict.update(self.ctx_fea[class_name])
ctx_fea_3d = preds_dict['ctx_fea_3d'][:len_src_f]
if self.training:
pose=pose_padding(sample['original_RT'])
if sample.get("rendered_RT", None) is not None:
syn_pose = pose_padding(sample['rendered_RT'])
else:
syn_pose = torch.tensor(self.sample_poses(sample['original_RT'].detach().cpu(
).numpy()), device=sample['original_RT'].device, dtype=sample['original_RT'].dtype)
syn_pose = pose_padding(syn_pose)
else:
pose = pose_padding(sample['original_RT'])
syn_pose = pose_padding(sample['rendered_RT'])
# calculate the GT relative pose and the initial pose
Ts_pred = SE3Sequence(
matrix=torch.stack([syn_pose ], dim=1))
mot_res = self.motion_net(
Ts=Ts_pred,
intrinsics=sample['K'],
image=sample['image'],
fea_3d=ctx_fea_3d[None],
Tj_gt=SE3Sequence(matrix=pose[:, None]),
obj_cls=sample['class_name'],
geofea_2d = geofea_2d,
geofea_3d = geofea_3d[None]
)
preds_dict.update(mot_res)
sample['syn_depth'] = mot_res['syn_depth']
if self.training:
ret = self.loss(sample, preds_dict)
ret['syn_img'] = mot_res['syn_img']
ret['syn_depth'] = mot_res['syn_depth']
ret['flow'] = mot_res['flow']
ret['weight'] = mot_res['weight']
return ret
else:
# ret = self.loss(sample, preds_dict)
ret={}
ret.update(preds_dict)
return ret
def loss(self, sample, preds_dict):
len_src_f = sample['stack_lengths'][0][0]
RT =sample['RT']
camera_intrinsic=sample['K']
descriptors_2d_map = preds_dict['descriptors_2d']
descriptors_3d = preds_dict['descriptors_3d'][:len_src_f]
rand_descriptors_3d = preds_dict['descriptors_3d'][len_src_f:]
model_points=sample['model_points'][0][:len_src_f]
orig_model_points = sample['original_model_points']
rand_model_points=sample['model_points'][0][len_src_f:]
lifted_points=sample['lifted_points'][0].squeeze(0)
correspondence=sample['correspondences_2d3d'].squeeze(0)
depth = sample['depth']
# get the foreground 2d descriptors
ys_, xs_=torch.nonzero(sample['depth'].squeeze(), as_tuple=True )
descriptors_2d=descriptors_2d_map[:,:,ys_,xs_].squeeze().permute([1,0])
if self.training:
device= lifted_points.device
fg_point_num = len(lifted_points)
model_point_num = len(model_points)
# append bg features
ys_bg, xs_bg = torch.nonzero(sample['depth'].squeeze()<=0, as_tuple=True )
descriptors_2d_bg = descriptors_2d_map[:,:,ys_bg, xs_bg].squeeze().permute([1,0])
# descriptors_2d_bg = descriptors_2d_bg[np.random.randint(0,len(descriptors_2d_bg), size=len(lifted_points) )]
descriptors_2d = torch.cat([descriptors_2d, descriptors_2d_bg], dim=0)
descriptors_3d = torch.cat([descriptors_3d, descriptors_2d_bg], dim=0 )
# append handcrafted coordinates to simplify the code (assign the bg points the same coordinates far away from the fg points, i.e. 10e6)
lifted_points = torch.cat([lifted_points, torch.ones([len(descriptors_2d_bg) ,3],device=device )*10e6 ] ) #append very distant points
model_points = torch.cat([model_points, torch.ones([len(descriptors_2d_bg) ,3], device=device)*10e6 ] ) #append very distant points
#append one-to-one inds
if len(descriptors_2d_bg)>0:
# randomly sample the bg points to balance the learning process
sample_inds=np.random.randint(0,len(descriptors_2d_bg), size=int(len(correspondence)*0.1) )
bg_corr= torch.stack([
torch.arange(fg_point_num, fg_point_num+len(descriptors_2d_bg), device=device )[sample_inds],
torch.arange(model_point_num, model_point_num+len(descriptors_2d_bg), device=device)[sample_inds]
], dim=-1) #Nx2
correspondence = torch.cat([correspondence, bg_corr ], dim=0)
if len(lifted_points)>0:
contra_loss=self.contrastive_loss(src_pcd=lifted_points, tgt_pcd=model_points,
src_feats=descriptors_2d, tgt_feats=descriptors_3d,
correspondence=correspondence,
scores_overlap=None, scores_saliency=None)
else:
print("Warning: Contrastive loss is skipped, as no lifted point is found!", flush=True)
contra_loss={
'circle_loss': torch.zeros([1], device=depth.device),
'recall': torch.zeros([1], device=depth.device)
}
loss3d = partial(self.pose_loss.forward,
points=orig_model_points[None])
motion_loss = self.motion_net.compute_loss(
preds_dict['Tij_gt'], sample['syn_depth'], intrinsics=sample['K'], loss='l1', log_error=True, loss3d=loss3d)
loss = self.contrastive_loss.weight * contra_loss['circle_loss'] + motion_loss['total_loss']
res = {
"loss": loss,
"circle_loss": contra_loss['circle_loss'].detach(),
"recall":contra_loss['recall'].detach(),
# "geometric_loss": torch.zeros(1),
"reproj_loss": motion_loss['reproj_loss'].detach(),
"loss_3d_proj": motion_loss['loss_3d_proj'].detach(),
"valid_mask": motion_loss["valid_mask"].detach(),
}
return res
================================================
FILE: model/descriptor2D.py
================================================
from easydict import EasyDict as edict
from pathlib import Path
import torch
from torch import nn
from torchplus.nn.modules.common import Empty
class SuperPoint2D(nn.Module):
"""SuperPoint Convolutional Detector and Descriptor
SuperPoint: Self-Supervised Interest Point Detection and
Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew
Rabinovich. In CVPRW, 2018. https://arxiv.org/abs/1712.07629
"""
default_config = {
'descriptor_dim': 256,
'nms_radius': 4,
'keypoint_threshold': 0.005,
'max_keypoints': -1,
'remove_borders': 4,
'saliency_score_normalization_fuc': 'sigmoid',
"use_instance_norm": True
}
def __init__(self, config):
super().__init__()
self.default_config.update(config)
self.config=edict(self.default_config)
self.normalize_output=config.get('normalize_output', True)
self.saliency_score_normalization_fuc= self.config.saliency_score_normalization_fuc
assert self.saliency_score_normalization_fuc in ['sigmoid', 'softmax']
self.relu = nn.ReLU(inplace=True)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
if self.config.use_instance_norm:
self.Normalization=nn.InstanceNorm2d
else:
self.Normalization = Empty
c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256
self.input_dim=config.input_dim
# self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1)
self.conv1a = nn.Conv2d(config.input_dim, c1, kernel_size=3, stride=1, padding=1)
self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1)
self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1)
self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1)
self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1)
self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1)
self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1)
self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1)
self.convPa = nn.Sequential(
nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1),
self.Normalization(c5)
)
self.convPb = nn.Conv2d(c5, 1, kernel_size=1, stride=1, padding=0)
self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
self.convDb = nn.Conv2d(
c5, self.config['descriptor_dim'],
kernel_size=1, stride=1, padding=0)
self.decode1=nn.Sequential(
nn.Upsample(scale_factor=2,mode='bilinear'),
nn.Conv2d(c4,c4, kernel_size=3, stride=1, padding=1),
self.Normalization(c4),
nn.ReLU())
self.decode2=nn.Sequential(
nn.Upsample(scale_factor=2,mode='bilinear'),
nn.Conv2d(c4+c3,c4, kernel_size=3, stride=1, padding=1),
self.Normalization(c4),
nn.ReLU()
)
self.decode3=nn.Sequential(
nn.Upsample(scale_factor=2,mode='bilinear'),
nn.Conv2d(c4+c2,c4, kernel_size=3, stride=1, padding=1),
self.Normalization(c4),
nn.ReLU()
)
path = Path(__file__).parent.parent/ 'weights/superpoint_v1.pth'
self.load_state_dict(torch.load(str(path), map_location='cpu' ), strict=False)
mk = self.config['max_keypoints']
if mk == 0 or mk < -1:
raise ValueError('\"max_keypoints\" must be positive or \"-1\"')
print('Loaded SuperPoint model')
def load_state_dict(self,state_dict, strict=True):
if not strict:
updated_state_dict = {}
model_dict = self.state_dict()
for k, v in state_dict.items():
if k in model_dict and v.shape == model_dict[k].shape:
updated_state_dict[k] = v
else:
updated_state_dict = state_dict
super().load_state_dict(updated_state_dict, strict)
def forward_encoder(self, x):
if self.input_dim==1:
x=x.mean(dim=1, keepdim=True) #
x_skip=[]
# Shared Encoder
x = self.relu(self.conv1a(x))
x = self.relu(self.conv1b(x))
x_skip.append(x)
x = self.pool(x)
x = self.relu(self.conv2a(x))
x = self.relu(self.conv2b(x))
x_skip.append(x)
x = self.pool(x)
x = self.relu(self.conv3a(x))
x = self.relu(self.conv3b(x))
x_skip.append(x)
x = self.pool(x)
x = self.relu(self.conv4a(x))
x = self.relu(self.conv4b(x))
return x, x_skip
def forward_decoder(self, x, x_skip):
#upsample first
x = self.decode1(x)
x=torch.cat([x, x_skip[-1]], dim=1)
x = self.decode2(x)
x=torch.cat([x, x_skip[-2]], dim=1)
x = self.decode3(x)
# Compute the dense keypoint scores
cPa = self.relu(self.convPa(x))
scores = self.convPb(cPa)
if self.saliency_score_normalization_fuc == 'sigmoid':
scores = nn.functional.sigmoid(scores)
elif self.saliency_score_normalization_fuc == 'softmax':
scores_shape = scores.shape
scores = scores.reshape(*scores.shape[0:2], -1)
scores = nn.functional.softmax(scores/1, dim=-1)
scores= scores.reshape(*scores_shape)#.clone()
else:
raise ValueError
keypoints=None
# Compute the dense descriptors
cDa = self.relu(self.convDa(x))
descriptors = self.convDb(cDa)
if self.normalize_output:
descriptors = torch.nn.functional.normalize(descriptors, p=2, dim=1)
return keypoints, scores, descriptors
def forward(self, data):
""" Compute keypoints, scores, descriptors for image """
# Shared Encoder
x, x_skip=self.forward_encoder(data)
# Compute the dense keypoint scores
keypoints, scores, descriptors = self.forward_decoder(x, x_skip)
return {
'keypoints': keypoints,
'scores': scores,
'descriptors': descriptors,
}
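# Minimal usage sketch (assuming weights/superpoint_v1.pth is available and H, W are divisible by 8):
#   net = SuperPoint2D(edict({'input_dim': 3}))
#   out = net(torch.rand(1, 3, 480, 640))
#   out['scores']       # Bx1xHxW saliency map
#   out['descriptors']  # Bx256xHxW dense descriptors, L2-normalized when normalize_output=True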
================================================
FILE: model/descriptor3D.py
================================================
import torch
import torch.nn as nn
from kpconv.kpconv_blocks import *
import torch.nn.functional as F
import numpy as np
from kpconv.lib.utils import square_distance
class KPSuperpoint3Dv2(nn.Module):
#remove useless channels
def __init__(self, config):
super().__init__()
self.normalize_output=config.get('normalize_output', True)
# build the architectures
config.architecture = [
'simple',
'resnetb',
]
for i in range(config.num_layers-1):
config.architecture.append('resnetb_strided')
config.architecture.append('resnetb')
config.architecture.append('resnetb')
for i in range(config.num_layers-2):
config.architecture.append('nearest_upsample')
config.architecture.append('unary')
config.architecture.append('nearest_upsample')
config.architecture.append('last_unary')
############
# Parameters
############
# Current radius of convolution and feature dimension
layer = 0
r = config.first_subsampling_dl * config.conv_radius
in_dim = config.in_features_dim
out_dim = config.first_feats_dim
self.K = config.num_kernel_points
self.epsilon = torch.nn.Parameter(torch.tensor(-5.0))
self.final_feats_dim = config.final_feats_dim
#####################
# List Encoder blocks
#####################
# Save all block operations in a list of modules
self.encoder_blocks = nn.ModuleList()
self.encoder_skip_dims = []
self.encoder_skips = []
# Loop over consecutive blocks
for block_i, block in enumerate(config.architecture):
# Check equivariance
if ('equivariant' in block) and (not out_dim % 3 == 0):
raise ValueError('Equivariant block but features dimension is not a multiple of 3')
# Detect change to next layer for skip connection
if np.any([tmp in block for tmp in ['pool', 'strided', 'upsample', 'global']]):
self.encoder_skips.append(block_i)
self.encoder_skip_dims.append(in_dim)
# Detect upsampling block to stop
if 'upsample' in block:
break
# Apply the good block function defining tf ops
self.encoder_blocks.append(block_decider(block,
r,
in_dim,
out_dim,
layer,
config))
# Update dimension of input from output
if 'simple' in block:
in_dim = out_dim // 2
else:
in_dim = out_dim
# Detect change to a subsampled layer
if 'pool' in block or 'strided' in block:
# Update radius and feature dimension for next layer
layer += 1
r *= 2
out_dim *= 2
#####################
# bottleneck layer
#####################
botneck_feats_dim = config.gnn_feats_dim
self.bottle = nn.Conv1d(in_dim, botneck_feats_dim,kernel_size=1,bias=True)
# num_head = config.num_head
self.proj_gnn = nn.Conv1d(botneck_feats_dim,botneck_feats_dim,kernel_size=1, bias=True)
#####################
# List Decoder blocks
#####################
out_dim = botneck_feats_dim # + 2
# Save all block operations in a list of modules
self.decoder_blocks = nn.ModuleList()
self.decoder_concats = []
# Find first upsampling block
start_i = 0
for block_i, block in enumerate(config.architecture):
if 'upsample' in block:
start_i = block_i
break
# Loop over consecutive blocks
for block_i, block in enumerate(config.architecture[start_i:]):
# Add dimension of skip connection concat
if block_i > 0 and 'upsample' in config.architecture[start_i + block_i - 1]:
in_dim += self.encoder_skip_dims[layer]
self.decoder_concats.append(block_i)
# Apply the good block function defining tf ops
self.decoder_blocks.append(block_decider(block,
r,
in_dim,
out_dim,
layer,
config))
# Update dimension of input from output
in_dim = out_dim
# Detect change to a subsampled layer
if 'upsample' in block:
# Update radius and feature dimension for next layer
layer -= 1
r *= 0.5
out_dim = out_dim // 2
return
def regular_score(self,score):
score = torch.where(torch.isnan(score), torch.zeros_like(score), score)
score = torch.where(torch.isinf(score), torch.zeros_like(score), score)
return score
def forward_encoder(self, batch):
# Get input features
x = batch['features'].clone().detach()
len_src_c = batch['stack_lengths'][-1][0]
len_src_f = batch['stack_lengths'][0][0]
pcd_c = batch['points'][-1]
pcd_f = batch['points'][0]
src_pcd_c, tgt_pcd_c = pcd_c[:len_src_c], pcd_c[len_src_c:]
sigmoid = nn.Sigmoid()
#################################
# 1. encoder
skip_x = []
for block_i, block_op in enumerate(self.encoder_blocks):
if block_i in self.encoder_skips:
skip_x.append(x)
x = block_op(x, batch)
#################################
# 2. project the bottleneck features
feats_c = x.transpose(0,1).unsqueeze(0) #[1, C, N]
feats_c = self.bottle(feats_c) #[1, C, N]
return feats_c,skip_x
def forward_middle(self, x):
feats_c = self.proj_gnn(x)
feats_gnn_raw = feats_c.squeeze(0).transpose(0,1)
return feats_gnn_raw
def forward_decoder(self, x, skip_x, batch ):
sigmoid = nn.Sigmoid()
for block_i, block_op in enumerate(self.decoder_blocks):
if block_i in self.decoder_concats:
x = torch.cat([x, skip_x.pop()], dim=1)
x = block_op(x, batch)
feats_f = x[:,:self.final_feats_dim]
# normalise point-wise features
if self.normalize_output:
feats_f = F.normalize(feats_f, p=2, dim=1)
return feats_f
def forward(self, batch):
# Get input features
feats_c,skip_x = self.forward_encoder(batch)
x = self.forward_middle(feats_c)
feats_f = self.forward_decoder(x, skip_x, batch)
return feats_f
================================================
FILE: model/losses.py
================================================
from sklearn.metrics import precision_recall_fscore_support
from thirdparty.kpconv.lib.utils import square_distance
from abc import ABCMeta, abstractmethod
import time
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from torch.nn import functional as F
import torchplus
# from utils.pca import pca_tch
import kornia
# import self_voxelo.utils.pose_utils as pose_utils
import apex.amp as amp
# class Loss(object):
class Loss(nn.Module):
"""Abstract base class for loss functions."""
__metaclass__ = ABCMeta
def __init__(self, loss_weight=1):
super(Loss, self).__init__()
self._loss_weight = loss_weight
# def __call__(self,
def forward(self,
prediction_tensor,
target_tensor,
ignore_nan_targets=False,
scope=None,
**params):
"""Call the loss function.
Args:
prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
representing predicted quantities.
target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
regression or classification targets.
ignore_nan_targets: whether to ignore nan targets in the loss computation.
E.g. can be used if the target tensor is missing groundtruth data that
shouldn't be factored into the loss.
scope: Op scope name. Defaults to 'Loss' if None.
**params: Additional keyword arguments for specific implementations of
the Loss.
Returns:
loss: a tensor representing the value of the loss function.
"""
if ignore_nan_targets:
target_tensor = torch.where(torch.isnan(target_tensor),
prediction_tensor,
target_tensor)
# ret = self._compute_loss(prediction_tensor, target_tensor, **params)
# if isinstance(ret, (list, tuple)):
# return [self._loss_weight*ret[0]] + list(ret[1:])
# else:
return self._loss_weight*self._compute_loss(prediction_tensor, target_tensor, **params)
@abstractmethod
@amp.float_function
def _compute_loss(self, prediction_tensor, target_tensor, **params):
"""Method to be overridden by implementations.
Args:
prediction_tensor: a tensor representing predicted quantities
target_tensor: a tensor representing regression or classification targets
**params: Additional keyword arguments for specific implementations of
the Loss.
Returns:
loss: an N-d tensor of shape [batch, anchors, ...] containing the loss per
anchor
"""
raise NotImplementedError
class L2Loss(Loss):
def __init__(self, loss_weight=1):
super(L2Loss, self).__init__(loss_weight)
def _compute_loss(self, prediction_tensor, target_tensor, mask=None):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the (encoded) predicted locations of objects.
target_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the regression targets
Returns:
loss: a float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function.
"""
diff = prediction_tensor - target_tensor
if mask is not None:
mask = mask.expand_as(diff).byte()
diff = diff[mask]
# square_diff = 0.5 * weighted_diff * weighted_diff
square_diff = diff * diff
return square_diff.mean()
class AdaptiveWeightedL2Loss(Loss):
def __init__(self, init_alpha, learn_alpha=True, loss_weight=1, focal_gamma=0):
super(AdaptiveWeightedL2Loss, self).__init__(loss_weight)
self.learn_alpha = learn_alpha
self.alpha = nn.Parameter(torch.Tensor(
[init_alpha]), requires_grad=learn_alpha)
self.focal_gamma = focal_gamma
# self.alpha_shift = -13 # -10# TODO: temporarily test
def _compute_loss(self, prediction_tensor, target_tensor, mask=None, alpha=None, focal_gamma=None):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the (encoded) predicted locations of objects.
target_tensor: A float tensor of shape [batch_size, num_anchors,
code_size] representing the regression targets
Returns:
loss: a float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function.
"""
if focal_gamma is None:
focal_gamma = self.focal_gamma
_alpha = self.alpha
if mask is None:
mask = torch.ones_like(target_tensor)
else:
mask = mask.expand_as(target_tensor)
diff = prediction_tensor - target_tensor
square_diff = (diff * diff) * mask
# loss = square_diff.mean()
input_shape = prediction_tensor.shape
loss = torch.sum(square_diff, dim=list(range(1, len(input_shape)))) / \
(torch.sum(mask, dim=list(range(1, len(input_shape)))) + 1e-12) # (B,)
focal_weight = (torch.exp(-_alpha) * loss)**focal_gamma
focal_weight = focal_weight/(torch.sum(focal_weight) + 1e-12)
loss = focal_weight*(torch.exp(-_alpha) * loss)
loss = loss.sum() + _alpha
return loss
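# With learn_alpha=True this is an uncertainty-style weighting: each per-sample masked MSE is scaled
# by exp(-alpha) and alpha itself is added as a regularizer, roughly
#   loss = sum_b w_b * exp(-alpha) * mse_b + alpha,
# where w_b is the optional focal re-weighting ((exp(-alpha) * mse_b) ** focal_gamma, normalized to sum to 1).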
class MetricLoss(nn.Module):
"""
We evaluate both contrastive loss and circle loss
"""
def __init__(self, configs, log_scale=16, pos_optimal=0.1, neg_optimal=1.4, ):
super(MetricLoss, self).__init__()
self.log_scale = log_scale
self.pos_optimal = pos_optimal
self.neg_optimal = neg_optimal
self.pos_margin = configs.pos_margin
self.neg_margin = configs.neg_margin
self.max_points = configs.max_points
self.safe_radius = configs.safe_radius
self.matchability_radius = configs.matchability_radius
# just to take care of the numeric precision
self.pos_radius = configs.pos_radius + 0.001
self.weight = configs.get('loss_weight', 1)
def get_circle_loss(self, coords_dist, feats_dist):
"""
Modified from: https://github.com/XuyangBai/D3Feat.pytorch
"""
pos_mask = coords_dist < self.pos_radius
neg_mask = coords_dist > self.safe_radius
# get anchors that have both positive and negative pairs
row_sel = ((pos_mask.sum(-1) > 0) * (neg_mask.sum(-1) > 0)).detach()
col_sel = ((pos_mask.sum(-2) > 0) * (neg_mask.sum(-2) > 0)).detach()
# get alpha for both positive and negative pairs
pos_weight = feats_dist - 1e5 * \
(~pos_mask).float() # mask the non-positive
# mask the uninformative positive
pos_weight = (pos_weight - self.pos_optimal)
pos_weight = torch.max(torch.zeros_like(
pos_weight), pos_weight).detach()
neg_weight = feats_dist + 1e5 * \
(~neg_mask).float() # mask the non-negative
# mask the uninformative negative
neg_weight = (self.neg_optimal - neg_weight)
neg_weight = torch.max(torch.zeros_like(
neg_weight), neg_weight).detach()
lse_pos_row = torch.logsumexp(
self.log_scale * (feats_dist - self.pos_margin) * pos_weight, dim=-1)
lse_pos_col = torch.logsumexp(
self.log_scale * (feats_dist - self.pos_margin) * pos_weight, dim=-2)
lse_neg_row = torch.logsumexp(
self.log_scale * (self.neg_margin - feats_dist) * neg_weight, dim=-1)
lse_neg_col = torch.logsumexp(
self.log_scale * (self.neg_margin - feats_dist) * neg_weight, dim=-2)
loss_row = F.softplus(lse_pos_row + lse_neg_row)/self.log_scale
loss_col = F.softplus(lse_pos_col + lse_neg_col)/self.log_scale
circle_loss = (loss_row[row_sel].mean() + loss_col[col_sel].mean()) / 2
return circle_loss
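# In short: for anchors that have both a positive (coords_dist < pos_radius) and a negative
# (coords_dist > safe_radius) pair, positives are pulled below pos_margin and negatives pushed above
# neg_margin in feature distance, with pos_weight/neg_weight emphasizing the pairs that deviate most
# from their optimum (circle loss, following D3Feat).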
def get_recall(self, coords_dist, feats_dist):
"""
Get feature match recall, divided by number of true inliers
"""
pos_mask = coords_dist < self.pos_radius
n_gt_pos = (pos_mask.sum(-1) > 0).float().sum()+1e-12
_, sel_idx = torch.min(feats_dist, -1)
sel_dist = torch.gather(coords_dist, dim=-1,
index=sel_idx[:, None])[pos_mask.sum(-1) > 0]
n_pred_pos = (sel_dist < self.pos_radius).float().sum()
recall = n_pred_pos / n_gt_pos
return recall
def get_weighted_bce_loss(self, prediction, gt):
loss = nn.BCELoss(reduction='none')
class_loss = loss(prediction, gt)
weights = torch.ones_like(gt)
w_negative = gt.sum()/gt.size(0)
w_positive = 1 - w_negative
weights[gt >= 0.5] = w_positive
weights[gt < 0.5] = w_negative
w_class_loss = torch.mean(weights * class_loss)
#######################################
# get classification precision and recall
predicted_labels = prediction.detach().cpu().round().numpy()
cls_precision, cls_recall, _, _ = precision_recall_fscore_support(
gt.cpu().numpy(), predicted_labels, average='binary')
return w_class_loss, cls_precision, cls_recall
def forward(self, src_pcd, tgt_pcd, src_feats, tgt_feats, correspondence, scores_overlap, scores_saliency, rot=None, trans=None):
"""
Circle loss for metric learning, here we feed the positive pairs only
Input:
src_pcd: [N, 3], pcd of the 3d model
tgt_pcd: [M, 3], pcd of the lifted model from 2d depth
rot: [3, 3], rotation used to rotate the src_pcd to the current frame
trans: [3, 1], translation used to translate the src_pcd to the current frame
src_feats: [N, C]
tgt_feats: [M, C]
"""
if rot is not None and trans is not None:
src_pcd = (torch.matmul(rot, src_pcd.transpose(0, 1)) +
trans).transpose(0, 1)
stats = dict()
#######################################
# filter some of correspondence
if(correspondence.size(0) > self.max_points):
choice = np.random.permutation(correspondence.size(0))[
:self.max_points]
correspondence = correspondence[choice]
src_idx = correspondence[:, 0]
tgt_idx = correspondence[:, 1]
src_pcd, tgt_pcd = src_pcd[src_idx], tgt_pcd[tgt_idx]
src_feats, tgt_feats = src_feats[src_idx], tgt_feats[tgt_idx]
#######################
# get L2 distance between source / target point cloud
coords_dist = torch.sqrt(square_distance(
src_pcd[None, :, :], tgt_pcd[None, :, :]).squeeze(0))
feats_dist = torch.sqrt(square_distance(
src_feats[None, :, :], tgt_feats[None, :, :], normalised=True)).squeeze(0)
##############################
# get FMR and circle loss
##############################
recall = self.get_recall(coords_dist, feats_dist)
circle_loss = self.get_circle_loss(coords_dist, feats_dist)
stats['circle_loss'] = circle_loss
stats['recall'] = recall
return stats
class PointAlignmentLoss(nn.Module):
def __init__(self, loss_weight=1, ):
super().__init__()
self._loss_weight = loss_weight
def forward(self, R_pred, t_pred, R_tgt, t_tgt, points):
return self._loss_weight*self._compute_loss(R_pred, t_pred, R_tgt, t_tgt, points)
def _compute_loss(self, R_pred, t_pred, R_tgt, t_tgt, points, ):
"""[summary]
Args:
R_pred ([type]): [Bx3x3]
t_pred ([type]): [Bx3]
R_tgt ([type]): [Bx3x3]
t_tgt ([type]): [Bx3]
points ([type]): [BxNx3]
Returns:
loss [type]: [loss value]
"""
loss = 0
for b in range(len(points)):
diff = points[b]@R_pred[b].transpose(-1, -2) + t_pred[b] - (
points[b]@R_tgt[b].transpose(-1, -2)+t_tgt[b])
abs_diff = diff.abs()
loss += torch.mean(abs_diff)*3
# loss/=len(points)
return loss
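# This is essentially an ADD-style alignment term: the mean absolute distance between the model points
# transformed by the predicted pose and by the target pose (the factor 3 sums over the x/y/z components).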
================================================
FILE: scripts/compile_3rdparty.sh
================================================
#!/usr/bin/bash
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
cd $SCRIPT_DIR/../thirdparty/kpconv/cpp_wrappers
bash compile_wrappers.sh
cd $SCRIPT_DIR/../thirdparty/nn
python setup.py build_ext --inplace
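# Usage sketch: run once from the repository root before training/evaluation, e.g.
#   bash scripts/compile_3rdparty.sh
# This builds the KPConv cpp_wrappers extensions and the nearest-neighbor extension in thirdparty/nn.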
================================================
FILE: scripts/eval.sh
================================================
export PROJECT_ROOT_PATH=/home/RNNPose/Projects/Works/RNNPose_release
export PYTHONPATH="$PROJECT_ROOT_PATH:$PYTHONPATH"
export PYTHONPATH="$PROJECT_ROOT_PATH/thirdparty:$PYTHONPATH"
export model_dir='outputs'
seq=cat
gpu=1
start_gpu_id=0
mkdir $model_dir
train_file=$PROJECT_ROOT_PATH/tools/eval.py
config_path=/$PROJECT_ROOT_PATH/config/linemod/"$seq"_fw0.5.yml
pretrain=$PROJECT_ROOT_PATH/weights/trained_models/"$seq".tckpt
python -u $train_file multi_proc_train \
--config_path $config_path \
--model_dir $model_dir/results \
--use_dist True \
--dist_port 10000 \
--gpus_per_node $gpu \
--optim_eval True \
--use_apex False \
--world_size $gpu \
--start_gpu_id $start_gpu_id \
--pretrained_path $pretrain
================================================
FILE: scripts/eval_lmocc.sh
================================================
export PROJECT_ROOT_PATH=/home/RNNPose/Projects/Works/RNNPose_release
export PYTHONPATH="$PROJECT_ROOT_PATH:$PYTHONPATH"
export PYTHONPATH="$PROJECT_ROOT_PATH/thirdparty:$PYTHONPATH"
export model_dir='outputs'
seq=ape
gpu=1
start_gpu_id=0
mkdir $model_dir
train_file=$PROJECT_ROOT_PATH/tools/eval.py
config_path=/$PROJECT_ROOT_PATH/config/linemod/"$seq"_fw0.5_occ.yml
pretrain=$PROJECT_ROOT_PATH/weights/trained_models/"$seq".tckpt
python -u $train_file multi_proc_train \
--config_path $config_path \
--model_dir $model_dir/results \
--use_dist True \
--dist_port 10000 \
--gpus_per_node $gpu \
--optim_eval True \
--use_apex False \
--world_size $gpu \
--start_gpu_id $start_gpu_id \
--pretrained_path $pretrain
# --use_apex True \
================================================
FILE: scripts/run_dataformatter.sh
================================================
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
PROJ_ROOT=$SCRIPT_DIR/../
python $PROJ_ROOT/tools/transform_data_format.py run --data_type "LM_FUSE_PVNET" --data_info_path $PROJ_ROOT/EXPDATA/"/data_info/linemod_all_10k_default.info.all" --image_root $PROJ_ROOT/EXPDATA/raw_data/fuse --depth_root $PROJ_ROOT/EXPDATA/raw_data/orig_renders --save_dir $PROJ_ROOT/EXPDATA/LINEMOD/fuse_formatted && touch 3.finished
================================================
FILE: scripts/run_datainfo_generation.sh
================================================
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
PROJ_ROOT=$SCRIPT_DIR/../
export PYTHONPATH=$PROJ_ROOT:$PYTHONPATH
mkdir --parent $PROJ_ROOT/EXPDATA/data_info/deepim/
python $PROJ_ROOT/tools/generate_data_info_deepim_0_orig.py create_data_info --data_root $PROJ_ROOT/EXPDATA/LM6d_converted/LM6d_refine --saving_path $PROJ_ROOT/EXPDATA/data_info/deepim/linemod_orig_deepim.info
python $PROJ_ROOT/tools/generate_data_info_deepim_1_syn.py create_data_info --data_root $PROJ_ROOT/EXPDATA/LM6d_converted/LM6d_refine_syn --saving_path $PROJ_ROOT/EXPDATA/data_info/deepim/linemod_syn_deepim.info --with_assertion True
python $PROJ_ROOT/tools/generate_data_info_deepim_2_posecnnval.py create_data_info --data_root $PROJ_ROOT/EXPDATA/LM6d_converted/LM6d_refine --saving_path $PROJ_ROOT/EXPDATA/data_info/linemod_posecnn.info --with_assertion True
python $PROJ_ROOT/tools/generate_data_info_v2_deepim.py create_data_info --data_root $PROJ_ROOT/EXPDATA/LINEMOD/fuse_formatted/ --saving_path $PROJ_ROOT/EXPDATA/data_info/linemod_fusesformatted_all10k_deepim.info --training_data_ratio 1 --shuffle=False
================================================
FILE: scripts/train.sh
================================================
export PROJECT_ROOT_PATH=/home/RNNPose/Projects/Works/RNNPose_release
export PYTHONPATH="$PROJECT_ROOT_PATH:$PYTHONPATH"
export PYTHONPATH="$PROJECT_ROOT_PATH/thirdparty:$PYTHONPATH"
export model_dir='outputs'
seq=cat
gpu=1
start_gpu_id=0
mkdir $model_dir
train_file=$PROJECT_ROOT_PATH/tools/train.py
config_path=/$PROJECT_ROOT_PATH/config/linemod/"$seq"_fw0.5.yml
# pretrain=$PROJECT_ROOT_PATH/weights/trained_models/"$seq".tckpt
python -u $train_file multi_proc_train \
--config_path $config_path \
--model_dir $model_dir/results \
--use_dist True \
--dist_port 10000 \
--gpus_per_node $gpu \
--optim_eval True \
--use_apex False \
--world_size $gpu \
--start_gpu_id $start_gpu_id
# --pretrained_path $pretrain
================================================
FILE: thirdparty/kpconv/__init__.py
================================================
================================================
FILE: thirdparty/kpconv/cpp_wrappers/compile_wrappers.sh
================================================
#!/bin/bash
# Compile cpp subsampling
cd cpp_subsampling
python3 setup.py build_ext --inplace
cd ..
# Compile cpp neighbors
cd cpp_neighbors
python3 setup.py build_ext --inplace
cd ..
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_neighbors/build.bat
================================================
@echo off
py setup.py build_ext --inplace
pause
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_neighbors/neighbors/neighbors.cpp
================================================
#include "neighbors.h"
void brute_neighbors(vector<PointXYZ>& queries, vector<PointXYZ>& supports, vector<int>& neighbors_indices, float radius, int verbose)
{
// Initialize variables
// ******************
// square radius
float r2 = radius * radius;
// indices
int i0 = 0;
// Counting vector
int max_count = 0;
vector<vector<int>> tmp(queries.size());
// Search neighbors indices
// ***********************
for (auto& p0 : queries)
{
int i = 0;
for (auto& p : supports)
{
if ((p0 - p).sq_norm() < r2)
{
tmp[i0].push_back(i);
if (tmp[i0].size() > max_count)
max_count = tmp[i0].size();
}
i++;
}
i0++;
}
// Reserve the memory
neighbors_indices.resize(queries.size() * max_count);
i0 = 0;
for (auto& inds : tmp)
{
for (int j = 0; j < max_count; j++)
{
if (j < inds.size())
neighbors_indices[i0 * max_count + j] = inds[j];
else
neighbors_indices[i0 * max_count + j] = -1;
}
i0++;
}
return;
}
void ordered_neighbors(vector<PointXYZ>& queries,
vector<PointXYZ>& supports,
vector<int>& neighbors_indices,
float radius)
{
// Initialize variables
// ******************
// square radius
float r2 = radius * radius;
// indices
int i0 = 0;
// Counting vector
int max_count = 0;
float d2;
vector<vector<int>> tmp(queries.size());
vector<vector<float>> dists(queries.size());
// Search neighbors indices
// ***********************
for (auto& p0 : queries)
{
int i = 0;
for (auto& p : supports)
{
d2 = (p0 - p).sq_norm();
if (d2 < r2)
{
// Find order of the new point
auto it = std::upper_bound(dists[i0].begin(), dists[i0].end(), d2);
int index = std::distance(dists[i0].begin(), it);
// Insert element
dists[i0].insert(it, d2);
tmp[i0].insert(tmp[i0].begin() + index, i);
// Update max count
if (tmp[i0].size() > max_count)
max_count = tmp[i0].size();
}
i++;
}
i0++;
}
// Reserve the memory
neighbors_indices.resize(queries.size() * max_count);
i0 = 0;
for (auto& inds : tmp)
{
for (int j = 0; j < max_count; j++)
{
if (j < inds.size())
neighbors_indices[i0 * max_count + j] = inds[j];
else
neighbors_indices[i0 * max_count + j] = -1;
}
i0++;
}
return;
}
void batch_ordered_neighbors(vector<PointXYZ>& queries,
vector<PointXYZ>& supports,
vector<int>& q_batches,
vector<int>& s_batches,
vector<int>& neighbors_indices,
float radius)
{
// Initialize variables
// ******************
// square radius
float r2 = radius * radius;
// indices
int i0 = 0;
// Counting vector
int max_count = 0;
float d2;
vector<vector<int>> tmp(queries.size());
vector<vector<float>> dists(queries.size());
// batch index
int b = 0;
int sum_qb = 0;
int sum_sb = 0;
// Search neighbors indices
// ***********************
for (auto& p0 : queries)
{
// Check if we changed batch
if (i0 == sum_qb + q_batches[b])
{
sum_qb += q_batches[b];
sum_sb += s_batches[b];
b++;
}
// Loop only over the supports of current batch
vector<PointXYZ>::iterator p_it;
int i = 0;
for(p_it = supports.begin() + sum_sb; p_it < supports.begin() + sum_sb + s_batches[b]; p_it++ )
{
d2 = (p0 - *p_it).sq_norm();
if (d2 < r2)
{
// Find order of the new point
auto it = std::upper_bound(dists[i0].begin(), dists[i0].end(), d2);
int index = std::distance(dists[i0].begin(), it);
// Insert element
dists[i0].insert(it, d2);
tmp[i0].insert(tmp[i0].begin() + index, sum_sb + i);
// Update max count
if (tmp[i0].size() > max_count)
max_count = tmp[i0].size();
}
i++;
}
i0++;
}
// Reserve the memory
neighbors_indices.resize(queries.size() * max_count);
i0 = 0;
for (auto& inds : tmp)
{
for (int j = 0; j < max_count; j++)
{
if (j < inds.size())
neighbors_indices[i0 * max_count + j] = inds[j];
else
neighbors_indices[i0 * max_count + j] = supports.size();
}
i0++;
}
return;
}
void batch_nanoflann_neighbors(vector<PointXYZ>& queries,
vector<PointXYZ>& supports,
vector<int>& q_batches,
vector<int>& s_batches,
vector<int>& neighbors_indices,
float radius)
{
// Initialize variables
// ******************
// indices
int i0 = 0;
// Square radius
float r2 = radius * radius;
// Counting vector
int max_count = 0;
float d2;
vector<vector<pair<size_t, float>>> all_inds_dists(queries.size());
// batch index
int b = 0;
int sum_qb = 0;
int sum_sb = 0;
// Nanoflann related variables
// ***************************
// Cloud variable
PointCloud current_cloud;
// Tree parameters
nanoflann::KDTreeSingleIndexAdaptorParams tree_params(10 /* max leaf */);
// KDTree type definition
typedef nanoflann::KDTreeSingleIndexAdaptor< nanoflann::L2_Simple_Adaptor<float, PointCloud> ,
PointCloud,
3 > my_kd_tree_t;
// Pointer to trees
my_kd_tree_t* index;
// Build KDTree for the first batch element
current_cloud.pts = vector<PointXYZ>(supports.begin() + sum_sb, supports.begin() + sum_sb + s_batches[b]);
index = new my_kd_tree_t(3, current_cloud, tree_params);
index->buildIndex();
// Search neighbors indices
// ***********************
// Search params
nanoflann::SearchParams search_params;
search_params.sorted = true;
for (auto& p0 : queries)
{
// Check if we changed batch
if (i0 == sum_qb + q_batches[b])
{
sum_qb += q_batches[b];
sum_sb += s_batches[b];
b++;
// Change the points
current_cloud.pts.clear();
current_cloud.pts = vector<PointXYZ>(supports.begin() + sum_sb, supports.begin() + sum_sb + s_batches[b]);
// Build KDTree of the current element of the batch
delete index;
index = new my_kd_tree_t(3, current_cloud, tree_params);
index->buildIndex();
}
// Initial guess of neighbors size
all_inds_dists[i0].reserve(max_count);
// Find neighbors
float query_pt[3] = { p0.x, p0.y, p0.z};
size_t nMatches = index->radiusSearch(query_pt, r2, all_inds_dists[i0], search_params);
// Update max count
if (nMatches > max_count)
max_count = nMatches;
// Increment query idx
i0++;
}
// Reserve the memory
neighbors_indices.resize(queries.size() * max_count);
i0 = 0;
sum_sb = 0;
sum_qb = 0;
b = 0;
for (auto& inds_dists : all_inds_dists)
{
// Check if we changed batch
if (i0 == sum_qb + q_batches[b])
{
sum_qb += q_batches[b];
sum_sb += s_batches[b];
b++;
}
for (int j = 0; j < max_count; j++)
{
if (j < inds_dists.size())
neighbors_indices[i0 * max_count + j] = inds_dists[j].first + sum_sb;
else
neighbors_indices[i0 * max_count + j] = supports.size();
}
i0++;
}
delete index;
return;
}
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_neighbors/neighbors/neighbors.h
================================================
#include "../../cpp_utils/cloud/cloud.h"
#include "../../cpp_utils/nanoflann/nanoflann.hpp"
#include <set>
#include <cstdint>
using namespace std;
void ordered_neighbors(vector<PointXYZ>& queries,
vector<PointXYZ>& supports,
vector<int>& neighbors_indices,
float radius);
void batch_ordered_neighbors(vector<PointXYZ>& queries,
vector<PointXYZ>& supports,
vector<int>& q_batches,
vector<int>& s_batches,
vector<int>& neighbors_indices,
float radius);
void batch_nanoflann_neighbors(vector<PointXYZ>& queries,
vector<PointXYZ>& supports,
vector<int>& q_batches,
vector<int>& s_batches,
vector<int>& neighbors_indices,
float radius);
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_neighbors/setup.py
================================================
from distutils.core import setup, Extension
import numpy.distutils.misc_util
# Adding OpenCV to project
# ************************
# Adding sources of the project
# *****************************
SOURCES = ["../cpp_utils/cloud/cloud.cpp",
"neighbors/neighbors.cpp",
"wrapper.cpp"]
module = Extension(name="radius_neighbors",
sources=SOURCES,
extra_compile_args=['-std=c++11',
'-D_GLIBCXX_USE_CXX11_ABI=0'])
setup(ext_modules=[module], include_dirs=numpy.distutils.misc_util.get_numpy_include_dirs())
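# Build note: this extension is compiled in place via `python3 setup.py build_ext --inplace`
# (see ../compile_wrappers.sh); the result is importable as `radius_neighbors` and exposes the
# `batch_query` method implemented in wrapper.cpp.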
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_neighbors/wrapper.cpp
================================================
#include <Python.h>
#include <numpy/arrayobject.h>
#include "neighbors/neighbors.h"
#include <string>
// docstrings for our module
// *************************
static char module_docstring[] = "This module provides two methods to compute radius neighbors from pointclouds or batch of pointclouds";
static char batch_query_docstring[] = "Method to get radius neighbors in a batch of stacked pointclouds";
// Declare the functions
// *********************
static PyObject *batch_neighbors(PyObject *self, PyObject *args, PyObject *keywds);
// Specify the members of the module
// *********************************
static PyMethodDef module_methods[] =
{
{ "batch_query", (PyCFunction)batch_neighbors, METH_VARARGS | METH_KEYWORDS, batch_query_docstring },
{NULL, NULL, 0, NULL}
};
// Initialize the module
// *********************
static struct PyModuleDef moduledef =
{
PyModuleDef_HEAD_INIT,
"radius_neighbors", // m_name
module_docstring, // m_doc
-1, // m_size
module_methods, // m_methods
NULL, // m_reload
NULL, // m_traverse
NULL, // m_clear
NULL, // m_free
};
PyMODINIT_FUNC PyInit_radius_neighbors(void)
{
import_array();
return PyModule_Create(&moduledef);
}
// Definition of the batch_subsample method
// **********************************
static PyObject* batch_neighbors(PyObject* self, PyObject* args, PyObject* keywds)
{
// Manage inputs
// *************
// Args containers
PyObject* queries_obj = NULL;
PyObject* supports_obj = NULL;
PyObject* q_batches_obj = NULL;
PyObject* s_batches_obj = NULL;
// Keywords containers
static char* kwlist[] = { "queries", "supports", "q_batches", "s_batches", "radius", NULL };
float radius = 0.1;
// Parse the input
if (!PyArg_ParseTupleAndKeywords(args, keywds, "OOOO|$f", kwlist, &queries_obj, &supports_obj, &q_batches_obj, &s_batches_obj, &radius))
{
PyErr_SetString(PyExc_RuntimeError, "Error parsing arguments");
return NULL;
}
// Interpret the input objects as numpy arrays.
PyObject* queries_array = PyArray_FROM_OTF(queries_obj, NPY_FLOAT, NPY_IN_ARRAY);
PyObject* supports_array = PyArray_FROM_OTF(supports_obj, NPY_FLOAT, NPY_IN_ARRAY);
PyObject* q_batches_array = PyArray_FROM_OTF(q_batches_obj, NPY_INT, NPY_IN_ARRAY);
PyObject* s_batches_array = PyArray_FROM_OTF(s_batches_obj, NPY_INT, NPY_IN_ARRAY);
// Verify data was loaded correctly.
if (queries_array == NULL)
{
Py_XDECREF(queries_array);
Py_XDECREF(supports_array);
Py_XDECREF(q_batches_array);
Py_XDECREF(s_batches_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting query points to numpy arrays of type float32");
return NULL;
}
if (supports_array == NULL)
{
Py_XDECREF(queries_array);
Py_XDECREF(supports_array);
Py_XDECREF(q_batches_array);
Py_XDECREF(s_batches_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting support points to numpy arrays of type float32");
return NULL;
}
if (q_batches_array == NULL)
{
Py_XDECREF(queries_array);
Py_XDECREF(supports_array);
Py_XDECREF(q_batches_array);
Py_XDECREF(s_batches_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting query batches to numpy arrays of type int32");
return NULL;
}
if (s_batches_array == NULL)
{
Py_XDECREF(queries_array);
Py_XDECREF(supports_array);
Py_XDECREF(q_batches_array);
Py_XDECREF(s_batches_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting support batches to numpy arrays of type int32");
return NULL;
}
// Check that the input array respect the dims
if ((int)PyArray_NDIM(queries_array) != 2 || (int)PyArray_DIM(queries_array, 1) != 3)
{
Py_XDECREF(queries_array);
Py_XDECREF(supports_array);
Py_XDECREF(q_batches_array);
Py_XDECREF(s_batches_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : query.shape is not (N, 3)");
return NULL;
}
if ((int)PyArray_NDIM(supports_array) != 2 || (int)PyArray_DIM(supports_array, 1) != 3)
{
Py_XDECREF(queries_array);
Py_XDECREF(supports_array);
Py_XDECREF(q_batches_array);
Py_XDECREF(s_batches_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : support.shape is not (N, 3)");
return NULL;
}
if ((int)PyArray_NDIM(q_batches_array) > 1)
{
Py_XDECREF(queries_array);
Py_XDECREF(supports_array);
Py_XDECREF(q_batches_array);
Py_XDECREF(s_batches_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : queries_batches.shape is not (B,) ");
return NULL;
}
if ((int)PyArray_NDIM(s_batches_array) > 1)
{
Py_XDECREF(queries_array);
Py_XDECREF(supports_array);
Py_XDECREF(q_batches_array);
Py_XDECREF(s_batches_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : supports_batches.shape is not (B,) ");
return NULL;
}
if ((int)PyArray_DIM(q_batches_array, 0) != (int)PyArray_DIM(s_batches_array, 0))
{
Py_XDECREF(queries_array);
Py_XDECREF(supports_array);
Py_XDECREF(q_batches_array);
Py_XDECREF(s_batches_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong number of batch elements: different for queries and supports ");
return NULL;
}
// Number of points
int Nq = (int)PyArray_DIM(queries_array, 0);
int Ns= (int)PyArray_DIM(supports_array, 0);
// Number of batches
int Nb = (int)PyArray_DIM(q_batches_array, 0);
// Call the C++ function
// *********************
// Convert PyArray to Cloud C++ class
vector<PointXYZ> queries;
vector<PointXYZ> supports;
vector<int> q_batches;
vector<int> s_batches;
queries = vector<PointXYZ>((PointXYZ*)PyArray_DATA(queries_array), (PointXYZ*)PyArray_DATA(queries_array) + Nq);
supports = vector<PointXYZ>((PointXYZ*)PyArray_DATA(supports_array), (PointXYZ*)PyArray_DATA(supports_array) + Ns);
q_batches = vector<int>((int*)PyArray_DATA(q_batches_array), (int*)PyArray_DATA(q_batches_array) + Nb);
s_batches = vector<int>((int*)PyArray_DATA(s_batches_array), (int*)PyArray_DATA(s_batches_array) + Nb);
// Create result containers
vector<int> neighbors_indices;
// Compute results
//batch_ordered_neighbors(queries, supports, q_batches, s_batches, neighbors_indices, radius);
batch_nanoflann_neighbors(queries, supports, q_batches, s_batches, neighbors_indices, radius);
// Check result
if (neighbors_indices.size() < 1)
{
PyErr_SetString(PyExc_RuntimeError, "Error");
return NULL;
}
// Manage outputs
// **************
// Maximal number of neighbors
int max_neighbors = neighbors_indices.size() / Nq;
// Dimension of output containers
npy_intp* neighbors_dims = new npy_intp[2];
neighbors_dims[0] = Nq;
neighbors_dims[1] = max_neighbors;
// Create output array
PyObject* res_obj = PyArray_SimpleNew(2, neighbors_dims, NPY_INT);
PyObject* ret = NULL;
// Fill output array with values
size_t size_in_bytes = Nq * max_neighbors * sizeof(int);
memcpy(PyArray_DATA(res_obj), neighbors_indices.data(), size_in_bytes);
// Merge results
ret = Py_BuildValue("N", res_obj);
// Clean up
// ********
Py_XDECREF(queries_array);
Py_XDECREF(supports_array);
Py_XDECREF(q_batches_array);
Py_XDECREF(s_batches_array);
return ret;
}
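// Usage sketch from Python (array names illustrative), after building the extension:
//   import radius_neighbors
//   idx = radius_neighbors.batch_query(queries, supports, q_batches, s_batches, radius=0.1)
// queries/supports are float32 (N, 3)/(M, 3) arrays, q_batches/s_batches are int32 (B,) per-batch
// point counts, and idx is an int32 (N, max_neighbors) array padded with len(supports) where a
// query has fewer neighbors.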
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_subsampling/build.bat
================================================
@echo off
py setup.py build_ext --inplace
pause
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_subsampling/grid_subsampling/grid_subsampling.cpp
================================================
#include "grid_subsampling.h"
void grid_subsampling(vector<PointXYZ>& original_points,
vector<PointXYZ>& subsampled_points,
vector<float>& original_features,
vector<float>& subsampled_features,
vector<int>& original_classes,
vector<int>& subsampled_classes,
float sampleDl,
int verbose) {
// Initialize variables
// ******************
// Number of points in the cloud
size_t N = original_points.size();
// Dimension of the features
size_t fdim = original_features.size() / N;
size_t ldim = original_classes.size() / N;
// Limits of the cloud
PointXYZ minCorner = min_point(original_points);
PointXYZ maxCorner = max_point(original_points);
PointXYZ originCorner = floor(minCorner * (1/sampleDl)) * sampleDl;
// Dimensions of the grid
size_t sampleNX = (size_t)floor((maxCorner.x - originCorner.x) / sampleDl) + 1;
size_t sampleNY = (size_t)floor((maxCorner.y - originCorner.y) / sampleDl) + 1;
//size_t sampleNZ = (size_t)floor((maxCorner.z - originCorner.z) / sampleDl) + 1;
// Check if features and classes need to be processed
bool use_feature = original_features.size() > 0;
bool use_classes = original_classes.size() > 0;
// Create the sampled map
// **********************
// Verbose parameters
int i = 0;
int nDisp = N / 100;
// Initialize variables
size_t iX, iY, iZ, mapIdx;
unordered_map<size_t, SampledData> data;
for (auto& p : original_points)
{
// Position of point in sample map
iX = (size_t)floor((p.x - originCorner.x) / sampleDl);
iY = (size_t)floor((p.y - originCorner.y) / sampleDl);
iZ = (size_t)floor((p.z - originCorner.z) / sampleDl);
mapIdx = iX + sampleNX*iY + sampleNX*sampleNY*iZ;
// If not already created, create key
if (data.count(mapIdx) < 1)
data.emplace(mapIdx, SampledData(fdim, ldim));
// Fill the sample map
if (use_feature && use_classes)
data[mapIdx].update_all(p, original_features.begin() + i * fdim, original_classes.begin() + i * ldim);
else if (use_feature)
data[mapIdx].update_features(p, original_features.begin() + i * fdim);
else if (use_classes)
data[mapIdx].update_classes(p, original_classes.begin() + i * ldim);
else
data[mapIdx].update_points(p);
// Display
i++;
if (verbose > 1 && i%nDisp == 0)
std::cout << "\rSampled Map : " << std::setw(3) << i / nDisp << "%";
}
// Divide for barycentre and transfer to a vector
subsampled_points.reserve(data.size());
if (use_feature)
subsampled_features.reserve(data.size() * fdim);
if (use_classes)
subsampled_classes.reserve(data.size() * ldim);
for (auto& v : data)
{
subsampled_points.push_back(v.second.point * (1.0 / v.second.count));
if (use_feature)
{
float count = (float)v.second.count;
transform(v.second.features.begin(),
v.second.features.end(),
v.second.features.begin(),
[count](float f) { return f / count;});
subsampled_features.insert(subsampled_features.end(),v.second.features.begin(),v.second.features.end());
}
if (use_classes)
{
for (int i = 0; i < ldim; i++)
subsampled_classes.push_back(max_element(v.second.labels[i].begin(), v.second.labels[i].end(),
[](const pair<int, int>&a, const pair<int, int>&b){return a.second < b.second;})->first);
}
}
return;
}
void batch_grid_subsampling(vector<PointXYZ>& original_points,
vector<PointXYZ>& subsampled_points,
vector<float>& original_features,
vector<float>& subsampled_features,
vector<int>& original_classes,
vector<int>& subsampled_classes,
vector<int>& original_batches,
vector<int>& subsampled_batches,
float sampleDl,
int max_p)
{
// Initialize variables
// ******************
int b = 0;
int sum_b = 0;
// Number of points in the cloud
size_t N = original_points.size();
// Dimension of the features
size_t fdim = original_features.size() / N;
size_t ldim = original_classes.size() / N;
// Handle max_p = 0
if (max_p < 1)
max_p = N;
// Loop over batches
// *****************
for (b = 0; b < original_batches.size(); b++)
{
// Extract batch points features and labels
vector<PointXYZ> b_o_points = vector<PointXYZ>(original_points.begin() + sum_b,
original_points.begin() + sum_b + original_batches[b]);
vector<float> b_o_features;
if (original_features.size() > 0)
{
b_o_features = vector<float>(original_features.begin() + sum_b * fdim,
original_features.begin() + (sum_b + original_batches[b]) * fdim);
}
vector<int> b_o_classes;
if (original_classes.size() > 0)
{
b_o_classes = vector<int>(original_classes.begin() + sum_b * ldim,
original_classes.begin() + (sum_b + original_batches[b]) * ldim);
}
}
// Create result containers
vector<PointXYZ> b_s_points;
vector<float> b_s_features;
vector<int> b_s_classes;
// Compute subsampling on current batch
grid_subsampling(b_o_points,
b_s_points,
b_o_features,
b_s_features,
b_o_classes,
b_s_classes,
sampleDl,
0);
// Stack batches points features and labels
// ****************************************
// If too many points remove some
if (b_s_points.size() <= max_p)
{
subsampled_points.insert(subsampled_points.end(), b_s_points.begin(), b_s_points.end());
if (original_features.size() > 0)
subsampled_features.insert(subsampled_features.end(), b_s_features.begin(), b_s_features.end());
if (original_classes.size() > 0)
subsampled_classes.insert(subsampled_classes.end(), b_s_classes.begin(), b_s_classes.end());
subsampled_batches.push_back(b_s_points.size());
}
else
{
subsampled_points.insert(subsampled_points.end(), b_s_points.begin(), b_s_points.begin() + max_p);
if (original_features.size() > 0)
subsampled_features.insert(subsampled_features.end(), b_s_features.begin(), b_s_features.begin() + max_p * fdim);
if (original_classes.size() > 0)
subsampled_classes.insert(subsampled_classes.end(), b_s_classes.begin(), b_s_classes.begin() + max_p * ldim);
subsampled_batches.push_back(max_p);
}
// Stack new batch lengths
sum_b += original_batches[b];
}
return;
}
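// --- Illustrative usage sketch (hypothetical helper, for documentation only). ---
// batch_grid_subsampling expects all clouds of a batch stacked into a single
// vector, with original_batches listing the length of each cloud; the per-cloud
// lengths of the result come back through subsampled_batches.
static void demo_batch_grid_subsampling()
{
vector<PointXYZ> stacked = { PointXYZ(0.0f, 0.0f, 0.0f), PointXYZ(0.1f, 0.0f, 0.0f),  // cloud 0
PointXYZ(5.0f, 5.0f, 5.0f) };                                                          // cloud 1
vector<int> lengths = { 2, 1 };
vector<PointXYZ> sub_points;
vector<int> sub_lengths;
vector<float> f_in, f_out;
vector<int> c_in, c_out;
// max_p = 0 means "keep all subsampled points of each cloud".
batch_grid_subsampling(stacked, sub_points, f_in, f_out, c_in, c_out,
lengths, sub_lengths, 0.5f, 0);
// sub_lengths is {1, 1}: both points of cloud 0 fall into the same voxel.
}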
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_subsampling/grid_subsampling/grid_subsampling.h
================================================
#include "../../cpp_utils/cloud/cloud.h"
#include <set>
#include <cstdint>
using namespace std;
class SampledData
{
public:
// Elements
// ********
int count;
PointXYZ point;
vector<float> features;
vector<unordered_map<int, int>> labels;
// Methods
// *******
// Constructor
SampledData()
{
count = 0;
point = PointXYZ();
}
SampledData(const size_t fdim, const size_t ldim)
{
count = 0;
point = PointXYZ();
features = vector<float>(fdim);
labels = vector<unordered_map<int, int>>(ldim);
}
// Method Update
void update_all(const PointXYZ p, vector<float>::iterator f_begin, vector<int>::iterator l_begin)
{
count += 1;
point += p;
transform (features.begin(), features.end(), f_begin, features.begin(), plus<float>());
int i = 0;
for(vector<int>::iterator it = l_begin; it != l_begin + labels.size(); ++it)
{
labels[i][*it] += 1;
i++;
}
return;
}
void update_features(const PointXYZ p, vector<float>::iterator f_begin)
{
count += 1;
point += p;
transform (features.begin(), features.end(), f_begin, features.begin(), plus<float>());
return;
}
void update_classes(const PointXYZ p, vector<int>::iterator l_begin)
{
count += 1;
point += p;
int i = 0;
for(vector<int>::iterator it = l_begin; it != l_begin + labels.size(); ++it)
{
labels[i][*it] += 1;
i++;
}
return;
}
void update_points(const PointXYZ p)
{
count += 1;
point += p;
return;
}
};
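// --- Illustrative sketch (hypothetical helper, not part of the upstream header). ---
// A SampledData cell only accumulates sums: point is the running sum of
// positions and labels[d] is a per-dimension vote histogram. The barycentre is
// obtained by dividing by count, as done later in grid_subsampling():
inline PointXYZ sampled_data_barycentre_example()
{
SampledData cell(0, 0);                        // no features, no labels
cell.update_points(PointXYZ(0.0f, 0.0f, 0.0f));
cell.update_points(PointXYZ(1.0f, 0.0f, 0.0f));
return cell.point * (1.0f / cell.count);       // -> (0.5, 0.0, 0.0)
}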
void grid_subsampling(vector<PointXYZ>& original_points,
vector<PointXYZ>& subsampled_points,
vector<float>& original_features,
vector<float>& subsampled_features,
vector<int>& original_classes,
vector<int>& subsampled_classes,
float sampleDl,
int verbose);
void batch_grid_subsampling(vector<PointXYZ>& original_points,
vector<PointXYZ>& subsampled_points,
vector<float>& original_features,
vector<float>& subsampled_features,
vector<int>& original_classes,
vector<int>& subsampled_classes,
vector<int>& original_batches,
vector<int>& subsampled_batches,
float sampleDl,
int max_p);
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_subsampling/setup.py
================================================
from distutils.core import setup, Extension
import numpy.distutils.misc_util
# Adding OpenCV to project
# ************************
# Adding sources of the project
# *****************************
SOURCES = ["../cpp_utils/cloud/cloud.cpp",
"grid_subsampling/grid_subsampling.cpp",
"wrapper.cpp"]
module = Extension(name="grid_subsampling",
sources=SOURCES,
extra_compile_args=['-std=c++11',
'-D_GLIBCXX_USE_CXX11_ABI=0'])
setup(ext_modules=[module], include_dirs=numpy.distutils.misc_util.get_numpy_include_dirs())
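# Build note (assumed usage, mirroring the other cpp_wrappers extensions): the
# module is expected to be compiled in place, e.g. via
#   python3 setup.py build_ext --inplace
# which produces a grid_subsampling shared library importable from Python.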
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_subsampling/wrapper.cpp
================================================
#include <Python.h>
#include <numpy/arrayobject.h>
#include "grid_subsampling/grid_subsampling.h"
#include <string>
// docstrings for our module
// *************************
static char module_docstring[] = "This module provides an interface for the subsampling of a batch of stacked pointclouds";
static char subsample_docstring[] = "function subsampling a pointcloud";
static char subsample_batch_docstring[] = "function subsampling a batch of stacked pointclouds";
// Declare the functions
// *********************
static PyObject *cloud_subsampling(PyObject* self, PyObject* args, PyObject* keywds);
static PyObject *batch_subsampling(PyObject *self, PyObject *args, PyObject *keywds);
// Specify the members of the module
// *********************************
static PyMethodDef module_methods[] =
{
{ "subsample", (PyCFunction)cloud_subsampling, METH_VARARGS | METH_KEYWORDS, subsample_docstring },
{ "subsample_batch", (PyCFunction)batch_subsampling, METH_VARARGS | METH_KEYWORDS, subsample_batch_docstring },
{NULL, NULL, 0, NULL}
};
// Initialize the module
// *********************
static struct PyModuleDef moduledef =
{
PyModuleDef_HEAD_INIT,
"grid_subsampling", // m_name
module_docstring, // m_doc
-1, // m_size
module_methods, // m_methods
NULL, // m_reload
NULL, // m_traverse
NULL, // m_clear
NULL, // m_free
};
PyMODINIT_FUNC PyInit_grid_subsampling(void)
{
import_array();
return PyModule_Create(&moduledef);
}
// Definition of the batch_subsample method
// **********************************
static PyObject* batch_subsampling(PyObject* self, PyObject* args, PyObject* keywds)
{
// Manage inputs
// *************
// Args containers
PyObject* points_obj = NULL;
PyObject* features_obj = NULL;
PyObject* classes_obj = NULL;
PyObject* batches_obj = NULL;
// Keywords containers
static char* kwlist[] = { "points", "batches", "features", "classes", "sampleDl", "method", "max_p", "verbose", NULL };
float sampleDl = 0.1;
const char* method_buffer = "barycenters";
int verbose = 0;
int max_p = 0;
// Parse the input
if (!PyArg_ParseTupleAndKeywords(args, keywds, "OO|$OOfsii", kwlist, &points_obj, &batches_obj, &features_obj, &classes_obj, &sampleDl, &method_buffer, &max_p, &verbose))
{
PyErr_SetString(PyExc_RuntimeError, "Error parsing arguments");
return NULL;
}
// Get the method argument
string method(method_buffer);
// Interpret method
if (method.compare("barycenters") && method.compare("voxelcenters"))
{
PyErr_SetString(PyExc_RuntimeError, "Error parsing method. Valid method names are \"barycenters\" and \"voxelcenters\" ");
return NULL;
}
// Check if using features or classes
bool use_feature = true, use_classes = true;
if (features_obj == NULL)
use_feature = false;
if (classes_obj == NULL)
use_classes = false;
// Interpret the input objects as numpy arrays.
PyObject* points_array = PyArray_FROM_OTF(points_obj, NPY_FLOAT, NPY_IN_ARRAY);
PyObject* batches_array = PyArray_FROM_OTF(batches_obj, NPY_INT, NPY_IN_ARRAY);
PyObject* features_array = NULL;
PyObject* classes_array = NULL;
if (use_feature)
features_array = PyArray_FROM_OTF(features_obj, NPY_FLOAT, NPY_IN_ARRAY);
if (use_classes)
classes_array = PyArray_FROM_OTF(classes_obj, NPY_INT, NPY_IN_ARRAY);
// Verify data was loaded correctly.
if (points_array == NULL)
{
Py_XDECREF(points_array);
Py_XDECREF(batches_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting input points to numpy arrays of type float32");
return NULL;
}
if (batches_array == NULL)
{
Py_XDECREF(points_array);
Py_XDECREF(batches_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting input batches to numpy arrays of type int32");
return NULL;
}
if (use_feature && features_array == NULL)
{
Py_XDECREF(points_array);
Py_XDECREF(batches_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting input features to numpy arrays of type float32");
return NULL;
}
if (use_classes && classes_array == NULL)
{
Py_XDECREF(points_array);
Py_XDECREF(batches_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting input classes to numpy arrays of type int32");
return NULL;
}
// Check that the input array respect the dims
if ((int)PyArray_NDIM(points_array) != 2 || (int)PyArray_DIM(points_array, 1) != 3)
{
Py_XDECREF(points_array);
Py_XDECREF(batches_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : points.shape is not (N, 3)");
return NULL;
}
if ((int)PyArray_NDIM(batches_array) > 1)
{
Py_XDECREF(points_array);
Py_XDECREF(batches_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : batches.shape is not (B,) ");
return NULL;
}
if (use_feature && ((int)PyArray_NDIM(features_array) != 2))
{
Py_XDECREF(points_array);
Py_XDECREF(batches_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : features.shape is not (N, d)");
return NULL;
}
if (use_classes && (int)PyArray_NDIM(classes_array) > 2)
{
Py_XDECREF(points_array);
Py_XDECREF(batches_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : classes.shape is not (N,) or (N, d)");
return NULL;
}
// Number of points
int N = (int)PyArray_DIM(points_array, 0);
// Number of batches
int Nb = (int)PyArray_DIM(batches_array, 0);
// Dimension of the features
int fdim = 0;
if (use_feature)
fdim = (int)PyArray_DIM(features_array, 1);
//Dimension of labels
int ldim = 1;
if (use_classes && (int)PyArray_NDIM(classes_array) == 2)
ldim = (int)PyArray_DIM(classes_array, 1);
// Check that the input array respect the number of points
if (use_feature && (int)PyArray_DIM(features_array, 0) != N)
{
Py_XDECREF(points_array);
Py_XDECREF(batches_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : features.shape is not (N, d)");
return NULL;
}
if (use_classes && (int)PyArray_DIM(classes_array, 0) != N)
{
Py_XDECREF(points_array);
Py_XDECREF(batches_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : classes.shape is not (N,) or (N, d)");
return NULL;
}
// Call the C++ function
// *********************
// Create pyramid
if (verbose > 0)
cout << "Computing cloud pyramid with support points: " << endl;
// Convert PyArray to Cloud C++ class
vector<PointXYZ> original_points;
vector<int> original_batches;
vector<float> original_features;
vector<int> original_classes;
original_points = vector<PointXYZ>((PointXYZ*)PyArray_DATA(points_array), (PointXYZ*)PyArray_DATA(points_array) + N);
original_batches = vector<int>((int*)PyArray_DATA(batches_array), (int*)PyArray_DATA(batches_array) + Nb);
if (use_feature)
original_features = vector<float>((float*)PyArray_DATA(features_array), (float*)PyArray_DATA(features_array) + N * fdim);
if (use_classes)
original_classes = vector<int>((int*)PyArray_DATA(classes_array), (int*)PyArray_DATA(classes_array) + N * ldim);
// Subsample
vector<PointXYZ> subsampled_points;
vector<float> subsampled_features;
vector<int> subsampled_classes;
vector<int> subsampled_batches;
batch_grid_subsampling(original_points,
subsampled_points,
original_features,
subsampled_features,
original_classes,
subsampled_classes,
original_batches,
subsampled_batches,
sampleDl,
max_p);
// Check result
if (subsampled_points.size() < 1)
{
PyErr_SetString(PyExc_RuntimeError, "Error");
return NULL;
}
// Manage outputs
// **************
// Dimension of input containers
npy_intp* point_dims = new npy_intp[2];
point_dims[0] = subsampled_points.size();
point_dims[1] = 3;
npy_intp* feature_dims = new npy_intp[2];
feature_dims[0] = subsampled_points.size();
feature_dims[1] = fdim;
npy_intp* classes_dims = new npy_intp[2];
classes_dims[0] = subsampled_points.size();
classes_dims[1] = ldim;
npy_intp* batches_dims = new npy_intp[1];
batches_dims[0] = Nb;
// Create output array
PyObject* res_points_obj = PyArray_SimpleNew(2, point_dims, NPY_FLOAT);
PyObject* res_batches_obj = PyArray_SimpleNew(1, batches_dims, NPY_INT);
PyObject* res_features_obj = NULL;
PyObject* res_classes_obj = NULL;
PyObject* ret = NULL;
// Fill output array with values
size_t size_in_bytes = subsampled_points.size() * 3 * sizeof(float);
memcpy(PyArray_DATA(res_points_obj), subsampled_points.data(), size_in_bytes);
size_in_bytes = Nb * sizeof(int);
memcpy(PyArray_DATA(res_batches_obj), subsampled_batches.data(), size_in_bytes);
if (use_feature)
{
size_in_bytes = subsampled_points.size() * fdim * sizeof(float);
res_features_obj = PyArray_SimpleNew(2, feature_dims, NPY_FLOAT);
memcpy(PyArray_DATA(res_features_obj), subsampled_features.data(), size_in_bytes);
}
if (use_classes)
{
size_in_bytes = subsampled_points.size() * ldim * sizeof(int);
res_classes_obj = PyArray_SimpleNew(2, classes_dims, NPY_INT);
memcpy(PyArray_DATA(res_classes_obj), subsampled_classes.data(), size_in_bytes);
}
// Merge results
if (use_feature && use_classes)
ret = Py_BuildValue("NNNN", res_points_obj, res_batches_obj, res_features_obj, res_classes_obj);
else if (use_feature)
ret = Py_BuildValue("NNN", res_points_obj, res_batches_obj, res_features_obj);
else if (use_classes)
ret = Py_BuildValue("NNN", res_points_obj, res_batches_obj, res_classes_obj);
else
ret = Py_BuildValue("NN", res_points_obj, res_batches_obj);
// Clean up
// ********
Py_DECREF(points_array);
Py_DECREF(batches_array);
Py_XDECREF(features_array);
Py_XDECREF(classes_array);
return ret;
}
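// Usage note (illustrative, inferred from the argument parsing above): from
// Python this entry point is exposed as grid_subsampling.subsample_batch(points,
// batches, features=None, classes=None, sampleDl=..., method="barycenters",
// max_p=0, verbose=0) and returns (sub_points, sub_batches) followed by the
// subsampled features and/or classes when the corresponding inputs were given.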
// Definition of the subsample method
// ****************************************
static PyObject* cloud_subsampling(PyObject* self, PyObject* args, PyObject* keywds)
{
// Manage inputs
// *************
// Args containers
PyObject* points_obj = NULL;
PyObject* features_obj = NULL;
PyObject* classes_obj = NULL;
// Keywords containers
static char* kwlist[] = { "points", "features", "classes", "sampleDl", "method", "verbose", NULL };
float sampleDl = 0.1;
const char* method_buffer = "barycenters";
int verbose = 0;
// Parse the input
if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|$OOfsi", kwlist, &points_obj, &features_obj, &classes_obj, &sampleDl, &method_buffer, &verbose))
{
PyErr_SetString(PyExc_RuntimeError, "Error parsing arguments");
return NULL;
}
// Get the method argument
string method(method_buffer);
// Interpret method
if (method.compare("barycenters") && method.compare("voxelcenters"))
{
PyErr_SetString(PyExc_RuntimeError, "Error parsing method. Valid method names are \"barycenters\" and \"voxelcenters\" ");
return NULL;
}
// Check if using features or classes
bool use_feature = true, use_classes = true;
if (features_obj == NULL)
use_feature = false;
if (classes_obj == NULL)
use_classes = false;
// Interpret the input objects as numpy arrays.
PyObject* points_array = PyArray_FROM_OTF(points_obj, NPY_FLOAT, NPY_IN_ARRAY);
PyObject* features_array = NULL;
PyObject* classes_array = NULL;
if (use_feature)
features_array = PyArray_FROM_OTF(features_obj, NPY_FLOAT, NPY_IN_ARRAY);
if (use_classes)
classes_array = PyArray_FROM_OTF(classes_obj, NPY_INT, NPY_IN_ARRAY);
// Verify data was loaded correctly.
if (points_array == NULL)
{
Py_XDECREF(points_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting input points to numpy arrays of type float32");
return NULL;
}
if (use_feature && features_array == NULL)
{
Py_XDECREF(points_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting input features to numpy arrays of type float32");
return NULL;
}
if (use_classes && classes_array == NULL)
{
Py_XDECREF(points_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Error converting input classes to numpy arrays of type int32");
return NULL;
}
// Check that the input array respect the dims
if ((int)PyArray_NDIM(points_array) != 2 || (int)PyArray_DIM(points_array, 1) != 3)
{
Py_XDECREF(points_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : points.shape is not (N, 3)");
return NULL;
}
if (use_feature && ((int)PyArray_NDIM(features_array) != 2))
{
Py_XDECREF(points_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : features.shape is not (N, d)");
return NULL;
}
if (use_classes && (int)PyArray_NDIM(classes_array) > 2)
{
Py_XDECREF(points_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : classes.shape is not (N,) or (N, d)");
return NULL;
}
// Number of points
int N = (int)PyArray_DIM(points_array, 0);
// Dimension of the features
int fdim = 0;
if (use_feature)
fdim = (int)PyArray_DIM(features_array, 1);
//Dimension of labels
int ldim = 1;
if (use_classes && (int)PyArray_NDIM(classes_array) == 2)
ldim = (int)PyArray_DIM(classes_array, 1);
// Check that the input array respect the number of points
if (use_feature && (int)PyArray_DIM(features_array, 0) != N)
{
Py_XDECREF(points_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : features.shape is not (N, d)");
return NULL;
}
if (use_classes && (int)PyArray_DIM(classes_array, 0) != N)
{
Py_XDECREF(points_array);
Py_XDECREF(classes_array);
Py_XDECREF(features_array);
PyErr_SetString(PyExc_RuntimeError, "Wrong dimensions : classes.shape is not (N,) or (N, d)");
return NULL;
}
// Call the C++ function
// *********************
// Create pyramid
if (verbose > 0)
cout << "Computing cloud pyramid with support points: " << endl;
// Convert PyArray to Cloud C++ class
vector<PointXYZ> original_points;
vector<float> original_features;
vector<int> original_classes;
original_points = vector<PointXYZ>((PointXYZ*)PyArray_DATA(points_array), (PointXYZ*)PyArray_DATA(points_array) + N);
if (use_feature)
original_features = vector<float>((float*)PyArray_DATA(features_array), (float*)PyArray_DATA(features_array) + N * fdim);
if (use_classes)
original_classes = vector<int>((int*)PyArray_DATA(classes_array), (int*)PyArray_DATA(classes_array) + N * ldim);
// Subsample
vector<PointXYZ> subsampled_points;
vector<float> subsampled_features;
vector<int> subsampled_classes;
grid_subsampling(original_points,
subsampled_points,
original_features,
subsampled_features,
original_classes,
subsampled_classes,
sampleDl,
verbose);
// Check result
if (subsampled_points.size() < 1)
{
PyErr_SetString(PyExc_RuntimeError, "Error");
return NULL;
}
// Manage outputs
// **************
// Dimension of input containers
npy_intp* point_dims = new npy_intp[2];
point_dims[0] = subsampled_points.size();
point_dims[1] = 3;
npy_intp* feature_dims = new npy_intp[2];
feature_dims[0] = subsampled_points.size();
feature_dims[1] = fdim;
npy_intp* classes_dims = new npy_intp[2];
classes_dims[0] = subsampled_points.size();
classes_dims[1] = ldim;
// Create output array
PyObject* res_points_obj = PyArray_SimpleNew(2, point_dims, NPY_FLOAT);
PyObject* res_features_obj = NULL;
PyObject* res_classes_obj = NULL;
PyObject* ret = NULL;
// Fill output array with values
size_t size_in_bytes = subsampled_points.size() * 3 * sizeof(float);
memcpy(PyArray_DATA(res_points_obj), subsampled_points.data(), size_in_bytes);
if (use_feature)
{
size_in_bytes = subsampled_points.size() * fdim * sizeof(float);
res_features_obj = PyArray_SimpleNew(2, feature_dims, NPY_FLOAT);
memcpy(PyArray_DATA(res_features_obj), subsampled_features.data(), size_in_bytes);
}
if (use_classes)
{
size_in_bytes = subsampled_points.size() * ldim * sizeof(int);
res_classes_obj = PyArray_SimpleNew(2, classes_dims, NPY_INT);
memcpy(PyArray_DATA(res_classes_obj), subsampled_classes.data(), size_in_bytes);
}
// Merge results
if (use_feature && use_classes)
ret = Py_BuildValue("NNN", res_points_obj, res_features_obj, res_classes_obj);
else if (use_feature)
ret = Py_BuildValue("NN", res_points_obj, res_features_obj);
else if (use_classes)
ret = Py_BuildValue("NN", res_points_obj, res_classes_obj);
else
ret = Py_BuildValue("N", res_points_obj);
// Clean up
// ********
Py_DECREF(points_array);
Py_XDECREF(features_array);
Py_XDECREF(classes_array);
return ret;
}
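// Usage note (illustrative, inferred from the argument parsing above): the
// single-cloud entry point is exposed as grid_subsampling.subsample(points,
// features=None, classes=None, sampleDl=..., method="barycenters", verbose=0)
// and returns the subsampled points, optionally followed by the averaged
// features and the majority-vote classes.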
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_utils/cloud/cloud.cpp
================================================
//
//
// 0==========================0
// | Local feature test |
// 0==========================0
//
// version 1.0 :
// >
//
//---------------------------------------------------
//
// Cloud source :
// Define useful Functions/Methods
//
//----------------------------------------------------
//
// Hugues THOMAS - 10/02/2017
//
#include "cloud.h"
// Getters
// *******
PointXYZ max_point(std::vector<PointXYZ> points)
{
// Initialize limits
PointXYZ maxP(points[0]);
// Loop over all points
for (auto p : points)
{
if (p.x > maxP.x)
maxP.x = p.x;
if (p.y > maxP.y)
maxP.y = p.y;
if (p.z > maxP.z)
maxP.z = p.z;
}
return maxP;
}
PointXYZ min_point(std::vector<PointXYZ> points)
{
// Initialize limits
PointXYZ minP(points[0]);
// Loop over all points
for (auto p : points)
{
if (p.x < minP.x)
minP.x = p.x;
if (p.y < minP.y)
minP.y = p.y;
if (p.z < minP.z)
minP.z = p.z;
}
return minP;
}
================================================
FILE: thirdparty/kpconv/cpp_wrappers/cpp_utils/cloud/cloud.h
================================================
//
//
// 0==========================0
// | Local feature test |
// 0==========================0
//
// version 1.0 :
// >
//
//---------------------------------------------------
//
// Cloud header
//
//----------------------------------------------------
//
// Hugues THOMAS - 10/02/2017
//
# pragma once
#include <vector>
#include <unordered_map>
#include <map>