Repository: hancyran/LiDAR-Diffusion
Branch: main
Commit: 8416ddbbda88
Files: 124
Total size: 733.1 KB
Directory structure:
gitextract_2_mjw6j0/
├── .gitignore
├── DESIGN.md
├── LICENSE
├── README.md
├── configs/
│ ├── autoencoder/
│ │ └── kitti/
│ │ └── autoencoder_c2_p4.yaml
│ └── lidar_diffusion/
│ └── kitti/
│ └── uncond_c2_p4.yaml
├── data/
│ └── config/
│ └── semantic-kitti.yaml
├── init/
│ └── create_env.sh
├── lidm/
│ ├── __init__.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── annotated_dataset.py
│ │ ├── base.py
│ │ ├── conditional_builder/
│ │ │ ├── __init__.py
│ │ │ ├── objects_bbox.py
│ │ │ ├── objects_center_points.py
│ │ │ └── utils.py
│ │ ├── helper_types.py
│ │ └── kitti.py
│ ├── eval/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── compile.sh
│ │ ├── eval_utils.py
│ │ ├── fid_score.py
│ │ ├── metric_utils.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── minkowskinet/
│ │ │ │ ├── __init__.py
│ │ │ │ └── model.py
│ │ │ ├── rangenet/
│ │ │ │ ├── __init__.py
│ │ │ │ └── model.py
│ │ │ ├── spvcnn/
│ │ │ │ ├── __init__.py
│ │ │ │ └── model.py
│ │ │ └── ts/
│ │ │ ├── __init__.py
│ │ │ ├── basic_blocks.py
│ │ │ └── utils.py
│ │ └── modules/
│ │ ├── __init__.py
│ │ ├── chamfer2D/
│ │ │ ├── __init__.py
│ │ │ ├── chamfer2D.cu
│ │ │ ├── chamfer_cuda.cpp
│ │ │ ├── dist_chamfer_2D.py
│ │ │ └── setup.py
│ │ ├── chamfer3D/
│ │ │ ├── __init__.py
│ │ │ ├── chamfer3D.cu
│ │ │ ├── chamfer_cuda.cpp
│ │ │ ├── dist_chamfer_3D.py
│ │ │ └── setup.py
│ │ └── emd/
│ │ ├── __init__.py
│ │ ├── emd.cpp
│ │ ├── emd_cuda.cu
│ │ ├── emd_module.py
│ │ └── setup.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── autoencoder.py
│ │ └── diffusion/
│ │ ├── __init__.py
│ │ ├── classifier.py
│ │ ├── ddim.py
│ │ ├── ddpm.py
│ │ └── plms.py
│ ├── modules/
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── basic.py
│ │ ├── diffusion/
│ │ │ ├── __init__.py
│ │ │ ├── model_ldm.py
│ │ │ ├── model_lidm.py
│ │ │ └── openaimodel.py
│ │ ├── distributions/
│ │ │ ├── __init__.py
│ │ │ └── distributions.py
│ │ ├── ema.py
│ │ ├── encoders/
│ │ │ ├── __init__.py
│ │ │ └── modules.py
│ │ ├── image_degradation/
│ │ │ ├── __init__.py
│ │ │ ├── bsrgan.py
│ │ │ ├── bsrgan_light.py
│ │ │ └── utils_image.py
│ │ ├── losses/
│ │ │ ├── __init__.py
│ │ │ ├── contperceptual.py
│ │ │ ├── discriminator.py
│ │ │ ├── geometric.py
│ │ │ ├── perceptual.py
│ │ │ └── vqperceptual.py
│ │ ├── minkowskinet/
│ │ │ ├── __init__.py
│ │ │ └── model.py
│ │ ├── rangenet/
│ │ │ ├── __init__.py
│ │ │ └── model.py
│ │ ├── spvcnn/
│ │ │ ├── __init__.py
│ │ │ └── model.py
│ │ ├── ts/
│ │ │ ├── __init__.py
│ │ │ ├── basic_blocks.py
│ │ │ └── utils.py
│ │ └── x_transformer.py
│ └── utils/
│ ├── __init__.py
│ ├── aug_utils.py
│ ├── lidar_utils.py
│ ├── lr_scheduler.py
│ ├── misc_utils.py
│ └── model_utils.py
├── main.py
├── models/
│ ├── baseline/
│ │ ├── kitti/
│ │ │ └── template/
│ │ │ └── config.yaml
│ │ └── nuscenes/
│ │ └── template/
│ │ └── config.yaml
│ ├── first_stage_models/
│ │ ├── ablate/
│ │ │ ├── f_c16/
│ │ │ │ └── config.yaml
│ │ │ ├── f_c16_p2/
│ │ │ │ └── config.yaml
│ │ │ ├── f_c2_p2/
│ │ │ │ └── config.yaml
│ │ │ ├── f_c2_p4/
│ │ │ │ └── config.yaml
│ │ │ ├── f_c32/
│ │ │ │ └── config.yaml
│ │ │ ├── f_c4/
│ │ │ │ └── config.yaml
│ │ │ ├── f_c4_p2/
│ │ │ │ └── config.yaml
│ │ │ ├── f_c4_p4/
│ │ │ │ └── config.yaml
│ │ │ ├── f_c64/
│ │ │ │ └── config.yaml
│ │ │ ├── f_c8/
│ │ │ │ └── config.yaml
│ │ │ ├── f_c8_p2/
│ │ │ │ └── config.yaml
│ │ │ ├── f_p16/
│ │ │ │ └── config.yaml
│ │ │ ├── f_p2/
│ │ │ │ └── config.yaml
│ │ │ ├── f_p4/
│ │ │ │ └── config.yaml
│ │ │ └── f_p8/
│ │ │ └── config.yaml
│ │ └── kitti/
│ │ ├── f_c2_p4/
│ │ │ └── config.yaml
│ │ └── f_c2_p4_wo_logscale/
│ │ └── config.yaml
│ └── lidm/
│ └── kitti/
│ ├── cam2lidar/
│ │ └── config.yaml
│ ├── sem2lidar/
│ │ └── config.yaml
│ ├── text2lidar/
│ │ └── config.yaml
│ ├── uncond/
│ │ └── config.yaml
│ └── uncond_wo_logscale/
│ └── config.yaml
└── scripts/
├── eval_ae.py
├── sample.py
├── sample_cond.py
└── text2lidar.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
================================================
FILE: DESIGN.md
================================================
# Study on Design of LiDAR Compression
All the following experiments are conducted with 4 NVIDIA 3090 GPUs on KITTI-360 (64-beam).
Tip: Downloading the videos instead of watching them with Google Drive's built-in video player provides better visualization quality.
### Autoencoders (trained with 40k steps, evaluated on reconstruction):
| Curvewise <br/> Factor | Patchwise <br/> Factor | Output <br/> Size | rFRID(↓) | rFSVD(↓) | rFPVD(↓) | CD(↓) | EMD(↓) | #Params (M) | Directory | Visualization of Reconstruction (val) |
|:----------------------:|:----------------------:|:-----------------:|:--------:|:--------:|:--------:|:-----:|:------:|:-----------:|:-------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| N/A | N/A | Ground Truth | - | - | - | - | - | - | - | [Range Image](https://drive.google.com/file/d/1wAtQSlVwF2jCpcL3zbXlk2lGUYzo1GBf/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1iHIB7Jw-WS0D_hXgQSOyyDyWCmPVR-6k/view?usp=sharing) |
| | | | | | | | | | | |
| 4 | 1 | 64x256x2 | 0.2 | 12.9 | 13.8 | 0.069 | 0.151 | 9.52 | [Google Drive](https://drive.google.com/drive/folders/1bLGigdh3oNBTfskdX5yisqJ3fd99wFnR?usp=drive_link) | [Range Image](https://drive.google.com/file/d/1w7slbsRjlU4kb0kl6LyjX-JojJvoWQhG/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/17ewPXoRMeA_HsvEOznsvxy3d6iKk7hC2/view?usp=sharing) |
| 8 | 1 | 64x128x3 | 0.9 | 21.2 | 17.4 | 0.141 | 0.230 | 10.76 | [Google Drive](https://drive.google.com/drive/folders/1qPCPJC9TsIEO2UaZqurPu99m4syzfzuq?usp=sharing) | [Range Image](https://drive.google.com/file/d/17kukYFlJY40_cVBuWXMLHiMe7ls2OLNh/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/116IXDMgrWn6OHtyEYIo6aM1ARloX3BWF/view?usp=sharing) |
| 16 | 1 | 64x64x4 | 2.8 | 31.1 | 23.9 | 0.220 | 0.265 | 12.43 | [Google Drive](https://drive.google.com/drive/folders/1IHm3KlwG4lQAa9Ygt3WRUPfDxAQ1Tjia?usp=sharing) | [Range Image](https://drive.google.com/file/d/12TKyoajTiU_hr1MAdK2PNveddorCshG4/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/18NCV7JoR3W1COaPH96a1ozbh8-58eT6n/view?usp=sharing) |
| 32 | 1 | 64x32x8 | 16.4 | 49.0 | 38.5 | 0.438 | 0.344 | 13.72 | [Google Drive](https://drive.google.com/drive/folders/1CnUGOoAZDrSbDG3DjVx5pcouAT5WQTGN?usp=sharing) | [Range Image](https://drive.google.com/file/d/1S2DPHfWAljKZrHJlPHIvxAPK2-rpdJ_J/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1yx8V4Qav7sCigcfSHrrrJQOFF-s2PryV/view?usp=sharing) |
| | | | | | | | | | | |
| 1 | 2 | 32x512x2 | 1.5 | 25.0 | 23.8 | 0.096 | 0.178 | 2.87 | [Google Drive](https://drive.google.com/drive/folders/16OLfvexGSuOO8zNxkVLvY6rglvLn3HRG?usp=sharing) | [Range Image](https://drive.google.com/file/d/1tPPD2Pnn_6ge3x2yoJXhkDhe0Wi5Qxhw/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1Xjg0ckVb208BFEgbv4VQtV-fVraEXUNC/view?usp=sharing) |
| 1 | 4 | 16x256x4 | 0.6 | 15.4 | 15.8 | 0.142 | 0.233 | 12.45 | [Google Drive](https://drive.google.com/drive/folders/1ArTAar3UM-7eBmkGb2bqDF0MVW6GL0az?usp=sharing) | [Range Image](https://drive.google.com/file/d/1Q_ZTRKyDOAmP314p9B6Cip79mc-FJ2se/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1-t9zvSrov1OsF_WEIBqH3xkLzTJfxRBr/view?usp=sharing) |
| 1 | 8 | 8x128x16 | 17.7 | 35.7 | 33.1 | 0.384 | 0.327 | 15.78 | [Google Drive](https://drive.google.com/drive/folders/1Ol2P6ZYYFjEImLAhIhY8iR_G6bLKI4Yx?usp=sharing) | [Range Image](https://drive.google.com/file/d/14hPy2utsaxwPxW5PA7gO7ak7f-lcd-X5/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1izj-_1hFkdaRCg2qUzkXByfCD-vBd_1M/view?usp=sharing) |
| 1 | 16 | 4x64x64 | 37.1 | 68.7 | 63.9 | 0.699 | 0.416 | 16.25 | [Google Drive](https://drive.google.com/drive/folders/1_vihPf9xgnr4Zib-dYNUZ1n6kTMxT3rG?usp=sharing) | [Range Image](https://drive.google.com/file/d/1G7evMm3H6WvbHFhBlCa8wxPzwVC3q-8H/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1IdBrEpCIugvxVHyNOsNIg8Y8ZBWrHcWL/view?usp=sharing) |
| | | | | | | | | | | |
| 2 | 2 | 32x256x3 | 0.4 | 11.2 | 12.2 | 0.094 | 0.199 | 13.09 | [Google Drive](https://drive.google.com/drive/folders/1SdFEtMGRE9Oi23jlDrtebslc5hxhYLBQ?usp=sharing) | [Range Image](https://drive.google.com/file/d/1Ac4jVB6RkqMwV1fZcPGDyQhR3eE_Zj6C/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1pg2ezSmXiu3ensvj564JIy6CpB46uZm7/view?usp=sharing) |
| 4 | 2 | 32x128x4 | 3.9 | 19.6 | 16.6 | 0.197 | 0.236 | 14.35 | [Google Drive](https://drive.google.com/drive/folders/1uWlZPiU9Jw4TFfvI4Avi4r0bEyJ9kw4i?usp=sharing) | [Range Image](https://drive.google.com/file/d/1yZGqe_DcDXew3JabnN4T1-P27ZlscHba/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1i_q6gVY4gMtzKYlhlMQ9QrRql73VX05j/view?usp=sharing) |
| 8 | 2 | 32x64x8 | 8.0 | 25.3 | 20.2 | 0.277 | 0.294 | 16.06 | [Google Drive](https://drive.google.com/drive/folders/1Z9B7PjR5SlgAl2WLGmIPxiYTzmo17J--?usp=sharing) | [Range Image](https://drive.google.com/file/d/1HVqFbIE1lgotDplc8x7_hJkSU5vLtbRN/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1jSYWZMmPelmfWpVa7V5f2Byr9vN2BKXo/view?usp=sharing) |
| 16 | 2 | 32x32x16 | 21.5 | 54.2 | 44.6 | 0.491 | 0.371 | 17.44 | [Google Drive](https://drive.google.com/drive/folders/1jBaEiAymHACWTdy_GbYOiG9e-GFVkIfe?usp=sharing) | [Range Image](https://drive.google.com/file/d/1flAzjRLcl5Jtc_T--GbbomKWi42DvW9v/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1zfMzu6NFeLJhR1YPU28k7vPy1GX-80QT/view?usp=sharing) |
| 2 | 4 | 16x128x8 | 2.5 | 16.9 | 15.8 | 0.205 | 0.273 | 15.07 | [Google Drive](https://drive.google.com/drive/folders/1w-4bF4yORsot6xb5ia95RXWhfHrfpK0T?usp=sharing) | [Range Image](https://drive.google.com/file/d/1rm0sviRg4LfImgWVCi6THi3pHF4kFccH/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/1gPKB2zj44oLLEBuUXU8uiaXIcSWpyMOi/view?usp=sharing) |
| 4 | 4 | 16x128x16 | 13.8 | 29.5 | 25.4 | 0.341 | 0.317 | 16.86 | [Google Drive](https://drive.google.com/drive/folders/1_hY52mbKy4t3U5eWQ4Stq-3wZX1FPXXz?usp=sharing) | [Range Image](https://drive.google.com/file/d/1ldMRXfUtFNBtjCCc-KYR311dQvCmn0EF/view?usp=sharing), [Point Cloud](https://drive.google.com/file/d/129WcZXW3b6e4UMxZ9x4XCR3BlaKw1Vec/view?usp=sharing) |
### LiDMs (trained with 10k steps, evaluated on generation):
| Curvewise <br/> Factor | Patchwise <br/> Factor | Output <br/> Size | FRID(↓) | FSVD(↓) | FPVD(↓) | JSD(↓) | MMD(10$^{-4}$,↓) | Directory |
|:----------------------:|:----------------------:|:-----------------:|:-------:|:-------:|:-------:|:------:|:--------------:|:-------------------------------------------------------------------------------------------------------:|
| N/A | N/A | Ground Truth | - | - | - | - | - | |
| | | | | | | | | |
| 4 | 1 | 64x256x2 | 271 | 148 | 118 | 0.262 | 5.33 | [Google Drive](https://drive.google.com/drive/folders/1_bf9apVwhhmyaYiUPO5vE1t6Tqwbj2dq?usp=drive_link) |
| 8 | 1 | 64x128x3 | 162 | 85 | 68 | 0.234 | 5.03 | [Google Drive](https://drive.google.com/drive/folders/1M_NVgHNWbWDe6vOMML4ZpoO7-alKGGBl?usp=drive_link) |
| 16 | 1 | 64x64x4 | 142 | 116 | 106 | 0.232 | 5.15 | [Google Drive](https://drive.google.com/drive/folders/19DkZhHhVj7oa7XITXbdcNLGqqDkfjri-?usp=drive_link) |
| | | | | | | | | |
| 1 | 2 | 32x512x2 | 205 | 154 | 132 | 0.248 | 6.15 | [Google Drive](https://drive.google.com/drive/folders/1l5VZRImWDZttHIgM5A6heWjSYocoeujq?usp=drive_link) |
| 1 | 4 | 16x256x4 | 180 | 60 | 55 | 0.230 | 5.34 | [Google Drive](https://drive.google.com/drive/folders/1sg0iVMFf7EnAcUvpxq57kx7Y2D0Pclq7?usp=drive_link) |
| 1 | 8 | 8x128x16 | 192 | 88 | 78 | 0.243 | 5.14 | [Google Drive](https://drive.google.com/drive/folders/163yiMd3nEey6igZWk2ldegdlGtJhRppf?usp=drive_link) |
| | | | | | | | | |
| 2 | 2 | 32x256x3 | 161 | 73 | 63 | 0.228 | 5.44 | [Google Drive](https://drive.google.com/drive/folders/1cP-ghlv996glNHewCF01iU5lHy9iOgQO?usp=drive_link) |
| 4 | 2 | 32x128x4 | 145 | 77 | 68 | 0.222 | 5.10 | [Google Drive](https://drive.google.com/drive/folders/1zQf3_fFlp8r2b34ZySpHU4Nd1ilIDqRz?usp=drive_link) |
| 8 | 2 | 32x64x8 | 188 | 83 | 71 | 0.228 | 5.33 | [Google Drive](https://drive.google.com/drive/folders/1EXK5tw95LOKqxclFNdIbc6qQ7H-whRKp?usp=drive_link) |
| 2 | 4 | 16x128x8 | 162 | 56 | 49 | 0.228 | 4.82 | [Google Drive](https://drive.google.com/drive/folders/1JIQTswdJ3s4b_w1BHv6WWFs29fTswgRy?usp=drive_link) |
| 4 | 4 | 16x128x16 | 195 | 80 | 70 | 0.240 | 5.84 | [Google Drive](https://drive.google.com/drive/folders/1F47aSmU2CnWSx8mWZ1ICnKftgIUgpW58?usp=drive_link) |
### LiDM Performance with Different Scaling Factors:
<p align="center">
<img src=assets/lidm_frid.png width="450"/>
<img src=assets/lidm_fsvd.png width="450"/>
</p>
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2024 Haoxi Ran
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
<div align="center">
<h1>LiDAR Diffusion Models [CVPR 2024]</h1>
[**Haoxi Ran**](https://hancyran.github.io/) · [**Vitor Guizilini**](https://scholar.google.com.br/citations?user=UH9tP6QAAAAJ&hl=en) · [**Yue Wang**](https://yuewang.xyz/)
<a href="https://hancyran.github.io/assets/paper/lidar_diffusion.pdf"><img src='https://img.shields.io/badge/PDF-LiDAR Diffusion-yellow' alt='PDF'></a>
<a href="https://arxiv.org/abs/2404.00815"><img src='https://img.shields.io/badge/arXiv-2404.00815-red?logo=arXiv' alt='arXiv'></a>
<a href="https://lidar-diffusion.github.io/"><img src='https://img.shields.io/badge/Project-LiDAR Diffusion-green' alt='Project'></a>
<a href="https://www.youtube.com/watch?v=Vj7DubNZnDo"><img src='https://img.shields.io/badge/youtube-Video-slateblue?logo=youtube' alt='Video'></a>
<a href="#citation"><img src='https://img.shields.io/badge/BibTex-LiDAR Diffusion-blue' alt='Paper BibTex'></a>
<img src=assets/overview.png width="400"/>
</div>
## :tada: News :tada:
- [**Apr 14, 2024**] Pretrained autoencoders and LiDMs for different tasks are released!
- [**Apr 5, 2024**] Our codebase and a detailed study of our autoencoder design, along with the pretrained models, are released!
## Requirements
We provide a [conda](https://conda.io/) environment named `lidar_diffusion`:
```
sh init/create_env.sh
conda activate lidar_diffusion
```
## Evaluation Toolbox
**Overview of evaluation metrics**:
<table>
<thead>
<tr>
<th style="text-align: center; vertical-align: middle;" colspan="3">Perceptual Metrics<br>(generation & reconstruction)</th>
<th style="text-align: center; vertical-align: middle;" colspan="2">Statistical Metrics<br>(generation only)</th>
<th style="text-align: center; vertical-align: middle;" colspan="2">Distance metrics <br> (reconstruction only)</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align: center; vertical-align: middle;">FRID</td>
<td style="text-align: center; vertical-align: middle;">FSVD</td>
<td style="text-align: center; vertical-align: middle;">FPVD</td>
<td style="text-align: center; vertical-align: middle;">JSD</td>
<td style="text-align: center; vertical-align: middle;">MMD</td>
<td style="text-align: center; vertical-align: middle;">CD</td>
<td style="text-align: center; vertical-align: middle;">EMD</td>
</tr>
</tbody>
</table>
<br/>
To standardize the evaluation of LiDAR generative models, we provide a **self-contained** and **mostly CUDA-accelerated** evaluation toolbox in the directory `./lidm/eval/`. It implements and integrates various evaluation metrics, including:
* Perceptual metrics:
* Fréchet Range Image Distance (**FRID**)
* Fréchet Sparse Volume Distance (**FSVD**)
* Fréchet Point-based Volume Distance (**FPVD**)
* Statistical metrics:
* Minimum Matching Distance (**MMD**)
* Jensen-Shannon Divergence (**JSD**)
* Statistical pairwise metrics (for reconstruction only):
* Chamfer Distance (**CD**)
* Earth Mover's Distance (**EMD**)
For more details about setup and usage, please refer to the [Evaluation Toolbox README](./lidm/eval/README.md).
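FRID, FSVD, and FPVD all follow the FID recipe: features are extracted from real and generated scans with a pretrained backbone (the three metrics differ only in the feature extractor), a Gaussian is fitted to each feature set, and the Fréchet distance between the two Gaussians is reported. The snippet below is only a generic sketch of that shared formula, not the toolbox's API (the toolbox's own implementation lives under `./lidm/eval/`, e.g. `fid_score.py`):
```
import numpy as np
from scipy import linalg

def frechet_distance(feats_a, feats_b):
    """Fréchet distance between Gaussians fitted to two (N, D) feature sets."""
    mu_a, mu_b = feats_a.mean(axis=0), feats_b.mean(axis=0)
    cov_a = np.cov(feats_a, rowvar=False)
    cov_b = np.cov(feats_b, rowvar=False)
    # matrix square root of the covariance product (may carry tiny imaginary parts)
    covmean, _ = linalg.sqrtm(cov_a.dot(cov_b), disp=False)
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    diff = mu_a - mu_b
    return float(diff.dot(diff) + np.trace(cov_a) + np.trace(cov_b) - 2 * np.trace(covmean))
```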
## Model Zoo
To test the different tasks below, please download the pretrained LiDM and its corresponding autoencoder:
### Pretrained Autoencoders
#### 64-beam (evaluated on KITTI-360 val):
| Encoder | rFRID(↓) | rFSVD(↓) | rFPVD(↓) | CD(↓) | EMD(↓) | Checkpoint | Rec. Results val<br/>(Point Cloud) | Comment |
|:--------:|:--------:|:--------:|:--------:|:-----:|:------:|:------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------:|:------------------------:|
| f_c2_p4 | 2.15 | 20.2 | 16.2 | 0.160 | 0.203 | [[Google Drive]](https://drive.google.com/file/d/1fUlQVqnShylps4-PnFCRD-sW-6v_rAB4/view?usp=drive_link)<br/>(205MB) | [[Video]](https://drive.google.com/file/d/1bIjRtrF3ljtcR-esjTL79uJisn4cNf2D/view?usp=drive_link) | |
| f_c2_p4* | 2.06 | 20.3 | 15.7 | 0.092 | 0.176 | [[Google Drive]](https://drive.google.com/file/d/1A0zhQQXZTr8IfvpmsXrsG3lISC8KLkka/view?usp=drive_link)<br/>(205MB) | [[Video]](https://drive.google.com/file/d/1P_FbIOmYtS3kgutVAYXr7RShryO5Md7s/view?usp=drive_link) | *: w/o logarithm scaling |
### Benchmark for Unconditional LiDAR Generation
#### 64-beam (2k samples):
| Method | Encoder | FRID(↓) | FSVD(↓) | FPVD(↓) | JSD(↓) | MMD<br/>(10^-4,↓) | Checkpoint | Output LiDAR<br/>Point Clouds |
|:----------------------:|:--------:|:---------:|:--------:|:--------:|:---------:|:-----------------:|:------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------:|
| LiDAR-GAN | | 1222 | 183.4 | 168.1 | 0.272 | 4.74 | - | [[2k samples]](https://drive.google.com/file/d/1lzOqXHxtO83HNMZ_7_dU9GMee_Zm3clO/view?usp=drive_link) |
| LiDAR-VAE | | 199.1 | 129.9 | 105.8 | 0.237 | 7.07 | - | [[2k samples]](https://drive.google.com/file/d/1_6KGATYfzLur9bt8vISLXwzEIsjbXq_k/view?usp=drive_link) |
| ProjectedGAN | | 149.7 | 44.7 | 33.4 | 0.188 | 2.88 | - | [[2k samples]](https://drive.google.com/file/d/1LzLhuKpBOIZ6F7SlPtMdYuCSE_8P1qwz/view?usp=drive_link) |
| UltraLiDAR§ | | 370.0 | 72.1 | 66.6 | 0.747 | 17.12 | - | [[2k samples]](https://drive.google.com/file/d/17kft5S0nA_lnjECrK_aHzI5q1Erma_T7/view?usp=drive_link) |
| LiDARGen (1160s)† | | 129.0 | 39.2 | 33.4 | **0.188** | **2.88** | - | [[2k samples]](https://drive.google.com/file/d/1N5jTHjM8XnUYAMYkbsOipUQGhZjqMYDD/view?usp=drive_link) |
| | | | | | | | | |
| LiDARGen (50s)† | | 2051 | 480.6 | 400.7 | 0.506 | 9.91 | - | [[2k samples]](https://drive.google.com/file/d/1qN4T0Jg8P4IJLdaR_7sBdjID3TtzLITy/view?usp=drive_link) |
| LiDM (50s) | f_c2_p4 | 135.8 | **37.9** | **28.7** | 0.211 | 3.87 | [[Google Drive]](https://drive.google.com/file/d/1WKFwXi7xiXr2WCtM3ZX95CqlU-kOhhgC/view?usp=drive_link)<br/>(3.9GB) | [[2k samples]](https://drive.google.com/file/d/1mdWdzXHTW4IONgAYD44EvfUI8aokPfP_/view?usp=drive_link) |
| LiDM (50s) | f_c2_p4* | **125.1** | 38.8 | 29.0 | 0.211 | 3.84 | [[Google Drive]](https://drive.google.com/file/d/1huCr1xQJ6ZRS2VYcJ99vDrCS8QhxVysQ/view?usp=drive_link)<br/>(3.9GB) | [[2k samples]](https://drive.google.com/file/d/18K-9ps9Ej-OACRKe7D30reY4l6CttN6T/view?usp=drive_link) |
NOTE:
1. Each method is evaluated with **2,000** randomly generated samples.
2. †: samples generated by the officially released pretrained model in [LiDARGen github repo](https://github.com/vzyrianov/lidargen).
3. §: samples borrowed from [UltraLiDAR implementation](https://github.com/myc634/UltraLiDAR_nusc_waymo).
4. All of the above results are computed with our [evaluation toolbox](#evaluation-toolbox). For more details, please refer to the [Evaluation Toolbox README](./lidm/eval/README.md).
5. Each .pcd file is a list of point clouds stored with the `joblib` package. To load one of these files, use `joblib.load(path)` (see the minimal loading sketch below).
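For example, a minimal loading sketch (the method name in the path is a placeholder for whichever baseline you downloaded):
```
import joblib

# each provided .pcd file holds a Python list of generated point clouds, one entry per sample
samples = joblib.load("models/baseline/kitti/[method]/samples.pcd")
print(len(samples))
```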
To evaluate the above methods (except _LiDM_) yourself, download the provided .pcd files from the **Output** column to the directory `./models/baseline/kitti/[method]/`:
```
CUDA_VISIBLE_DEVICES=0 python scripts/sample.py -d kitti -f models/baseline/kitti/[method]/samples.pcd --baseline --eval
```
To evaluate LiDM with the given .pcd files:
```
CUDA_VISIBLE_DEVICES=0 python scripts/sample.py -d kitti -f models/lidm/kitti/[method]/samples.pcd --eval
```
### Pretrained LiDMs for Other Tasks
| Task | Encoder | Dataset | FRID(↓) | FSVD(↓) | Checkpoint | Output |
|:------------------------------------:|:--------:|:-------------------------------------------------------:|:-------:|:-------:|:------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------:|
| Semantic Map to LiDAR | f_c2_p4* | [SemanticKITTI](http://semantic-kitti.org/dataset.html) | 11.8 | 19.1 | [[Google Drive]](https://drive.google.com/file/d/1Mijx3cRPupsC2d4b2FwlbOXsojeHAXaO/view?usp=drive_link)<br/>(3.9GB) | [[log.tar.gz]](https://drive.google.com/file/d/1N2hMDO0boL5TPmnulApPspIpNnG5d9e5/view?usp=drive_link)<br/>(2.1GB) |
| Camera to LiDAR | f_c2_p4* | [KITTI-360](https://www.cvlibs.net/datasets/kitti-360/) | 38.9 | 32.1 | [[Google Drive]](https://drive.google.com/file/d/1XzY7fSHQz72gWVFcmit-NlkoSwtMWbfz/view?usp=drive_link)<br/>(7.5GB) | [[log.tar.gz]](https://drive.google.com/file/d/1PZrMwiZiVvpYuuMKxMHpWEalt0b1lM17/view?usp=drive_link)<br/>(5.4GB) |
| Text to LiDAR | f_c2_p4* | _zero-shot_ | - | - | From _Camera-to-LiDAR_ | - |
NOTE:
1. The output `log.tar.gz` contains input conditions (`.png`), generated range images (`.png`), generated point clouds (`.txt`), and a collection of all output point clouds (`.pcd`).
### Study on Design of LiDAR Compression
For full details of our studies on the design of LiDAR Compression, please refer to [LiDAR Compression Design README](./DESIGN.md).
Tip: Downloading the videos instead of watching them with Google Drive's built-in video player provides better visualization quality.
#### Autoencoders (trained with 40k steps, evaluated on reconstruction):
| Curvewise <br/> Factor | Patchwise <br/> Factor | Output <br/> Size | rFRID(↓) | rFSVD(↓) | #Params (M) | Visualization of Reconstruction (val) |
|:----------------------:|:----------------------:|:-----------------:|:--------:|:--------:|:-----------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| N/A | N/A | Ground Truth | - | - | - | [[Range Image]](https://drive.google.com/file/d/1wAtQSlVwF2jCpcL3zbXlk2lGUYzo1GBf/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1iHIB7Jw-WS0D_hXgQSOyyDyWCmPVR-6k/view?usp=sharing) |
| | | | | | | |
| 4 | 1 | 64x256x2 | 0.2 | 12.9 | 9.52 | [[Range Image]](https://drive.google.com/file/d/1w7slbsRjlU4kb0kl6LyjX-JojJvoWQhG/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/17ewPXoRMeA_HsvEOznsvxy3d6iKk7hC2/view?usp=sharing) |
| 8 | 1 | 64x128x3 | 0.9 | 21.2 | 10.76 | [[Range Image]](https://drive.google.com/file/d/17kukYFlJY40_cVBuWXMLHiMe7ls2OLNh/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/116IXDMgrWn6OHtyEYIo6aM1ARloX3BWF/view?usp=sharing) |
| 16 | 1 | 64x64x4 | 2.8 | 31.1 | 12.43 | [[Range Image]](https://drive.google.com/file/d/12TKyoajTiU_hr1MAdK2PNveddorCshG4/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/18NCV7JoR3W1COaPH96a1ozbh8-58eT6n/view?usp=sharing) |
| 32 | 1 | 64x32x8 | 16.4 | 49.0 | 13.72 | [[Range Image]](https://drive.google.com/file/d/1S2DPHfWAljKZrHJlPHIvxAPK2-rpdJ_J/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1yx8V4Qav7sCigcfSHrrrJQOFF-s2PryV/view?usp=sharing) |
| | | | | | | |
| 1 | 2 | 32x512x2 | 1.5 | 25.0 | 2.87 | [[Range Image]](https://drive.google.com/file/d/1tPPD2Pnn_6ge3x2yoJXhkDhe0Wi5Qxhw/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1Xjg0ckVb208BFEgbv4VQtV-fVraEXUNC/view?usp=sharing) |
| 1 | 4 | 16x256x4 | 0.6 | 15.4 | 12.45 | [[Range Image]](https://drive.google.com/file/d/1Q_ZTRKyDOAmP314p9B6Cip79mc-FJ2se/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1-t9zvSrov1OsF_WEIBqH3xkLzTJfxRBr/view?usp=sharing) |
| 1 | 8 | 8x128x16 | 17.7 | 35.7 | 15.78 | [[Range Image]](https://drive.google.com/file/d/14hPy2utsaxwPxW5PA7gO7ak7f-lcd-X5/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1izj-_1hFkdaRCg2qUzkXByfCD-vBd_1M/view?usp=sharing) |
| 1 | 16 | 4x64x64 | 37.1 | 68.7 | 16.25 | [[Range Image]](https://drive.google.com/file/d/1G7evMm3H6WvbHFhBlCa8wxPzwVC3q-8H/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1IdBrEpCIugvxVHyNOsNIg8Y8ZBWrHcWL/view?usp=sharing) |
| | | | | | | |
| 2 | 2 | 32x256x3 | 0.4 | 11.2 | 13.09 | [[Range Image]](https://drive.google.com/file/d/1Ac4jVB6RkqMwV1fZcPGDyQhR3eE_Zj6C/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1pg2ezSmXiu3ensvj564JIy6CpB46uZm7/view?usp=sharing) |
| 4 | 2 | 32x128x4 | 3.9 | 19.6 | 14.35 | [[Range Image]](https://drive.google.com/file/d/1yZGqe_DcDXew3JabnN4T1-P27ZlscHba/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1i_q6gVY4gMtzKYlhlMQ9QrRql73VX05j/view?usp=sharing) |
| 8 | 2 | 32x64x8 | 8.0 | 25.3 | 16.06 | [[Range Image]](https://drive.google.com/file/d/1HVqFbIE1lgotDplc8x7_hJkSU5vLtbRN/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1jSYWZMmPelmfWpVa7V5f2Byr9vN2BKXo/view?usp=sharing) |
| 16 | 2 | 32x32x16 | 21.5 | 54.2 | 17.44 | [[Range Image]](https://drive.google.com/file/d/1flAzjRLcl5Jtc_T--GbbomKWi42DvW9v/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1zfMzu6NFeLJhR1YPU28k7vPy1GX-80QT/view?usp=sharing) |
| 2 | 4 | 16x128x8 | 2.5 | 16.9 | 15.07 | [[Range Image]](https://drive.google.com/file/d/1rm0sviRg4LfImgWVCi6THi3pHF4kFccH/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/1gPKB2zj44oLLEBuUXU8uiaXIcSWpyMOi/view?usp=sharing) |
| 4 | 4 | 16x128x16 | 13.8 | 29.5 | 16.86 | [[Range Image]](https://drive.google.com/file/d/1ldMRXfUtFNBtjCCc-KYR311dQvCmn0EF/view?usp=sharing), [[Point Cloud]](https://drive.google.com/file/d/129WcZXW3b6e4UMxZ9x4XCR3BlaKw1Vec/view?usp=sharing) |
## Unconditional LiDAR Generation
<p align="center">
<img src=assets/uncond.jpeg width="512"/>
</p>
To run sampling with the pretrained models (and to evaluate your results with the flag `--eval`), first download our provided [pretrained autoencoders](#pretrained-autoencoders) to the directory `./models/first_stage_models/kitti/[model_name]` and the [pretrained LiDMs](#benchmark-for-unconditional-lidar-generation) to the directory `./models/lidm/kitti/[model_name]`:
```
CUDA_VISIBLE_DEVICES=0 python scripts/sample.py -d kitti -r models/lidm/kitti/[model_name]/model.ckpt -n 2000 --eval
```
## Semantic-Map-to-LiDAR
<p align="center">
<img src=assets/map2lidar.gif width="768"/>
</p>
To check the conditional results on a full sequence of semantic maps (sequence '08'), please refer to [this video](https://drive.google.com/file/d/1TtAROAmQVecZm2xDTEkfPGRP1Bbr8U6n/view?usp=drive_link).
Before running this task, set up the [SemanticKITTI](http://www.semantic-kitti.org/) dataset to provide semantic labels as input.
To run sampling with the pretrained model (and to evaluate your results with the flag `--eval`):
```
CUDA_VISIBLE_DEVICES=0 python scripts/sample_cond.py -r models/lidm/kitti/sem2lidar/model.ckpt -d kitti [--eval]
```
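If you are preparing SemanticKITTI yourself, note that the per-point labels are stored as one `uint32` per point (lower 16 bits: semantic class id, upper 16 bits: instance id), and the raw ids can be remapped with the `learning_map` from `data/config/semantic-kitti.yaml`. The sketch below only illustrates this standard SemanticKITTI label format; the label path is hypothetical and the repo's own data loader may differ:
```
import numpy as np
import yaml

with open("data/config/semantic-kitti.yaml") as f:
    cfg = yaml.safe_load(f)

raw = np.fromfile("dataset/sequences/08/labels/000000.label", dtype=np.uint32)
sem = raw & 0xFFFF   # semantic class id per point
inst = raw >> 16     # instance id per point

# remap raw ids (0..259) to the training classes defined in learning_map
lut = np.zeros(max(cfg["learning_map"]) + 1, dtype=np.int32)
for src, dst in cfg["learning_map"].items():
    lut[src] = dst
sem_train = lut[sem]
```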
## Camera-to-LiDAR
<p align="center">
<img src=assets/cam2lidar.jpeg width="768"/>
</p>
Before running this task, set up the [KITTI-360](https://www.cvlibs.net/datasets/kitti-360/) dataset to provide camera images as input.
To run sampling with the pretrained model:
```
CUDA_VISIBLE_DEVICES=0 python scripts/sample_cond.py -r models/lidm/kitti/cam2lidar/model.ckpt -d kitti [--eval]
```
## Text-to-LiDAR
<p align="center">
<img src=assets/text2lidar.jpeg width="768"/>
</p>
To run sampling with the pretrained model:
```
CUDA_VISIBLE_DEVICES=0 python scripts/text2lidar.py -r models/lidm/kitti/cam2lidar/model.ckpt -d kitti -p "an empty road with no object"
```
## Training
To train your own LiDAR Diffusion Models, run the following commands (for example, to train both the autoencoder and the LiDM on four GPUs):
```
# train an autoencoder
python main.py -b configs/autoencoder/kitti/autoencoder_c2_p4.yaml -t --gpus 0,1,2,3
# train an LiDM
python main.py -b configs/lidar_diffusion/kitti/uncond_c2_p4.yaml -t --gpus 0,1,2,3
```
To debug the training process, add the flag `-d`:
```
python main.py -b path/to/your/config.yaml -t --gpus 0, -d
```
To resume your training from an existing log directory or an existing checkpoint file, use the flag `-r`:
```
# using a log directory
python main.py -b path/to/your/config.yaml -t --gpus 0, -r path/to/your/log
# or, using a checkpoint
python main.py -b path/to/your/config.yaml -t --gpus 0, -r path/to/your/ckpt/file
```
## Acknowledgement
- Our codebase for the diffusion models builds heavily on [Latent Diffusion](https://github.com/CompVis/latent-diffusion).
## Citation
If you find this project useful in your research, please consider citing:
```
@inproceedings{ran2024towards,
title={Towards Realistic Scene Generation with LiDAR Diffusion Models},
author={Ran, Haoxi and Guizilini, Vitor and Wang, Yue},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
year={2024}
}
```
================================================
FILE: configs/autoencoder/kitti/autoencoder_c2_p4.yaml
================================================
model:
base_learning_rate: 4.5e-6
target: lidm.models.autoencoder.VQModel
params:
monitor: val/rec_loss
embed_dim: 8
n_embed: 16384
lib_name: lidm
use_mask: False # False
lossconfig:
target: lidm.modules.losses.vqperceptual.VQGeoLPIPSWithDiscriminator
params:
disc_conditional: false
disc_start: 1
disc_in_channels: 1
disc_num_layers: 3
disc_weight: 0.6 # 0.6
disc_version: v0 # v1
codebook_weight: 1
curve_length: 1
geo_factor: 0
mask_factor: 0 # 0.0
perceptual_factor: 0
perceptual_type: rangenet_dec
ddconfig:
double_z: false
z_channels: 8
in_channels: 1
out_ch: 1
ch: 64
ch_mult: [1,2,2,4] # num_down = len(ch_mult)-1
strides: [[1,2],[2,2],[2,2]]
num_res_blocks: 2
attn_levels: []
dropout: 0.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 4
num_workers: 8
wrap: true
dataset:
size: [64, 1024]
fov: [ 3,-25 ]
depth_range: [ 1.0,56.0 ]
depth_scale: 5.84 # np.log2(depth_max + 1)
log_scale: true
x_range: [ -50.0, 50.0 ]
y_range: [ -50.0, 50.0 ]
z_range: [ -3.0, 1.0 ]
resolution: 1
num_channels: 1
num_cats: 10
num_views: 2
num_sem_cats: 19
filtered_map_cats: [ ]
aug:
flip: true
rotate: true
keypoint_drop: false
keypoint_drop_range: [ 5,20 ]
randaug: false
train:
target: lidm.data.kitti.KITTIImageTrain
params:
condition_key: image
validation:
target: lidm.data.kitti.KITTIImageValidation
params:
condition_key: image
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 1000
max_images: 8
increase_log_steps: true
trainer:
benchmark: true
accumulate_grad_batches: 2
================================================
FILE: configs/lidar_diffusion/kitti/uncond_c2_p4.yaml
================================================
model:
base_learning_rate: 1.0e-06
target: lidm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
image_size: [16, 128]
channels: 8
monitor: val/loss_simple_ema
first_stage_key: image
unet_config:
target: lidm.modules.diffusion.openaimodel.UNetModel
params:
image_size: [16, 128]
in_channels: 8
out_channels: 8
model_channels: 256
attention_resolutions: [4, 2, 1]
num_res_blocks: 2
channel_mult: [1, 2, 4]
num_head_channels: 32
lib_name: lidm
first_stage_config:
target: lidm.models.autoencoder.VQModelInterface
params:
embed_dim: 8
n_embed: 16384
lib_name: lidm
use_mask: True # False
ckpt_path: models/first_stage_models/kitti/f_c2_p4/model.ckpt
ddconfig:
double_z: false
z_channels: 8
in_channels: 1
out_ch: 2
ch: 64
ch_mult: [1,2,2,4]
strides: [[1,2],[2,2],[2,2]]
num_res_blocks: 2
attn_levels: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config: "__is_unconditional__"
data:
target: main.DataModuleFromConfig
params:
batch_size: 16
num_workers: 8
wrap: true
dataset:
size: [64, 1024]
fov: [ 3,-25 ]
depth_range: [ 1.0,56.0 ]
depth_scale: 5.84 # np.log2(depth_max + 1)
log_scale: true
x_range: [ -50.0, 50.0 ]
y_range: [ -50.0, 50.0 ]
z_range: [ -3.0, 1.0 ]
resolution: 1
num_channels: 1
num_cats: 10
num_views: 2
num_sem_cats: 19
filtered_map_cats: [ ]
aug:
flip: true
rotate: false
keypoint_drop: false
keypoint_drop_range: [ 5,20 ]
randaug: false
train:
target: lidm.data.kitti.KITTI360Train
params:
condition_key: image
validation:
target: lidm.data.kitti.KITTI360Validation
params:
condition_key: image
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 5000
max_images: 8
increase_log_steps: False
trainer:
benchmark: true
================================================
FILE: data/config/semantic-kitti.yaml
================================================
# This file is covered by the LICENSE file in the root of this project.
labels:
0 : "unlabeled"
1 : "outlier"
10: "car"
11: "bicycle"
13: "bus"
15: "motorcycle"
16: "on-rails"
18: "truck"
20: "other-vehicle"
30: "person"
31: "bicyclist"
32: "motorcyclist"
40: "road"
44: "parking"
48: "sidewalk"
49: "other-ground"
50: "building"
51: "fence"
52: "other-structure"
60: "lane-marking"
70: "vegetation"
71: "trunk"
72: "terrain"
80: "pole"
81: "traffic-sign"
99: "other-object"
252: "moving-car"
253: "moving-bicyclist"
254: "moving-person"
255: "moving-motorcyclist"
256: "moving-on-rails"
257: "moving-bus"
258: "moving-truck"
259: "moving-other-vehicle"
color_map: # bgr
0 : [0, 0, 0]
1 : [0, 0, 255]
10: [245, 150, 100]
11: [245, 230, 100]
13: [250, 80, 100]
15: [150, 60, 30]
16: [255, 0, 0]
18: [180, 30, 80]
20: [255, 0, 0]
30: [30, 30, 255]
31: [200, 40, 255]
32: [90, 30, 150]
40: [255, 0, 255]
44: [255, 150, 255]
48: [75, 0, 75]
49: [75, 0, 175]
50: [0, 200, 255]
51: [50, 120, 255]
52: [0, 150, 255]
60: [170, 255, 150]
70: [0, 175, 0]
71: [0, 60, 135]
72: [80, 240, 150]
80: [150, 240, 255]
81: [0, 0, 255]
99: [255, 255, 50]
252: [245, 150, 100]
256: [255, 0, 0]
253: [200, 40, 255]
254: [30, 30, 255]
255: [90, 30, 150]
257: [250, 80, 100]
258: [180, 30, 80]
259: [255, 0, 0]
content: # as a ratio with the total number of points
0: 0.018889854628292943
1: 0.0002937197336781505
10: 0.040818519255974316
11: 0.00016609538710764618
13: 2.7879693665067774e-05
15: 0.00039838616015114444
16: 0.0
18: 0.0020633612104619787
20: 0.0016218197275284021
30: 0.00017698551338515307
31: 1.1065903904919655e-08
32: 5.532951952459828e-09
40: 0.1987493871255525
44: 0.014717169549888214
48: 0.14392298360372
49: 0.0039048553037472045
50: 0.1326861944777486
51: 0.0723592229456223
52: 0.002395131480328884
60: 4.7084144280367186e-05
70: 0.26681502148037506
71: 0.006035012012626033
72: 0.07814222006271769
80: 0.002855498193863172
81: 0.0006155958086189918
99: 0.009923127583046915
252: 0.001789309418528068
253: 0.00012709999297008662
254: 0.00016059776092534436
255: 3.745553104802113e-05
256: 0.0
257: 0.00011351574470342043
258: 0.00010157861367183268
259: 4.3840131989471124e-05
# classes that are indistinguishable from single scan or inconsistent in
# ground truth are mapped to their closest equivalent
learning_map:
0 : 0 # "unlabeled"
1 : 0 # "outlier" mapped to "unlabeled" --------------------------mapped
10: 1 # "car"
11: 2 # "bicycle"
13: 5 # "bus" mapped to "other-vehicle" --------------------------mapped
15: 3 # "motorcycle"
16: 5 # "on-rails" mapped to "other-vehicle" ---------------------mapped
18: 4 # "truck"
20: 5 # "other-vehicle"
30: 6 # "person"
31: 7 # "bicyclist"
32: 8 # "motorcyclist"
40: 9 # "road"
44: 10 # "parking"
48: 11 # "sidewalk"
49: 12 # "other-ground"
50: 13 # "building"
51: 14 # "fence"
52: 0 # "other-structure" mapped to "unlabeled" ------------------mapped
60: 9 # "lane-marking" to "road" ---------------------------------mapped
70: 15 # "vegetation"
71: 16 # "trunk"
72: 17 # "terrain"
80: 18 # "pole"
81: 19 # "traffic-sign"
99: 0 # "other-object" to "unlabeled" ----------------------------mapped
252: 1 # "moving-car" to "car" ------------------------------------mapped
253: 7 # "moving-bicyclist" to "bicyclist" ------------------------mapped
254: 6 # "moving-person" to "person" ------------------------------mapped
255: 8 # "moving-motorcyclist" to "motorcyclist" ------------------mapped
256: 5 # "moving-on-rails" mapped to "other-vehicle" --------------mapped
257: 5 # "moving-bus" mapped to "other-vehicle" -------------------mapped
258: 4 # "moving-truck" to "truck" --------------------------------mapped
259: 5 # "moving-other"-vehicle to "other-vehicle" ----------------mapped
learning_map_inv: # inverse of previous map
0: 0 # "unlabeled", and others ignored
1: 10 # "car"
2: 11 # "bicycle"
3: 15 # "motorcycle"
4: 18 # "truck"
5: 20 # "other-vehicle"
6: 30 # "person"
7: 31 # "bicyclist"
8: 32 # "motorcyclist"
9: 40 # "road"
10: 44 # "parking"
11: 48 # "sidewalk"
12: 49 # "other-ground"
13: 50 # "building"
14: 51 # "fence"
15: 70 # "vegetation"
16: 71 # "trunk"
17: 72 # "terrain"
18: 80 # "pole"
19: 81 # "traffic-sign"
learning_ignore: # Ignore classes
0: True # "unlabeled", and others ignored
1: False # "car"
2: False # "bicycle"
3: False # "motorcycle"
4: False # "truck"
5: False # "other-vehicle"
6: False # "person"
7: False # "bicyclist"
8: False # "motorcyclist"
9: False # "road"
10: False # "parking"
11: False # "sidewalk"
12: False # "other-ground"
13: False # "building"
14: False # "fence"
15: False # "vegetation"
16: False # "trunk"
17: False # "terrain"
18: False # "pole"
19: False # "traffic-sign"
split: # sequence numbers
train:
- 0
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 9
- 10
valid:
- 8
test:
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
================================================
FILE: init/create_env.sh
================================================
#!/usr/bin/bash
# install rust compiler
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
export PATH="$HOME/.cargo/bin:$PATH"
# create conda environment
conda create -n lidar_diffusion python=3.10.11 -y
conda activate lidar_diffusion
# install dependencies
pip install --upgrade pip
pip install torchmetrics==0.5.0 pytorch-lightning==1.4.2 omegaconf==2.1.1 einops==0.3.0 transformers==4.36.2 imageio==2.9.0 imageio-ffmpeg==0.4.2 opencv-python kornia==0.7.0 wandb more_itertools
pip install gdown
pip install -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
pip install -e git+https://github.com/openai/CLIP.git@main#egg=clip
# install torchsparse (optional)
#apt-get install libsparsehash-dev
#pip install git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0
mkdir -p dataset/
================================================
FILE: lidm/__init__.py
================================================
================================================
FILE: lidm/data/__init__.py
================================================
================================================
FILE: lidm/data/annotated_dataset.py
================================================
from pathlib import Path
from typing import Optional, List, Dict, Union, Any
import warnings
from torch.utils.data import Dataset
from .conditional_builder.objects_bbox import ObjectsBoundingBoxConditionalBuilder
from .conditional_builder.objects_center_points import ObjectsCenterPointsConditionalBuilder
class Annotated3DObjectsDataset(Dataset):
def __init__(self, min_objects_per_image: int,
max_objects_per_image: int, no_tokens: int, num_beams: int, cats: List[str],
cat_blacklist: Optional[List[str]] = None, **kwargs):
self.min_objects_per_image = min_objects_per_image
self.max_objects_per_image = max_objects_per_image
self.no_tokens = no_tokens
self.num_beams = num_beams
self.categories = [c for c in cats if c not in cat_blacklist] if cat_blacklist is not None else cats
self._conditional_builders = None
@property
def no_classes(self) -> int:
return len(self.categories)
@property
def conditional_builders(self) -> Dict[str, ObjectsCenterPointsConditionalBuilder]:
# cannot set this up in init because no_classes is only known after loading data in init of superclass
if self._conditional_builders is None:
self._conditional_builders = {
'center': ObjectsCenterPointsConditionalBuilder(
self.no_classes,
self.max_objects_per_image,
self.no_tokens,
self.num_beams
),
'bbox': ObjectsBoundingBoxConditionalBuilder(
self.no_classes,
self.max_objects_per_image,
self.no_tokens,
self.num_beams
)
}
return self._conditional_builders
def get_textual_label_for_category_id(self, category_id: int) -> str:
return self.categories[category_id]
================================================
FILE: lidm/data/base.py
================================================
import pdb
from abc import abstractmethod
from functools import partial
import PIL
import numpy as np
from PIL import Image
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset, IterableDataset
from ..utils.aug_utils import get_lidar_transform, get_camera_transform, get_anno_transform
class DatasetBase(Dataset):
def __init__(self, data_root, split, dataset_config, aug_config, return_pcd=False, condition_key=None,
scale_factors=None, degradation=None, **kwargs):
self.data_root = data_root
self.split = split
self.data = []
self.aug_config = aug_config
self.img_size = dataset_config.size
self.fov = dataset_config.fov
self.depth_range = dataset_config.depth_range
self.filtered_map_cats = dataset_config.filtered_map_cats
self.depth_scale = dataset_config.depth_scale
self.log_scale = dataset_config.log_scale
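# depth_thresh is the value that a raw depth of ~1/255 maps to under the same normalization as
# process_scan(); pixels whose encoded depth falls below it are later masked as empty returns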
if self.log_scale:
self.depth_thresh = (np.log2(1./255. + 1) / self.depth_scale) * 2. - 1 + 1e-6
else:
self.depth_thresh = (1./255. / self.depth_scale) * 2. - 1 + 1e-6
self.return_pcd = return_pcd
if degradation is not None and scale_factors is not None:
scaled_img_size = (int(self.img_size[0] / scale_factors[0]), int(self.img_size[1] / scale_factors[1]))
degradation_fn = {
"pil_nearest": PIL.Image.NEAREST,
"pil_bilinear": PIL.Image.BILINEAR,
"pil_bicubic": PIL.Image.BICUBIC,
"pil_box": PIL.Image.BOX,
"pil_hamming": PIL.Image.HAMMING,
"pil_lanczos": PIL.Image.LANCZOS,
}[degradation]
self.degradation_transform = partial(TF.resize, size=scaled_img_size, interpolation=degradation_fn)
else:
self.degradation_transform = None
self.condition_key = condition_key
self.lidar_transform = get_lidar_transform(aug_config, split)
self.anno_transform = get_anno_transform(aug_config, split) if condition_key in ['bbox', 'center'] else None
self.view_transform = get_camera_transform(aug_config, split) if condition_key in ['camera'] else None
self.prepare_data()
def prepare_data(self):
raise NotImplementedError
def process_scan(self, range_img):
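# normalize a raw range image (meters) to [-1, 1]: clamp negatives to zero, optionally apply
# log2(depth + 1) compression, divide by depth_scale, rescale to [-1, 1] and clip; also build a
# per-pixel mask (+1 valid, -1 empty) by thresholding the encoded depth against depth_thresh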
range_img = np.where(range_img < 0, 0, range_img)
if self.log_scale:
# log scale
range_img = np.log2(range_img + 0.0001 + 1)
range_img = range_img / self.depth_scale
range_img = range_img * 2. - 1.
range_img = np.clip(range_img, -1, 1)
range_img = np.expand_dims(range_img, axis=0)
# mask
range_mask = np.ones_like(range_img)
range_mask[range_img < self.depth_thresh] = -1
return range_img, range_mask
@staticmethod
def load_lidar_sweep(*args, **kwargs):
raise NotImplementedError
@staticmethod
def load_semantic_map(*args, **kwargs):
raise NotImplementedError
@staticmethod
def load_camera(*args, **kwargs):
raise NotImplementedError
@staticmethod
def load_annotation(*args, **kwargs):
raise NotImplementedError
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
example = dict()
return example
class Txt2ImgIterableBaseDataset(IterableDataset):
"""
Define an interface to make the IterableDatasets for text2img data chainable
"""
def __init__(self, num_records=0, valid_ids=None, size=256):
super().__init__()
self.num_records = num_records
self.valid_ids = valid_ids
self.sample_ids = valid_ids
self.size = size
print(f'{self.__class__.__name__} dataset contains {self.__len__()} examples.')
def __len__(self):
return self.num_records
@abstractmethod
def __iter__(self):
pass
================================================
FILE: lidm/data/conditional_builder/__init__.py
================================================
================================================
FILE: lidm/data/conditional_builder/objects_bbox.py
================================================
from itertools import cycle
from typing import List, Tuple, Callable, Optional
from PIL import Image as pil_image, ImageDraw as pil_img_draw, ImageFont
from more_itertools.recipes import grouper
from torch import LongTensor, Tensor
from ..helper_types import BoundingBox, Annotation
from .objects_center_points import ObjectsCenterPointsConditionalBuilder, convert_pil_to_tensor
from .utils import COLOR_PALETTE, WHITE, GRAY_75, BLACK, additional_parameters_string, \
pad_list, get_plot_font_size, absolute_bbox
class ObjectsBoundingBoxConditionalBuilder(ObjectsCenterPointsConditionalBuilder):
@property
def object_descriptor_length(self) -> int:
return 3 # 3/5: object_representation (1) + corners (2/4)
def _make_object_descriptors(self, annotations: List[Annotation]) -> List[Tuple[int, ...]]:
object_tuples = [
(self.object_representation(ann), *self.token_pair_from_bbox(ann.bbox))
for ann in annotations
]
object_tuples = pad_list(object_tuples, self.empty_tuple, self.no_max_objects)
return object_tuples
def inverse_build(self, conditional: LongTensor) -> Tuple[List[Tuple[int, BoundingBox]], Optional[BoundingBox]]:
conditional_list = conditional.tolist()
object_triples = grouper(conditional_list, 3)
assert conditional.shape[0] == self.embedding_dim
return [(object_triple[0], self.bbox_from_token_pair(object_triple[1], object_triple[2])) for object_triple in object_triples if object_triple[0] != self.none], None
def plot(self, conditional: LongTensor, label_for_category_no: Callable[[int], str], figure_size: Tuple[int, int],
line_width: int = 3, font_size: Optional[int] = None) -> Tensor:
plot = pil_image.new('RGB', figure_size, WHITE)
draw = pil_img_draw.Draw(plot)
# font = ImageFont.truetype(
# "/usr/share/fonts/truetype/lato/Lato-Regular.ttf",
# size=get_plot_font_size(font_size, figure_size)
# )
font = ImageFont.load_default()
width, height = plot.size
description, crop_coordinates = self.inverse_build(conditional)
for (representation, bbox), color in zip(description, cycle(COLOR_PALETTE)):
annotation = self.representation_to_annotation(representation)
# class_label = label_for_category_no(annotation.category_id) + ' ' + additional_parameters_string(annotation)
class_label = label_for_category_no(annotation.category_id)
bbox = absolute_bbox(bbox, width, height)
draw.rectangle(bbox, outline=color, width=line_width)
draw.text((bbox[0] + line_width, bbox[1] + line_width), class_label, anchor='la', fill=BLACK, font=font)
if crop_coordinates is not None:
draw.rectangle(absolute_bbox(crop_coordinates, width, height), outline=GRAY_75, width=line_width)
return convert_pil_to_tensor(plot) / 127.5 - 1.
================================================
FILE: lidm/data/conditional_builder/objects_center_points.py
================================================
import math
import random
import warnings
from itertools import cycle
from typing import List, Optional, Tuple, Callable
from PIL import Image as pil_image, ImageDraw as pil_img_draw, ImageFont
from more_itertools.recipes import grouper
from .utils import COLOR_PALETTE, WHITE, GRAY_75, BLACK, additional_parameters_string, pad_list, get_circle_size, \
get_plot_font_size, absolute_bbox
from ..helper_types import BoundingBox, Annotation, Image
from torch import LongTensor, Tensor
from torchvision.transforms import PILToTensor
pil_to_tensor = PILToTensor()
def convert_pil_to_tensor(image: Image) -> Tensor:
with warnings.catch_warnings():
# to filter PyTorch UserWarning as described here: https://github.com/pytorch/vision/issues/2194
warnings.simplefilter("ignore")
return pil_to_tensor(image)
class ObjectsCenterPointsConditionalBuilder:
def __init__(self, no_object_classes: int, no_max_objects: int, no_tokens: int, num_beams: int):
self.no_object_classes = no_object_classes
self.no_max_objects = no_max_objects
self.no_tokens = no_tokens
# self.no_sections = int(math.sqrt(self.no_tokens))
self.no_sections = (self.no_tokens // num_beams, num_beams) # (width, height)
@property
def none(self) -> int:
return self.no_tokens - 1
@property
def object_descriptor_length(self) -> int:
return 2
@property
def empty_tuple(self) -> Tuple:
return (self.none,) * self.object_descriptor_length
@property
def embedding_dim(self) -> int:
return self.no_max_objects * self.object_descriptor_length
def tokenize_coordinates(self, x: float, y: float) -> int:
"""
Express 2d coordinates with one number.
Example: assume self.no_tokens = 16, then no_sections = 4:
0 0 0 0
0 0 # 0
0 0 0 0
0 0 0 x
Then the # position corresponds to token 6, the x position to token 15.
@param x: float in [0, 1]
@param y: float in [0, 1]
@return: discrete tokenized coordinate
"""
x_discrete = int(round(x * (self.no_sections[0] - 1)))
y_discrete = int(round(y * (self.no_sections[1] - 1)))
return y_discrete * self.no_sections[0] + x_discrete
def coordinates_from_token(self, token: int) -> (float, float):
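# inverse of tokenize_coordinates: recover the normalized (x, y) in [0, 1] from a single token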
x = token % self.no_sections[0]
y = token // self.no_sections[0]
return x / (self.no_sections[0] - 1), y / (self.no_sections[1] - 1)
def bbox_from_token_pair(self, token1: int, token2: int) -> BoundingBox:
x0, y0 = self.coordinates_from_token(token1)
x1, y1 = self.coordinates_from_token(token2)
# x2, y2 = self.coordinates_from_token(token3)
# x3, y3 = self.coordinates_from_token(token4)
return x0, y0, x1, y1
def token_pair_from_bbox(self, bbox: BoundingBox) -> Tuple:
# return self.tokenize_coordinates(bbox[0], bbox[1]), self.tokenize_coordinates(bbox[2], bbox[3]), self.tokenize_coordinates(bbox[4], bbox[5]), self.tokenize_coordinates(bbox[6], bbox[7])
return self.tokenize_coordinates(bbox[0], bbox[1]), self.tokenize_coordinates(bbox[4], bbox[5])
def inverse_build(self, conditional: LongTensor) \
-> Tuple[List[Tuple[int, Tuple[float, float]]], Optional[BoundingBox]]:
conditional_list = conditional.tolist()
table_of_content = grouper(conditional_list, self.object_descriptor_length)
assert conditional.shape[0] == self.embedding_dim
return [
(object_tuple[0], self.coordinates_from_token(object_tuple[1]))
for object_tuple in table_of_content if object_tuple[0] != self.none
], None
def plot(self, conditional: LongTensor, label_for_category_no: Callable[[int], str], figure_size: Tuple[int, int],
line_width: int = 3, font_size: Optional[int] = None) -> Tensor:
plot = pil_image.new('RGB', figure_size, WHITE)
draw = pil_img_draw.Draw(plot)
circle_size = get_circle_size(figure_size)
# font = ImageFont.truetype('/usr/share/fonts/truetype/lato/Lato-Regular.ttf',
# size=get_plot_font_size(font_size, figure_size))
font = ImageFont.load_default()
width, height = plot.size
description, crop_coordinates = self.inverse_build(conditional)
for (representation, (x, y)), color in zip(description, cycle(COLOR_PALETTE)):
x_abs, y_abs = x * width, y * height
ann = self.representation_to_annotation(representation)
label = label_for_category_no(ann.category_id) + ' ' + additional_parameters_string(ann)
ellipse_bbox = [x_abs - circle_size, y_abs - circle_size, x_abs + circle_size, y_abs + circle_size]
draw.ellipse(ellipse_bbox, fill=color, width=0)
draw.text((x_abs, y_abs), label, anchor='md', fill=BLACK, font=font)
if crop_coordinates is not None:
draw.rectangle(absolute_bbox(crop_coordinates, width, height), outline=GRAY_75, width=line_width)
return convert_pil_to_tensor(plot) / 127.5 - 1.
def object_representation(self, annotation: Annotation) -> int:
return annotation.category_id
def representation_to_annotation(self, representation: int) -> Annotation:
category_id = representation % self.no_object_classes
# noinspection PyTypeChecker
return Annotation(
bbox=None,
category_id=category_id,
)
def _make_object_descriptors(self, annotations: List[Annotation]) -> List[Tuple[int, ...]]:
object_tuples = [
(self.object_representation(a),
self.tokenize_coordinates(a.center[0], a.center[1]))
for a in annotations
]
empty_tuple = (self.none, self.none)
object_tuples = pad_list(object_tuples, empty_tuple, self.no_max_objects)
return object_tuples
def build(self, annotations: List[Annotation]) \
-> LongTensor:
if len(annotations) == 0:
warnings.warn('Did not receive any annotations.')
random.shuffle(annotations)
if len(annotations) > self.no_max_objects:
warnings.warn('Received more annotations than allowed.')
annotations = annotations[:self.no_max_objects]
object_tuples = self._make_object_descriptors(annotations)
flattened = [token for tuple_ in object_tuples for token in tuple_]
assert len(flattened) == self.embedding_dim
assert all(0 <= value < self.no_tokens for value in flattened)
return LongTensor(flattened)
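# --- Hedged usage sketch (not part of the original file) ---
# A minimal, standalone illustration of the coordinate tokenization implemented by
# `tokenize_coordinates` / `coordinates_from_token` above, assuming a 4x4 grid
# (no_sections = (4, 4)), i.e. the docstring example with no_tokens = 16.
if __name__ == '__main__':
    no_sections = (4, 4)

    def _tokenize(x, y):
        x_discrete = int(round(x * (no_sections[0] - 1)))
        y_discrete = int(round(y * (no_sections[1] - 1)))
        return y_discrete * no_sections[0] + x_discrete

    def _coordinates(token):
        return token % no_sections[0], token // no_sections[0]

    # the '#' cell (x=2, y=1) maps to token 6, the 'x' cell (x=3, y=3) to token 15
    assert _tokenize(2 / 3, 1 / 3) == 6
    assert _tokenize(1.0, 1.0) == 15
    assert _coordinates(6) == (2, 1)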
================================================
FILE: lidm/data/conditional_builder/utils.py
================================================
import importlib
from typing import List, Any, Tuple, Optional
import numpy as np
from ..helper_types import BoundingBox, Annotation
# source: seaborn, color palette tab10
COLOR_PALETTE = [(30, 118, 179), (255, 126, 13), (43, 159, 43), (213, 38, 39), (147, 102, 188),
(139, 85, 74), (226, 118, 193), (126, 126, 126), (187, 188, 33), (22, 189, 206)]
BLACK = (0, 0, 0)
GRAY_75 = (63, 63, 63)
GRAY_50 = (127, 127, 127)
GRAY_25 = (191, 191, 191)
WHITE = (255, 255, 255)
FULL_CROP = (0., 0., 1., 1.)
def corners_3d_to_2d(corners3d):
"""
Args:
corners3d: (N, 8, 2)
Returns:
corners2d: (N, 4, 2)
"""
# select pairs to reorganize
mask_0_3 = corners3d[:, 0:4, 0].argmax(1) // 2 != 0
mask_4_7 = corners3d[:, 4:8, 0].argmin(1) // 2 != 0
# reorganize corners in the order of (bottom-right, bottom-left)
corners3d[mask_0_3, 0:4] = corners3d[mask_0_3][:, [2, 3, 0, 1]]
# reorganize corners in the order of (top-left, top-right)
corners3d[mask_4_7, 4:8] = corners3d[mask_4_7][:, [2, 3, 0, 1]]
# calculate corners in order
bot_r = np.stack([corners3d[:, 0:2, 0].max(1), corners3d[:, 0:2, 1].min(1)], axis=-1)
bot_l = np.stack([corners3d[:, 2:4, 0].min(1), corners3d[:, 2:4, 1].min(1)], axis=-1)
top_l = np.stack([corners3d[:, 4:6, 0].min(1), corners3d[:, 4:6, 1].max(1)], axis=-1)
top_r = np.stack([corners3d[:, 6:8, 0].max(1), corners3d[:, 6:8, 1].max(1)], axis=-1)
return np.stack([bot_r, bot_l, top_l, top_r], axis=1)
def rotate_points_along_z(points, angle):
"""
Args:
points: (N, 3 + C)
angle: angle along z-axis, angle increases x ==> y
Returns:
"""
cosa = np.cos(angle)
sina = np.sin(angle)
zeros = np.zeros(points.shape[0])
ones = np.ones(points.shape[0])
    rot_matrix = np.stack((
        cosa, sina, zeros,
        -sina, cosa, zeros,
        zeros, zeros, ones), axis=1).reshape((-1, 3, 3))  # (N, 3, 3), one rotation per box
points_rot = np.matmul(points[:, :, 0:3], rot_matrix)
points_rot = np.concatenate((points_rot, points[:, :, 3:]), axis=-1)
return points_rot
def boxes_to_corners_3d(boxes3d):
"""
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d: (N, 7) [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center
Returns:
corners3d: (N, 8, 3)
"""
template = np.array(
[[1, 1, -1], [1, -1, -1], [-1, -1, -1], [-1, 1, -1],
[1, 1, 1], [1, -1, 1], [-1, -1, 1], [-1, 1, 1]],
) / 2
# corners3d = boxes3d[:, None, 3:6].repeat(1, 8, 1) * template[None, :, :]
corners3d = np.tile(boxes3d[:, None, 3:6], (1, 8, 1)) * template[None, :, :]
corners3d = rotate_points_along_z(corners3d.reshape((-1, 8, 3)), boxes3d[:, 6]).reshape((-1, 8, 3))
corners3d += boxes3d[:, None, 0:3]
return corners3d
def intersection_area(rectangle1: BoundingBox, rectangle2: BoundingBox) -> float:
"""
Give intersection area of two rectangles.
@param rectangle1: (x0, y0, w, h) of first rectangle
@param rectangle2: (x0, y0, w, h) of second rectangle
"""
rectangle1 = rectangle1[0], rectangle1[1], rectangle1[0] + rectangle1[2], rectangle1[1] + rectangle1[3]
rectangle2 = rectangle2[0], rectangle2[1], rectangle2[0] + rectangle2[2], rectangle2[1] + rectangle2[3]
x_overlap = max(0., min(rectangle1[2], rectangle2[2]) - max(rectangle1[0], rectangle2[0]))
y_overlap = max(0., min(rectangle1[3], rectangle2[3]) - max(rectangle1[1], rectangle2[1]))
return x_overlap * y_overlap
def horizontally_flip_bbox(bbox: BoundingBox) -> BoundingBox:
return 1 - (bbox[0] + bbox[2]), bbox[1], bbox[2], bbox[3]
def absolute_bbox(relative_bbox: BoundingBox, width: int, height: int) -> Tuple[int, int, int, int]:
bbox = relative_bbox
# bbox = bbox[0] * width, bbox[1] * height, (bbox[0] + bbox[2]) * width, (bbox[1] + bbox[3]) * height
bbox = bbox[0] * width, bbox[1] * height, bbox[2] * width, bbox[3] * height
# return int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
x1, x2 = min(int(bbox[2]), int(bbox[0])), max(int(bbox[2]), int(bbox[0]))
y1, y2 = min(int(bbox[3]), int(bbox[1])), max(int(bbox[3]), int(bbox[1]))
if x1 == x2:
x2 += 1
if y1 == y2:
y2 += 1
return x1, y1, x2, y2
def pad_list(list_: List, pad_element: Any, pad_to_length: int) -> List:
return list_ + [pad_element for _ in range(pad_to_length - len(list_))]
def rescale_annotations(annotations: List[Annotation], crop_coordinates: BoundingBox, flip: bool) -> \
List[Annotation]:
def clamp(x: float):
return max(min(x, 1.), 0.)
def rescale_bbox(bbox: BoundingBox) -> BoundingBox:
x0 = clamp((bbox[0] - crop_coordinates[0]) / crop_coordinates[2])
y0 = clamp((bbox[1] - crop_coordinates[1]) / crop_coordinates[3])
w = min(bbox[2] / crop_coordinates[2], 1 - x0)
h = min(bbox[3] / crop_coordinates[3], 1 - y0)
if flip:
x0 = 1 - (x0 + w)
return x0, y0, w, h
return [a._replace(bbox=rescale_bbox(a.bbox)) for a in annotations]
def filter_annotations(annotations: List[Annotation], crop_coordinates: BoundingBox) -> List:
return [a for a in annotations if intersection_area(a.bbox, crop_coordinates) > 0.0]
def additional_parameters_string(annotation: Annotation, short: bool = True) -> str:
sl = slice(1) if short else slice(None)
string = ''
if not (annotation.is_group_of or annotation.is_occluded or annotation.is_depiction or annotation.is_inside):
return string
if annotation.is_group_of:
string += 'group'[sl] + ','
if annotation.is_occluded:
string += 'occluded'[sl] + ','
if annotation.is_depiction:
string += 'depiction'[sl] + ','
if annotation.is_inside:
string += 'inside'[sl]
return '(' + string.strip(",") + ')'
def get_plot_font_size(font_size: Optional[int], figure_size: Tuple[int, int]) -> int:
if font_size is None:
font_size = 10
if max(figure_size) >= 256:
font_size = 12
if max(figure_size) >= 512:
font_size = 15
return font_size
def get_circle_size(figure_size: Tuple[int, int]) -> int:
circle_size = 2
if max(figure_size) >= 256:
circle_size = 3
if max(figure_size) >= 512:
circle_size = 4
return circle_size
def load_object_from_string(object_string: str) -> Any:
"""
Source: https://stackoverflow.com/a/10773699
"""
module_name, class_name = object_string.rsplit(".", 1)
return getattr(importlib.import_module(module_name), class_name)
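# --- Hedged usage sketch (not part of the original file) ---
# Demonstrates the corner layout documented in `boxes_to_corners_3d` and the
# (x0, y0, w, h) convention of `intersection_area`, using a single axis-aligned
# box (heading = 0) so the expected corner values follow directly from the template.
if __name__ == '__main__':
    box = np.array([[0., 0., 0., 2., 4., 1., 0.]])  # x, y, z, dx, dy, dz, heading
    corners = boxes_to_corners_3d(box)
    assert corners.shape == (1, 8, 3)
    assert np.allclose(corners[0, 0], [1., 2., -0.5])   # corner 0: (+dx/2, +dy/2, -dz/2)
    assert np.allclose(corners[0, 6], [-1., -2., 0.5])  # corner 6: the opposite corner
    # two 2x2 rectangles offset by (1, 1) overlap in a 1x1 region
    assert intersection_area((0., 0., 2., 2.), (1., 1., 2., 2.)) == 1.0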
================================================
FILE: lidm/data/helper_types.py
================================================
from typing import Tuple, Optional, NamedTuple, Union, List
from PIL.Image import Image as pil_image
from torch import Tensor
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal
Image = Union[Tensor, pil_image]
# BoundingBox = Tuple[float, float, float, float] # x0, y0, w, h | x0, y0, x1, y1
# BoundingBox3D = Tuple[float, float, float, float, float, float] # x0, y0, z0, l, w, h
BoundingBox = Tuple[float, float, float, float] # corner coordinates (x,y) in the order of bottom-right -> bottom-left -> top-left -> top-right
Center = Tuple[float, float]
class Annotation(NamedTuple):
    category_id: int
    bbox: Optional[BoundingBox] = None
    center: Optional[Center] = None
    # optional flags read by conditional_builder.utils.additional_parameters_string
    is_group_of: bool = False
    is_occluded: bool = False
    is_depiction: bool = False
    is_inside: bool = False
================================================
FILE: lidm/data/kitti.py
================================================
import glob
import os
import pickle
import numpy as np
import yaml
from PIL import Image
import xml.etree.ElementTree as ET
from lidm.data.base import DatasetBase
from .annotated_dataset import Annotated3DObjectsDataset
from .conditional_builder.utils import corners_3d_to_2d
from .helper_types import Annotation
from ..utils.lidar_utils import pcd2range, pcd2coord2d, range2pcd
# TODO add annotation categories and semantic categories
CATEGORIES = ['ignore', 'car', 'bicycle', 'motorcycle', 'truck', 'other-vehicle', 'person', 'bicyclist', 'motorcyclist',
'road', 'parking', 'sidewalk', 'other-ground', 'building', 'fence', 'vegetation', 'trunk', 'terrain',
'pole', 'traffic-sign']
CATE2LABEL = {k: v for v, k in enumerate(CATEGORIES)}  # 0: ignore, 1~19: valid categories
LABEL2RGB = np.array([(0, 0, 0), (0, 0, 142), (119, 11, 32), (0, 0, 230), (0, 0, 70), (0, 0, 90), (220, 20, 60),
(255, 0, 0), (0, 0, 110), (128, 64, 128), (250, 170, 160), (244, 35, 232), (230, 150, 140),
(70, 70, 70), (190, 153, 153), (107, 142, 35), (0, 80, 100), (230, 150, 140), (153, 153, 153),
(220, 220, 0)])
CAMERAS = ['CAM_FRONT']
BBOX_CATS = ['car', 'people', 'cycle']
BBOX_CAT2LABEL = {'car': 0, 'truck': 0, 'bus': 0, 'caravan': 0, 'person': 1, 'rider': 2, 'motorcycle': 2, 'bicycle': 2}
# train + test
SEM_KITTI_TRAIN_SET = ['00', '01', '02', '03', '04', '05', '06', '07', '09', '10']
KITTI_TRAIN_SET = SEM_KITTI_TRAIN_SET + ['11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21']
KITTI360_TRAIN_SET = ['00', '02', '04', '05', '06', '07', '09', '10'] + ['08'] # partial test data at '02' sequence
CAM_KITTI360_TRAIN_SET = ['00', '04', '05', '06', '07', '08', '09', '10'] # cam mismatch lidar in '02'
# validation
SEM_KITTI_VAL_SET = KITTI_VAL_SET = ['08']
CAM_KITTI360_VAL_SET = KITTI360_VAL_SET = ['03']
class KITTIBase(DatasetBase):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.dataset_name = 'kitti'
self.num_sem_cats = kwargs['dataset_config'].num_sem_cats + 1
@staticmethod
def load_lidar_sweep(path):
scan = np.fromfile(path, dtype=np.float32)
scan = scan.reshape((-1, 4))
points = scan[:, 0:3] # get xyz
return points
def load_semantic_map(self, path, pcd):
raise NotImplementedError
def load_camera(self, path):
raise NotImplementedError
def __getitem__(self, idx):
example = dict()
data_path = self.data[idx]
# lidar point cloud
sweep = self.load_lidar_sweep(data_path)
if self.lidar_transform:
sweep, _ = self.lidar_transform(sweep, None)
if self.condition_key == 'segmentation':
# semantic maps
proj_range, sem_map = self.load_semantic_map(data_path, sweep)
example[self.condition_key] = sem_map
else:
proj_range, _ = pcd2range(sweep, self.img_size, self.fov, self.depth_range)
proj_range, proj_mask = self.process_scan(proj_range)
example['image'], example['mask'] = proj_range, proj_mask
if self.return_pcd:
reproj_sweep, _, _ = range2pcd(proj_range[0] * .5 + .5, self.fov, self.depth_range, self.depth_scale, self.log_scale)
example['raw'] = sweep
example['reproj'] = reproj_sweep.astype(np.float32)
# image degradation
if self.degradation_transform:
degraded_proj_range = self.degradation_transform(proj_range)
example['degraded_image'] = degraded_proj_range
# cameras
if self.condition_key == 'camera':
cameras = self.load_camera(data_path)
example[self.condition_key] = cameras
return example
class SemanticKITTIBase(KITTIBase):
def __init__(self, **kwargs):
super().__init__(**kwargs)
assert self.condition_key in ['segmentation'] # for segmentation input only
self.label2rgb = LABEL2RGB
def prepare_data(self):
# read data paths from KITTI
for seq_id in eval('SEM_KITTI_%s_SET' % self.split.upper()):
self.data.extend(glob.glob(os.path.join(
self.data_root, f'dataset/sequences/{seq_id}/velodyne/*.bin')))
# read label mapping
data_config = yaml.safe_load(open('./data/config/semantic-kitti.yaml', 'r'))
remap_dict = data_config["learning_map"]
max_key = max(remap_dict.keys())
self.learning_map = np.zeros((max_key + 100), dtype=np.int32)
self.learning_map[list(remap_dict.keys())] = list(remap_dict.values())
def load_semantic_map(self, path, pcd):
label_path = path.replace('velodyne', 'labels').replace('.bin', '.label')
labels = np.fromfile(label_path, dtype=np.uint32)
labels = labels.reshape((-1))
labels = labels & 0xFFFF # semantic label in lower half
labels = self.learning_map[labels]
proj_range, sem_map = pcd2range(pcd, self.img_size, self.fov, self.depth_range, labels=labels)
# sem_map = np.expand_dims(sem_map, axis=0).astype(np.int64)
sem_map = sem_map.astype(np.int64)
if self.filtered_map_cats is not None:
sem_map[np.isin(sem_map, self.filtered_map_cats)] = 0 # set filtered category as noise
onehot = np.eye(self.num_sem_cats, dtype=np.float32)[sem_map].transpose(2, 0, 1)
return proj_range, onehot
class SemanticKITTITrain(SemanticKITTIBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/SemanticKITTI', split='train', **kwargs)
class SemanticKITTIValidation(SemanticKITTIBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/SemanticKITTI', split='val', **kwargs)
class KITTI360Base(KITTIBase):
def __init__(self, split_per_view=None, **kwargs):
super().__init__(**kwargs)
self.split_per_view = split_per_view
if self.condition_key == 'camera':
assert self.split_per_view is not None, 'For camera-to-lidar, need to specify split_per_view'
def prepare_data(self):
# read data paths
self.data = []
if self.condition_key == 'camera':
seq_list = eval('CAM_KITTI360_%s_SET' % self.split.upper())
else:
seq_list = eval('KITTI360_%s_SET' % self.split.upper())
for seq_id in seq_list:
self.data.extend(glob.glob(os.path.join(
self.data_root, f'data_3d_raw/2013_05_28_drive_00{seq_id}_sync/velodyne_points/data/*.bin')))
def random_drop_camera(self, camera_list):
if np.random.rand() < self.aug_config['camera_drop'] and self.split == 'train':
camera_list = [np.zeros_like(c) if i != len(camera_list) // 2 else c for i, c in enumerate(camera_list)] # keep the middle view only
return camera_list
def load_camera(self, path):
camera_path = path.replace('data_3d_raw', 'data_2d_camera').replace('velodyne_points/data', 'image_00/data_rect').replace('.bin', '.png')
camera = np.array(Image.open(camera_path)).astype(np.float32) / 255.
camera = camera.transpose(2, 0, 1)
if self.view_transform:
camera = self.view_transform(camera)
camera_list = np.split(camera, self.split_per_view, axis=2) # split into n chunks as different views
camera_list = self.random_drop_camera(camera_list)
return camera_list
class KITTI360Train(KITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='train', **kwargs)
class KITTI360Validation(KITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='val', **kwargs)
class AnnotatedKITTI360Base(Annotated3DObjectsDataset, KITTI360Base):
def __init__(self, **kwargs):
self.id_bbox_dict = dict()
self.id_label_dict = dict()
Annotated3DObjectsDataset.__init__(self, **kwargs)
KITTI360Base.__init__(self, **kwargs)
assert self.condition_key in ['center', 'bbox'] # for annotated images only
@staticmethod
def parseOpencvMatrix(node):
rows = int(node.find('rows').text)
cols = int(node.find('cols').text)
data = node.find('data').text.split(' ')
mat = []
for d in data:
d = d.replace('\n', '')
if len(d) < 1:
continue
mat.append(float(d))
mat = np.reshape(mat, [rows, cols])
return mat
def parseVertices(self, child):
transform = self.parseOpencvMatrix(child.find('transform'))
R = transform[:3, :3]
T = transform[:3, 3]
vertices = self.parseOpencvMatrix(child.find('vertices'))
vertices = np.matmul(R, vertices.transpose()).transpose() + T
return vertices
def parse_bbox_xml(self, path):
tree = ET.parse(path)
root = tree.getroot()
bbox_dict = dict()
label_dict = dict()
for child in root:
if child.find('transform') is None:
continue
label_name = child.find('label').text
if label_name not in BBOX_CAT2LABEL:
continue
label = BBOX_CAT2LABEL[label_name]
timestamp = int(child.find('timestamp').text)
# verts = self.parseVertices(child)
verts = self.parseOpencvMatrix(child.find('vertices'))[:8]
if timestamp in bbox_dict:
bbox_dict[timestamp].append(verts)
label_dict[timestamp].append(label)
else:
bbox_dict[timestamp] = [verts]
label_dict[timestamp] = [label]
return bbox_dict, label_dict
def prepare_data(self):
KITTI360Base.prepare_data(self)
self.data = [p for p in self.data if '2013_05_28_drive_0008_sync' not in p] # remove unlabeled sequence 08
seq_list = eval('KITTI360_%s_SET' % self.split.upper())
for seq_id in seq_list:
if seq_id != '08':
xml_path = os.path.join(self.data_root, f'data_3d_bboxes/train/2013_05_28_drive_00{seq_id}_sync.xml')
bbox_dict, label_dict = self.parse_bbox_xml(xml_path)
self.id_bbox_dict[seq_id] = bbox_dict
self.id_label_dict[seq_id] = label_dict
def load_annotation(self, path):
seq_id = path.split('/')[-4].split('_')[-2][-2:]
timestamp = int(path.split('/')[-1].replace('.bin', ''))
verts_list = self.id_bbox_dict[seq_id][timestamp]
label_list = self.id_label_dict[seq_id][timestamp]
if self.condition_key == 'bbox':
points = np.stack(verts_list)
elif self.condition_key == 'center':
            points = np.stack([(verts[0] + verts[6]) / 2. for verts in verts_list])  # per-box center from opposite corners
else:
raise NotImplementedError
labels = np.array([label_list])
if self.anno_transform:
points, labels = self.anno_transform(points, labels)
return points, labels
def __getitem__(self, idx):
example = dict()
data_path = self.data[idx]
# lidar point cloud
sweep = self.load_lidar_sweep(data_path)
# annotations
bbox_points, bbox_labels = self.load_annotation(data_path)
if self.lidar_transform:
sweep, bbox_points = self.lidar_transform(sweep, bbox_points)
# point cloud -> range
proj_range, _ = pcd2range(sweep, self.img_size, self.fov, self.depth_range)
proj_range, proj_mask = self.process_scan(proj_range)
example['image'], example['mask'] = proj_range, proj_mask
if self.return_pcd:
example['reproj'] = sweep
# annotation -> range
# NOTE: do not need to transform bbox points along with lidar, since their coordinates are based on range-image space instead of 3D space
proj_bbox_points, proj_bbox_labels = pcd2coord2d(bbox_points, self.fov, self.depth_range, labels=bbox_labels)
builder = self.conditional_builders[self.condition_key]
if self.condition_key == 'bbox':
proj_bbox_points = corners_3d_to_2d(proj_bbox_points)
annotations = [Annotation(bbox=bbox.flatten(), category_id=label) for bbox, label in
zip(proj_bbox_points, proj_bbox_labels)]
else:
annotations = [Annotation(center=center, category_id=label) for center, label in
zip(proj_bbox_points, proj_bbox_labels)]
example[self.condition_key] = builder.build(annotations)
return example
class AnnotatedKITTI360Train(AnnotatedKITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='train', cats=BBOX_CATS, **kwargs)
class AnnotatedKITTI360Validation(AnnotatedKITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='train', cats=BBOX_CATS, **kwargs)
class KITTIImageBase(KITTIBase):
"""
    Range-image-only dataset combining KITTI-360 and SemanticKITTI
#Samples (Training): 98014, #Samples (Val): 3511
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
assert self.condition_key in [None, 'image'] # for image input only
def prepare_data(self):
# read data paths from KITTI-360
self.data = []
for seq_id in eval('KITTI360_%s_SET' % self.split.upper()):
self.data.extend(glob.glob(os.path.join(
self.data_root, f'KITTI-360/data_3d_raw/2013_05_28_drive_00{seq_id}_sync/velodyne_points/data/*.bin')))
# read data paths from KITTI
for seq_id in eval('KITTI_%s_SET' % self.split.upper()):
self.data.extend(glob.glob(os.path.join(
self.data_root, f'SemanticKITTI/dataset/sequences/{seq_id}/velodyne/*.bin')))
class KITTIImageTrain(KITTIImageBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset', split='train', **kwargs)
class KITTIImageValidation(KITTIImageBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset', split='val', **kwargs)
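# --- Hedged usage sketch (not part of the original file) ---
# Standalone illustration of the SemanticKITTI label handling in
# `SemanticKITTIBase.load_semantic_map`: the packed 32-bit label keeps the semantic
# class in its lower 16 bits (the instance id sits in the upper 16 bits); it is then
# remapped through `learning_map` and one-hot encoded. The mapping below is a toy
# two-entry excerpt, not the full semantic-kitti.yaml learning map.
if __name__ == '__main__':
    packed = np.array([(7 << 16) | 10, 40], dtype=np.uint32)  # e.g. instance 7 of class 10, and class 40
    semantic = packed & 0xFFFF                                # -> [10, 40]
    learning_map = np.zeros(260, dtype=np.int32)
    learning_map[[10, 40]] = [1, 9]                           # raw id -> train id
    train_ids = learning_map[semantic]                        # -> [1, 9]
    onehot = np.eye(20, dtype=np.float32)[train_ids]          # (#points, #classes)
    assert train_ids.tolist() == [1, 9] and onehot.shape == (2, 20)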
================================================
FILE: lidm/eval/README.md
================================================
# Evaluation Toolbox for LiDAR Generation
This directory is a **self-contained**, **memory-friendly** and mostly **CUDA-accelerated** toolbox of multiple evaluation metrics for LiDAR generative models, including:
* Perceptual metrics (proposed in our work):
* Fréchet Range Image Distance (**FRID**)
* Fréchet Sparse Volume Distance (**FSVD**)
* Fréchet Point-based Volume Distance (**FPVD**)
* Statistical metrics (proposed in [Learning Representations and Generative Models for 3D Point Clouds](https://arxiv.org/abs/1707.02392)):
* Minimum Matching Distance (**MMD**)
* Jensen-Shannon Divergence (**JSD**)
* Statistical pairwise metrics (for reconstruction only):
* Chamfer Distance (**CD**)
* Earth Mover's Distance (**EMD**)
## Citation
If you find this project useful in your research, please consider citing:
```
@article{ran2024towards,
title={Towards Realistic Scene Generation with LiDAR Diffusion Models},
author={Ran, Haoxi and Guizilini, Vitor and Wang, Yue},
journal={arXiv preprint arXiv:2404.00815},
year={2024}
}
```
## Dependencies
### Basic (install through **pip**):
* scipy
* numpy
* torch
* pyyaml
### Required by FSVD and FPVD:
* [Torchsparse v1.4.0](https://github.com/mit-han-lab/torchsparse/tree/v1.4.0) (`pip install git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0`)
* [Google Sparse Hash library](https://github.com/sparsehash/sparsehash) (`apt-get install libsparsehash-dev` **or** compile it locally and add its include directory to the `CPLUS_INCLUDE_PATH` environment variable)
## Model Zoo
To evaluate with perceptual metrics on different types of LiDAR data, you can download the pretrained models either:
* all at once, as a single .zip file from this [google drive link](https://drive.google.com/file/d/1Ml4p4_nMlwLkSp7JB528GJv2_HxO8v1i/view?usp=drive_link), or
* individually, as the **full directory** of one specific model listed below:
### 64-beam LiDAR (trained on [SemanticKITTI](http://semantic-kitti.org/dataset.html)):
| Metric | Model | Arch | Link | Code | Comments |
|:------:|:-------------------------------------------------------------------------------------------:|:-----------------------:|:-------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------|---------------------------------------------------------------------------|
| FRID | [RangeNet++](https://www.ipb.uni-bonn.de/wp-content/papercite-data/pdf/milioto2019iros.pdf) | DarkNet21-based UNet | [Google Drive](https://drive.google.com/drive/folders/1ZS8KOoxB9hjB6kwKbH5Zfc8O5qJlKsbl?usp=drive_link) | [./models/rangenet/model.py](./models/rangenet/model.py) | range image input (our retrained model; remission input not required) |
| FSVD | [MinkowskiNet](https://arxiv.org/abs/1904.08755) | Sparse UNet | [Google Drive](https://drive.google.com/drive/folders/1zN12ZEvjIvo4PCjAsncgC22yvtRrCCMe?usp=drive_link) | [./models/minkowskinet/model.py](./models/minkowskinet/model.py) | point cloud input |
| FPVD | [SPVCNN](https://arxiv.org/abs/2007.16100) | Point-Voxel Sparse UNet | [Google Drive](https://drive.google.com/drive/folders/1oEm3qpxfGetiVAfXIvecawEiFqW79M6B?usp=drive_link) | [./models/spvcnn/model.py](./models/spvcnn/model.py) | point cloud input |
### 32-beam LiDAR (trained on [nuScenes](https://www.nuscenes.org/nuscenes)):
| Metric | Model | Arch | Link | Code | Comments |
|:------:|:------------------------------------------------:|:-----------------------:|:-------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------|-------------------|
| FSVD | [MinkowskiNet](https://arxiv.org/abs/1904.08755) | Sparse UNet | [Google Drive](https://drive.google.com/drive/folders/1oZIS9FlklCQ6dlh3TZ8Junir7QwgT-Me?usp=drive_link) | [./models/minkowskinet/model.py](./models/minkowskinet/model.py) | point cloud input |
| FPVD | [SPVCNN](https://arxiv.org/abs/2007.16100) | Point-Voxel Sparse UNet | [Google Drive](https://drive.google.com/drive/folders/1F69RbprAoT6MOJ7iI0KHjxuq-tbeqGiR?usp=drive_link) | [./models/spvcnn/model.py](./models/spvcnn/model.py) | point cloud input |
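The tables above correspond to sub-folders `pretrained_weights/<dataset>/<model>` expected by `build_model` in `__init__.py`. A minimal sketch of loading one model programmatically (assuming its weights are downloaded and a CUDA device is available):
```
from lidm.eval import build_model

# loads pretrained_weights/kitti/rangenet, used for FRID on 64-beam data
model = build_model('kitti', 'rangenet', device='cuda')
```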
## Usage
1. Place the unzipped `pretrained_weights` folder under the project root directory **or** point the `DEFAULT_ROOT` variable in `__init__.py` to its location.
2. Prepare input data, including the synthesized samples and the reference dataset. **Note**: The reference data should be the **point clouds projected back from range images** instead of raw point clouds.
3. Specify the data type (`32` or `64`) and the metrics to evaluate. Options: `mmd`, `jsd`, `frid`, `fsvd`, `fpvd`, `cd`, `emd`.
4. (Optional) If you want to compute the `frid`, `fsvd` or `fpvd` metric, adjust the corresponding batch size through `MODAL2BATCHSIZE` in `__init__.py` according to your available GPU memory (defaults assume ~24GB).
5. Start the evaluation; all results will be printed to the console.
### Example:
```
from lidm.eval.eval_utils import evaluate
data = '64' # specify data type to evaluate
metrics = ['mmd', 'jsd', 'frid', 'fsvd', 'fpvd'] # specify metrics to evaluate
# list of np.float32 array
# shape of each array: (#points, #dim=3), #dim: xyz coordinate (NOTE: no need to input remission)
reference = ...
samples = ...
evaluate(reference, samples, metrics, data)
```
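Each element of `reference` / `samples` is an `(N, 3)` float32 array of xyz coordinates. Below is a minimal sketch (the path is a placeholder) of collecting raw KITTI velodyne scans into that format; per step 2 above, the reference scans should additionally be projected to range images and back (e.g. with the helpers in `lidm/utils/lidar_utils.py`) before evaluation:
```
import glob
import numpy as np

# placeholder path: point this to your own data
paths = sorted(glob.glob('./dataset/SemanticKITTI/dataset/sequences/08/velodyne/*.bin'))
reference = [np.fromfile(p, dtype=np.float32).reshape(-1, 4)[:, :3] for p in paths]
```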
## Acknowledgement
- The implementation of MinkowskiNet and SPVCNN is borrowed from [2DPASS](https://github.com/yanx27/2DPASS).
- The implementation of RangeNet++ is borrowed from [the official RangeNet++ codebase](https://github.com/PRBonn/lidar-bonnetal).
- The implementation of Chamfer Distance is adapted from [CD Pytorch Implementation](https://github.com/ThibaultGROUEIX/ChamferDistancePytorch) and Earth Mover's Distance from [MSN official repo](https://github.com/Colin97/MSN-Point-Cloud-Completion).
================================================
FILE: lidm/eval/__init__.py
================================================
"""
@Author: Haoxi Ran
@Date: 01/03/2024
@Citation: Towards Realistic Scene Generation with LiDAR Diffusion Models
"""
import os
import torch
import yaml
from lidm.utils.misc_utils import dict2namespace
from ..modules.rangenet.model import Model as rangenet
try:
from ..modules.spvcnn.model import Model as spvcnn
from ..modules.minkowskinet.model import Model as minkowskinet
except:
print('To install torchsparse 1.4.0, please refer to https://github.com/mit-han-lab/torchsparse/tree/74099d10a51c71c14318bce63d6421f698b24f24')
# user settings
DEFAULT_ROOT = './pretrained_weights'
MODAL2BATCHSIZE = {'range': 100, 'voxel': 50, 'point_voxel': 25}
OUTPUT_TEMPLATE = 50 * '-' + '\n|' + 16 * ' ' + '{}:{:.4E}' + 17 * ' ' + '|\n' + 50 * '-'
# eval settings (do not modify)
VOXEL_SIZE = 0.05
NUM_SECTORS = 16
AGG_TYPE = 'depth'
TYPE2DATASET = {'32': 'nuscenes', '64': 'kitti'}
DATA_CONFIG = {'64': {'x': [-50, 50], 'y': [-50, 50], 'z': [-3, 1]},
'32': {'x': [-30, 30], 'y': [-30, 30], 'z': [-3, 6]}}
MODALITY2MODEL = {'range': 'rangenet', 'voxel': 'minkowskinet', 'point_voxel': 'spvcnn'}
DATASET_CONFIG = {'kitti': {'size': [64, 1024], 'fov': [3, -25], 'depth_range': [1.0, 56.0], 'depth_scale': 6},
'nuscenes': {'size': [32, 1024], 'fov': [10, -30], 'depth_range': [1.0, 45.0]}}
def build_model(dataset_name, model_name, device='cpu'):
# config
model_folder = os.path.join(DEFAULT_ROOT, dataset_name, model_name)
if not os.path.isdir(model_folder):
        raise Exception('Pretrained weights not found in %s! See lidm/eval/README.md for download links.' % model_folder)
config = yaml.safe_load(open(os.path.join(model_folder, 'config.yaml'), 'r'))
if model_name != 'rangenet':
config = dict2namespace(config)
# build model
model = eval(model_name)(config)
# load checkpoint
if model_name == 'rangenet':
model.load_pretrained_weights(model_folder)
else:
ckpt = torch.load(os.path.join(model_folder, 'model.ckpt'), map_location="cpu")
model.load_state_dict(ckpt['state_dict'], strict=False)
model.to(device)
model.eval()
return model
================================================
FILE: lidm/eval/compile.sh
================================================
#!/bin/sh
cd modules/chamfer2D
python setup.py build_ext --inplace
cd ../chamfer3D
python setup.py build_ext --inplace
cd ../emd
python setup.py build_ext --inplace
cd ../..
================================================
FILE: lidm/eval/eval_utils.py
================================================
"""
@Author: Haoxi Ran
@Date: 01/03/2024
@Citation: Towards Realistic Scene Generation with LiDAR Diffusion Models
"""
import multiprocessing
from functools import partial
import numpy as np
from scipy.spatial.distance import jensenshannon
from tqdm import tqdm
from . import OUTPUT_TEMPLATE
from .metric_utils import compute_logits, compute_pairwise_cd, \
compute_pairwise_emd, pcd2bev_sum, compute_pairwise_cd_batch, pcd2bev_bin
from .fid_score import calculate_frechet_distance
def evaluate(reference, samples, metrics, data):
# perceptual
if 'frid' in metrics:
compute_frid(reference, samples, data)
if 'fsvd' in metrics:
compute_fsvd(reference, samples, data)
if 'fpvd' in metrics:
compute_fpvd(reference, samples, data)
# reconstruction
if 'cd' in metrics:
compute_cd(reference, samples)
if 'emd' in metrics:
compute_emd(reference, samples)
# statistical
if 'jsd' in metrics:
compute_jsd(reference, samples, data)
if 'mmd' in metrics:
compute_mmd(reference, samples, data)
def compute_cd(reference, samples):
"""
Calculate score of Chamfer Distance (CD)
"""
print('Evaluating (CD) ...')
results = []
for x, y in zip(reference, samples):
d = compute_pairwise_cd(x, y)
results.append(d)
score = sum(results) / len(results)
print(OUTPUT_TEMPLATE.format('CD ', score))
def compute_emd(reference, samples):
"""
Calculate score of Earth Mover's Distance (EMD)
"""
print('Evaluating (EMD) ...')
results = []
for x, y in zip(reference, samples):
d = compute_pairwise_emd(x, y)
results.append(d)
score = sum(results) / len(results)
print(OUTPUT_TEMPLATE.format('EMD ', score))
def compute_mmd(reference, samples, data, dist='cd', verbose=True):
"""
Calculate the score of Minimum Matching Distance (MMD)
"""
print('Evaluating (MMD) ...')
assert dist in ['cd', 'emd']
reference, samples = pcd2bev_bin(data, reference, samples)
compute_dist_func = compute_pairwise_cd_batch if dist == 'cd' else compute_pairwise_emd
results = []
for r in tqdm(reference, disable=not verbose):
dists = compute_dist_func(r, samples)
results.append(min(dists))
score = sum(results) / len(results)
print(OUTPUT_TEMPLATE.format('MMD ', score))
def compute_jsd(reference, samples, data):
"""
Calculate the score of Jensen-Shannon Divergence (JSD)
"""
print('Evaluating (JSD) ...')
reference, samples = pcd2bev_sum(data, reference, samples)
reference = (reference / np.sum(reference)).flatten()
samples = (samples / np.sum(samples)).flatten()
score = jensenshannon(reference, samples)
print(OUTPUT_TEMPLATE.format('JSD ', score))
def compute_fd(reference, samples):
mu1, mu2 = np.mean(reference, axis=0), np.mean(samples, axis=0)
sigma1, sigma2 = np.cov(reference, rowvar=False), np.cov(samples, rowvar=False)
distance = calculate_frechet_distance(mu1, sigma1, mu2, sigma2)
return distance
def compute_frid(reference, samples, data):
"""
Calculate the score of Fréchet Range Image Distance (FRID)
"""
print('Evaluating (FRID) ...')
gt_logits, samples_logits = compute_logits(data, 'range', reference, samples)
score = compute_fd(gt_logits, samples_logits)
print(OUTPUT_TEMPLATE.format('FRID', score))
def compute_fsvd(reference, samples, data):
"""
Calculate the score of Fréchet Sparse Volume Distance (FSVD)
"""
print('Evaluating (FSVD) ...')
gt_logits, samples_logits = compute_logits(data, 'voxel', reference, samples)
score = compute_fd(gt_logits, samples_logits)
print(OUTPUT_TEMPLATE.format('FSVD', score))
def compute_fpvd(reference, samples, data):
"""
Calculate the score of Fréchet Point-based Volume Distance (FPVD)
"""
print('Evaluating (FPVD) ...')
gt_logits, samples_logits = compute_logits(data, 'point_voxel', reference, samples)
score = compute_fd(gt_logits, samples_logits)
print(OUTPUT_TEMPLATE.format('FPVD', score))
================================================
FILE: lidm/eval/fid_score.py
================================================
"""Calculates the Frechet Inception Distance (FID) to evalulate GANs
The FID metric calculates the distance between two distributions of images.
Typically, we have summary statistics (mean & covariance matrix) of one
of these distributions, while the 2nd distribution is given by a GAN.
When run as a stand-alone program, it compares the distribution of
images that are stored as PNG/JPEG at a specified location with a
distribution given by summary statistics (in pickle format).
The FID is calculated by assuming that X_1 and X_2 are the activations of
the pool_3 layer of the inception net for generated samples and real world
samples respectively.
See --help to see further details.
Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead
of Tensorflow
Copyright 2018 Institute of Bioinformatics, JKU Linz
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import pathlib
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
import numpy as np
import torch
import torchvision.transforms as TF
from PIL import Image
from scipy import linalg
from torch.nn.functional import adaptive_avg_pool2d
try:
from tqdm import tqdm
except ImportError:
# If tqdm is not available, provide a mock version of it
def tqdm(x):
return x
class ImagePathDataset(torch.utils.data.Dataset):
def __init__(self, files, transforms=None):
self.files = files
self.transforms = transforms
def __len__(self):
return len(self.files)
def __getitem__(self, i):
path = self.files[i]
img = Image.open(path).convert('RGB')
if self.transforms is not None:
img = self.transforms(img)
return img
def get_activations(files, model, batch_size=50, dims=2048, device='cpu',
num_workers=1):
"""Calculates the activations of the pool_3 layer for all images.
Params:
-- files : List of image files paths
-- model : Instance of inception model
-- batch_size : Batch size of images for the model to process at once.
Make sure that the number of samples is a multiple of
the batch size, otherwise some samples are ignored. This
behavior is retained to match the original FID score
implementation.
-- dims : Dimensionality of features returned by Inception
-- device : Device to run calculations
-- num_workers : Number of parallel dataloader workers
Returns:
-- A numpy array of dimension (num images, dims) that contains the
activations of the given tensor when feeding inception with the
query tensor.
"""
model.eval()
if batch_size > len(files):
print(('Warning: batch size is bigger than the data size. '
'Setting batch size to data size'))
batch_size = len(files)
dataset = ImagePathDataset(files, transforms=TF.ToTensor())
dataloader = torch.utils.data.DataLoader(dataset,
batch_size=batch_size,
shuffle=False,
drop_last=False,
num_workers=num_workers)
pred_arr = np.empty((len(files), dims))
start_idx = 0
for batch in tqdm(dataloader):
batch = batch.to(device)
with torch.no_grad():
pred = model(batch)[0]
# If model output is not scalar, apply global spatial average pooling.
# This happens if you choose a dimensionality not equal 2048.
if pred.size(2) != 1 or pred.size(3) != 1:
pred = adaptive_avg_pool2d(pred, output_size=(1, 1))
pred = pred.squeeze(3).squeeze(2).cpu().numpy()
pred_arr[start_idx:start_idx + pred.shape[0]] = pred
start_idx = start_idx + pred.shape[0]
return pred_arr
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
"""Numpy implementation of the Frechet Distance.
The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
and X_2 ~ N(mu_2, C_2) is
d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
Stable version by Dougal J. Sutherland.
Params:
-- mu1 : Numpy array containing the activations of a layer of the
inception net (like returned by the function 'get_predictions')
for generated samples.
    -- mu2   : The sample mean over activations, precalculated on a
               representative data set.
    -- sigma1: The covariance matrix over activations for generated samples.
    -- sigma2: The covariance matrix over activations, precalculated on a
               representative data set.
Returns:
-- : The Frechet Distance.
"""
mu1 = np.atleast_1d(mu1)
mu2 = np.atleast_1d(mu2)
sigma1 = np.atleast_2d(sigma1)
sigma2 = np.atleast_2d(sigma2)
assert mu1.shape == mu2.shape, \
'Training and test mean vectors have different lengths'
assert sigma1.shape == sigma2.shape, \
'Training and test covariances have different dimensions'
diff = mu1 - mu2
# Product might be almost singular
covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
if not np.isfinite(covmean).all():
msg = ('fid calculation produces singular product; '
'adding %s to diagonal of cov estimates') % eps
print(msg)
offset = np.eye(sigma1.shape[0]) * eps
covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
# Numerical error might give slight imaginary component
if np.iscomplexobj(covmean):
if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
m = np.max(np.abs(covmean.imag))
raise ValueError('Imaginary component {}'.format(m))
covmean = covmean.real
tr_covmean = np.trace(covmean)
return (diff.dot(diff) + np.trace(sigma1)
+ np.trace(sigma2) - 2 * tr_covmean)
def calculate_activation_statistics(files, model, batch_size=50, dims=2048,
device='cpu', num_workers=1):
"""Calculation of the statistics used by the FID.
Params:
-- files : List of image files paths
-- model : Instance of inception model
-- batch_size : The images numpy array is split into batches with
batch size batch_size. A reasonable batch size
depends on the hardware.
-- dims : Dimensionality of features returned by Inception
-- device : Device to run calculations
-- num_workers : Number of parallel dataloader workers
Returns:
-- mu : The mean over samples of the activations of the pool_3 layer of
the inception model.
-- sigma : The covariance matrix of the activations of the pool_3 layer of
the inception model.
"""
act = get_activations(files, model, batch_size, dims, device, num_workers)
mu = np.mean(act, axis=0)
sigma = np.cov(act, rowvar=False)
return mu, sigma
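# --- Hedged usage sketch (not part of the original file) ---
# Computes the Frechet distance between two synthetic activation sets drawn from the
# same Gaussian; with matching statistics the distance should be close to zero.
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    act1 = rng.normal(0.0, 1.0, size=(2000, 16))
    act2 = rng.normal(0.0, 1.0, size=(2000, 16))
    mu1, sigma1 = np.mean(act1, axis=0), np.cov(act1, rowvar=False)
    mu2, sigma2 = np.mean(act2, axis=0), np.cov(act2, rowvar=False)
    print('FD (same distribution, ~0 expected):',
          calculate_frechet_distance(mu1, sigma1, mu2, sigma2))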
================================================
FILE: lidm/eval/metric_utils.py
================================================
"""
@Author: Haoxi Ran
@Date: 01/03/2024
@Citation: Towards Realistic Scene Generation with LiDAR Diffusion Models
"""
import math
from itertools import repeat
from typing import List, Tuple, Union
import numpy as np
import torch
from . import build_model, VOXEL_SIZE, MODALITY2MODEL, MODAL2BATCHSIZE, DATASET_CONFIG, AGG_TYPE, NUM_SECTORS, \
TYPE2DATASET, DATA_CONFIG
try:
from torchsparse import SparseTensor, PointTensor
from torchsparse.utils.collate import sparse_collate_fn
from .modules.chamfer3D.dist_chamfer_3D import chamfer_3DDist
from .modules.chamfer2D.dist_chamfer_2D import chamfer_2DDist
from .modules.emd.emd_module import emdModule
except:
print(
'To install torchsparse 1.4.0, please refer to https://github.com/mit-han-lab/torchsparse/tree/74099d10a51c71c14318bce63d6421f698b24f24')
def ravel_hash(x: np.ndarray) -> np.ndarray:
assert x.ndim == 2, x.shape
x = x - np.min(x, axis=0)
x = x.astype(np.uint64, copy=False)
xmax = np.max(x, axis=0).astype(np.uint64) + 1
h = np.zeros(x.shape[0], dtype=np.uint64)
for k in range(x.shape[1] - 1):
h += x[:, k]
h *= xmax[k + 1]
h += x[:, -1]
return h
def sparse_quantize(coords, voxel_size: Union[float, Tuple[float, ...]] = 1, *, return_index: bool = False,
return_inverse: bool = False) -> List[np.ndarray]:
"""
Modified based on https://github.com/mit-han-lab/torchsparse/blob/462dea4a701f87a7545afb3616bf2cf53dd404f3/torchsparse/utils/quantize.py
"""
if isinstance(voxel_size, (float, int)):
voxel_size = tuple(repeat(voxel_size, coords.shape[1]))
assert isinstance(voxel_size, tuple) and len(voxel_size) in [2, 3] # support 2D and 3D coordinates only
voxel_size = np.array(voxel_size)
coords = np.floor(coords / voxel_size).astype(np.int32)
_, indices, inverse_indices = np.unique(
ravel_hash(coords), return_index=True, return_inverse=True
)
coords = coords[indices]
outputs = [coords]
if return_index:
outputs += [indices]
if return_inverse:
outputs += [inverse_indices]
return outputs[0] if len(outputs) == 1 else outputs
def pcd2range(pcd, size, fov, depth_range, remission=None, labels=None, **kwargs):
# laser parameters
fov_up = fov[0] / 180.0 * np.pi # field of view up in rad
fov_down = fov[1] / 180.0 * np.pi # field of view down in rad
fov_range = abs(fov_down) + abs(fov_up) # get field of view total in rad
# get depth (distance) of all points
depth = np.linalg.norm(pcd, 2, axis=1)
# mask points out of range
mask = np.logical_and(depth > depth_range[0], depth < depth_range[1])
depth, pcd = depth[mask], pcd[mask]
# get scan components
scan_x, scan_y, scan_z = pcd[:, 0], pcd[:, 1], pcd[:, 2]
# get angles of all points
yaw = -np.arctan2(scan_y, scan_x)
pitch = np.arcsin(scan_z / depth)
# get projections in image coords
proj_x = 0.5 * (yaw / np.pi + 1.0) # in [0.0, 1.0]
proj_y = 1.0 - (pitch + abs(fov_down)) / fov_range # in [0.0, 1.0]
# scale to image size using angular resolution
proj_x *= size[1] # in [0.0, W]
proj_y *= size[0] # in [0.0, H]
# round and clamp for use as index
proj_x = np.maximum(0, np.minimum(size[1] - 1, np.floor(proj_x))).astype(np.int32) # in [0,W-1]
proj_y = np.maximum(0, np.minimum(size[0] - 1, np.floor(proj_y))).astype(np.int32) # in [0,H-1]
# order in decreasing depth
order = np.argsort(depth)[::-1]
proj_x, proj_y = proj_x[order], proj_y[order]
# project depth
depth = depth[order]
proj_range = np.full(size, -1, dtype=np.float32)
proj_range[proj_y, proj_x] = depth
# project point feature
if remission is not None:
remission = remission[mask][order]
proj_feature = np.full(size, -1, dtype=np.float32)
proj_feature[proj_y, proj_x] = remission
elif labels is not None:
labels = labels[mask][order]
proj_feature = np.full(size, 0, dtype=np.float32)
proj_feature[proj_y, proj_x] = labels
else:
proj_feature = None
return proj_range, proj_feature
def range2xyz(range_img, fov, depth_range, depth_scale, log_scale=True, **kwargs):
# laser parameters
size = range_img.shape
fov_up = fov[0] / 180.0 * np.pi # field of view up in rad
fov_down = fov[1] / 180.0 * np.pi # field of view down in rad
fov_range = abs(fov_down) + abs(fov_up) # get field of view total in rad
# inverse transform from depth
if log_scale:
depth = (np.exp2(range_img * depth_scale) - 1)
else:
depth = range_img
scan_x, scan_y = np.meshgrid(np.arange(size[1]), np.arange(size[0]))
scan_x = scan_x.astype(np.float64) / size[1]
scan_y = scan_y.astype(np.float64) / size[0]
yaw = np.pi * (scan_x * 2 - 1)
pitch = (1.0 - scan_y) * fov_range - abs(fov_down)
xyz = -np.ones((3, *size))
xyz[0] = np.cos(yaw) * np.cos(pitch) * depth
xyz[1] = -np.sin(yaw) * np.cos(pitch) * depth
xyz[2] = np.sin(pitch) * depth
# mask out invalid points
mask = np.logical_and(depth > depth_range[0], depth < depth_range[1])
xyz[:, ~mask] = -1
return xyz
def pcd2voxel(pcd):
pcd_voxel = np.round(pcd / VOXEL_SIZE)
pcd_voxel = pcd_voxel - pcd_voxel.min(0, keepdims=1)
feat = np.concatenate((pcd, -np.ones((pcd.shape[0], 1))), axis=1) # -1 for remission placeholder
_, inds, inverse_map = sparse_quantize(pcd_voxel, 1, return_index=True, return_inverse=True)
feat = torch.FloatTensor(feat[inds])
pcd_voxel = torch.LongTensor(pcd_voxel[inds])
lidar = SparseTensor(feat, pcd_voxel)
output = {'lidar': lidar}
return output
def pcd2voxel_full(data_type, *args):
config = DATA_CONFIG[data_type]
x_range, y_range, z_range = config['x'], config['y'], config['z']
vol_shape = (math.ceil((x_range[1] - x_range[0]) / VOXEL_SIZE), math.ceil((y_range[1] - y_range[0]) / VOXEL_SIZE),
math.ceil((z_range[1] - z_range[0]) / VOXEL_SIZE))
min_bound = (math.ceil((x_range[0]) / VOXEL_SIZE), math.ceil((y_range[0]) / VOXEL_SIZE),
math.ceil((z_range[0]) / VOXEL_SIZE))
output = tuple()
for data in args:
volume_list = []
for pcd in data:
# mask out invalid points
mask_x = np.logical_and(pcd[:, 0] > x_range[0], pcd[:, 0] < x_range[1])
mask_y = np.logical_and(pcd[:, 1] > y_range[0], pcd[:, 1] < y_range[1])
mask_z = np.logical_and(pcd[:, 2] > z_range[0], pcd[:, 2] < z_range[1])
mask = mask_x & mask_y & mask_z
pcd = pcd[mask]
# voxelize
pcd_voxel = np.floor(pcd / VOXEL_SIZE)
_, indices, inverse_map = sparse_quantize(pcd_voxel, 1, return_index=True, return_inverse=True)
pcd_voxel = pcd_voxel[indices]
pcd_voxel = (pcd_voxel - min_bound).astype(np.int32)
# 2D bev grid
vol = np.zeros(vol_shape, dtype=np.float32)
vol[pcd_voxel[:, 0], pcd_voxel[:, 1], pcd_voxel[:, 2]] = 1
volume_list.append(vol)
output += (volume_list,)
return output
# def pcd2bev_full(data_type, *args, voxel_size=VOXEL_SIZE):
# config = DATA_CONFIG[data_type]
# x_range, y_range = config['x'], config['y']
# vol_shape = (math.ceil((x_range[1] - x_range[0]) / voxel_size), math.ceil((y_range[1] - y_range[0]) / voxel_size))
# min_bound = (math.ceil((x_range[0]) / voxel_size), math.ceil((y_range[0]) / voxel_size))
#
# output = tuple()
# for data in args:
# volume_list = []
# for pcd in data:
# # mask out invalid points
# mask_x = np.logical_and(pcd[:, 0] > x_range[0], pcd[:, 0] < x_range[1])
# mask_y = np.logical_and(pcd[:, 1] > y_range[0], pcd[:, 1] < y_range[1])
# mask = mask_x & mask_y
# pcd = pcd[mask][:, :2] # keep x,y coord
#
# # voxelize
# pcd_voxel = np.floor(pcd / voxel_size)
# _, indices, inverse_map = sparse_quantize(pcd_voxel, 1, return_index=True, return_inverse=True)
# pcd_voxel = pcd_voxel[indices]
# pcd_voxel = (pcd_voxel - min_bound).astype(np.int32)
#
# # 2D bev grid
# vol = np.zeros(vol_shape, dtype=np.float32)
# vol[pcd_voxel[:, 0], pcd_voxel[:, 1]] = 1
# volume_list.append(vol)
# output += (volume_list,)
# return output
def pcd2bev_sum(data_type, *args, voxel_size=VOXEL_SIZE):
config = DATA_CONFIG[data_type]
x_range, y_range = config['x'], config['y']
vol_shape = (math.ceil((x_range[1] - x_range[0]) / voxel_size), math.ceil((y_range[1] - y_range[0]) / voxel_size))
min_bound = (math.ceil((x_range[0]) / voxel_size), math.ceil((y_range[0]) / voxel_size))
output = tuple()
for data in args:
volume_sum = np.zeros(vol_shape, np.float32)
for pcd in data:
# mask out invalid points
mask_x = np.logical_and(pcd[:, 0] > x_range[0], pcd[:, 0] < x_range[1])
mask_y = np.logical_and(pcd[:, 1] > y_range[0], pcd[:, 1] < y_range[1])
mask = mask_x & mask_y
pcd = pcd[mask][:, :2] # keep x,y coord
# voxelize
pcd_voxel = np.floor(pcd / voxel_size)
_, indices, inverse_map = sparse_quantize(pcd_voxel, 1, return_index=True, return_inverse=True)
pcd_voxel = pcd_voxel[indices]
pcd_voxel = (pcd_voxel - min_bound).astype(np.int32)
# summation
volume_sum[pcd_voxel[:, 0], pcd_voxel[:, 1]] += 1.
output += (volume_sum,)
return output
def pcd2bev_bin(data_type, *args, voxel_size=0.5):
config = DATA_CONFIG[data_type]
x_range, y_range = config['x'], config['y']
vol_shape = (math.ceil((x_range[1] - x_range[0]) / voxel_size), math.ceil((y_range[1] - y_range[0]) / voxel_size))
min_bound = (math.ceil((x_range[0]) / voxel_size), math.ceil((y_range[0]) / voxel_size))
output = tuple()
for data in args:
pcd_list = []
for pcd in data:
# mask out invalid points
mask_x = np.logical_and(pcd[:, 0] > x_range[0], pcd[:, 0] < x_range[1])
mask_y = np.logical_and(pcd[:, 1] > y_range[0], pcd[:, 1] < y_range[1])
mask = mask_x & mask_y
pcd = pcd[mask][:, :2] # keep x,y coord
# voxelize
pcd_voxel = np.floor(pcd / voxel_size)
_, indices, inverse_map = sparse_quantize(pcd_voxel, 1, return_index=True, return_inverse=True)
pcd_voxel = pcd_voxel[indices]
pcd_voxel = ((pcd_voxel - min_bound) / vol_shape).astype(np.float32)
pcd_list.append(pcd_voxel)
output += (pcd_list,)
return output
def bev_sample(data_type, *args, voxel_size=0.5):
config = DATA_CONFIG[data_type]
x_range, y_range = config['x'], config['y']
output = tuple()
for data in args:
pcd_list = []
for pcd in data:
# mask out invalid points
mask_x = np.logical_and(pcd[:, 0] > x_range[0], pcd[:, 0] < x_range[1])
mask_y = np.logical_and(pcd[:, 1] > y_range[0], pcd[:, 1] < y_range[1])
mask = mask_x & mask_y
pcd = pcd[mask][:, :2] # keep x,y coord
# voxelize
pcd_voxel = np.floor(pcd / voxel_size)
_, indices, inverse_map = sparse_quantize(pcd_voxel, 1, return_index=True, return_inverse=True)
pcd = pcd[indices]
pcd_list.append(pcd)
output += (pcd_list,)
return output
def preprocess_pcd(pcd, **kwargs):
depth = np.linalg.norm(pcd, 2, axis=1)
mask = np.logical_and(depth > kwargs['depth_range'][0], depth < kwargs['depth_range'][1])
pcd = pcd[mask]
return pcd
def preprocess_range(pcd, **kwargs):
depth_img = pcd2range(pcd, **kwargs)[0]
xyz_img = range2xyz(depth_img, log_scale=False, **kwargs)
depth_img = depth_img[None]
img = np.vstack([depth_img, xyz_img])
return img
def batch2list(batch_dict, agg_type='depth', **kwargs):
"""
Aggregation Type: Default 'depth', ['all', 'sector', 'depth']
"""
output_list = []
batch_indices = batch_dict['batch_indices']
for b_idx in range(batch_indices.max() + 1):
# avg all
if agg_type == 'all':
logits = batch_dict['logits'][batch_indices == b_idx].mean(0)
# avg on sectors
elif agg_type == 'sector':
logits = batch_dict['logits'][batch_indices == b_idx]
coords = batch_dict['coords'][batch_indices == b_idx].float()
coords = coords - coords.mean(0)
angle = torch.atan2(coords[:, 1], coords[:, 0]) # [-pi, pi]
sector_range = torch.linspace(-np.pi - 1e-4, np.pi + 1e-4, NUM_SECTORS + 1)
logits_list = []
for i in range(NUM_SECTORS):
sector_indices = torch.where((angle >= sector_range[i]) & (angle < sector_range[i + 1]))[0]
sector_logits = logits[sector_indices].mean(0)
sector_logits = torch.nan_to_num(sector_logits, 0.)
logits_list.append(sector_logits)
logits = torch.cat(logits_list) # dim: 768
# avg by depth
elif agg_type == 'depth':
logits = batch_dict['logits'][batch_indices == b_idx]
coords = batch_dict['coords'][batch_indices == b_idx].float()
coords = coords - coords.mean(0)
bev_depth = torch.norm(coords, dim=-1) * VOXEL_SIZE
sector_range = torch.linspace(kwargs['depth_range'][0] + 3, kwargs['depth_range'][1], NUM_SECTORS + 1)
sector_range[0] = 0.
logits_list = []
for i in range(NUM_SECTORS):
sector_indices = torch.where((bev_depth >= sector_range[i]) & (bev_depth < sector_range[i + 1]))[0]
sector_logits = logits[sector_indices].mean(0)
sector_logits = torch.nan_to_num(sector_logits, 0.)
logits_list.append(sector_logits)
logits = torch.cat(logits_list) # dim: 768
else:
raise NotImplementedError
output_list.append(logits.detach().cpu().numpy())
return output_list
def compute_logits(data_type, modality, *args):
assert data_type in ['32', '64']
assert modality in ['range', 'voxel', 'point_voxel']
is_voxel = 'voxel' in modality
dataset_name = TYPE2DATASET[data_type]
dataset_config = DATASET_CONFIG[dataset_name]
bs = MODAL2BATCHSIZE[modality]
model = build_model(dataset_name, MODALITY2MODEL[modality], device='cuda')
output = tuple()
for data in args:
all_logits_list = []
for i in range(math.ceil(len(data) / bs)):
batch = data[i * bs:(i + 1) * bs]
if is_voxel:
batch = [pcd2voxel(preprocess_pcd(pcd, **dataset_config)) for pcd in batch]
batch = sparse_collate_fn(batch)
batch = {k: v.cuda() if isinstance(v, (torch.Tensor, SparseTensor, PointTensor)) else v for k, v in
batch.items()}
with torch.no_grad():
batch_out = model(batch, return_final_logits=True)
batch_out = batch2list(batch_out, AGG_TYPE, **dataset_config)
all_logits_list.extend(batch_out)
else:
batch = [preprocess_range(pcd, **dataset_config) for pcd in batch]
batch = torch.from_numpy(np.stack(batch)).float().cuda()
with torch.no_grad():
batch_out = model(batch, return_final_logits=True, agg_type=AGG_TYPE)
all_logits_list.append(batch_out)
if is_voxel:
all_logits = np.stack(all_logits_list)
else:
all_logits = np.vstack(all_logits_list)
output += (all_logits,)
del model, batch, batch_out
torch.cuda.empty_cache()
return output
def compute_pairwise_cd(x, y, module=None):
if module is None:
module = chamfer_3DDist()
if x.ndim == 2 and y.ndim == 2:
x, y = x[None], y[None]
x, y = torch.from_numpy(x).cuda(), torch.from_numpy(y).cuda()
dist1, dist2, _, _ = module(x, y)
dist = (dist1.mean() + dist2.mean()) / 2
return dist.item()
def compute_pairwise_cd_batch(reference, samples):
ndim = reference.ndim
assert ndim in [2, 3]
module = chamfer_3DDist() if ndim == 3 else chamfer_2DDist()
len_r, len_s = reference.shape[0], [s.shape[0] for s in samples]
max_len = max([len_r] + len_s)
reference = torch.from_numpy(
np.vstack([reference, np.ones((max_len - reference.shape[0], ndim), dtype=np.float32) * 1e6])).cuda()
samples = [np.vstack([s, np.ones((max_len - s.shape[0], ndim), dtype=np.float32) * 1e6]) for s in samples]
samples = torch.from_numpy(np.stack(samples)).cuda()
reference = reference.expand_as(samples)
dist_r, dist_s, _, _ = module(reference, samples)
results = []
for i in range(samples.shape[0]):
dist1, dist2, len1, len2 = dist_r[i], dist_s[i], len_r, len_s[i]
dist = (dist1[:len1].mean() + dist2[:len2].mean()) / 2.
results.append(dist.item())
return results
def compute_pairwise_emd(x, y, module=None):
if module is None:
module = emdModule()
n_points = min(x.shape[0], y.shape[0])
n_points = n_points - n_points % 1024
x, y = x[:n_points], y[:n_points]
if x.ndim == 2 and y.ndim == 2:
x, y = x[None], y[None]
x, y = torch.from_numpy(x).cuda(), torch.from_numpy(y).cuda()
dist, _ = module(x, y, 0.005, 50)
dist = torch.sqrt(dist).mean()
return dist.item()
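# --- Hedged usage sketch (not part of the original file) ---
# Round-trips a synthetic point cloud through the range projection used for FRID
# (`pcd2range`) and back to per-pixel xyz (`range2xyz`, log_scale=False), using the
# 64-beam KITTI settings from DATASET_CONFIG. Run within the package context, e.g.
# `python -m lidm.eval.metric_utils`.
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    pcd = rng.uniform(-40., 40., size=(5000, 3)).astype(np.float32)
    pcd[:, 2] = rng.uniform(-2., 0.5, size=5000)  # keep points near the ground plane
    depth_img, _ = pcd2range(pcd, (64, 1024), fov=[3, -25], depth_range=[1.0, 56.0])
    xyz_img = range2xyz(depth_img, fov=[3, -25], depth_range=[1.0, 56.0],
                        depth_scale=6, log_scale=False)
    print('range image:', depth_img.shape, 'reprojected xyz:', xyz_img.shape)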
================================================
FILE: lidm/eval/models/__init__.py
================================================
================================================
FILE: lidm/eval/models/minkowskinet/__init__.py
================================================
================================================
FILE: lidm/eval/models/minkowskinet/model.py
================================================
import torch
import torch.nn as nn
try:
import torchsparse
import torchsparse.nn as spnn
from ..ts import basic_blocks
except ImportError:
    raise Exception('The torchsparse library is required. Reference: https://github.com/mit-han-lab/torchsparse/tree/v1.4.0')
class Model(nn.Module):
def __init__(self, config):
super().__init__()
cr = config.model_params.cr
cs = config.model_params.layer_num
cs = [int(cr * x) for x in cs]
self.pres = self.vres = config.model_params.voxel_size
self.num_classes = config.model_params.num_class
self.stem = nn.Sequential(
spnn.Conv3d(config.model_params.input_dims, cs[0], kernel_size=3, stride=1),
spnn.BatchNorm(cs[0]), spnn.ReLU(True),
spnn.Conv3d(cs[0], cs[0], kernel_size=3, stride=1),
spnn.BatchNorm(cs[0]), spnn.ReLU(True))
self.stage1 = nn.Sequential(
basic_blocks.BasicConvolutionBlock(cs[0], cs[0], ks=2, stride=2, dilation=1),
basic_blocks.ResidualBlock(cs[0], cs[1], ks=3, stride=1, dilation=1),
basic_blocks.ResidualBlock(cs[1], cs[1], ks=3, stride=1, dilation=1),
)
self.stage2 = nn.Sequential(
basic_blocks.BasicConvolutionBlock(cs[1], cs[1], ks=2, stride=2, dilation=1),
basic_blocks.ResidualBlock(cs[1], cs[2], ks=3, stride=1, dilation=1),
basic_blocks.ResidualBlock(cs[2], cs[2], ks=3, stride=1, dilation=1),
)
self.stage3 = nn.Sequential(
basic_blocks.BasicConvolutionBlock(cs[2], cs[2], ks=2, stride=2, dilation=1),
basic_blocks.ResidualBlock(cs[2], cs[3], ks=3, stride=1, dilation=1),
basic_blocks.ResidualBlock(cs[3], cs[3], ks=3, stride=1, dilation=1),
)
self.stage4 = nn.Sequential(
basic_blocks.BasicConvolutionBlock(cs[3], cs[3], ks=2, stride=2, dilation=1),
basic_blocks.ResidualBlock(cs[3], cs[4], ks=3, stride=1, dilation=1),
basic_blocks.ResidualBlock(cs[4], cs[4], ks=3, stride=1, dilation=1),
)
self.up1 = nn.ModuleList([
basic_blocks.BasicDeconvolutionBlock(cs[4], cs[5], ks=2, stride=2),
nn.Sequential(
basic_blocks.ResidualBlock(cs[5] + cs[3], cs[5], ks=3, stride=1,
dilation=1),
basic_blocks.ResidualBlock(cs[5], cs[5], ks=3, stride=1, dilation=1),
)
])
self.up2 = nn.ModuleList([
basic_blocks.BasicDeconvolutionBlock(cs[5], cs[6], ks=2, stride=2),
nn.Sequential(
basic_blocks.ResidualBlock(cs[6] + cs[2], cs[6], ks=3, stride=1,
dilation=1),
basic_blocks.ResidualBlock(cs[6], cs[6], ks=3, stride=1, dilation=1),
)
])
self.up3 = nn.ModuleList([
basic_blocks.BasicDeconvolutionBlock(cs[6], cs[7], ks=2, stride=2),
nn.Sequential(
basic_blocks.ResidualBlock(cs[7] + cs[1], cs[7], ks=3, stride=1,
dilation=1),
basic_blocks.ResidualBlock(cs[7], cs[7], ks=3, stride=1, dilation=1),
)
])
self.up4 = nn.ModuleList([
basic_blocks.BasicDeconvolutionBlock(cs[7], cs[8], ks=2, stride=2),
nn.Sequential(
basic_blocks.ResidualBlock(cs[8] + cs[0], cs[8], ks=3, stride=1,
dilation=1),
basic_blocks.ResidualBlock(cs[8], cs[8], ks=3, stride=1, dilation=1),
)
])
self.classifier = nn.Sequential(nn.Linear(cs[8], self.num_classes))
self.weight_initialization()
self.dropout = nn.Dropout(0.3, True)
def weight_initialization(self):
for m in self.modules():
if isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def forward(self, data_dict, return_logits=False, return_final_logits=False):
x = data_dict['lidar']
x.C = x.C.int()
x0 = self.stem(x)
x1 = self.stage1(x0)
x2 = self.stage2(x1)
x3 = self.stage3(x2)
x4 = self.stage4(x3)
if return_logits:
output_dict = dict()
output_dict['logits'] = x4.F
output_dict['batch_indices'] = x4.C[:, -1]
return output_dict
y1 = self.up1[0](x4)
y1 = torchsparse.cat([y1, x3])
y1 = self.up1[1](y1)
y2 = self.up2[0](y1)
y2 = torchsparse.cat([y2, x2])
y2 = self.up2[1](y2)
y3 = self.up3[0](y2)
y3 = torchsparse.cat([y3, x1])
y3 = self.up3[1](y3)
y4 = self.up4[0](y3)
y4 = torchsparse.cat([y4, x0])
y4 = self.up4[1](y4)
if return_final_logits:
output_dict = dict()
output_dict['logits'] = y4.F
output_dict['coords'] = y4.C[:, :3]
output_dict['batch_indices'] = y4.C[:, -1]
return output_dict
output = self.classifier(y4.F)
        data_dict['output'] = output  # classifier output is a dense tensor, not a SparseTensor
return data_dict
================================================
FILE: lidm/eval/models/rangenet/__init__.py
================================================
================================================
FILE: lidm/eval/models/rangenet/model.py
================================================
#!/usr/bin/env python3
# This file is covered by the LICENSE file in the root of this project.
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
def __init__(self, inplanes, planes, bn_d=0.1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes[0], kernel_size=1,
stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(planes[0], momentum=bn_d)
self.relu1 = nn.LeakyReLU(0.1)
self.conv2 = nn.Conv2d(planes[0], planes[1], kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes[1], momentum=bn_d)
self.relu2 = nn.LeakyReLU(0.1)
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu1(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu2(out)
out += residual
return out
# ******************************************************************************
# number of layers per model
model_blocks = {
21: [1, 1, 2, 2, 1],
53: [1, 2, 8, 8, 4],
}
class Backbone(nn.Module):
"""
    Backbone for DarknetSeg. Subclasses PyTorch's own "nn" module
"""
def __init__(self, params):
super(Backbone, self).__init__()
self.use_range = params["input_depth"]["range"]
self.use_xyz = params["input_depth"]["xyz"]
self.use_remission = params["input_depth"]["remission"]
self.drop_prob = params["dropout"]
self.bn_d = params["bn_d"]
self.OS = params["OS"]
self.layers = params["extra"]["layers"]
# input depth calc
self.input_depth = 0
self.input_idxs = []
if self.use_range:
self.input_depth += 1
self.input_idxs.append(0)
if self.use_xyz:
self.input_depth += 3
self.input_idxs.extend([1, 2, 3])
if self.use_remission:
self.input_depth += 1
self.input_idxs.append(4)
# stride play
self.strides = [2, 2, 2, 2, 2]
# check current stride
current_os = 1
for s in self.strides:
current_os *= s
# make the new stride
if self.OS > current_os:
print("Can't do OS, ", self.OS,
" because it is bigger than original ", current_os)
else:
# redo strides according to needed stride
for i, stride in enumerate(reversed(self.strides), 0):
if int(current_os) != self.OS:
if stride == 2:
current_os /= 2
self.strides[-1 - i] = 1
if int(current_os) == self.OS:
break
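        # e.g. OS=16 with strides [2, 2, 2, 2, 2] only resets the last stride, giving [2, 2, 2, 2, 1]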
# check that darknet exists
assert self.layers in model_blocks.keys()
# generate layers depending on darknet type
self.blocks = model_blocks[self.layers]
# input layer
self.conv1 = nn.Conv2d(self.input_depth, 32, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(32, momentum=self.bn_d)
self.relu1 = nn.LeakyReLU(0.1)
# encoder
self.enc1 = self._make_enc_layer(BasicBlock, [32, 64], self.blocks[0],
stride=self.strides[0], bn_d=self.bn_d)
self.enc2 = self._make_enc_layer(BasicBlock, [64, 128], self.blocks[1],
stride=self.strides[1], bn_d=self.bn_d)
self.enc3 = self._make_enc_layer(BasicBlock, [128, 256], self.blocks[2],
stride=self.strides[2], bn_d=self.bn_d)
self.enc4 = self._make_enc_layer(BasicBlock, [256, 512], self.blocks[3],
stride=self.strides[3], bn_d=self.bn_d)
self.enc5 = self._make_enc_layer(BasicBlock, [512, 1024], self.blocks[4],
stride=self.strides[4], bn_d=self.bn_d)
# for a bit of fun
self.dropout = nn.Dropout2d(self.drop_prob)
# last channels
self.last_channels = 1024
# make layer useful function
def _make_enc_layer(self, block, planes, blocks, stride, bn_d=0.1):
layers = []
# downsample
layers.append(("conv", nn.Conv2d(planes[0], planes[1],
kernel_size=3,
stride=[1, stride], dilation=1,
padding=1, bias=False)))
layers.append(("bn", nn.BatchNorm2d(planes[1], momentum=bn_d)))
layers.append(("relu", nn.LeakyReLU(0.1)))
# blocks
inplanes = planes[1]
for i in range(0, blocks):
layers.append(("residual_{}".format(i),
block(inplanes, planes, bn_d)))
return nn.Sequential(OrderedDict(layers))
def run_layer(self, x, layer, skips, os):
y = layer(x)
if y.shape[2] < x.shape[2] or y.shape[3] < x.shape[3]:
skips[os] = x.detach()
os *= 2
x = y
return x, skips, os
def forward(self, x, return_logits=False, return_list=None):
# filter input
x = x[:, self.input_idxs]
# run cnn
# store for skip connections
skips = {}
out_dict = {}
os = 1
# first layer
x, skips, os = self.run_layer(x, self.conv1, skips, os)
x, skips, os = self.run_layer(x, self.bn1, skips, os)
x, skips, os = self.run_layer(x, self.relu1, skips, os)
if return_list and 'enc_0' in return_list:
out_dict['enc_0'] = x.detach().cpu() # 32, 64, 1024
# all encoder blocks with intermediate dropouts
x, skips, os = self.run_layer(x, self.enc1, skips, os)
if return_list and 'enc_1' in return_list:
out_dict['enc_1'] = x.detach().cpu() # 64, 64, 512
x, skips, os = self.run_layer(x, self.dropout, skips, os)
x, skips, os = self.run_layer(x, self.enc2, skips, os)
if return_list and 'enc_2' in return_list:
out_dict['enc_2'] = x.detach().cpu() # 128, 64, 256
x, skips, os = self.run_layer(x, self.dropout, skips, os)
x, skips, os = self.run_layer(x, self.enc3, skips, os)
if return_list and 'enc_3' in return_list:
out_dict['enc_3'] = x.detach().cpu() # 256, 64, 128
x, skips, os = self.run_layer(x, self.dropout, skips, os)
x, skips, os = self.run_layer(x, self.enc4, skips, os)
if return_list and 'enc_4' in return_list:
out_dict['enc_4'] = x.detach().cpu() # 512, 64, 64
x, skips, os = self.run_layer(x, self.dropout, skips, os)
x, skips, os = self.run_layer(x, self.enc5, skips, os)
if return_list and 'enc_5' in return_list:
out_dict['enc_5'] = x.detach().cpu() # 1024, 64, 32
if return_logits:
return x
x, skips, os = self.run_layer(x, self.dropout, skips, os)
if return_list is not None:
return x, skips, out_dict
return x, skips
def get_last_depth(self):
return self.last_channels
def get_input_depth(self):
return self.input_depth
class Decoder(nn.Module):
"""
    Decoder for DarknetSeg. Subclasses PyTorch's own "nn" module
"""
def __init__(self, params, OS=32, feature_depth=1024):
super(Decoder, self).__init__()
self.backbone_OS = OS
self.backbone_feature_depth = feature_depth
self.drop_prob = params["dropout"]
self.bn_d = params["bn_d"]
self.index = 0
# stride play
self.strides = [2, 2, 2, 2, 2]
# check current stride
current_os = 1
for s in self.strides:
current_os *= s
# redo strides according to needed stride
for i, stride in enumerate(self.strides):
if int(current_os) != self.backbone_OS:
if stride == 2:
current_os /= 2
self.strides[i] = 1
if int(current_os) == self.backbone_OS:
break
# decoder
self.dec5 = self._make_dec_layer(BasicBlock,
[self.backbone_feature_depth, 512],
bn_d=self.bn_d,
stride=self.strides[0])
self.dec4 = self._make_dec_layer(BasicBlock, [512, 256], bn_d=self.bn_d,
stride=self.strides[1])
self.dec3 = self._make_dec_layer(BasicBlock, [256, 128], bn_d=self.bn_d,
stride=self.strides[2])
self.dec2 = self._make_dec_layer(BasicBlock, [128, 64], bn_d=self.bn_d,
stride=self.strides[3])
self.dec1 = self._make_dec_layer(BasicBlock, [64, 32], bn_d=self.bn_d,
stride=self.strides[4])
# layer list to execute with skips
self.layers = [self.dec5, self.dec4, self.dec3, self.dec2, self.dec1]
# for a bit of fun
self.dropout = nn.Dropout2d(self.drop_prob)
# last channels
self.last_channels = 32
def _make_dec_layer(self, block, planes, bn_d=0.1, stride=2):
layers = []
# downsample
if stride == 2:
layers.append(("upconv", nn.ConvTranspose2d(planes[0], planes[1],
kernel_size=[1, 4], stride=[1, 2],
padding=[0, 1])))
else:
layers.append(("conv", nn.Conv2d(planes[0], planes[1],
kernel_size=3, padding=1)))
layers.append(("bn", nn.BatchNorm2d(planes[1], momentum=bn_d)))
layers.append(("relu", nn.LeakyReLU(0.1)))
# blocks
layers.append(("residual", block(planes[1], planes, bn_d)))
return nn.Sequential(OrderedDict(layers))
def run_layer(self, x, layer, skips, os):
feats = layer(x) # up
if feats.shape[-1] > x.shape[-1]:
os //= 2 # match skip
feats = feats + skips[os].detach() # add skip
x = feats
return x, skips, os
def forward(self, x, skips, return_logits=False, return_list=None):
os = self.backbone_OS
out_dict = {}
# run layers
x, skips, os = self.run_layer(x, self.dec5, skips, os)
if return_list and 'dec_4' in return_list:
out_dict['dec_4'] = x.detach().cpu() # 512, 64, 64
x, skips, os = self.run_layer(x, self.dec4, skips, os)
if return_list and 'dec_3' in return_list:
out_dict['dec_3'] = x.detach().cpu() # 256, 64, 128
x, skips, os = self.run_layer(x, self.dec3, skips, os)
if return_list and 'dec_2' in return_list:
out_dict['dec_2'] = x.detach().cpu() # 128, 64, 256
x, skips, os = self.run_layer(x, self.dec2, skips, os)
if return_list and 'dec_1' in return_list:
out_dict['dec_1'] = x.detach().cpu() # 64, 64, 512
x, skips, os = self.run_layer(x, self.dec1, skips, os)
if return_list and 'dec_0' in return_list:
out_dict['dec_0'] = x.detach().cpu() # 32, 64, 1024
logits = torch.clone(x).detach()
x = self.dropout(x)
if return_logits:
return x, logits
if return_list is not None:
return out_dict
return x
def get_last_depth(self):
return self.last_channels
class Model(nn.Module):
def __init__(self, config):
super().__init__()
self.config = config
self.backbone = Backbone(params=self.config["backbone"])
self.decoder = Decoder(params=self.config["decoder"], OS=self.config["backbone"]["OS"],
feature_depth=self.backbone.get_last_depth())
def load_pretrained_weights(self, path):
w_dict = torch.load(path + "/backbone",
map_location=lambda storage, loc: storage)
self.backbone.load_state_dict(w_dict, strict=True)
w_dict = torch.load(path + "/segmentation_decoder",
map_location=lambda storage, loc: storage)
self.decoder.load_state_dict(w_dict, strict=True)
def forward(self, x, return_logits=False, return_final_logits=False, return_list=None, agg_type='depth'):
if return_logits:
logits = self.backbone(x, return_logits)
logits = F.adaptive_avg_pool2d(logits, (1, 1)).squeeze()
logits = torch.clone(logits).detach().cpu().numpy()
return logits
elif return_list is not None:
x, skips, enc_dict = self.backbone(x, return_list=return_list)
dec_dict = self.decoder(x, skips, return_list=return_list)
out_dict = {**enc_dict, **dec_dict}
return out_dict
elif return_final_logits:
assert agg_type in ['all', 'sector', 'depth']
y, skips = self.backbone(x)
y, logits = self.decoder(y, skips, True)
B, C, H, W = logits.shape
N = 16
# avg all
if agg_type == 'all':
logits = logits.mean([2, 3])
# avg in patch
elif agg_type == 'sector':
logits = logits.view(B, C, H, N, W // N).mean([2, 4]).reshape(B, -1)
# avg in row
elif agg_type == 'depth':
logits = logits.view(B, C, N, H // N, W).mean([3, 4]).reshape(B, -1)
logits = torch.clone(logits).detach().cpu().numpy()
return logits
else:
y, skips = self.backbone(x)
y = self.decoder(y, skips, False)
return y
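# Shape sketch for the three `agg_type` reductions in Model.forward above
# (CPU-only, sizes are illustrative): 'all' averages over the whole range
# image, 'sector' averages within N column groups, and 'depth' averages
# within N row groups, each producing one feature vector per scan.
if __name__ == '__main__':
    B, C, H, W, N = 2, 32, 64, 1024, 16
    logits = torch.randn(B, C, H, W)
    f_all = logits.mean([2, 3])                                             # (B, C)
    f_sector = logits.view(B, C, H, N, W // N).mean([2, 4]).reshape(B, -1)  # (B, C * N)
    f_depth = logits.view(B, C, N, H // N, W).mean([3, 4]).reshape(B, -1)   # (B, C * N)
    print(f_all.shape, f_sector.shape, f_depth.shape)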
================================================
FILE: lidm/eval/models/spvcnn/__init__.py
================================================
================================================
FILE: lidm/eval/models/spvcnn/model.py
================================================
import torch.nn as nn
try:
import torchsparse
import torchsparse.nn as spnn
from torchsparse import PointTensor
from ..ts.utils import initial_voxelize, point_to_voxel, voxel_to_point
from ..ts import basic_blocks
except ImportError:
    raise Exception('torchsparse is required. Reference: https://github.com/mit-han-lab/torchsparse/tree/v1.4.0')
class Model(nn.Module):
def __init__(self, config):
super().__init__()
cr = config.model_params.cr
cs = config.model_params.layer_num
cs = [int(cr * x) for x in cs]
self.pres = self.vres = config.model_params.voxel_size
self.num_classes = config.model_params.num_class
self.stem = nn.Sequential(
spnn.Conv3d(config.model_params.input_dims, cs[0], kernel_size=3, stride=1),
spnn.BatchNorm(cs[0]), spnn.ReLU(True),
spnn.Conv3d(cs[0], cs[0], kernel_size=3, stride=1),
spnn.BatchNorm(cs[0]), spnn.ReLU(True))
self.stage1 = nn.Sequential(
basic_blocks.BasicConvolutionBlock(cs[0], cs[0], ks=2, stride=2, dilation=1),
basic_blocks.ResidualBlock(cs[0], cs[1], ks=3, stride=1, dilation=1),
basic_blocks.ResidualBlock(cs[1], cs[1], ks=3, stride=1, dilation=1),
)
self.stage2 = nn.Sequential(
basic_blocks.BasicConvolutionBlock(cs[1], cs[1], ks=2, stride=2, dilation=1),
basic_blocks.ResidualBlock(cs[1], cs[2], ks=3, stride=1, dilation=1),
basic_blocks.ResidualBlock(cs[2], cs[2], ks=3, stride=1, dilation=1),
)
self.stage3 = nn.Sequential(
basic_blocks.BasicConvolutionBlock(cs[2], cs[2], ks=2, stride=2, dilation=1),
basic_blocks.ResidualBlock(cs[2], cs[3], ks=3, stride=1, dilation=1),
basic_blocks.ResidualBlock(cs[3], cs[3], ks=3, stride=1, dilation=1),
)
self.stage4 = nn.Sequential(
basic_blocks.BasicConvolutionBlock(cs[3], cs[3], ks=2, stride=2, dilation=1),
basic_blocks.ResidualBlock(cs[3], cs[4], ks=3, stride=1, dilation=1),
basic_blocks.ResidualBlock(cs[4], cs[4], ks=3, stride=1, dilation=1),
)
self.up1 = nn.ModuleList([
basic_blocks.BasicDeconvolutionBlock(cs[4], cs[5], ks=2, stride=2),
nn.Sequential(
basic_blocks.ResidualBlock(cs[5] + cs[3], cs[5], ks=3, stride=1,
dilation=1),
basic_blocks.ResidualBlock(cs[5], cs[5], ks=3, stride=1, dilation=1),
)
])
self.up2 = nn.ModuleList([
basic_blocks.BasicDeconvolutionBlock(cs[5], cs[6], ks=2, stride=2),
nn.Sequential(
basic_blocks.ResidualBlock(cs[6] + cs[2], cs[6], ks=3, stride=1,
dilation=1),
basic_blocks.ResidualBlock(cs[6], cs[6], ks=3, stride=1, dilation=1),
)
])
self.up3 = nn.ModuleList([
basic_blocks.BasicDeconvolutionBlock(cs[6], cs[7], ks=2, stride=2),
nn.Sequential(
basic_blocks.ResidualBlock(cs[7] + cs[1], cs[7], ks=3, stride=1,
dilation=1),
basic_blocks.ResidualBlock(cs[7], cs[7], ks=3, stride=1, dilation=1),
)
])
self.up4 = nn.ModuleList([
basic_blocks.BasicDeconvolutionBlock(cs[7], cs[8], ks=2, stride=2),
nn.Sequential(
basic_blocks.ResidualBlock(cs[8] + cs[0], cs[8], ks=3, stride=1,
dilation=1),
basic_blocks.ResidualBlock(cs[8], cs[8], ks=3, stride=1, dilation=1),
)
])
self.classifier = nn.Sequential(nn.Linear(cs[8], self.num_classes))
self.point_transforms = nn.ModuleList([
nn.Sequential(
nn.Linear(cs[0], cs[4]),
nn.BatchNorm1d(cs[4]),
nn.ReLU(True),
),
nn.Sequential(
nn.Linear(cs[4], cs[6]),
nn.BatchNorm1d(cs[6]),
nn.ReLU(True),
),
nn.Sequential(
nn.Linear(cs[6], cs[8]),
nn.BatchNorm1d(cs[8]),
nn.ReLU(True),
)
])
self.weight_initialization()
self.dropout = nn.Dropout(0.3, True)
def weight_initialization(self):
for m in self.modules():
if isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def forward(self, data_dict, return_logits=False, return_final_logits=False):
x = data_dict['lidar']
# x: SparseTensor z: PointTensor
z = PointTensor(x.F, x.C.float())
x0 = initial_voxelize(z, self.pres, self.vres)
x0 = self.stem(x0)
z0 = voxel_to_point(x0, z, nearest=False)
z0.F = z0.F
x1 = point_to_voxel(x0, z0)
x1 = self.stage1(x1)
x2 = self.stage2(x1)
x3 = self.stage3(x2)
x4 = self.stage4(x3)
z1 = voxel_to_point(x4, z0)
z1.F = z1.F + self.point_transforms[0](z0.F)
y1 = point_to_voxel(x4, z1)
if return_logits:
output_dict = dict()
output_dict['logits'] = y1.F
output_dict['batch_indices'] = y1.C[:, -1]
return output_dict
y1.F = self.dropout(y1.F)
y1 = self.up1[0](y1)
y1 = torchsparse.cat([y1, x3])
y1 = self.up1[1](y1)
y2 = self.up2[0](y1)
y2 = torchsparse.cat([y2, x2])
y2 = self.up2[1](y2)
z2 = voxel_to_point(y2, z1)
z2.F = z2.F + self.point_transforms[1](z1.F)
y3 = point_to_voxel(y2, z2)
y3.F = self.dropout(y3.F)
y3 = self.up3[0](y3)
y3 = torchsparse.cat([y3, x1])
y3 = self.up3[1](y3)
y4 = self.up4[0](y3)
y4 = torchsparse.cat([y4, x0])
y4 = self.up4[1](y4)
z3 = voxel_to_point(y4, z2)
z3.F = z3.F + self.point_transforms[2](z2.F)
if return_final_logits:
output_dict = dict()
output_dict['logits'] = z3.F
output_dict['coords'] = z3.C[:, :3]
output_dict['batch_indices'] = z3.C[:, -1].long()
return output_dict
# output = self.classifier(z3.F)
data_dict['logits'] = z3.F
return data_dict
================================================
FILE: lidm/eval/models/ts/__init__.py
================================================
================================================
FILE: lidm/eval/models/ts/basic_blocks.py
================================================
#!/usr/bin/env python
# encoding: utf-8
'''
@author: Xu Yan
@file: basic_blocks.py
@time: 2021/4/14 22:53
'''
import torch.nn as nn
try:
import torchsparse.nn as spnn
except ImportError:
print('To install torchsparse 1.4.0, please refer to https://github.com/mit-han-lab/torchsparse/tree/74099d10a51c71c14318bce63d6421f698b24f24')
class BasicConvolutionBlock(nn.Module):
def __init__(self, inc, outc, ks=3, stride=1, dilation=1):
super().__init__()
self.net = nn.Sequential(
spnn.Conv3d(
inc,
outc,
kernel_size=ks,
dilation=dilation,
stride=stride), spnn.BatchNorm(outc),
spnn.ReLU(True))
def forward(self, x):
out = self.net(x)
return out
class BasicDeconvolutionBlock(nn.Module):
def __init__(self, inc, outc, ks=3, stride=1):
super().__init__()
self.net = nn.Sequential(
spnn.Conv3d(
inc,
outc,
kernel_size=ks,
stride=stride,
transposed=True),
spnn.BatchNorm(outc),
spnn.ReLU(True))
def forward(self, x):
return self.net(x)
class ResidualBlock(nn.Module):
def __init__(self, inc, outc, ks=3, stride=1, dilation=1):
super().__init__()
self.net = nn.Sequential(
spnn.Conv3d(
inc,
outc,
kernel_size=ks,
dilation=dilation,
stride=stride), spnn.BatchNorm(outc),
spnn.ReLU(True),
spnn.Conv3d(
outc,
outc,
kernel_size=ks,
dilation=dilation,
stride=1),
spnn.BatchNorm(outc))
self.downsample = nn.Sequential() if (inc == outc and stride == 1) else \
nn.Sequential(
spnn.Conv3d(inc, outc, kernel_size=1, dilation=1, stride=stride),
spnn.BatchNorm(outc)
)
self.ReLU = spnn.ReLU(True)
def forward(self, x):
out = self.ReLU(self.net(x) + self.downsample(x))
return out
================================================
FILE: lidm/eval/models/ts/utils.py
================================================
import torch
try:
import torchsparse.nn.functional as F
from torchsparse import PointTensor, SparseTensor
from torchsparse.nn.utils import get_kernel_offsets
except ImportError:
print('To install torchsparse 1.4.0, please refer to https://github.com/mit-han-lab/torchsparse/tree/74099d10a51c71c14318bce63d6421f698b24f24')
__all__ = ['initial_voxelize', 'point_to_voxel', 'voxel_to_point']
# z: PointTensor
# return: SparseTensor
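# initial_voxelize rescales the point coordinates from `init_res` to `after_res`,
# hashes the floored coordinates to identify occupied voxels, and averages the
# features of all points falling into the same voxel (F.spvoxelize).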
def initial_voxelize(z, init_res, after_res):
new_float_coord = torch.cat([(z.C[:, :3] * init_res) / after_res, z.C[:, -1].view(-1, 1)], 1)
pc_hash = F.sphash(torch.floor(new_float_coord).int())
sparse_hash = torch.unique(pc_hash)
idx_query = F.sphashquery(pc_hash, sparse_hash)
counts = F.spcount(idx_query.int(), len(sparse_hash))
inserted_coords = F.spvoxelize(torch.floor(new_float_coord), idx_query, counts)
inserted_coords = torch.round(inserted_coords).int()
inserted_feat = F.spvoxelize(z.F, idx_query, counts)
new_tensor = SparseTensor(inserted_feat, inserted_coords, 1)
new_tensor.cmaps.setdefault(new_tensor.stride, new_tensor.coords)
z.additional_features['idx_query'][1] = idx_query
z.additional_features['counts'][1] = counts
z.C = new_float_coord
return new_tensor
# x: SparseTensor, z: PointTensor
# return: SparseTensor
def point_to_voxel(x, z):
if z.additional_features is None or \
z.additional_features.get('idx_query') is None or \
z.additional_features['idx_query'].get(x.s) is None:
pc_hash = F.sphash(
torch.cat([torch.floor(z.C[:, :3] / x.s[0]).int() * x.s[0], z.C[:, -1].int().view(-1, 1)], 1))
sparse_hash = F.sphash(x.C)
idx_query = F.sphashquery(pc_hash, sparse_hash)
counts = F.spcount(idx_query.int(), x.C.shape[0])
z.additional_features['idx_query'][x.s] = idx_query
z.additional_features['counts'][x.s] = counts
else:
idx_query = z.additional_features['idx_query'][x.s]
counts = z.additional_features['counts'][x.s]
inserted_feat = F.spvoxelize(z.F, idx_query, counts)
new_tensor = SparseTensor(inserted_feat, x.C, x.s)
new_tensor.cmaps = x.cmaps
new_tensor.kmaps = x.kmaps
return new_tensor
# x: SparseTensor, z: PointTensor
# return: PointTensor
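# voxel_to_point devoxelizes features back onto the points: each point gathers
# its neighbouring voxels (get_kernel_offsets) and blends their features with
# trilinear weights (calc_ti_weights); queries and weights are cached per stride.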
def voxel_to_point(x, z, nearest=False):
if z.idx_query is None or z.weights is None or z.idx_query.get(x.s) is None or z.weights.get(x.s) is None:
off = get_kernel_offsets(2, x.s, 1, device=z.F.device)
old_hash = F.sphash(
torch.cat([
torch.floor(z.C[:, :3] / x.s[0]).int() * x.s[0],
z.C[:, -1].int().view(-1, 1)], 1), off)
pc_hash = F.sphash(x.C.to(z.F.device))
idx_query = F.sphashquery(old_hash, pc_hash)
weights = F.calc_ti_weights(z.C, idx_query, scale=x.s[0]).transpose(0, 1).contiguous()
idx_query = idx_query.transpose(0, 1).contiguous()
if nearest:
weights[:, 1:] = 0.
idx_query[:, 1:] = -1
new_feat = F.spdevoxelize(x.F, idx_query, weights)
new_tensor = PointTensor(new_feat, z.C, idx_query=z.idx_query, weights=z.weights)
new_tensor.additional_features = z.additional_features
new_tensor.idx_query[x.s] = idx_query
new_tensor.weights[x.s] = weights
z.idx_query[x.s] = idx_query
z.weights[x.s] = weights
else:
new_feat = F.spdevoxelize(x.F, z.idx_query.get(x.s), z.weights.get(x.s))
new_tensor = PointTensor(new_feat, z.C, idx_query=z.idx_query, weights=z.weights)
new_tensor.additional_features = z.additional_features
return new_tensor
================================================
FILE: lidm/eval/modules/__init__.py
================================================
================================================
FILE: lidm/eval/modules/chamfer2D/__init__.py
================================================
================================================
FILE: lidm/eval/modules/chamfer2D/chamfer2D.cu
================================================
#include <stdio.h>
#include <ATen/ATen.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
__global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){
const int batch=512;
__shared__ float buf[batch*2];
for (int i=blockIdx.x;i<b;i+=gridDim.x){
for (int k2=0;k2<m;k2+=batch){
int end_k=min(m,k2+batch)-k2;
for (int j=threadIdx.x;j<end_k*2;j+=blockDim.x){
buf[j]=xyz2[(i*m+k2)*2+j];
}
__syncthreads();
for (int j=threadIdx.x+blockIdx.y*blockDim.x;j<n;j+=blockDim.x*gridDim.y){
float x1=xyz[(i*n+j)*2+0];
float y1=xyz[(i*n+j)*2+1];
int best_i=0;
float best=0;
                int end_ka=end_k-(end_k&3); // largest multiple of 4 <= end_k, for the 4-way unrolled loop
if (end_ka==batch){
for (int k=0;k<batch;k+=4){
{
float x2=buf[k*2+0]-x1;
float y2=buf[k*2+1]-y1;
float d=x2*x2+y2*y2;
if (k==0 || d<best){
best=d;
best_i=k+k2;
}
}
{
float x2=buf[k*2+2]-x1;
float y2=buf[k*2+3]-y1;
float d=x2*x2+y2*y2;
if (d<best){
best=d;
best_i=k+k2+1;
}
}
{
float x2=buf[k*2+4]-x1;
float y2=buf[k*2+5]-y1;
float d=x2*x2+y2*y2;
if (d<best){
best=d;
best_i=k+k2+2;
}
}
{
float x2=buf[k*2+6]-x1;
float y2=buf[k*2+7]-y1;
float d=x2*x2+y2*y2;
if (d<best){
best=d;
best_i=k+k2+3;
}
}
}
}else{
for (int k=0;k<end_ka;k+=4){
{
float x2=buf[k*2+0]-x1;
float y2=buf[k*2+1]-y1;
float d=x2*x2+y2*y2;
if (k==0 || d<best){
best=d;
best_i=k+k2;
}
}
{
float x2=buf[k*2+2]-x1;
float y2=buf[k*2+3]-y1;
float d=x2*x2+y2*y2;
if (d<best){
best=d;
best_i=k+k2+1;
}
}
{
float x2=buf[k*2+4]-x1;
float y2=buf[k*2+5]-y1;
float d=x2*x2+y2*y2;
if (d<best){
best=d;
best_i=k+k2+2;
}
}
{
float x2=buf[k*2+6]-x1;
float y2=buf[k*2+7]-y1;
float d=x2*x2+y2*y2;
if (d<best){
best=d;
best_i=k+k2+3;
}
}
}
}
for (int k=end_ka;k<end_k;k++){
float x2=buf[k*2+0]-x1;
float y2=buf[k*2+1]-y1;
float d=x2*x2+y2*y2;
if (k==0 || d<best){
best=d;
best_i=k+k2;
}
}
if (k2==0 || result[(i*n+j)]>best){
result[(i*n+j)]=best;
result_i[(i*n+j)]=best_i;
}
}
__syncthreads();
}
}
}
// int chamfer_cuda_forward(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i, cudaStream_t stream){
int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2){
const auto batch_size = xyz1.size(0);
const auto n = xyz1.size(1); //num_points point cloud A
const auto m = xyz2.size(1); //num_points point cloud B
NmDistanceKernel<<<dim3(32,16,1),512>>>(batch_size, n, xyz1.data<float>(), m, xyz2.data<float>(), dist1.data<float>(), idx1.data<int>());
NmDistanceKernel<<<dim3(32,16,1),512>>>(batch_size, m, xyz2.data<float>(), n, xyz1.data<float>(), dist2.data<float>(), idx2.data<int>());
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("error in nnd updateOutput: %s\n", cudaGetErrorString(err));
//THError("aborting");
return 0;
}
return 1;
}
__global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){
for (int i=blockIdx.x;i<b;i+=gridDim.x){
for (int j=threadIdx.x+blockIdx.y*blockDim.x;j<n;j+=blockDim.x*gridDim.y){
float x1=xyz1[(i*n+j)*2+0];
float y1=xyz1[(i*n+j)*2+1];
int j2=idx1[i*n+j];
float x2=xyz2[(i*m+j2)*2+0];
float y2=xyz2[(i*m+j2)*2+1];
float g=grad_dist1[i*n+j]*2;
atomicAdd(&(grad_xyz1[(i*n+j)*2+0]),g*(x1-x2));
atomicAdd(&(grad_xyz1[(i*n+j)*2+1]),g*(y1-y2));
atomicAdd(&(grad_xyz2[(i*m+j2)*2+0]),-(g*(x1-x2)));
atomicAdd(&(grad_xyz2[(i*m+j2)*2+1]),-(g*(y1-y2)));
}
}
}
// int chamfer_cuda_backward(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,const float * grad_dist2,const int * idx2,float * grad_xyz1,float * grad_xyz2, cudaStream_t stream){
int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2){
// cudaMemset(grad_xyz1,0,b*n*3*4);
// cudaMemset(grad_xyz2,0,b*m*3*4);
const auto batch_size = xyz1.size(0);
const auto n = xyz1.size(1); //num_points point cloud A
const auto m = xyz2.size(1); //num_points point cloud B
NmDistanceGradKernel<<<dim3(1,16,1),256>>>(batch_size,n,xyz1.data<float>(),m,xyz2.data<float>(),graddist1.data<float>(),idx1.data<int>(),gradxyz1.data<float>(),gradxyz2.data<float>());
NmDistanceGradKernel<<<dim3(1,16,1),256>>>(batch_size,m,xyz2.data<float>(),n,xyz1.data<float>(),graddist2.data<float>(),idx2.data<int>(),gradxyz2.data<float>(),gradxyz1.data<float>());
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("error in nnd get grad: %s\n", cudaGetErrorString(err));
//THError("aborting");
return 0;
}
return 1;
}
================================================
FILE: lidm/eval/modules/chamfer2D/chamfer_cuda.cpp
================================================
#include <torch/torch.h>
#include <vector>
///TMP
//#include "common.h"
/// NOT TMP
int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2);
int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2);
int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2) {
return chamfer_cuda_forward(xyz1, xyz2, dist1, dist2, idx1, idx2);
}
int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1,
at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) {
return chamfer_cuda_backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &chamfer_forward, "chamfer forward (CUDA)");
m.def("backward", &chamfer_backward, "chamfer backward (CUDA)");
}
================================================
FILE: lidm/eval/modules/chamfer2D/dist_chamfer_2D.py
================================================
from torch import nn
from torch.autograd import Function
import torch
import importlib
import os
chamfer_found = importlib.find_loader("chamfer_2D") is not None
if not chamfer_found:
## Cool trick from https://github.com/chrdiller
print("Jitting Chamfer 2D")
cur_path = os.path.dirname(os.path.abspath(__file__))
build_path = cur_path.replace('chamfer2D', 'tmp')
os.makedirs(build_path, exist_ok=True)
from torch.utils.cpp_extension import load
chamfer_2D = load(name="chamfer_2D",
sources=[
"/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer_cuda.cpp"]),
"/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer2D.cu"]),
], build_directory=build_path)
print("Loaded JIT 2D CUDA chamfer distance")
else:
import chamfer_2D
print("Loaded compiled 2D CUDA chamfer distance")
# Chamfer's distance module @thibaultgroueix
# GPU tensors only
class chamfer_2DFunction(Function):
@staticmethod
def forward(ctx, xyz1, xyz2):
batchsize, n, dim = xyz1.size()
        assert dim == 2, "Wrong last dimension for the chamfer distance's input! Check with .size()"
        _, m, dim = xyz2.size()
        assert dim == 2, "Wrong last dimension for the chamfer distance's input! Check with .size()"
        device = xyz1.device
dist1 = torch.zeros(batchsize, n)
dist2 = torch.zeros(batchsize, m)
idx1 = torch.zeros(batchsize, n).type(torch.IntTensor)
idx2 = torch.zeros(batchsize, m).type(torch.IntTensor)
dist1 = dist1.to(device)
dist2 = dist2.to(device)
idx1 = idx1.to(device)
idx2 = idx2.to(device)
torch.cuda.set_device(device)
chamfer_2D.forward(xyz1, xyz2, dist1, dist2, idx1, idx2)
ctx.save_for_backward(xyz1, xyz2, idx1, idx2)
return dist1, dist2, idx1, idx2
@staticmethod
def backward(ctx, graddist1, graddist2, gradidx1, gradidx2):
xyz1, xyz2, idx1, idx2 = ctx.saved_tensors
graddist1 = graddist1.contiguous()
graddist2 = graddist2.contiguous()
device = graddist1.device
gradxyz1 = torch.zeros(xyz1.size())
gradxyz2 = torch.zeros(xyz2.size())
gradxyz1 = gradxyz1.to(device)
gradxyz2 = gradxyz2.to(device)
chamfer_2D.backward(
xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2
)
return gradxyz1, gradxyz2
class chamfer_2DDist(nn.Module):
def __init__(self):
super(chamfer_2DDist, self).__init__()
def forward(self, input1, input2):
input1 = input1.contiguous()
input2 = input2.contiguous()
return chamfer_2DFunction.apply(input1, input2)
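# Usage sketch: symmetric 2D Chamfer distance between two random point sets.
# This assumes a CUDA device (the kernels are GPU-only) and that the extension
# builds via the JIT path above; batch and point counts are illustrative.
if __name__ == '__main__':
    cd2d = chamfer_2DDist()
    a = torch.rand(4, 1024, 2, device='cuda')
    b = torch.rand(4, 2048, 2, device='cuda')
    dist_a, dist_b, idx_a, idx_b = cd2d(a, b)
    print(dist_a.shape, dist_b.shape)  # squared distances: (4, 1024) and (4, 2048)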
================================================
FILE: lidm/eval/modules/chamfer2D/setup.py
================================================
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
setup(
name='chamfer_2D',
ext_modules=[
CUDAExtension('chamfer_2D', [
"/".join(__file__.split('/')[:-1] + ['chamfer_cuda.cpp']),
"/".join(__file__.split('/')[:-1] + ['chamfer2D.cu']),
]),
],
cmdclass={
'build_ext': BuildExtension
})
================================================
FILE: lidm/eval/modules/chamfer3D/__init__.py
================================================
================================================
FILE: lidm/eval/modules/chamfer3D/chamfer3D.cu
================================================
#include <stdio.h>
#include <ATen/ATen.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
__global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){
const int batch=512;
__shared__ float buf[batch*3];
for (int i=blockIdx.x;i<b;i+=gridDim.x){
for (int k2=0;k2<m;k2+=batch){
int end_k=min(m,k2+batch)-k2;
for (int j=threadIdx.x;j<end_k*3;j+=blockDim.x){
buf[j]=xyz2[(i*m+k2)*3+j];
}
__syncthreads();
for (int j=threadIdx.x+blockIdx.y*blockDim.x;j<n;j+=blockDim.x*gridDim.y){
float x1=xyz[(i*n+j)*3+0];
float y1=xyz[(i*n+j)*3+1];
float z1=xyz[(i*n+j)*3+2];
int best_i=0;
float best=0;
int end_ka=end_k-(end_k&3);
if (end_ka==batch){
for (int k=0;k<batch;k+=4){
{
float x2=buf[k*3+0]-x1;
float y2=buf[k*3+1]-y1;
float z2=buf[k*3+2]-z1;
float d=x2*x2+y2*y2+z2*z2;
if (k==0 || d<best){
best=d;
best_i=k+k2;
}
}
{
float x2=buf[k*3+3]-x1;
float y2=buf[k*3+4]-y1;
float z2=buf[k*3+5]-z1;
float d=x2*x2+y2*y2+z2*z2;
if (d<best){
best=d;
best_i=k+k2+1;
}
}
{
float x2=buf[k*3+6]-x1;
float y2=buf[k*3+7]-y1;
float z2=buf[k*3+8]-z1;
float d=x2*x2+y2*y2+z2*z2;
if (d<best){
best=d;
best_i=k+k2+2;
}
}
{
float x2=buf[k*3+9]-x1;
float y2=buf[k*3+10]-y1;
float z2=buf[k*3+11]-z1;
float d=x2*x2+y2*y2+z2*z2;
if (d<best){
best=d;
best_i=k+k2+3;
}
}
}
}else{
for (int k=0;k<end_ka;k+=4){
{
float x2=buf[k*3+0]-x1;
float y2=buf[k*3+1]-y1;
float z2=buf[k*3+2]-z1;
float d=x2*x2+y2*y2+z2*z2;
if (k==0 || d<best){
best=d;
best_i=k+k2;
}
}
{
float x2=buf[k*3+3]-x1;
float y2=buf[k*3+4]-y1;
float z2=buf[k*3+5]-z1;
float d=x2*x2+y2*y2+z2*z2;
if (d<best){
best=d;
best_i=k+k2+1;
}
}
{
float x2=buf[k*3+6]-x1;
float y2=buf[k*3+7]-y1;
float z2=buf[k*3+8]-z1;
float d=x2*x2+y2*y2+z2*z2;
if (d<best){
best=d;
best_i=k+k2+2;
}
}
{
float x2=buf[k*3+9]-x1;
float y2=buf[k*3+10]-y1;
float z2=buf[k*3+11]-z1;
float d=x2*x2+y2*y2+z2*z2;
if (d<best){
best=d;
best_i=k+k2+3;
}
}
}
}
for (int k=end_ka;k<end_k;k++){
float x2=buf[k*3+0]-x1;
float y2=buf[k*3+1]-y1;
float z2=buf[k*3+2]-z1;
float d=x2*x2+y2*y2+z2*z2;
if (k==0 || d<best){
best=d;
best_i=k+k2;
}
}
if (k2==0 || result[(i*n+j)]>best){
result[(i*n+j)]=best;
result_i[(i*n+j)]=best_i;
}
}
__syncthreads();
}
}
}
// int chamfer_cuda_forward(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i, cudaStream_t stream){
int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2){
const auto batch_size = xyz1.size(0);
const auto n = xyz1.size(1); //num_points point cloud A
const auto m = xyz2.size(1); //num_points point cloud B
NmDistanceKernel<<<dim3(32,16,1),512>>>(batch_size, n, xyz1.data<float>(), m, xyz2.data<float>(), dist1.data<float>(), idx1.data<int>());
NmDistanceKernel<<<dim3(32,16,1),512>>>(batch_size, m, xyz2.data<float>(), n, xyz1.data<float>(), dist2.data<float>(), idx2.data<int>());
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("error in nnd updateOutput: %s\n", cudaGetErrorString(err));
//THError("aborting");
return 0;
}
return 1;
}
__global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){
for (int i=blockIdx.x;i<b;i+=gridDim.x){
for (int j=threadIdx.x+blockIdx.y*blockDim.x;j<n;j+=blockDim.x*gridDim.y){
float x1=xyz1[(i*n+j)*3+0];
float y1=xyz1[(i*n+j)*3+1];
float z1=xyz1[(i*n+j)*3+2];
int j2=idx1[i*n+j];
float x2=xyz2[(i*m+j2)*3+0];
float y2=xyz2[(i*m+j2)*3+1];
float z2=xyz2[(i*m+j2)*3+2];
float g=grad_dist1[i*n+j]*2;
atomicAdd(&(grad_xyz1[(i*n+j)*3+0]),g*(x1-x2));
atomicAdd(&(grad_xyz1[(i*n+j)*3+1]),g*(y1-y2));
atomicAdd(&(grad_xyz1[(i*n+j)*3+2]),g*(z1-z2));
atomicAdd(&(grad_xyz2[(i*m+j2)*3+0]),-(g*(x1-x2)));
atomicAdd(&(grad_xyz2[(i*m+j2)*3+1]),-(g*(y1-y2)));
atomicAdd(&(grad_xyz2[(i*m+j2)*3+2]),-(g*(z1-z2)));
}
}
}
// int chamfer_cuda_backward(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,const float * grad_dist2,const int * idx2,float * grad_xyz1,float * grad_xyz2, cudaStream_t stream){
int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2){
// cudaMemset(grad_xyz1,0,b*n*3*4);
// cudaMemset(grad_xyz2,0,b*m*3*4);
const auto batch_size = xyz1.size(0);
const auto n = xyz1.size(1); //num_points point cloud A
const auto m = xyz2.size(1); //num_points point cloud B
NmDistanceGradKernel<<<dim3(1,16,1),256>>>(batch_size,n,xyz1.data<float>(),m,xyz2.data<float>(),graddist1.data<float>(),idx1.data<int>(),gradxyz1.data<float>(),gradxyz2.data<float>());
NmDistanceGradKernel<<<dim3(1,16,1),256>>>(batch_size,m,xyz2.data<float>(),n,xyz1.data<float>(),graddist2.data<float>(),idx2.data<int>(),gradxyz2.data<float>(),gradxyz1.data<float>());
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("error in nnd get grad: %s\n", cudaGetErrorString(err));
//THError("aborting");
return 0;
}
return 1;
}
================================================
FILE: lidm/eval/modules/chamfer3D/chamfer_cuda.cpp
================================================
#include <torch/torch.h>
#include <vector>
///TMP
//#include "common.h"
/// NOT TMP
int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2);
int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2);
int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2) {
return chamfer_cuda_forward(xyz1, xyz2, dist1, dist2, idx1, idx2);
}
int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1,
at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) {
return chamfer_cuda_backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &chamfer_forward, "chamfer forward (CUDA)");
m.def("backward", &chamfer_backward, "chamfer backward (CUDA)");
}
================================================
FILE: lidm/eval/modules/chamfer3D/dist_chamfer_3D.py
================================================
from torch import nn
from torch.autograd import Function
import torch
import importlib
import os
chamfer_found = importlib.find_loader("chamfer_3D") is not None
if not chamfer_found:
## Cool trick from https://github.com/chrdiller
print("Jitting Chamfer 3D")
from torch.utils.cpp_extension import load
chamfer_3D = load(name="chamfer_3D",
sources=[
"/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer_cuda.cpp"]),
"/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer3D.cu"]),
])
print("Loaded JIT 3D CUDA chamfer distance")
else:
import chamfer_3D
print("Loaded compiled 3D CUDA chamfer distance")
# Chamfer's distance module @thibaultgroueix
# GPU tensors only
class chamfer_3DFunction(Function):
@staticmethod
def forward(ctx, xyz1, xyz2):
batchsize, n, _ = xyz1.size()
_, m, _ = xyz2.size()
device = xyz1.device
dist1 = torch.zeros(batchsize, n)
dist2 = torch.zeros(batchsize, m)
idx1 = torch.zeros(batchsize, n).type(torch.IntTensor)
idx2 = torch.zeros(batchsize, m).type(torch.IntTensor)
dist1 = dist1.to(device)
dist2 = dist2.to(device)
idx1 = idx1.to(device)
idx2 = idx2.to(device)
torch.cuda.set_device(device)
chamfer_3D.forward(xyz1, xyz2, dist1, dist2, idx1, idx2)
ctx.save_for_backward(xyz1, xyz2, idx1, idx2)
return dist1, dist2, idx1, idx2
@staticmethod
def backward(ctx, graddist1, graddist2, gradidx1, gradidx2):
xyz1, xyz2, idx1, idx2 = ctx.saved_tensors
graddist1 = graddist1.contiguous()
graddist2 = graddist2.contiguous()
device = graddist1.device
gradxyz1 = torch.zeros(xyz1.size())
gradxyz2 = torch.zeros(xyz2.size())
gradxyz1 = gradxyz1.to(device)
gradxyz2 = gradxyz2.to(device)
chamfer_3D.backward(
xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2
)
return gradxyz1, gradxyz2
class chamfer_3DDist(nn.Module):
def __init__(self):
super(chamfer_3DDist, self).__init__()
def forward(self, input1, input2):
input1 = input1.contiguous()
input2 = input2.contiguous()
return chamfer_3DFunction.apply(input1, input2)
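# Usage sketch: reduce the two directed distances above to one scalar Chamfer
# value per pair, mirroring the pairwise CD helper in metric_utils. This assumes
# a CUDA device and a successful build; shapes are illustrative.
if __name__ == '__main__':
    cd3d = chamfer_3DDist()
    a = torch.rand(2, 2048, 3, device='cuda')
    b = torch.rand(2, 2048, 3, device='cuda')
    d_ab, d_ba, _, _ = cd3d(a, b)
    print('CD:', ((d_ab.mean(1) + d_ba.mean(1)) / 2.).tolist())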
================================================
FILE: lidm/eval/modules/chamfer3D/setup.py
================================================
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
setup(
name='chamfer_3D',
ext_modules=[
CUDAExtension('chamfer_3D', [
"/".join(__file__.split('/')[:-1] + ['chamfer_cuda.cpp']),
"/".join(__file__.split('/')[:-1] + ['chamfer3D.cu']),
]),
],
cmdclass={
'build_ext': BuildExtension
})
================================================
FILE: lidm/eval/modules/emd/__init__.py
================================================
================================================
FILE: lidm/eval/modules/emd/emd.cpp
================================================
// EMD approximation module (based on auction algorithm)
// author: Minghua Liu
#include <torch/extension.h>
#include <vector>
int emd_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist, at::Tensor assignment, at::Tensor price,
at::Tensor assignment_inv, at::Tensor bid, at::Tensor bid_increments, at::Tensor max_increments,
at::Tensor unass_idx, at::Tensor unass_cnt, at::Tensor unass_cnt_sum, at::Tensor cnt_tmp, at::Tensor max_idx, float eps, int iters);
int emd_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz, at::Tensor graddist, at::Tensor idx);
int emd_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist, at::Tensor assignment, at::Tensor price,
at::Tensor assignment_inv, at::Tensor bid, at::Tensor bid_increments, at::Tensor max_increments,
at::Tensor unass_idx, at::Tensor unass_cnt, at::Tensor unass_cnt_sum, at::Tensor cnt_tmp, at::Tensor max_idx, float eps, int iters) {
return emd_cuda_forward(xyz1, xyz2, dist, assignment, price, assignment_inv, bid, bid_increments, max_increments, unass_idx, unass_cnt, unass_cnt_sum, cnt_tmp, max_idx, eps, iters);
}
int emd_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz, at::Tensor graddist, at::Tensor idx) {
return emd_cuda_backward(xyz1, xyz2, gradxyz, graddist, idx);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &emd_forward, "emd forward (CUDA)");
m.def("backward", &emd_backward, "emd backward (CUDA)");
}
================================================
FILE: lidm/eval/modules/emd/emd_cuda.cu
================================================
// EMD approximation module (based on auction algorithm)
// author: Minghua Liu
#include <stdio.h>
#include <ATen/ATen.h>
#include <cuda.h>
#include <iostream>
#include <cuda_runtime.h>
__device__ __forceinline__ float atomicMax(float *address, float val)
{
int ret = __float_as_int(*address);
while(val > __int_as_float(ret))
{
int old = ret;
if((ret = atomicCAS((int *)address, old, __float_as_int(val))) == old)
break;
}
return __int_as_float(ret);
}
__global__ void clear(int b, int * cnt_tmp, int * unass_cnt) {
for (int i = threadIdx.x; i < b; i += blockDim.x) {
cnt_tmp[i] = 0;
unass_cnt[i] = 0;
}
}
__global__ void calc_unass_cnt(int b, int n, int * assignment, int * unass_cnt) {
// count the number of unassigned points in each batch
const int BLOCK_SIZE = 1024;
__shared__ int scan_array[BLOCK_SIZE];
for (int i = blockIdx.x; i < b; i += gridDim.x) {
scan_array[threadIdx.x] = assignment[i * n + blockIdx.y * BLOCK_SIZE + threadIdx.x] == -1 ? 1 : 0;
__syncthreads();
int stride = 1;
while(stride <= BLOCK_SIZE / 2) {
int index = (threadIdx.x + 1) * stride * 2 - 1;
if(index < BLOCK_SIZE)
scan_array[index] += scan_array[index - stride];
stride = stride * 2;
__syncthreads();
}
__syncthreads();
if (threadIdx.x == BLOCK_SIZE - 1) {
atomicAdd(&unass_cnt[i], scan_array[threadIdx.x]);
}
__syncthreads();
}
}
__global__ void calc_unass_cnt_sum(int b, int * unass_cnt, int * unass_cnt_sum) {
    // count the cumulative sum over unass_cnt
const int BLOCK_SIZE = 512; // batch_size <= 512
__shared__ int scan_array[BLOCK_SIZE];
scan_array[threadIdx.x] = unass_cnt[threadIdx.x];
__syncthreads();
int stride = 1;
while(stride <= BLOCK_SIZE / 2) {
int index = (threadIdx.x + 1) * stride * 2 - 1;
if(index < BLOCK_SIZE)
scan_array[index] += scan_array[index - stride];
stride = stride * 2;
__syncthreads();
}
__syncthreads();
stride = BLOCK_SIZE / 4;
while(stride > 0) {
int index = (threadIdx.x + 1) * stride * 2 - 1;
if((index + stride) < BLOCK_SIZE)
scan_array[index + stride] += scan_array[index];
stride = stride / 2;
__syncthreads();
}
__syncthreads();
//printf("%d\n", unass_cnt_sum[b - 1]);
unass_cnt_sum[threadIdx.x] = scan_array[threadIdx.x];
}
__global__ void calc_unass_idx(int b, int n, int * assignment, int * unass_idx, int * unass_cnt, int * unass_cnt_sum, int * cnt_tmp) {
// list all the unassigned points
for (int i = blockIdx.x; i < b; i += gridDim.x) {
if (assignment[i * n + blockIdx.y * 1024 + threadIdx.x] == -1) {
int idx = atomicAdd(&cnt_tmp[i], 1);
unass_idx[unass_cnt_sum[i] - unass_cnt[i] + idx] = blockIdx.y * 1024 + threadIdx.x;
}
}
}
__global__ void Bid(int b, int n, const float * xyz1, const float * xyz2, float eps, int * assignment, int * assignment_inv, float * price,
int * bid, float * bid_increments, float * max_increments, int * unass_cnt, int * unass_cnt_sum, int * unass_idx) {
const int batch = 2048, block_size = 1024, block_cnt = n / 1024;
__shared__ float xyz2_buf[batch * 3];
__shared__ float price_buf[batch];
__shared__ float best_buf[block_size];
__shared__ float better_buf[block_size];
__shared__ int best_i_buf[block_size];
for (int i = blockIdx.x; i < b; i += gridDim.x) {
int _unass_cnt = unass_cnt[i];
if (_unass_cnt == 0)
continue;
int _unass_cnt_sum = unass_cnt_sum[i];
int unass_per_block = (_unass_cnt + block_cnt - 1) / block_cnt;
int thread_per_unass = block_size / unass_per_block;
int unass_this_block = max(min(_unass_cnt - (int) blockIdx.y * unass_per_block, unass_per_block), 0);
float x1, y1, z1, best = -1e9, better = -1e9;
int best_i = -1, _unass_id = -1, thread_in_unass;
if (threadIdx.x < thread_per_unass * unass_this_block) {
_unass_id = unass_per_block * blockIdx.y + threadIdx.x / thread_per_unass + _unass_cnt_sum - _unass_cnt;
_unass_id = unass_idx[_unass_id];
thread_in_unass = threadIdx.x % thread_per_unass;
x1 = xyz1[(i * n + _unass_id) * 3 + 0];
y1 = xyz1[(i * n + _unass_id) * 3 + 1];
z1 = xyz1[(i * n + _unass_id) * 3 + 2];
}
for (int k2 = 0; k2 < n; k2 += batch) {
int end_k = min(n, k2 + batch) - k2;
for (int j = threadIdx.x; j < end_k * 3; j += blockDim.x) {
xyz2_buf[j] = xyz2[(i * n + k2) * 3 + j];
}
for (int j = threadIdx.x; j < end_k; j += blockDim.x) {
price_buf[j] = price[i * n + k2 + j];
}
__syncthreads();
if (_unass_id != -1) {
int delta = (end_k + thread_per_unass - 1) / thread_per_unass;
int l = thread_in_unass * delta;
int r = min((thread_in_unass + 1) * delta, end_k);
for (int k = l; k < r; k++)
//if (!last || assignment_inv[i * n + k + k2] == -1)
{
float x2 = xyz2_buf[k * 3 + 0] - x1;
float y2 = xyz2_buf[k * 3 + 1] - y1;
float z2 = xyz2_buf[k * 3 + 2] - z1;
// the coordinates of points should be normalized to [0, 1]
float d = 3.0 - sqrtf(x2 * x2 + y2 * y2 + z2 * z2) - price_buf[k];
if (d > best) {
better = best;
best = d;
best_i = k + k2;
}
else if (d > better) {
better = d;
}
}
}
__syncthreads();
}
best_buf[threadIdx.x] = best;
better_buf[threadIdx.x] = better;
best_i_buf[threadIdx.x] = best_i;
__syncthreads();
if (_unass_id != -1 && thread_in_unass == 0) {
for (int j = threadIdx.x + 1; j < threadIdx.x + thread_per_unass; j++) {
if (best_buf[j] > best) {
better = max(best, better_buf[j]);
best = best_buf[j];
best_i = best_i_buf[j];
}
else better = max(better, best_buf[j]);
}
bid[i * n + _unass_id] = best_i;
bid_increments[i * n + _unass_id] = best - better + eps;
atomicMax(&max_increments[i * n + best_i], best - better + eps);
}
}
}
__global__ void GetMax(int b, int n, int * assignment, int * bid, float * bid_increments, float * max_increments, int * max_idx) {
for (int i = blockIdx.x; i < b; i += gridDim.x) {
int j = threadIdx.x + blockIdx.y * blockDim.x;
if (assignment[i * n + j] == -1) {
int bid_id = bid[i * n + j];
float bid_inc = bid_increments[i * n + j];
float max_inc = max_increments[i * n + bid_id];
if (bid_inc - 1e-6 <= max_inc && max_inc <= bid_inc + 1e-6)
{
max_idx[i * n + bid_id] = j;
}
}
}
}
__global__ void Assign(int b, int n, int * assignment, int * assignment_inv, float * price, int * bid, float * bid_increments, float * max_increments, int * max_idx, bool last) {
for (int i = blockIdx.x; i < b; i += gridDim.x) {
int j = threadIdx.x + blockIdx.y * blockDim.x;
if (assignment[i * n + j] == -1) {
int bid_id = bid[i * n + j];
if (last || max_idx[i * n + bid_id] == j)
{
float bid_inc = bid_increments[i * n + j];
int ass_inv = assignment_inv[i * n + bid_id];
if (!last && ass_inv != -1) {
assignment[i * n + ass_inv] = -1;
}
assignment_inv[i * n + bid_id] = j;
assignment[i * n + j] = bid_id;
price[i * n + bid_id] += bid_inc;
max_increments[i * n + bid_id] = -1e9;
}
}
}
}
__global__ void CalcDist(int b, int n, float * xyz1, float * xyz2, float * dist, int * assignment) {
for (int i = blockIdx.x; i < b; i += gridDim.x) {
int j = threadIdx.x + blockIdx.y * blockDim.x;
int k = assignment[i * n + j];
float deltax = xyz1[(i * n + j) * 3 + 0] - xyz2[(i * n + k) * 3 + 0];
float deltay = xyz1[(i * n + j) * 3 + 1] - xyz2[(i * n + k) * 3 + 1];
float deltaz = xyz1[(i * n + j) * 3 + 2] - xyz2[(i * n + k) * 3 + 2];
dist[i * n + j] = deltax * deltax + deltay * deltay + deltaz * deltaz;
}
}
int emd_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist, at::Tensor assignment, at::Tensor price,
at::Tensor assignment_inv, at::Tensor bid, at::Tensor bid_increments, at::Tensor max_increments,
at::Tensor unass_idx, at::Tensor unass_cnt, at::Tensor unass_cnt_sum, at::Tensor cnt_tmp, at::Tensor max_idx, float eps, int iters) {
const auto batch_size = xyz1.size(0);
const auto n = xyz1.size(1); //num_points point cloud A
const auto m = xyz2.size(1); //num_points point cloud B
if (n != m) {
printf("Input Error! The two point clouds should have the same size.\n");
return -1;
}
if (batch_size > 512) {
printf("Input Error! The batch size should be less than 512.\n");
return -1;
}
if (n % 1024 != 0) {
printf("Input Error! The size of the point clouds should be a multiple of 1024.\n");
return -1;
}
//cudaEvent_t start,stop;
//cudaEventCreate(&start);
//cudaEventCreate(&stop);
//cudaEventRecord(start);
//int iters = 50;
for (int i = 0; i < iters; i++) {
clear<<<1, batch_size>>>(batch_size, cnt_tmp.data<int>(), unass_cnt.data<int>());
calc_unass_cnt<<<dim3(batch_size, n / 1024, 1), 1024>>>(batch_size, n, assignment.data<int>(), unass_cnt.data<int>());
calc_unass_cnt_sum<<<1, batch_size>>>(batch_size, unass_cnt.data<int>(), unass_cnt_sum.data<int>());
calc_unass_idx<<<dim3(batch_size, n / 1024, 1), 1024>>>(batch_size, n, assignment.data<int>(), unass_idx.data<int>(), unass_cnt.data<int>(),
unass_cnt_sum.data<int>(), cnt_tmp.data<int>());
Bid<<<dim3(batch_size, n / 1024, 1), 1024>>>(batch_size, n, xyz1.data<float>(), xyz2.data<float>(), eps, assignment.data<int>(), assignment_inv.data<int>(),
price.data<float>(), bid.data<int>(), bid_increments.data<float>(), max_increments.data<float>(),
unass_cnt.data<int>(), unass_cnt_sum.data<int>(), unass_idx.data<int>());
GetMax<<<dim3(batch_size, n / 1024, 1), 1024>>>(batch_size, n, assignment.data<int>(), bid.data<int>(), bid_increments.data<float>(), max_increments.data<float>(), max_idx.data<int>());
Assign<<<dim3(batch_size, n / 1024, 1), 1024>>>(batch_size, n, assignment.data<int>(), assignment_inv.data<int>(), price.data<float>(), bid.data<int>(),
bid_increments.data<float>(), max_increments.data<float>(), max_idx.data<int>(), i == iters - 1);
}
CalcDist<<<dim3(batch_size, n / 1024, 1), 1024>>>(batch_size, n, xyz1.data<float>(), xyz2.data<float>(), dist.data<float>(), assignment.data<int>());
//cudaEventRecord(stop);
//cudaEventSynchronize(stop);
//float elapsedTime;
//cudaEventElapsedTime(&elapsedTime,start,stop);
//printf("%lf\n", elapsedTime);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("error in nnd Output: %s\n", cudaGetErrorString(err));
return 0;
}
return 1;
}
__global__ void NmDistanceGradKernel(int b, int n, const float * xyz1, const float * xyz2, const float * grad_dist, const int * idx, float * grad_xyz){
for (int i = blockIdx.x; i < b; i += gridDim.x) {
for (int j = threadIdx.x + blockIdx.y * blockDim.x; j < n; j += blockDim.x * gridDim.y) {
float x1 = xyz1[(i * n + j) * 3 + 0];
float y1 = xyz1[(i * n + j) * 3 + 1];
float z1 = xyz1[(i * n + j) * 3 + 2];
int j2 = idx[i * n + j];
float x2 = xyz2[(i * n + j2) * 3 + 0];
float y2 = xyz2[(i * n + j2) * 3 + 1];
float z2 = xyz2[(i * n + j2) * 3 + 2];
float g = grad_dist[i * n + j] * 2;
atomicAdd(&(grad_xyz[(i * n + j) * 3 + 0]), g * (x1 - x2));
atomicAdd(&(grad_xyz[(i * n + j) * 3 + 1]), g * (y1 - y2));
atomicAdd(&(grad_xyz[(i * n + j) * 3 + 2]), g * (z1 - z2));
}
}
}
int emd_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz, at::Tensor graddist, at::Tensor idx){
const auto batch_size = xyz1.size(0);
const auto n = xyz1.size(1);
const auto m = xyz2.size(1);
NmDistanceGradKernel<<<dim3(batch_size, n / 1024, 1), 1024>>>(batch_size, n, xyz1.data<float>(), xyz2.data<float>(), graddist.data<float>(), idx.data<int>(), gradxyz.data<float>());
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("error in nnd get grad: %s\n", cudaGetErrorString(err));
return 0;
}
return 1;
}
================================================
FILE: lidm/eval/modules/emd/emd_module.py
================================================
# EMD approximation module (based on auction algorithm)
# memory complexity: O(n)
# time complexity: O(n^2 * iter)
# author: Minghua Liu
# Input:
# xyz1, xyz2: [#batch, #points, 3]
# where xyz1 is the predicted point cloud and xyz2 is the ground truth point cloud
# two point clouds should have same size and be normalized to [0, 1]
# #points should be a multiple of 1024
# #batch should be no greater than 512
# eps is a parameter which balances the error rate and the speed of convergence
# iters is the number of iterations
# we only calculate gradient for xyz1
# Output:
# dist: [#batch, #points], sqrt(dist) -> L2 distance
# assignment: [#batch, #points], index of the matched point in the ground truth point cloud
# the result is an approximation and the assignment is not guaranteed to be a bijection
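# Example (sketch, mirroring metric_utils): the module below is typically used as
#   module = emdModule()
#   dist, assignment = module(xyz1, xyz2, 0.005, 50)   # (eps, iters)
#   emd_value = torch.sqrt(dist).mean()                # L2 EMD, as in metric_utils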
import importlib
import os
import time
import numpy as np
import torch
from torch import nn
from torch.autograd import Function
emd_found = importlib.find_loader("emd") is not None
if not emd_found:
## Cool trick from https://github.com/chrdiller
print("Jitting EMD 3D")
from torch.utils.cpp_extension import load
emd = load(name="emd",
sources=[
"/".join(os.path.abspath(__file__).split('/')[:-1] + ["emd.cpp"]),
"/".join(os.path.abspath(__file__).split('/')[:-1] + ["emd_cuda.cu"]),
])
print("Loaded JIT 3D CUDA emd")
else:
import emd
print("Loaded compiled 3D CUDA emd")
class emdFunction(Function):
@staticmethod
def forward(ctx, xyz1, xyz2, eps, iters):
batchsize, n, _ = xyz1.size()
_, m, _ = xyz2.size()
assert (n == m)
assert (xyz1.size()[0] == xyz2.size()[0])
# assert(n % 1024 == 0)
assert (batchsize <= 512)
xyz1 = xyz1.contiguous().float().cuda()
xyz2 = xyz2.contiguous().float().cuda()
dist = torch.zeros(batchsize, n, device='cuda').contiguous()
assignment = torch.zeros(batchsize, n, device='cuda', dtype=torch.int32).contiguous() - 1
assignment_inv = torch.zeros(batchsize, m, device='cuda', dtype=torch.int32).contiguous() - 1
price = torch.zeros(batchsize, m, device='cuda').contiguous()
bid = torch.zeros(batchsize, n, device='cuda', dtype=torch.int32).contiguous()
bid_increments = torch.zeros(batchsize, n, device='cuda').contiguous()
max_increments = torch.zeros(batchsize, m, device='cuda').contiguous()
unass_idx = torch.zeros(batchsize * n, device='cuda', dtype=torch.int32).contiguous()
max_idx = torch.zeros(batchsize * m, device='cuda', dtype=torch.int32).contiguous()
unass_cnt = torch.zeros(512, dtype=torch.int32, device='cuda').contiguous()
unass_cnt_sum = torch.zeros(512, dtype=torch.int32, device='cuda').contiguous()
cnt_tmp = torch.zeros(512, dtype=torch.int32, device='cuda').contiguous()
emd.forward(xyz1, xyz2, dist, assignment, price, assignment_inv, bid, bid_increments, max_increments, unass_idx,
unass_cnt, unass_cnt_sum, cnt_tmp, max_idx, eps, iters)
ctx.save_for_backward(xyz1, xyz2, assignment)
return dist, assignment
@staticmethod
def backward(ctx, graddist, gradidx):
xyz1, xyz2, assignment = ctx.saved_tensors
        graddist = graddist.contiguous()
        gradxyz1 = torch.zeros(xyz1.size(), device='cuda').contiguous()
        gradxyz2 = torch.zeros(xyz2.size(), device='cuda').contiguous()
        # the CUDA kernel only computes gradients w.r.t. xyz1; xyz2 receives zeros
        emd.backward(xyz1, xyz2, gradxyz1, graddist, assignment)
        return gradxyz1, gradxyz2, None, None
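A minimal usage sketch for emdFunction above, assuming a CUDA device and the constraints from the header comment (point clouds normalized to [0, 1], #points a multiple of 1024); the batch size, point count, eps and iters values here are illustrative, not fixed by this repository. Per the symbol index below, the full file also wraps this in an emdModule(nn.Module).

if __name__ == "__main__":
    # two normalized point clouds on the GPU; gradients flow only to the prediction
    pred = torch.rand(4, 2048, 3, device='cuda', requires_grad=True)  # xyz1: prediction
    gt = torch.rand(4, 2048, 3, device='cuda')                        # xyz2: ground truth
    dist, assignment = emdFunction.apply(pred, gt, 0.005, 50)         # eps, iters
    loss = torch.sqrt(dist).mean()  # sqrt(dist) gives the per-point L2 distance
    loss.backward()
    print(loss.item(), assignment.shape)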
================================================
SYMBOL INDEX (986 symbols across 60 files)
================================================
FILE: lidm/data/annotated_dataset.py
class Annotated3DObjectsDataset (line 11) | class Annotated3DObjectsDataset(Dataset):
method __init__ (line 12) | def __init__(self, min_objects_per_image: int,
method no_classes (line 24) | def no_classes(self) -> int:
method conditional_builders (line 28) | def conditional_builders(self) -> ObjectsCenterPointsConditionalBuilder:
method get_textual_label_for_category_id (line 47) | def get_textual_label_for_category_id(self, category_id: int) -> str:
FILE: lidm/data/base.py
class DatasetBase (line 15) | class DatasetBase(Dataset):
method __init__ (line 16) | def __init__(self, data_root, split, dataset_config, aug_config, retur...
method prepare_data (line 57) | def prepare_data(self):
method process_scan (line 60) | def process_scan(self, range_img):
method load_lidar_sweep (line 80) | def load_lidar_sweep(*args, **kwargs):
method load_semantic_map (line 84) | def load_semantic_map(*args, **kwargs):
method load_camera (line 88) | def load_camera(*args, **kwargs):
method load_annotation (line 92) | def load_annotation(*args, **kwargs):
method __len__ (line 95) | def __len__(self):
method __getitem__ (line 98) | def __getitem__(self, idx):
class Txt2ImgIterableBaseDataset (line 103) | class Txt2ImgIterableBaseDataset(IterableDataset):
method __init__ (line 107) | def __init__(self, num_records=0, valid_ids=None, size=256):
method __len__ (line 116) | def __len__(self):
method __iter__ (line 120) | def __iter__(self):
FILE: lidm/data/conditional_builder/objects_bbox.py
class ObjectsBoundingBoxConditionalBuilder (line 14) | class ObjectsBoundingBoxConditionalBuilder(ObjectsCenterPointsConditiona...
method object_descriptor_length (line 16) | def object_descriptor_length(self) -> int:
method _make_object_descriptors (line 19) | def _make_object_descriptors(self, annotations: List[Annotation]) -> L...
method inverse_build (line 27) | def inverse_build(self, conditional: LongTensor) -> Tuple[List[Tuple[i...
method plot (line 33) | def plot(self, conditional: LongTensor, label_for_category_no: Callabl...
FILE: lidm/data/conditional_builder/objects_center_points.py
function convert_pil_to_tensor (line 19) | def convert_pil_to_tensor(image: Image) -> Tensor:
class ObjectsCenterPointsConditionalBuilder (line 26) | class ObjectsCenterPointsConditionalBuilder:
method __init__ (line 27) | def __init__(self, no_object_classes: int, no_max_objects: int, no_tok...
method none (line 35) | def none(self) -> int:
method object_descriptor_length (line 39) | def object_descriptor_length(self) -> int:
method empty_tuple (line 43) | def empty_tuple(self) -> Tuple:
method embedding_dim (line 47) | def embedding_dim(self) -> int:
method tokenize_coordinates (line 50) | def tokenize_coordinates(self, x: float, y: float) -> int:
method coordinates_from_token (line 67) | def coordinates_from_token(self, token: int) -> (float, float):
method bbox_from_token_pair (line 72) | def bbox_from_token_pair(self, token1: int, token2: int) -> BoundingBox:
method token_pair_from_bbox (line 79) | def token_pair_from_bbox(self, bbox: BoundingBox) -> Tuple:
method inverse_build (line 83) | def inverse_build(self, conditional: LongTensor) \
method plot (line 93) | def plot(self, conditional: LongTensor, label_for_category_no: Callabl...
method object_representation (line 114) | def object_representation(self, annotation: Annotation) -> int:
method representation_to_annotation (line 117) | def representation_to_annotation(self, representation: int) -> Annotat...
method _make_object_descriptors (line 125) | def _make_object_descriptors(self, annotations: List[Annotation]) -> L...
method build (line 135) | def build(self, annotations: List[Annotation]) \
FILE: lidm/data/conditional_builder/utils.py
function corners_3d_to_2d (line 18) | def corners_3d_to_2d(corners3d):
function rotate_points_along_z (line 43) | def rotate_points_along_z(points, angle):
function boxes_to_corners_3d (line 64) | def boxes_to_corners_3d(boxes3d):
function intersection_area (line 92) | def intersection_area(rectangle1: BoundingBox, rectangle2: BoundingBox) ...
function horizontally_flip_bbox (line 105) | def horizontally_flip_bbox(bbox: BoundingBox) -> BoundingBox:
function absolute_bbox (line 109) | def absolute_bbox(relative_bbox: BoundingBox, width: int, height: int) -...
function pad_list (line 123) | def pad_list(list_: List, pad_element: Any, pad_to_length: int) -> List:
function rescale_annotations (line 127) | def rescale_annotations(annotations: List[Annotation], crop_coordinates:...
function filter_annotations (line 144) | def filter_annotations(annotations: List[Annotation], crop_coordinates: ...
function additional_parameters_string (line 148) | def additional_parameters_string(annotation: Annotation, short: bool = T...
function get_plot_font_size (line 164) | def get_plot_font_size(font_size: Optional[int], figure_size: Tuple[int,...
function get_circle_size (line 174) | def get_circle_size(figure_size: Tuple[int, int]) -> int:
function load_object_from_string (line 183) | def load_object_from_string(object_string: str) -> Any:
FILE: lidm/data/helper_types.py
class Annotation (line 17) | class Annotation(NamedTuple):
FILE: lidm/data/kitti.py
class KITTIBase (line 39) | class KITTIBase(DatasetBase):
method __init__ (line 40) | def __init__(self, **kwargs):
method load_lidar_sweep (line 46) | def load_lidar_sweep(path):
method load_semantic_map (line 52) | def load_semantic_map(self, path, pcd):
method load_camera (line 55) | def load_camera(self, path):
method __getitem__ (line 58) | def __getitem__(self, idx):
class SemanticKITTIBase (line 93) | class SemanticKITTIBase(KITTIBase):
method __init__ (line 94) | def __init__(self, **kwargs):
method prepare_data (line 99) | def prepare_data(self):
method load_semantic_map (line 111) | def load_semantic_map(self, path, pcd):
class SemanticKITTITrain (line 127) | class SemanticKITTITrain(SemanticKITTIBase):
method __init__ (line 128) | def __init__(self, **kwargs):
class SemanticKITTIValidation (line 132) | class SemanticKITTIValidation(SemanticKITTIBase):
method __init__ (line 133) | def __init__(self, **kwargs):
class KITTI360Base (line 137) | class KITTI360Base(KITTIBase):
method __init__ (line 138) | def __init__(self, split_per_view=None, **kwargs):
method prepare_data (line 144) | def prepare_data(self):
method random_drop_camera (line 155) | def random_drop_camera(self, camera_list):
method load_camera (line 160) | def load_camera(self, path):
class KITTI360Train (line 171) | class KITTI360Train(KITTI360Base):
method __init__ (line 172) | def __init__(self, **kwargs):
class KITTI360Validation (line 176) | class KITTI360Validation(KITTI360Base):
method __init__ (line 177) | def __init__(self, **kwargs):
class AnnotatedKITTI360Base (line 181) | class AnnotatedKITTI360Base(Annotated3DObjectsDataset, KITTI360Base):
method __init__ (line 182) | def __init__(self, **kwargs):
method parseOpencvMatrix (line 191) | def parseOpencvMatrix(node):
method parseVertices (line 205) | def parseVertices(self, child):
method parse_bbox_xml (line 213) | def parse_bbox_xml(self, path):
method prepare_data (line 239) | def prepare_data(self):
method load_annotation (line 251) | def load_annotation(self, path):
method __getitem__ (line 268) | def __getitem__(self, idx):
class AnnotatedKITTI360Train (line 304) | class AnnotatedKITTI360Train(AnnotatedKITTI360Base):
method __init__ (line 305) | def __init__(self, **kwargs):
class AnnotatedKITTI360Validation (line 309) | class AnnotatedKITTI360Validation(AnnotatedKITTI360Base):
method __init__ (line 310) | def __init__(self, **kwargs):
class KITTIImageBase (line 314) | class KITTIImageBase(KITTIBase):
method __init__ (line 321) | def __init__(self, **kwargs):
method prepare_data (line 325) | def prepare_data(self):
class KITTIImageTrain (line 338) | class KITTIImageTrain(KITTIImageBase):
method __init__ (line 339) | def __init__(self, **kwargs):
class KITTIImageValidation (line 343) | class KITTIImageValidation(KITTIImageBase):
method __init__ (line 344) | def __init__(self, **kwargs):
FILE: lidm/eval/__init__.py
function build_model (line 39) | def build_model(dataset_name, model_name, device='cpu'):
FILE: lidm/eval/eval_utils.py
function evaluate (line 20) | def evaluate(reference, samples, metrics, data):
function compute_cd (line 42) | def compute_cd(reference, samples):
function compute_emd (line 56) | def compute_emd(reference, samples):
function compute_mmd (line 70) | def compute_mmd(reference, samples, data, dist='cd', verbose=True):
function compute_jsd (line 87) | def compute_jsd(reference, samples, data):
function compute_fd (line 100) | def compute_fd(reference, samples):
function compute_frid (line 107) | def compute_frid(reference, samples, data):
function compute_fsvd (line 118) | def compute_fsvd(reference, samples, data):
function compute_fpvd (line 129) | def compute_fpvd(reference, samples, data):
FILE: lidm/eval/fid_score.py
function tqdm (line 40) | def tqdm(x):
class ImagePathDataset (line 43) | class ImagePathDataset(torch.utils.data.Dataset):
method __init__ (line 44) | def __init__(self, files, transforms=None):
method __len__ (line 48) | def __len__(self):
method __getitem__ (line 51) | def __getitem__(self, i):
function get_activations (line 59) | def get_activations(files, model, batch_size=50, dims=2048, device='cpu',
function calculate_frechet_distance (line 116) | def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
function calculate_activation_statistics (line 170) | def calculate_activation_statistics(files, model, batch_size=50, dims=2048,
FILE: lidm/eval/metric_utils.py
function ravel_hash (line 28) | def ravel_hash(x: np.ndarray) -> np.ndarray:
function sparse_quantize (line 43) | def sparse_quantize(coords, voxel_size: Union[float, Tuple[float, ...]] ...
function pcd2range (line 69) | def pcd2range(pcd, size, fov, depth_range, remission=None, labels=None, ...
function range2xyz (line 125) | def range2xyz(range_img, fov, depth_range, depth_scale, log_scale=True, ...
function pcd2voxel (line 157) | def pcd2voxel(pcd):
function pcd2voxel_full (line 170) | def pcd2voxel_full(data_type, *args):
function pcd2bev_sum (line 233) | def pcd2bev_sum(data_type, *args, voxel_size=VOXEL_SIZE):
function pcd2bev_bin (line 261) | def pcd2bev_bin(data_type, *args, voxel_size=0.5):
function bev_sample (line 287) | def bev_sample(data_type, *args, voxel_size=0.5):
function preprocess_pcd (line 310) | def preprocess_pcd(pcd, **kwargs):
function preprocess_range (line 317) | def preprocess_range(pcd, **kwargs):
function batch2list (line 325) | def batch2list(batch_dict, agg_type='depth', **kwargs):
function compute_logits (line 374) | def compute_logits(data_type, modality, *args):
function compute_pairwise_cd (line 415) | def compute_pairwise_cd(x, y, module=None):
function compute_pairwise_cd_batch (line 426) | def compute_pairwise_cd_batch(reference, samples):
function compute_pairwise_emd (line 447) | def compute_pairwise_emd(x, y, module=None):
FILE: lidm/eval/models/minkowskinet/model.py
class Model (line 12) | class Model(nn.Module):
method __init__ (line 13) | def __init__(self, config):
method weight_initialization (line 94) | def weight_initialization(self):
method forward (line 100) | def forward(self, data_dict, return_logits=False, return_final_logits=...
FILE: lidm/eval/models/rangenet/model.py
class BasicBlock (line 10) | class BasicBlock(nn.Module):
method __init__ (line 11) | def __init__(self, inplanes, planes, bn_d=0.1):
method forward (line 22) | def forward(self, x):
class Backbone (line 46) | class Backbone(nn.Module):
method __init__ (line 51) | def __init__(self, params):
method _make_enc_layer (line 126) | def _make_enc_layer(self, block, planes, blocks, stride, bn_d=0.1):
method run_layer (line 145) | def run_layer(self, x, layer, skips, os):
method forward (line 153) | def forward(self, x, return_logits=False, return_list=None):
method get_last_depth (line 203) | def get_last_depth(self):
method get_input_depth (line 206) | def get_input_depth(self):
class Decoder (line 210) | class Decoder(nn.Module):
method __init__ (line 215) | def __init__(self, params, OS=32, feature_depth=1024):
method _make_dec_layer (line 261) | def _make_dec_layer(self, block, planes, bn_d=0.1, stride=2):
method run_layer (line 280) | def run_layer(self, x, layer, skips, os):
method forward (line 288) | def forward(self, x, skips, return_logits=False, return_list=None):
method get_last_depth (line 318) | def get_last_depth(self):
class Model (line 322) | class Model(nn.Module):
method __init__ (line 323) | def __init__(self, config):
method load_pretrained_weights (line 330) | def load_pretrained_weights(self, path):
method forward (line 338) | def forward(self, x, return_logits=False, return_final_logits=False, r...
FILE: lidm/eval/models/spvcnn/model.py
class Model (line 13) | class Model(nn.Module):
method __init__ (line 14) | def __init__(self, config):
method weight_initialization (line 112) | def weight_initialization(self):
method forward (line 118) | def forward(self, data_dict, return_logits=False, return_final_logits=...
FILE: lidm/eval/models/ts/basic_blocks.py
class BasicConvolutionBlock (line 16) | class BasicConvolutionBlock(nn.Module):
method __init__ (line 17) | def __init__(self, inc, outc, ks=3, stride=1, dilation=1):
method forward (line 28) | def forward(self, x):
class BasicDeconvolutionBlock (line 33) | class BasicDeconvolutionBlock(nn.Module):
method __init__ (line 34) | def __init__(self, inc, outc, ks=3, stride=1):
method forward (line 46) | def forward(self, x):
class ResidualBlock (line 50) | class ResidualBlock(nn.Module):
method __init__ (line 51) | def __init__(self, inc, outc, ks=3, stride=1, dilation=1):
method forward (line 77) | def forward(self, x):
FILE: lidm/eval/models/ts/utils.py
function initial_voxelize (line 15) | def initial_voxelize(z, init_res, after_res):
function point_to_voxel (line 38) | def point_to_voxel(x, z):
function voxel_to_point (line 63) | def voxel_to_point(x, z, nearest=False):
FILE: lidm/eval/modules/chamfer2D/chamfer_cuda.cpp
function chamfer_forward (line 17) | int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, ...
function chamfer_backward (line 22) | int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxy...
function PYBIND11_MODULE (line 30) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: lidm/eval/modules/chamfer2D/dist_chamfer_2D.py
class chamfer_2DFunction (line 32) | class chamfer_2DFunction(Function):
method forward (line 34) | def forward(ctx, xyz1, xyz2):
method backward (line 60) | def backward(ctx, graddist1, graddist2, gradidx1, gradidx2):
class chamfer_2DDist (line 77) | class chamfer_2DDist(nn.Module):
method __init__ (line 78) | def __init__(self):
method forward (line 81) | def forward(self, input1, input2):
FILE: lidm/eval/modules/chamfer3D/chamfer_cuda.cpp
function chamfer_forward (line 17) | int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, ...
function chamfer_backward (line 22) | int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxy...
function PYBIND11_MODULE (line 30) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: lidm/eval/modules/chamfer3D/dist_chamfer_3D.py
class chamfer_3DFunction (line 28) | class chamfer_3DFunction(Function):
method forward (line 30) | def forward(ctx, xyz1, xyz2):
method backward (line 52) | def backward(ctx, graddist1, graddist2, gradidx1, gradidx2):
class chamfer_3DDist (line 69) | class chamfer_3DDist(nn.Module):
method __init__ (line 70) | def __init__(self):
method forward (line 73) | def forward(self, input1, input2):
FILE: lidm/eval/modules/emd/emd.cpp
function emd_forward (line 14) | int emd_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist, at::T...
function emd_backward (line 20) | int emd_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz, a...
function PYBIND11_MODULE (line 28) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: lidm/eval/modules/emd/emd_module.py
class emdFunction (line 46) | class emdFunction(Function):
method forward (line 48) | def forward(ctx, xyz1, xyz2, eps, iters):
method backward (line 79) | def backward(ctx, graddist, gradidx):
class emdModule (line 90) | class emdModule(nn.Module):
method __init__ (line 91) | def __init__(self):
method forward (line 94) | def forward(self, input1, input2, eps, iters):
function test_emd (line 98) | def test_emd():
FILE: lidm/models/autoencoder.py
class VQModel (line 15) | class VQModel(pl.LightningModule):
method __init__ (line 16) | def __init__(self,
method ema_scope (line 71) | def ema_scope(self, context=None):
method init_from_ckpt (line 85) | def init_from_ckpt(self, path, ignore_keys=list()):
method on_train_batch_end (line 99) | def on_train_batch_end(self, *args, **kwargs):
method encode (line 103) | def encode(self, x):
method encode_to_prequant (line 109) | def encode_to_prequant(self, x):
method decode (line 114) | def decode(self, quant):
method decode_code (line 119) | def decode_code(self, code_b):
method forward (line 124) | def forward(self, input, return_pred_indices=False):
method get_input (line 131) | def get_input(self, batch, k):
method get_mask (line 149) | def get_mask(self, batch):
method training_step (line 155) | def training_step(self, batch, batch_idx, optimizer_idx):
method validation_step (line 178) | def validation_step(self, batch, batch_idx):
method _validation_step (line 185) | def _validation_step(self, batch, batch_idx, suffix=""):
method configure_optimizers (line 214) | def configure_optimizers(self):
method get_last_layer (line 247) | def get_last_layer(self):
method log_images (line 251) | def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs):
method to_rgb (line 271) | def to_rgb(self, x):
class VQModelInterface (line 280) | class VQModelInterface(VQModel):
method __init__ (line 281) | def __init__(self, embed_dim, *args, **kwargs):
method encode (line 285) | def encode(self, x):
method decode (line 290) | def decode(self, h, force_not_quantize=False):
class AutoencoderKL (line 305) | class AutoencoderKL(pl.LightningModule):
method __init__ (line 306) | def __init__(self,
method init_from_ckpt (line 337) | def init_from_ckpt(self, path, ignore_keys=list()):
method encode (line 348) | def encode(self, x):
method decode (line 354) | def decode(self, z):
method forward (line 359) | def forward(self, input, sample_posterior=True):
method get_input (line 368) | def get_input(self, batch, k):
method training_step (line 374) | def training_step(self, batch, batch_idx, optimizer_idx):
method validation_step (line 395) | def validation_step(self, batch, batch_idx):
method configure_optimizers (line 408) | def configure_optimizers(self):
method get_last_layer (line 419) | def get_last_layer(self):
method log_images (line 423) | def log_images(self, batch, only_inputs=False, **kwargs):
method to_rgb (line 439) | def to_rgb(self, x):
class IdentityFirstStage (line 448) | class IdentityFirstStage(torch.nn.Module):
method __init__ (line 449) | def __init__(self, *args, vq_interface=False, **kwargs):
method encode (line 453) | def encode(self, x, *args, **kwargs):
method decode (line 456) | def decode(self, x, *args, **kwargs):
method quantize (line 459) | def quantize(self, x, *args, **kwargs):
method forward (line 464) | def forward(self, x, *args, **kwargs):
FILE: lidm/models/diffusion/classifier.py
function disabled_train (line 22) | def disabled_train(self, mode=True):
class NoisyLatentImageClassifier (line 28) | class NoisyLatentImageClassifier(pl.LightningModule):
method __init__ (line 30) | def __init__(self,
method init_from_ckpt (line 70) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False):
method load_diffusion (line 88) | def load_diffusion(self):
method load_classifier (line 95) | def load_classifier(self, ckpt_path, pool):
method get_x_noisy (line 110) | def get_x_noisy(self, x, t, noise=None):
method forward (line 120) | def forward(self, x_noisy, t, *args, **kwargs):
method get_input (line 124) | def get_input(self, batch, k):
method get_conditioning (line 133) | def get_conditioning(self, batch, k=None):
method compute_top_k (line 150) | def compute_top_k(self, logits, labels, k, reduction="mean"):
method on_train_epoch_start (line 157) | def on_train_epoch_start(self):
method write_logs (line 162) | def write_logs(self, loss, logits, targets):
method shared_step (line 179) | def shared_step(self, batch, t=None):
method training_step (line 198) | def training_step(self, batch, batch_idx):
method reset_noise_accs (line 202) | def reset_noise_accs(self):
method on_validation_start (line 206) | def on_validation_start(self):
method validation_step (line 210) | def validation_step(self, batch, batch_idx):
method configure_optimizers (line 220) | def configure_optimizers(self):
method log_images (line 238) | def log_images(self, batch, N=8, *args, **kwargs):
FILE: lidm/models/diffusion/ddim.py
class DDIMSampler (line 12) | class DDIMSampler(object):
method __init__ (line 13) | def __init__(self, model, schedule="linear", **kwargs):
method register_buffer (line 19) | def register_buffer(self, name, attr):
method make_schedule (line 25) | def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddi...
method sample (line 57) | def sample(self,
method ddim_sampling (line 115) | def ddim_sampling(self, cond, shape,
method p_sample_ddim (line 167) | def p_sample_ddim(self, x, c, t, index, repeat_noise=False, use_origin...
FILE: lidm/models/diffusion/ddpm.py
function disabled_train (line 33) | def disabled_train(self, mode=True):
function uniform_on_device (line 39) | def uniform_on_device(r1, r2, shape, device):
class DDPM (line 43) | class DDPM(pl.LightningModule):
method __init__ (line 45) | def __init__(self,
method register_schedule (line 117) | def register_schedule(self, given_betas=None, beta_schedule="linear", ...
method ema_scope (line 172) | def ema_scope(self, context=None):
method init_from_ckpt (line 186) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False):
method q_mean_variance (line 204) | def q_mean_variance(self, x_start, t):
method predict_start_from_noise (line 216) | def predict_start_from_noise(self, x_t, t, noise):
method q_posterior (line 222) | def q_posterior(self, x_start, x_t, t):
method p_mean_variance (line 231) | def p_mean_variance(self, x, t, clip_denoised: bool):
method p_sample (line 244) | def p_sample(self, x, t, clip_denoised=True, repeat_noise=False):
method p_sample_loop (line 253) | def p_sample_loop(self, shape, return_intermediates=False):
method sample (line 268) | def sample(self, batch_size=16, return_intermediates=False):
method q_sample (line 274) | def q_sample(self, x_start, t, noise=None):
method get_loss (line 279) | def get_loss(self, pred, target, mean=True):
method p_losses (line 294) | def p_losses(self, x_start, t, noise=None):
method forward (line 323) | def forward(self, x, *args, **kwargs):
method get_input (line 329) | def get_input(self, batch, k):
method shared_step (line 335) | def shared_step(self, batch):
method training_step (line 340) | def training_step(self, batch, batch_idx):
method validation_step (line 356) | def validation_step(self, batch, batch_idx):
method on_train_batch_end (line 364) | def on_train_batch_end(self, *args, **kwargs):
method _get_rows_from_list (line 368) | def _get_rows_from_list(self, samples):
method log_images (line 376) | def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=Non...
method configure_optimizers (line 413) | def configure_optimizers(self):
class LatentDiffusion (line 422) | class LatentDiffusion(DDPM):
method __init__ (line 425) | def __init__(self,
method make_cond_schedule (line 472) | def make_cond_schedule(self, ):
method on_train_batch_start (line 479) | def on_train_batch_start(self, batch, batch_idx, dataloader_idx):
method register_schedule (line 494) | def register_schedule(self,
method instantiate_first_stage (line 503) | def instantiate_first_stage(self, config):
method instantiate_cond_stage (line 510) | def instantiate_cond_stage(self, config):
method _get_denoise_row_from_list (line 531) | def _get_denoise_row_from_list(self, samples, desc='', force_no_decode...
method get_first_stage_encoding (line 543) | def get_first_stage_encoding(self, encoder_posterior):
method get_learned_conditioning (line 552) | def get_learned_conditioning(self, c):
method meshgrid (line 565) | def meshgrid(self, h, w):
method delta_border (line 572) | def delta_border(self, h, w):
method get_weighting (line 586) | def get_weighting(self, h, w, Ly, Lx, device):
method get_fold_unfold (line 602) | def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo...
method get_input (line 655) | def get_input(self, batch, k, return_first_stage_outputs=False, force_...
method decode_first_stage (line 711) | def decode_first_stage(self, z, predict_cids=False, force_not_quantize...
method differentiable_decode_first_stage (line 771) | def differentiable_decode_first_stage(self, z, predict_cids=False, for...
method encode_first_stage (line 830) | def encode_first_stage(self, x):
method shared_step (line 867) | def shared_step(self, batch, **kwargs):
method forward (line 872) | def forward(self, x, c, *args, **kwargs):
method _rescale_annotations (line 883) | def _rescale_annotations(self, bboxes, crop_coordinates): # TODO: mov...
method apply_model (line 893) | def apply_model(self, x_noisy, t, cond, return_ids=False):
method _predict_eps_from_xstart (line 995) | def _predict_eps_from_xstart(self, x_t, t, pred_xstart):
method _prior_bpd (line 999) | def _prior_bpd(self, x_start):
method p_losses (line 1013) | def p_losses(self, x_start, cond, t, noise=None):
method p_mean_variance (line 1052) | def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codeboo...
method p_sample (line 1084) | def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False,
method progressive_denoising (line 1115) | def progressive_denoising(self, cond, shape, verbose=False, callback=N...
method p_sample_loop (line 1171) | def p_sample_loop(self, cond, shape, return_intermediates=False,
method sample (line 1222) | def sample(self, cond, batch_size=16, return_intermediates=False, x_T=...
method sample_log (line 1240) | def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs):
method log_images (line 1254) | def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200,...
method configure_optimizers (line 1371) | def configure_optimizers(self):
method to_rgb (line 1396) | def to_rgb(self, x):
class DiffusionWrapper (line 1405) | class DiffusionWrapper(pl.LightningModule):
method __init__ (line 1406) | def __init__(self, diff_model_config, conditioning_key):
method forward (line 1412) | def forward(self, x, t, c_concat: list = None, c_crossattn: list = None):
class Layout2ImgDiffusion (line 1434) | class Layout2ImgDiffusion(LatentDiffusion):
method __init__ (line 1435) | def __init__(self, cond_stage_key, *args, **kwargs):
method log_images (line 1439) | def log_images(self, batch, N=8, dset=None, *args, **kwargs):
FILE: lidm/models/diffusion/plms.py
class PLMSSampler (line 11) | class PLMSSampler(object):
method __init__ (line 12) | def __init__(self, model, schedule="linear", **kwargs):
method register_buffer (line 18) | def register_buffer(self, name, attr):
method make_schedule (line 24) | def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddi...
method sample (line 58) | def sample(self,
method plms_sampling (line 115) | def plms_sampling(self, cond, shape,
method p_sample_plms (line 173) | def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_origin...
FILE: lidm/modules/attention.py
function exists (line 11) | def exists(val):
function uniq (line 15) | def uniq(arr):
function default (line 19) | def default(val, d):
function max_neg_value (line 25) | def max_neg_value(t):
function init_ (line 29) | def init_(tensor):
class GEGLU (line 37) | class GEGLU(nn.Module):
method __init__ (line 38) | def __init__(self, dim_in, dim_out):
method forward (line 42) | def forward(self, x):
class FeedForward (line 47) | class FeedForward(nn.Module):
method __init__ (line 48) | def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.):
method forward (line 63) | def forward(self, x):
function zero_module (line 67) | def zero_module(module):
function Normalize (line 76) | def Normalize(in_channels):
class LinearAttention (line 80) | class LinearAttention(nn.Module):
method __init__ (line 81) | def __init__(self, dim, heads=4, dim_head=32):
method forward (line 88) | def forward(self, x):
class SpatialSelfAttention (line 99) | class SpatialSelfAttention(nn.Module):
method __init__ (line 100) | def __init__(self, in_channels):
method forward (line 126) | def forward(self, x):
class CrossAttention (line 152) | class CrossAttention(nn.Module):
method __init__ (line 153) | def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, ...
method forward (line 170) | def forward(self, x, context=None, mask=None):
class BasicTransformerBlock (line 196) | class BasicTransformerBlock(nn.Module):
method __init__ (line 197) | def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None,...
method forward (line 208) | def forward(self, x, context=None):
method _forward (line 211) | def _forward(self, x, context=None):
class SpatialTransformer (line 218) | class SpatialTransformer(nn.Module):
method __init__ (line 226) | def __init__(self, in_channels, n_heads, d_head,
method forward (line 250) | def forward(self, x, context=None):
FILE: lidm/modules/basic.py
class CircularPad (line 21) | class CircularPad(nn.Module):
method __init__ (line 22) | def __init__(self, pad_size):
method forward (line 27) | def forward(self, x):
class CircularConv2d (line 35) | class CircularConv2d(nn.Conv2d):
method __init__ (line 36) | def __init__(self, *args, **kwargs):
method forward (line 52) | def forward(self, x: Tensor) -> Tensor:
class ActNorm (line 62) | class ActNorm(nn.Module):
method __init__ (line 63) | def __init__(self, num_features, logdet=False, affine=True,
method initialize (line 74) | def initialize(self, input):
method forward (line 95) | def forward(self, input, reverse=False):
method reverse (line 123) | def reverse(self, output):
function make_beta_schedule (line 147) | def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_e...
function make_ddim_timesteps (line 172) | def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_...
function make_ddim_sampling_parameters (line 188) | def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbos...
function betas_for_alpha_bar (line 200) | def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.9...
function extract_into_tensor (line 219) | def extract_into_tensor(a, t, x_shape):
function checkpoint (line 225) | def checkpoint(func, inputs, params, flag):
class CheckpointFunction (line 242) | class CheckpointFunction(torch.autograd.Function):
method forward (line 244) | def forward(ctx, run_function, length, *args):
method backward (line 254) | def backward(ctx, *output_grads):
function timestep_embedding (line 274) | def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=Fal...
function zero_module (line 295) | def zero_module(module):
function scale_module (line 304) | def scale_module(module, scale):
function mean_flat (line 313) | def mean_flat(tensor):
function normalization (line 320) | def normalization(channels):
class SiLU (line 330) | class SiLU(nn.Module):
method forward (line 331) | def forward(self, x):
class GroupNorm32 (line 335) | class GroupNorm32(nn.GroupNorm):
method forward (line 336) | def forward(self, x):
function conv_nd (line 340) | def conv_nd(dims, *args, cconv=False, **kwargs):
function linear (line 356) | def linear(*args, **kwargs):
function avg_pool_nd (line 363) | def avg_pool_nd(dims, *args, **kwargs):
class HybridConditioner (line 376) | class HybridConditioner(nn.Module):
method __init__ (line 378) | def __init__(self, c_concat_config, c_crossattn_config):
method forward (line 383) | def forward(self, c_concat, c_crossattn):
function noise_like (line 389) | def noise_like(shape, device, repeat=False):
FILE: lidm/modules/diffusion/model_ldm.py
function get_timestep_embedding (line 12) | def get_timestep_embedding(timesteps, embedding_dim):
function nonlinearity (line 33) | def nonlinearity(x):
function Normalize (line 38) | def Normalize(in_channels, num_groups=32):
class Upsample (line 42) | class Upsample(nn.Module):
method __init__ (line 43) | def __init__(self, in_channels, with_conv):
method forward (line 53) | def forward(self, x):
class Downsample (line 60) | class Downsample(nn.Module):
method __init__ (line 61) | def __init__(self, in_channels, with_conv):
method forward (line 72) | def forward(self, x):
class ResnetBlock (line 82) | class ResnetBlock(nn.Module):
method __init__ (line 83) | def __init__(self, *, in_channels, out_channels=None, conv_shortcut=Fa...
method forward (line 121) | def forward(self, x, temb):
class LinAttnBlock (line 144) | class LinAttnBlock(LinearAttention):
method __init__ (line 147) | def __init__(self, in_channels):
class AttnBlock (line 151) | class AttnBlock(nn.Module):
method __init__ (line 152) | def __init__(self, in_channels):
method forward (line 178) | def forward(self, x):
function make_attn (line 205) | def make_attn(in_channels, attn_type="vanilla"):
class Model (line 216) | class Model(nn.Module):
method __init__ (line 217) | def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks,
method forward (line 312) | def forward(self, x, t=None, context=None):
method get_last_layer (line 360) | def get_last_layer(self):
class Encoder (line 364) | class Encoder(nn.Module):
method __init__ (line 365) | def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks,
method forward (line 427) | def forward(self, x):
class Decoder (line 454) | class Decoder(nn.Module):
method __init__ (line 455) | def __init__(self, *, ch, out_ch, ch_mult=(1, 2, 4, 8), num_res_blocks,
method forward (line 521) | def forward(self, z):
class SimpleDecoder (line 556) | class SimpleDecoder(nn.Module):
method __init__ (line 557) | def __init__(self, in_channels, out_channels, *args, **kwargs):
method forward (line 579) | def forward(self, x):
class UpsampleDecoder (line 592) | class UpsampleDecoder(nn.Module):
method __init__ (line 593) | def __init__(self, in_channels, out_channels, ch, num_res_blocks,
method forward (line 623) | def forward(self, x):
class LatentRescaler (line 637) | class LatentRescaler(nn.Module):
method __init__ (line 638) | def __init__(self, factor, in_channels, mid_channels, out_channels, de...
method forward (line 662) | def forward(self, x):
class MergedRescaleEncoder (line 675) | class MergedRescaleEncoder(nn.Module):
method __init__ (line 676) | def __init__(self, in_channels, ch, out_ch, num_res_blocks,
method forward (line 688) | def forward(self, x):
class MergedRescaleDecoder (line 694) | class MergedRescaleDecoder(nn.Module):
method __init__ (line 695) | def __init__(self, z_channels, out_ch, num_res_blocks, attn_levels, ch...
method forward (line 705) | def forward(self, x):
class Upsampler (line 711) | class Upsampler(nn.Module):
method __init__ (line 712) | def __init__(self, in_size, out_size, in_channels, out_channels, ch_mu...
method forward (line 725) | def forward(self, x):
class Resize (line 731) | class Resize(nn.Module):
method __init__ (line 732) | def __init__(self, in_channels=None, learned=False, mode="bilinear"):
method forward (line 747) | def forward(self, x, scale_factor=1.0):
class FirstStagePostProcessor (line 755) | class FirstStagePostProcessor(nn.Module):
method __init__ (line 757) | def __init__(self, ch_mult: list, in_channels,
method instantiate_pretrained (line 791) | def instantiate_pretrained(self, config):
method encode_with_pretrained (line 799) | def encode_with_pretrained(self, x):
method forward (line 805) | def forward(self, x):
FILE: lidm/modules/diffusion/model_lidm.py
function get_timestep_embedding (line 14) | def get_timestep_embedding(timesteps, embedding_dim):
function nonlinearity (line 35) | def nonlinearity(x):
function Normalize (line 40) | def Normalize(in_channels, num_groups=32):
class Upsample (line 48) | class Upsample(nn.Module):
method __init__ (line 49) | def __init__(self, in_channels, with_conv, stride):
method forward (line 57) | def forward(self, x):
class Downsample (line 68) | class Downsample(nn.Module):
method __init__ (line 69) | def __init__(self, in_channels, with_conv, stride):
method forward (line 77) | def forward(self, x):
class ResnetBlock (line 88) | class ResnetBlock(nn.Module):
method __init__ (line 89) | def __init__(self, *, in_channels, out_channels=None, kernel_size=(3, ...
method forward (line 127) | def forward(self, x, temb):
class LinAttnBlock (line 150) | class LinAttnBlock(LinearAttention):
method __init__ (line 153) | def __init__(self, in_channels):
class AttnBlock (line 157) | class AttnBlock(nn.Module):
method __init__ (line 158) | def __init__(self, in_channels):
method forward (line 184) | def forward(self, x):
function make_attn (line 211) | def make_attn(in_channels, attn_type="vanilla"):
class Encoder (line 222) | class Encoder(nn.Module):
method __init__ (line 223) | def __init__(self, *, ch, out_ch, ch_mult, strides, num_res_blocks,
method forward (line 287) | def forward(self, x):
class Decoder (line 315) | class Decoder(nn.Module):
method __init__ (line 316) | def __init__(self, *, ch, out_ch, ch_mult, strides, num_res_blocks, at...
method forward (line 385) | def forward(self, z):
class SimpleDecoder (line 420) | class SimpleDecoder(nn.Module):
method __init__ (line 421) | def __init__(self, in_channels, out_channels, *args, **kwargs):
method forward (line 443) | def forward(self, x):
class UpsampleDecoder (line 456) | class UpsampleDecoder(nn.Module):
method __init__ (line 457) | def __init__(self, in_channels, out_channels, ch, num_res_blocks,
method forward (line 488) | def forward(self, x):
class LatentRescaler (line 502) | class LatentRescaler(nn.Module):
method __init__ (line 503) | def __init__(self, factor, in_channels, mid_channels, out_channels, de...
method forward (line 527) | def forward(self, x):
class MergedRescaleEncoder (line 540) | class MergedRescaleEncoder(nn.Module):
method __init__ (line 541) | def __init__(self, in_channels, ch, out_ch, num_res_blocks,
method forward (line 552) | def forward(self, x):
class MergedRescaleDecoder (line 558) | class MergedRescaleDecoder(nn.Module):
method __init__ (line 559) | def __init__(self, z_channels, out_ch, num_res_blocks, attn_levels, ch...
method forward (line 569) | def forward(self, x):
class Upsampler (line 575) | class Upsampler(nn.Module):
method __init__ (line 576) | def __init__(self, in_size, out_size, in_channels, out_channels, ch_mu...
method forward (line 589) | def forward(self, x):
class Resize (line 595) | class Resize(nn.Module):
method __init__ (line 596) | def __init__(self, in_channels=None, learned=False, mode="bilinear"):
method forward (line 611) | def forward(self, x, scale_factor=1.0):
class FirstStagePostProcessor (line 619) | class FirstStagePostProcessor(nn.Module):
method __init__ (line 621) | def __init__(self, ch_mult: list, in_channels,
method instantiate_pretrained (line 655) | def instantiate_pretrained(self, config):
method encode_with_pretrained (line 663) | def encode_with_pretrained(self, x):
method forward (line 669) | def forward(self, x):
FILE: lidm/modules/diffusion/openaimodel.py
function convert_module_to_f16 (line 22) | def convert_module_to_f16(x):
function convert_module_to_f32 (line 26) | def convert_module_to_f32(x):
class AttentionPool2d (line 31) | class AttentionPool2d(nn.Module):
method __init__ (line 36) | def __init__(
method forward (line 50) | def forward(self, x):
class TimestepBlock (line 61) | class TimestepBlock(nn.Module):
method forward (line 67) | def forward(self, x, emb):
class TimestepEmbedSequential (line 73) | class TimestepEmbedSequential(nn.Sequential, TimestepBlock):
method forward (line 79) | def forward(self, x, emb, context=None):
class Upsample (line 90) | class Upsample(nn.Module):
method __init__ (line 99) | def __init__(self, channels, use_conv, dims=2, out_channels=None, padd...
method forward (line 108) | def forward(self, x):
class TransposedUpsample (line 121) | class TransposedUpsample(nn.Module):
method __init__ (line 124) | def __init__(self, channels, out_channels=None, ks=5):
method forward (line 131) | def forward(self, x):
class Downsample (line 135) | class Downsample(nn.Module):
method __init__ (line 144) | def __init__(self, channels, use_conv, dims=2, out_channels=None, padd...
method forward (line 159) | def forward(self, x):
class ResBlock (line 164) | class ResBlock(TimestepBlock):
method __init__ (line 180) | def __init__(
method forward (line 245) | def forward(self, x, emb):
method _forward (line 256) | def _forward(self, x, emb):
class AttentionBlock (line 279) | class AttentionBlock(nn.Module):
method __init__ (line 286) | def __init__(
method forward (line 315) | def forward(self, x):
method _forward (line 320) | def _forward(self, x):
function count_flops_attn (line 329) | def count_flops_attn(model, _x, y):
class QKVAttentionLegacy (line 349) | class QKVAttentionLegacy(nn.Module):
method __init__ (line 354) | def __init__(self, n_heads):
method forward (line 358) | def forward(self, qkv):
method count_flops (line 377) | def count_flops(model, _x, y):
class QKVAttention (line 381) | class QKVAttention(nn.Module):
method __init__ (line 386) | def __init__(self, n_heads):
method forward (line 390) | def forward(self, qkv):
method count_flops (line 411) | def count_flops(model, _x, y):
class UNetModel (line 415) | class UNetModel(nn.Module):
method __init__ (line 445) | def __init__(
method convert_to_fp16 (line 703) | def convert_to_fp16(self):
method convert_to_fp32 (line 711) | def convert_to_fp32(self):
method forward (line 719) | def forward(self, x, timesteps=None, context=None, y=None, **kwargs):
class EncoderUNetModel (line 754) | class EncoderUNetModel(nn.Module):
method __init__ (line 760) | def __init__(
method convert_to_fp16 (line 935) | def convert_to_fp16(self):
method convert_to_fp32 (line 942) | def convert_to_fp32(self):
method forward (line 949) | def forward(self, x, timesteps):
FILE: lidm/modules/distributions/distributions.py
class AbstractDistribution (line 5) | class AbstractDistribution:
method sample (line 6) | def sample(self):
method mode (line 9) | def mode(self):
class DiracDistribution (line 13) | class DiracDistribution(AbstractDistribution):
method __init__ (line 14) | def __init__(self, value):
method sample (line 17) | def sample(self):
method mode (line 20) | def mode(self):
class DiagonalGaussianDistribution (line 24) | class DiagonalGaussianDistribution(object):
method __init__ (line 25) | def __init__(self, parameters, deterministic=False):
method sample (line 35) | def sample(self):
method kl (line 39) | def kl(self, other=None):
method nll (line 53) | def nll(self, sample, dims=[1,2,3]):
method mode (line 61) | def mode(self):
function normal_kl (line 65) | def normal_kl(mean1, logvar1, mean2, logvar2):
FILE: lidm/modules/ema.py
class LitEma (line 5) | class LitEma(nn.Module):
method __init__ (line 6) | def __init__(self, model, decay=0.9999, use_num_upates=True):
method forward (line 25) | def forward(self, model):
method copy_to (line 46) | def copy_to(self, model):
method store (line 55) | def store(self, parameters):
method restore (line 64) | def restore(self, parameters):
FILE: lidm/modules/encoders/modules.py
class AbstractEncoder (line 11) | class AbstractEncoder(nn.Module):
method __init__ (line 12) | def __init__(self):
method encode (line 15) | def encode(self, *args, **kwargs):
class ClassEmbedder (line 19) | class ClassEmbedder(nn.Module):
method __init__ (line 20) | def __init__(self, embed_dim, n_classes=1000, key='class'):
method forward (line 25) | def forward(self, batch, key=None):
class TransformerEmbedder (line 34) | class TransformerEmbedder(AbstractEncoder):
method __init__ (line 37) | def __init__(self, n_embed, n_layer, vocab_size, max_seq_len=77, devic...
method forward (line 43) | def forward(self, tokens):
method encode (line 48) | def encode(self, x):
class BERTTokenizer (line 52) | class BERTTokenizer(AbstractEncoder):
method __init__ (line 55) | def __init__(self, device="cuda", vq_interface=True, max_length=77):
method forward (line 64) | def forward(self, text):
method encode (line 71) | def encode(self, text):
method decode (line 77) | def decode(self, text):
class BERTEmbedder (line 81) | class BERTEmbedder(AbstractEncoder):
method __init__ (line 84) | def __init__(self, n_embed, n_layer, vocab_size=30522, max_seq_len=77,
method forward (line 95) | def forward(self, text):
method encode (line 103) | def encode(self, text):
class SpatialRescaler (line 108) | class SpatialRescaler(nn.Module):
method __init__ (line 109) | def __init__(self,
method forward (line 124) | def forward(self, x):
method encode (line 132) | def encode(self, x):
class FrozenCLIPTextEmbedder (line 136) | class FrozenCLIPTextEmbedder(nn.Module):
method __init__ (line 141) | def __init__(self, version='ViT-L/14', device="cuda", max_length=77, n...
method freeze (line 150) | def freeze(self):
method forward (line 155) | def forward(self, text):
method encode (line 162) | def encode(self, text):
class FrozenClipMultiTextEmbedder (line 170) | class FrozenClipMultiTextEmbedder(FrozenCLIPTextEmbedder):
method __init__ (line 171) | def __init__(self, num_views=1, apply_all=False, **kwargs):
method encode (line 176) | def encode(self, text):
class FrozenClipImageEmbedder (line 190) | class FrozenClipImageEmbedder(nn.Module):
method __init__ (line 195) | def __init__(
method init (line 211) | def init(self):
method preprocess (line 216) | def preprocess(self, x):
method forward (line 226) | def forward(self, x):
class FrozenClipMultiImageEmbedder (line 231) | class FrozenClipMultiImageEmbedder(FrozenClipImageEmbedder):
method __init__ (line 236) | def __init__(self, num_views=1, split_per_view=1, img_dim=768, out_dim...
method forward (line 243) | def forward(self, x):
class FrozenClipImagePatchEmbedder (line 261) | class FrozenClipImagePatchEmbedder(nn.Module):
method __init__ (line 266) | def __init__(
method init (line 289) | def init(self):
method preprocess (line 294) | def preprocess(self, x):
method encode_image_patch (line 304) | def encode_image_patch(self, x):
method forward (line 321) | def forward(self, x):
FILE: lidm/modules/image_degradation/bsrgan.py
function modcrop_np (line 29) | def modcrop_np(img, sf):
function analytic_kernel (line 49) | def analytic_kernel(k):
function anisotropic_Gaussian (line 65) | def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6):
function gm_blur_kernel (line 86) | def gm_blur_kernel(mean, cov, size=15):
function shift_pixel (line 99) | def shift_pixel(x, sf, upper_left=True):
function blur (line 128) | def blur(x, k):
function gen_kernel (line 145) | def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]),...
function fspecial_gaussian (line 187) | def fspecial_gaussian(hsize, sigma):
function fspecial_laplacian (line 201) | def fspecial_laplacian(alpha):
function fspecial (line 210) | def fspecial(filter_type, *args, **kwargs):
function bicubic_degradation (line 228) | def bicubic_degradation(x, sf=3):
function srmd_degradation (line 240) | def srmd_degradation(x, k, sf=3):
function dpsr_degradation (line 262) | def dpsr_degradation(x, k, sf=3):
function classical_degradation (line 284) | def classical_degradation(x, k, sf=3):
function add_sharpening (line 299) | def add_sharpening(img, weight=0.5, radius=50, threshold=10):
function add_blur (line 325) | def add_blur(img, sf=4):
function add_resize (line 339) | def add_resize(img, sf=4):
function add_Gaussian_noise (line 369) | def add_Gaussian_noise(img, noise_level1=2, noise_level2=25):
function add_speckle_noise (line 386) | def add_speckle_noise(img, noise_level1=2, noise_level2=25):
function add_Poisson_noise (line 404) | def add_Poisson_noise(img):
function add_JPEG_noise (line 418) | def add_JPEG_noise(img):
function random_crop (line 427) | def random_crop(lq, hq, sf=4, lq_patchsize=64):
function degradation_bsrgan (line 438) | def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None):
function degradation_bsrgan_variant (line 530) | def degradation_bsrgan_variant(image, sf=4, isp_model=None):
function degradation_bsrgan_plus (line 617) | def degradation_bsrgan_plus(img, sf=4, shuffle_prob=0.5, use_sharp=True,...
FILE: lidm/modules/image_degradation/bsrgan_light.py
function modcrop_np (line 29) | def modcrop_np(img, sf):
function analytic_kernel (line 49) | def analytic_kernel(k):
function anisotropic_Gaussian (line 65) | def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6):
function gm_blur_kernel (line 86) | def gm_blur_kernel(mean, cov, size=15):
function shift_pixel (line 99) | def shift_pixel(x, sf, upper_left=True):
function blur (line 128) | def blur(x, k):
function gen_kernel (line 145) | def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]),...
function fspecial_gaussian (line 187) | def fspecial_gaussian(hsize, sigma):
function fspecial_laplacian (line 201) | def fspecial_laplacian(alpha):
function fspecial (line 210) | def fspecial(filter_type, *args, **kwargs):
function bicubic_degradation (line 228) | def bicubic_degradation(x, sf=3):
function srmd_degradation (line 240) | def srmd_degradation(x, k, sf=3):
function dpsr_degradation (line 262) | def dpsr_degradation(x, k, sf=3):
function classical_degradation (line 284) | def classical_degradation(x, k, sf=3):
function add_sharpening (line 299) | def add_sharpening(img, weight=0.5, radius=50, threshold=10):
function add_blur (line 325) | def add_blur(img, sf=4):
function add_resize (line 343) | def add_resize(img, sf=4):
function add_Gaussian_noise (line 373) | def add_Gaussian_noise(img, noise_level1=2, noise_level2=25):
function add_speckle_noise (line 390) | def add_speckle_noise(img, noise_level1=2, noise_level2=25):
function add_Poisson_noise (line 408) | def add_Poisson_noise(img):
function add_JPEG_noise (line 422) | def add_JPEG_noise(img):
function random_crop (line 431) | def random_crop(lq, hq, sf=4, lq_patchsize=64):
function degradation_bsrgan (line 442) | def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None):
function degradation_bsrgan_variant (line 534) | def degradation_bsrgan_variant(image, sf=4, isp_model=None):
FILE: lidm/modules/image_degradation/utils_image.py
function is_image_file (line 29) | def is_image_file(filename):
function get_timestamp (line 33) | def get_timestamp():
function imshow (line 37) | def imshow(x, title=None, cbar=False, figsize=None):
function surf (line 47) | def surf(Z, cmap='rainbow', figsize=None):
function get_image_paths (line 67) | def get_image_paths(dataroot):
function _get_paths_from_images (line 74) | def _get_paths_from_images(path):
function patches_from_image (line 93) | def patches_from_image(img, p_size=512, p_overlap=64, p_max=800):
function imssave (line 112) | def imssave(imgs, img_path):
function split_imageset (line 125) | def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_si...
function mkdir (line 153) | def mkdir(path):
function mkdirs (line 158) | def mkdirs(paths):
function mkdir_and_rename (line 166) | def mkdir_and_rename(path):
function imread_uint (line 185) | def imread_uint(path, n_channels=3):
function imsave (line 203) | def imsave(img, img_path):
function imwrite (line 209) | def imwrite(img, img_path):
function read_img (line 220) | def read_img(path):
function uint2single (line 249) | def uint2single(img):
function single2uint (line 254) | def single2uint(img):
function uint162single (line 259) | def uint162single(img):
function single2uint16 (line 264) | def single2uint16(img):
function uint2tensor4 (line 275) | def uint2tensor4(img):
function uint2tensor3 (line 282) | def uint2tensor3(img):
function tensor2uint (line 289) | def tensor2uint(img):
function single2tensor3 (line 302) | def single2tensor3(img):
function single2tensor4 (line 307) | def single2tensor4(img):
function tensor2single (line 312) | def tensor2single(img):
function tensor2single3 (line 320) | def tensor2single3(img):
function single2tensor5 (line 329) | def single2tensor5(img):
function single32tensor5 (line 333) | def single32tensor5(img):
function single42tensor4 (line 337) | def single42tensor4(img):
function tensor2img (line 342) | def tensor2img(tensor, out_type=np.uint8, min_max=(0, 1)):
function augment_img (line 380) | def augment_img(img, mode=0):
function augment_img_tensor4 (line 401) | def augment_img_tensor4(img, mode=0):
function augment_img_tensor (line 422) | def augment_img_tensor(img, mode=0):
function augment_img_np3 (line 441) | def augment_img_np3(img, mode=0):
function augment_imgs (line 469) | def augment_imgs(img_list, hflip=True, rot=True):
function modcrop (line 494) | def modcrop(img_in, scale):
function shave (line 510) | def shave(img_in, border=0):
function rgb2ycbcr (line 529) | def rgb2ycbcr(img, only_y=True):
function ycbcr2rgb (line 553) | def ycbcr2rgb(img):
function bgr2ycbcr (line 573) | def bgr2ycbcr(img, only_y=True):
function channel_convert (line 597) | def channel_convert(in_c, tar_type, img_list):
function calculate_psnr (line 621) | def calculate_psnr(img1, img2, border=0):
function calculate_ssim (line 642) | def calculate_ssim(img1, img2, border=0):
function ssim (line 669) | def ssim(img1, img2):
function cubic (line 700) | def cubic(x):
function calculate_weights_indices (line 708) | def calculate_weights_indices(in_length, out_length, scale, kernel, kern...
function imresize (line 766) | def imresize(img, scale, antialiasing=True):
function imresize_np (line 839) | def imresize_np(img, scale, antialiasing=True):
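
utils_image.py bundles image I/O, tensor/array conversion, augmentation, and quality metrics. For reference, the standard PSNR computation behind a function like calculate_psnr looks roughly like the following sketch (assuming 8-bit inputs of identical shape; not copied from this repository):

    import numpy as np

    def psnr_sketch(img1, img2, border=0):
        # Peak signal-to-noise ratio between two uint8 images, optionally
        # ignoring a border of `border` pixels on each side (peak value 255).
        h, w = img1.shape[:2]
        img1 = img1[border:h - border, border:w - border].astype(np.float64)
        img2 = img2[border:h - border, border:w - border].astype(np.float64)
        mse = np.mean((img1 - img2) ** 2)
        if mse == 0:
            return float('inf')
        return 20.0 * np.log10(255.0 / np.sqrt(mse))
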
FILE: lidm/modules/losses/__init__.py
function adopt_weight (line 6) | def adopt_weight(weight, global_step, threshold=0, value=0.):
function hinge_d_loss (line 12) | def hinge_d_loss(logits_real, logits_fake):
function vanilla_d_loss (line 19) | def vanilla_d_loss(logits_real, logits_fake):
function measure_perplexity (line 26) | def measure_perplexity(predicted_indices, n_embed):
function l1 (line 36) | def l1(x, y):
function l2 (line 40) | def l2(x, y):
function square_dist_loss (line 44) | def square_dist_loss(x, y):
function weights_init (line 48) | def weights_init(m):
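
These loss helpers follow the usual taming-transformers / latent-diffusion conventions. As a sketch (matching that common pattern, not verified line-by-line against this file): adopt_weight disables a loss term until a training-step threshold is reached, and hinge_d_loss is the standard hinge discriminator objective.

    import torch
    import torch.nn.functional as F

    def adopt_weight_sketch(weight, global_step, threshold=0, value=0.):
        # Return `value` (usually 0) before `threshold` steps, `weight` afterwards,
        # e.g. to delay the adversarial term until the autoencoder has warmed up.
        return value if global_step < threshold else weight

    def hinge_d_loss_sketch(logits_real, logits_fake):
        # Hinge loss for the discriminator: push real logits above +1 and
        # fake logits below -1.
        loss_real = torch.mean(F.relu(1. - logits_real))
        loss_fake = torch.mean(F.relu(1. + logits_fake))
        return 0.5 * (loss_real + loss_fake)
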
FILE: lidm/modules/losses/contperceptual.py
class LPIPSWithDiscriminator (line 9) | class LPIPSWithDiscriminator(nn.Module):
method __init__ (line 10) | def __init__(self, disc_start, logvar_init=0.0, kl_weight=1.0, pixello...
method calculate_adaptive_weight (line 33) | def calculate_adaptive_weight(self, nll_loss, g_loss, last_layer=None):
method forward (line 46) | def forward(self, inputs, reconstructions, posteriors, optimizer_idx,
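
calculate_adaptive_weight balances the reconstruction (NLL) and adversarial terms by comparing their gradient magnitudes at the decoder's last layer, the usual VQGAN/LDM recipe. A hedged sketch of that computation (function and parameter names here are assumptions):

    import torch

    def calculate_adaptive_weight_sketch(nll_loss, g_loss, last_layer, disc_weight=1.0):
        # Scale the generator (adversarial) loss so that its gradient norm at the
        # last decoder layer matches that of the reconstruction loss.
        nll_grads = torch.autograd.grad(nll_loss, last_layer, retain_graph=True)[0]
        g_grads = torch.autograd.grad(g_loss, last_layer, retain_graph=True)[0]
        d_weight = torch.norm(nll_grads) / (torch.norm(g_grads) + 1e-4)
        d_weight = torch.clamp(d_weight, 0.0, 1e4).detach()
        return d_weight * disc_weight
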
FILE: lidm/modules/losses/discriminator.py
class NLayerDiscriminator (line 8) | class NLayerDiscriminator(nn.Module):
method __init__ (line 12) | def __init__(self, input_nc=1, output_nc=1, ndf=64, n_layers=3, use_ac...
method forward (line 55) | def forward(self, input):
class LiDARNLayerDiscriminator (line 60) | class LiDARNLayerDiscriminator(nn.Module):
method __init__ (line 64) | def __init__(self, input_nc=1, output_nc=1, ndf=64, n_layers=3, use_ac...
method forward (line 107) | def forward(self, input):
class LiDARNLayerDiscriminatorV2 (line 112) | class LiDARNLayerDiscriminatorV2(nn.Module):
method __init__ (line 116) | def __init__(self, input_nc=1, output_nc=1, ndf=64, n_layers=3, use_ac...
method forward (line 160) | def forward(self, input):
class LiDARNLayerDiscriminatorV3 (line 165) | class LiDARNLayerDiscriminatorV3(nn.Module):
method __init__ (line 169) | def __init__(self, input_nc=1, output_nc=1, ndf=64, n_layers=3, use_ac...
method forward (line 213) | def forward(self, input):
FILE: lidm/modules/losses/geometric.py
class GeoConverter (line 9) | class GeoConverter(nn.Module):
method __init__ (line 10) | def __init__(self, curve_length=4, bev_only=False, dataset_config=dict...
method register_conversion (line 26) | def register_conversion(self):
method batch_range2xyz (line 41) | def batch_range2xyz(self, imgs):
method batch_range2bev (line 54) | def batch_range2bev(self, imgs):
method curve_compress (line 66) | def curve_compress(self, batch_coord):
method forward (line 71) | def forward(self, input):
FILE: lidm/modules/losses/perceptual.py
function download (line 31) | def download(url, local_path, chunk_size=1024):
function md5_hash (line 43) | def md5_hash(path):
function get_ckpt_path (line 49) | def get_ckpt_path(name, root, check=False):
class NetLinLayer (line 60) | class NetLinLayer(nn.Module):
method __init__ (line 63) | def __init__(self, chn_in, chn_out=1, use_dropout=False):
class PerceptualLoss (line 70) | class PerceptualLoss(nn.Module):
method __init__ (line 71) | def __init__(self, ptype, depth_scale, log_scale=True, use_dropout=Tru...
method normalize_tensor (line 90) | def normalize_tensor(x, eps=1e-10):
method spatial_average (line 95) | def spatial_average(x, keepdim=True):
method preprocess (line 98) | def preprocess(self, *inputs):
method forward (line 110) | def forward(self, target, input):
FILE: lidm/modules/losses/vqperceptual.py
class VQGeoLPIPSWithDiscriminator (line 12) | class VQGeoLPIPSWithDiscriminator(nn.Module):
method __init__ (line 13) | def __init__(self, disc_start, codebook_weight=1.0, pixelloss_weight=1.0,
method calculate_adaptive_weight (line 69) | def calculate_adaptive_weight(self, nll_loss, g_loss, last_layer=None):
method forward (line 82) | def forward(self, codebook_loss, inputs, reconstructions, optimizer_idx,
FILE: lidm/modules/minkowskinet/model.py
class Model (line 12) | class Model(nn.Module):
method __init__ (line 13) | def __init__(self, config):
method weight_initialization (line 94) | def weight_initialization(self):
method forward (line 100) | def forward(self, data_dict, return_logits=False, return_final_logits=...
FILE: lidm/modules/rangenet/model.py
class BasicBlock (line 10) | class BasicBlock(nn.Module):
method __init__ (line 11) | def __init__(self, inplanes, planes, bn_d=0.1):
method forward (line 22) | def forward(self, x):
class Backbone (line 46) | class Backbone(nn.Module):
method __init__ (line 51) | def __init__(self, params):
method _make_enc_layer (line 126) | def _make_enc_layer(self, block, planes, blocks, stride, bn_d=0.1):
method run_layer (line 145) | def run_layer(self, x, layer, skips, os):
method forward (line 153) | def forward(self, x, return_logits=False, return_list=None):
method get_last_depth (line 203) | def get_last_depth(self):
method get_input_depth (line 206) | def get_input_depth(self):
class Decoder (line 210) | class Decoder(nn.Module):
method __init__ (line 215) | def __init__(self, params, OS=32, feature_depth=1024):
method _make_dec_layer (line 261) | def _make_dec_layer(self, block, planes, bn_d=0.1, stride=2):
method run_layer (line 280) | def run_layer(self, x, layer, skips, os):
method forward (line 288) | def forward(self, x, skips, return_logits=False, return_list=None):
method get_last_depth (line 318) | def get_last_depth(self):
class Model (line 322) | class Model(nn.Module):
method __init__ (line 323) | def __init__(self, config):
method load_pretrained_weights (line 330) | def load_pretrained_weights(self, path):
method forward (line 338) | def forward(self, x, return_logits=False, return_final_logits=False, r...
FILE: lidm/modules/spvcnn/model.py
class Model (line 13) | class Model(nn.Module):
method __init__ (line 14) | def __init__(self, config):
method weight_initialization (line 112) | def weight_initialization(self):
method forward (line 118) | def forward(self, data_dict, return_logits=False, return_final_logits=...
FILE: lidm/modules/ts/basic_blocks.py
class BasicConvolutionBlock (line 12) | class BasicConvolutionBlock(nn.Module):
method __init__ (line 13) | def __init__(self, inc, outc, ks=3, stride=1, dilation=1):
method forward (line 24) | def forward(self, x):
class BasicDeconvolutionBlock (line 29) | class BasicDeconvolutionBlock(nn.Module):
method __init__ (line 30) | def __init__(self, inc, outc, ks=3, stride=1):
method forward (line 42) | def forward(self, x):
class ResidualBlock (line 46) | class ResidualBlock(nn.Module):
method __init__ (line 47) | def __init__(self, inc, outc, ks=3, stride=1, dilation=1):
method forward (line 73) | def forward(self, x):
FILE: lidm/modules/ts/utils.py
function initial_voxelize (line 11) | def initial_voxelize(z, init_res, after_res):
function point_to_voxel (line 34) | def point_to_voxel(x, z):
function voxel_to_point (line 59) | def voxel_to_point(x, z, nearest=False):
FILE: lidm/modules/x_transformer.py
class AbsolutePositionalEmbedding (line 25) | class AbsolutePositionalEmbedding(nn.Module):
method __init__ (line 26) | def __init__(self, dim, max_seq_len):
method init_ (line 31) | def init_(self):
method forward (line 34) | def forward(self, x):
class FixedPositionalEmbedding (line 39) | class FixedPositionalEmbedding(nn.Module):
method __init__ (line 40) | def __init__(self, dim):
method forward (line 45) | def forward(self, x, seq_dim=1, offset=0):
function exists (line 54) | def exists(val):
function default (line 58) | def default(val, d):
function always (line 64) | def always(val):
function not_equals (line 71) | def not_equals(val):
function equals (line 78) | def equals(val):
function max_neg_value (line 85) | def max_neg_value(tensor):
function pick_and_pop (line 91) | def pick_and_pop(keys, d):
function group_dict_by_key (line 96) | def group_dict_by_key(cond, d):
function string_begins_with (line 105) | def string_begins_with(prefix, str):
function group_by_key_prefix (line 109) | def group_by_key_prefix(prefix, d):
function groupby_prefix_and_trim (line 113) | def groupby_prefix_and_trim(prefix, d):
class Scale (line 120) | class Scale(nn.Module):
method __init__ (line 121) | def __init__(self, value, fn):
method forward (line 126) | def forward(self, x, **kwargs):
class Rezero (line 131) | class Rezero(nn.Module):
method __init__ (line 132) | def __init__(self, fn):
method forward (line 137) | def forward(self, x, **kwargs):
class ScaleNorm (line 142) | class ScaleNorm(nn.Module):
method __init__ (line 143) | def __init__(self, dim, eps=1e-5):
method forward (line 149) | def forward(self, x):
class RMSNorm (line 154) | class RMSNorm(nn.Module):
method __init__ (line 155) | def __init__(self, dim, eps=1e-8):
method forward (line 161) | def forward(self, x):
class Residual (line 166) | class Residual(nn.Module):
method forward (line 167) | def forward(self, x, residual):
class GRUGating (line 171) | class GRUGating(nn.Module):
method __init__ (line 172) | def __init__(self, dim):
method forward (line 176) | def forward(self, x, residual):
class GEGLU (line 187) | class GEGLU(nn.Module):
method __init__ (line 188) | def __init__(self, dim_in, dim_out):
method forward (line 192) | def forward(self, x):
class FeedForward (line 197) | class FeedForward(nn.Module):
method __init__ (line 198) | def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.):
method forward (line 213) | def forward(self, x):
class Attention (line 218) | class Attention(nn.Module):
method __init__ (line 219) | def __init__(
method forward (line 271) | def forward(
class AttentionLayers (line 373) | class AttentionLayers(nn.Module):
method __init__ (line 374) | def __init__(
method forward (line 484) | def forward(
class Encoder (line 544) | class Encoder(AttentionLayers):
method __init__ (line 545) | def __init__(self, **kwargs):
class TransformerWrapper (line 550) | class TransformerWrapper(nn.Module):
method __init__ (line 551) | def __init__(
method init_ (line 597) | def init_(self):
method forward (line 600) | def forward(
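
x_transformer.py is adapted from lucidrains' x-transformers. As one example of the building blocks listed above, GEGLU and FeedForward typically look like the following sketch (standard x-transformers pattern, assumed rather than copied from this file):

    import torch.nn as nn
    import torch.nn.functional as F

    class GEGLUSketch(nn.Module):
        # Gated GELU: project to twice the width, use one half to gate the other.
        def __init__(self, dim_in, dim_out):
            super().__init__()
            self.proj = nn.Linear(dim_in, dim_out * 2)

        def forward(self, x):
            x, gate = self.proj(x).chunk(2, dim=-1)
            return x * F.gelu(gate)

    class FeedForwardSketch(nn.Module):
        # Transformer MLP block with optional GEGLU gating.
        def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.):
            super().__init__()
            inner_dim = int(dim * mult)
            dim_out = dim if dim_out is None else dim_out
            project_in = (GEGLUSketch(dim, inner_dim) if glu
                          else nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()))
            self.net = nn.Sequential(project_in, nn.Dropout(dropout),
                                     nn.Linear(inner_dim, dim_out))

        def forward(self, x):
            return self.net(x)
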
FILE: lidm/utils/aug_utils.py
function get_lidar_transform (line 4) | def get_lidar_transform(config, split):
function get_camera_transform (line 13) | def get_camera_transform(config, split):
function get_anno_transform (line 21) | def get_anno_transform(config, split):
class Compose (line 30) | class Compose(object):
method __init__ (line 31) | def __init__(self, transforms):
method __call__ (line 34) | def __call__(self, pcd, pcd1=None):
class RandomFlip (line 40) | class RandomFlip(object):
method __init__ (line 41) | def __init__(self, p=1.):
method __call__ (line 44) | def __call__(self, coord, coord1=None):
class RandomRotateAligned (line 57) | class RandomRotateAligned(object):
method __init__ (line 58) | def __init__(self, rot=np.pi / 4, p=1.):
method __call__ (line 62) | def __call__(self, coord, coord1=None):
class RandomKeypointDrop (line 73) | class RandomKeypointDrop(object):
method __init__ (line 74) | def __init__(self, num_range=(5, 60), p=.5):
method __call__ (line 78) | def __call__(self, center, category=None):
FILE: lidm/utils/lidar_utils.py
function pcd2coord2d (line 6) | def pcd2coord2d(pcd, fov, depth_range, labels=None):
function pcd2range (line 41) | def pcd2range(pcd, size, fov, depth_range, remission=None, labels=None, ...
function range2pcd (line 97) | def range2pcd(range_img, fov, depth_range, depth_scale, log_scale=True, ...
function range2xyz (line 138) | def range2xyz(range_img, fov, depth_range, depth_scale, log_scale=True, ...
function pcd2bev (line 170) | def pcd2bev(pcd, x_range, y_range, z_range, resolution, **kwargs):
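
pcd2range and range2pcd convert between point clouds and the range-image representation the diffusion model operates on. The core of such a projection is the standard spherical mapping; a hypothetical sketch follows, using the 64x1024 resolution and [3, -25] degree vertical FOV from the KITTI template config (the depth_range default here is a placeholder, and the repository's own functions also handle remission, labels, and log-scaled depth):

    import numpy as np

    def pcd_to_range_sketch(pcd, size=(64, 1024), fov=(3.0, -25.0), depth_range=(1.0, 56.0)):
        # Project (x, y, z) points onto a range image: row from elevation angle,
        # column from azimuth angle, pixel value = depth of the projected point.
        fov_up, fov_down = np.radians(fov[0]), np.radians(fov[1])
        depth = np.linalg.norm(pcd[:, :3], axis=1)
        keep = (depth > depth_range[0]) & (depth < depth_range[1])
        pcd, depth = pcd[keep], depth[keep]
        yaw = -np.arctan2(pcd[:, 1], pcd[:, 0])
        pitch = np.arcsin(pcd[:, 2] / depth)
        u = 0.5 * (yaw / np.pi + 1.0)                        # azimuth in [0, 1]
        v = 1.0 - (pitch - fov_down) / (fov_up - fov_down)   # elevation in [0, 1]
        cols = np.clip((u * size[1]).astype(np.int32), 0, size[1] - 1)
        rows = np.clip((v * size[0]).astype(np.int32), 0, size[0] - 1)
        range_img = np.zeros(size, dtype=np.float32)
        range_img[rows, cols] = depth
        return range_img
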
FILE: lidm/utils/lr_scheduler.py
class LambdaWarmUpCosineScheduler (line 4) | class LambdaWarmUpCosineScheduler:
method __init__ (line 8) | def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_...
method schedule (line 17) | def schedule(self, n, **kwargs):
method __call__ (line 32) | def __call__(self, n, **kwargs):
class LambdaWarmUpCosineScheduler2 (line 36) | class LambdaWarmUpCosineScheduler2:
method __init__ (line 41) | def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths...
method find_in_interval (line 52) | def find_in_interval(self, n):
method schedule (line 59) | def schedule(self, n, **kwargs):
method __call__ (line 77) | def __call__(self, n, **kwargs):
class LambdaLinearScheduler (line 81) | class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2):
method schedule (line 83) | def schedule(self, n, **kwargs):
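
The docstring of LambdaWarmUpCosineScheduler notes it is meant to be used with a base learning rate of 1.0, i.e. as the multiplier of a torch LambdaLR. A hedged sketch of that kind of schedule (linear warm-up followed by cosine decay; argument names follow the listed signature, but the exact decay behaviour is assumed):

    import numpy as np

    def warmup_cosine_lambda_sketch(n, warm_up_steps, lr_min, lr_max, lr_start, max_decay_steps):
        # Linear warm-up from lr_start to lr_max over `warm_up_steps`, then a
        # cosine decay towards lr_min by `max_decay_steps`.
        if n < warm_up_steps:
            return (lr_max - lr_start) / warm_up_steps * n + lr_start
        t = min((n - warm_up_steps) / max(max_decay_steps - warm_up_steps, 1), 1.0)
        return lr_min + 0.5 * (lr_max - lr_min) * (1.0 + np.cos(t * np.pi))

In practice such a callable would be wrapped as torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=...) with the optimizer's lr set to 1.0.
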
FILE: lidm/utils/misc_utils.py
function set_seed (line 19) | def set_seed(seed):
function print_fn (line 35) | def print_fn(msg, verbose):
function dict2namespace (line 40) | def dict2namespace(config):
function log_txt_as_img (line 51) | def log_txt_as_img(wh, xc, size=10):
function isdepth (line 75) | def isdepth(x):
function ismap (line 81) | def ismap(x):
function isimage (line 87) | def isimage(x):
function exists (line 93) | def exists(x):
function default (line 97) | def default(val, d):
function mean_flat (line 103) | def mean_flat(tensor):
function count_params (line 111) | def count_params(model, verbose=False):
function instantiate_from_config (line 118) | def instantiate_from_config(config):
function get_obj_from_str (line 128) | def get_obj_from_str(string, reload=False):
function _do_parallel_data_prefetch (line 136) | def _do_parallel_data_prefetch(func, Q, data, idx, idx_to_fn=False):
function parallel_data_prefetch (line 148) | def parallel_data_prefetch(
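
instantiate_from_config and get_obj_from_str implement the config-driven object construction used throughout the YAML files in this repository (each block names a `target` class and its `params`). A sketch of the standard latent-diffusion pattern, which this file is assumed to follow:

    import importlib

    def get_obj_from_str_sketch(string, reload=False):
        # Resolve a dotted path such as "lidm.models.autoencoder.VQModel" to a class.
        module, cls = string.rsplit(".", 1)
        if reload:
            importlib.reload(importlib.import_module(module))
        return getattr(importlib.import_module(module), cls)

    def instantiate_from_config_sketch(config):
        # Build an object from a {"target": ..., "params": {...}} mapping.
        if "target" not in config:
            raise KeyError("Expected key `target` to instantiate.")
        return get_obj_from_str_sketch(config["target"])(**config.get("params", dict()))
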
FILE: lidm/utils/model_utils.py
function build_model (line 18) | def build_model(dataset_name, model_name, device='cpu'):
FILE: main.py
function get_parser (line 32) | def get_parser(**parser_kwargs):
function nondefault_trainer_args (line 134) | def nondefault_trainer_args(opt):
class WrappedDataset (line 141) | class WrappedDataset(Dataset):
method __init__ (line 144) | def __init__(self, dataset):
method __len__ (line 147) | def __len__(self):
method __getitem__ (line 150) | def __getitem__(self, idx):
function worker_init_fn (line 154) | def worker_init_fn(_):
class DataModuleFromConfig (line 170) | class DataModuleFromConfig(pl.LightningDataModule):
method __init__ (line 171) | def __init__(self, batch_size, train=None, validation=None, test=None,...
method prepare_data (line 199) | def prepare_data(self):
method setup (line 203) | def setup(self, stage=None):
method _train_dataloader (line 209) | def _train_dataloader(self):
method _val_dataloader (line 219) | def _val_dataloader(self, shuffle=False):
method _test_dataloader (line 230) | def _test_dataloader(self, shuffle=False):
method _predict_dataloader (line 243) | def _predict_dataloader(self, shuffle=False):
class SetupCallback (line 252) | class SetupCallback(Callback):
method __init__ (line 253) | def __init__(self, resume, now, logdir, ckptdir, cfgdir, config, light...
method on_keyboard_interrupt (line 263) | def on_keyboard_interrupt(self, trainer, pl_module):
method on_pretrain_routine_start (line 269) | def on_pretrain_routine_start(self, trainer, pl_module):
class ImageLogger (line 301) | class ImageLogger(Callback):
method __init__ (line 302) | def __init__(self, batch_frequency, max_images, clamp=True, increase_l...
method _testtube (line 323) | def _testtube(self, pl_module, images, batch_idx, split):
method log_local (line 334) | def log_local(self, save_dir, split, images,
method log_img (line 362) | def log_img(self, pl_module, batch, batch_idx, split="train"):
method check_frequency (line 394) | def check_frequency(self, check_idx, split):
method on_train_batch_end (line 406) | def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch...
method on_validation_batch_end (line 410) | def on_validation_batch_end(self, trainer, pl_module, outputs, batch, ...
class CUDACallback (line 418) | class CUDACallback(Callback):
method on_train_epoch_start (line 420) | def on_train_epoch_start(self, trainer, pl_module):
method on_train_epoch_end (line 426) | def on_train_epoch_end(self, trainer, pl_module, outputs):
function melk (line 729) | def melk(*args, **kwargs):
function divein (line 737) | def divein(*args, **kwargs):
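
Among the training entry-point helpers above, worker_init_fn is the standard hook for giving DataLoader workers distinct random seeds so that augmentations differ across workers. A hypothetical minimal version (the repository's own function may additionally partition iterable datasets per worker):

    import numpy as np
    from torch.utils.data import get_worker_info

    def worker_init_fn_sketch(_):
        # Seed numpy differently in every dataloader worker, derived from the
        # per-worker seed PyTorch already assigns.
        info = get_worker_info()
        np.random.seed(info.seed % 2 ** 32)
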
FILE: scripts/eval_ae.py
function custom_to_pcd (line 38) | def custom_to_pcd(x, config, rgb=None):
function custom_to_pil (line 50) | def custom_to_pil(x):
function custom_to_np (line 62) | def custom_to_np(x):
function logs2pil (line 69) | def logs2pil(logs, keys=["sample"]):
function run (line 86) | def run(model, dataloader, imglogdir, pcdlogdir, nplog=None, config=None...
function save_logs (line 104) | def save_logs(logs, imglogdir, pcdlogdir, num, n_saved=0, key_list=None,...
function get_parser (line 121) | def get_parser():
function load_model_from_config (line 177) | def load_model_from_config(config, sd):
function load_model (line 185) | def load_model(config, ckpt):
function test_collate_fn (line 198) | def test_collate_fn(data):
FILE: scripts/sample.py
function custom_to_pcd (line 29) | def custom_to_pcd(x, config):
function custom_to_pil (line 38) | def custom_to_pil(x):
function custom_to_np (line 48) | def custom_to_np(x):
function logs2pil (line 55) | def logs2pil(logs, keys=["samples"]):
function convsample (line 73) | def convsample(model, shape, return_intermediates=True, verbose=True, ma...
function convsample_ddim (line 81) | def convsample_ddim(model, steps, shape, eta=1.0, verbose=False):
function make_convolutional_sample (line 90) | def make_convolutional_sample(model, batch_size, image_size=None, vanill...
function run (line 113) | def run(model, imglogdir, pcdlogdir, batch_size=50, image_size=None, van...
function save_logs (line 141) | def save_logs(logs, imglogdir, pcdlogdir, n_saved=0, key="samples", np_p...
function get_parser (line 165) | def get_parser():
function load_model_from_config (line 268) | def load_model_from_config(config, sd):
function load_model (line 276) | def load_model(config, ckpt):
function visualize (line 289) | def visualize(samples, logdir):
function test_collate_fn (line 298) | def test_collate_fn(data):
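
The sampling scripts share small converters such as custom_to_pil and custom_to_np that turn model outputs in [-1, 1] into images or arrays for logging. A hedged sketch of the usual conversion (single-channel range images are squeezed before building the PIL image; not copied from this script):

    import numpy as np
    import torch
    from PIL import Image

    def custom_to_pil_sketch(x):
        # Map a CxHxW tensor in [-1, 1] to an 8-bit PIL image.
        x = torch.clamp(x.detach().cpu(), -1., 1.)
        x = (x + 1.) / 2.
        x = x.permute(1, 2, 0).numpy()
        x = (255 * x).astype(np.uint8)
        if x.shape[-1] == 1:           # grayscale range image
            x = x[..., 0]
        return Image.fromarray(x)
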
FILE: scripts/sample_cond.py
function custom_to_pcd (line 32) | def custom_to_pcd(x, config, rgb=None):
function custom_to_pil (line 44) | def custom_to_pil(x):
function logs2pil (line 56) | def logs2pil(logs, keys=["sample"]):
function run (line 73) | def run(model, dataloader, imglogdir, pcdlogdir, nplog=None, config=None...
function save_logs (line 91) | def save_logs(logs, imglogdir, pcdlogdir, num, n_saved=0, key_list=None,...
function get_parser (line 115) | def get_parser():
function load_model_from_config (line 205) | def load_model_from_config(config, sd):
function load_model (line 213) | def load_model(config, ckpt):
function visualize (line 225) | def visualize(samples, logdir):
function test_collate_fn (line 234) | def test_collate_fn(data):
function traverse_collate_fn (line 247) | def traverse_collate_fn(data):
FILE: scripts/text2lidar.py
function custom_to_pcd (line 32) | def custom_to_pcd(x, config, rgb=None):
function custom_to_pil (line 44) | def custom_to_pil(x):
function custom_to_np (line 56) | def custom_to_np(x):
function logs2pil (line 63) | def logs2pil(logs, keys=["sample"]):
function convsample (line 81) | def convsample(model, cond, shape, return_intermediates=True, verbose=Tr...
function convsample_ddim (line 89) | def convsample_ddim(model, cond, steps, shape, eta=1.0, verbose=False):
function make_convolutional_sample (line 98) | def make_convolutional_sample(model, cond, batch_size, vanilla=False, cu...
function run (line 121) | def run(model, text_encoder, prompt, imglogdir, pcdlogdir, custom_steps=...
function save_logs (line 142) | def save_logs(logs, imglogdir, pcdlogdir, n_saved=0, key="sample", np_pa...
function get_parser (line 158) | def get_parser():
function load_model_from_config (line 259) | def load_model_from_config(config, sd):
function load_model (line 267) | def load_model(config, ckpt):
function build_text_encoder (line 279) | def build_text_encoder(num_views, apply_all):
function visualize (line 285) | def visualize(samples, logdir):
function test_collate_fn (line 294) | def test_collate_fn(data):
Condensed preview — 124 files, each showing path, character count, and a content snippet.
[
{
"path": ".gitignore",
"chars": 3077,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": "DESIGN.md",
"chars": 13367,
"preview": "# Study on Design of LiDAR Compression \n\nAll the following experiments are conducted with 4 NVIDIA 3090 GPUs on KITTI-36"
},
{
"path": "LICENSE",
"chars": 1066,
"preview": "MIT License\n\nCopyright (c) 2024 Haoxi Ran\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\n"
},
{
"path": "README.md",
"chars": 21553,
"preview": "<div align=\"center\">\n<h1>LiDAR Diffusion Models [CVPR 2024]</h1>\n\n[**Haoxi Ran**](https://hancyran.github.io/) · [**Vito"
},
{
"path": "configs/autoencoder/kitti/autoencoder_c2_p4.yaml",
"chars": 1973,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "configs/lidar_diffusion/kitti/uncond_c2_p4.yaml",
"chars": 2335,
"preview": "model:\n base_learning_rate: 1.0e-06\n target: lidm.models.diffusion.ddpm.LatentDiffusion\n params:\n linear_start: 0."
},
{
"path": "data/config/semantic-kitti.yaml",
"chars": 5539,
"preview": "# This file is covered by the LICENSE file in the root of this project.\nlabels: \n 0 : \"unlabeled\"\n 1 : \"outlier\"\n 10:"
},
{
"path": "init/create_env.sh",
"chars": 850,
"preview": "#!/usr/bin/bash\n\n# install rust compiler\ncurl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y\nexport "
},
{
"path": "lidm/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/data/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/data/annotated_dataset.py",
"chars": 1936,
"preview": "from pathlib import Path\nfrom typing import Optional, List, Dict, Union, Any\nimport warnings\n\nfrom torch.utils.data impo"
},
{
"path": "lidm/data/base.py",
"chars": 3982,
"preview": "import pdb\nfrom abc import abstractmethod\nfrom functools import partial\n\nimport PIL\nimport numpy as np\nfrom PIL import I"
},
{
"path": "lidm/data/conditional_builder/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/data/conditional_builder/objects_bbox.py",
"chars": 2965,
"preview": "from itertools import cycle\nfrom typing import List, Tuple, Callable, Optional\n\nfrom PIL import Image as pil_image, Imag"
},
{
"path": "lidm/data/conditional_builder/objects_center_points.py",
"chars": 6683,
"preview": "import math\nimport random\nimport warnings\nfrom itertools import cycle\nfrom typing import List, Optional, Tuple, Callable"
},
{
"path": "lidm/data/conditional_builder/utils.py",
"chars": 6696,
"preview": "import importlib\nfrom typing import List, Any, Tuple, Optional\n\nimport numpy as np\nfrom ..helper_types import BoundingBo"
},
{
"path": "lidm/data/helper_types.py",
"chars": 729,
"preview": "from typing import Tuple, Optional, NamedTuple, Union, List\nfrom PIL.Image import Image as pil_image\nfrom torch import T"
},
{
"path": "lidm/data/kitti.py",
"chars": 14224,
"preview": "import glob\nimport os\nimport pickle\nimport numpy as np\nimport yaml\nfrom PIL import Image\nimport xml.etree.ElementTree as"
},
{
"path": "lidm/eval/README.md",
"chars": 6670,
"preview": "# Evaluation Toolbox for LiDAR Generation\n\nThis directory is a **self-contained**, **memory-friendly** and mostly **CUDA"
},
{
"path": "lidm/eval/__init__.py",
"chars": 2113,
"preview": "\"\"\"\n@Author: Haoxi Ran\n@Date: 01/03/2024\n@Citation: Towards Realistic Scene Generation with LiDAR Diffusion Models\n\n\"\"\"\n"
},
{
"path": "lidm/eval/compile.sh",
"chars": 120,
"preview": "#!/bin/sh\n\ncd modules/chamfer\npython setup.py build_ext --inplace\n\ncd ../emd\npython setup.py build_ext --inplace\n\ncd ..\n"
},
{
"path": "lidm/eval/eval_utils.py",
"chars": 4133,
"preview": "\"\"\"\n@Author: Haoxi Ran\n@Date: 01/03/2024\n@Citation: Towards Realistic Scene Generation with LiDAR Diffusion Models\n\n\"\"\"\n"
},
{
"path": "lidm/eval/fid_score.py",
"chars": 7598,
"preview": "\"\"\"Calculates the Frechet Inception Distance (FID) to evalulate GANs\nThe FID metric calculates the distance between two "
},
{
"path": "lidm/eval/metric_utils.py",
"chars": 17795,
"preview": "\"\"\"\n@Author: Haoxi Ran\n@Date: 01/03/2024\n@Citation: Towards Realistic Scene Generation with LiDAR Diffusion Models\n\n\"\"\"\n"
},
{
"path": "lidm/eval/models/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/eval/models/minkowskinet/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/eval/models/minkowskinet/model.py",
"chars": 5222,
"preview": "import torch\nimport torch.nn as nn\n\ntry:\n import torchsparse\n import torchsparse.nn as spnn\n from ..ts import b"
},
{
"path": "lidm/eval/models/rangenet/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/eval/models/rangenet/model.py",
"chars": 14032,
"preview": "#!/usr/bin/env python3\n# This file is covered by the LICENSE file in the root of this project.\nfrom collections import O"
},
{
"path": "lidm/eval/models/spvcnn/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/eval/models/spvcnn/model.py",
"chars": 6489,
"preview": "import torch.nn as nn\n\ntry:\n import torchsparse\n import torchsparse.nn as spnn\n from torchsparse import PointTe"
},
{
"path": "lidm/eval/models/ts/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/eval/models/ts/basic_blocks.py",
"chars": 2179,
"preview": "#!/usr/bin/env python\n# encoding: utf-8\n'''\n@author: Xu Yan\n@file: basic_blocks.py\n@time: 2021/4/14 22:53\n'''\nimport tor"
},
{
"path": "lidm/eval/models/ts/utils.py",
"chars": 3678,
"preview": "import torch\n\ntry:\n import torchsparse.nn.functional as F\n from torchsparse import PointTensor, SparseTensor\n f"
},
{
"path": "lidm/eval/modules/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/eval/modules/chamfer2D/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/eval/modules/chamfer2D/chamfer2D.cu",
"chars": 5412,
"preview": "\n#include <stdio.h>\n#include <ATen/ATen.h>\n\n#include <cuda.h>\n#include <cuda_runtime.h>\n\n#include <vector>\n\n\n\n__global__"
},
{
"path": "lidm/eval/modules/chamfer2D/chamfer_cuda.cpp",
"chars": 1072,
"preview": "#include <torch/torch.h>\n#include <vector>\n\n///TMP\n//#include \"common.h\"\n/// NOT TMP\n\t\n\nint chamfer_cuda_forward(at::Ten"
},
{
"path": "lidm/eval/modules/chamfer2D/dist_chamfer_2D.py",
"chars": 2808,
"preview": "from torch import nn\nfrom torch.autograd import Function\nimport torch\nimport importlib\nimport os\n\nchamfer_found = import"
},
{
"path": "lidm/eval/modules/chamfer2D/setup.py",
"chars": 398,
"preview": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n name='chamf"
},
{
"path": "lidm/eval/modules/chamfer3D/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/eval/modules/chamfer3D/chamfer3D.cu",
"chars": 5947,
"preview": "\n#include <stdio.h>\n#include <ATen/ATen.h>\n\n#include <cuda.h>\n#include <cuda_runtime.h>\n\n#include <vector>\n\n\n\n__global__"
},
{
"path": "lidm/eval/modules/chamfer3D/chamfer_cuda.cpp",
"chars": 1072,
"preview": "#include <torch/torch.h>\n#include <vector>\n\n///TMP\n//#include \"common.h\"\n/// NOT TMP\n\t\n\nint chamfer_cuda_forward(at::Ten"
},
{
"path": "lidm/eval/modules/chamfer3D/dist_chamfer_3D.py",
"chars": 2386,
"preview": "from torch import nn\nfrom torch.autograd import Function\nimport torch\nimport importlib\nimport os\n\nchamfer_found = import"
},
{
"path": "lidm/eval/modules/chamfer3D/setup.py",
"chars": 398,
"preview": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n name='chamf"
},
{
"path": "lidm/eval/modules/emd/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/eval/modules/emd/emd.cpp",
"chars": 1530,
"preview": "// EMD approximation module (based on auction algorithm)\n// author: Minghua Liu\n#include <torch/extension.h>\n#include <v"
},
{
"path": "lidm/eval/modules/emd/emd_cuda.cu",
"chars": 11864,
"preview": "// EMD approximation module (based on auction algorithm)\n// author: Minghua Liu\n#include <stdio.h>\n#include <ATen/ATen.h"
},
{
"path": "lidm/eval/modules/emd/emd_module.py",
"chars": 4462,
"preview": "# EMD approximation module (based on auction algorithm)\n# memory complexity: O(n)\n# time complexity: O(n^2 * iter) \n# au"
},
{
"path": "lidm/eval/modules/emd/setup.py",
"chars": 296,
"preview": "from setuptools import setup\nfrom torch.utils.cpp_extension import BuildExtension, CUDAExtension\n\nsetup(\n name='emd',"
},
{
"path": "lidm/models/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/models/autoencoder.py",
"chars": 18305,
"preview": "import numpy as np\nimport torch\nimport pytorch_lightning as pl\nimport torch.nn.functional as F\nfrom contextlib import co"
},
{
"path": "lidm/models/diffusion/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/models/diffusion/classifier.py",
"chars": 10279,
"preview": "import os\nimport torch\nimport pytorch_lightning as pl\nfrom omegaconf import OmegaConf\nfrom torch.nn import functional as"
},
{
"path": "lidm/models/diffusion/ddim.py",
"chars": 11054,
"preview": "\"\"\"SAMPLING ONLY.\"\"\"\n\nimport torch\nimport numpy as np\nfrom tqdm import tqdm\nfrom functools import partial\n\nfrom ...modul"
},
{
"path": "lidm/models/diffusion/ddpm.py",
"chars": 67892,
"preview": "\"\"\"\nwild mixture of\nhttps://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e316"
},
{
"path": "lidm/models/diffusion/plms.py",
"chars": 12444,
"preview": "\"\"\"SAMPLING ONLY.\"\"\"\n\nimport torch\nimport numpy as np\nfrom tqdm import tqdm\nfrom functools import partial\n\nfrom ...modul"
},
{
"path": "lidm/modules/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/modules/attention.py",
"chars": 8502,
"preview": "from inspect import isfunction\nimport math\nimport torch\nimport torch.nn.functional as F\nfrom torch import nn, einsum\nfro"
},
{
"path": "lidm/modules/basic.py",
"chars": 13725,
"preview": "# adopted from\n# https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py\n# and\n#"
},
{
"path": "lidm/modules/diffusion/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/modules/diffusion/model_ldm.py",
"chars": 32606,
"preview": "# pytorch_diffusion + derived encoder decoder\nimport math\nimport torch\nimport torch.nn as nn\nimport numpy as np\nfrom ein"
},
{
"path": "lidm/modules/diffusion/model_lidm.py",
"chars": 27412,
"preview": "# pytorch_diffusion + derived encoder decoder\nimport math\n\nimport torch\nimport torch.nn as nn\nimport numpy as np\nfrom ei"
},
{
"path": "lidm/modules/diffusion/openaimodel.py",
"chars": 35814,
"preview": "from abc import abstractmethod\nimport math\n\nimport numpy as np\nimport torch as th\nimport torch.nn as nn\nimport torch.nn."
},
{
"path": "lidm/modules/distributions/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/modules/distributions/distributions.py",
"chars": 2970,
"preview": "import torch\nimport numpy as np\n\n\nclass AbstractDistribution:\n def sample(self):\n raise NotImplementedError()\n"
},
{
"path": "lidm/modules/ema.py",
"chars": 2969,
"preview": "import torch\nfrom torch import nn\n\n\nclass LitEma(nn.Module):\n def __init__(self, model, decay=0.9999, use_num_upates="
},
{
"path": "lidm/modules/encoders/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/modules/encoders/modules.py",
"chars": 11198,
"preview": "import torch\nimport torch.nn as nn\nfrom functools import partial\nimport clip\nfrom einops import rearrange, repeat\nimport"
},
{
"path": "lidm/modules/image_degradation/__init__.py",
"chars": 150,
"preview": "from .bsrgan import degradation_bsrgan_variant as degradation_fn_bsr\nfrom .bsrgan_light import degradation_bsrgan_varian"
},
{
"path": "lidm/modules/image_degradation/bsrgan.py",
"chars": 25175,
"preview": "# -*- coding: utf-8 -*-\n\"\"\"\n# --------------------------------------------\n# Super-Resolution\n# ------------------------"
},
{
"path": "lidm/modules/image_degradation/bsrgan_light.py",
"chars": 22215,
"preview": "# -*- coding: utf-8 -*-\nimport numpy as np\nimport cv2\nimport torch\n\nfrom functools import partial\nimport random\nfrom sci"
},
{
"path": "lidm/modules/image_degradation/utils_image.py",
"chars": 29022,
"preview": "import os\nimport math\nimport random\nimport numpy as np\nimport torch\nimport cv2\nfrom torchvision.utils import make_grid\nf"
},
{
"path": "lidm/modules/losses/__init__.py",
"chars": 1607,
"preview": "import torch\nimport torch.nn.functional as F\nfrom torch import nn\n\n\ndef adopt_weight(weight, global_step, threshold=0, v"
},
{
"path": "lidm/modules/losses/contperceptual.py",
"chars": 5394,
"preview": "import torch\nimport torch.nn as nn\n\nfrom . import weights_init, hinge_d_loss, vanilla_d_loss\nfrom .discriminator import "
},
{
"path": "lidm/modules/losses/discriminator.py",
"chars": 9408,
"preview": "import functools\nimport torch.nn as nn\n\n\nfrom ..basic import ActNorm, CircularConv2d\n\n\nclass NLayerDiscriminator(nn.Modu"
},
{
"path": "lidm/modules/losses/geometric.py",
"chars": 3041,
"preview": "from functools import partial\n\nimport numpy as np\nimport torch\nfrom torch import nn\nimport torch.nn.functional as F\n\n\ncl"
},
{
"path": "lidm/modules/losses/perceptual.py",
"chars": 4470,
"preview": "import hashlib\nimport os\n\nimport requests\nimport torch\nimport torch.nn as nn\n\nfrom tqdm import tqdm\n\nfrom . import l1, l"
},
{
"path": "lidm/modules/losses/vqperceptual.py",
"chars": 8802,
"preview": "import torch\nfrom torch import nn\n\nfrom . import weights_init, l1, l2, hinge_d_loss, vanilla_d_loss, measure_perplexity,"
},
{
"path": "lidm/modules/minkowskinet/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/modules/minkowskinet/model.py",
"chars": 5222,
"preview": "import torch\nimport torch.nn as nn\n\ntry:\n import torchsparse\n import torchsparse.nn as spnn\n from ..ts import b"
},
{
"path": "lidm/modules/rangenet/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/modules/rangenet/model.py",
"chars": 14032,
"preview": "#!/usr/bin/env python3\n# This file is covered by the LICENSE file in the root of this project.\nfrom collections import O"
},
{
"path": "lidm/modules/spvcnn/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/modules/spvcnn/model.py",
"chars": 6489,
"preview": "import torch.nn as nn\n\ntry:\n import torchsparse\n import torchsparse.nn as spnn\n from torchsparse import PointTe"
},
{
"path": "lidm/modules/ts/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/modules/ts/basic_blocks.py",
"chars": 2013,
"preview": "#!/usr/bin/env python\n# encoding: utf-8\n'''\n@author: Xu Yan\n@file: basic_blocks.py\n@time: 2021/4/14 22:53\n'''\nimport tor"
},
{
"path": "lidm/modules/ts/utils.py",
"chars": 3504,
"preview": "import torch\nimport torchsparse.nn.functional as F\nfrom torchsparse import PointTensor, SparseTensor\nfrom torchsparse.nn"
},
{
"path": "lidm/modules/x_transformer.py",
"chars": 20166,
"preview": "\"\"\"shout-out to https://github.com/lucidrains/x-transformers/tree/main/x_transformers\"\"\"\nimport torch\nfrom torch import "
},
{
"path": "lidm/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lidm/utils/aug_utils.py",
"chars": 3681,
"preview": "import numpy as np\n\n\ndef get_lidar_transform(config, split):\n transform_list = []\n if config['rotate']:\n tr"
},
{
"path": "lidm/utils/lidar_utils.py",
"chars": 7080,
"preview": "import math\n\nimport numpy as np\n\n\ndef pcd2coord2d(pcd, fov, depth_range, labels=None):\n # laser parameters\n fov_up"
},
{
"path": "lidm/utils/lr_scheduler.py",
"chars": 3882,
"preview": "import numpy as np\n\n\nclass LambdaWarmUpCosineScheduler:\n \"\"\"\n note: use with a base_lr of 1.0\n \"\"\"\n def __in"
},
{
"path": "lidm/utils/misc_utils.py",
"chars": 6820,
"preview": "import argparse\nimport importlib\nimport random\n\nimport torch\nimport numpy as np\nfrom collections import abc\nfrom einops "
},
{
"path": "lidm/utils/model_utils.py",
"chars": 1248,
"preview": "import os\n\nimport torch\nimport yaml\n\nfrom lidm.utils.misc_utils import dict2namespace\nfrom ..modules.rangenet.model impo"
},
{
"path": "main.py",
"chars": 30125,
"preview": "import argparse, os, sys, datetime, glob, importlib, csv\nimport numpy as np\nimport time\nimport torch\nimport torchvision\n"
},
{
"path": "models/baseline/kitti/template/config.yaml",
"chars": 646,
"preview": "data:\n target: main.DataModuleFromConfig\n params:\n dataset:\n size: [64, 1024]\n fov: [ 3,-25 ]\n depth"
},
{
"path": "models/baseline/nuscenes/template/config.yaml",
"chars": 652,
"preview": "data:\n target: main.DataModuleFromConfig\n params:\n dataset:\n size: [32, 1024]\n fov: [ 10,-30 ]\n dept"
},
{
"path": "models/first_stage_models/ablate/f_c16/config.yaml",
"chars": 1558,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_c16_p2/config.yaml",
"chars": 1590,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_c2_p2/config.yaml",
"chars": 1562,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_c2_p4/config.yaml",
"chars": 1549,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_c32/config.yaml",
"chars": 1566,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_c4/config.yaml",
"chars": 1540,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_c4_p2/config.yaml",
"chars": 1571,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_c4_p4/config.yaml",
"chars": 1582,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_c64/config.yaml",
"chars": 1578,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_c8/config.yaml",
"chars": 1548,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_c8_p2/config.yaml",
"chars": 1558,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_p16/config.yaml",
"chars": 1560,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_p2/config.yaml",
"chars": 1532,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_p4/config.yaml",
"chars": 1541,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/ablate/f_p8/config.yaml",
"chars": 1552,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/kitti/f_c2_p4/config.yaml",
"chars": 1287,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/first_stage_models/kitti/f_c2_p4_wo_logscale/config.yaml",
"chars": 1286,
"preview": "model:\n base_learning_rate: 4.5e-6\n target: lidm.models.autoencoder.VQModel\n params:\n monitor: val/rec_loss\n em"
},
{
"path": "models/lidm/kitti/cam2lidar/config.yaml",
"chars": 2716,
"preview": "model:\n base_learning_rate: 2.0e-06\n target: lidm.models.diffusion.ddpm.LatentDiffusion\n params:\n linear_start: 0."
},
{
"path": "models/lidm/kitti/sem2lidar/config.yaml",
"chars": 2607,
"preview": "model:\n base_learning_rate: 1.0e-06\n target: lidm.models.diffusion.ddpm.LatentDiffusion\n params:\n linear_start: 0."
},
{
"path": "models/lidm/kitti/text2lidar/config.yaml",
"chars": 2736,
"preview": "model:\n base_learning_rate: 2.0e-06\n target: lidm.models.diffusion.ddpm.LatentDiffusion\n params:\n linear_start: 0."
},
{
"path": "models/lidm/kitti/uncond/config.yaml",
"chars": 2336,
"preview": "model:\n base_learning_rate: 1.0e-06\n target: lidm.models.diffusion.ddpm.LatentDiffusion\n params:\n linear_start: 0."
},
{
"path": "models/lidm/kitti/uncond_wo_logscale/config.yaml",
"chars": 2341,
"preview": "model:\n base_learning_rate: 1.0e-06\n target: lidm.models.diffusion.ddpm.LatentDiffusion\n params:\n linear_start: 0."
},
{
"path": "scripts/eval_ae.py",
"chars": 10160,
"preview": "import math\nimport sys\n\nsys.path.append('./')\n\nimport os, argparse, glob, datetime, yaml\nimport torch\nfrom torch.utils.d"
},
{
"path": "scripts/sample.py",
"chars": 13579,
"preview": "import math\nimport sys\n\nsys.path.append('./')\n\nimport os, argparse, glob, datetime, yaml\nimport torch\nfrom torch.utils.d"
},
{
"path": "scripts/sample_cond.py",
"chars": 11971,
"preview": "import math\nimport sys\n\nsys.path.append('./')\n\nimport os, argparse, glob, datetime, yaml\nimport torch\nfrom torch.utils.d"
},
{
"path": "scripts/text2lidar.py",
"chars": 11637,
"preview": "import math\nimport sys\n\nsys.path.append('./')\n\nimport os, argparse, glob, datetime, yaml\nimport torch\nfrom torch.utils.d"
}
]
About this extraction
This document contains the source code of the hancyran/LiDAR-Diffusion GitHub repository, extracted as plain text: 124 files (733.1 KB, approximately 202.3k tokens) plus a symbol index of 986 extracted functions, classes, methods, constants, and types.