Repository: CyberAgentAILab/SuperNormal Branch: main Commit: 09e26150f7e8 Files: 135 Total size: 479.3 KB Directory structure: gitextract_10qul1w_/ ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── config/ │ ├── diligent.conf │ └── own_objects.conf ├── create_env.sh ├── data_capture_and_preprocessing/ │ ├── README.md │ ├── gather_and_convert_normal_map.py │ ├── iPhone_mvps_data_preprocessing.py │ ├── metashape2neus.py │ ├── metashape2neus2_json_and_images.py │ └── sam_mvps.py ├── download_data.sh ├── exp_runner.py ├── models/ │ ├── cd_and_fscore.py │ ├── dataset_loader.py │ ├── fields.py │ └── renderer.py ├── run_diligent.sh ├── run_own_object.sh ├── third_parties/ │ └── nerfacc-0.3.5/ │ └── nerfacc-0.3.5/ │ ├── .github/ │ │ └── workflows/ │ │ ├── building.yml │ │ ├── code_checks.yml │ │ ├── cuda/ │ │ │ ├── cu101-Linux-env.sh │ │ │ ├── cu101-Linux.sh │ │ │ ├── cu101-Windows-env.sh │ │ │ ├── cu101-Windows.sh │ │ │ ├── cu102-Linux-env.sh │ │ │ ├── cu102-Linux.sh │ │ │ ├── cu102-Windows-env.sh │ │ │ ├── cu102-Windows.sh │ │ │ ├── cu111-Linux-env.sh │ │ │ ├── cu111-Linux.sh │ │ │ ├── cu111-Windows-env.sh │ │ │ ├── cu111-Windows.sh │ │ │ ├── cu113-Linux-env.sh │ │ │ ├── cu113-Linux.sh │ │ │ ├── cu113-Windows-env.sh │ │ │ ├── cu113-Windows.sh │ │ │ ├── cu115-Linux-env.sh │ │ │ ├── cu115-Linux.sh │ │ │ ├── cu115-Windows-env.sh │ │ │ ├── cu115-Windows.sh │ │ │ ├── cu116-Linux-env.sh │ │ │ ├── cu116-Linux.sh │ │ │ ├── cu116-Windows-env.sh │ │ │ ├── cu116-Windows.sh │ │ │ ├── cu117-Linux-env.sh │ │ │ ├── cu117-Linux.sh │ │ │ ├── cu117-Windows-env.sh │ │ │ └── cu117-Windows.sh │ │ └── publish.yml │ ├── .gitignore │ ├── .gitmodules │ ├── .pre-commit-config.yaml │ ├── .readthedocs.yaml │ ├── CMakeLists.txt │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── docs/ │ │ ├── Makefile │ │ ├── requirements.txt │ │ └── source/ │ │ ├── _static/ │ │ │ └── css/ │ │ │ └── readthedocs.css │ │ ├── apis/ │ │ │ ├── generated/ │ │ │ │ ├── nerfacc.accumulate_along_rays.rst │ │ │ │ 
├── nerfacc.pack_data.rst │ │ │ │ ├── nerfacc.ray_aabb_intersect.rst │ │ │ │ ├── nerfacc.ray_resampling.rst │ │ │ │ ├── nerfacc.render_transmittance_from_alpha.rst │ │ │ │ ├── nerfacc.render_transmittance_from_density.rst │ │ │ │ ├── nerfacc.render_visibility.rst │ │ │ │ ├── nerfacc.render_weight_from_alpha.rst │ │ │ │ ├── nerfacc.render_weight_from_density.rst │ │ │ │ ├── nerfacc.unpack_data.rst │ │ │ │ └── nerfacc.unpack_info.rst │ │ │ ├── grid.rst │ │ │ ├── rendering.rst │ │ │ └── utils.rst │ │ ├── conf.py │ │ ├── examples/ │ │ │ ├── dnerf.rst │ │ │ ├── ngp.rst │ │ │ ├── unbounded.rst │ │ │ └── vanilla.rst │ │ └── index.rst │ ├── examples/ │ │ ├── datasets/ │ │ │ ├── __init__.py │ │ │ ├── dnerf_synthetic.py │ │ │ ├── nerf_360_v2.py │ │ │ ├── nerf_synthetic.py │ │ │ └── utils.py │ │ ├── radiance_fields/ │ │ │ ├── __init__.py │ │ │ ├── mlp.py │ │ │ └── ngp.py │ │ ├── requirements.txt │ │ ├── train_mlp_dnerf.py │ │ ├── train_mlp_nerf.py │ │ ├── train_ngp_nerf.py │ │ └── utils.py │ ├── nerfacc/ │ │ ├── __init__.py │ │ ├── cdf.py │ │ ├── contraction.py │ │ ├── cuda/ │ │ │ ├── __init__.py │ │ │ ├── _backend.py │ │ │ └── csrc/ │ │ │ ├── cdf.cu │ │ │ ├── contraction.cu │ │ │ ├── include/ │ │ │ │ ├── helpers_contraction.h │ │ │ │ ├── helpers_cuda.h │ │ │ │ └── helpers_math.h │ │ │ ├── intersection.cu │ │ │ ├── pack.cu │ │ │ ├── pybind.cu │ │ │ ├── ray_marching.cu │ │ │ ├── render_transmittance.cu │ │ │ ├── render_transmittance_cub.cu │ │ │ └── render_weight.cu │ │ ├── grid.py │ │ ├── intersection.py │ │ ├── losses.py │ │ ├── pack.py │ │ ├── ray_marching.py │ │ ├── sampling.py │ │ ├── version.py │ │ └── vol_rendering.py │ ├── scripts/ │ │ ├── run_aws_listing.py │ │ ├── run_dev_checks.py │ │ └── run_profiler.py │ ├── setup.cfg │ ├── setup.py │ └── tests/ │ ├── test_contraction.py │ ├── test_grid.py │ ├── test_intersection.py │ ├── test_loss.py │ ├── test_pack.py │ ├── test_ray_marching.py │ ├── test_rendering.py │ └── test_resampling.py └── utilities/ └── utils.py 
================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ data/ exp/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. 
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/#use-with-ide .pdm.toml # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. .idea/ ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2024 CyberAgent AI Lab Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================

SuperNormal: Neural Surface Reconstruction via Multi-View Normal Integration

Xu Cao · Takafumi Taketomi
CyberAgent

CVPR 2024


arXiv PDF [//]: # ( ) [//]: # ( Project Page)

### Update - **2024/09/30**: Real-world raw data and step-by-step data pre-processing instructions are available. See [here](./data_capture_and_preprocessing/README.md).
Teaser Fast and fine-grained 3D reconstruction from multi-view surface normal maps.
### Quick Start Code was tested on Ubuntu 18.04 (WSL2) using Python 3.8, PyTorch 2.1.0, and CUDA 11.8 on an Nvidia RTX4090Ti (24GB). **Before starting, please ensure CUDA is installed in your environment ([11.8 can be found here](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_local)).** It is required by [tiny-cuda-nn](https://github.com/NVlabs/tiny-cuda-nn).
You should see something like the following after typing `nvcc --version` ```commandline nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2022 NVIDIA Corporation Built on Wed_Sep_21_10:33:58_PDT_2022 Cuda compilation tools, release 11.8, V11.8.89 Build cuda_11.8.r11.8/compiler.31833905_0 ```
Clone the repository and prepare the conda environment: ```commandline git clone https://github.com/CyberAgentAILab/SuperNormal.git cd SuperNormal . ./create_env.sh ``` Download data (~1.8GB): ```commandline ./download_data.sh ``` Run on the DiLiGenT-MV benchmark objects or on our captured objects: ```commandline ./run_diligent.sh # Training should take about 50 seconds per object ``` or ```commandline ./run_own_object.sh # Training should take about 5 minutes per object ``` Results are saved under `./exp`. NOTE: If a RuntimeError like the one below occurs, `apt install ninja-build` may resolve it. ``` RuntimeError: Ninja is required to load C++ extensions ``` ### Hyperparameter tuning tips Training hyperparameters are defined in `./config/*.conf`. Some important hyperparameters are: - `dataset.normal_dir`: You can choose normal maps estimated by different methods as input for DiLiGenT-MV benchmark objects. - `train.end_iter`: The number of iterations for training. Should be adjusted according to the number of views and normal map resolutions. - `train.increase_bindwidth_every`: A strategy used in [Neuralangelo](https://research.nvidia.com/labs/dir/neuralangelo/) to progressively activate finer hash grid levels during training. A value less than `end_iter`/`model.encoding.n_levels` should be fine. - `train.batch_size`: Number of patches in each batch for training. Should be adjusted according to the GPU memory. - `train.patch_size`: Better to be fixed to 3, i.e., each patch is 3x3. A large patch size will cause inaccurate volume rendering results for boundary pixels in a patch. ### Modifications to NerfAcc We add several functions to the original [NerfAcc](https://www.nerfacc.com) to adapt it to patch-based volume rendering. 
The key new functions (which are indicated by `patch_based` in function name) are in [third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_weight.cu/](./third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_weight.cu) and [third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/vol_rendering.py](./third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/vol_rendering.py). ### Acknowledgement This repo is built up on [NeuS](https://github.com/Totoro97/NeuS) and benefits from the amazing [tiny-cuda-nn](https://github.com/NVlabs/tiny-cuda-nn) and [NerfAcc](https://www.nerfacc.com). We also learned a lot from [instant-nsr-pl](https://github.com/bennyguo/instant-nsr-pl). ### Citation If you find our work useful in your research, please consider citing: ```bibtex @inproceedings{supernormal2024cao, title={SuperNormal: {N}eural Surface Reconstruction via Multi-View Normal Integration}, author={Cao Xu and Taketomi Takafumi}, booktitle={CVPR}, year={2024} } ``` ================================================ FILE: __init__.py ================================================ ================================================ FILE: config/diligent.conf ================================================ general { dataset_class = models.dataset_loader.Dataset renderer_class = models.renderer.NeuSRenderer base_exp_dir = ./exp/diligent_mv/CASE_NAME recording = [ ./, ./models ] } dataset { data_dir = data/diligent_mv_normals/CASE_NAME/ normal_dir = normal_world_space_sdmunips # choose normal maps estimated by different methods, should be in the world space cameras_name = cameras_sphere.npz exclude_views = [0, 4, 8, 12, 16] # index of views to exclude for test purpose, 0-based upsample_factor = 1 } train { learning_rate = 5e-4 learning_rate_alpha = 0.05 end_iter = 5000 increase_bindwidth_every = 350 # following neuralangelo's strategy gradient_method = dfd # dfd or fd or ad, for directional finite difference, finite difference, and auto-differentiation batch_size = 2048 
patch_size = 3 # i.e., each training step samples 2048 patches of 3x3 pixels warm_up_end = 50 use_white_bkgd = False loss_type = l2 # for normal loss normal_weight = 1 eikonal_weight = 1 mask_weight = 1 } val { save_freq = 1000 val_normal_freq = 5001 val_normal_resolution_level = 1 gradient_method = dfd # dfd or fd or ad, can be different from training val_mesh_freq = 10000 val_mesh_res = 512 report_freq = 100 eval_metric_freq = 5000 } model { sdf_network { d_out = 1 d_in = 3 d_hidden = 64 n_layers = 1 skip_in = [-1] # -1 for no skip connection bias = 0.6 geometric_init = True weight_norm = True input_concat = True # concat input positions and encoded features } variance_network { init_val = 0.5 } ray_marching { start_step_size = 1e-2 end_step_size = 1e-3 occ_threshold = 0.1 occ_sigmoid_k = 80.0 occ_resolution = 128 occ_update_freq = 8 # batches } encoding{ otype=HashGrid, n_levels=14 n_features_per_level=2 log2_hashmap_size=19 base_resolution=32 per_level_scale=1.3195079107728942 } } ================================================ FILE: config/own_objects.conf ================================================ general { dataset_class = models.dataset_loader.Dataset renderer_class = models.renderer.NeuSRenderer base_exp_dir = ./exp/own_objects/CASE_NAME recording = [ ./, ./models ] } dataset { data_dir = data/own_objects_normals/CASE_NAME/ normal_dir = normal_world_space_sdmunips cameras_name = cameras_sphere.npz exclude_views = [] # index of views to exclude, 0-based upsample_factor = 1 } train { learning_rate = 5e-4 learning_rate_alpha = 0.05 end_iter = 30000 increase_bindwidth_every = 2000 # following neuralangelo's strategy gradient_method = dfd # dfd or fd or ad, for directional finite difference, finite difference, and auto-differentiation batch_size = 2048 patch_size = 3 # i.e., each training step samples 2048 patches of 3x3 pixels warm_up_end = 500 use_white_bkgd = False loss_type = l2 # for normal loss normal_weight = 1 eikonal_weight = 1 mask_weight = 1 } 
val { save_freq = 10000 val_normal_freq = 30000 val_normal_resolution_level = 2 gradient_method = dfd # dfd or fd or ad, can be different from training val_mesh_freq = 30000 val_mesh_res = 1024 report_freq = 100 eval_metric_freq = 30000 } model { sdf_network { d_out = 1 d_in = 3 d_hidden = 64 n_layers = 1 skip_in = [-1] bias = 0.8 geometric_init = True weight_norm = True input_concat = True # concat input positions and encoded features } variance_network { init_val = 0.5 } ray_marching { start_step_size = 1e-2 end_step_size = 1e-3 occ_threshold = 0.1 occ_sigmoid_k = 80.0 occ_resolution = 128 occ_update_freq = 8 # batches } encoding{ otype=HashGrid, n_levels=14 n_features_per_level=2 log2_hashmap_size=19 base_resolution=32 per_level_scale=1.3195079107728942 } } ================================================ FILE: create_env.sh ================================================ conda deactivate conda remove -y -n sn --all conda create -y -n sn python=3.8 conda activate sn pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118 # install tiny-cuda-nn export PATH="/usr/local/cuda/bin:$PATH" export LIBRARY_PATH="/usr/local/cuda/lib64/stubs:$LIBRARY_PATH" pip install git+https://github.com/NVlabs/tiny-cuda-nn/@2ec562e853e6f482b5d09168705205f46358fb39#subdirectory=bindings/torch pip install -e ./third_parties/nerfacc-0.3.5/nerfacc-0.3.5/ pip install opencv-python==4.8.1.78 trimesh==3.23.5 open3d==0.17 pyvista==0.42.3 scipy==1.10.1 scikit-image==0.21.0 pyhocon==0.3.59 pyexr==0.3.10 tensorboard==2.14.0 icecream==2.1.3 PyMCubes==0.1.4 pyembree==0.2.11 ================================================ FILE: data_capture_and_preprocessing/README.md ================================================ This is a step-by-step guide to preprocess the raw images captured by an iPhone for the MVPS task. You can download our raw images using the following command *(~6 GB per object)*. 
``` gdown 'https://drive.google.com/file/d/1BcCuZR0C-snmCNf8iGhkFgkQ6arfcQ-L/view?usp=sharing' --fuzzy unzip flower_girl.zip rm flower_girl.zip gdown 'https://drive.google.com/file/d/12QzgRbOjBSx295BS4zihnOjcdYh7ZaP9/view?usp=sharing' --fuzzy unzip lion.zip rm lion.zip gdown 'https://drive.google.com/file/d/1cvKbI5VvDhsuA4a06rYqqoAtQd8GtyeI/view?usp=sharing' --fuzzy unzip dog.zip rm dog.zip ``` ## File structure You should have the following file structure under each object's folder: ``` - RAW - mask - cameras.xml ``` The `RAW` folder contains all the DNG images captured by an iPhone. The `mask` folder contains the foreground masks for each view. The `cameras.xml` file contains the camera parameters calibrated with [Metashape](https://oakcorp.net/agisoft/download/). ## Step-by-step data pre-processing First we convert the DNG images to PNG file format. ``` # pip install rawpy python iPhone_mvps_data_preprocessing.py --data_dir <obj_folder> ``` Now the file structure looks like this ``` - RAW - mvps_png_full - sfm_png_full - mask - cameras.xml ``` The `mvps_png_full` folder contains the pre-processed images for photometric stereo, and the `sfm_png_full` folder contains the images for camera calibration using Structure from Motion. In each view, we first take an image in ambient light and then additionally illuminate the object with an active light source. So the first image in each view is collected in `sfm_png_full`. ### Mask preparation Now we prepare the foreground masks for each view. We used SAM to interactively segment the foreground objects. Please install SAM according to the [official instructions](https://github.com/facebookresearch/segment-anything). After installation, run the following command to segment the foreground objects for all views: ``` python sam_mvps.py --data_dir <obj_folder>/mvps_png_full/ --checkpoint <path/to/sam_vit_h_checkpoint.pth> ``` This will pop up a window where you can interactively segment the foreground objects. 
Select points on the object to segment the foreground object, and press `Esc` to check the intermediate results. Continue to select points until you are satisfied with the segmentation results, and press `Enter` to save the mask. The process will be repeated for all views. The same mask will be saved in two places: `obj_folder/mask` and the corresponding folder containing the image from the same viewpoint. The latter will be used for normal map estimation. ### Camera calibration In [Metashape](https://oakcorp.net/agisoft/download/), import the images in the `sfm_png_full` folder and run the camera calibration process. ``` [Workflow] -> [Add Folder] -> select `sfm_png_full` -> select single cameras -> [Workflow] -> [Align Photos] ``` After camera calibration, export the camera parameters to `cameras.xml`. ``` [File] -> [Export] -> [Export Cameras] ``` The resulting `cameras.xml` file is what we have put in the object folder. ### Normal map estimation Install [SDM-UniPS](https://github.com/satoshi-ikehata/SDM-UniPS-CVPR2023) and run the following command to generate the normal maps for each view: ``` python <path/to/SDM-UniPS>/sdm_unips/main.py --session_name YOUR_SESSION_NAME --test_dir <obj_folder>/mvps_png_full --checkpoint <path/to/sdm_unips_checkpoint> --scalable --target normal ``` Tips: Prepare the mask for each view to improve the normal estimation results. This is already done if you have completed the previous mask segmentation step. The original SDM-UniPS code outputs normal maps in the PNG format. You can get EXR output instead by replacing [this line](https://github.com/satoshi-ikehata/SDM-UniPS-CVPR2023/blob/96e68f353173c2ae85bfe609e4728a19a2f8c92e/sdm_unips/modules/builder/builder.py#L162) with the following one: ``` pyexr.write(f'{testdata.data.data_workspace}/normal.exr', nout) ``` Remember to install the [pyexr](https://github.com/tvogels/pyexr) package and import it in the file. After normal estimation, we collect the normal maps into a single folder. 
Since SDM-UniPS estimates normal maps in camera space, we also convert them to the world space using the camera parameters from the previous step. ``` python gather_and_convert_normal_map.py --data_dir <obj_folder> --sdm_unips_result_dir <path/to/sdm_unips_results> ``` The file structure is now as follows: ``` - RAW - mvps_png_full - sfm_png_full - mask - normal_camera_space_sdmunips - normal_world_space_sdmunips - cameras.xml - results # if your SDM-UniPS output is in this folder ``` ### Convert camera parameters to NeuS format The last step is to convert the camera parameters to the NeuS format. ``` python metashape2neus.py --xml_path <obj_folder>/cameras.xml ``` This will create a `cameras_sphere.npz` file in the same folder as `cameras.xml`. We also provide a converter to the NeuS2 format. Check `metashape2neus2_json_and_images.py` for more details. ## Tips for capturing your own data We used the iPhone's built-in camera app to take the images. Here are some tips for successful reconstruction: - Use a tripod to stabilize the camera. - Use a remote shutter release to avoid camera shake. - Keep the same focus point in each view. On iPhone, you can press and hold the screen to lock the focus point. - Use a white/black background to simplify the segmentation process. - Use a turntable to capture the object from different angles. - Place the object on a textured surface to help the Structure from Motion process. - Place the object in the center of the image. - We used a [video light](https://www.ulanzi.com/collections/lighting/products/mini-led-video-light-ulanzi-vl49-1672) to illuminate the object from different angles in each view. Other light sources like a ring light/flashlight may also work. - In each view, vary the light source's position sufficiently around the camera. We used 12 different light positions in our setup. - Reduce the exposure if the captured images are overexposed. The above capture process can be done with off-the-shelf equipment, but it is tedious. 
It would be more convenient if you could build a custom rig to automate the capture process, such as [this example](https://youtu.be/zyEw-1QUlkU?si=8RvYC23emoP8TXrU). ================================================ FILE: data_capture_and_preprocessing/gather_and_convert_normal_map.py ================================================ import os import cv2 import pyexr from glob import glob import numpy as np import shutil from bs4 import BeautifulSoup # $ pip install beautifulsoup4 lxml import argparse parser = argparse.ArgumentParser() parser.add_argument("--sdm_unips_result_dir", type=str, default="../../SDM-UniPS-CVPR2023/flower_girl/results") parser.add_argument("--data_dir", type=str, default="./flower_girl") args = parser.parse_args() xml_path = os.path.join(args.data_dir, "cameras.xml") obj_name = os.path.basename(args.data_dir) num_views = len(glob(os.path.join(args.sdm_unips_result_dir, "view_*.data"))) normal_map_camera_dir = os.path.join(args.data_dir, "normal_camera_space_sdmunips") normal_map_world_dir = os.path.join(args.data_dir, "normal_world_space_sdmunips") # create directories os.makedirs(normal_map_camera_dir, exist_ok=True) os.makedirs(normal_map_world_dir, exist_ok=True) with open(xml_path, "r") as f: xml_data = f.read() bs_data = BeautifulSoup(xml_data, "xml") b_unique = bs_data.find_all('camera') for tag in b_unique: img_name = tag.get("label") view_idx = int(img_name.split("_")[-1]) # camera to world transform C2W = np.array([float(i) for i in tag.find("transform").text.split(" ")]).reshape((4, 4)) normal_map_all = [] normal_map_path_all = [] for i in range(num_views): view_dir = os.path.join(args.sdm_unips_result_dir, f"view_{i:02d}.data") for tag in b_unique: img_name = tag.get("label") view_idx = int(img_name.split("_")[-1]) # camera to world transform if view_idx == i: C2W = np.array([float(i) for i in tag.find("transform").text.split(" ")]).reshape((4, 4)) R = C2W[:3, :3] break if os.path.exists(view_dir): # copy normal map 
normal_map_file = os.path.join(view_dir, "normal.exr") new_normal_map_file = os.path.join(normal_map_camera_dir, f"{i:02d}.exr") shutil.copy(normal_map_file, new_normal_map_file) # convert normal map to world space normal_map_camera = pyexr.read(new_normal_map_file) normal_map_camera[..., [1, 2]] *= -1 # revert y and z axis to match opencv conversion, X right, Y down, Z front H, W = normal_map_camera.shape[:2] normal_world = (R @ normal_map_camera.reshape(-1, 3).T).T.reshape([H, W, 3]) pyexr.write(os.path.join(normal_map_world_dir, f"{i:02d}.exr"), normal_world) ================================================ FILE: data_capture_and_preprocessing/iPhone_mvps_data_preprocessing.py ================================================ import rawpy, os from glob import glob import cv2 import numpy as np import os from tqdm import tqdm import argparse parser = argparse.ArgumentParser() parser.add_argument("--data_dir", type=str, default="./flower_girl") parser.add_argument("--num_img_per_view", type=int, default=13) arg = parser.parse_args() dng_list = glob(os.path.join(arg.data_dir, "RAW", "*.DNG")) dng_list.sort() num_image_per_view = arg.num_img_per_view num_view = len(dng_list) // num_image_per_view resize_factor = 1 # resize the png image to 1/2, 1/4, or 1 if resize_factor == 1/2: sfm_data_dir = os.path.join(arg.data_dir, "sfm_png_half") mvps_data_dir = os.path.join(arg.data_dir, "mvps_png_half") elif resize_factor == 1/4: sfm_data_dir = os.path.join(arg.data_dir, "sfm_png_quarter") mvps_data_dir = os.path.join(arg.data_dir, "mvps_png_quarter") elif resize_factor == 1: mvps_data_dir = os.path.join(arg.data_dir, "mvps_png_full") sfm_data_dir = os.path.join(arg.data_dir, "sfm_png_full") os.makedirs(sfm_data_dir, exist_ok=True) os.makedirs(mvps_data_dir, exist_ok=True) for view_idx in tqdm(range(num_view)): view_dir = os.path.join(mvps_data_dir, f"view_{view_idx:02d}.data") if os.path.exists(view_dir): continue os.makedirs(view_dir, exist_ok=True) view_dng_list = 
dng_list[view_idx * num_image_per_view: (view_idx + 1) * num_image_per_view] for dng_idx, dng_path in enumerate(view_dng_list): with rawpy.imread(dng_path) as raw: rgb = raw.postprocess(no_auto_bright=True, output_bps=16)[..., ::-1].astype(np.float32) rgb = rgb.astype(np.uint16) rgb_resized = cv2.resize(rgb, (0, 0), fx=resize_factor, fy=resize_factor) # choose the first image in each view for SfM if dng_idx == 0: cv2.imwrite(os.path.join(sfm_data_dir, f"{view_idx:02d}.png"), rgb_resized) cv2.imwrite(os.path.join(view_dir, f"L{dng_idx:02d}.png"), rgb_resized) ================================================ FILE: data_capture_and_preprocessing/metashape2neus.py ================================================ import os.path import xml from bs4 import BeautifulSoup # pip install beautifulsoup4 lxml import numpy as np # details of camera normalization can be found in Sec. C.3 in https://openaccess.thecvf.com/content/CVPR2023/supplemental/Cao_Multi-View_Azimuth_Stereo_CVPR_2023_supplemental.pdf def normalize_camera(R_list, t_list, camera2object_ratio=3): A_camera_normalize = 0 b_camera_normalize = 0 camera_center_list = [] for view_idx in range(len(R_list)): R = R_list[view_idx] t = t_list[view_idx] camera_center = - R.T @ t # in world coordinate camera_center_list.append(camera_center) vi = R[2][:, None] # the camera's principal axis in the world coordinates Vi = vi @ vi.T A_camera_normalize += np.eye(3) - Vi b_camera_normalize += camera_center.T @ (np.eye(3) - Vi) offset = np.linalg.lstsq(A_camera_normalize, np.squeeze(b_camera_normalize), rcond=None)[0] camera_center_dist_list = [np.sqrt(np.sum((np.squeeze(c) - offset) ** 2)) for c in camera_center_list] scale = np.max(camera_center_dist_list) / camera2object_ratio return offset, scale def make4x4(P): assert P.shape[-1] == 4 or P.shape[-1] == 3 assert len(P.shape) == 2 assert P.shape[0] == 3 or P.shape[0] == 4 ret = np.eye(4) ret[:P.shape[0], :P.shape[1]] = P return ret class MetashapePoseLoader: def __init__(self, 
xml_path, camera2object_ratio): with open(xml_path, "r") as f: xml_data = f.read() bs_data = BeautifulSoup(xml_data, "xml") c_unique = bs_data.find_all('resolution') img_width = int(c_unique[0].get("width")) img_height = int(c_unique[0].get("height")) c_intrinsics = bs_data.find_all('calibration') f = float(c_intrinsics[0].find("f").text) cx_offset = float(c_intrinsics[0].find("cx").text) cy_offset = float(c_intrinsics[0].find("cy").text) K = np.array([[f, 0, (img_width-1)/2 + cx_offset], [0, f, (img_height-1)/2 + cy_offset], [0, 0, 1]]) b_unique = bs_data.find_all('camera') R_list = [] t_list = [] C2W_list = [] camera_sphere = dict() for tag in b_unique: img_name = tag.get("label") view_idx = int(img_name.split("_")[-1]) # camera to world transform C2W = np.array([float(i) for i in tag.find("transform").text.split(" ")]).reshape((4, 4)) C2W_list.append(C2W) assert int(img_name) == view_idx W2C = np.linalg.inv(C2W) R_list.append(W2C[:3, :3]) t_list.append(W2C[:3, 3]) camera_sphere[f"world_mat_{view_idx}"] = make4x4(K) @ W2C offset, scale = normalize_camera(R_list, t_list, camera2object_ratio=camera2object_ratio) print("offset", offset, "scale", scale) num_views = len(C2W_list) scale_mat = np.eye(4) scale_mat[:3, :3] *= scale scale_mat[:3, 3] = offset for im_idx in range(num_views): camera_sphere[f"scale_mat_{im_idx}"] = scale_mat data_dir = os.path.dirname(xml_path) np.savez(os.path.join(data_dir, 'cameras_sphere.npz'), **camera_sphere) if __name__=="__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("--xml_path", type=str, required=True) parser.add_argument("--ratio", type=float, default=10) args = parser.parse_args() MetashapePoseLoader(args.xml_path, camera2object_ratio=args.ratio) ================================================ FILE: data_capture_and_preprocessing/metashape2neus2_json_and_images.py ================================================ from glob import glob import os import numpy as np import cv2 from bs4 import 
BeautifulSoup from metashape2neus import normalize_camera, make4x4 import json import argparse def create_json_file(data, filename): with open(filename, 'w') as f: json.dump(data, f, indent=4) parser = argparse.ArgumentParser() parser.add_argument('--data_dir', type=str, default="./flower_girl") arg = parser.parse_args() data_dir = os.path.join(arg.data_dir, "sfm_png_full") mask_dir = os.path.join(arg.data_dir, "mask") xml_path = os.path.join(arg.data_dir, "cameras.xml") obj_name = os.path.basename(arg.data_dir) target_dir = os.path.join(arg.data_dir, "neus2_input", "images") os.makedirs(target_dir, exist_ok=True) # load images and masks and save them as rgba images img_list = glob(os.path.join(data_dir, "*.png")) img_list.sort() num_view = len(img_list) print(num_view) img_h, img_w = cv2.imread(img_list[0]).shape[:2] for i in range(num_view): img_path = img_list[i] mask_path = os.path.join(mask_dir, f"{i:02d}.png") img = cv2.imread(img_path) mask = cv2.imread(mask_path) img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA) img[..., 3] = mask[..., 0] new_img_path = os.path.join(target_dir, f"{i:02d}.png") cv2.imwrite(new_img_path, img) print(f"Saved {new_img_path}") data = { "from_na": True, "w": img_w, "h": img_h, "aabb_scale": 1.0, "frames": [], "scale": 1, "offset": [1, 1, 1], } with open(xml_path, "r") as f: xml_data = f.read() bs_data = BeautifulSoup(xml_data, "xml") c_unique = bs_data.find_all('resolution') img_width = int(c_unique[0].get("width")) img_height = int(c_unique[0].get("height")) c_intrinsics = bs_data.find_all('calibration') f = float(c_intrinsics[0].find("f").text) cx_offset = float(c_intrinsics[0].find("cx").text) cy_offset = float(c_intrinsics[0].find("cy").text) K = np.array([[f, 0, (img_width - 1) / 2 + cx_offset], [0, f, (img_height - 1) / 2 + cy_offset], [0, 0, 1]]) b_unique = bs_data.find_all('camera') R_list = [] t_list = [] C2W_list = [] camera_sphere = dict() for tag in b_unique: img_name = tag.get("label") view_idx = 
def show_mask(mask, ax, random_color=False):
    """Draw ``mask`` on ``ax`` as a semi-transparent RGBA overlay.

    Args:
        mask: Array whose last two dimensions are (height, width); it must be
            reshapeable to ``(height, width, 1)``.
        ax: Object providing ``imshow`` (e.g. a matplotlib axes).
        random_color: When true, draw a random RGB colour; otherwise use a
            fixed blue. Alpha is 0.6 in both cases.
    """
    if random_color:
        rgb = np.random.random(3)
        rgba = np.concatenate([rgb, np.array([0.6])], axis=0)
    else:
        rgba = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])

    height, width = mask.shape[-2:]
    # (H, W, 1) * (1, 1, 4) broadcasts to an (H, W, 4) image.
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)
def get_class(kls):
    """Resolve a dotted path such as ``"package.module.Name"`` to the object.

    Everything before the last dot is imported as a module; the final
    component is looked up as an attribute of that module.

    Args:
        kls: Fully qualified dotted name (``"<module path>.<attribute>"``).

    Returns:
        The attribute named by the last component of ``kls``.

    Raises:
        ValueError: If ``kls`` has no module part or no attribute part.
        ImportError: If the module part cannot be imported.
        AttributeError: If the module lacks the requested attribute.
    """
    # Local import keeps this helper self-contained.
    import importlib

    module_name, _, attr_name = kls.rpartition('.')
    if not module_name or not attr_name:
        raise ValueError(
            f"Expected a dotted path '<module>.<name>', got {kls!r}")
    return getattr(importlib.import_module(module_name), attr_name)
ConfigFactory.parse_string(conf_text) self.base_exp_dir = os.path.join(self.conf['general.base_exp_dir'], exp_time_dir) os.makedirs(self.base_exp_dir, exist_ok=True) self.dataset = get_class(self.conf['general.dataset_class'])(self.conf['dataset']) self.iter_step = 0 # Training parameters self.end_iter = self.conf.get_int('train.end_iter') self.batch_size = self.conf.get_int('train.batch_size') self.patch_size = self.conf.get_int('train.patch_size', default=3) self.learning_rate = self.conf.get_float('train.learning_rate') self.learning_rate_alpha = self.conf.get_float('train.learning_rate_alpha') self.use_white_bkgd = self.conf.get_bool('train.use_white_bkgd') self.warm_up_end = self.conf.get_float('train.warm_up_end', default=0.0) self.loss_type = self.conf.get('train.loss_type', 'l1') self.normal_weight = self.conf.get_float('train.normal_weight') self.eikonal_weight = self.conf.get_float('train.eikonal_weight') self.mask_weight = self.conf.get_float('train.mask_weight') self.increase_bindwidth_every = self.conf.get_int('train.increase_bindwidth_every', default=350) # validation parameters self.val_normal_freq = self.conf.get_int('val.val_normal_freq') self.val_normal_resolution_level = self.conf.get_int('val.val_normal_resolution_level') self.val_gradient_method = self.conf.get('val.gradient_method', 'dfd') self.val_mesh_freq = self.conf.get_int('val.val_mesh_freq') self.val_mesh_res = self.conf.get_int('val.val_mesh_res') self.eval_metric_freq = self.conf.get_int('val.eval_metric_freq') self.report_freq = self.conf.get_int('val.report_freq') self.save_freq = self.conf.get_int('val.save_freq') # Ray marching parameters self.start_step_size = self.conf.get_float('model.ray_marching.start_step_size', default=1e-2) self.end_step_size = self.conf.get_float('model.ray_marching.end_step_size', default=5e-4) self.slop_step = (np.log10(self.start_step_size) - np.log10(self.end_step_size)) / self.end_iter # Networks params_to_train = [] self.sdf_network = 
def train(self):
    """Optimise the SDF and variance networks until ``self.end_iter``.

    Each iteration samples random pixel patches, renders normals and
    occupancy, and minimises a weighted sum of normal, mask (binary
    cross-entropy) and eikonal losses. At the configured frequencies it also
    saves checkpoints, extracts meshes, renders validation normal maps and
    appends evaluation metrics to ``eval_metrics.csv`` in the experiment
    directory.

    Raises:
        ValueError: If ``self.loss_type`` is neither ``'l1'`` nor ``'l2'``.
    """
    # Fail fast: an unknown loss type would otherwise only surface inside the
    # loop as an unbound ``normal_loss``.
    if self.loss_type not in ('l1', 'l2'):
        raise ValueError(
            f"Unsupported train.loss_type {self.loss_type!r}; expected 'l1' or 'l2'")

    print("Start training...")
    self.writer = SummaryWriter(log_dir=os.path.join(self.base_exp_dir, 'logs'))
    self.writer.add_graph(self.sdf_network, verbose=False, input_to_model=torch.randn(1, 3))
    self.update_learning_rate()

    # Held-out views add an extra MAE column to the metrics file.
    has_test_views = len(self.dataset.exclude_view_list) > 0

    # create a csv file to save the evaluation metrics
    csv_file_path = os.path.join(self.base_exp_dir, "eval_metrics.csv")
    if not os.path.exists(csv_file_path):
        header = ['iter', 'mae_all_view']
        if has_test_views:
            header.append('mae_test_view')
        header += ['CD', 'fscore']
        # newline='' lets the csv module control line endings itself.
        with open(csv_file_path, 'w', newline='') as csv_file:
            csv.writer(csv_file).writerow(header)

    center = self.patch_size // 2
    res_step = self.end_iter - self.iter_step
    pbar = tqdm(range(res_step))
    for iter_i in pbar:
        # update ray marching step size (log-linear decay)
        # NOTE(review): the schedule is driven by the loop index, not by
        # self.iter_step, so it restarts when training is resumed -- confirm
        # whether that is intended.
        self.renderer.sampling_step_size = 10 ** (np.log10(self.start_step_size) - self.slop_step * iter_i)

        # update occupancy grid
        self.renderer.occupancy_grid.every_n_step(step=iter_i,
                                                  occ_eval_fn=self.renderer.occ_eval_fn,
                                                  occ_thre=self.conf["model.ray_marching"]["occ_threshold"],
                                                  n=self.conf["model.ray_marching"]["occ_update_freq"])

        # following neuralangelo, gradually increase ingp bandwidth
        if self.iter_step % self.increase_bindwidth_every == 0:
            self.renderer.sdf_network.increase_bandwidth()

        # sample patches of pixels for training
        rays_o_patch_all, rays_d_patch_all, marching_plane_normal, V_inverse_patch_all, true_normal, mask = \
            self.dataset.gen_random_patches(self.batch_size, patch_H=self.patch_size, patch_W=self.patch_size)
        rays_o_patch_center = rays_o_patch_all[:, center, center]  # (num_patch, 3)
        rays_d_patch_center = rays_d_patch_all[:, center, center]  # (num_patch, 3)

        near, far = self.dataset.near_far_from_sphere(rays_o_patch_center, rays_d_patch_center)

        if self.mask_weight > 0.0:
            mask = (mask > 0.5).float()
        else:
            mask = torch.ones_like(mask)
        mask_sum = mask.sum() + 1e-5

        # forward rendering
        render_out = self.renderer.render(rays_o_patch_all,
                                          rays_d_patch_all,
                                          marching_plane_normal,
                                          near, far,
                                          V_inverse_patch_all)

        if render_out['gradients'] is None:
            # all rays are in the zero region of the occupancy grid
            self.update_learning_rate()
            continue

        comp_normal = render_out['comp_normal']  # rendered normal at pixels
        gradients = render_out['gradients']  # gradients at all sampled 3D points
        comp_mask = render_out['weight_sum']  # rendered occupancy at pixels
        samples_per_ray = render_out['samples_per_ray']

        normal_error = (comp_normal - true_normal) * mask
        if self.loss_type == 'l1':
            normal_loss = F.l1_loss(normal_error, torch.zeros_like(normal_error), reduction='sum') / mask_sum
        else:  # 'l2', validated above
            normal_loss = F.mse_loss(normal_error, torch.zeros_like(normal_error), reduction='sum') / mask_sum

        gradients_norm = torch.linalg.norm(gradients, ord=2, dim=-1)
        eikonal_loss = F.mse_loss(gradients_norm, torch.ones_like(gradients_norm), reduction='mean')

        # clip keeps the BCE logarithms finite
        mask_loss = F.binary_cross_entropy(comp_mask.clip(1e-5, 1.0 - 1e-5), mask)

        loss = self.normal_weight * normal_loss + \
               self.mask_weight * mask_loss + \
               self.eikonal_weight * eikonal_loss

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.iter_step += 1
        self.update_learning_rate()

        if self.iter_step % self.report_freq == 0:
            message_postfix = OrderedDict(loss=f"{loss:.3e}",
                                          s=f"{self.deviation_network.variance.item():.3e}",
                                          rm_step=f"{self.renderer.sampling_step_size.item():.3e}",
                                          samples_per_ray=f"{samples_per_ray:.1f}")
            pbar.set_postfix(ordered_dict=message_postfix)

        if self.iter_step % self.save_freq == 0:
            self.save_checkpoint()

        if self.iter_step % self.val_mesh_freq == 0:
            self.validate_mesh(resolution=self.val_mesh_res)

        if self.iter_step % self.val_normal_freq == 0:
            for val_idx in range(self.dataset.n_images):
                self.validate_normal_patch_based(idx=val_idx,
                                                 resolution_level=self.val_normal_resolution_level,
                                                 gradient_method=self.val_gradient_method)

        if self.iter_step % self.eval_metric_freq == 0:
            # no gt mesh, skip the evaluation
            if self.dataset.mesh_gt is None:
                continue

            # remove invisible faces in the gt mesh (done once, then cached)
            if self.dataset.points_gt is None:
                scale = self.dataset.scale_mats_np[0][0, 0]
                offset = self.dataset.scale_mats_np[0][:3, 3][None]
                self.dataset.mesh_gt.vertices = o3d.utility.Vector3dVector(
                    (np.asarray(self.dataset.mesh_gt.vertices) - offset) / scale)
                mesh = trimesh.Trimesh(np.asarray(self.dataset.mesh_gt.vertices),
                                       np.asarray(self.dataset.mesh_gt.triangles), process=False)
                self.dataset.points_gt = self.find_visible_points(mesh) * scale + offset

            cd, fscore = self.eval_geo(resolution=512)
            print(f'iter: {self.iter_step} cd: {cd:.3e}, fscore: {fscore:.3e}')

            if has_test_views:
                mae_allview, mae_test_view = self.eval_mae(gradient_method=self.val_gradient_method)
                print('MAE (all views) {0}: {1:.5f}'.format(self.val_gradient_method, mae_allview))
                print('MAE (test views) {0}: {1:.5f}'.format(self.val_gradient_method, mae_test_view))
                row = [self.iter_step, mae_allview, mae_test_view, cd, fscore]
            else:
                # NOTE(review): this branch hard-codes "dfd" while the branch
                # above uses self.val_gradient_method -- confirm which is meant.
                mae_allview = self.eval_mae(gradient_method="dfd")
                row = [self.iter_step, mae_allview, cd, fscore]

            # write to csv file
            with open(csv_file_path, 'a', newline='') as csv_file:
                csv.writer(csv_file).writerow(row)
def save_checkpoint(self):
    """Write network, optimizer and iteration state to the checkpoint folder.

    The file is stored as ``<base_exp_dir>/checkpoints/ckpt_<iter>.pth`` with
    the iteration zero-padded to six digits; the folder is created on demand.
    """
    state = dict(
        sdf_network_fine=self.sdf_network.state_dict(),
        variance_network_fine=self.deviation_network.state_dict(),
        optimizer=self.optimizer.state_dict(),
        iter_step=self.iter_step,
    )

    ckpt_dir = os.path.join(self.base_exp_dir, 'checkpoints')
    os.makedirs(ckpt_dir, exist_ok=True)

    ckpt_name = 'ckpt_{:0>6d}.pth'.format(self.iter_step)
    torch.save(state, os.path.join(ckpt_dir, ckpt_name))
def validate_normal_patch_based(self, idx=-1, resolution_level=-1, gradient_method="dfd"):
    """Render the normal map of one view patch by patch and save it as PNGs.

    Args:
        idx: View index; a negative value picks a random view.
        resolution_level: Passed to ``dataset.gen_patches_at``; a negative
            value falls back to ``self.val_normal_resolution_level``.
        gradient_method: Forwarded to the renderer and used in the name of
            the output directory.

    Returns:
        tuple: ``(normal_map, normal_dir)`` where ``normal_map`` is the
        assembled ``(img_h, img_w, 3)`` array of rendered normals (before the
        camera rotation is applied) and ``normal_dir`` is the directory the
        PNG files were written to.
    """
    if idx < 0:
        idx = np.random.randint(self.dataset.n_images)

    print('Rendering normal maps... iter: {}, camera: {}'.format(self.iter_step, idx))

    if resolution_level < 0:
        # Fixed: the fallback previously read a non-existent attribute
        # (``validate_resolution_level``); the value configured in __init__
        # is ``val_normal_resolution_level``.
        resolution_level = self.val_normal_resolution_level

    (rays_o_patch_center,
     rays_d_patch_center,
     rays_o_patches_all,
     rays_v_patches_all,
     rays_ez,
     rays_A_inverse,
     horizontal_num_patch,
     vertical_num_patch) = self.dataset.gen_patches_at(idx,
                                                       resolution_level=resolution_level,
                                                       patch_H=self.patch_size,
                                                       patch_W=self.patch_size)

    mask_np = self.dataset.masks_np[idx].astype(bool)  # (H, W)
    img_w = horizontal_num_patch * self.patch_size
    img_h = vertical_num_patch * self.patch_size
    # resize mask to the size of the image
    mask_np = cv.resize(mask_np.astype(np.uint8), ((int(img_w), int(img_h))),
                        interpolation=cv.INTER_NEAREST).astype(bool)

    # Render in chunks of patches to bound memory use.
    num_patches = rays_o_patches_all.shape[0]
    eval_patch_size = 1024
    comp_normal_map = np.zeros([img_h, img_w, 3])
    comp_normal_list = []
    for patch_idx in range(0, num_patches, eval_patch_size):
        chunk = slice(patch_idx, patch_idx + eval_patch_size)
        rays_o_patch_center_batch = rays_o_patch_center[chunk]
        rays_d_patch_center_batch = rays_d_patch_center[chunk]
        rays_o_patches_all_batch = rays_o_patches_all[chunk]
        rays_v_patches_all_batch = rays_v_patches_all[chunk]
        rays_ez_batch = rays_ez[chunk]
        rays_A_inverse_batch = rays_A_inverse[chunk]

        near, far = self.dataset.near_far_from_sphere(rays_o_patch_center_batch,
                                                      rays_d_patch_center_batch)
        render_out = self.renderer.render(rays_o_patches_all_batch,
                                          rays_v_patches_all_batch,
                                          rays_ez_batch,
                                          near, far,
                                          rays_A_inverse_batch,
                                          gradient_method,
                                          mode='eval')
        comp_normal = render_out['comp_normal']
        comp_normal_list.append(comp_normal.detach().cpu().numpy())

    comp_normal_list = np.concatenate(comp_normal_list, axis=0)

    # Paste the patches back into the image in row-major patch order.
    count = 0
    for i in range(0, img_h, self.patch_size):
        for j in range(0, img_w, self.patch_size):
            comp_normal_map[i:i + self.patch_size, j:j + self.patch_size] = comp_normal_list[count]
            count += 1

    normal_img_world = comp_normal_map
    rot = np.linalg.inv(self.dataset.pose_all[idx, :3, :3].detach().cpu().numpy())  # W2C rotation
    normal_img = np.matmul(rot, normal_img_world[..., None]).squeeze()
    normal_img[..., [1, 2]] *= -1  # flip the y and z components
    normal_img_png = (np.squeeze(normal_img) * 128 + 128).clip(0, 255)
    normal_img_norm = np.linalg.norm(np.squeeze(normal_img), axis=2, keepdims=True)

    normal_dir = os.path.join(self.base_exp_dir, f'normals_validation_{gradient_method}',
                              'iter_{:0>6d}'.format(self.iter_step))
    os.makedirs(normal_dir, exist_ok=True)

    # Unit-length version of the rendered normals, encoded like the raw one.
    normal_img_normalized = np.squeeze(normal_img) / (normal_img_norm + 1e-7)
    normal_img_normalized = (np.squeeze(normal_img_normalized) * 128 + 128).clip(0, 255)

    normal_eval = np.zeros((img_h, img_w, 3))
    normal_eval[:normal_img_png.shape[0], :normal_img_png.shape[1]] = normal_img_png

    normal_eval_normalized = np.zeros((img_h, img_w, 3))
    normal_eval_normalized[:normal_img_normalized.shape[0], :normal_img_normalized.shape[1]] = normal_img_normalized
    normal_img_normalized = crop_image_by_mask(
        toRGBA(normal_eval_normalized.astype(np.uint8)[..., ::-1], mask_np), mask_np)

    cv.imwrite(os.path.join(normal_dir, '{:0>8d}_{}_{}_rendered.png'.format(self.iter_step, 0, idx)),
               normal_eval[..., ::-1])
    cv.imwrite(os.path.join(normal_dir, '{:0>8d}_{}_{}_normalized.png'.format(self.iter_step, 0, idx)),
               normal_img_normalized)

    return normal_img_world, normal_dir
def remove_isolated_clusters(self, mesh):
    """Return a copy of ``mesh`` reduced to its largest connected component.

    Used to clean the mesh extracted by marching cubes.

    Args:
        mesh: Object exposing an ``as_open3d`` attribute whose value
            provides ``cluster_connected_triangles``.

    Returns:
        A deep copy of the converted mesh from which every triangle outside
        the cluster with the most triangles, and every vertex left
        unreferenced, has been removed.
    """
    import copy

    o3d_mesh = mesh.as_open3d
    cluster_of_triangle, triangles_per_cluster, _cluster_area = o3d_mesh.cluster_connected_triangles()
    cluster_of_triangle = np.asarray(cluster_of_triangle)
    triangles_per_cluster = np.asarray(triangles_per_cluster)

    cleaned = copy.deepcopy(o3d_mesh)
    keep_cluster = triangles_per_cluster.argmax()
    cleaned.remove_triangles_by_mask(cluster_of_triangle != keep_cluster)
    cleaned.remove_unreferenced_vertices()
    return cleaned
@torch.no_grad()
def eval_geo(self, resolution=1024):
    """Extract the current surface and score it against ``dataset.points_gt``.

    Writes the reference point cloud (once), the extracted mesh and the
    sampled evaluation points into ``<base_exp_dir>/points_val`` and logs
    both metrics to ``self.writer``.

    Args:
        resolution: Grid resolution handed to ``renderer.extract_geometry``.

    Returns:
        tuple: ``(cd, fscore)`` as returned by
        ``chamfer_distance_and_f1_score``.
    """
    out_dir = os.path.join(self.base_exp_dir, 'points_val')
    os.makedirs(out_dir, exist_ok=True)

    # Reference points are written only the first time.
    gt_cloud = o3d.geometry.PointCloud()
    gt_cloud.points = o3d.utility.Vector3dVector(self.dataset.points_gt)
    gt_cloud_path = os.path.join(out_dir, "pcd_gt.ply")
    if not os.path.exists(gt_cloud_path):
        o3d.io.write_point_cloud(gt_cloud_path, gt_cloud)

    # marching cubes inside the object's bounding box
    bbox_min = torch.tensor(self.dataset.object_bbox_min, dtype=torch.float32)
    bbox_max = torch.tensor(self.dataset.object_bbox_max, dtype=torch.float32)
    vertices, triangles = self.renderer.extract_geometry(
        bbox_min, bbox_max, resolution=resolution, threshold=0)

    # The un-scaled mesh is used for ray casting; a scaled copy is exported.
    mesh = trimesh.Trimesh(np.asarray(vertices), np.asarray(triangles), process=False)
    scaled_vertices = vertices * self.dataset.scale_mats_np[0][0, 0] + self.dataset.scale_mats_np[0][:3, 3][None]
    scaled_mesh = trimesh.Trimesh(np.asarray(scaled_vertices), np.asarray(triangles), process=False)
    scaled_mesh.export(os.path.join(out_dir, f"{self.iter_step}_world.obj"))

    visible_points = self.find_visible_points(mesh)
    points_eval = visible_points * self.dataset.scale_mats_np[0][0, 0] + self.dataset.scale_mats_np[0][:3, 3][None]

    # save the sampled points
    eval_cloud = o3d.geometry.PointCloud()
    eval_cloud.points = o3d.utility.Vector3dVector(points_eval)
    o3d.io.write_point_cloud(os.path.join(out_dir, f"{self.iter_step}_points_eval.ply"), eval_cloud)

    cd, fscore = chamfer_distance_and_f1_score(points_eval, self.dataset.points_gt)
    self.writer.add_scalar('Statistics/cd', cd, self.iter_step)
    self.writer.add_scalar('Statistics/fscore', fscore, self.iter_step)
    return cd, fscore
def chamfer_distance_and_f1_score(ref_points, eval_points, f_threshold=0.5):
    """Compute the chamfer distance and F1 score between two point sets.

    Parameters:
        ref_points (numpy.ndarray): Reference points, shape (p, 3).
        eval_points (numpy.ndarray): Points to be evaluated, shape (q, 3).
        f_threshold (float, optional): Distance below which a point counts as
            matched for the F1 score, in the same unit as the points.
            Default is 0.5.

    Returns:
        chamfer_dist (float): Mean of the two directed average
            nearest-neighbour distances between ref_points and eval_points;
            ``nan`` if either set is empty.
        f_score (float): Harmonic mean of precision and recall at
            ``f_threshold``; ``0.0`` when nothing matches or a set is empty.
    """
    print("computing chamfer distance and f1 score...")

    ref_points = np.asarray(ref_points)
    eval_points = np.asarray(eval_points)
    if len(ref_points) == 0 or len(eval_points) == 0:
        # Nearest-neighbour distances are undefined without points on both
        # sides; report "no match" instead of propagating nan/inf warnings.
        return float("nan"), 0.0

    distance_eval2gt, _ = KDTree(ref_points).query(eval_points, k=1, p=2)  # p=2 for Euclidean distance
    distance_gt2eval, _ = KDTree(eval_points).query(ref_points, k=1, p=2)

    # following Uncertainty-aware deep multi-view photometric stereo
    chamfer_dist = (np.mean(distance_eval2gt) + np.mean(distance_gt2eval)) / 2

    precision = np.mean(distance_eval2gt < f_threshold)
    recall = np.mean(distance_gt2eval < f_threshold)
    if precision + recall == 0:
        # Avoid 0/0 -> nan when no point lies within the threshold.
        f_score = 0.0
    else:
        f_score = 2 * precision * recall / (precision + recall)

    return chamfer_dist, f_score
self.upsample_factor = conf.get_int('upsample_factor', default=1) ic(self.exclude_view_list) # load the GT mesh for evaluation if any mesh_path = os.path.join(self.data_dir, 'mesh_Gt.ply') if os.path.exists(mesh_path): self.mesh_gt = o3d.io.read_triangle_mesh(mesh_path) else: self.mesh_gt = None self.points_gt = None # will be computed from the mesh at evaluation time camera_dict = np.load(os.path.join(self.data_dir, self.cameras_name)) self.camera_dict = camera_dict self.normal_lis = sorted(glob(os.path.join(self.data_dir, normal_dir, '*.exr'))) self.n_images = len(self.normal_lis) self.train_images = set(range(self.n_images)) - set(self.exclude_view_list) self.img_idx_list = [int(os.path.basename(x).split('.')[0]) for x in self.normal_lis] print("loading normal maps...") with ThreadPoolExecutor(max_workers=min(64, os.cpu_count()*5)) as executor: def read_normal(im_name): return pyexr.read(im_name)[..., :3] self.normal_np = np.stack(list(executor.map(read_normal, self.normal_lis))) if self.upsample_factor > 1: # resize normal maps self.normal_np = F.interpolate(torch.from_numpy(self.normal_np).permute(0, 3, 1, 2), scale_factor=self.upsample_factor, mode='bilinear', align_corners=False).permute(0, 2, 3, 1).numpy() self.normals = torch.from_numpy(self.normal_np.astype(np.float32)).to(self.device) # [n_images, H, W, 3] print("loading normal maps done.") self.masks_lis = sorted(glob(os.path.join(self.data_dir, 'mask/*.png'))) with ThreadPoolExecutor(max_workers=min(64, os.cpu_count()*5)) as executor: def read_mask(im_name): return cv.imread(im_name) self.masks_np = np.stack(list(executor.map(read_mask, self.masks_lis))) / 255.0 if self.upsample_factor > 1: # resize mask self.masks_np = F.interpolate(torch.from_numpy(self.masks_np).permute(0, 3, 1, 2), scale_factor=self.upsample_factor, mode='nearest').permute(0, 2, 3, 1).numpy() self.masks_np = self.masks_np[..., 0] self.total_pixel = np.sum(self.masks_np) # set background of normal map to 0 
self.normal_np[self.masks_np == 0] = 0 # world_mat is a projection matrix from world to image self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in self.img_idx_list] self.scale_mats_np = [] # scale_mat: used for coordinate normalization, we assume the scene to render is inside a unit sphere at origin. self.scale_mats_np = [camera_dict['scale_mat_%d' % idx].astype(np.float32) for idx in self.img_idx_list] self.intrinsics_all = [] self.pose_all = [] self.V_inverse_all = [] self.H, self.W = self.normal_np.shape[1], self.normal_np.shape[2] for scale_mat, world_mat, normal_map, mask in zip(self.scale_mats_np, self.world_mats_np, self.normals, self.masks_np): P = world_mat @ scale_mat P = P[:3, :4] intrinsics, C2W = load_K_Rt_from_P(None, P) if self.upsample_factor > 1: # resize intrinsics intrinsics[0, 0] *= self.upsample_factor intrinsics[1, 1] *= self.upsample_factor intrinsics[0, 2] *= self.upsample_factor intrinsics[1, 2] *= self.upsample_factor self.intrinsics_all.append(torch.from_numpy(intrinsics).float()) self.pose_all.append(torch.from_numpy(C2W).float()) intrinsics_inverse = torch.inverse(torch.from_numpy(intrinsics).float()) pose = torch.from_numpy(C2W).float() # compute the V_inverse tx = torch.linspace(0, self.W - 1, int(self.W)) ty = torch.linspace(0, self.H - 1, int(self.H)) pixels_x, pixels_y = torch.meshgrid(tx, ty) p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1).to(intrinsics_inverse.device) # W, H, 3 p = torch.matmul(intrinsics_inverse[None, None, :3, :3], p[:, :, :, None]).squeeze() # W, H, 3 rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True) # W, H, 3 rays_v = torch.matmul(pose[None, None, :3, :3], rays_v[:, :, :, None]).squeeze() # W, H, 3 rays_v = rays_v.transpose(0, 1).to(self.device) # H, W, 3 # the axis direction of the camera coordinate system in the world coordinate system rays_right = pose[None, :3, 0].expand(rays_v.shape).to(self.device) # H, W, 3 rays_down = pose[None, 
:3, 1].expand(rays_v.shape).to(self.device) # H, W, 3 V_concat = torch.cat([rays_v[..., None, :], rays_right[..., None, :], rays_down[..., None, :]], dim=-2) # (H, W, 3, 3) # computing the inverse may take a while if the resolution is high # For 512x612, it takes about 0.8ms V_inverse = torch.inverse(V_concat) # (H, W, 3, 3) self.V_inverse_all.append(V_inverse) self.masks = torch.from_numpy(self.masks_np.astype(np.float32)).to(self.device) # [n_images, H, W, 3] self.intrinsics_all = torch.stack(self.intrinsics_all).to(self.device) # [n_images, 4, 4] self.intrinsics_all_inv = torch.inverse(self.intrinsics_all) # [n_images, 4, 4] self.focal_length = self.intrinsics_all[0][0, 0] self.pose_all = torch.stack(self.pose_all).to(self.device) # [n_images, 4, 4] self.image_pixels = self.H * self.W self.V_inverse_all = torch.stack(self.V_inverse_all).to(self.device) # [n_images, H, W, 3, 3] # for mesh extraction self.object_bbox_min = np.array([-1., -1., -1.]) self.object_bbox_max = np.array([1., 1., 1.]) print('Load data: End') def gen_rays_at(self, img_idx, resolution_level=1, within_mask=False): """ Generate all rays at world space from one camera. 
""" mask_np = self.masks_np[img_idx].astype(bool) # resize the mask using resolution_level mask_np = cv.resize(mask_np.astype(np.uint8)*255, (int(self.W // resolution_level), int(self.H // resolution_level)), interpolation=cv.INTER_NEAREST).astype(bool) l = resolution_level tx = torch.linspace(0, self.W - 1, int(self.W // l)) ty = torch.linspace(0, self.H - 1, int(self.H // l)) pixels_x, pixels_y = torch.meshgrid(tx, ty) p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1) # W, H, 3 p = torch.matmul(self.intrinsics_all_inv[img_idx, None, None, :3, :3], p[:, :, :, None]).squeeze() # W, H, 3 rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True) # W, H, 3 rays_v = torch.matmul(self.pose_all[img_idx, None, None, :3, :3], rays_v[:, :, :, None]).squeeze() # W, H, 3 rays_o = self.pose_all[img_idx, None, None, :3, 3].expand(rays_v.shape) # W, H, 3 rays_o = rays_o.transpose(0, 1) rays_v = rays_v.transpose(0, 1) if within_mask: return rays_o[mask_np], rays_v[mask_np] else: return rays_o, rays_v def gen_patches_at(self, img_idx, resolution_level=1, patch_H=3, patch_W=3): tx = torch.linspace(0, self.W - 1, int(self.W // resolution_level)) ty = torch.linspace(0, self.H - 1, int(self.H // resolution_level)) pixels_y, pixels_x = torch.meshgrid(ty, tx) p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1) # H, W, 3 p = torch.matmul(self.intrinsics_all_inv[img_idx, :3, :3], p[..., None]).squeeze() # H, W, 3 rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True) # W, H, 3 rays_v = torch.matmul(self.pose_all[img_idx, :3, :3], rays_v[:, :, :, None]).squeeze() # H, W, 3 # split rays_v into non-overlapping patches height, width, _ = rays_v.shape horizontal_num_patch = width // patch_W vertical_num_patch = height // patch_H rays_v_patches_all = [] rays_V_inverse_patches_all = [] rays_ez_patches_all = [] mask_value = [] for i in range(0, height-patch_H//2-1, patch_H): for j in range(0, width-patch_W//2-1, patch_W): rays_v_patch = 
rays_v[i:i + patch_H, j:j + patch_W] rays_v_patches_all.append(rays_v_patch) rays_V_inverse_patch = self.V_inverse_all[img_idx][i:i + patch_H, j:j + patch_W] rays_V_inverse_patches_all.append(rays_V_inverse_patch) rays_ez_patch = self.normals[img_idx][i + patch_H//2, j + patch_W//2] rays_ez_patches_all.append(rays_ez_patch) mask_value.append(self.masks_np[img_idx][i + patch_H//2, j + patch_W//2].astype(bool)) rays_v_patches_all = torch.stack(rays_v_patches_all, dim=0) # (num_patch, patch_H, patch_W, 3) rays_V_inverse_patches_all = torch.stack(rays_V_inverse_patches_all, dim=0) # (num_patch, patch_H, patch_W, 3, 3) rays_o_patches_all = self.pose_all[img_idx, :3, 3].expand(rays_v_patches_all.shape) # (num_patch, patch_H, patch_W, 3) rays_o_patch_center = rays_o_patches_all[:, patch_H//2, patch_W//2] # (num_patch, 3) rays_d_patch_center = rays_v_patches_all[:, patch_H//2, patch_W//2] # (num_patch, 3) marching_plane_normal_patches_all = self.pose_all[img_idx, :3, 2].expand(rays_d_patch_center.shape) # (num_patch, 3) return rays_o_patch_center, \ rays_d_patch_center, \ rays_o_patches_all, \ rays_v_patches_all, \ marching_plane_normal_patches_all, \ rays_V_inverse_patches_all, horizontal_num_patch, vertical_num_patch def gen_random_patches(self, num_patch, patch_H=3, patch_W=3): """ Generate random patches of rays at world space from all viewpoints. X-axis right, Y-axis down Parameters: num_patch (int): The number of patches to generate. patch_H (int, optional): The height of the patches. Default is 3. patch_W (int, optional): The width of the patches. Default is 3. Returns: rays_o_patch_all (torch.Tensor): The origins of the rays in each patch. A tensor of shape (num_patch, patch_H, patch_W, 3). rays_d_patch_all (torch.Tensor): The directions of the rays in each patch. A tensor of shape (num_patch, patch_H, patch_W, 3). marching_plane_normal (torch.Tensor): The normal direction of the image/marching plane. 
Since we randomly sample patches from all viewpoints, this normal is only identical for each patch. A tensor of shape (num_patch, 3). V_inverse_patch_all (torch.Tensor): The inverse of the V matrix at patches of pixels. A tensor of shape (num_patch, patch_H, patch_W, 3, 3). normal (torch.Tensor): The normals at patches of pixels. A tensor of shape (num_patch, patch_H, patch_W, 3). mask (torch.Tensor): The mask values at patches of pixels. A tensor of shape (num_patch, patch_H, patch_W, 1). """ # randomly sample center pixel locations of patches # assume all images have the same resolution patch_center_x = torch.randint(low=0+patch_W//2, high=self.W-1-patch_W//2, size=[num_patch], device=self.device) # (num_patch, ) patch_center_y = torch.randint(low=0+patch_H//2, high=self.H-1-patch_H//2, size=[num_patch], device=self.device) # (num_patch, ) # compute all pixel locations within the patches given patch size (patch_H, patch_W) patch_center_x_all = patch_center_x[:, None, None] + torch.arange(-patch_W//2+1, patch_W//2+1, device=self.device).repeat(patch_H, 1) # (num_patch, patch_H, patch_W) patch_center_y_all = patch_center_y[:, None, None] + torch.arange(-patch_H//2+1, patch_H//2+1, device=self.device).reshape(-1, 1).repeat(1, patch_W) # (num_patch, patch_H, patch_W) # randomly sample viewpoints img_idx = np.random.choice(list(self.train_images), size=[num_patch]) # (num_patch, ) img_idx = torch.tensor(img_idx, device=self.device) img_idx_expand = img_idx.view(-1, 1, 1).expand_as(patch_center_x_all) # (num_patch, patch_H, patch_W) # input normals and mask values for supervision normal = self.normals[img_idx_expand, patch_center_y_all, patch_center_x_all] # (num_patch, patch_H, patch_W, 3) V_inverse_patch_all = self.V_inverse_all[img_idx_expand, patch_center_y_all, patch_center_x_all] # (num_patch, patch_H, patch_W, 3, 3) mask = self.masks[img_idx_expand, patch_center_y_all, patch_center_x_all].unsqueeze(-1)#[..., :1] # (num_patch, patch_H, patch_W) # compute all ray 
directions within patches p_all = torch.stack([patch_center_x_all, patch_center_y_all, torch.ones_like(patch_center_y_all)], dim=-1).float().to(self.device) # (num_patch, patch_H, patch_W, 3) p_all = torch.matmul(self.intrinsics_all_inv[img_idx_expand, :3, :3], p_all[..., None])[..., 0] # (num_patch, patch_H, patch_W, 3) p_norm_all = torch.linalg.norm(p_all, ord=2, dim=-1, keepdim=True) # (num_patch, patch_H, patch_W, 1) rays_d_patch_all = p_all / p_norm_all # (num_patch, patch_H, patch_W, 3) rays_d_patch_all = torch.matmul(self.pose_all[img_idx, None, None, :3, :3], rays_d_patch_all[..., None])[..., 0] # (num_patch, patch_H, patch_W, 3) rays_o_patch_all = self.pose_all[img_idx, None, None, :3, 3].expand(rays_d_patch_all.shape) # (num_patch, patch_H, patch_W, 3) # the normal direction of the image/marching plane is the 3rd column of world2camera transformation marching_plane_normal = self.pose_all[img_idx, :3, 2].expand((num_patch, 3)) # (num_patch, 3) return rays_o_patch_all, \ rays_d_patch_all, \ marching_plane_normal, \ V_inverse_patch_all, \ normal,\ mask def near_far_from_sphere(self, rays_o, rays_d): """ This function calculates the near and far intersection points of rays with a unit sphere. Parameters: rays_o (torch.Tensor): Origin of the rays. A tensor of shape (N, 3) where N is the number of rays. rays_d (torch.Tensor): Direction of the rays. A tensor of shape (N, 3) where N is the number of rays. Returns: near (torch.Tensor): Near intersection points of the rays with the unit sphere. A tensor of shape (N, ). far (torch.Tensor): Far intersection points of the rays with the unit sphere. A tensor of shape (N, ). 
""" a = torch.sum(rays_d**2, dim=-1, keepdim=True) b = 2.0 * torch.sum(rays_o * rays_d, dim=-1, keepdim=True) c = torch.sum(rays_o**2, dim=-1, keepdim=True) - 1.0 mid = 0.5 * (-b) / a near = mid - torch.sqrt(b ** 2 - 4 * a * c) / (2 * a) far = mid + torch.sqrt(b ** 2 - 4 * a * c) / (2 * a) return near[..., 0], far[..., 0] def image_at(self, idx, resolution_level): img = cv.imread(self.images_lis[idx]) return (cv.resize(img, (self.W // resolution_level, self.H // resolution_level))).clip(0, 255) ================================================ FILE: models/fields.py ================================================ import torch import torch.nn as nn import numpy as np import tinycudann as tcnn from icecream import ic class SDFNetwork(nn.Module): def __init__(self, d_in, d_out, d_hidden, n_layers, skip_in=(4,), bias=0.5, geometric_init=True, weight_norm=True, inside_outside=False, encoding_config=None, input_concat=False): super(SDFNetwork, self).__init__() self.input_concat = input_concat dims = [d_in] + [d_hidden for _ in range(n_layers)] + [d_out] if encoding_config is not None: self.encoding = tcnn.Encoding(d_in, encoding_config).to(torch.float32) dims[0] = self.encoding.n_output_dims if input_concat: dims[0] += d_in else: self.encoding = None self.num_layers = len(dims) self.skip_in = skip_in self.bindwidth = 0 self.enc_dim = self.encoding.n_output_dims for l in range(0, self.num_layers - 1): if l + 1 in self.skip_in: out_dim = dims[l + 1] - dims[0] else: out_dim = dims[l + 1] lin = nn.Linear(dims[l], out_dim) if geometric_init: if l == self.num_layers - 2: if not inside_outside: torch.nn.init.normal_(lin.weight, mean=np.sqrt(np.pi) / np.sqrt(dims[l]), std=0.0001) torch.nn.init.constant_(lin.bias, -bias) else: torch.nn.init.normal_(lin.weight, mean=-np.sqrt(np.pi) / np.sqrt(dims[l]), std=0.0001) torch.nn.init.constant_(lin.bias, bias) elif self.encoding is not None and l == 0: torch.nn.init.constant_(lin.bias, 0.0) torch.nn.init.constant_(lin.weight[:, 3:], 0.0) 
torch.nn.init.normal_(lin.weight[:, :3], 0.0, np.sqrt(2) / np.sqrt(out_dim)) elif self.encoding is not None and l in self.skip_in: torch.nn.init.constant_(lin.bias, 0.0) torch.nn.init.normal_(lin.weight, 0.0, np.sqrt(2) / np.sqrt(out_dim)) torch.nn.init.constant_(lin.weight[:, -(dims[0] - 3):], 0.0) else: torch.nn.init.constant_(lin.bias, 0.0) torch.nn.init.normal_(lin.weight, 0.0, np.sqrt(2) / np.sqrt(out_dim)) if weight_norm: lin = nn.utils.weight_norm(lin) setattr(self, "lin" + str(l), lin) self.activation = nn.Softplus(beta=100) # self.activation = nn.ReLU() def increase_bandwidth(self): self.bindwidth += 1 def forward(self, inputs): if self.encoding is not None: encoded = self.encoding(inputs).to(torch.float32) # set the dimension of the encoding to 0 if the input is outside the bandwidth enc_mask = torch.ones(self.enc_dim, dtype=torch.bool, device=encoded.device, requires_grad=False) enc_mask[self.bindwidth*2:] = 0 encoded = encoded * enc_mask if self.input_concat: inputs = torch.cat([inputs, encoded], dim=1) x = inputs for l in range(0, self.num_layers - 1): lin = getattr(self, "lin" + str(l)) if l in self.skip_in: x = torch.cat([x, inputs], 1) / np.sqrt(2) x = lin(x) if l < self.num_layers - 2: x = self.activation(x) return x def sdf(self, x): return self.forward(x)[:, :1] def sdf_hidden_appearance(self, x): return self.forward(x) @torch.enable_grad() def gradient(self, x): x.requires_grad_(True) y = self.sdf(x) d_output = torch.ones_like(y, requires_grad=False, device=y.device) gradients = torch.autograd.grad( outputs=y, inputs=x, grad_outputs=d_output, create_graph=True, retain_graph=True, only_inputs=True)[0] return gradients.unsqueeze(1) @torch.enable_grad() def divergence(self, y, x): div = 0. 
class SingleVarianceNetwork(nn.Module):
    """Holds a single learnable scalar and exposes exp(10 * scalar) for every row of the input."""

    def __init__(self, init_val):
        super(SingleVarianceNetwork, self).__init__()
        self.register_parameter('variance', nn.Parameter(torch.tensor(init_val)))

    def forward(self, x):
        """
        Return exp(10 * variance) repeated once per input row.

        Only the length of ``x`` is used. The result has shape (len(x), 1) and is created with
        the dtype and device of the parameter, so it does not depend on the process-wide
        default tensor type.
        """
        ones = torch.ones([len(x), 1], dtype=self.variance.dtype, device=self.variance.device)
        return ones * torch.exp(self.variance * 10.0)
b_min_np)[None, :] + b_min_np[None, :] return vertices, triangles class NeuSRenderer: def __init__(self, sdf_network, deviation_network, gradient_method="dfd"): self.sdf_network = sdf_network self.deviation_network = deviation_network # define the occ grid, see NerfAcc for more details self.scene_aabb = torch.as_tensor([-1., -1., -1., 1., 1., 1.], dtype=torch.float32) # define the contraction_type for scene contraction self.contraction_type = ContractionType.AABB # create Occupancy Grid self.occupancy_grid = OccupancyGrid( roi_aabb=self.scene_aabb, resolution=128, # if res is different along different axis, use [256,128,64] contraction_type=self.contraction_type).to("cuda") self.sampling_step_size = 0.01 # ray marching step size, will be modified during training self.gradient_method = gradient_method # dfd or fd or ad def occ_eval_fn(self, x): # function for updating the occ grid given the current sdf sdf = self.sdf_network(x)[..., :1] alpha = torch.sigmoid(- sdf * 80) # occ grids with alpha below the occ threshold will be set as 0 return alpha def render(self, rays_o_patch_all, # (num_patch, patch_H, patch_W, 3) rays_d_patch_all, # (num_patch, patch_H, patch_W, 3) marching_plane_normal, # (num_patch, 3) near, # (num_patch,) far, # (num_patch,) V_inverse_patch_all, # (num_patch, patch_H, patch_W, 3, 3) val_gradient_method='dfd', mode='train'): # patch size, should be odd patch_H = rays_o_patch_all.shape[1] patch_W = rays_o_patch_all.shape[2] num_patch = rays_o_patch_all.shape[0] # extract camera location and ray direction of the patches' center pixels rays_o_patch_center = rays_o_patch_all[:, patch_H//2, patch_W//2] # (num_patch, 3) rays_d_patch_center = rays_d_patch_all[:, patch_H//2, patch_W//2] # (num_patch, 3) def alpha_fn_patch_center(t_starts, t_ends, ray_indices, ret_sdf=False): # the function used in ray marching ray_indices = ray_indices.long() t_origins = rays_o_patch_center[ray_indices] t_dirs = rays_d_patch_center[ray_indices] positions_starts = 
t_origins + t_dirs * t_starts positions_ends = t_origins + t_dirs * t_ends t_starts_shift_left = t_starts[1:] # attach the last element of t_ends to the end of t_starts_shift_left t_starts_shift_left = torch.cat([t_starts_shift_left, t_starts[-1:]], 0) # compute the diff mask between t_ends and t_starts_shift_left diff_mask = ((t_ends - t_starts_shift_left) != 0).squeeze() # if the diff maks is empty, return positions_ends_diff = positions_ends[diff_mask].reshape(-1, 3) positions_all = torch.cat([positions_starts, positions_ends_diff], 0) sdf_all = self.sdf_network(positions_all) sdf_start = sdf_all[:positions_starts.shape[0]] sdf_end_diff = sdf_all[positions_starts.shape[0]:] sdf_start_shift_left = sdf_start[1:] sdf_start_shift_left = torch.cat([sdf_start_shift_left, sdf_start[-1:]], 0) sdf_start_shift_left[diff_mask] = sdf_end_diff inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6) # Single parameter inv_s = inv_s.expand(sdf_start.shape[0], 1) prev_cdf = torch.sigmoid(sdf_start * inv_s) next_cdf = torch.sigmoid(sdf_start_shift_left * inv_s) p = prev_cdf - next_cdf c = prev_cdf alpha = ((p + 1e-5) / (c + 1e-5)).view(-1).clip(0.0, 1.0) alpha = alpha.reshape(-1, 1) if ret_sdf: return alpha, sdf_start, sdf_start_shift_left else: return alpha with torch.no_grad(): patch_indices, t_starts_patch_center, t_ends_patch_center = ray_marching( rays_o_patch_center, rays_d_patch_center, t_min=near, t_max=far, grid=self.occupancy_grid, render_step_size=self.sampling_step_size, stratified=True, cone_angle=0.0, early_stop_eps=1e-8, alpha_fn=alpha_fn_patch_center, ) samples_per_ray = patch_indices.shape[0] / num_patch if patch_indices.shape[0] == 0: # all patch center rays are within the zero region of the occ grid. skip this iteration. 
return { "comp_normal": torch.zeros([num_patch, patch_H, patch_W, 3], device=rays_o_patch_center.device) } num_samples = patch_indices.shape[0] patch_indices = patch_indices.long() # compute the sampling distance on remaining rays t_starts_patch_all = t_starts_patch_center[:, None, None, :] * (rays_d_patch_center * marching_plane_normal).sum(-1, keepdim=True)[patch_indices][:, None, None, :] \ /(rays_d_patch_all * marching_plane_normal[:, None, None, :]).sum(-1, keepdim=True)[patch_indices] t_ends_patch_all = t_ends_patch_center[:, None, None, :] * (rays_d_patch_center * marching_plane_normal).sum(-1, keepdim=True)[patch_indices][:, None, None, :] \ /(rays_d_patch_all * marching_plane_normal[:, None, None, :]).sum(-1, keepdim=True)[patch_indices] t_starts_patch_center_shift_left = t_starts_patch_center[1:] t_starts_patch_center_shift_left = torch.cat([t_starts_patch_center_shift_left, t_starts_patch_center[-1:]], 0) diff_mask = ((t_ends_patch_center - t_starts_patch_center_shift_left) != 0)[..., 0] positions_starts_patch_all = rays_o_patch_all[patch_indices] + rays_d_patch_all[patch_indices] * t_starts_patch_all positions_ends_patch_all = rays_o_patch_all[patch_indices] + rays_d_patch_all[patch_indices] * t_ends_patch_all # (num_samples, patch_H, patch_W, 3) positions_ends_diff = positions_ends_patch_all[diff_mask] positions_all = torch.cat([positions_starts_patch_all, positions_ends_diff], 0) positions_all_flat = positions_all.reshape(-1, 3) sdf_all = self.sdf_network(positions_all_flat) sdf_all = sdf_all.reshape(*positions_all.shape[:-1], 1) sdf_starts_patch_all = sdf_all[:positions_starts_patch_all.shape[0]] sdf_end_diff = sdf_all[positions_starts_patch_all.shape[0]:] sdf_ends_patch_all = sdf_starts_patch_all[1:] sdf_ends_patch_all = torch.cat([sdf_ends_patch_all, sdf_starts_patch_all[-1:]], 0) sdf_ends_patch_all[diff_mask] = sdf_end_diff inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6) # Single parameter prev_cdf = 
torch.sigmoid(sdf_starts_patch_all * inv_s) # (num_samples, patch_H, patch_W, 1) next_cdf = torch.sigmoid(sdf_ends_patch_all * inv_s) # (num_samples, patch_H, patch_W, 1) p = prev_cdf - next_cdf c = prev_cdf alpha = ((p + 1e-5) / (c + 1e-5)).clip(0.0, 1.0) # (num_samples, patch_H, patch_W, 1) weights_cuda = render_weight_from_alpha_patch_based(alpha.reshape(num_samples, patch_H*patch_W, 1), patch_indices) # (num_samples, patch_H, patch_W, 1) if mode == 'train': gradient_method = self.gradient_method elif mode == 'eval': gradient_method = val_gradient_method if gradient_method == "dfd": with torch.no_grad(): # distance between neighboring points on the same marching plane dist_x = torch.norm(positions_starts_patch_all[:, :, 1:, :] - positions_starts_patch_all[:, :, :-1, :], dim=-1, keepdim=True) # (num_samples, patch_H, patch_W-1, 1) dist_y = torch.norm(positions_starts_patch_all[:, 1:, :, :] - positions_starts_patch_all[:, :-1, :, :], dim=-1, keepdim=True) # (num_samples, patch_H-1, patch_W, 1) # directional derivatives along the ray direction # forward difference df_dt = (sdf_ends_patch_all - sdf_starts_patch_all) / (t_ends_patch_all - t_starts_patch_all) # (num_samples, patch_H, patch_W, 1) # directional derivatives along the image's x-direction # central difference df_dx = (sdf_starts_patch_all[:, :, 2:] - sdf_starts_patch_all[:, :, :-2]) / (dist_x[:, :, :-1] + dist_x[:, :, 1:] ) # (num_samples, patch_H, patch_W-2, 1) # directional derivatives along the image's y-direction # central difference df_dy = (sdf_starts_patch_all[:, 2:, :] - sdf_starts_patch_all[:, :-2, :]) / (dist_y[:, 1:, :] + dist_y[:, :-1, :]) # (num_samples, patch_H-2, patch_W, 1) # for points only have one-side neighbor point, # we use forward or backward difference correspondingly df_dx_left_boundary = (sdf_starts_patch_all[:, :, 1:2] - sdf_starts_patch_all[:, :, 0:1]) / dist_x[:, :, 0:1] # (num_samples, patch_H, 1) df_dx_right_boundary = (sdf_starts_patch_all[:, :, -1:] - 
sdf_starts_patch_all[:, :, -2:-1]) / dist_x[:, :, -1:] # (num_samples, patch_H, 1) df_dy_top_boundary = (sdf_starts_patch_all[:, 1:2, :] - sdf_starts_patch_all[:, 0:1, :]) / dist_y[:, 0:1, :] # (num_samples, 1, patch_W) df_dy_bottom_boundary = (sdf_starts_patch_all[:, -1:, :] - sdf_starts_patch_all[:, -2:-1, :]) / dist_y[:, -1:, :] # (num_samples, 1, patch_W) # concat the directional derivatives for boundary points and central points df_dx = torch.cat([df_dx_left_boundary, df_dx, df_dx_right_boundary], dim=2) # (num_samples, patch_H, patch_W, 1) df_dy = torch.cat([df_dy_top_boundary, df_dy, df_dy_bottom_boundary], dim=1) # (num_samples, patch_H, patch_W, 1) # concat the directional partial derivatives in three directions projected_gradients = torch.cat([df_dt, df_dx, df_dy], dim=-1) # (num_patches, patch_H, patch_W, 3) # recover the gradients from directional partial derivatives using the inverse of known directions V_inverse = V_inverse_patch_all[patch_indices] # (num_patches, patch_H, patch_W, 3, 3) gradients = (V_inverse @ projected_gradients[..., None])[..., 0] # (num_samples, patch_H, patch_W, 3) elif gradient_method == "ad": gradients = self.sdf_network.gradient(positions_starts_patch_all.reshape(-1, 3)).reshape(num_samples, patch_H, patch_W, 3) elif gradient_method == "fd": # 6-point finite difference self.fd_epsilon = 1e-3 positions_xn = positions_starts_patch_all + torch.tensor([[[[-self.fd_epsilon, 0, 0]]]], device=positions_starts_patch_all.device).expand( positions_starts_patch_all.shape) positions_xp = positions_starts_patch_all + torch.tensor([[[[self.fd_epsilon, 0, 0]]]], device=positions_starts_patch_all.device).expand( positions_starts_patch_all.shape) positions_yn = positions_starts_patch_all + torch.tensor([[[[0, -self.fd_epsilon, 0]]]], device=positions_starts_patch_all.device).expand( positions_starts_patch_all.shape) positions_yp = positions_starts_patch_all + torch.tensor([[[[0, self.fd_epsilon, 0]]]], 
device=positions_starts_patch_all.device).expand( positions_starts_patch_all.shape) positions_zn = positions_starts_patch_all + torch.tensor([[[[0, 0, -self.fd_epsilon]]]], device=positions_starts_patch_all.device).expand( positions_starts_patch_all.shape) positions_zp = positions_starts_patch_all + torch.tensor([[[[0, 0, self.fd_epsilon]]]], device=positions_starts_patch_all.device).expand( positions_starts_patch_all.shape) positions_concat = torch.cat( [positions_xn, positions_xp, positions_yn, positions_yp, positions_zn, positions_zp], 0).to( torch.float32).reshape(-1, 3) sdf_concat = self.sdf_network(positions_concat).reshape(-1, patch_H, patch_W, 1) num_samples = positions_starts_patch_all.shape[0] sdf_xn = sdf_concat[:num_samples].reshape(num_samples, patch_H, patch_W, 1) sdf_xp = sdf_concat[num_samples:2 * num_samples].reshape(num_samples, patch_H, patch_W, 1) sdf_yn = sdf_concat[2 * num_samples:3 * num_samples].reshape(num_samples, patch_H, patch_W, 1) sdf_yp = sdf_concat[3 * num_samples:4 * num_samples].reshape(num_samples, patch_H, patch_W, 1) sdf_zn = sdf_concat[4 * num_samples:5 * num_samples].reshape(num_samples, patch_H, patch_W, 1) sdf_zp = sdf_concat[5 * num_samples:].reshape(num_samples, patch_H, patch_W, 1) df_dx = (sdf_xp - sdf_xn) / (2 * self.fd_epsilon) df_dy = (sdf_yp - sdf_yn) / (2 * self.fd_epsilon) df_dz = (sdf_zp - sdf_zn) / (2 * self.fd_epsilon) gradients = torch.stack([df_dx, df_dy, df_dz], -1) weights_sum_cuda = accumulate_along_rays_patch_based(weights_cuda, patch_indices, n_patches=num_patch) # (num_samples, patch_H, patch_W, 1) weights_sum = weights_sum_cuda.reshape(num_patch, patch_H, patch_W, 1) comp_normals_cuda = accumulate_along_rays_patch_based(weights_cuda, patch_indices, values=gradients.reshape(num_samples,patch_H * patch_W, 3),n_patches=num_patch) # (num_samples, patch_H, patch_W, 3) comp_normal = comp_normals_cuda.reshape(num_patch, patch_H, patch_W, 3) inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 
@torch.no_grad()
def render_normal_pixel_based(self, rays_o, rays_d, near, far):
    """Composite SDF gradients and sample distances along each ray.

    Runs entirely under ``torch.no_grad()``. Samples are produced by
    ``ray_marching`` using ``self.occupancy_grid`` and
    ``self.sampling_step_size``; per-sample opacities come from the nested
    ``alpha_fn``; the opacities are turned into weights with
    ``render_weight_from_alpha`` and used to accumulate, per ray, the SDF
    gradient at the sample midpoints and the midpoint distance itself.

    Args:
        rays_o: Ray origins, indexed by ray along dim 0
            (presumably ``(n_rays, 3)`` -- TODO confirm against caller).
        rays_d: Ray directions, indexed the same way as ``rays_o``.
        near: Per-ray lower marching bound; squeezed and passed as ``t_min``.
        far: Per-ray upper marching bound; squeezed and passed as ``t_max``.

    Returns:
        Tuple ``(comp_normal, comp_depth)``: the weighted accumulation of the
        SDF gradients and of the midpoint distances, one entry per ray.
    """

    def alpha_fn(t_starts, t_ends, ray_indices, ret_sdf=False):
        """Return the opacity of every sample interval.

        The SDF is needed at both ends of each interval. The value at an
        interval's end is taken from the next sample's start whenever the two
        coincide, so the network is only queried at the end points that are
        not already covered by a start point.
        """
        ray_ids = ray_indices.long()
        origins = rays_o[ray_ids]
        directions = rays_d[ray_ids]
        pts_start = origins + directions * t_starts
        pts_end = origins + directions * t_ends

        # Start distance of the following sample; the final slot simply
        # repeats the last start distance.
        next_t_starts = torch.cat([t_starts[1:], t_starts[-1:]], 0)

        # True where an interval's end differs from the next sample's start,
        # i.e. where the end point needs its own SDF query.
        needs_end_query = ((t_ends - next_t_starts) != 0).squeeze()
        pts_end_extra = pts_end[needs_end_query].reshape(-1, 3)

        # One batched network call for all starts plus the extra end points.
        n_starts = pts_start.shape[0]
        sdf_batch = self.sdf_network(torch.cat([pts_start, pts_end_extra], 0))
        sdf_start = sdf_batch[:n_starts]
        sdf_end_extra = sdf_batch[n_starts:]

        # SDF at each interval's end: next sample's start value by default,
        # overwritten with the explicitly queried value where required.
        sdf_next = torch.cat([sdf_start[1:], sdf_start[-1:]], 0)
        sdf_next[needs_end_query] = sdf_end_extra

        # Single scalar from the deviation network, broadcast to all samples.
        inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6)
        inv_s = inv_s.expand(sdf_start.shape[0], 1)

        cdf_here = torch.sigmoid(sdf_start * inv_s)
        cdf_next = torch.sigmoid(sdf_next * inv_s)

        # Relative drop of the sigmoid across the interval; the 1e-5 terms
        # keep the ratio finite when the denominator is near zero.
        ratio = (cdf_here - cdf_next + 1e-5) / (cdf_here + 1e-5)
        alpha = ratio.view(-1).clip(0.0, 1.0).reshape(-1, 1)

        if ret_sdf:
            return alpha, sdf_start, sdf_next
        return alpha

    ray_indices, t_starts, t_ends = ray_marching(
        rays_o,
        rays_d,
        t_min=near.squeeze(),
        t_max=far.squeeze(),
        grid=self.occupancy_grid,
        render_step_size=self.sampling_step_size,
        stratified=True,
        cone_angle=0.0,
        alpha_thre=0.0,
        early_stop_eps=1e-3,
        alpha_fn=alpha_fn,
    )

    alpha = alpha_fn(t_starts, t_ends, ray_indices)

    # Evaluate the SDF gradient at the middle of every sample interval.
    ray_ids = ray_indices.long()
    midpoints = (t_starts + t_ends) / 2.
    mid_positions = rays_o[ray_ids] + rays_d[ray_ids] * midpoints
    gradients = self.sdf_network.gradient(mid_positions).reshape(-1, 3)

    n_rays = rays_o.shape[0]
    weights = render_weight_from_alpha(alpha, ray_indices=ray_ids, n_rays=n_rays)  # [n_samples, 1]
    comp_normal = accumulate_along_rays(weights, ray_ids, values=gradients, n_rays=n_rays)
    comp_depth = accumulate_along_rays(weights, ray_ids, values=midpoints, n_rays=n_rays)
    return comp_normal, comp_depth


def extract_geometry(self, bound_min, bound_max, resolution, threshold=0.0):
    """Forward to the module-level ``extract_geometry`` helper.

    The helper is given a query function that returns the *negated* SDF of
    ``self.sdf_network`` at the requested points, along with the bounds,
    resolution and threshold passed in here. The helper's return value is
    passed through unchanged (presumably the extracted surface -- verify
    against the helper's definition).
    """

    def negated_sdf(pts):
        return -self.sdf_network.sdf(pts)

    return extract_geometry(
        bound_min,
        bound_max,
        resolution=resolution,
        threshold=threshold,
        query_func=negated_sdf,
    )
1.10.0 cuda-version: 'cu116' - torch-version: 1.10.0 cuda-version: 'cu117' - torch-version: 1.11.0 cuda-version: 'cu116' - torch-version: 1.11.0 cuda-version: 'cu117' - torch-version: 1.12.0 cuda-version: 'cu117' - torch-version: 1.13.0 cuda-version: 'cu102' - torch-version: 1.13.0 cuda-version: 'cu113' - os: windows-2019 torch-version: 1.11.0 cuda-version: 'cu102' - os: windows-2019 torch-version: 1.12.0 cuda-version: 'cu102' # - os: macos-10.15 # cuda-version: 'cu102' # - os: macos-10.15 # cuda-version: 'cu113' # - os: macos-10.15 # cuda-version: 'cu116' # - os: macos-10.15 # cuda-version: 'cu117' steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Upgrade pip run: | pip install --upgrade setuptools pip install ninja - name: Free up disk space if: ${{ runner.os == 'Linux' }} run: | sudo rm -rf /usr/share/dotnet - name: Install CUDA ${{ matrix.cuda-version }} if: ${{ matrix.cuda-version != 'cpu' }} run: | bash .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}.sh - name: Install PyTorch ${{ matrix.torch-version }}+${{ matrix.cuda-version }} run: | pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/${{ matrix.cuda-version }} python -c "import torch; print('PyTorch:', torch.__version__)" python -c "import torch; print('CUDA:', torch.version.cuda)" python -c "import torch; print('CUDA Available:', torch.cuda.is_available())" - name: Patch PyTorch static constexpr on Windows if: ${{ runner.os == 'Windows' }} run: | Torch_DIR=`python -c 'import os; import torch; print(os.path.dirname(torch.__file__))'` sed -i '31,38c\ TORCH_API void lazy_init_num_threads();' ${Torch_DIR}/include/ATen/Parallel.h shell: bash - name: Set version if: ${{ runner.os != 'macOS' }} run: | VERSION=`sed -n 's/^__version__ = "\(.*\)"/\1/p' nerfacc/version.py` TORCH_VERSION=`echo "pt${{ matrix.torch-version }}" | 
sed "s/..$//" | sed "s/\.//g"` CUDA_VERSION=`echo ${{ matrix.cuda-version }}` echo "New version name: $VERSION+$TORCH_VERSION$CUDA_VERSION" sed -i "s/$VERSION/$VERSION+$TORCH_VERSION$CUDA_VERSION/" nerfacc/version.py shell: bash - name: Install main package for CPU if: ${{ matrix.cuda-version == 'cpu' }} run: | FORCE_ONLY_CPU=1 pip install -e . shell: bash - name: Install main package for GPU if: ${{ matrix.cuda-version != 'cpu' }} run: | source .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}-env.sh pip install . shell: bash - name: Test installation run: | python -c "import nerfacc; print('nerfacc:', nerfacc.__version__)" - name: Build wheel run: | pip install wheel source .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}-env.sh python setup.py bdist_wheel --dist-dir=dist shell: bash - name: Configure AWS uses: aws-actions/configure-aws-credentials@v1 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-west-2 - name: Upload wheel run: | aws s3 sync dist s3://nerfacc-bucket/whl/torch-${{ matrix.torch-version }}_${{ matrix.cuda-version }} --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers update_aws_listing: needs: [wheel] runs-on: ubuntu-latest environment: production steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: 3.9 - name: Upgrade pip run: | pip install --upgrade setuptools pip install boto3 - name: Configure AWS uses: aws-actions/configure-aws-credentials@v1 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-west-2 - name: Update AWS listing run: | python scripts/run_aws_listing.py \ --access_key_id=${{ secrets.AWS_ACCESS_KEY_ID }} \ --secret_access_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} \ --bucket="nerfacc-bucket" \ --region="us-west-2" ================================================ 
#!/bin/bash

# Installs the CUDA 10.1 compiler (nvcc) and development libraries from
# NVIDIA's local-repository .deb package for the distribution named in OS.

OS=ubuntu1804
# File name of the local-repository package; wget saves the download under
# this name in the current directory.
CUDA_REPO_DEB=cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/${CUDA_REPO_DEB}
sudo dpkg -i ${CUDA_REPO_DEB}
sudo apt-key add /var/cuda-repo-10-1-local-10.1.243-418.87.00/7fa2af80.pub

sudo apt-get -qq update
sudo apt install -y cuda-nvcc-10-1 cuda-libraries-dev-10-1
sudo apt clean

# Delete the downloaded package. The previous version passed the download
# URL to rm, which names no local file, so the .deb was never removed.
rm -f ${CUDA_REPO_DEB}
#!/bin/bash

# Installs the CUDA 10.2 compiler (nvcc) and development libraries from
# NVIDIA's local-repository .deb package for the distribution named in OS.

OS=ubuntu1804
# File name of the local-repository package; wget saves the download under
# this name in the current directory.
CUDA_REPO_DEB=cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/${CUDA_REPO_DEB}
sudo dpkg -i ${CUDA_REPO_DEB}
sudo apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub

sudo apt-get -qq update
sudo apt install -y cuda-nvcc-10-2 cuda-libraries-dev-10-2
sudo apt clean

# Delete the downloaded package. The previous version passed the download
# URL to rm, which names no local file, so the .deb was never removed.
rm -f ${CUDA_REPO_DEB}
Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=10.2 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.89_441.22_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" 
#!/bin/bash

# Installs the CUDA 11.1 compiler (nvcc) and development libraries from
# NVIDIA's local-repository .deb package for the distribution named in OS.

OS=ubuntu1804
# File name of the local-repository package; wget saves the download under
# this name in the current directory.
CUDA_REPO_DEB=cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/${CUDA_REPO_DEB}
sudo dpkg -i ${CUDA_REPO_DEB}
sudo apt-key add /var/cuda-repo-${OS}-11-1-local/7fa2af80.pub

sudo apt-get -qq update
sudo apt install -y cuda-nvcc-11-1 cuda-libraries-dev-11-1
sudo apt clean

# Delete the downloaded package. The previous version passed the download
# URL to rm, which names no local file, so the .deb was never removed.
rm -f ${CUDA_REPO_DEB}
https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.1 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.1_456.81_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" 
#!/bin/bash

# Installs the CUDA 11.3 compiler (nvcc) and development libraries from
# NVIDIA's local-repository .deb package for the distribution named in OS.

OS=ubuntu1804
# File name of the local-repository package; wget saves the download under
# this name in the current directory.
CUDA_REPO_DEB=cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/${CUDA_REPO_DEB}
sudo dpkg -i ${CUDA_REPO_DEB}
sudo apt-key add /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub

sudo apt-get -qq update
sudo apt install -y cuda-nvcc-11-3 cuda-libraries-dev-11-3
sudo apt clean

# Delete the downloaded package. The previous version passed the download
# URL to rm, which names no local file, so the .deb was never removed.
rm -f ${CUDA_REPO_DEB}
https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" 
#!/bin/bash

# Installs the CUDA 11.5 compiler (nvcc) and development libraries from
# NVIDIA's local-repository .deb package for the distribution named in OS.

OS=ubuntu1804
# File name of the local-repository package; wget saves the download under
# this name in the current directory.
CUDA_REPO_DEB=cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/${CUDA_REPO_DEB}
sudo dpkg -i ${CUDA_REPO_DEB}
sudo apt-key add /var/cuda-repo-${OS}-11-5-local/7fa2af80.pub

sudo apt-get -qq update
sudo apt install -y cuda-nvcc-11-5 cuda-libraries-dev-11-5
sudo apt clean

# Delete the downloaded package. The previous version passed the download
# URL to rm, which names no local file, so the .deb was never removed.
rm -f ${CUDA_REPO_DEB}
11.5 Windows wheels # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" 
#!/bin/bash

# Installs the CUDA 11.6 compiler (nvcc) and development libraries from
# NVIDIA's local-repository .deb package for the distribution named in OS.

OS=ubuntu1804
# File name of the local-repository package; wget saves the download under
# this name in the current directory.
CUDA_REPO_DEB=cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/${CUDA_REPO_DEB}
sudo dpkg -i ${CUDA_REPO_DEB}
sudo apt-key add /var/cuda-repo-${OS}-11-6-local/7fa2af80.pub

sudo apt-get -qq update
sudo apt install -y cuda-nvcc-11-6 cuda-libraries-dev-11-6
sudo apt clean

# Delete the downloaded package. The previous version passed a download URL
# (and one for the 11.5.2 release at that) to rm, which names no local file,
# so the .deb was never removed.
rm -f ${CUDA_REPO_DEB}
11.6 Windows wheels # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" 
#!/bin/bash

# Installs the CUDA 11.7 compiler (nvcc) and development libraries from
# NVIDIA's local-repository .deb package for the distribution named in OS.

OS=ubuntu1804
# File name of the local-repository package; wget saves the download under
# this name in the current directory.
CUDA_REPO_DEB=cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb

wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/${CUDA_REPO_DEB}
sudo dpkg -i ${CUDA_REPO_DEB}
# This release ships a keyring file instead of the 7fa2af80.pub key.
sudo cp /var/cuda-repo-${OS}-11-7-local/cuda-*-keyring.gpg /usr/share/keyrings/

sudo apt-get -qq update
sudo apt install -y cuda-nvcc-11-7 cuda-libraries-dev-11-7
sudo apt clean

# Delete the downloaded package. The previous version passed the download
# URL to rm, which names no local file, so the .deb was never removed.
rm -f ${CUDA_REPO_DEB}
11.3 to build CUDA 11.7 Windows wheels # Install NVIDIA drivers, see: # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" export CUDA_SHORT=11.3 export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers export CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe # Install CUDA: curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" echo "" echo "Installing from ${CUDA_FILE}..." PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" echo "Done!" 
rm -f "${CUDA_FILE}" ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/publish.yml ================================================ # This workflows will upload a Python Package using twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Upload Python Package on: release: types: [created] branches: [master] jobs: deploy: runs-on: ubuntu-latest environment: production steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v1 with: python-version: '3.7' - name: Install dependencies run: | python -m pip install build twine - name: Strip unsupported tags in README run: | sed -i '//,//d' README.md - name: Build and publish env: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} run: | BUILD_NO_CUDA=1 python -m build twine upload --username __token__ --password $PYPI_TOKEN dist/* ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.gitignore ================================================ # Visual Studio Code configs. .vscode/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ # lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .DS_Store # Direnv config. .envrc # line_profiler *.lprof # vscode .vsocde benchmarks/ outputs/ ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.gitmodules ================================================ [submodule "examples/pycolmap"] path = examples/pycolmap url = https://github.com/rmbrualla/pycolmap.git ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.pre-commit-config.yaml ================================================ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.3.0 hooks: - id: end-of-file-fixer - id: trailing-whitespace - id: check-yaml - id: check-merge-conflict - id: requirements-txt-fixer - repo: https://github.com/psf/black rev: 22.10.0 hooks: - id: black language_version: python3.8.12 args: # arguments to configure black - --line-length=80 - repo: https://github.com/pycqa/isort rev: 5.10.1 hooks: - id: isort ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.readthedocs.yaml ================================================ version: 2 build: os: ubuntu-20.04 tools: python: "3.9" sphinx: fail_on_warning: true configuration: 
docs/source/conf.py python: install: # Equivalent to 'pip install .' - method: pip path: . # Equivalent to 'pip install -r docs/requirements.txt' - requirements: docs/requirements.txt ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/CMakeLists.txt ================================================ # cmake_minimum_required(VERSION 3.3) # project(nerfacc LANGUAGES CXX CUDA) # find_package(pybind11 REQUIRED) # find_package(Torch REQUIRED) # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") # set(SOURCE_DIR nerfacc/cuda/csrc) # set(INCLUDE_DIR nerfacc/cuda/csrc/include) # file(GLOB SOURCES ${SOURCE_DIR}/*.cu) # pybind11_add_module(${PROJECT_NAME} SHARED ${SOURCES}) # target_link_libraries(${PROJECT_NAME} PRIVATE "${TORCH_LIBRARIES}") # target_include_directories(${PROJECT_NAME} PRIVATE "${INCLUDE_DIR}") # # message(STATUS "CUDA enabled") # # set( CMAKE_CUDA_STANDARD 14 ) # # set( CMAKE_CUDA_STANDARD_REQUIRED ON) # # find_package(pybind11 REQUIRED) # # # find_package(Python3 REQUIRED COMPONENTS Development) # # # target_link_libraries(${PROJECT_NAME} PRIVATE Python3::Python) # # find_package(Torch REQUIRED) # # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") # # target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES}) # # set(CSRC nerfacc/cuda/csrc) # # file(GLOB_RECURSE ALL_SOURCES ${ALL_SOURCES} ${CSRC}/*.cu) # # file(GLOB_RECURSE ALL_HEADERS ${CSRC}/include/*.h) # # add_library(${PROJECT_NAME} SHARED ${ALL_SOURCES}) # # target_include_directories(${PROJECT_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") # # set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0") # # message("-- CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") # # message("-- CMAKE_CXX_FLAGS_DEBUG: ${CMAKE_CXX_FLAGS_DEBUG}") # # message("-- CMAKE_CXX_FLAGS_RELEASE: ${CMAKE_CXX_FLAGS_RELEASE}") # # set_target_properties(${PROJECT_NAME} PROPERTIES # # EXPORT_NAME nerfacc # # INSTALL_RPATH ${TORCH_INSTALL_PREFIX}/lib) # # Cmake creates 
*.dylib by default, but python expects *.so by default # # if (APPLE) # # set_property(TARGET ${PROJECT_NAME} PROPERTY SUFFIX .so) # # endif() ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/LICENSE ================================================ MIT License Copyright (c) 2022 Ruilong Li Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/MANIFEST.in ================================================ include nerfacc/cuda/csrc/include/* include nerfacc/cuda/csrc/* ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/README.md ================================================

nerfacc logo

[![Core Tests.](https://github.com/KAIR-BAIR/nerfacc/actions/workflows/code_checks.yml/badge.svg)](https://github.com/KAIR-BAIR/nerfacc/actions/workflows/code_checks.yml) [![Documentation Status](https://readthedocs.com/projects/plenoptix-nerfacc/badge/?version=latest)](https://www.nerfacc.com/en/latest/?badge=latest) [![Downloads](https://pepy.tech/badge/nerfacc)](https://pepy.tech/project/nerfacc) https://www.nerfacc.com/ NerfAcc is a PyTorch Nerf acceleration toolbox for both training and inference. It focuses on efficient volumetric rendering of radiance fields, which is universal and plug-and-play for most of the NeRFs. Using NerfAcc, - The `vanilla NeRF` model with 8-layer MLPs can be trained to *better quality* (+~0.5 PSNR) in *1 hour* rather than *days* as in the paper. - The `Instant-NGP NeRF` model can be trained to *equal quality* in *4.5 minutes*, compared to the official pure-CUDA implementation. - The `D-NeRF` model for *dynamic* objects can also be trained in *1 hour* rather than *2 days* as in the paper, and with *better quality* (+~2.5 PSNR). - Both *bounded* and *unbounded* scenes are supported. **And it is a pure Python interface with flexible APIs!** ## Installation **Dependency**: Please install [Pytorch](https://pytorch.org/get-started/locally/) first. The easiest way is to install from PyPI. In this way it will build the CUDA code **on the first run** (JIT). ``` pip install nerfacc ``` Or install from source. In this way it will build the CUDA code during installation. ``` pip install git+https://github.com/KAIR-BAIR/nerfacc.git ``` We also provide pre-built wheels covering major combinations of Pytorch + CUDA supported by [official Pytorch](https://pytorch.org/get-started/previous-versions/). 
``` # e.g., torch 1.13.0 + cu117 pip install nerfacc -f https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl/torch-1.13.0_cu117.html ``` | Windows & Linux | `cu102` | `cu113` | `cu116` | `cu117` | |-----------------|---------|---------|---------|---------| | torch 1.10.0 | ✅ | ✅ | | | | torch 1.11.0 | ✅* | ✅ | | | | torch 1.12.0 | ✅* | ✅ | ✅ | | | torch 1.13.0 | | | ✅ | ✅ | \* Pytorch does not support Windows pre-built wheels for those combinations thus we do not support as well. ## Usage The idea of NerfAcc is to perform efficient ray marching and volumetric rendering. So NerfAcc can work with any user-defined radiance field. To plug the NerfAcc rendering pipeline into your code and enjoy the acceleration, you only need to define two functions with your radiance field. - `sigma_fn`: Compute density at each sample. It will be used by `nerfacc.ray_marching()` to skip the empty and occluded space during ray marching, which is where the major speedup comes from. - `rgb_sigma_fn`: Compute color and density at each sample. It will be used by `nerfacc.rendering()` to conduct differentiable volumetric rendering. This function will receive gradients to update your network. A simple example is like this: ``` python import torch from torch import Tensor import nerfacc radiance_field = ... # network: a NeRF model rays_o: Tensor = ... # ray origins. (n_rays, 3) rays_d: Tensor = ... # ray normalized directions. (n_rays, 3) optimizer = ... # optimizer def sigma_fn( t_starts: Tensor, t_ends:Tensor, ray_indices: Tensor ) -> Tensor: """ Query density values from a user-defined radiance field. :params t_starts: Start of the sample interval along the ray. (n_samples, 1). :params t_ends: End of the sample interval along the ray. (n_samples, 1). :params ray_indices: Ray indices that each sample belongs to. (n_samples,). :returns The post-activation density values. (n_samples, 1). 
""" t_origins = rays_o[ray_indices] # (n_samples, 3) t_dirs = rays_d[ray_indices] # (n_samples, 3) positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0 sigmas = radiance_field.query_density(positions) return sigmas # (n_samples, 1) def rgb_sigma_fn( t_starts: Tensor, t_ends: Tensor, ray_indices: Tensor ) -> Tuple[Tensor, Tensor]: """ Query rgb and density values from a user-defined radiance field. :params t_starts: Start of the sample interval along the ray. (n_samples, 1). :params t_ends: End of the sample interval along the ray. (n_samples, 1). :params ray_indices: Ray indices that each sample belongs to. (n_samples,). :returns The post-activation rgb and density values. (n_samples, 3), (n_samples, 1). """ t_origins = rays_o[ray_indices] # (n_samples, 3) t_dirs = rays_d[ray_indices] # (n_samples, 3) positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0 rgbs, sigmas = radiance_field(positions, condition=t_dirs) return rgbs, sigmas # (n_samples, 3), (n_samples, 1) # Efficient Raymarching: Skip empty and occluded space, pack samples from all rays. # ray_indices: (n_samples,). t_starts: (n_samples, 1). t_ends: (n_samples, 1). with torch.no_grad(): ray_indices, t_starts, t_ends = nerfacc.ray_marching( rays_o, rays_d, sigma_fn=sigma_fn, near_plane=0.2, far_plane=1.0, early_stop_eps=1e-4, alpha_thre=1e-2, ) # Differentiable Volumetric Rendering. # colors: (n_rays, 3). opaicity: (n_rays, 1). depth: (n_rays, 1). color, opacity, depth = nerfacc.rendering( t_starts, t_ends, ray_indices, n_rays=rays_o.shape[0], rgb_sigma_fn=rgb_sigma_fn ) # Optimize: Both the network and rays will receive gradients optimizer.zero_grad() loss = F.mse_loss(color, color_gt) loss.backward() optimizer.step() ``` ## Examples: Before running those example scripts, please check the script about which dataset it is needed, and download the dataset first. ```bash # clone the repo with submodules. 
git clone --recursive git://github.com/KAIR-BAIR/nerfacc/ ``` ``` bash # Instant-NGP NeRF in 4.5 minutes with reproduced performance! # See results at here: https://www.nerfacc.com/en/latest/examples/ngp.html python examples/train_ngp_nerf.py --train_split train --scene lego ``` ``` bash # Vanilla MLP NeRF in 1 hour with better performance! # See results at here: https://www.nerfacc.com/en/latest/examples/vanilla.html python examples/train_mlp_nerf.py --train_split train --scene lego ``` ```bash # D-NeRF for Dynamic objects in 1 hour with better performance! # See results at here: https://www.nerfacc.com/en/latest/examples/dnerf.html python examples/train_mlp_dnerf.py --train_split train --scene lego ``` ```bash # Instant-NGP on unbounded scenes in 20 minutes! # See results at here: https://www.nerfacc.com/en/latest/examples/unbounded.html python examples/train_ngp_nerf.py --train_split train --scene garden --auto_aabb --unbounded --cone_angle=0.004 ``` Used by: - [nerfstudio](https://github.com/nerfstudio-project/nerfstudio): A collaboration friendly studio for NeRFs. - [instant-nsr-pl](https://github.com/bennyguo/instant-nsr-pl): NeuS in 10 minutes. ## Common Installation Issues
ImportError: .../csrc.so: undefined symbol If you are installing a pre-built wheel, make sure the Pytorch and CUDA versions match the nerfacc version (nerfacc.__version__).
## Citation ```bibtex @article{li2022nerfacc, title={NerfAcc: A General NeRF Accleration Toolbox.}, author={Li, Ruilong and Tancik, Matthew and Kanazawa, Angjoo}, journal={arXiv preprint arXiv:2210.04847}, year={2022} } ``` ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/requirements.txt ================================================ pytorch_sphinx_theme @ git+https://github.com/liruilong940607/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme sphinx==5.2.1 sphinx-copybutton==0.5.0 sphinx-design==0.2.0 ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/_static/css/readthedocs.css ================================================ .header-logo { background-image: url("../images/logo4x.png"); background-size: 156px 35px; height: 35px; width: 156px; } code { word-break: normal; } ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.accumulate_along_rays.rst ================================================ nerfacc.accumulate\_along\_rays =============================== .. currentmodule:: nerfacc .. 
autofunction:: accumulate_along_rays ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.pack_data.rst ================================================ nerfacc.pack\_data ================== .. currentmodule:: nerfacc .. autofunction:: pack_data ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.ray_aabb_intersect.rst ================================================ nerfacc.ray\_aabb\_intersect ============================ .. currentmodule:: nerfacc .. autofunction:: ray_aabb_intersect ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.ray_resampling.rst ================================================ nerfacc.ray\_resampling ======================= .. currentmodule:: nerfacc .. autofunction:: ray_resampling ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_transmittance_from_alpha.rst ================================================ nerfacc.render\_transmittance\_from\_alpha ========================================== .. currentmodule:: nerfacc .. autofunction:: render_transmittance_from_alpha ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_transmittance_from_density.rst ================================================ nerfacc.render\_transmittance\_from\_density ============================================ .. currentmodule:: nerfacc .. autofunction:: render_transmittance_from_density ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_visibility.rst ================================================ nerfacc.render\_visibility ========================== .. 
currentmodule:: nerfacc .. autofunction:: render_visibility ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_weight_from_alpha.rst ================================================ nerfacc.render\_weight\_from\_alpha =================================== .. currentmodule:: nerfacc .. autofunction:: render_weight_from_alpha ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_weight_from_density.rst ================================================ nerfacc.render\_weight\_from\_density ===================================== .. currentmodule:: nerfacc .. autofunction:: render_weight_from_density ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.unpack_data.rst ================================================ nerfacc.unpack\_data ==================== .. currentmodule:: nerfacc .. autofunction:: unpack_data ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.unpack_info.rst ================================================ nerfacc.unpack\_info ==================== .. currentmodule:: nerfacc .. autofunction:: unpack_info ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/grid.rst ================================================ .. _`Occupancy Grid`: Occupancy Grid =================================== .. currentmodule:: nerfacc .. autoclass:: ContractionType :members: .. autoclass:: Grid :members: .. 
autoclass:: OccupancyGrid :members: ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/rendering.rst ================================================ Volumetric Rendering =================================== In `nerfacc`, the volumetric rendering pipeline is broken down into 2 steps: 1. **Raymarching**: This is the process of shooting a ray through the scene and generating samples along the way. To perform efficient volumetric rendering, here we aim at skipping as many areas as possible. The empty space is skipped by using the cached occupancy grid (see :class:`nerfacc.OccupancyGrid`), and the invisible space is skipped by checking the transmittance of the ray while marching. In almost all cases, this skipping won't result in a noticeable loss of quality, as the skipped samples would contribute very little to the final rendered image. But it will bring a significant speedup. 2. **Rendering**: This is the process of accumulating samples along the rays into the final image. In this step we also need to query the attributes (a.k.a. color and density) of those samples generated by raymarching. Early stopping is supported in this step. | .. currentmodule:: nerfacc .. autofunction:: ray_marching .. autofunction:: rendering ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/utils.rst ================================================ Utils =================================== .. currentmodule:: nerfacc .. 
autosummary:: :nosignatures: :toctree: generated/ ray_aabb_intersect unpack_info accumulate_along_rays render_transmittance_from_density render_transmittance_from_alpha render_weight_from_density render_weight_from_alpha render_visibility ray_resampling pack_data unpack_data ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/conf.py ================================================ import pytorch_sphinx_theme __version__ = None exec(open("../../nerfacc/version.py", "r").read()) # -- Project information project = "nerfacc" copyright = "2022, Ruilong" author = "Ruilong" release = __version__ # -- General configuration extensions = [ "sphinx.ext.napoleon", "sphinx.ext.duration", "sphinx.ext.doctest", "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.intersphinx", ] intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "sphinx": ("https://www.sphinx-doc.org/en/master/", None), } intersphinx_disabled_domains = ["std"] templates_path = ["_templates"] # -- Options for HTML output # html_theme = "furo" html_theme = "pytorch_sphinx_theme" html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] html_static_path = ["_static"] html_css_files = ["css/readthedocs.css"] # Ignore >>> when copying code copybutton_prompt_text = r">>> |\.\.\. " copybutton_prompt_is_regexp = True # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. html_theme_options = { # The target url that the logo directs to. Unset to do nothing "logo_url": "https://www.nerfacc.com/en/latest/index.html", # "menu" is a list of dictionaries where you can specify the content and the # behavior of each item in the menu. Each item can either be a link or a # dropdown menu containing a list of links. 
"menu": [ # A link {"name": "GitHub", "url": "https://github.com/KAIR-BAIR/nerfacc"}, # A dropdown menu # { # "name": "Projects", # "children": [ # # A vanilla dropdown item # { # "name": "nerfstudio", # "url": "https://docs.nerf.studio/", # "description": "The all-in-one repo for NeRFs", # }, # ], # # Optional, determining whether this dropdown menu will always be # # highlighted. # # "active": True, # }, ], } # html_theme_options = { # "canonical_url": "", # "analytics_id": "", # "logo_only": False, # "display_version": True, # "prev_next_buttons_location": "bottom", # "style_external_links": False, # # Toc options # "collapse_navigation": True, # "sticky_navigation": True, # "navigation_depth": 4, # "includehidden": True, # "titles_only": False # } # -- Options for EPUB output epub_show_urls = "footnote" # typehints autodoc_typehints = "description" ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/dnerf.rst ================================================ Dynamic Scene ==================== See code `examples/train_mlp_dnerf.py` at our `github repository`_ for details. Benchmarks ------------ *updated on 2022-10-08* Here we trained a 8-layer-MLP for the radiance field and a 4-layer-MLP for the warping field, (similar to the T-Nerf model in the `D-Nerf`_ paper) on the `D-Nerf dataset`_. We used train split for training and test split for evaluation. Our experiments are conducted on a single NVIDIA TITAN RTX GPU. The training memory footprint is about 11GB. .. note:: The :ref:`Occupancy Grid` used in this example is shared by all the frames. In other words, instead of using it to indicate the opacity of an area at a single timestamp, Here we use it to indicate the `maximum` opacity at this area `over all the timestamps`. It is not optimal but still makes the rendering very efficient. 
+----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ | PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN | | | balls | warrior | | jacks | | | | | | +======================+==========+=========+=======+=========+=======+========+=========+=======+=======+ | D-Nerf (~ days) | 32.80 | 25.02 | 29.25 | 32.80 | 21.64 | 31.29 | 32.79 | 31.75 | 29.67 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ | Ours (~ 1 hr) | 39.49 | 25.58 | 31.86 | 32.73 | 24.32 | 35.55 | 35.90 | 32.33 | 32.22 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ | Ours (Training time)| 37min | 52min | 69min | 64min | 44min | 79min | 79min | 39min | 58min | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ .. _`D-Nerf`: https://arxiv.org/abs/2011.13961 .. _`D-Nerf dataset`: https://www.dropbox.com/s/0bf6fl0ye2vz3vr/data.zip?dl=0 .. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75 ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/ngp.rst ================================================ .. _`Instant-NGP Example`: Instant-NGP ==================== See code `examples/train_ngp_nerf.py` at our `github repository`_ for details. Benchmarks ------------ *updated on 2022-10-12* Here we trained a `Instant-NGP Nerf`_ model on the `Nerf-Synthetic dataset`_. We follow the same settings with the Instant-NGP paper, which uses train split for training and test split for evaluation. All experiments are conducted on a single NVIDIA TITAN RTX GPU. The training memory footprint is about 3GB. .. note:: The Instant-NGP paper makes use of the alpha channel in the images to apply random background augmentation during training. 
For a fair comparison, we reran their code with a constant white background during both training and testing. It is also worth mentioning that we didn't strictly follow the training recipe in the Instant-NGP paper, such as the learning rate schedule etc., as the purpose of this benchmark is to showcase the library rather than to reproduce the paper. +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN | | | | | | | | | | | | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+ |Instant-NGP 35k steps | 35.87 | 36.22 | 29.08 | 35.10 | 37.48 | 30.61 | 23.85 | 30.62 | 32.35 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ |(training time) | 309s | 258s | 256s | 316s | 292s | 207s | 218s | 250s | 263s | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ |Ours 20k steps | 35.50 | 36.16 | 29.14 | 35.23 | 37.15 | 31.71 | 24.88 | 29.91 | 32.46 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ |(training time) | 287s | 274s | 269s | 317s | 269s | 244s | 249s | 257s | 271s | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ .. _`Instant-NGP Nerf`: https://github.com/NVlabs/instant-ngp/tree/51e4107edf48338e9ab0316d56a222e0adf87143 .. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75 .. 
_`Nerf-Synthetic dataset`: https://drive.google.com/drive/folders/1JDdLGDruGNXWnM1eqY1FNL9PlStjaKWi ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/unbounded.rst ================================================ Unbounded Scene ==================== See code `examples/train_ngp_nerf.py` at our `github repository`_ for details. Benchmarks ------------ *updated on 2022-11-07* Here we trained an `Instant-NGP Nerf`_ on the `MipNerf360`_ dataset. We used train split for training and test split for evaluation. Our experiments are conducted on a single NVIDIA TITAN RTX GPU. The training memory footprint is about 6-9GB. The main difference between working with unbounded scenes and bounded scenes is that a contraction method is needed to map the infinite space to a finite :ref:`Occupancy Grid`. We have different options provided for this (see :ref:`Occupancy Grid`). The experiments here are basically the Instant-NGP experiments (see :ref:`Instant-NGP Example`) with a contraction method taken from `MipNerf360`_. .. note:: Even though we are comparing with `Nerf++`_ and `MipNerf360`_, the model and everything else are totally different from theirs. There are plenty of ideas from those papers that would be very helpful for the performance, but we didn't adopt them. As this is just a simple example to show how to use the library, we didn't want to make it too complicated. 
+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+ | PSNR |Garden |Bicycle|Bonsai |Counter|Kitchen| Room | Stump | MEAN | | | | | | | | | | | +======================+=======+=======+=======+=======+=======+=======+=======+=======+ | Nerf++ (~days) | 24.32 | 22.64 | 29.15 | 26.38 | 27.80 | 28.87 | 24.34 | 26.21 | +----------------------+-------+-------+-------+-------+-------+-------+-------+-------+ | MipNerf360 (~days) | 26.98 | 24.37 | 33.46 | 29.55 | 32.23 | 31.63 | 26.40 | 29.23 | +----------------------+-------+-------+-------+-------+-------+-------+-------+-------+ | Ours (~20 mins) | 25.41 | 22.97 | 30.71 | 27.34 | 30.32 | 31.00 | 23.43 | 27.31 | +----------------------+-------+-------+-------+-------+-------+-------+-------+-------+ | Ours (Training time) | 25min | 17min | 19min | 23min | 28min | 20min | 17min | 21min | +----------------------+-------+-------+-------+-------+-------+-------+-------+-------+ .. _`Instant-NGP Nerf`: https://arxiv.org/abs/2201.05989 .. _`MipNerf360`: https://arxiv.org/abs/2111.12077 .. _`Nerf++`: https://arxiv.org/abs/2010.07492 .. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75 ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/vanilla.rst ================================================ Vanilla Nerf ==================== See code `examples/train_mlp_nerf.py` at our `github repository`_ for details. Benchmarks ------------ *updated on 2022-10-08* Here we trained a 8-layer-MLP for the radiance field as in the `vanilla Nerf`_. We used the train split for training and test split for evaluation as in the Nerf paper. Our experiments are conducted on a single NVIDIA TITAN RTX GPU. The training memory footprint is about 10GB. .. note:: The vanilla Nerf paper uses two MLPs for course-to-fine sampling. Instead here we only use a single MLP with more samples (1024). 
Both ways share the same spirit to do dense sampling around the surface. Our fast rendering inherently skips samples away from the surface so we can simply increase the number of samples with a single MLP, to achieve the same goal as the coarse-to-fine sampling, without runtime or memory issues. +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN | | | | | | | | | | | | +======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+ | NeRF (~ days) | 32.54 | 32.91 | 29.62 | 33.00 | 36.18 | 30.13 | 25.01 | 28.65 | 31.00 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ | Ours (~ 50min) | 33.69 | 33.76 | 29.73 | 33.32 | 35.80 | 32.52 | 25.39 | 28.18 | 31.55 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ | Ours (Training time)| 58min | 53min | 46min | 62min | 56min | 42min | 52min | 49min | 52min | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ .. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75 .. _`vanilla Nerf`: https://arxiv.org/abs/2003.08934 ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/index.rst ================================================ NerfAcc Documentation =================================== NerfAcc is a PyTorch Nerf acceleration toolbox for both training and inference. It focuses on efficient volumetric rendering of radiance fields, which is universal and plug-and-play for most of the NeRFs. Using NerfAcc, - The `vanilla Nerf`_ model with 8-layer MLPs can be trained to *better quality* (+~0.5 PSNR) \ in *1 hour* rather than *1~2 days* as in the paper. 
- The `Instant-NGP Nerf`_ model can be trained to *equal quality* in *4.5 minutes*, \ compared to the official pure-CUDA implementation. - The `D-Nerf`_ model for *dynamic* objects can also be trained in *1 hour* \ rather than *2 days* as in the paper, and with *better quality* (+~2.5 PSNR). - Both *bounded* and *unbounded* scenes are supported. **And it is a pure Python interface with flexible APIs!** | Github: https://github.com/KAIR-BAIR/nerfacc | Paper: https://arxiv.org/pdf/2210.04847.pdf | Authors: `Ruilong Li`_, `Matthew Tancik`_, `Angjoo Kanazawa`_ .. note:: This repo is focusing on the single scene situation. Generalizable Nerfs across multiple scenes are currently out of the scope of this repo. But you may still find some useful tricks in this repo. :) Installation: ------------- .. code-block:: console $ pip install nerfacc Usage: ------------- The idea of NerfAcc is to perform efficient ray marching and volumetric rendering. So NerfAcc can work with any user-defined radiance field. To plug the NerfAcc rendering pipeline into your code and enjoy the acceleration, you only need to define two functions with your radiance field. - `sigma_fn`: Compute density at each sample. It will be used by :func:`nerfacc.ray_marching` to skip the empty and occluded space during ray marching, which is where the major speedup comes from. - `rgb_sigma_fn`: Compute color and density at each sample. It will be used by :func:`nerfacc.rendering` to conduct differentiable volumetric rendering. This function will receive gradients to update your network. A simple example is like this: .. code-block:: python import torch from torch import Tensor import nerfacc radiance_field = ... # network: a NeRF model rays_o: Tensor = ... # ray origins. (n_rays, 3) rays_d: Tensor = ... # ray normalized directions. (n_rays, 3) optimizer = ... # optimizer def sigma_fn( t_starts: Tensor, t_ends:Tensor, ray_indices: Tensor ) -> Tensor: """ Query density values from a user-defined radiance field. 
:params t_starts: Start of the sample interval along the ray. (n_samples, 1). :params t_ends: End of the sample interval along the ray. (n_samples, 1). :params ray_indices: Ray indices that each sample belongs to. (n_samples,). :returns The post-activation density values. (n_samples, 1). """ t_origins = rays_o[ray_indices] # (n_samples, 3) t_dirs = rays_d[ray_indices] # (n_samples, 3) positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0 sigmas = radiance_field.query_density(positions) return sigmas # (n_samples, 1) def rgb_sigma_fn( t_starts: Tensor, t_ends: Tensor, ray_indices: Tensor ) -> Tuple[Tensor, Tensor]: """ Query rgb and density values from a user-defined radiance field. :params t_starts: Start of the sample interval along the ray. (n_samples, 1). :params t_ends: End of the sample interval along the ray. (n_samples, 1). :params ray_indices: Ray indices that each sample belongs to. (n_samples,). :returns The post-activation rgb and density values. (n_samples, 3), (n_samples, 1). """ t_origins = rays_o[ray_indices] # (n_samples, 3) t_dirs = rays_d[ray_indices] # (n_samples, 3) positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0 rgbs, sigmas = radiance_field(positions, condition=t_dirs) return rgbs, sigmas # (n_samples, 3), (n_samples, 1) # Efficient Raymarching: Skip empty and occluded space, pack samples from all rays. # ray_indices: (n_samples,). t_starts: (n_samples, 1). t_ends: (n_samples, 1). with torch.no_grad(): ray_indices, t_starts, t_ends = nerfacc.ray_marching( rays_o, rays_d, sigma_fn=sigma_fn, near_plane=0.2, far_plane=1.0, early_stop_eps=1e-4, alpha_thre=1e-2, ) # Differentiable Volumetric Rendering. # colors: (n_rays, 3). opaicity: (n_rays, 1). depth: (n_rays, 1). 
def _load_renderings(root_fp: str, subject_id: str, split: str):
    """Load the frames of one D-NeRF subject from disk.

    Args:
        root_fp: Dataset root. A path that does not start with "/" is
            resolved relative to the directory two levels above this file.
        subject_id: Name of the sub-directory holding the subject.
        split: Split name; ``transforms_<split>.json`` is read.

    Returns:
        A tuple ``(images, camtoworlds, focal, timestamps)`` where the first
        three arrays are stacked along a new leading frame axis, ``focal`` is
        derived from ``camera_angle_x`` and the image width, and
        ``timestamps`` holds one value per frame.
    """
    if not root_fp.startswith("/"):
        # allow relative path. e.g., "./data/dnerf_synthetic/"
        root_fp = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "..",
            "..",
            root_fp,
        )

    data_dir = os.path.join(root_fp, subject_id)
    with open(
        os.path.join(data_dir, "transforms_{}.json".format(split)), "r"
    ) as fp:
        meta = json.load(fp)

    frames = meta["frames"]
    # Frames without an explicit "time" get evenly spaced timestamps in
    # [0, 1]. Clamp the denominator so a single-frame split maps to 0.0
    # instead of raising ZeroDivisionError.
    last_index = max(len(frames) - 1, 1)

    images = []
    camtoworlds = []
    timestamps = []
    for i, frame in enumerate(frames):
        fname = os.path.join(data_dir, frame["file_path"] + ".png")
        rgba = imageio.imread(fname)
        timestamp = frame["time"] if "time" in frame else float(i) / last_index
        timestamps.append(timestamp)
        camtoworlds.append(frame["transform_matrix"])
        images.append(rgba)

    images = np.stack(images, axis=0)
    camtoworlds = np.stack(camtoworlds, axis=0)
    timestamps = np.stack(timestamps, axis=0)

    # Pinhole focal length (in pixels) from the horizontal field of view.
    w = images.shape[2]
    camera_angle_x = float(meta["camera_angle_x"])
    focal = 0.5 * w / np.tan(0.5 * camera_angle_x)

    return images, camtoworlds, focal, timestamps
def fetch_data(self, index):
    """Assemble pixel colours, rays and timestamps for one batch.

    In training mode ``self.num_rays`` pixel coordinates are drawn at random,
    either across all images (``batch_over_images``) or inside image
    ``index``. Otherwise every pixel of image ``index`` is used and the
    outputs are reshaped to the image layout.

    Returns:
        dict with keys
        ``"rgba"``  -- ``[num_rays, 4]`` or ``[h, w, 4]``, values divided by 255;
        ``"rays"``  -- ``Rays`` with origins / normalised view directions,
        ``[num_rays, 3]`` or ``[h, w, 3]``;
        ``"timestamps"`` -- rows of ``self.timestamps`` for the chosen image ids.
    """
    device = self.images.device
    num_rays = self.num_rays

    if not self.training:
        # Evaluation: one full image, pixels enumerated on a regular grid.
        image_id = [index]
        grid_x, grid_y = torch.meshgrid(
            torch.arange(self.WIDTH, device=device),
            torch.arange(self.HEIGHT, device=device),
            indexing="xy",
        )
        x = grid_x.flatten()
        y = grid_y.flatten()
        out_shape = (self.HEIGHT, self.WIDTH)
    else:
        # Training: random pixels. The sampling order (image ids, then x,
        # then y) is kept so the random stream is consumed identically.
        if self.batch_over_images:
            image_id = torch.randint(
                0, len(self.images), size=(num_rays,), device=device
            )
        else:
            image_id = [index]
        x = torch.randint(0, self.WIDTH, size=(num_rays,), device=device)
        y = torch.randint(0, self.HEIGHT, size=(num_rays,), device=device)
        out_shape = (num_rays,)

    # Look up colours and poses for the selected pixels.
    rgba = self.images[image_id, y, x] / 255.0
    c2w = self.camtoworlds[image_id]

    # Pixel centres -> camera-space directions. For an OpenGL-style camera
    # the y axis and the viewing axis are both negated.
    axis_sign = -1.0 if self.OPENGL_CAMERA else 1.0
    dir_x = (x - self.K[0, 2] + 0.5) / self.K[0, 0]
    dir_y = (y - self.K[1, 2] + 0.5) / self.K[1, 1] * axis_sign
    camera_dirs = F.pad(
        torch.stack([dir_x, dir_y], dim=-1), (0, 1), value=axis_sign
    )  # [num_rays, 3]

    # Rotate into world space (row-wise product with the 3x3 rotation block).
    directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1)
    origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape)
    viewdirs = directions / torch.linalg.norm(
        directions, dim=-1, keepdims=True
    )

    origins = torch.reshape(origins, out_shape + (3,))
    viewdirs = torch.reshape(viewdirs, out_shape + (3,))
    rgba = torch.reshape(rgba, out_shape + (4,))

    return {
        "rgba": rgba,  # [h, w, 4] or [num_rays, 4]
        "rays": Rays(origins=origins, viewdirs=viewdirs),  # [h, w, 3] or [num_rays, 3]
        "timestamps": self.timestamps[image_id],
    }
""" import collections import os import sys import imageio import numpy as np import torch import torch.nn.functional as F import tqdm from .utils import Rays _PATH = os.path.abspath(__file__) sys.path.insert( 0, os.path.join(os.path.dirname(_PATH), "..", "pycolmap", "pycolmap") ) from scene_manager import SceneManager def _load_colmap(root_fp: str, subject_id: str, split: str, factor: int = 1): assert factor in [1, 2, 4, 8] data_dir = os.path.join(root_fp, subject_id) colmap_dir = os.path.join(data_dir, "sparse/0/") manager = SceneManager(colmap_dir) manager.load_cameras() manager.load_images() # Assume shared intrinsics between all cameras. cam = manager.cameras[1] fx, fy, cx, cy = cam.fx, cam.fy, cam.cx, cam.cy K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) K[:2, :] /= factor # Extract extrinsic matrices in world-to-camera format. imdata = manager.images w2c_mats = [] bottom = np.array([0, 0, 0, 1]).reshape(1, 4) for k in imdata: im = imdata[k] rot = im.R() trans = im.tvec.reshape(3, 1) w2c = np.concatenate([np.concatenate([rot, trans], 1), bottom], axis=0) w2c_mats.append(w2c) w2c_mats = np.stack(w2c_mats, axis=0) # Convert extrinsics to camera-to-world. camtoworlds = np.linalg.inv(w2c_mats) # Image names from COLMAP. No need for permuting the poses according to # image names anymore. image_names = [imdata[k].name for k in imdata] # # Switch from COLMAP (right, down, fwd) to Nerf (right, up, back) frame. # poses = poses @ np.diag([1, -1, -1, 1]) # Get distortion parameters. 
type_ = cam.camera_type if type_ == 0 or type_ == "SIMPLE_PINHOLE": params = None camtype = "perspective" elif type_ == 1 or type_ == "PINHOLE": params = None camtype = "perspective" if type_ == 2 or type_ == "SIMPLE_RADIAL": params = {k: 0.0 for k in ["k1", "k2", "k3", "p1", "p2"]} params["k1"] = cam.k1 camtype = "perspective" elif type_ == 3 or type_ == "RADIAL": params = {k: 0.0 for k in ["k1", "k2", "k3", "p1", "p2"]} params["k1"] = cam.k1 params["k2"] = cam.k2 camtype = "perspective" elif type_ == 4 or type_ == "OPENCV": params = {k: 0.0 for k in ["k1", "k2", "k3", "p1", "p2"]} params["k1"] = cam.k1 params["k2"] = cam.k2 params["p1"] = cam.p1 params["p2"] = cam.p2 camtype = "perspective" elif type_ == 5 or type_ == "OPENCV_FISHEYE": params = {k: 0.0 for k in ["k1", "k2", "k3", "k4"]} params["k1"] = cam.k1 params["k2"] = cam.k2 params["k3"] = cam.k3 params["k4"] = cam.k4 camtype = "fisheye" assert params is None, "Only support pinhole camera model." # Previous Nerf results were generated with images sorted by filename, # ensure metrics are reported on the same test set. inds = np.argsort(image_names) image_names = [image_names[i] for i in inds] camtoworlds = camtoworlds[inds] # Load images. if factor > 1: image_dir_suffix = f"_{factor}" else: image_dir_suffix = "" colmap_image_dir = os.path.join(data_dir, "images") image_dir = os.path.join(data_dir, "images" + image_dir_suffix) for d in [image_dir, colmap_image_dir]: if not os.path.exists(d): raise ValueError(f"Image folder {d} does not exist.") # Downsampled images may have different names vs images used for COLMAP, # so we need to map between the two sorted lists of files. 
colmap_files = sorted(os.listdir(colmap_image_dir)) image_files = sorted(os.listdir(image_dir)) colmap_to_image = dict(zip(colmap_files, image_files)) image_paths = [ os.path.join(image_dir, colmap_to_image[f]) for f in image_names ] print("loading images") images = [imageio.imread(x) for x in tqdm.tqdm(image_paths)] images = np.stack(images, axis=0) # Select the split. all_indices = np.arange(images.shape[0]) split_indices = { "test": all_indices[all_indices % 8 == 0], "train": all_indices[all_indices % 8 != 0], } indices = split_indices[split] # All per-image quantities must be re-indexed using the split indices. images = images[indices] camtoworlds = camtoworlds[indices] return images, camtoworlds, K class SubjectLoader(torch.utils.data.Dataset): """Single subject data loader for training and evaluation.""" SPLITS = ["train", "test"] SUBJECT_IDS = [ "garden", "bicycle", "bonsai", "counter", "kitchen", "room", "stump", ] OPENGL_CAMERA = False def __init__( self, subject_id: str, root_fp: str, split: str, color_bkgd_aug: str = "white", num_rays: int = None, near: float = None, far: float = None, batch_over_images: bool = True, factor: int = 1, ): super().__init__() assert split in self.SPLITS, "%s" % split assert subject_id in self.SUBJECT_IDS, "%s" % subject_id assert color_bkgd_aug in ["white", "black", "random"] self.split = split self.num_rays = num_rays self.near = near self.far = far self.training = (num_rays is not None) and ( split in ["train", "trainval"] ) self.color_bkgd_aug = color_bkgd_aug self.batch_over_images = batch_over_images self.images, self.camtoworlds, self.K = _load_colmap( root_fp, subject_id, split, factor ) self.images = torch.from_numpy(self.images).to(torch.uint8) self.camtoworlds = torch.from_numpy(self.camtoworlds).to(torch.float32) self.K = torch.tensor(self.K).to(torch.float32) self.height, self.width = self.images.shape[1:3] def __len__(self): return len(self.images) @torch.no_grad() def __getitem__(self, index): data = 
def preprocess(self, data):
    """Attach a background colour to a fetched batch.

    The colour is chosen by ``self.color_bkgd_aug`` while training and is
    always white otherwise. Pixels are passed through unchanged; any extra
    keys of ``data`` are forwarded as-is.

    Returns:
        dict with ``"pixels"``, ``"rays"``, ``"color_bkgd"`` followed by the
        remaining entries of ``data``.
    """
    pixels, rays = data["rgb"], data["rays"]
    device = self.images.device

    if not self.training:
        # just use white during inference
        color_bkgd = torch.ones(3, device=device)
    elif self.color_bkgd_aug == "random":
        color_bkgd = torch.rand(3, device=device)
    elif self.color_bkgd_aug == "white":
        color_bkgd = torch.ones(3, device=device)
    elif self.color_bkgd_aug == "black":
        color_bkgd = torch.zeros(3, device=device)

    batch = {
        "pixels": pixels,  # [n_rays, 3] or [h, w, 3]
        "rays": rays,  # [n_rays,] or [h, w]
        "color_bkgd": color_bkgd,  # [3,]
    }
    # Forward everything else that came with the fetched data.
    batch.update(
        (key, value)
        for key, value in data.items()
        if key not in ["rgb", "rays"]
    )
    return batch
def _load_renderings(root_fp: str, subject_id: str, split: str):
    """Read all frames of one synthetic NeRF subject.

    ``transforms_<split>.json`` inside ``<root_fp>/<subject_id>`` lists the
    frames; each frame contributes one PNG and one transform matrix. A
    ``root_fp`` that does not start with "/" is resolved relative to the
    directory two levels above this file.

    Returns:
        ``(images, camtoworlds, focal)`` -- the first two stacked along a new
        leading frame axis, ``focal`` computed from ``camera_angle_x`` and
        the image width.
    """
    is_relative = not root_fp.startswith("/")
    if is_relative:
        # allow relative path. e.g., "./data/nerf_synthetic/"
        here = os.path.dirname(os.path.abspath(__file__))
        root_fp = os.path.join(here, "..", "..", root_fp)

    data_dir = os.path.join(root_fp, subject_id)
    meta_path = os.path.join(data_dir, "transforms_{}.json".format(split))
    with open(meta_path, "r") as fp:
        meta = json.load(fp)

    frames = meta["frames"]
    images = np.stack(
        [
            imageio.imread(os.path.join(data_dir, frame["file_path"] + ".png"))
            for frame in frames
        ],
        axis=0,
    )
    camtoworlds = np.stack(
        [frame["transform_matrix"] for frame in frames], axis=0
    )

    # Focal length in pixels from the horizontal field of view.
    width = images.shape[2]
    half_fov = 0.5 * float(meta["camera_angle_x"])
    focal = 0.5 * width / np.tan(half_fov)

    return images, camtoworlds, focal
def preprocess(self, data):
    """Composite the fetched RGBA pixels over a background colour.

    While training the background follows ``self.color_bkgd_aug``
    ("random", "white" or "black"); otherwise it is white. The last axis of
    ``data["rgba"]`` is split into 3 colour channels and 1 alpha channel.

    Returns:
        dict with ``"pixels"`` (composited colours), ``"rays"``,
        ``"color_bkgd"`` and every other entry of ``data``.
    """
    rgba, rays = data["rgba"], data["rays"]
    pixels, alpha = rgba.split([3, 1], dim=-1)
    device = self.images.device

    if self.training:
        background_factories = {
            "random": torch.rand,
            "white": torch.ones,
            "black": torch.zeros,
        }
        # An unrecognised mode leaves the colour unset, as a plain
        # if/elif chain without a final else would.
        if self.color_bkgd_aug in background_factories:
            make_background = background_factories[self.color_bkgd_aug]
            color_bkgd = make_background(3, device=device)
    else:
        # just use white during inference
        color_bkgd = torch.ones(3, device=device)

    # Standard alpha compositing over the chosen background.
    pixels = pixels * alpha + color_bkgd * (1.0 - alpha)

    result = {
        "pixels": pixels,  # [n_rays, 3] or [h, w, 3]
        "rays": rays,  # [n_rays,] or [h, w]
        "color_bkgd": color_bkgd,  # [3,]
    }
    for key, value in data.items():
        if key not in ["rgba", "rays"]:
            result[key] = value
    return result
else: image_id = [index] x, y = torch.meshgrid( torch.arange(self.WIDTH, device=self.images.device), torch.arange(self.HEIGHT, device=self.images.device), indexing="xy", ) x = x.flatten() y = y.flatten() # generate rays rgba = self.images[image_id, y, x] / 255.0 # (num_rays, 4) c2w = self.camtoworlds[image_id] # (num_rays, 3, 4) camera_dirs = F.pad( torch.stack( [ (x - self.K[0, 2] + 0.5) / self.K[0, 0], (y - self.K[1, 2] + 0.5) / self.K[1, 1] * (-1.0 if self.OPENGL_CAMERA else 1.0), ], dim=-1, ), (0, 1), value=(-1.0 if self.OPENGL_CAMERA else 1.0), ) # [num_rays, 3] # [n_cams, height, width, 3] directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1) origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape) viewdirs = directions / torch.linalg.norm( directions, dim=-1, keepdims=True ) if self.training: origins = torch.reshape(origins, (num_rays, 3)) viewdirs = torch.reshape(viewdirs, (num_rays, 3)) rgba = torch.reshape(rgba, (num_rays, 4)) else: origins = torch.reshape(origins, (self.HEIGHT, self.WIDTH, 3)) viewdirs = torch.reshape(viewdirs, (self.HEIGHT, self.WIDTH, 3)) rgba = torch.reshape(rgba, (self.HEIGHT, self.WIDTH, 4)) rays = Rays(origins=origins, viewdirs=viewdirs) return { "rgba": rgba, # [h, w, 4] or [num_rays, 4] "rays": rays, # [h, w, 3] or [num_rays, 3] } ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/utils.py ================================================ """ Copyright (c) 2022 Ruilong Li, UC Berkeley. 
""" import collections Rays = collections.namedtuple("Rays", ("origins", "viewdirs")) def namedtuple_map(fn, tup): """Apply `fn` to each element of `tup` and cast to `tup`'s namedtuple.""" return type(tup)(*(None if x is None else fn(x) for x in tup)) ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/radiance_fields/__init__.py ================================================ ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/radiance_fields/mlp.py ================================================ """ Copyright (c) 2022 Ruilong Li, UC Berkeley. """ import functools import math from typing import Callable, Optional import torch import torch.nn as nn import torch.nn.functional as F class MLP(nn.Module): def __init__( self, input_dim: int, # The number of input tensor channels. output_dim: int = None, # The number of output tensor channels. net_depth: int = 8, # The depth of the MLP. net_width: int = 256, # The width of the MLP. skip_layer: int = 4, # The layer to add skip layers to. 
def forward(self, x):
    """Run the input through the hidden stack and the optional output head.

    After every hidden layer whose index is a positive multiple of
    ``self.skip_layer`` the original input is concatenated to the
    activations along the last axis. When ``self.output_enabled`` is false
    the activations of the last hidden layer are returned directly.
    """
    skip_input = x
    hidden = x
    for depth in range(self.net_depth):
        hidden = self.hidden_activation(self.hidden_layers[depth](hidden))
        no_skip_here = (
            (self.skip_layer is None)
            or (depth % self.skip_layer != 0)
            or (depth <= 0)
        )
        if no_skip_here:
            continue
        # Skip connection: re-inject the raw input.
        hidden = torch.cat([hidden, skip_input], dim=-1)

    if not self.output_enabled:
        return hidden
    return self.output_activation(self.output_layer(hidden))
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Encode ``x`` with sines at the scales stored in ``self.scales``.

    Args:
        x: [..., x_dim]
    Returns:
        latent: [..., latent_dim]. The input itself is returned when
        ``min_deg == max_deg``; otherwise the result is
        ``sin(scaled)`` followed by ``sin(scaled + pi / 2)``, optionally
        prefixed by ``x`` when ``use_identity`` is set.
    """
    num_scales = self.max_deg - self.min_deg
    if num_scales == 0:
        return x

    # [..., num_scales, x_dim] -> [..., num_scales * x_dim]
    scaled = x.unsqueeze(-2) * self.scales.unsqueeze(-1)
    scaled = torch.reshape(
        scaled, list(x.shape[:-1]) + [num_scales * self.x_dim]
    )

    # sin(a + pi/2) supplies the cosine half of the encoding.
    phases = torch.cat([scaled, scaled + 0.5 * math.pi], dim=-1)
    latent = torch.sin(phases)

    if not self.use_identity:
        return latent
    return torch.cat([x, latent], dim=-1)
class DNeRFRadianceField(nn.Module):
    """Radiance field for dynamic scenes.

    A small warp MLP predicts a per-point offset from the encoded position
    and timestamp; the offset point is then queried in a
    ``VanillaNeRFRadianceField``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.posi_encoder = SinusoidalEncoder(3, 0, 4, True)
        self.time_encoder = SinusoidalEncoder(1, 0, 4, True)
        warp_input_dim = (
            self.posi_encoder.latent_dim + self.time_encoder.latent_dim
        )
        self.warp = MLP(
            input_dim=warp_input_dim,
            output_dim=3,
            net_depth=4,
            net_width=64,
            skip_layer=2,
            output_init=functools.partial(torch.nn.init.uniform_, b=1e-4),
        )
        self.nerf = VanillaNeRFRadianceField()

    def _deform(self, x, t):
        """Return ``x`` shifted by the warp network's offset at time ``t``."""
        encoded = torch.cat(
            [self.posi_encoder(x), self.time_encoder(t)], dim=-1
        )
        return x + self.warp(encoded)

    def query_opacity(self, x, timestamps, step_size):
        """Approximate opacity at ``x``, pairing each point with a randomly
        drawn entry of ``timestamps``."""
        num_points = x.shape[0]
        picks = torch.randint(
            0, len(timestamps), (num_points,), device=x.device
        )
        density = self.query_density(x, timestamps[picks])
        # if the density is small enough those two are the same.
        # opacity = 1.0 - torch.exp(-density * step_size)
        return density * step_size

    def query_density(self, x, t):
        """Density of the wrapped NeRF at the warped positions."""
        return self.nerf.query_density(self._deform(x, t))

    def forward(self, x, t, condition=None):
        """Evaluate the wrapped NeRF at the warped positions."""
        return self.nerf(self._deform(x, t), condition=condition)
" "Please install tinycudann by: " "pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch" ) exit() class _TruncExp(Function): # pylint: disable=abstract-method # Implementation from torch-ngp: # https://github.com/ashawkey/torch-ngp/blob/93b08a0d4ec1cc6e69d85df7f0acdfb99603b628/activation.py @staticmethod @custom_fwd(cast_inputs=torch.float32) def forward(ctx, x): # pylint: disable=arguments-differ ctx.save_for_backward(x) return torch.exp(x) @staticmethod @custom_bwd def backward(ctx, g): # pylint: disable=arguments-differ x = ctx.saved_tensors[0] return g * torch.exp(torch.clamp(x, max=15)) trunc_exp = _TruncExp.apply def contract_to_unisphere( x: torch.Tensor, aabb: torch.Tensor, eps: float = 1e-6, derivative: bool = False, ): aabb_min, aabb_max = torch.split(aabb, 3, dim=-1) x = (x - aabb_min) / (aabb_max - aabb_min) x = x * 2 - 1 # aabb is at [-1, 1] mag = x.norm(dim=-1, keepdim=True) mask = mag.squeeze(-1) > 1 if derivative: dev = (2 * mag - 1) / mag**2 + 2 * x**2 * ( 1 / mag**3 - (2 * mag - 1) / mag**4 ) dev[~mask] = 1.0 dev = torch.clamp(dev, min=eps) return dev else: x[mask] = (2 - 1 / mag[mask]) * (x[mask] / mag[mask]) x = x / 4 + 0.5 # [-inf, inf] is at [0, 1] return x class NGPradianceField(torch.nn.Module): """Instance-NGP radiance Field""" def __init__( self, aabb: Union[torch.Tensor, List[float]], num_dim: int = 3, use_viewdirs: bool = True, density_activation: Callable = lambda x: trunc_exp(x - 1), unbounded: bool = False, geo_feat_dim: int = 15, n_levels: int = 16, log2_hashmap_size: int = 19, ) -> None: super().__init__() if not isinstance(aabb, torch.Tensor): aabb = torch.tensor(aabb, dtype=torch.float32) self.register_buffer("aabb", aabb) self.num_dim = num_dim self.use_viewdirs = use_viewdirs self.density_activation = density_activation self.unbounded = unbounded self.geo_feat_dim = geo_feat_dim per_level_scale = 1.4472692012786865 if self.use_viewdirs: self.direction_encoding = tcnn.Encoding( 
def forward(
    self,
    positions: torch.Tensor,
    directions: torch.Tensor = None,
):
    """Query colour and density at ``positions``.

    Args:
        positions: Sample positions, forwarded to ``query_density``.
        directions: View directions. Required (and must have the same shape
            as ``positions``) when ``self.use_viewdirs`` is true; ignored by
            the colour head otherwise.

    Returns:
        ``(rgb, density)``.

    Raises:
        ValueError: if ``self.use_viewdirs`` is true but no directions are
            given.
    """
    if self.use_viewdirs:
        if directions is None:
            raise ValueError(
                "directions must be provided when use_viewdirs is True"
            )
        assert (
            positions.shape == directions.shape
        ), f"{positions.shape} v.s. {directions.shape}"

    # The field is queried unconditionally so that a model built with
    # use_viewdirs=False also returns a result.
    density, embedding = self.query_density(positions, return_feat=True)
    rgb = self._query_rgb(directions, embedding=embedding)
    return rgb, density
# Script entry point: trains a DNeRFRadianceField on one D-NeRF scene and, once
# `max_steps` is reached, renders the test split and prints the average PSNR.
if __name__ == "__main__":

    device = "cuda:0"
    set_random_seed(42)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train_split",
        type=str,
        default="train",
        choices=["train"],
        help="which train split to use",
    )
    parser.add_argument(
        "--scene",
        type=str,
        default="lego",
        choices=[
            # dnerf
            "bouncingballs",
            "hellwarrior",
            "hook",
            "jumpingjacks",
            "lego",
            "mutant",
            "standup",
            "trex",
        ],
        help="which scene to use",
    )
    # The aabb is given as a comma separated string and parsed to six floats
    # (min xyz followed by max xyz, judging by the slicing below).
    parser.add_argument(
        "--aabb",
        type=lambda s: [float(item) for item in s.split(",")],
        default="-1.5,-1.5,-1.5,1.5,1.5,1.5",
        help="delimited list input",
    )
    parser.add_argument(
        "--test_chunk_size",
        type=int,
        default=8192,
    )
    parser.add_argument("--cone_angle", type=float, default=0.0)
    args = parser.parse_args()

    render_n_samples = 1024

    # setup the scene bounding box.
    contraction_type = ContractionType.AABB
    scene_aabb = torch.tensor(args.aabb, dtype=torch.float32, device=device)
    near_plane = None
    far_plane = None
    # Step size = (longest aabb edge * sqrt(3)) / render_n_samples.
    render_step_size = (
        (scene_aabb[3:] - scene_aabb[:3]).max()
        * math.sqrt(3)
        / render_n_samples
    ).item()

    # setup the radiance field we want to train.
    max_steps = 30000
    grad_scaler = torch.cuda.amp.GradScaler(1)
    radiance_field = DNeRFRadianceField().to(device)
    optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4)
    # Learning rate is multiplied by 0.33 at 1/2, 3/4, 5/6 and 9/10 of training.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[
            max_steps // 2,
            max_steps * 3 // 4,
            max_steps * 5 // 6,
            max_steps * 9 // 10,
        ],
        gamma=0.33,
    )
    # setup the dataset
    # NOTE(review): hard-coded, machine-specific data location.
    data_root_fp = "/home/ruilongli/data/dnerf/"
    target_sample_batch_size = 1 << 16
    grid_resolution = 128

    train_dataset = SubjectLoader(
        subject_id=args.scene,
        root_fp=data_root_fp,
        split=args.train_split,
        num_rays=target_sample_batch_size // render_n_samples,
    )
    # Move the dataset tensors to the training device once, up front.
    train_dataset.images = train_dataset.images.to(device)
    train_dataset.camtoworlds = train_dataset.camtoworlds.to(device)
    train_dataset.K = train_dataset.K.to(device)
    train_dataset.timestamps = train_dataset.timestamps.to(device)

    test_dataset = SubjectLoader(
        subject_id=args.scene,
        root_fp=data_root_fp,
        split="test",
        num_rays=None,
    )
    test_dataset.images = test_dataset.images.to(device)
    test_dataset.camtoworlds = test_dataset.camtoworlds.to(device)
    test_dataset.K = test_dataset.K.to(device)
    test_dataset.timestamps = test_dataset.timestamps.to(device)

    occupancy_grid = OccupancyGrid(
        roi_aabb=args.aabb,
        resolution=grid_resolution,
        contraction_type=contraction_type,
    ).to(device)

    # training
    step = 0
    tic = time.time()
    # The epoch bound is effectively infinite; the loop ends through exit()
    # below once `step` reaches `max_steps`.
    for epoch in range(10000000):
        for i in range(len(train_dataset)):
            radiance_field.train()
            data = train_dataset[i]

            render_bkgd = data["color_bkgd"]
            rays = data["rays"]
            pixels = data["pixels"]
            timestamps = data["timestamps"]

            # update occupancy grid
            occupancy_grid.every_n_step(
                step=step,
                occ_eval_fn=lambda x: radiance_field.query_opacity(
                    x, timestamps, render_step_size
                ),
            )

            # render
            rgb, acc, depth, n_rendering_samples = render_image(
                radiance_field,
                occupancy_grid,
                rays,
                scene_aabb,
                # rendering options
                near_plane=near_plane,
                far_plane=far_plane,
                render_step_size=render_step_size,
                render_bkgd=render_bkgd,
                cone_angle=args.cone_angle,
                # alpha threshold is only enabled after the first 1000 steps
                alpha_thre=0.01 if step > 1000 else 0.00,
                # dnerf options
                timestamps=timestamps,
            )
            # Nothing was sampled: skip the update. `step` is not advanced
            # on this path because the increment sits at the loop's end.
            if n_rendering_samples == 0:
                continue

            # dynamic batch size for rays to keep sample batch size constant.
            num_rays = len(pixels)
            num_rays = int(
                num_rays
                * (target_sample_batch_size / float(n_rendering_samples))
            )
            train_dataset.update_num_rays(num_rays)
            # Only rays with non-zero accumulated opacity enter the loss.
            alive_ray_mask = acc.squeeze(-1) > 0

            # compute loss
            loss = F.smooth_l1_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])

            optimizer.zero_grad()
            # do not unscale it because we are using Adam.
            grad_scaler.scale(loss).backward()
            optimizer.step()
            scheduler.step()

            if step % 5000 == 0:
                elapsed_time = time.time() - tic
                # For logging the MSE is reported, not the smooth-L1 value
                # that was optimised (this rebinds `loss`).
                loss = F.mse_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])
                print(
                    f"elapsed_time={elapsed_time:.2f}s | step={step} | "
                    f"loss={loss:.5f} | "
                    f"alive_ray_mask={alive_ray_mask.long().sum():d} | "
                    f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} |"
                )

            # Evaluate when `step` is a positive multiple of `max_steps`; as
            # training stops at `max_steps`, this runs once at the very end.
            if step >= 0 and step % max_steps == 0 and step > 0:
                # evaluation
                radiance_field.eval()

                psnrs = []
                with torch.no_grad():
                    # NOTE: this loop rebinds `i`, `data`, `rays`, ... of the
                    # enclosing training loop.
                    for i in tqdm.tqdm(range(len(test_dataset))):
                        data = test_dataset[i]
                        render_bkgd = data["color_bkgd"]
                        rays = data["rays"]
                        pixels = data["pixels"]
                        timestamps = data["timestamps"]

                        # rendering
                        rgb, acc, depth, _ = render_image(
                            radiance_field,
                            occupancy_grid,
                            rays,
                            scene_aabb,
                            # rendering options
                            near_plane=None,
                            far_plane=None,
                            render_step_size=render_step_size,
                            render_bkgd=render_bkgd,
                            cone_angle=args.cone_angle,
                            alpha_thre=0.01,
                            # test options
                            test_chunk_size=args.test_chunk_size,
                            # dnerf options
                            timestamps=timestamps,
                        )
                        mse = F.mse_loss(rgb, pixels)
                        # PSNR = -10 * log10(mse)
                        psnr = -10.0 * torch.log(mse) / np.log(10.0)
                        psnrs.append(psnr.item())
                        # imageio.imwrite(
                        #     "acc_binary_test.png",
                        #     ((acc > 0).float().cpu().numpy() * 255).astype(np.uint8),
                        # )
                        # imageio.imwrite(
                        #     "rgb_test.png",
                        #     (rgb.cpu().numpy() * 255).astype(np.uint8),
                        # )
                        # break
                psnr_avg = sum(psnrs) / len(psnrs)
                print(f"evaluation: psnr_avg={psnr_avg}")
                train_dataset.training = True

            if step == max_steps:
                print("training stops")
                exit()

            step += 1
# Script entry point: trains a VanillaNeRFRadianceField on a NeRF-synthetic scene
# (or the "garden" scene) and, once `max_steps` is reached, renders the test
# split and prints the average PSNR.
if __name__ == "__main__":

    device = "cuda:0"
    set_random_seed(42)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train_split",
        type=str,
        default="trainval",
        choices=["train", "trainval"],
        help="which train split to use",
    )
    parser.add_argument(
        "--scene",
        type=str,
        default="lego",
        choices=[
            # nerf synthetic
            "chair",
            "drums",
            "ficus",
            "hotdog",
            "lego",
            "materials",
            "mic",
            "ship",
            # mipnerf360 unbounded
            "garden",
        ],
        help="which scene to use",
    )
    # The aabb is given as a comma separated string and parsed to six floats
    # (min xyz followed by max xyz, judging by the slicing below).
    parser.add_argument(
        "--aabb",
        type=lambda s: [float(item) for item in s.split(",")],
        default="-1.5,-1.5,-1.5,1.5,1.5,1.5",
        help="delimited list input",
    )
    parser.add_argument(
        "--test_chunk_size",
        type=int,
        default=8192,
    )
    parser.add_argument(
        "--unbounded",
        action="store_true",
        help="whether to use unbounded rendering",
    )
    parser.add_argument("--cone_angle", type=float, default=0.0)
    args = parser.parse_args()

    render_n_samples = 1024

    # setup the scene bounding box.
    if args.unbounded:
        print("Using unbounded rendering")
        contraction_type = ContractionType.UN_BOUNDED_SPHERE
        # contraction_type = ContractionType.UN_BOUNDED_TANH
        # No scene aabb in unbounded mode; rays are limited by near/far planes.
        scene_aabb = None
        near_plane = 0.2
        far_plane = 1e4
        render_step_size = 1e-2
    else:
        contraction_type = ContractionType.AABB
        scene_aabb = torch.tensor(args.aabb, dtype=torch.float32, device=device)
        near_plane = None
        far_plane = None
        # Step size = (longest aabb edge * sqrt(3)) / render_n_samples.
        render_step_size = (
            (scene_aabb[3:] - scene_aabb[:3]).max()
            * math.sqrt(3)
            / render_n_samples
        ).item()

    # setup the radiance field we want to train.
    max_steps = 50000
    grad_scaler = torch.cuda.amp.GradScaler(1)
    radiance_field = VanillaNeRFRadianceField().to(device)
    optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4)
    # Learning rate is multiplied by 0.33 at 1/2, 3/4, 5/6 and 9/10 of training.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[
            max_steps // 2,
            max_steps * 3 // 4,
            max_steps * 5 // 6,
            max_steps * 9 // 10,
        ],
        gamma=0.33,
    )

    # setup the dataset
    train_dataset_kwargs = {}
    test_dataset_kwargs = {}
    # The dataset loader is chosen by scene name here, not by `--unbounded`.
    # NOTE(review): both data roots are hard-coded, machine-specific paths.
    if args.scene == "garden":
        from datasets.nerf_360_v2 import SubjectLoader

        data_root_fp = "/home/ruilongli/data/360_v2/"
        target_sample_batch_size = 1 << 16
        train_dataset_kwargs = {"color_bkgd_aug": "random", "factor": 4}
        test_dataset_kwargs = {"factor": 4}
        grid_resolution = 128
    else:
        from datasets.nerf_synthetic import SubjectLoader

        data_root_fp = "/home/ruilongli/data/nerf_synthetic/"
        target_sample_batch_size = 1 << 16
        grid_resolution = 128

    train_dataset = SubjectLoader(
        subject_id=args.scene,
        root_fp=data_root_fp,
        split=args.train_split,
        num_rays=target_sample_batch_size // render_n_samples,
        **train_dataset_kwargs,
    )

    # Move the dataset tensors to the training device once, up front.
    train_dataset.images = train_dataset.images.to(device)
    train_dataset.camtoworlds = train_dataset.camtoworlds.to(device)
    train_dataset.K = train_dataset.K.to(device)

    test_dataset = SubjectLoader(
        subject_id=args.scene,
        root_fp=data_root_fp,
        split="test",
        num_rays=None,
        **test_dataset_kwargs,
    )
    test_dataset.images = test_dataset.images.to(device)
    test_dataset.camtoworlds = test_dataset.camtoworlds.to(device)
    test_dataset.K = test_dataset.K.to(device)

    occupancy_grid = OccupancyGrid(
        roi_aabb=args.aabb,
        resolution=grid_resolution,
        contraction_type=contraction_type,
    ).to(device)

    # training
    step = 0
    tic = time.time()
    # The epoch bound is effectively infinite; the loop ends through exit()
    # below once `step` reaches `max_steps`.
    for epoch in range(10000000):
        for i in range(len(train_dataset)):
            radiance_field.train()
            data = train_dataset[i]

            render_bkgd = data["color_bkgd"]
            rays = data["rays"]
            pixels = data["pixels"]

            # update occupancy grid
            occupancy_grid.every_n_step(
                step=step,
                occ_eval_fn=lambda x: radiance_field.query_opacity(
                    x, render_step_size
                ),
            )

            # render
            rgb, acc, depth, n_rendering_samples = render_image(
                radiance_field,
                occupancy_grid,
                rays,
                scene_aabb,
                # rendering options
                near_plane=near_plane,
                far_plane=far_plane,
                render_step_size=render_step_size,
                render_bkgd=render_bkgd,
                cone_angle=args.cone_angle,
            )
            # Nothing was sampled: skip the update. `step` is not advanced
            # on this path because the increment sits at the loop's end.
            if n_rendering_samples == 0:
                continue

            # dynamic batch size for rays to keep sample batch size constant.
            num_rays = len(pixels)
            num_rays = int(
                num_rays
                * (target_sample_batch_size / float(n_rendering_samples))
            )
            train_dataset.update_num_rays(num_rays)
            # Only rays with non-zero accumulated opacity enter the loss.
            alive_ray_mask = acc.squeeze(-1) > 0

            # compute loss
            loss = F.smooth_l1_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])

            optimizer.zero_grad()
            # do not unscale it because we are using Adam.
            grad_scaler.scale(loss).backward()
            optimizer.step()
            scheduler.step()

            if step % 5000 == 0:
                elapsed_time = time.time() - tic
                # For logging the MSE is reported, not the smooth-L1 value
                # that was optimised (this rebinds `loss`).
                loss = F.mse_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])
                print(
                    f"elapsed_time={elapsed_time:.2f}s | step={step} | "
                    f"loss={loss:.5f} | "
                    f"alive_ray_mask={alive_ray_mask.long().sum():d} | "
                    f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} |"
                )

            # Evaluate when `step` is a positive multiple of `max_steps`; as
            # training stops at `max_steps`, this runs once at the very end.
            if step >= 0 and step % max_steps == 0 and step > 0:
                # evaluation
                radiance_field.eval()

                psnrs = []
                with torch.no_grad():
                    # NOTE: this loop rebinds `i`, `data`, `rays`, ... of the
                    # enclosing training loop.
                    for i in tqdm.tqdm(range(len(test_dataset))):
                        data = test_dataset[i]
                        render_bkgd = data["color_bkgd"]
                        rays = data["rays"]
                        pixels = data["pixels"]

                        # rendering
                        # NOTE(review): near/far planes are passed as None
                        # here even in unbounded mode, unlike the training
                        # call above - confirm this is intended.
                        rgb, acc, depth, _ = render_image(
                            radiance_field,
                            occupancy_grid,
                            rays,
                            scene_aabb,
                            # rendering options
                            near_plane=None,
                            far_plane=None,
                            render_step_size=render_step_size,
                            render_bkgd=render_bkgd,
                            cone_angle=args.cone_angle,
                            # test options
                            test_chunk_size=args.test_chunk_size,
                        )
                        mse = F.mse_loss(rgb, pixels)
                        # PSNR = -10 * log10(mse)
                        psnr = -10.0 * torch.log(mse) / np.log(10.0)
                        psnrs.append(psnr.item())
                        # imageio.imwrite(
                        #     "acc_binary_test.png",
                        #     ((acc > 0).float().cpu().numpy() * 255).astype(np.uint8),
                        # )
                        # imageio.imwrite(
                        #     "rgb_test.png",
                        #     (rgb.cpu().numpy() * 255).astype(np.uint8),
                        # )
                        # break
                psnr_avg = sum(psnrs) / len(psnrs)
                print(f"evaluation: psnr_avg={psnr_avg}")
                train_dataset.training = True

            if step == max_steps:
                print("training stops")
                exit()

            step += 1
""" import argparse import math import os import time import imageio import numpy as np import torch import torch.nn.functional as F import tqdm from radiance_fields.ngp import NGPradianceField from utils import render_image, set_random_seed from nerfacc import ContractionType, OccupancyGrid if __name__ == "__main__": device = "cuda:0" set_random_seed(42) parser = argparse.ArgumentParser() parser.add_argument( "--train_split", type=str, default="trainval", choices=["train", "trainval"], help="which train split to use", ) parser.add_argument( "--scene", type=str, default="lego", choices=[ # nerf synthetic "chair", "drums", "ficus", "hotdog", "lego", "materials", "mic", "ship", # mipnerf360 unbounded "garden", "bicycle", "bonsai", "counter", "kitchen", "room", "stump", ], help="which scene to use", ) parser.add_argument( "--aabb", type=lambda s: [float(item) for item in s.split(",")], default="-1.5,-1.5,-1.5,1.5,1.5,1.5", help="delimited list input", ) parser.add_argument( "--test_chunk_size", type=int, default=8192, ) parser.add_argument( "--unbounded", action="store_true", help="whether to use unbounded rendering", ) parser.add_argument( "--auto_aabb", action="store_true", help="whether to automatically compute the aabb", ) parser.add_argument("--cone_angle", type=float, default=0.0) args = parser.parse_args() render_n_samples = 1024 # setup the dataset train_dataset_kwargs = {} test_dataset_kwargs = {} if args.unbounded: from datasets.nerf_360_v2 import SubjectLoader data_root_fp = "/home/ruilongli/data/360_v2/" target_sample_batch_size = 1 << 20 train_dataset_kwargs = {"color_bkgd_aug": "random", "factor": 4} test_dataset_kwargs = {"factor": 4} grid_resolution = 256 else: from datasets.nerf_synthetic import SubjectLoader data_root_fp = "/home/ruilongli/data/nerf_synthetic/" target_sample_batch_size = 1 << 18 grid_resolution = 128 train_dataset = SubjectLoader( subject_id=args.scene, root_fp=data_root_fp, split=args.train_split, num_rays=target_sample_batch_size 
// render_n_samples, **train_dataset_kwargs, ) train_dataset.images = train_dataset.images.to(device) train_dataset.camtoworlds = train_dataset.camtoworlds.to(device) train_dataset.K = train_dataset.K.to(device) test_dataset = SubjectLoader( subject_id=args.scene, root_fp=data_root_fp, split="test", num_rays=None, **test_dataset_kwargs, ) test_dataset.images = test_dataset.images.to(device) test_dataset.camtoworlds = test_dataset.camtoworlds.to(device) test_dataset.K = test_dataset.K.to(device) if args.auto_aabb: camera_locs = torch.cat( [train_dataset.camtoworlds, test_dataset.camtoworlds] )[:, :3, -1] args.aabb = torch.cat( [camera_locs.min(dim=0).values, camera_locs.max(dim=0).values] ).tolist() print("Using auto aabb", args.aabb) # setup the scene bounding box. if args.unbounded: print("Using unbounded rendering") contraction_type = ContractionType.UN_BOUNDED_SPHERE # contraction_type = ContractionType.UN_BOUNDED_TANH scene_aabb = None near_plane = 0.2 far_plane = 1e4 render_step_size = 1e-2 alpha_thre = 1e-2 else: contraction_type = ContractionType.AABB scene_aabb = torch.tensor(args.aabb, dtype=torch.float32, device=device) near_plane = None far_plane = None render_step_size = ( (scene_aabb[3:] - scene_aabb[:3]).max() * math.sqrt(3) / render_n_samples ).item() alpha_thre = 0.0 # setup the radiance field we want to train. 
max_steps = 20000 grad_scaler = torch.cuda.amp.GradScaler(2**10) radiance_field = NGPradianceField( aabb=args.aabb, unbounded=args.unbounded, ).to(device) optimizer = torch.optim.Adam( radiance_field.parameters(), lr=1e-2, eps=1e-15 ) scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer, milestones=[max_steps // 2, max_steps * 3 // 4, max_steps * 9 // 10], gamma=0.33, ) occupancy_grid = OccupancyGrid( roi_aabb=args.aabb, resolution=grid_resolution, contraction_type=contraction_type, ).to(device) # training step = 0 tic = time.time() for epoch in range(10000000): for i in range(len(train_dataset)): radiance_field.train() data = train_dataset[i] render_bkgd = data["color_bkgd"] rays = data["rays"] pixels = data["pixels"] def occ_eval_fn(x): if args.cone_angle > 0.0: # randomly sample a camera for computing step size. camera_ids = torch.randint( 0, len(train_dataset), (x.shape[0],), device=device ) origins = train_dataset.camtoworlds[camera_ids, :3, -1] t = (origins - x).norm(dim=-1, keepdim=True) # compute actual step size used in marching, based on the distance to the camera. step_size = torch.clamp( t * args.cone_angle, min=render_step_size ) # filter out the points that are not in the near far plane. if (near_plane is not None) and (far_plane is not None): step_size = torch.where( (t > near_plane) & (t < far_plane), step_size, torch.zeros_like(step_size), ) else: step_size = render_step_size # compute occupancy density = radiance_field.query_density(x) return density * step_size # update occupancy grid occupancy_grid.every_n_step(step=step, occ_eval_fn=occ_eval_fn) # render rgb, acc, depth, n_rendering_samples = render_image( radiance_field, occupancy_grid, rays, scene_aabb, # rendering options near_plane=near_plane, far_plane=far_plane, render_step_size=render_step_size, render_bkgd=render_bkgd, cone_angle=args.cone_angle, alpha_thre=alpha_thre, ) if n_rendering_samples == 0: continue # dynamic batch size for rays to keep sample batch size constant. 
num_rays = len(pixels) num_rays = int( num_rays * (target_sample_batch_size / float(n_rendering_samples)) ) train_dataset.update_num_rays(num_rays) alive_ray_mask = acc.squeeze(-1) > 0 # compute loss loss = F.smooth_l1_loss(rgb[alive_ray_mask], pixels[alive_ray_mask]) optimizer.zero_grad() # do not unscale it because we are using Adam. grad_scaler.scale(loss).backward() optimizer.step() scheduler.step() if step % 10000 == 0: elapsed_time = time.time() - tic loss = F.mse_loss(rgb[alive_ray_mask], pixels[alive_ray_mask]) print( f"elapsed_time={elapsed_time:.2f}s | step={step} | " f"loss={loss:.5f} | " f"alive_ray_mask={alive_ray_mask.long().sum():d} | " f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} |" ) if step >= 0 and step % max_steps == 0 and step > 0: # evaluation radiance_field.eval() psnrs = [] with torch.no_grad(): for i in tqdm.tqdm(range(len(test_dataset))): data = test_dataset[i] render_bkgd = data["color_bkgd"] rays = data["rays"] pixels = data["pixels"] # rendering rgb, acc, depth, _ = render_image( radiance_field, occupancy_grid, rays, scene_aabb, # rendering options near_plane=near_plane, far_plane=far_plane, render_step_size=render_step_size, render_bkgd=render_bkgd, cone_angle=args.cone_angle, alpha_thre=alpha_thre, # test options test_chunk_size=args.test_chunk_size, ) mse = F.mse_loss(rgb, pixels) psnr = -10.0 * torch.log(mse) / np.log(10.0) psnrs.append(psnr.item()) # imageio.imwrite( # "acc_binary_test.png", # ((acc > 0).float().cpu().numpy() * 255).astype(np.uint8), # ) # imageio.imwrite( # "rgb_test.png", # (rgb.cpu().numpy() * 255).astype(np.uint8), # ) # break psnr_avg = sum(psnrs) / len(psnrs) print(f"evaluation: psnr_avg={psnr_avg}") train_dataset.training = True if step == max_steps: print("training stops") exit() step += 1 ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/utils.py ================================================ """ Copyright (c) 2022 
def set_random_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch with the same value."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)


def render_image(
    # scene
    radiance_field: torch.nn.Module,
    occupancy_grid: OccupancyGrid,
    rays: Rays,
    scene_aabb: torch.Tensor,
    # rendering options
    near_plane: Optional[float] = None,
    far_plane: Optional[float] = None,
    render_step_size: float = 1e-3,
    render_bkgd: Optional[torch.Tensor] = None,
    cone_angle: float = 0.0,
    alpha_thre: float = 0.0,
    # test options
    test_chunk_size: int = 8192,
    # only useful for dnerf
    timestamps: Optional[torch.Tensor] = None,
):
    """Render the pixels of an image.

    Rays are marched through the occupancy grid and volume-rendered in
    chunks: a single chunk while the field is in training mode,
    `test_chunk_size` rays per chunk otherwise.

    Returns:
        ``(colors, opacities, depths, n_rendering_samples)`` where the three
        tensors are viewed back to the leading shape of ``rays.origins`` and
        ``n_rendering_samples`` is the total sample count over all chunks.
    """
    rays_shape = rays.origins.shape
    if len(rays_shape) == 3:
        # Image-shaped rays: flatten (height, width, ...) to (num_rays, ...).
        height, width, _ = rays_shape
        num_rays = height * width
        rays = namedtuple_map(
            lambda r: r.reshape([num_rays] + list(r.shape[2:])), rays
        )
    else:
        num_rays, _ = rays_shape

    def _gather_samples(t_starts, t_ends, ray_indices):
        # Sample positions are the interval midpoints along each ray.
        # `chunk_rays` is bound by the chunk loop below before this runs.
        origins = chunk_rays.origins[ray_indices]
        dirs = chunk_rays.viewdirs[ray_indices]
        positions = origins + dirs * (t_starts + t_ends) / 2.0
        if timestamps is None:
            return positions, dirs, None
        # dnerf
        if radiance_field.training:
            times = timestamps[ray_indices]
        else:
            times = timestamps.expand_as(positions[:, :1])
        return positions, dirs, times

    def sigma_fn(t_starts, t_ends, ray_indices):
        positions, _, times = _gather_samples(t_starts, t_ends, ray_indices)
        if timestamps is not None:
            return radiance_field.query_density(positions, times)
        return radiance_field.query_density(positions)

    def rgb_sigma_fn(t_starts, t_ends, ray_indices):
        positions, dirs, times = _gather_samples(t_starts, t_ends, ray_indices)
        if timestamps is not None:
            return radiance_field(positions, times, dirs)
        return radiance_field(positions, dirs)

    if radiance_field.training:
        chunk = torch.iinfo(torch.int32).max
    else:
        chunk = test_chunk_size

    results = []
    for begin in range(0, num_rays, chunk):
        chunk_rays = namedtuple_map(lambda r: r[begin : begin + chunk], rays)
        ray_indices, t_starts, t_ends = ray_marching(
            chunk_rays.origins,
            chunk_rays.viewdirs,
            scene_aabb=scene_aabb,
            grid=occupancy_grid,
            sigma_fn=sigma_fn,
            near_plane=near_plane,
            far_plane=far_plane,
            render_step_size=render_step_size,
            stratified=radiance_field.training,
            cone_angle=cone_angle,
            alpha_thre=alpha_thre,
        )
        rgb, opacity, depth = rendering(
            t_starts,
            t_ends,
            ray_indices,
            n_rays=chunk_rays.origins.shape[0],
            rgb_sigma_fn=rgb_sigma_fn,
            render_bkgd=render_bkgd,
        )
        results.append([rgb, opacity, depth, len(t_starts)])

    # Transpose the per-chunk records; tensor columns are concatenated, the
    # sample-count column stays a tuple of ints.
    merged = []
    for column in zip(*results):
        if isinstance(column[0], torch.Tensor):
            merged.append(torch.cat(column, dim=0))
        else:
            merged.append(column)
    colors, opacities, depths, n_rendering_samples = merged

    out_shape = (*rays_shape[:-1], -1)
    return (
        colors.view(out_shape),
        opacities.view(out_shape),
        depths.view(out_shape),
        sum(n_rendering_samples),
    )
""" import warnings from .cdf import ray_resampling from .contraction import ContractionType, contract, contract_inv from .grid import Grid, OccupancyGrid, query_grid from .intersection import ray_aabb_intersect from .losses import distortion as loss_distortion from .pack import pack_data, pack_info, unpack_data, unpack_info from .ray_marching import ray_marching from .version import __version__ from .vol_rendering import ( accumulate_along_rays, accumulate_along_rays_patch_based, render_transmittance_from_alpha, render_transmittance_from_density, render_visibility, render_visibility_patch_based, render_weight_from_alpha, render_weight_from_density, render_weight_from_alpha_patch_based, render_weight_and_transmittance_from_alpha_patch_based, rendering, ) # About to be deprecated def unpack_to_ray_indices(*args, **kwargs): warnings.warn( "`unpack_to_ray_indices` will be deprecated. Please use `unpack_info` instead.", DeprecationWarning, stacklevel=2, ) return unpack_info(*args, **kwargs) __all__ = [ "__version__", "Grid", "OccupancyGrid", "query_grid", "ContractionType", "contract", "contract_inv", "ray_aabb_intersect", "ray_marching", "accumulate_along_rays", "accumulate_along_rays_patch_based", "render_visibility", "render_visibility_patch_based", "render_weight_from_alpha", "render_weight_from_alpha_patch_based", "render_weight_from_density", "rendering", "pack_data", "unpack_data", "unpack_info", "pack_info", "ray_resampling", "loss_distortion", "unpack_to_ray_indices", "render_transmittance_from_density", "render_transmittance_from_alpha", "render_weight_and_transmittance_from_alpha_patch_based" ] ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cdf.py ================================================ """ Copyright (c) 2022 Ruilong Li, UC Berkeley. 
""" from typing import Tuple from torch import Tensor import nerfacc.cuda as _C def ray_resampling( packed_info: Tensor, t_starts: Tensor, t_ends: Tensor, weights: Tensor, n_samples: int, ) -> Tuple[Tensor, Tensor, Tensor]: """Resample a set of rays based on the CDF of the weights. Args: packed_info (Tensor): Stores information on which samples belong to the same ray. \ See :func:`nerfacc.ray_marching` for details. Tensor with shape (n_rays, 2). t_starts: Where the frustum-shape sample starts along a ray. Tensor with \ shape (n_samples, 1). t_ends: Where the frustum-shape sample ends along a ray. Tensor with \ shape (n_samples, 1). weights: Volumetric rendering weights for those samples. Tensor with shape \ (n_samples,). n_samples (int): Number of samples per ray to resample. Returns: Resampled packed info (n_rays, 2), t_starts (n_samples, 1), and t_ends (n_samples, 1). """ ( resampled_packed_info, resampled_t_starts, resampled_t_ends, ) = _C.ray_resampling( packed_info.contiguous(), t_starts.contiguous(), t_ends.contiguous(), weights.contiguous(), n_samples, ) return resampled_packed_info, resampled_t_starts, resampled_t_ends ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/contraction.py ================================================ """ Copyright (c) 2022 Ruilong Li, UC Berkeley. """ from enum import Enum import torch import nerfacc.cuda as _C class ContractionType(Enum): """Space contraction options. This is an enum class that describes how a :class:`nerfacc.Grid` covers the 3D space. It is also used by :func:`nerfacc.ray_marching` to determine how to perform ray marching within the grid. The options in this enum class are: Attributes: AABB: Linearly map the region of interest :math:`[x_0, x_1]` to a unit cube in :math:`[0, 1]`. .. math:: f(x) = \\frac{x - x_0}{x_1 - x_0} UN_BOUNDED_TANH: Contract an unbounded space into a unit cube in :math:`[0, 1]` using tanh. 
class ContractionType(Enum):
    """Space contraction options.

    Describes how a :class:`nerfacc.Grid` covers the 3D space; it is also
    used by :func:`nerfacc.ray_marching` to determine how to perform ray
    marching within the grid.

    Attributes:
        AABB: Linearly map the region of interest :math:`[x_0, x_1]` to a
            unit cube in :math:`[0, 1]`.

            .. math:: f(x) = \\frac{x - x_0}{x_1 - x_0}

        UN_BOUNDED_TANH: Contract an unbounded space into a unit cube in
            :math:`[0, 1]` using tanh. The region of interest
            :math:`[x_0, x_1]` is first mapped into :math:`[-0.5, +0.5]`
            before applying tanh.

            .. math:: f(x) = \\frac{1}{2}(tanh(\\frac{x - x_0}{x_1 - x_0} - \\frac{1}{2}) + 1)

        UN_BOUNDED_SPHERE: Contract an unbounded space into a unit sphere.
            Used in `Mip-Nerf 360: Unbounded Anti-Aliased Neural Radiance Fields`_.

            .. math::
                f(x) =
                \\begin{cases}
                z(x) & ||z(x)|| \\leq 1 \\\\
                (2 - \\frac{1}{||z(x)||})(\\frac{z(x)}{||z(x)||}) & ||z(x)|| > 1
                \\end{cases}

            .. math::
                z(x) = \\frac{x - x_0}{x_1 - x_0} * 2 - 1

            .. _Mip-Nerf 360\: Unbounded Anti-Aliased Neural Radiance Fields:
                https://arxiv.org/abs/2111.12077

    """

    AABB = 0
    UN_BOUNDED_TANH = 1
    UN_BOUNDED_SPHERE = 2

    def to_cpp_version(self):
        """Return the backend enum object built from this member's value."""
        backend_enum = _C.ContractionTypeGetter(self.value)
        return backend_enum


@torch.no_grad()
def contract(
    x: torch.Tensor,
    roi: torch.Tensor,
    type: ContractionType = ContractionType.AABB,
) -> torch.Tensor:
    """Contract the space into [0, 1]^3.

    Args:
        x (torch.Tensor): Un-contracted points.
        roi (torch.Tensor): Region of interest.
        type (ContractionType): Contraction type.

    Returns:
        torch.Tensor: Contracted points ([0, 1]^3).
    """
    cpp_type = type.to_cpp_version()
    # The backend is handed contiguous tensors.
    points, region = x.contiguous(), roi.contiguous()
    return _C.contract(points, region, cpp_type)


@torch.no_grad()
def contract_inv(
    x: torch.Tensor,
    roi: torch.Tensor,
    type: ContractionType = ContractionType.AABB,
) -> torch.Tensor:
    """Recover the space from [0, 1]^3 by inverse contraction.

    Args:
        x (torch.Tensor): Contracted points ([0, 1]^3).
        roi (torch.Tensor): Region of interest.
        type (ContractionType): Contraction type.

    Returns:
        torch.Tensor: Un-contracted points.
    """
    cpp_type = type.to_cpp_version()
    # The backend is handed contiguous tensors.
    points, region = x.contiguous(), roi.contiguous()
    return _C.contract_inv(points, region, cpp_type)
""" from typing import Any, Callable def _make_lazy_cuda_func(name: str) -> Callable: def call_cuda(*args, **kwargs): # pylint: disable=import-outside-toplevel from ._backend import _C return getattr(_C, name)(*args, **kwargs) return call_cuda ContractionTypeGetter = _make_lazy_cuda_func("ContractionType") contract = _make_lazy_cuda_func("contract") contract_inv = _make_lazy_cuda_func("contract_inv") grid_query = _make_lazy_cuda_func("grid_query") ray_aabb_intersect = _make_lazy_cuda_func("ray_aabb_intersect") ray_marching = _make_lazy_cuda_func("ray_marching") ray_resampling = _make_lazy_cuda_func("ray_resampling") is_cub_available = _make_lazy_cuda_func("is_cub_available") transmittance_from_sigma_forward_cub = _make_lazy_cuda_func( "transmittance_from_sigma_forward_cub" ) transmittance_from_sigma_backward_cub = _make_lazy_cuda_func( "transmittance_from_sigma_backward_cub" ) transmittance_from_alpha_forward_cub = _make_lazy_cuda_func( "transmittance_from_alpha_forward_cub" ) transmittance_from_alpha_backward_cub = _make_lazy_cuda_func( "transmittance_from_alpha_backward_cub" ) transmittance_from_sigma_forward_naive = _make_lazy_cuda_func( "transmittance_from_sigma_forward_naive" ) transmittance_from_sigma_backward_naive = _make_lazy_cuda_func( "transmittance_from_sigma_backward_naive" ) transmittance_from_alpha_forward_naive = _make_lazy_cuda_func( "transmittance_from_alpha_forward_naive" ) transmittance_from_alpha_backward_naive = _make_lazy_cuda_func( "transmittance_from_alpha_backward_naive" ) transmittance_from_alpha_patch_based_forward_naive = _make_lazy_cuda_func( "transmittance_from_alpha_patch_based_forward_naive" ) transmittance_from_alpha_patch_based_backward_naive = _make_lazy_cuda_func( "transmittance_from_alpha_patch_based_backward_naive" ) weight_from_sigma_forward_naive = _make_lazy_cuda_func( "weight_from_sigma_forward_naive" ) weight_from_sigma_backward_naive = _make_lazy_cuda_func( "weight_from_sigma_backward_naive" ) 
weight_from_alpha_forward_naive = _make_lazy_cuda_func( "weight_from_alpha_forward_naive" ) weight_from_alpha_backward_naive = _make_lazy_cuda_func( "weight_from_alpha_backward_naive" ) # weight_from_alpha_importance_sampling_forward_naive = _make_lazy_cuda_func( # "weight_from_alpha_importance_sampling_forward_naive" # ) # # weight_from_alpha_importance_sampling_backward_naive = _make_lazy_cuda_func( # "weight_from_alpha_importance_sampling_backward_naive" # ) weight_from_alpha_patch_based_forward_naive = _make_lazy_cuda_func( "weight_from_alpha_patch_based_forward_naive" ) weight_from_alpha_patch_based_backward_naive = _make_lazy_cuda_func( "weight_from_alpha_patch_based_backward_naive" ) weight_and_transmittance_from_alpha_patch_based_forward_naive = _make_lazy_cuda_func( "weight_and_transmittance_from_alpha_patch_based_forward_naive" ) weight_and_transmittance_from_alpha_patch_based_backward_naive = _make_lazy_cuda_func( "weight_and_transmittance_from_alpha_patch_based_backward_naive" ) unpack_data = _make_lazy_cuda_func("unpack_data") unpack_info = _make_lazy_cuda_func("unpack_info") unpack_info_to_mask = _make_lazy_cuda_func("unpack_info_to_mask") ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/_backend.py ================================================ """ Copyright (c) 2022 Ruilong Li, UC Berkeley. 
""" import glob import json import os import shutil from subprocess import DEVNULL, call from rich.console import Console from torch.utils.cpp_extension import _get_build_directory, load PATH = os.path.dirname(os.path.abspath(__file__)) def cuda_toolkit_available(): """Check if the nvcc is avaiable on the machine.""" try: call(["nvcc"], stdout=DEVNULL, stderr=DEVNULL) return True except FileNotFoundError: return False def cuda_toolkit_version(): """Get the cuda toolkit version.""" cuda_home = os.path.join(os.path.dirname(shutil.which("nvcc")), "..") if os.path.exists(os.path.join(cuda_home, "version.txt")): with open(os.path.join(cuda_home, "version.txt")) as f: cuda_version = f.read().strip().split()[-1] elif os.path.exists(os.path.join(cuda_home, "version.json")): with open(os.path.join(cuda_home, "version.json")) as f: cuda_version = json.load(f)["cuda"]["version"] else: raise RuntimeError("Cannot find the cuda version.") return cuda_version name = "nerfacc_cuda" build_dir = _get_build_directory(name, verbose=False) extra_include_paths = [] extra_cflags = ["-O3"] extra_cuda_cflags = ["-O3"] _C = None try: # try to import the compiled module (via setup.py) from nerfacc import csrc as _C except ImportError: # if failed, try with JIT compilation if cuda_toolkit_available(): if os.listdir(build_dir) != []: # If the build exists, we assume the extension has been built # and we can load it. _C = load( name=name, sources=glob.glob(os.path.join(PATH, "csrc/*.cu")), extra_cflags=extra_cflags, extra_cuda_cflags=extra_cuda_cflags, extra_include_paths=extra_include_paths, ) else: # Build from scratch. Remove the build directory just to be safe: pytorch jit might stuck # if the build directory exists. 
// Resamples each ray's samples from the piecewise-linear CDF of its weights.
// One thread handles one ray (index `i`, see CUDA_GET_THREAD_ID).
//
// For a ray with `steps` input intervals and `resample_steps` output
// intervals, `num_bins = resample_steps + 1` CDF positions are walked in
// increasing order; position j yields the start of output interval j and the
// end of output interval j - 1.
//
// packed_info / resample_packed_info hold, per ray, (first sample index,
// number of samples) for the input and output sample arrays respectively.
template <typename scalar_t>
__global__ void cdf_resampling_kernel(
    const uint32_t n_rays,
    const int *packed_info,  // input ray & point indices.
    const scalar_t *starts,  // input start t
    const scalar_t *ends,    // input end t
    const scalar_t *weights, // transmittance weights
    const int *resample_packed_info,
    scalar_t *resample_starts,
    scalar_t *resample_ends)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // locate
    const int base = packed_info[i * 2 + 0];  // point idx start.
    const int steps = packed_info[i * 2 + 1]; // point idx shift.
    const int resample_base = resample_packed_info[i * 2 + 0];
    const int resample_steps = resample_packed_info[i * 2 + 1];
    if (steps == 0)
        return;

    // Re-base all pointers so that index 0 is this ray's first sample.
    starts += base;
    ends += base;
    weights += base;
    resample_starts += resample_base;
    resample_ends += resample_base;

    // normalize weights **per ray**
    scalar_t weights_sum = 0.0f;
    for (int j = 0; j < steps; j++)
        weights_sum += weights[j];
    // Pad (evenly over the intervals) so the normaliser is at least 1e-5.
    scalar_t padding = fmaxf(1e-5f - weights_sum, 0.0f);
    scalar_t padding_step = padding / steps;
    weights_sum += padding;

    int num_bins = resample_steps + 1;
    scalar_t cdf_step_size = (1.0f - 1.0 / num_bins) / resample_steps;

    int idx = 0, j = 0;
    scalar_t cdf_prev = 0.0f, cdf_next = (weights[idx] + padding_step) / weights_sum;
    scalar_t cdf_u = 1.0 / (2 * num_bins);
    while (j < num_bins)
    {
        // True once the last input interval is reached: there is no further
        // interval to advance to, so weights[idx + 1] must not be read.
        const bool last_interval = (idx + 1 >= steps);
        const bool in_interval = (cdf_u < cdf_next);
        if (in_interval || last_interval)
        {
            scalar_t t;
            if (in_interval)
            {
                // resample in this interval
                scalar_t scaling = (ends[idx] - starts[idx]) / (cdf_next - cdf_prev);
                t = (cdf_u - cdf_prev) * scaling + starts[idx];
            }
            else
            {
                // cdf_u lies beyond the accumulated CDF (round-off or
                // non-finite weights): clamp to the end of the last interval
                // instead of walking past the ray's samples.
                t = ends[idx];
            }
            if (j < num_bins - 1)
                resample_starts[j] = t;
            if (j > 0)
                resample_ends[j - 1] = t;
            // going further to next resample
            cdf_u += cdf_step_size;
            j += 1;
        }
        else
        {
            // going to next interval
            idx += 1;
            cdf_prev = cdf_next;
            cdf_next += (weights[idx] + padding_step) / weights_sum;
        }
    }
    // The loop only exits with j == num_bins, so no post-condition check is
    // needed here.
    return;
}
/**
 * Resample every non-empty ray into `steps` intervals according to `weights`.
 *
 * @param packed_info (n_rays, 2) tensor; column 1 is the number of input
 *                    intervals of each ray (column 0 is read by the kernel as
 *                    the ray's first index).
 * @param starts      (n_samples, 1) interval start values.
 * @param ends        (n_samples, 1) interval end values.
 * @param weights     (n_samples,) per-interval weights; its dtype selects the
 *                    kernel instantiation.
 * @param steps       number of output intervals for each ray that has at
 *                    least one input interval (empty rays get zero).
 * @return {resample_packed_info, resample_starts, resample_ends}.
 */
std::vector<torch::Tensor> ray_resampling(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor weights,
    const int steps)
{
    DEVICE_GUARD(packed_info);

    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(weights);

    // Logical && so that size(1) is only queried once the rank is known to
    // be 2; with bitwise & a 1-D tensor raised an index error instead of
    // failing the check.
    TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 2);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(weights.ndimension() == 1);

    const uint32_t n_rays = packed_info.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // Column 1 of packed_info: number of input intervals per ray.
    torch::Tensor num_steps = torch::split(packed_info, 1, 1)[1];
    // `steps` output intervals for rays that have input, 0 otherwise.
    torch::Tensor resample_num_steps = (num_steps > 0).to(num_steps.options()) * steps;
    torch::Tensor resample_cum_steps = resample_num_steps.cumsum(0, torch::kInt32);
    // (first output index, output count) per ray.
    torch::Tensor resample_packed_info = torch::cat(
        {resample_cum_steps - resample_num_steps, resample_num_steps}, 1);

    // With zero rays there is no last cumulative entry to read.
    int total_steps = 0;
    if (n_rays > 0)
    {
        total_steps = resample_cum_steps[resample_cum_steps.size(0) - 1].item<int>();
    }

    torch::Tensor resample_starts = torch::zeros({total_steps, 1}, starts.options());
    torch::Tensor resample_ends = torch::zeros({total_steps, 1}, ends.options());

    if (n_rays == 0)
    {
        // Nothing to launch; `blocks` is meaningless for an unsigned zero.
        return {resample_packed_info, resample_starts, resample_ends};
    }

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
        weights.scalar_type(),
        "ray_resampling",
        ([&]
         { cdf_resampling_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
               n_rays,
               // inputs
               packed_info.data_ptr<int>(),
               starts.data_ptr<scalar_t>(),
               ends.data_ptr<scalar_t>(),
               weights.data_ptr<scalar_t>(),
               resample_packed_info.data_ptr<int>(),
               // outputs
               resample_starts.data_ptr<scalar_t>(),
               resample_ends.data_ptr<scalar_t>()); }));

    return {resample_packed_info, resample_starts, resample_ends};
}
/**
 * Apply the contraction `type` to every sample (one thread per sample).
 *
 * samples / out_samples are read and written as three consecutive floats per
 * sample; roi is read as six floats: (min.x, min.y, min.z, max.x, max.y, max.z).
 */
__global__ void contract_kernel(
    // samples info
    const uint32_t n_samples,
    const float *samples, // (n_samples, 3)
    // contraction
    const float *roi,
    const ContractionType type,
    // outputs
    float *out_samples)
{
    CUDA_GET_THREAD_ID(i, n_samples);

    // locate
    samples += i * 3;
    out_samples += i * 3;

    const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);
    const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);
    const float3 xyz = make_float3(samples[0], samples[1], samples[2]);
    float3 xyz_unit = apply_contraction(xyz, roi_min, roi_max, type);

    out_samples[0] = xyz_unit.x;
    out_samples[1] = xyz_unit.y;
    out_samples[2] = xyz_unit.z;
    return;
}

/**
 * Apply the inverse of contraction `type` to every sample (one thread per
 * sample). Memory layout of the arguments matches contract_kernel.
 */
__global__ void contract_inv_kernel(
    // samples info
    const uint32_t n_samples,
    const float *samples, // (n_samples, 3)
    // contraction
    const float *roi,
    const ContractionType type,
    // outputs
    float *out_samples)
{
    CUDA_GET_THREAD_ID(i, n_samples);

    // locate
    samples += i * 3;
    out_samples += i * 3;

    const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);
    const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);
    const float3 xyz_unit = make_float3(samples[0], samples[1], samples[2]);
    float3 xyz = apply_contraction_inv(xyz_unit, roi_min, roi_max, type);

    out_samples[0] = xyz.x;
    out_samples[1] = xyz.y;
    out_samples[2] = xyz.z;
    return;
}

/**
 * Host entry point: contract `samples` with respect to `roi`.
 *
 * @param samples (n_samples, 3) float CUDA tensor, contiguous.
 * @param roi     float CUDA tensor with six elements, contiguous.
 * @param type    contraction to apply.
 * @return (n_samples, 3) tensor with the same options as `samples`.
 */
torch::Tensor contract(
    const torch::Tensor samples,
    // contraction
    const torch::Tensor roi,
    const ContractionType type)
{
    DEVICE_GUARD(samples);
    CHECK_INPUT(samples);
    // The kernel dereferences roi on the device and indexes it linearly, so
    // it needs the same CUDA/contiguity guarantees as samples.
    CHECK_INPUT(roi);
    TORCH_CHECK(samples.ndimension() == 2 && samples.size(1) == 3);
    TORCH_CHECK(roi.numel() == 6);

    const int n_samples = samples.size(0);
    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads);

    torch::Tensor out_samples = torch::empty({n_samples, 3}, samples.options());

    contract_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_samples,
        samples.data_ptr<float>(),
        // contraction
        roi.data_ptr<float>(),
        type,
        // outputs
        out_samples.data_ptr<float>());
    return out_samples;
}
#pragma once

#include "helpers_math.h"

// Selects how world coordinates are mapped into the unit cube.
enum ContractionType
{
    AABB = 0,
    UN_BOUNDED_TANH = 1,
    UN_BOUNDED_SPHERE = 2,
};

// Affine map of the roi box onto [0, 1]^3.
inline __device__ __host__ float3 roi_to_unit(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    // roi -> [0, 1]^3
    return (xyz - roi_min) / (roi_max - roi_min);
}

// Inverse of roi_to_unit: [0, 1]^3 back onto the roi box.
inline __device__ __host__ float3 unit_to_roi(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    // [0, 1]^3 -> roi
    return xyz * (roi_max - roi_min) + roi_min;
}

inline __device__ __host__ float3 inf_to_unit_tanh(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    /**
      [-inf, inf]^3 -> [0, 1]^3
      Per axis the roi is centred to [-0.5, 0.5] and passed through
      tanh(.) * 0.5 + 0.5, so the roi lands on
      [0.5 - 0.5 * tanh(0.5), 0.5 + 0.5 * tanh(0.5)] (about [0.27, 0.73]).
    **/
    float3 xyz_unit = roi_to_unit(xyz, roi_min, roi_max); // roi -> [0, 1]^3
    xyz_unit = xyz_unit - 0.5f;                           // roi -> [-0.5, 0.5]^3
    return make_float3(tanhf(xyz_unit.x), tanhf(xyz_unit.y), tanhf(xyz_unit.z)) * 0.5f + 0.5f;
}

inline __device__ __host__ float3 unit_to_inf_tanh(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    /**
      [0, 1]^3 -> [-inf, inf]^3
      Inverse of inf_to_unit_tanh. atanhf is infinite at the cube faces,
      so the result is clamped to +-1e10 before mapping back to the roi.
    **/
    float3 xyz_unit = clamp(
        make_float3(
            atanhf(xyz.x * 2.0f - 1.0f),
            atanhf(xyz.y * 2.0f - 1.0f),
            atanhf(xyz.z * 2.0f - 1.0f)),
        -1e10f,
        1e10f);
    xyz_unit = xyz_unit + 0.5f;
    xyz_unit = unit_to_roi(xyz_unit, roi_min, roi_max);
    return xyz_unit;
}

inline __device__ __host__ float3 inf_to_unit_sphere(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    /** From MipNeRF360
        [-inf, inf]^3 -> sphere of [0, 1]^3;
        roi -> sphere of [0.25, 0.75]^3
    **/
    float3 xyz_unit = roi_to_unit(xyz, roi_min, roi_max); // roi -> [0, 1]^3
    xyz_unit = xyz_unit * 2.0f - 1.0f;                    // roi -> [-1, 1]^3

    float norm_sq = dot(xyz_unit, xyz_unit);
    float norm = sqrtf(norm_sq);
    if (norm > 1.0f)
    {
        // Points outside the unit ball get the new norm 2 - 1 / norm.
        xyz_unit = (2.0f - 1.0f / norm) * (xyz_unit / norm);
    }
    xyz_unit = xyz_unit * 0.25f + 0.5f; // [-1, 1]^3 -> [0.25, 0.75]^3
    return xyz_unit;
}

inline __device__ __host__ float3 unit_sphere_to_inf(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    /** From MipNeRF360
        sphere of [0, 1]^3 -> [-inf, inf]^3;
        sphere of [0.25, 0.75]^3 -> roi
    **/
    float3 xyz_unit = (xyz - 0.5f) * 4.0f; // [0.25, 0.75]^3 -> [-1, 1]^3

    float norm_sq = dot(xyz_unit, xyz_unit);
    float norm = sqrtf(norm_sq);
    if (norm > 1.0f)
    {
        // Inverts norm' = 2 - 1 / norm; the denominator 2 * norm' - norm'^2
        // reaches 0 at norm' = 2, hence the 1e-10 floor.
        xyz_unit = xyz_unit / fmaxf((2.0f * norm - 1.0f * norm_sq), 1e-10f);
    }
    xyz_unit = xyz_unit * 0.5f + 0.5f;                  // [-1, 1]^3 -> [0, 1]^3
    xyz_unit = unit_to_roi(xyz_unit, roi_min, roi_max); // [0, 1]^3 -> roi
    return xyz_unit;
}

// Dispatch to the forward contraction selected by `type`.
inline __device__ __host__ float3 apply_contraction(
    const float3 xyz, const float3 roi_min, const float3 roi_max,
    const ContractionType type)
{
    switch (type)
    {
    case AABB:
        return roi_to_unit(xyz, roi_min, roi_max);
    case UN_BOUNDED_TANH:
        return inf_to_unit_tanh(xyz, roi_min, roi_max);
    case UN_BOUNDED_SPHERE:
        return inf_to_unit_sphere(xyz, roi_min, roi_max);
    }
    // Not reached for a valid ContractionType. Returning the input keeps an
    // out-of-range value from flowing off the end of a non-void function,
    // which is undefined behaviour.
    return xyz;
}

// Dispatch to the inverse contraction selected by `type`.
inline __device__ __host__ float3 apply_contraction_inv(
    const float3 xyz, const float3 roi_min, const float3 roi_max,
    const ContractionType type)
{
    switch (type)
    {
    case AABB:
        return unit_to_roi(xyz, roi_min, roi_max);
    case UN_BOUNDED_TANH:
        return unit_to_inf_tanh(xyz, roi_min, roi_max);
    case UN_BOUNDED_SPHERE:
        return unit_sphere_to_inf(xyz, roi_min, roi_max);
    }
    // Not reached for a valid ContractionType; see apply_contraction.
    return xyz;
}
#pragma once

// The header names were lost from the include lines; these four are the ones
// the macros below require.
#include <torch/extension.h>       // torch::Tensor, TORCH_CHECK
#include <c10/cuda/CUDAGuard.h>    // at::cuda::OptionalCUDAGuard
#include <ATen/cuda/CUDAContext.h> // current stream, caching allocator
#include <ATen/cuda/Exceptions.h>  // AT_CUDA_CHECK
// NOTE(review): a fifth include was commented out here in the original and
// its header name is lost; presumably the cub definitions header that
// provides CUB_VERSION -- confirm against upstream nerfacc.

// cub support for scan by key is added to cub 1.15
// in https://github.com/NVIDIA/cub/pull/376
#if CUB_VERSION >= 101500
#define CUB_SUPPORTS_SCAN_BY_KEY() 1
#else
#define CUB_SUPPORTS_SCAN_BY_KEY() 0
#endif

// Fail with a readable message unless x is a CUDA tensor.
#define CHECK_CUDA(x) TORCH_CHECK((x).is_cuda(), #x " must be a CUDA tensor")
// Fail with a readable message unless x is contiguous.
#define CHECK_CONTIGUOUS(x) \
    TORCH_CHECK((x).is_contiguous(), #x " must be contiguous")
// Both checks as a single statement, so that `if (c) CHECK_INPUT(t);` guards
// both of them.
#define CHECK_INPUT(x)       \
    do                       \
    {                        \
        CHECK_CUDA(x);       \
        CHECK_CONTIGUOUS(x); \
    } while (false)

// Declare `tid` as the global 1-D thread index and return from the kernel
// when it is outside [0, Q).
#define CUDA_GET_THREAD_ID(tid, Q)                         \
    const int tid = blockIdx.x * blockDim.x + threadIdx.x; \
    if (tid >= (Q))                                        \
    return
// 2-D variant: return when either index is outside its bound.
#define CUDA_GET_THREAD_ID_2D(tidx, tidy, P, Q)             \
    const int tidx = blockIdx.x * blockDim.x + threadIdx.x; \
    const int tidy = blockIdx.y * blockDim.y + threadIdx.y; \
    if (tidx >= (P) || tidy >= (Q))                         \
    return
// Number of blocks of CUDA_N_THREADS threads needed to cover Q items.
// Arguments are parenthesised so that expressions can be passed safely; the
// arithmetic is unchanged.
#define CUDA_N_BLOCKS_NEEDED(Q, CUDA_N_THREADS) (((Q) - 1) / (CUDA_N_THREADS) + 1)
// Make the device of _ten current for the rest of the enclosing scope.
#define DEVICE_GUARD(_ten) \
    const at::cuda::OptionalCUDAGuard device_guard(device_of(_ten));

// https://github.com/pytorch/pytorch/blob/233305a852e1cd7f319b15b5137074c9eac455f6/aten/src/ATen/cuda/cub.cuh#L38-L46
// Two-pass cub call: the first call only reports the temporary storage size,
// the second call runs with storage taken from the caching allocator.
#define CUB_WRAPPER(func, ...) do {                                       \
  size_t temp_storage_bytes = 0;                                          \
  func(nullptr, temp_storage_bytes, __VA_ARGS__);                         \
  auto& caching_allocator = *::c10::cuda::CUDACachingAllocator::get();    \
  auto temp_storage = caching_allocator.allocate(temp_storage_bytes);     \
  func(temp_storage.get(), temp_storage_bytes, __VA_ARGS__);              \
  AT_CUDA_CHECK(cudaGetLastError());                                      \
} while (false)
* Modified by Ruilong Li, 2022 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * This file implements common mathematical operations on vector types * (float3, float4 etc.) since these are not provided as standard by CUDA. * * The syntax is modeled on the Cg standard library. * * This is part of the Helper library includes * * Thanks to Linh Hah for additions and fixes. 
#ifndef __CUDACC__
// sqrtf, used by rsqrtf below, is declared in <math.h>; the header name was
// missing from this include line.
#include <math.h>

////////////////////////////////////////////////////////////////////////////////
// host implementations of CUDA functions
////////////////////////////////////////////////////////////////////////////////

// Smaller of two floats (returns b when the comparison a < b is false).
inline float fminf(float a, float b)
{
    return a < b ? a : b;
}

// Larger of two floats (returns b when the comparison a > b is false).
inline float fmaxf(float a, float b)
{
    return a > b ? a : b;
}

// Larger of two ints.
inline int max(int a, int b)
{
    return a > b ? a : b;
}

// Smaller of two ints.
inline int min(int a, int b)
{
    return a < b ? a : b;
}

// Reciprocal square root, 1 / sqrt(x).
inline float rsqrtf(float x)
{
    return 1.0f / sqrtf(x);
}
#endif
// --- float3 constructors: pad with 0 / the given scalar, drop w, or convert
// --- each component from an integer vector.
inline __host__ __device__ float3 make_float3(float2 a) { return make_float3(a.x, a.y, 0.0f); }
inline __host__ __device__ float3 make_float3(float2 a, float s) { return make_float3(a.x, a.y, s); }
inline __host__ __device__ float3 make_float3(float4 a) { return make_float3(a.x, a.y, a.z); }
inline __host__ __device__ float3 make_float3(int3 a) { return make_float3(float(a.x), float(a.y), float(a.z)); }
inline __host__ __device__ float3 make_float3(uint3 a) { return make_float3(float(a.x), float(a.y), float(a.z)); }

// --- int3 constructors: splat a scalar, pad an int2, or convert per component.
inline __host__ __device__ int3 make_int3(int s) { return make_int3(s, s, s); }
inline __host__ __device__ int3 make_int3(int2 a) { return make_int3(a.x, a.y, 0); }
inline __host__ __device__ int3 make_int3(int2 a, int s) { return make_int3(a.x, a.y, s); }
inline __host__ __device__ int3 make_int3(uint3 a) { return make_int3(int(a.x), int(a.y), int(a.z)); }
inline __host__ __device__ int3 make_int3(float3 a) { return make_int3(int(a.x), int(a.y), int(a.z)); }

// --- uint3 constructors.
inline __host__ __device__ uint3 make_uint3(uint s) { return make_uint3(s, s, s); }
inline __host__ __device__ uint3 make_uint3(uint2 a) { return make_uint3(a.x, a.y, 0); }
inline __host__ __device__ uint3 make_uint3(uint2 a, uint s) { return make_uint3(a.x, a.y, s); }
inline __host__ __device__ uint3 make_uint3(uint4 a) { return make_uint3(a.x, a.y, a.z); }
inline __host__ __device__ uint3 make_uint3(int3 a) { return make_uint3(uint(a.x), uint(a.y), uint(a.z)); }

// --- float4 constructors.
inline __host__ __device__ float4 make_float4(float s) { return make_float4(s, s, s, s); }
inline __host__ __device__ float4 make_float4(float3 a) { return make_float4(a.x, a.y, a.z, 0.0f); }
inline __host__ __device__ float4 make_float4(float3 a, float w) { return make_float4(a.x, a.y, a.z, w); }
inline __host__ __device__ float4 make_float4(int4 a) { return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); }
inline __host__ __device__ float4 make_float4(uint4 a) { return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); }

// --- int4 constructors.
inline __host__ __device__ int4 make_int4(int s) { return make_int4(s, s, s, s); }
inline __host__ __device__ int4 make_int4(int3 a) { return make_int4(a.x, a.y, a.z, 0); }
inline __host__ __device__ int4 make_int4(int3 a, int w) { return make_int4(a.x, a.y, a.z, w); }
inline __host__ __device__ int4 make_int4(uint4 a) { return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); }
inline __host__ __device__ int4 make_int4(float4 a) { return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); }

// --- uint4 constructors.
inline __host__ __device__ uint4 make_uint4(uint s) { return make_uint4(s, s, s, s); }
inline __host__ __device__ uint4 make_uint4(uint3 a) { return make_uint4(a.x, a.y, a.z, 0); }
inline __host__ __device__ uint4 make_uint4(uint3 a, uint w) { return make_uint4(a.x, a.y, a.z, w); }
inline __host__ __device__ uint4 make_uint4(int4 a) { return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w)); }

////////////////////////////////////////////////////////////////////////////////
// negate
////////////////////////////////////////////////////////////////////////////////

// The operand is taken by const reference: a non-const lvalue reference
// cannot bind to const values or temporaries, so expressions such as
// -(a + b) did not compile. Existing calls on lvalues are unaffected.
inline __host__ __device__ float2 operator-(const float2 &a) { return make_float2(-a.x, -a.y); }
inline __host__ __device__ int2 operator-(const int2 &a) { return make_int2(-a.x, -a.y); }
inline __host__ __device__ float3 operator-(const float3 &a) { return make_float3(-a.x, -a.y, -a.z); }
inline __host__ __device__ int3 operator-(const int3 &a) { return make_int3(-a.x, -a.y, -a.z); }
inline __host__ __device__ float4 operator-(const float4 &a) { return make_float4(-a.x, -a.y, -a.z, -a.w); }
inline __host__ __device__ int4 operator-(const int4 &a) { return make_int4(-a.x, -a.y, -a.z, -a.w); }

////////////////////////////////////////////////////////////////////////////////
// addition
////////////////////////////////////////////////////////////////////////////////

inline __host__ __device__ float2 operator+(float2 a, float2 b) { return make_float2(a.x + b.x, a.y + b.y); }
inline __host__ __device__ void operator+=(float2 &a, float2 b) { a.x += b.x; a.y += b.y; }
// Component-wise arithmetic on the CUDA vector types. Each family offers
// vector (op) vector, vector (op) scalar, scalar (op) vector, and the
// compound-assignment forms, which modify the left operand in place and
// return nothing.

// --- addition (continued): 2-component vectors
inline __host__ __device__ float2 operator+(float2 a, float b) { return make_float2(a.x + b, a.y + b); }
inline __host__ __device__ float2 operator+(float b, float2 a) { return make_float2(a.x + b, a.y + b); }
inline __host__ __device__ void operator+=(float2 &a, float b) { a.x += b; a.y += b; }
inline __host__ __device__ int2 operator+(int2 a, int2 b) { return make_int2(a.x + b.x, a.y + b.y); }
inline __host__ __device__ void operator+=(int2 &a, int2 b) { a.x += b.x; a.y += b.y; }
inline __host__ __device__ int2 operator+(int2 a, int b) { return make_int2(a.x + b, a.y + b); }
inline __host__ __device__ int2 operator+(int b, int2 a) { return make_int2(a.x + b, a.y + b); }
inline __host__ __device__ void operator+=(int2 &a, int b) { a.x += b; a.y += b; }
inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) { return make_uint2(a.x + b.x, a.y + b.y); }
inline __host__ __device__ void operator+=(uint2 &a, uint2 b) { a.x += b.x; a.y += b.y; }
inline __host__ __device__ uint2 operator+(uint2 a, uint b) { return make_uint2(a.x + b, a.y + b); }
inline __host__ __device__ uint2 operator+(uint b, uint2 a) { return make_uint2(a.x + b, a.y + b); }
inline __host__ __device__ void operator+=(uint2 &a, uint b) { a.x += b; a.y += b; }

// --- addition: 3-component vectors
inline __host__ __device__ float3 operator+(float3 a, float3 b) { return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); }
inline __host__ __device__ void operator+=(float3 &a, float3 b) { a.x += b.x; a.y += b.y; a.z += b.z; }
inline __host__ __device__ float3 operator+(float3 a, float b) { return make_float3(a.x + b, a.y + b, a.z + b); }
inline __host__ __device__ void operator+=(float3 &a, float b) { a.x += b; a.y += b; a.z += b; }
inline __host__ __device__ int3 operator+(int3 a, int3 b) { return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); }
inline __host__ __device__ void operator+=(int3 &a, int3 b) { a.x += b.x; a.y += b.y; a.z += b.z; }
inline __host__ __device__ int3 operator+(int3 a, int b) { return make_int3(a.x + b, a.y + b, a.z + b); }
inline __host__ __device__ void operator+=(int3 &a, int b) { a.x += b; a.y += b; a.z += b; }
inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) { return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z); }
inline __host__ __device__ void operator+=(uint3 &a, uint3 b) { a.x += b.x; a.y += b.y; a.z += b.z; }
inline __host__ __device__ uint3 operator+(uint3 a, uint b) { return make_uint3(a.x + b, a.y + b, a.z + b); }
inline __host__ __device__ void operator+=(uint3 &a, uint b) { a.x += b; a.y += b; a.z += b; }
inline __host__ __device__ int3 operator+(int b, int3 a) { return make_int3(a.x + b, a.y + b, a.z + b); }
inline __host__ __device__ uint3 operator+(uint b, uint3 a) { return make_uint3(a.x + b, a.y + b, a.z + b); }
inline __host__ __device__ float3 operator+(float b, float3 a) { return make_float3(a.x + b, a.y + b, a.z + b); }

// --- addition: 4-component vectors
inline __host__ __device__ float4 operator+(float4 a, float4 b) { return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
inline __host__ __device__ void operator+=(float4 &a, float4 b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; }
inline __host__ __device__ float4 operator+(float4 a, float b) { return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ float4 operator+(float b, float4 a) { return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ void operator+=(float4 &a, float b) { a.x += b; a.y += b; a.z += b; a.w += b; }
inline __host__ __device__ int4 operator+(int4 a, int4 b) { return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
inline __host__ __device__ void operator+=(int4 &a, int4 b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; }
inline __host__ __device__ int4 operator+(int4 a, int b) { return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ int4 operator+(int b, int4 a) { return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ void operator+=(int4 &a, int b) { a.x += b; a.y += b; a.z += b; a.w += b; }
inline __host__ __device__ uint4 operator+(uint4 a, uint4 b) { return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
inline __host__ __device__ void operator+=(uint4 &a, uint4 b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; }
inline __host__ __device__ uint4 operator+(uint4 a, uint b) { return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ uint4 operator+(uint b, uint4 a) { return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ void operator+=(uint4 &a, uint b) { a.x += b; a.y += b; a.z += b; a.w += b; }

////////////////////////////////////////////////////////////////////////////////
// subtract
////////////////////////////////////////////////////////////////////////////////

// --- subtraction: 2-component vectors
inline __host__ __device__ float2 operator-(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
inline __host__ __device__ void operator-=(float2 &a, float2 b) { a.x -= b.x; a.y -= b.y; }
inline __host__ __device__ float2 operator-(float2 a, float b) { return make_float2(a.x - b, a.y - b); }
inline __host__ __device__ float2 operator-(float b, float2 a) { return make_float2(b - a.x, b - a.y); }
inline __host__ __device__ void operator-=(float2 &a, float b) { a.x -= b; a.y -= b; }
inline __host__ __device__ int2 operator-(int2 a, int2 b) { return make_int2(a.x - b.x, a.y - b.y); }
inline __host__ __device__ void operator-=(int2 &a, int2 b) { a.x -= b.x; a.y -= b.y; }
inline __host__ __device__ int2 operator-(int2 a, int b) { return make_int2(a.x - b, a.y - b); }
inline __host__ __device__ int2 operator-(int b, int2 a) { return make_int2(b - a.x, b - a.y); }
inline __host__ __device__ void operator-=(int2 &a, int b) { a.x -= b; a.y -= b; }
inline __host__ __device__ uint2 operator-(uint2 a, uint2 b) { return make_uint2(a.x - b.x, a.y - b.y); }
inline __host__ __device__ void operator-=(uint2 &a, uint2 b) { a.x -= b.x; a.y -= b.y; }
inline __host__ __device__ uint2 operator-(uint2 a, uint b) { return make_uint2(a.x - b, a.y - b); }
inline __host__ __device__ uint2 operator-(uint b, uint2 a) { return make_uint2(b - a.x, b - a.y); }
inline __host__ __device__ void operator-=(uint2 &a, uint b) { a.x -= b; a.y -= b; }

// --- subtraction: 3-component vectors
inline __host__ __device__ float3 operator-(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
inline __host__ __device__ void operator-=(float3 &a, float3 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; }
inline __host__ __device__ float3 operator-(float3 a, float b) { return make_float3(a.x - b, a.y - b, a.z - b); }
inline __host__ __device__ float3 operator-(float b, float3 a) { return make_float3(b - a.x, b - a.y, b - a.z); }
inline __host__ __device__ void operator-=(float3 &a, float b) { a.x -= b; a.y -= b; a.z -= b; }
inline __host__ __device__ int3 operator-(int3 a, int3 b) { return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); }
inline __host__ __device__ void operator-=(int3 &a, int3 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; }
inline __host__ __device__ int3 operator-(int3 a, int b) { return make_int3(a.x - b, a.y - b, a.z - b); }
inline __host__ __device__ int3 operator-(int b, int3 a) { return make_int3(b - a.x, b - a.y, b - a.z); }
inline __host__ __device__ void operator-=(int3 &a, int b) { a.x -= b; a.y -= b; a.z -= b; }
inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) { return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z); }
inline __host__ __device__ void operator-=(uint3 &a, uint3 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; }
inline __host__ __device__ uint3 operator-(uint3 a, uint b) { return make_uint3(a.x - b, a.y - b, a.z - b); }
inline __host__ __device__ uint3 operator-(uint b, uint3 a) { return make_uint3(b - a.x, b - a.y, b - a.z); }
inline __host__ __device__ void operator-=(uint3 &a, uint b) { a.x -= b; a.y -= b; a.z -= b; }

// --- subtraction: 4-component vectors
inline __host__ __device__ float4 operator-(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
inline __host__ __device__ void operator-=(float4 &a, float4 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; }
inline __host__ __device__ float4 operator-(float4 a, float b) { return make_float4(a.x - b, a.y - b, a.z - b, a.w - b); }
// Scalar-minus-vector for float4: every other vector type in this section
// already has this form; float4 was the only one without it.
inline __host__ __device__ float4 operator-(float b, float4 a) { return make_float4(b - a.x, b - a.y, b - a.z, b - a.w); }
inline __host__ __device__ void operator-=(float4 &a, float b) { a.x -= b; a.y -= b; a.z -= b; a.w -= b; }
inline __host__ __device__ int4 operator-(int4 a, int4 b) { return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
inline __host__ __device__ void operator-=(int4 &a, int4 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; }
inline __host__ __device__ int4 operator-(int4 a, int b) { return make_int4(a.x - b, a.y - b, a.z - b, a.w - b); }
inline __host__ __device__ int4 operator-(int b, int4 a) { return make_int4(b - a.x, b - a.y, b - a.z, b - a.w); }
inline __host__ __device__ void operator-=(int4 &a, int b) { a.x -= b; a.y -= b; a.z -= b; a.w -= b; }
inline __host__ __device__ uint4 operator-(uint4 a, uint4 b) { return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
inline __host__ __device__ void operator-=(uint4 &a, uint4 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; }
inline __host__ __device__ uint4 operator-(uint4 a, uint b) { return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b); }
inline __host__ __device__ uint4 operator-(uint b, uint4 a) { return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w); }
inline __host__ __device__ void operator-=(uint4 &a, uint b) { a.x -= b; a.y -= b; a.z -= b; a.w -= b; }

////////////////////////////////////////////////////////////////////////////////
// multiply
////////////////////////////////////////////////////////////////////////////////

// --- multiplication: 2-component vectors
inline __host__ __device__ float2 operator*(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
inline __host__ __device__ void operator*=(float2 &a, float2 b) { a.x *= b.x; a.y *= b.y; }
inline __host__ __device__ float2 operator*(float2 a, float b) { return make_float2(a.x * b, a.y * b); }
inline __host__ __device__ float2 operator*(float b, float2 a) { return make_float2(b * a.x, b * a.y); }
inline __host__ __device__ void operator*=(float2 &a, float b) { a.x *= b; a.y *= b; }
inline __host__ __device__ int2 operator*(int2 a, int2 b) { return make_int2(a.x * b.x, a.y * b.y); }
inline __host__ __device__ void operator*=(int2 &a, int2 b) { a.x *= b.x; a.y *= b.y; }
inline __host__ __device__ int2 operator*(int2 a, int b) { return make_int2(a.x * b, a.y * b); }
inline __host__ __device__ int2 operator*(int b, int2 a) { return make_int2(b * a.x, b * a.y); }
inline __host__ __device__ void operator*=(int2 &a, int b) { a.x *= b; a.y *= b; }
inline __host__ __device__ uint2 operator*(uint2 a, uint2 b) { return make_uint2(a.x * b.x, a.y * b.y); }
inline __host__ __device__ void operator*=(uint2 &a, uint2 b) { a.x *= b.x; a.y *= b.y; }
inline __host__ __device__ uint2 operator*(uint2 a, uint b) { return make_uint2(a.x * b, a.y * b); }
inline __host__ __device__ uint2 operator*(uint b, uint2 a) { return make_uint2(b * a.x, b * a.y); }
inline __host__ __device__ void operator*=(uint2 &a, uint b) { a.x *= b; a.y *= b; }

// --- multiplication: 3-component vectors
inline __host__ __device__ float3 operator*(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
inline __host__ __device__ void operator*=(float3 &a, float3 b) { a.x *= b.x; a.y *= b.y; a.z *= b.z; }
inline __host__ __device__ float3 operator*(float3 a, float b) { return make_float3(a.x * b, a.y * b, a.z * b); }
inline __host__ __device__ float3 operator*(float b, float3 a) { return make_float3(b * a.x, b * a.y, b * a.z); }
inline __host__ __device__ void operator*=(float3 &a, float b) { a.x *= b; a.y *= b; a.z *= b; }
inline __host__ __device__ int3 operator*(int3 a, int3 b) { return make_int3(a.x * b.x, a.y * b.y, a.z * b.z); }
inline __host__ __device__ void operator*=(int3 &a, int3 b) { a.x *= b.x; a.y *= b.y; a.z *= b.z; }
inline __host__ __device__ int3 operator*(int3 a, int b) { return make_int3(a.x * b, a.y * b, a.z * b); }
inline __host__ __device__ int3 operator*(int b, int3 a) { return make_int3(b * a.x, b * a.y, b * a.z); }
inline __host__ __device__ void operator*=(int3 &a, int b) { a.x *= b; a.y *= b; a.z *= b; }
inline __host__ __device__ uint3 operator*(uint3 a, uint3 b) { return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z); }
inline __host__ __device__ void operator*=(uint3 &a, uint3 b) { a.x *= b.x; a.y *= b.y; a.z *= b.z; }
inline __host__ __device__ uint3 operator*(uint3 a, uint b) { return make_uint3(a.x * b, a.y * b, a.z * b); }
inline __host__ __device__ uint3 operator*(uint b, uint3 a) { return make_uint3(b * a.x, b * a.y, b * a.z); }
inline __host__ __device__ void operator*=(uint3 &a, uint b) { a.x *= b; a.y *= b; a.z *= b; }

// --- multiplication: 4-component vectors
inline __host__ __device__ float4 operator*(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
inline __host__ __device__ void operator*=(float4 &a, float4 b) { a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; }
inline __host__ __device__ float4 operator*(float4 a, float b) { return make_float4(a.x * b, a.y * b, a.z * b, a.w * b); }
inline __host__ __device__ float4 operator*(float b, float4 a) { return make_float4(b * a.x, b * a.y, b * a.z, b * a.w); }
inline __host__ __device__ void operator*=(float4 &a, float b) { a.x *= b; a.y *= b; a.z *= b; a.w *= b; }
inline __host__ __device__ int4 operator*(int4 a, int4 b) { return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
inline __host__ __device__ void operator*=(int4 &a, int4 b) { a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; }
inline __host__ __device__ int4 operator*(int4 a, int b) { return make_int4(a.x * b, a.y * b, a.z * b, a.w * b); }
inline __host__ __device__ int4 operator*(int b, int4 a) { return make_int4(b * a.x, b * a.y, b * a.z, b * a.w); }
inline __host__ __device__ void operator*=(int4 &a, int b) { a.x *= b; a.y *= b; a.z *= b; a.w *= b; }
inline __host__ __device__ uint4 operator*(uint4 a, uint4 b) { return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
inline __host__ __device__ void operator*=(uint4 &a, uint4 b) { a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; }
inline __host__ __device__ uint4 operator*(uint4 a, uint b) { return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b); }
inline __host__ __device__ uint4 operator*(uint b, uint4 a) { return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w); }
inline __host__ __device__ void operator*=(uint4 &a, uint b) { a.x *= b; a.y *= b; a.z *= b; a.w *= b; }

////////////////////////////////////////////////////////////////////////////////
// divide
////////////////////////////////////////////////////////////////////////////////

// Division is only provided for the float vector types. No overload guards
// against a zero divisor.

// float2
inline __host__ __device__ float2 operator/(float2 a, float2 b) { return make_float2(a.x / b.x, a.y / b.y); }
inline __host__ __device__ void operator/=(float2 &a, float2 b) { a.x /= b.x; a.y /= b.y; }
inline __host__ __device__ float2 operator/(float2 a, float b) { return make_float2(a.x / b, a.y / b); }
inline __host__ __device__ void operator/=(float2 &a, float b) { a.x /= b; a.y /= b; }
// scalar / vector: b is the numerator for every component.
inline __host__ __device__ float2 operator/(float b, float2 a) { return make_float2(b / a.x, b / a.y); }

// float3
inline __host__ __device__ float3 operator/(float3 a, float3 b) { return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); }
inline __host__ __device__ void operator/=(float3 &a, float3 b) { a.x /= b.x; a.y /= b.y; a.z /= b.z; }
inline __host__ __device__ float3 operator/(float3 a, float b) { return make_float3(a.x / b, a.y / b, a.z / b); }
inline __host__ __device__ void operator/=(float3 &a, float b) { a.x /= b; a.y /= b; a.z /= b; }
inline __host__ __device__ float3 operator/(float b, float3 a) { return make_float3(b / a.x, b / a.y, b / a.z); }

// float4
inline __host__ __device__ float4 operator/(float4 a, float4 b) { return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); }
inline __host__ __device__ void operator/=(float4 &a, float4 b) { a.x /= b.x; a.y /= b.y; a.z /= b.z; a.w /= b.w; }
inline __host__ __device__ float4 operator/(float4 a, float b) { return make_float4(a.x / b, a.y / b, a.z / b, a.w / b); }
inline __host__ __device__ void operator/=(float4 &a, float b) { a.x /= b; a.y /= b; a.z /= b; a.w /= b; }
inline __host__ __device__ float4 operator/(float b, float4 a) { return make_float4(b / a.x, b / a.y, b / a.z, b / a.w); }

////////////////////////////////////////////////////////////////////////////////
// min
////////////////////////////////////////////////////////////////////////////////

// Component-wise minimum; float vectors go through fminf, integer vectors
// through min.
inline __host__ __device__ float2 fminf(float2 a, float2 b) { return make_float2(fminf(a.x, b.x), fminf(a.y, b.y)); }
inline __host__ __device__ float3 fminf(float3 a, float3 b) { return make_float3(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z)); }
inline __host__ __device__ float4 fminf(float4 a, float4 b) { return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w)); }
inline __host__ __device__ int2 min(int2 a, int2 b) { return make_int2(min(a.x, b.x), min(a.y, b.y)); }
inline __host__ __device__ int3 min(int3 a, int3 b) { return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); }
inline __host__ __device__ int4 min(int4 a, int4 b) { return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); }
inline __host__ __device__ uint2 min(uint2 a, uint2 b) { return make_uint2(min(a.x, b.x), min(a.y, b.y)); }
inline __host__ __device__ uint3 min(uint3 a, uint3 b) { return make_uint3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); }
inline __host__ __device__ uint4 min(uint4 a, uint4 b) { return make_uint4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); }

////////////////////////////////////////////////////////////////////////////////
// max
////////////////////////////////////////////////////////////////////////////////

// Component-wise maximum; float vectors go through fmaxf, integer vectors
// through max.
inline __host__ __device__ float2 fmaxf(float2 a, float2 b) { return make_float2(fmaxf(a.x, b.x), fmaxf(a.y, b.y)); }
inline __host__ __device__ float3 fmaxf(float3 a, float3 b) { return make_float3(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z)); }
inline __host__ __device__ float4 fmaxf(float4 a, float4 b) { return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w)); }
inline __host__ __device__ int2 max(int2 a, int2 b) { return make_int2(max(a.x, b.x), max(a.y, b.y)); }
inline __host__ __device__ int3 max(int3 a, int3 b) { return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); }
inline __host__ __device__ int4 max(int4 a, int4 b) { return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); }
inline __host__ __device__ uint2 max(uint2 a, uint2 b) { return make_uint2(max(a.x, b.x), max(a.y, b.y)); }
inline __host__ __device__ uint3 max(uint3 a, uint3 b) { return make_uint3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); }
inline __host__ __device__ uint4 max(uint4 a, uint4 b) { return make_uint4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); }

////////////////////////////////////////////////////////////////////////////////
// lerp
// - linear interpolation between a and b, based on value t in [0, 1] range
////////////////////////////////////////////////////////////////////////////////

// t is not clamped here, so values outside [0, 1] extrapolate.
inline __device__ __host__ float lerp(float a, float b, float t) { return a + t * (b - a); }
inline __device__ __host__ float2 lerp(float2 a, float2 b, float t) { return a + t * (b - a); }
inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) { return a + t * (b - a); }
inline __device__ __host__ float4 lerp(float4 a, float4 b, float t) { return a + t * (b - a); }

////////////////////////////////////////////////////////////////////////////////
// clamp
// - clamp the value v to be in the range [a, b]
////////////////////////////////////////////////////////////////////////////////

// Scalar versions: computed as max(a, min(f, b)), so the lower bound a wins
// if a > b.
inline __device__ __host__ float clamp(float f, float a, float b) { return fmaxf(a, fminf(f, b)); }
inline __device__ __host__ int clamp(int f, int a, int b) { return max(a, min(f, b)); }
inline __device__ __host__ uint clamp(uint f, uint a, uint b) { return max(a, min(f, b)); }

// Vector versions: each type has a form with shared scalar bounds and a form
// with per-component vector bounds; both defer to the scalar clamp.
inline __device__ __host__ float2 clamp(float2 v, float a, float b) { return make_float2(clamp(v.x, a, b), clamp(v.y, a, b)); }
inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b) { return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); }
inline __device__ __host__ float3 clamp(float3 v, float a, float b) { return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); }
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b) { return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); }
inline __device__ __host__ float4 clamp(float4 v, float a, float b) { return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); }
inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b) { return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); }
inline __device__ __host__ int2 clamp(int2 v, int a, int b) { return make_int2(clamp(v.x, a, b), clamp(v.y, a, b)); }
inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b) { return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); }
inline __device__ __host__ int3 clamp(int3 v, int a, int b) { return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); }
inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b) { return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); }
inline __device__ __host__ int4 clamp(int4 v, int a, int b) { return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); }
inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b) { return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); }
inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b) { return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b)); }
inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b) { return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); }
inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b) { return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); }
inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b) { return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); }
inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b) { return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); }
inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b) { return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); }

////////////////////////////////////////////////////////////////////////////////
// dot product
////////////////////////////////////////////////////////////////////////////////

// Sum of component-wise products; the result has the component type.
inline __host__ __device__ float dot(float2 a, float2 b) { return a.x * b.x + a.y * b.y; }
inline __host__ __device__ float dot(float3 a, float3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
inline __host__ __device__ float dot(float4 a, float4 b) { return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; }
inline __host__ __device__ int dot(int2 a, int2 b) { return a.x * b.x + a.y * b.y; }
inline __host__ __device__ int dot(int3 a, int3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
inline __host__ __device__ int dot(int4 a, int4 b) { return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; }
inline __host__ __device__ uint dot(uint2 a, uint2 b) { return a.x * b.x + a.y * b.y; }
inline __host__ __device__ uint dot(uint3 a, uint3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
inline __host__ __device__ uint dot(uint4 a, uint4 b) { return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; }

////////////////////////////////////////////////////////////////////////////////
// length
////////////////////////////////////////////////////////////////////////////////

// Euclidean norm: sqrt(dot(v, v)).
inline __host__ __device__ float length(float2 v) { return sqrtf(dot(v, v)); }
inline __host__ __device__ float length(float3 v) { return sqrtf(dot(v, v)); }
inline __host__ __device__ float length(float4 v) { return sqrtf(dot(v, v)); }

////////////////////////////////////////////////////////////////////////////////
// normalize
////////////////////////////////////////////////////////////////////////////////

// Scales v by rsqrtf(dot(v, v)). There is no guard for a zero-length input.
inline __host__ __device__ float2 normalize(float2 v) { float invLen = rsqrtf(dot(v, v)); return v * invLen; }
inline __host__ __device__ float3 normalize(float3 v) { float invLen = rsqrtf(dot(v, v)); return v * invLen; }
inline __host__ __device__ float4 normalize(float4 v) { float invLen = rsqrtf(dot(v, v)); return v * invLen; }

////////////////////////////////////////////////////////////////////////////////
// floor
////////////////////////////////////////////////////////////////////////////////

// Component-wise floorf.
inline __host__ __device__ float2 floorf(float2 v) { return make_float2(floorf(v.x), floorf(v.y)); }
inline __host__ __device__ float3 floorf(float3 v) { return make_float3(floorf(v.x), floorf(v.y), floorf(v.z)); }
inline __host__ __device__ float4 floorf(float4 v) { return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w)); }

////////////////////////////////////////////////////////////////////////////////
// frac - returns the fractional portion of a scalar or each vector component
////////////////////////////////////////////////////////////////////////////////

// Defined as v - floorf(v), so the result is measured from the floor even for
// negative inputs.
inline __host__ __device__ float fracf(float v) { return v - floorf(v); }
inline __host__ __device__ float2 fracf(float2 v) { return make_float2(fracf(v.x), fracf(v.y)); }
inline __host__ __device__ float3 fracf(float3 v) { return make_float3(fracf(v.x), fracf(v.y), fracf(v.z)); }
inline __host__ __device__ float4 fracf(float4 v) { return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w)); }

////////////////////////////////////////////////////////////////////////////////
// fmod
//////////////////////////////////////////////////////////////////////////////// inline __host__ __device__ float2 fmodf(float2 a, float2 b) { return make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y)); } inline __host__ __device__ float3 fmodf(float3 a, float3 b) { return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z)); } inline __host__ __device__ float4 fmodf(float4 a, float4 b) { return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w)); } //////////////////////////////////////////////////////////////////////////////// // absolute value //////////////////////////////////////////////////////////////////////////////// inline __host__ __device__ float2 fabs(float2 v) { return make_float2(fabs(v.x), fabs(v.y)); } inline __host__ __device__ float3 fabs(float3 v) { return make_float3(fabs(v.x), fabs(v.y), fabs(v.z)); } inline __host__ __device__ float4 fabs(float4 v) { return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w)); } inline __host__ __device__ int2 abs(int2 v) { return make_int2(abs(v.x), abs(v.y)); } inline __host__ __device__ int3 abs(int3 v) { return make_int3(abs(v.x), abs(v.y), abs(v.z)); } inline __host__ __device__ int4 abs(int4 v) { return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w)); } //////////////////////////////////////////////////////////////////////////////// // reflect // - returns reflection of incident ray I around surface normal N // - N should be normalized, reflected vector's length is equal to length of I //////////////////////////////////////////////////////////////////////////////// inline __host__ __device__ float3 reflect(float3 i, float3 n) { return i - 2.0f * n * dot(n, i); } //////////////////////////////////////////////////////////////////////////////// // cross product //////////////////////////////////////////////////////////////////////////////// inline __host__ __device__ float3 cross(float3 a, float3 b) { return make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x 
* b.y - a.y * b.x); } //////////////////////////////////////////////////////////////////////////////// // smoothstep // - returns 0 if x < a // - returns 1 if x > b // - otherwise returns smooth interpolation between 0 and 1 based on x //////////////////////////////////////////////////////////////////////////////// inline __device__ __host__ float smoothstep(float a, float b, float x) { float y = clamp((x - a) / (b - a), 0.0f, 1.0f); return (y * y * (3.0f - (2.0f * y))); } inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x) { float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f); return (y * y * (make_float2(3.0f) - (make_float2(2.0f) * y))); } inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x) { float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f); return (y * y * (make_float3(3.0f) - (make_float3(2.0f) * y))); } inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x) { float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f); return (y * y * (make_float4(3.0f) - (make_float4(2.0f) * y))); } //////////////////////////////////////////////////////////////////////////////// // sign //////////////////////////////////////////////////////////////////////////////// inline __device__ __host__ float3 sign(float3 a) { return make_float3( copysignf(1.0f, a.x), copysignf(1.0f, a.y), copysignf(1.0f, a.z)); } #endif ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/intersection.cu ================================================ /* * Copyright (c) 2022 Ruilong Li, UC Berkeley. 
*/ #include "include/helpers_cuda.h" template inline __host__ __device__ void _swap(scalar_t &a, scalar_t &b) { scalar_t c = a; a = b; b = c; } template inline __host__ __device__ void _ray_aabb_intersect( const scalar_t *rays_o, const scalar_t *rays_d, const scalar_t *aabb, scalar_t *near, scalar_t *far) { // aabb is [xmin, ymin, zmin, xmax, ymax, zmax] scalar_t tmin = (aabb[0] - rays_o[0]) / rays_d[0]; scalar_t tmax = (aabb[3] - rays_o[0]) / rays_d[0]; if (tmin > tmax) _swap(tmin, tmax); scalar_t tymin = (aabb[1] - rays_o[1]) / rays_d[1]; scalar_t tymax = (aabb[4] - rays_o[1]) / rays_d[1]; if (tymin > tymax) _swap(tymin, tymax); if (tmin > tymax || tymin > tmax) { *near = 1e10; *far = 1e10; return; } if (tymin > tmin) tmin = tymin; if (tymax < tmax) tmax = tymax; scalar_t tzmin = (aabb[2] - rays_o[2]) / rays_d[2]; scalar_t tzmax = (aabb[5] - rays_o[2]) / rays_d[2]; if (tzmin > tzmax) _swap(tzmin, tzmax); if (tmin > tzmax || tzmin > tmax) { *near = 1e10; *far = 1e10; return; } if (tzmin > tmin) tmin = tzmin; if (tzmax < tmax) tmax = tzmax; *near = tmin; *far = tmax; return; } template __global__ void ray_aabb_intersect_kernel( const int N, const scalar_t *rays_o, const scalar_t *rays_d, const scalar_t *aabb, scalar_t *t_min, scalar_t *t_max) { // aabb is [xmin, ymin, zmin, xmax, ymax, zmax] CUDA_GET_THREAD_ID(thread_id, N); // locate rays_o += thread_id * 3; rays_d += thread_id * 3; t_min += thread_id; t_max += thread_id; _ray_aabb_intersect(rays_o, rays_d, aabb, t_min, t_max); scalar_t zero = static_cast(0.f); *t_min = *t_min > zero ? *t_min : zero; return; } /** * @brief Ray AABB Test * * @param rays_o Ray origins. Tensor with shape [N, 3]. * @param rays_d Normalized ray directions. Tensor with shape [N, 3]. * @param aabb Scene AABB [xmin, ymin, zmin, xmax, ymax, zmax]. Tensor with shape [6]. * @return std::vector * Ray AABB intersection {t_min, t_max} with shape [N] respectively. Note the t_min is * clipped to minimum zero. 1e10 is returned if no intersection. 
*/ std::vector ray_aabb_intersect( const torch::Tensor rays_o, const torch::Tensor rays_d, const torch::Tensor aabb) { DEVICE_GUARD(rays_o); CHECK_INPUT(rays_o); CHECK_INPUT(rays_d); CHECK_INPUT(aabb); TORCH_CHECK(rays_o.ndimension() == 2 & rays_o.size(1) == 3) TORCH_CHECK(rays_d.ndimension() == 2 & rays_d.size(1) == 3) TORCH_CHECK(aabb.ndimension() == 1 & aabb.size(0) == 6) const int N = rays_o.size(0); const int threads = 256; const int blocks = CUDA_N_BLOCKS_NEEDED(N, threads); torch::Tensor t_min = torch::empty({N}, rays_o.options()); torch::Tensor t_max = torch::empty({N}, rays_o.options()); AT_DISPATCH_FLOATING_TYPES_AND_HALF( rays_o.scalar_type(), "ray_aabb_intersect", ([&] { ray_aabb_intersect_kernel<<>>( N, rays_o.data_ptr(), rays_d.data_ptr(), aabb.data_ptr(), t_min.data_ptr(), t_max.data_ptr()); })); return {t_min, t_max}; } ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/pack.cu ================================================ /* * Copyright (c) 2022 Ruilong Li, UC Berkeley. */ #include "include/helpers_cuda.h" __global__ void unpack_info_kernel( // input const int n_rays, const int *packed_info, // output int64_t *ray_indices) { CUDA_GET_THREAD_ID(i, n_rays); // locate const int base = packed_info[i * 2 + 0]; // point idx start. const int steps = packed_info[i * 2 + 1]; // point idx shift. if (steps == 0) return; ray_indices += base; for (int j = 0; j < steps; ++j) { ray_indices[j] = i; } } __global__ void unpack_info_to_mask_kernel( // input const int n_rays, const int *packed_info, const int n_samples, // output bool *masks) // [n_rays, n_samples] { CUDA_GET_THREAD_ID(i, n_rays); // locate const int base = packed_info[i * 2 + 0]; // point idx start. const int steps = packed_info[i * 2 + 1]; // point idx shift. 
if (steps == 0) return; masks += i * n_samples; for (int j = 0; j < steps; ++j) { masks[j] = true; } } template __global__ void unpack_data_kernel( const uint32_t n_rays, const int *packed_info, // input ray & point indices. const int data_dim, const scalar_t *data, const int n_sampler_per_ray, scalar_t *unpacked_data) // (n_rays, n_sampler_per_ray, data_dim) { CUDA_GET_THREAD_ID(i, n_rays); // locate const int base = packed_info[i * 2 + 0]; // point idx start. const int steps = packed_info[i * 2 + 1]; // point idx shift. if (steps == 0) return; data += base * data_dim; unpacked_data += i * n_sampler_per_ray * data_dim; for (int j = 0; j < steps; j++) { for (int k = 0; k < data_dim; k++) { unpacked_data[j * data_dim + k] = data[j * data_dim + k]; } } return; } torch::Tensor unpack_info(const torch::Tensor packed_info, const int n_samples) { DEVICE_GUARD(packed_info); CHECK_INPUT(packed_info); const int n_rays = packed_info.size(0); const int threads = 256; const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads); // int n_samples = packed_info[n_rays - 1].sum(0).item(); torch::Tensor ray_indices = torch::empty( {n_samples}, packed_info.options().dtype(torch::kLong)); unpack_info_kernel<<>>( n_rays, packed_info.data_ptr(), ray_indices.data_ptr()); return ray_indices; } torch::Tensor unpack_info_to_mask( const torch::Tensor packed_info, const int n_samples) { DEVICE_GUARD(packed_info); CHECK_INPUT(packed_info); const int n_rays = packed_info.size(0); const int threads = 256; const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads); torch::Tensor masks = torch::zeros( {n_rays, n_samples}, packed_info.options().dtype(torch::kBool)); unpack_info_to_mask_kernel<<>>( n_rays, packed_info.data_ptr(), n_samples, masks.data_ptr()); return masks; } torch::Tensor unpack_data( torch::Tensor packed_info, torch::Tensor data, int n_samples_per_ray) { DEVICE_GUARD(packed_info); CHECK_INPUT(packed_info); CHECK_INPUT(data); TORCH_CHECK(packed_info.ndimension() == 2 & 
packed_info.size(1) == 2); TORCH_CHECK(data.ndimension() == 2); const int n_rays = packed_info.size(0); const int n_samples = data.size(0); const int data_dim = data.size(1); const int threads = 256; const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads); torch::Tensor unpacked_data = torch::zeros( {n_rays, n_samples_per_ray, data_dim}, data.options()); AT_DISPATCH_ALL_TYPES( data.scalar_type(), "unpack_data", ([&] { unpack_data_kernel<<>>( n_rays, // inputs packed_info.data_ptr(), data_dim, data.data_ptr(), n_samples_per_ray, // outputs unpacked_data.data_ptr()); })); return unpacked_data; } ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/pybind.cu ================================================ /* * Copyright (c) 2022 Ruilong Li, UC Berkeley. */ #include "include/helpers_cuda.h" #include "include/helpers_math.h" #include "include/helpers_contraction.h" std::vector ray_aabb_intersect( const torch::Tensor rays_o, const torch::Tensor rays_d, const torch::Tensor aabb); std::vector ray_marching( // rays const torch::Tensor rays_o, const torch::Tensor rays_d, const torch::Tensor t_min, const torch::Tensor t_max, // occupancy grid & contraction const torch::Tensor roi, const torch::Tensor grid_binary, const ContractionType type, // sampling const float step_size, const float cone_angle); torch::Tensor unpack_info( const torch::Tensor packed_info, const int n_samples); torch::Tensor unpack_info_to_mask( const torch::Tensor packed_info, const int n_samples); torch::Tensor grid_query( const torch::Tensor samples, // occupancy grid & contraction const torch::Tensor roi, const torch::Tensor grid_value, const ContractionType type); torch::Tensor contract( const torch::Tensor samples, // contraction const torch::Tensor roi, const ContractionType type); torch::Tensor contract_inv( const torch::Tensor samples, // contraction const torch::Tensor roi, const ContractionType type); std::vector ray_resampling( 
torch::Tensor packed_info, torch::Tensor starts, torch::Tensor ends, torch::Tensor weights, const int steps); torch::Tensor unpack_data( torch::Tensor packed_info, torch::Tensor data, int n_samples_per_ray); // cub implementations: parallel across samples bool is_cub_available() { return (bool) CUB_SUPPORTS_SCAN_BY_KEY(); } torch::Tensor transmittance_from_sigma_forward_cub( torch::Tensor ray_indices, torch::Tensor starts, torch::Tensor ends, torch::Tensor sigmas); torch::Tensor transmittance_from_sigma_backward_cub( torch::Tensor ray_indices, torch::Tensor starts, torch::Tensor ends, torch::Tensor transmittance, torch::Tensor transmittance_grad); torch::Tensor transmittance_from_alpha_forward_cub( torch::Tensor ray_indices, torch::Tensor alphas); torch::Tensor transmittance_from_alpha_backward_cub( torch::Tensor ray_indices, torch::Tensor alphas, torch::Tensor transmittance, torch::Tensor transmittance_grad); // naive implementations: parallel across rays torch::Tensor transmittance_from_sigma_forward_naive( torch::Tensor packed_info, torch::Tensor starts, torch::Tensor ends, torch::Tensor sigmas); torch::Tensor transmittance_from_sigma_backward_naive( torch::Tensor packed_info, torch::Tensor starts, torch::Tensor ends, torch::Tensor transmittance, torch::Tensor transmittance_grad); torch::Tensor transmittance_from_alpha_forward_naive( torch::Tensor packed_info, torch::Tensor alphas); torch::Tensor transmittance_from_alpha_backward_naive( torch::Tensor packed_info, torch::Tensor alphas, torch::Tensor transmittance, torch::Tensor transmittance_grad); torch::Tensor weight_from_sigma_forward_naive( torch::Tensor packed_info, torch::Tensor starts, torch::Tensor ends, torch::Tensor sigmas); torch::Tensor weight_from_sigma_backward_naive( torch::Tensor weights, torch::Tensor grad_weights, torch::Tensor packed_info, torch::Tensor starts, torch::Tensor ends, torch::Tensor sigmas); torch::Tensor weight_from_alpha_forward_naive( torch::Tensor packed_info, torch::Tensor 
alphas); torch::Tensor weight_from_alpha_backward_naive( torch::Tensor weights, torch::Tensor grad_weights, torch::Tensor packed_info, torch::Tensor alphas); torch::Tensor weight_from_alpha_patch_based_forward_naive( torch::Tensor packed_info, torch::Tensor alphas); torch::Tensor weight_from_alpha_patch_based_backward_naive( torch::Tensor weights, torch::Tensor grad_weights, torch::Tensor packed_info, torch::Tensor alphas); std::vector weight_and_transmittance_from_alpha_patch_based_forward_naive( torch::Tensor packed_info, // (n_patches, 2) torch::Tensor alphas // (n_samples, patches_size, 1) ); torch::Tensor weight_and_transmittance_from_alpha_patch_based_backward_naive( torch::Tensor weights, torch::Tensor grad_weights, // (n_samples, patches_size, 1) torch::Tensor packed_info, torch::Tensor alphas); torch::Tensor transmittance_from_alpha_patch_based_forward_naive( torch::Tensor packed_info, torch::Tensor alphas); torch::Tensor transmittance_from_alpha_patch_based_backward_naive( torch::Tensor packed_info, torch::Tensor alphas, torch::Tensor transmittance, torch::Tensor transmittance_grad); PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { // contraction py::enum_(m, "ContractionType") .value("AABB", ContractionType::AABB) .value("UN_BOUNDED_TANH", ContractionType::UN_BOUNDED_TANH) .value("UN_BOUNDED_SPHERE", ContractionType::UN_BOUNDED_SPHERE); m.def("contract", &contract); m.def("contract_inv", &contract_inv); // grid m.def("grid_query", &grid_query); // marching m.def("ray_aabb_intersect", &ray_aabb_intersect); m.def("ray_marching", &ray_marching); m.def("ray_resampling", &ray_resampling); // rendering m.def("is_cub_available", is_cub_available); m.def("transmittance_from_sigma_forward_cub", transmittance_from_sigma_forward_cub); m.def("transmittance_from_sigma_backward_cub", transmittance_from_sigma_backward_cub); m.def("transmittance_from_alpha_forward_cub", transmittance_from_alpha_forward_cub); m.def("transmittance_from_alpha_backward_cub", 
transmittance_from_alpha_backward_cub); m.def("transmittance_from_sigma_forward_naive", transmittance_from_sigma_forward_naive); m.def("transmittance_from_sigma_backward_naive", transmittance_from_sigma_backward_naive); m.def("transmittance_from_alpha_forward_naive", transmittance_from_alpha_forward_naive); m.def("transmittance_from_alpha_backward_naive", transmittance_from_alpha_backward_naive); m.def("weight_from_sigma_forward_naive", weight_from_sigma_forward_naive); m.def("weight_from_sigma_backward_naive", weight_from_sigma_backward_naive); m.def("weight_from_alpha_forward_naive", weight_from_alpha_forward_naive); m.def("weight_from_alpha_backward_naive", weight_from_alpha_backward_naive); m.def("weight_from_alpha_patch_based_forward_naive", weight_from_alpha_patch_based_forward_naive); m.def("weight_from_alpha_patch_based_backward_naive", weight_from_alpha_patch_based_backward_naive); m.def("weight_and_transmittance_from_alpha_patch_based_forward_naive", weight_and_transmittance_from_alpha_patch_based_forward_naive); m.def("weight_and_transmittance_from_alpha_patch_based_backward_naive", weight_and_transmittance_from_alpha_patch_based_backward_naive); m.def("transmittance_from_alpha_patch_based_forward_naive", transmittance_from_alpha_patch_based_forward_naive); m.def("transmittance_from_alpha_patch_based_backward_naive", transmittance_from_alpha_patch_based_backward_naive); // pack & unpack m.def("unpack_data", &unpack_data); m.def("unpack_info", &unpack_info); m.def("unpack_info_to_mask", &unpack_info_to_mask); } ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/ray_marching.cu ================================================ /* * Copyright (c) 2022 Ruilong Li, UC Berkeley. 
*/

#include "include/helpers_cuda.h"
#include "include/helpers_math.h"
#include "include/helpers_contraction.h"

// Marching step length at ray distance `t`: `t * cone_angle` clamped to
// [dt_min, dt_max].
inline __device__ __host__ float calc_dt(
    const float t, const float cone_angle,
    const float dt_min, const float dt_max)
{
    return clamp(t * cone_angle, dt_min, dt_max);
}

// Flattened index of the grid cell that contains `xyz_unit`.
// The cell coordinate is clamped into the grid, so slightly out-of-range
// inputs map to a border cell instead of indexing out of bounds.
inline __device__ __host__ int grid_idx_at(
    const float3 xyz_unit, const int3 grid_res)
{
    // xyz should be always in [0, 1]^3.
    int3 ixyz = make_int3(xyz_unit * make_float3(grid_res));
    ixyz = clamp(ixyz, make_int3(0, 0, 0), grid_res - 1);
    // Strides of a contiguous (res_x, res_y, res_z) array.
    int3 grid_offset = make_int3(grid_res.y * grid_res.z, grid_res.z, 1);
    int idx = dot(ixyz, grid_offset);
    return idx;
}

// Value stored in the grid at world position `xyz`.
// For the AABB contraction type a point outside [roi_min, roi_max] is
// reported as empty without touching the grid; for every other type the
// point is first mapped through `apply_contraction`.
template <typename scalar_t>
inline __device__ __host__ scalar_t grid_occupied_at(
    const float3 xyz,
    const float3 roi_min, const float3 roi_max,
    ContractionType type,
    const int3 grid_res, const scalar_t *grid_value)
{
    if (type == ContractionType::AABB &&
        (xyz.x < roi_min.x || xyz.x > roi_max.x ||
         xyz.y < roi_min.y || xyz.y > roi_max.y ||
         xyz.z < roi_min.z || xyz.z > roi_max.z))
    {
        return false;
    }
    float3 xyz_unit = apply_contraction(
        xyz, roi_min, roi_max, type);
    int idx = grid_idx_at(xyz_unit, grid_res);
    return grid_value[idx];
}

// dda like step
// Distance along `dir` from `xyz` to the nearest face of the grid cell that
// will be crossed next, expressed in world units and never negative.
inline __device__ __host__ float distance_to_next_voxel(
    const float3 xyz, const float3 dir, const float3 inv_dir,
    const float3 roi_min, const float3 roi_max, const int3 grid_res)
{
    float3 _occ_res = make_float3(grid_res);
    float3 _xyz = roi_to_unit(xyz, roi_min, roi_max) * _occ_res;
    float3 txyz = ((floorf(_xyz + 0.5f + 0.5f * sign(dir)) - _xyz) * inv_dir) / _occ_res * (roi_max - roi_min);
    float t = min(min(txyz.x, txyz.y), txyz.z);
    return fmaxf(t, 0.0f);
}

// Advances `t` in increments of `dt_min` until it reaches the next cell
// boundary (or `far`, whichever comes first). At least one increment is
// always taken, so the returned value is strictly greater than `t`.
inline __device__ __host__ float advance_to_next_voxel(
    const float t, const float dt_min,
    const float3 xyz, const float3 dir, const float3 inv_dir,
    const float3 roi_min, const float3 roi_max, const int3 grid_res, const float far)
{
    // Regular stepping (may be slower but matches non-empty space)
    float t_target = t + distance_to_next_voxel(
                             xyz, dir, inv_dir, roi_min, roi_max, grid_res);
    t_target = min(t_target, far);
    float _t = t;
    do
    {
        _t += dt_min;
    } while (_t < t_target);
    return _t;
}

// -------------------------------------------------------------------------------
// Raymarching
// -------------------------------------------------------------------------------

// Marches one ray per thread through the occupancy grid.
//
// The kernel is launched twice. When `packed_info` is null (first round) it
// only counts the occupied samples of each ray into `num_steps`. When
// `packed_info` is given (second round) it repeats the identical march and
// writes each sample's interval and ray id at the ray's offset
// `packed_info[i * 2]`.
// NOTE(review): CUDA_GET_THREAD_ID comes from helpers_cuda.h (not shown here);
// it is assumed to bind `i` to the ray index and to exit for i >= n_rays.
__global__ void ray_marching_kernel(
    // rays info
    const uint32_t n_rays,
    const float *rays_o, // shape (n_rays, 3)
    const float *rays_d, // shape (n_rays, 3)
    const float *t_min,  // shape (n_rays,)
    const float *t_max,  // shape (n_rays,)
    // occupancy grid & contraction
    const float *roi,
    const int3 grid_res,
    const bool *grid_binary, // shape (reso_x, reso_y, reso_z)
    const ContractionType type,
    // sampling
    const float step_size,
    const float cone_angle,
    const int *packed_info,
    // first round outputs
    int *num_steps,
    // second round outputs
    int64_t *ray_indices,
    float *t_starts,
    float *t_ends)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    bool is_first_round = (packed_info == nullptr);

    // locate
    rays_o += i * 3;
    rays_d += i * 3;
    t_min += i;
    t_max += i;

    if (is_first_round)
    {
        num_steps += i;
    }
    else
    {
        // Only the offset is needed here; the sample count is reproduced by
        // re-running the same march as in the first round.
        int base = packed_info[i * 2 + 0];
        t_starts += base;
        t_ends += base;
        ray_indices += base;
    }

    const float3 origin = make_float3(rays_o[0], rays_o[1], rays_o[2]);
    const float3 dir = make_float3(rays_d[0], rays_d[1], rays_d[2]);
    const float3 inv_dir = 1.0f / dir;
    const float near = t_min[0], far = t_max[0];

    const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);
    const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);

    // TODO: compute dt_max from occ resolution.
    float dt_min = step_size;
    float dt_max = 1e10f;

    int j = 0;
    float t0 = near;
    float dt = calc_dt(t0, cone_angle, dt_min, dt_max);
    float t1 = t0 + dt;
    float t_mid = (t0 + t1) * 0.5f;

    while (t_mid < far)
    {
        // current center
        const float3 xyz = origin + t_mid * dir;
        if (grid_occupied_at(xyz, roi_min, roi_max, type, grid_res, grid_binary))
        {
            if (!is_first_round)
            {
                t_starts[j] = t0;
                t_ends[j] = t1;
                ray_indices[j] = i;
            }
            ++j;
            // march to next sample
            t0 = t1;
            t1 = t0 + calc_dt(t0, cone_angle, dt_min, dt_max);
            t_mid = (t0 + t1) * 0.5f;
        }
        else
        {
            // march to next sample
            switch (type)
            {
            case ContractionType::AABB:
                // no contraction: skip the rest of the empty cell
                t_mid = advance_to_next_voxel(
                    t_mid, dt_min, xyz, dir, inv_dir, roi_min, roi_max, grid_res, far);
                dt = calc_dt(t_mid, cone_angle, dt_min, dt_max);
                t0 = t_mid - dt * 0.5f;
                t1 = t_mid + dt * 0.5f;
                break;

            default:
                // any type of scene contraction does not work with DDA.
                t0 = t1;
                t1 = t0 + calc_dt(t0, cone_angle, dt_min, dt_max);
                t_mid = (t0 + t1) * 0.5f;
                break;
            }
        }
    }

    if (is_first_round)
    {
        *num_steps = j;
    }
    return;
}

// Samples every ray inside the occupied cells of the grid.
//
// Returns {packed_info, ray_indices, t_starts, t_ends}:
//   packed_info  (n_rays, 2) int32: per ray, offset of its first sample and
//                its number of samples
//   ray_indices  (total,)    int64: ray id of every sample
//   t_starts     (total, 1)  start of every sample interval
//   t_ends       (total, 1)  end of every sample interval
// An empty batch (n_rays == 0) yields empty tensors of those shapes.
std::vector<torch::Tensor> ray_marching(
    // rays
    const torch::Tensor rays_o,
    const torch::Tensor rays_d,
    const torch::Tensor t_min,
    const torch::Tensor t_max,
    // occupancy grid & contraction
    const torch::Tensor roi,
    const torch::Tensor grid_binary,
    const ContractionType type,
    // sampling
    const float step_size,
    const float cone_angle)
{
    DEVICE_GUARD(rays_o);

    CHECK_INPUT(rays_o);
    CHECK_INPUT(rays_d);
    CHECK_INPUT(t_min);
    CHECK_INPUT(t_max);
    CHECK_INPUT(roi);
    CHECK_INPUT(grid_binary);
    // Logical && so that size(1) is only queried once the rank is known to be 2.
    TORCH_CHECK(rays_o.ndimension() == 2 && rays_o.size(1) == 3);
    TORCH_CHECK(rays_d.ndimension() == 2 && rays_d.size(1) == 3);
    TORCH_CHECK(t_min.ndimension() == 1);
    TORCH_CHECK(t_max.ndimension() == 1);
    TORCH_CHECK(roi.ndimension() == 1 && roi.size(0) == 6);
    TORCH_CHECK(grid_binary.ndimension() == 3);

    const int n_rays = rays_o.size(0);
    const int3 grid_res = make_int3(
        grid_binary.size(0), grid_binary.size(1), grid_binary.size(2));

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // helper counter
    torch::Tensor num_steps = torch::empty(
        {n_rays}, rays_o.options().dtype(torch::kInt32));

    // count number of samples per ray
    // (skipped for an empty batch: there is nothing to launch)
    if (n_rays > 0)
    {
        ray_marching_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
            // rays
            n_rays,
            rays_o.data_ptr<float>(),
            rays_d.data_ptr<float>(),
            t_min.data_ptr<float>(),
            t_max.data_ptr<float>(),
            // occupancy grid & contraction
            roi.data_ptr<float>(),
            grid_res,
            grid_binary.data_ptr<bool>(),
            type,
            // sampling
            step_size,
            cone_angle,
            nullptr, /* packed_info */
            // outputs
            num_steps.data_ptr<int>(),
            nullptr, /* ray_indices */
            nullptr, /* t_starts */
            nullptr /* t_ends */);
    }

    torch::Tensor cum_steps = num_steps.cumsum(0, torch::kInt32);
    torch::Tensor packed_info = torch::stack({cum_steps - num_steps, num_steps}, 1);

    // output samples starts and ends
    // The last cumulative count is the total; an empty batch has no last element.
    const int total_steps = n_rays > 0 ? cum_steps[n_rays - 1].item<int>() : 0;
    torch::Tensor t_starts = torch::empty({total_steps, 1}, rays_o.options());
    torch::Tensor t_ends = torch::empty({total_steps, 1}, rays_o.options());
    torch::Tensor ray_indices = torch::empty({total_steps}, cum_steps.options().dtype(torch::kLong));

    if (n_rays > 0)
    {
        ray_marching_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
            // rays
            n_rays,
            rays_o.data_ptr<float>(),
            rays_d.data_ptr<float>(),
            t_min.data_ptr<float>(),
            t_max.data_ptr<float>(),
            // occupancy grid & contraction
            roi.data_ptr<float>(),
            grid_res,
            grid_binary.data_ptr<bool>(),
            type,
            // sampling
            step_size,
            cone_angle,
            packed_info.data_ptr<int>(),
            // outputs
            nullptr, /* num_steps */
            ray_indices.data_ptr<int64_t>(),
            t_starts.data_ptr<float>(),
            t_ends.data_ptr<float>());
    }

    return {packed_info, ray_indices, t_starts, t_ends};
}

// ----------------------------------------------------------------------------
// Query the occupancy grid
// ----------------------------------------------------------------------------

// Looks up the grid value at one sample position per thread.
template <typename scalar_t>
__global__ void query_occ_kernel(
    // rays info
    const uint32_t n_samples,
    const float *samples, // shape (n_samples, 3)
    // occupancy grid & contraction
    const float *roi,
    const int3 grid_res,
    const scalar_t *grid_value, // shape (reso_x, reso_y, reso_z)
    const ContractionType type,
    // outputs
    scalar_t *occs)
{
    CUDA_GET_THREAD_ID(i, n_samples);

    // locate
    samples += i * 3;
    occs += i;

    const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);
    const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);
    const float3 xyz = make_float3(samples[0], samples[1], samples[2]);

    *occs = grid_occupied_at(xyz, roi_min, roi_max, type, grid_res, grid_value);
    return;
}

// Returns, for every row of `samples`, the value of `grid_value` at that
// position. The result has shape (n_samples,) and the dtype of `grid_value`
// (floating point or bool).
torch::Tensor grid_query(
    const torch::Tensor samples,
    // occupancy grid & contraction
    const torch::Tensor roi,
    const torch::Tensor grid_value,
    const ContractionType type)
{
    DEVICE_GUARD(samples);
    CHECK_INPUT(samples);
    // The kernel reads roi and the grid through raw pointers, so they get the
    // same validation that ray_marching applies to its grid inputs.
    CHECK_INPUT(roi);
    CHECK_INPUT(grid_value);
    TORCH_CHECK(roi.ndimension() == 1 && roi.size(0) == 6);
    TORCH_CHECK(grid_value.ndimension() == 3);

    const int n_samples = samples.size(0);
    const int3 grid_res = make_int3(
        grid_value.size(0), grid_value.size(1), grid_value.size(2));

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads);

    torch::Tensor occs = torch::empty({n_samples}, grid_value.options());
    if (n_samples == 0)
    {
        // Nothing to query; avoid a launch with an empty grid of blocks.
        return occs;
    }

    AT_DISPATCH_FLOATING_TYPES_AND(
        at::ScalarType::Bool,
        occs.scalar_type(),
        "grid_query",
        ([&]
         { query_occ_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
               n_samples,
               samples.data_ptr<float>(),
               // grid
               roi.data_ptr<float>(),
               grid_res,
               grid_value.data_ptr<scalar_t>(),
               type,
               // outputs
               occs.data_ptr<scalar_t>()); }));

    return occs;
}

================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_transmittance.cu
================================================
/*
 * Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/

#include "include/helpers_cuda.h"

// NOTE(review): CUDA_GET_THREAD_ID / CUDA_GET_THREAD_ID_2D come from
// helpers_cuda.h (not shown here); they are assumed to bind the named indices
// to the thread's position and to exit for out-of-range threads.
//
// In every kernel below `packed_info` holds two ints per ray (or patch):
// the offset of its first sample and its number of samples.

// Transmittance before each sample from densities:
//   transmittance[j] = exp(-sum_{m<j} sigmas[m] * (ends[m] - starts[m]))
// One thread handles one ray.
__global__ void transmittance_from_sigma_forward_kernel(
    const uint32_t n_rays,
    // inputs
    const int *packed_info,
    const float *starts,
    const float *ends,
    const float *sigmas,
    // outputs
    float *transmittance)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // locate
    const int base = packed_info[i * 2 + 0];
    const int steps = packed_info[i * 2 + 1];
    if (steps == 0)
        return;

    starts += base;
    ends += base;
    sigmas += base;
    transmittance += base;

    // accumulation
    // (an alternative formulation multiplies T by (1 - alpha) with
    //  alpha = 1 - exp(-sigma * delta) at every step)
    float cumsum = 0.0f;
    for (int j = 0; j < steps; ++j)
    {
        transmittance[j] = __expf(-cumsum);
        cumsum += sigmas[j] * (ends[j] - starts[j]);
    }
    return;
}

// Gradient of the kernel above with respect to `sigmas`.
// Walks each ray back to front so that `cumsum` holds the contribution of all
// later samples when sample j is written.
__global__ void transmittance_from_sigma_backward_kernel(
    const uint32_t n_rays,
    // inputs
    const int *packed_info,
    const float *starts,
    const float *ends,
    const float *transmittance,
    const float *transmittance_grad,
    // outputs
    float *sigmas_grad)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // locate
    const int base = packed_info[i * 2 + 0];
    const int steps = packed_info[i * 2 + 1];
    if (steps == 0)
        return;

    transmittance += base;
    transmittance_grad += base;
    starts += base;
    ends += base;
    sigmas_grad += base;

    // accumulation
    float cumsum = 0.0f;
    for (int j = steps - 1; j >= 0; --j)
    {
        sigmas_grad[j] = cumsum * (ends[j] - starts[j]);
        cumsum += -transmittance_grad[j] * transmittance[j];
    }
    return;
}

// Transmittance before each sample from opacities:
//   transmittance[j] = prod_{m<j} (1 - alphas[m])
// One thread handles one ray.
__global__ void transmittance_from_alpha_forward_kernel(
    const uint32_t n_rays,
    // inputs
    const int *packed_info,
    const float *alphas,
    // outputs
    float *transmittance)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // locate
    const int base = packed_info[i * 2 + 0];
    const int steps = packed_info[i * 2 + 1];
    if (steps == 0)
        return;

    alphas += base;
    transmittance += base;

    // accumulation
    float T = 1.0f;
    for (int j = 0; j < steps; ++j)
    {
        transmittance[j] = T;
        T *= (1.0f - alphas[j]);
    }
    return;
}

// Gradient of the kernel above with respect to `alphas`.
// The divisor is floored at 1e-10 so a fully opaque sample cannot divide by
// zero.
__global__ void transmittance_from_alpha_backward_kernel(
    const uint32_t n_rays,
    // inputs
    const int *packed_info,
    const float *alphas,
    const float *transmittance,
    const float *transmittance_grad,
    // outputs
    float *alphas_grad)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // locate
    const int base = packed_info[i * 2 + 0];
    const int steps = packed_info[i * 2 + 1];
    if (steps == 0)
        return;

    alphas += base;
    transmittance += base;
    transmittance_grad += base;
    alphas_grad += base;

    // accumulation
    float cumsum = 0.0f;
    for (int j = steps - 1; j >= 0; --j)
    {
        alphas_grad[j] = cumsum / fmax(1.0f - alphas[j], 1e-10f);
        cumsum += -transmittance_grad[j] * transmittance[j];
    }
    return;
}

// Patch-based variant of transmittance_from_alpha_forward_kernel.
// Samples are stored as (n_samples, patch_size): thread (i, k) handles ray k
// of patch i, whose j-th sample lives at index j * patch_size + k past the
// patch offset.
__global__ void transmittance_from_alpha_patch_based_forward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    // inputs
    const int *packed_info,
    const float *alphas,
    // outputs
    float *transmittance)
{
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size); // i is the patch id, k is the ray id within the patch

    // locate
    const int base = packed_info[i * 2 + 0];  // offset of the patch's first sample
    const int steps = packed_info[i * 2 + 1]; // number of samples of the patch
    if (steps == 0)
        return;

    alphas += base * patch_size;
    transmittance += base * patch_size;

    // accumulation
    float T = 1.0f;
    for (int j = 0; j < steps; ++j)
    {
        const uint32_t ray_id = j * patch_size + k;
        transmittance[ray_id] = T;
        // Use this ray's own opacity. Indexing with the bare step `j` would
        // read opacities belonging to other rays of the patch.
        T *= (1.0f - alphas[ray_id]);
    }
    return;
}

// Patch-based variant of transmittance_from_alpha_backward_kernel, using the
// same (n_samples, patch_size) layout as the forward kernel.
__global__ void transmittance_from_alpha_patch_based_backward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    // inputs
    const int *packed_info,
    const float *alphas,
    const float *transmittance,
    const float *transmittance_grad,
    // outputs
    float *alphas_grad)
{
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size); // i is the patch id, k is the ray id within the patch

    // locate
    const int base = packed_info[i * 2 + 0];
    const int steps = packed_info[i * 2 + 1];
    if (steps == 0)
        return;

    alphas += base * patch_size;
    transmittance += base * patch_size;
    transmittance_grad += base * patch_size;
    alphas_grad += base * patch_size;

    // accumulation
    float cumsum = 0.0f;
    for (int j = steps - 1; j >= 0; --j)
    {
        const uint32_t sample_idx = j * patch_size + k;
        alphas_grad[sample_idx] = cumsum / fmax(1.0f - alphas[sample_idx], 1e-10f);
        cumsum += -transmittance_grad[sample_idx] * transmittance[sample_idx];
    }
    return;
}

// Host entry point for transmittance_from_sigma_forward_kernel.
// `starts`, `ends` and `sigmas` are (n_samples, 1); the result has the shape
// of `sigmas`.
torch::Tensor transmittance_from_sigma_forward_naive(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(sigmas);
    // Logical && so that size(1) is only queried once the rank is known to be 2.
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // outputs
    torch::Tensor transmittance = torch::empty_like(sigmas);

    // parallel across rays
    transmittance_from_sigma_forward_kernel<<<
        blocks,
        threads,
        0,
        at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        // inputs
        packed_info.data_ptr<int>(),
        starts.data_ptr<float>(),
        ends.data_ptr<float>(),
        sigmas.data_ptr<float>(),
        // outputs
        transmittance.data_ptr<float>());
    return transmittance;
}

// Host entry point for transmittance_from_sigma_backward_kernel.
// Returns the gradient with respect to sigmas, shaped like `transmittance`.
torch::Tensor transmittance_from_sigma_backward_naive(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor transmittance,
    torch::Tensor transmittance_grad)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(transmittance);
    CHECK_INPUT(transmittance_grad);
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(transmittance.ndimension() == 2 && transmittance.size(1) == 1);
    TORCH_CHECK(transmittance_grad.ndimension() == 2 && transmittance_grad.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // outputs
    torch::Tensor sigmas_grad = torch::empty_like(transmittance);

    // parallel across rays
    transmittance_from_sigma_backward_kernel<<<
        blocks,
        threads,
        0,
        at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        // inputs
        packed_info.data_ptr<int>(),
        starts.data_ptr<float>(),
        ends.data_ptr<float>(),
        transmittance.data_ptr<float>(),
        transmittance_grad.data_ptr<float>(),
        // outputs
        sigmas_grad.data_ptr<float>());
    return sigmas_grad;
}

// Host entry point for transmittance_from_alpha_forward_kernel.
// `alphas` is (n_samples, 1); the result has the same shape.
torch::Tensor transmittance_from_alpha_forward_naive(
    torch::Tensor packed_info, torch::Tensor alphas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(alphas);
    TORCH_CHECK(alphas.ndimension() == 2 && alphas.size(1) == 1);
    TORCH_CHECK(packed_info.ndimension() == 2);

    const uint32_t n_rays = packed_info.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // outputs
    torch::Tensor transmittance = torch::empty_like(alphas);

    // parallel across rays
    transmittance_from_alpha_forward_kernel<<<
        blocks,
        threads,
        0,
        at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        // outputs
        transmittance.data_ptr<float>());
    return transmittance;
}

// Host entry point for transmittance_from_alpha_backward_kernel.
// Returns the gradient with respect to alphas, shaped like `alphas`.
torch::Tensor transmittance_from_alpha_backward_naive(
    torch::Tensor packed_info,
    torch::Tensor alphas,
    torch::Tensor transmittance,
    torch::Tensor transmittance_grad)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    // alphas is read through a raw pointer as well, so validate it like the
    // forward pass does.
    CHECK_INPUT(alphas);
    CHECK_INPUT(transmittance);
    CHECK_INPUT(transmittance_grad);
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 2 && alphas.size(1) == 1);
    TORCH_CHECK(transmittance.ndimension() == 2 && transmittance.size(1) == 1);
    TORCH_CHECK(transmittance_grad.ndimension() == 2 && transmittance_grad.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // outputs
    torch::Tensor alphas_grad = torch::empty_like(alphas);

    // parallel across rays
    transmittance_from_alpha_backward_kernel<<<
        blocks,
        threads,
        0,
        at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        transmittance.data_ptr<float>(),
        transmittance_grad.data_ptr<float>(),
        // outputs
        alphas_grad.data_ptr<float>());
    return alphas_grad;
}

// Host entry point for transmittance_from_alpha_patch_based_forward_kernel.
// `alphas` is (n_samples, patch_size, 1); the result has the same shape.
// patch_size must lie in [1, 256].
torch::Tensor transmittance_from_alpha_patch_based_forward_naive(
    torch::Tensor packed_info, torch::Tensor alphas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(alphas);
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 3 && alphas.size(2) == 1);

    const uint32_t n_patches = packed_info.size(0);
    const uint32_t patch_size = alphas.size(1);
    // A block has a budget of 256 threads. Outside [1, 256] the number of
    // patches per block below rounds to zero and the grid size computation
    // would divide by zero on the host.
    TORCH_CHECK(patch_size >= 1 && patch_size <= 256,
                "patch_size must be in [1, 256], got ", patch_size);

    // compute the required number of thread.y from patch size
    // take the log2 of patch size and round up to the next power of 2
    const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size)));
    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;
    const dim3 threads(thread_for_n_samples, thread_for_a_patch);
    const dim3 blocks((n_patches + threads.x - 1) / threads.x, (patch_size + threads.y - 1) / threads.y);

    // outputs
    torch::Tensor transmittance = torch::empty_like(alphas);

    // parallel across rays
    transmittance_from_alpha_patch_based_forward_kernel<<<
        blocks,
        threads,
        0,
        at::cuda::getCurrentCUDAStream()>>>(
        n_patches,
        patch_size,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        // outputs
        transmittance.data_ptr<float>());
    return transmittance;
}

// Host entry point for transmittance_from_alpha_patch_based_backward_kernel.
// Returns the gradient with respect to alphas, shaped like `alphas`.
// patch_size must lie in [1, 256].
torch::Tensor transmittance_from_alpha_patch_based_backward_naive(
    torch::Tensor packed_info,
    torch::Tensor alphas,
    torch::Tensor transmittance,
    torch::Tensor transmittance_grad)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(alphas);
    CHECK_INPUT(transmittance);
    CHECK_INPUT(transmittance_grad);
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 3 && alphas.size(2) == 1);
    TORCH_CHECK(transmittance.ndimension() == 3 && transmittance.size(2) == 1);
    TORCH_CHECK(transmittance_grad.ndimension() == 3 && transmittance_grad.size(2) == 1);

    const uint32_t n_patches = packed_info.size(0);
    const uint32_t patch_size = alphas.size(1);
    // See the forward wrapper: outside [1, 256] the launch configuration
    // below would divide by zero.
    TORCH_CHECK(patch_size >= 1 && patch_size <= 256,
                "patch_size must be in [1, 256], got ", patch_size);

    // compute the required number of thread.y from patch size
    // take the log2 of patch size and round up to the next power of 2
    const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size)));
    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;
    const dim3 threads(thread_for_n_samples, thread_for_a_patch);
    const dim3 blocks((n_patches + threads.x - 1) / threads.x, (patch_size + threads.y - 1) / threads.y);

    // outputs
    torch::Tensor alphas_grad = torch::empty_like(alphas);

    // parallel across rays
    transmittance_from_alpha_patch_based_backward_kernel<<<
        blocks,
        threads,
        0,
        at::cuda::getCurrentCUDAStream()>>>(
        n_patches,
        patch_size,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        transmittance.data_ptr<float>(),
        transmittance_grad.data_ptr<float>(),
        // outputs
        alphas_grad.data_ptr<float>());
    return alphas_grad;
}

================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_transmittance_cub.cu
================================================
/*
 * Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/

// CUB is supported in CUDA >= 11.0
// ExclusiveScanByKey is supported in CUB >= 1.15.0 (CUDA >= 11.6)
// See: https://github.com/NVIDIA/cub/tree/main#releases
#include "include/helpers_cuda.h"
#if CUB_SUPPORTS_SCAN_BY_KEY()
#include <cub/cub.cuh>
#endif

// Binary multiplication functor used as the scan operator for products.
struct Product
{
    template <typename T>
    __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const { return a * b; }
};

#if CUB_SUPPORTS_SCAN_BY_KEY()
// Exclusive prefix sum of `input` that restarts whenever the key changes.
// Runs on the current CUDA stream.
template <typename KeysInputIteratorT, typename ValuesInputIteratorT, typename ValuesOutputIteratorT>
inline void exclusive_sum_by_key(
    KeysInputIteratorT keys, ValuesInputIteratorT input, ValuesOutputIteratorT output, int64_t num_items)
{
    TORCH_CHECK(num_items <= std::numeric_limits<long>::max(),
                "cub ExclusiveSumByKey does not support more than LONG_MAX elements");
    CUB_WRAPPER(cub::DeviceScan::ExclusiveSumByKey, keys, input, output,
                num_items, cub::Equality(), at::cuda::getCurrentCUDAStream());
}

// Exclusive prefix product of `input` (initial value 1) that restarts
// whenever the key changes. Runs on the current CUDA stream.
template <typename KeysInputIteratorT, typename ValuesInputIteratorT, typename ValuesOutputIteratorT>
inline void exclusive_prod_by_key(
    KeysInputIteratorT keys, ValuesInputIteratorT input, ValuesOutputIteratorT output, int64_t num_items)
{
    TORCH_CHECK(num_items <= std::numeric_limits<long>::max(),
                "cub ExclusiveScanByKey does not support more than LONG_MAX elements");
    CUB_WRAPPER(cub::DeviceScan::ExclusiveScanByKey, keys, input, output, Product(), 1.0f,
                num_items, cub::Equality(), at::cuda::getCurrentCUDAStream());
}
#endif

// Transmittance before each sample from densities, segmented by ray id:
//   T = exp(-exclusive_cumsum(sigmas * (ends - starts)))
// `ray_indices` is (n_samples,); `starts`, `ends`, `sigmas` are (n_samples, 1).
// Raises if the build has no CUB scan-by-key support.
torch::Tensor transmittance_from_sigma_forward_cub(
    torch::Tensor ray_indices,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas)
{
    DEVICE_GUARD(ray_indices);
    CHECK_INPUT(ray_indices);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(sigmas);
    // Logical && so that size(1) is only queried once the rank is known to be 2.
    TORCH_CHECK(ray_indices.ndimension() == 1);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);

    const uint32_t n_samples = sigmas.size(0);

    // parallel across samples
    torch::Tensor sigmas_dt = sigmas * (ends - starts);
    torch::Tensor sigmas_dt_cumsum = torch::empty_like(sigmas);
#if CUB_SUPPORTS_SCAN_BY_KEY()
    exclusive_sum_by_key(
        ray_indices.data_ptr<int64_t>(),
        sigmas_dt.data_ptr<float>(),
        sigmas_dt_cumsum.data_ptr<float>(),
        n_samples);
#else
    // Fail loudly: without the scan the output buffer would be returned
    // uninitialized.
    TORCH_CHECK(false, "CUB functions are only supported in CUDA >= 11.6.");
#endif
    torch::Tensor transmittance = (-sigmas_dt_cumsum).exp();
    return transmittance;
}

// Gradient of transmittance_from_sigma_forward_cub with respect to sigmas.
// The scan runs over reversed iterators so each sample accumulates the
// contribution of the later samples of its ray.
// Raises if the build has no CUB scan-by-key support.
torch::Tensor transmittance_from_sigma_backward_cub(
    torch::Tensor ray_indices,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor transmittance,
    torch::Tensor transmittance_grad)
{
    DEVICE_GUARD(ray_indices);
    CHECK_INPUT(ray_indices);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(transmittance);
    CHECK_INPUT(transmittance_grad);
    TORCH_CHECK(ray_indices.ndimension() == 1);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(transmittance.ndimension() == 2 && transmittance.size(1) == 1);
    TORCH_CHECK(transmittance_grad.ndimension() == 2 && transmittance_grad.size(1) == 1);

    const uint32_t n_samples = transmittance.size(0);

    // parallel across samples
    torch::Tensor sigmas_dt_cumsum_grad = -transmittance_grad * transmittance;
    torch::Tensor sigmas_dt_grad = torch::empty_like(transmittance_grad);
#if CUB_SUPPORTS_SCAN_BY_KEY()
    exclusive_sum_by_key(
        thrust::make_reverse_iterator(ray_indices.data_ptr<int64_t>() + n_samples),
        thrust::make_reverse_iterator(sigmas_dt_cumsum_grad.data_ptr<float>() + n_samples),
        thrust::make_reverse_iterator(sigmas_dt_grad.data_ptr<float>() + n_samples),
        n_samples);
#else
    TORCH_CHECK(false, "CUB functions are only supported in CUDA >= 11.6.");
#endif
    torch::Tensor sigmas_grad = sigmas_dt_grad * (ends - starts);
    return sigmas_grad;
}

// Transmittance before each sample from opacities, segmented by ray id:
//   T = exclusive_cumprod(1 - alphas)
// `ray_indices` is (n_samples,); `alphas` is (n_samples, 1).
// Raises if the build has no CUB scan-by-key support.
torch::Tensor transmittance_from_alpha_forward_cub(
    torch::Tensor ray_indices, torch::Tensor alphas)
{
    DEVICE_GUARD(ray_indices);
    CHECK_INPUT(ray_indices);
    CHECK_INPUT(alphas);
    TORCH_CHECK(alphas.ndimension() == 2 && alphas.size(1) == 1);
    TORCH_CHECK(ray_indices.ndimension() == 1);

    const uint32_t n_samples = alphas.size(0);

    // parallel across samples
    torch::Tensor transmittance = torch::empty_like(alphas);
#if CUB_SUPPORTS_SCAN_BY_KEY()
    // Keep the scan input in a named tensor so its storage is owned for the
    // whole call rather than by a temporary.
    torch::Tensor one_minus_alphas = 1.0f - alphas;
    exclusive_prod_by_key(
        ray_indices.data_ptr<int64_t>(),
        one_minus_alphas.data_ptr<float>(),
        transmittance.data_ptr<float>(),
        n_samples);
#else
    TORCH_CHECK(false, "CUB functions are only supported in CUDA >= 11.6.");
#endif
    return transmittance;
}

// Gradient of transmittance_from_alpha_forward_cub with respect to alphas.
// The divisor (1 - alphas) is floored at 1e-10 so a fully opaque sample
// cannot divide by zero.
// Raises if the build has no CUB scan-by-key support.
torch::Tensor transmittance_from_alpha_backward_cub(
    torch::Tensor ray_indices,
    torch::Tensor alphas,
    torch::Tensor transmittance,
    torch::Tensor transmittance_grad)
{
    DEVICE_GUARD(ray_indices);
    CHECK_INPUT(ray_indices);
    // alphas takes part in the result as well, so validate it like the
    // forward pass does.
    CHECK_INPUT(alphas);
    CHECK_INPUT(transmittance);
    CHECK_INPUT(transmittance_grad);
    TORCH_CHECK(ray_indices.ndimension() == 1);
    TORCH_CHECK(transmittance.ndimension() == 2 && transmittance.size(1) == 1);
    TORCH_CHECK(transmittance_grad.ndimension() == 2 && transmittance_grad.size(1) == 1);

    const uint32_t n_samples = transmittance.size(0);

    // parallel across samples
    torch::Tensor sigmas_dt_cumsum_grad = -transmittance_grad * transmittance;
    torch::Tensor sigmas_dt_grad = torch::empty_like(transmittance_grad);
#if CUB_SUPPORTS_SCAN_BY_KEY()
    exclusive_sum_by_key(
        thrust::make_reverse_iterator(ray_indices.data_ptr<int64_t>() + n_samples),
        thrust::make_reverse_iterator(sigmas_dt_cumsum_grad.data_ptr<float>() + n_samples),
        thrust::make_reverse_iterator(sigmas_dt_grad.data_ptr<float>() + n_samples),
        n_samples);
#else
    TORCH_CHECK(false, "CUB functions are only supported in CUDA >= 11.6.");
#endif
    torch::Tensor alphas_grad = sigmas_dt_grad / (1.0f - alphas).clamp_min(1e-10f);
    return alphas_grad;
}

================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_weight.cu
================================================
/*
 * Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/

#include "include/helpers_cuda.h"

// NOTE(review): CUDA_GET_THREAD_ID / CUDA_GET_THREAD_ID_2D come from
// helpers_cuda.h (not shown here); they are assumed to bind the named indices
// to the thread's position and to exit for out-of-range threads.
//
// Every kernel reads two ints per ray (or patch) from `packed_info`: the
// offset of its first sample and how many samples it owns. Patch-based kernels
// address sample `s` of ray `k` at `s * patch_size + k` past the patch offset.

// Rendering weights from densities, one ray per thread:
//   alpha = 1 - exp(-sigma * (end - start)),  weight = alpha * T,
// where T is the product of (1 - alpha) over the earlier samples of the ray.
__global__ void weight_from_sigma_forward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *starts,
    const float *ends,
    const float *sigmas,
    // outputs
    float *weights)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int *ray_info = packed_info + i * 2;
    const int first = ray_info[0];
    const int count = ray_info[1];
    if (count == 0)
        return;

    // Rebase every array onto this ray's first sample.
    starts += first;
    ends += first;
    sigmas += first;
    weights += first;

    float trans = 1.f;
    for (int s = 0; s < count; ++s)
    {
        const float delta = ends[s] - starts[s];
        const float alpha = 1.f - __expf(-sigmas[s] * delta);
        weights[s] = alpha * trans;
        trans *= (1.f - alpha);
    }
}

// Gradient of weight_from_sigma_forward_kernel with respect to sigmas.
// A first pass sums grad * weight over the whole ray; the second pass peels
// the running sample off that sum while rebuilding the transmittance.
__global__ void weight_from_sigma_backward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *starts,
    const float *ends,
    const float *sigmas,
    const float *weights,
    const float *grad_weights,
    // outputs
    float *grad_sigmas)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int *ray_info = packed_info + i * 2;
    const int first = ray_info[0];
    const int count = ray_info[1];
    if (count == 0)
        return;

    starts += first;
    ends += first;
    sigmas += first;
    weights += first;
    grad_weights += first;
    grad_sigmas += first;

    // Sum of grad * weight over the samples not yet visited by the loop below.
    float pending = 0;
    for (int s = 0; s < count; ++s)
    {
        pending += grad_weights[s] * weights[s];
    }

    float trans = 1.f;
    for (int s = 0; s < count; ++s)
    {
        const float delta = ends[s] - starts[s];
        const float alpha = 1.f - __expf(-sigmas[s] * delta);
        grad_sigmas[s] = (grad_weights[s] * trans - pending) * delta;
        pending -= grad_weights[s] * weights[s];
        trans *= (1.f - alpha);
    }
}

// Rendering weights from opacities for patches of rays:
//   weight = alpha * T, with T the product of (1 - alpha) over earlier samples.
// Thread (i, k) handles ray k of patch i.
__global__ void weight_from_alpha_patch_based_forward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    const int *packed_info, // (n_patches, 2)
    const float *alphas,    // (n_samples, patch_size, 1)
    // outputs
    float *weights)
{
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);

    const int *patch_info = packed_info + i * 2;
    const int first = patch_info[0];
    const int count = patch_info[1];
    if (count == 0)
        return;

    // Rebase onto the first sample row of this patch.
    alphas += first * patch_size;
    weights += first * patch_size;

    float trans = 1.f;
    for (int s = 0; s < count; ++s)
    {
        const uint32_t at = s * patch_size + k;
        const float alpha = alphas[at];
        weights[at] = alpha * trans;
        trans *= (1.f - alpha);
    }
}

// Same as weight_from_alpha_patch_based_forward_kernel, and additionally
// stores the transmittance T seen by every sample.
__global__ void weight_and_transmittance_from_alpha_patch_based_forward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    const int *packed_info, // (n_patches, 2)
    const float *alphas,    // (n_samples, patch_size, 1)
    // outputs
    float *weights,
    float *transmittance)
{
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);

    const int *patch_info = packed_info + i * 2;
    const int first = patch_info[0];
    const int count = patch_info[1];
    if (count == 0)
        return;

    alphas += first * patch_size;
    weights += first * patch_size;
    transmittance += first * patch_size;

    float trans = 1.f;
    for (int s = 0; s < count; ++s)
    {
        const uint32_t at = s * patch_size + k;
        const float alpha = alphas[at];
        transmittance[at] = trans;
        weights[at] = alpha * trans;
        trans *= (1.f - alpha);
    }
}

// Rendering weights from opacities, one ray per thread:
//   weight = alpha * T, with T the product of (1 - alpha) over earlier samples.
__global__ void weight_from_alpha_forward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *alphas,
    // outputs
    float *weights)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int *ray_info = packed_info + i * 2;
    const int first = ray_info[0];
    const int count = ray_info[1];
    if (count == 0)
        return;

    alphas += first;
    weights += first;

    float trans = 1.f;
    for (int s = 0; s < count; ++s)
    {
        const float alpha = alphas[s];
        weights[s] = alpha * trans;
        trans *= (1.f - alpha);
    }
}

// Gradient of weight_from_alpha_forward_kernel with respect to alphas.
// The divisor (1 - alpha) is floored at 1e-10.
__global__ void weight_from_alpha_backward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *alphas,
    const float *weights,
    const float *grad_weights,
    // outputs
    float *grad_alphas)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int *ray_info = packed_info + i * 2;
    const int first = ray_info[0];
    const int count = ray_info[1];
    if (count == 0)
        return;

    alphas += first;
    weights += first;
    grad_weights += first;
    grad_alphas += first;

    // Sum of grad * weight over the samples not yet visited by the loop below.
    float pending = 0;
    for (int s = 0; s < count; ++s)
    {
        pending += grad_weights[s] * weights[s];
    }

    float trans = 1.f;
    for (int s = 0; s < count; ++s)
    {
        const float alpha = alphas[s];
        grad_alphas[s] = (grad_weights[s] * trans - pending) / fmaxf(1.f - alpha, 1e-10f);
        pending -= grad_weights[s] * weights[s];
        trans *= (1.f - alpha);
    }
}

// Like weight_from_alpha_forward_kernel, but every weight is divided by the
// sample's `importance` value.
__global__ void weight_from_alpha_importance_sampling_forward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *alphas,
    const float *importance,
    // outputs
    float *weights)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int *ray_info = packed_info + i * 2;
    const int first = ray_info[0];
    const int count = ray_info[1];
    if (count == 0)
        return;

    alphas += first;
    weights += first;
    importance += first;

    float trans = 1.f;
    for (int s = 0; s < count; ++s)
    {
        const float alpha = alphas[s];
        weights[s] = alpha * trans / importance[s];
        trans *= (1.f - alpha);
    }
}

// Gradient of weight_from_alpha_importance_sampling_forward_kernel with
// respect to alphas. The (1 - alpha) factor of the divisor is floored at 1e-10.
__global__ void weight_from_alpha_importance_sampling_backward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *alphas,
    const float *weights,
    const float *grad_weights,
    const float *importance,
    // outputs
    float *grad_alphas)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int *ray_info = packed_info + i * 2;
    const int first = ray_info[0];
    const int count = ray_info[1];
    if (count == 0)
        return;

    alphas += first;
    weights += first;
    grad_weights += first;
    grad_alphas += first;
    importance += first;

    // Sum of grad * weight over the samples not yet visited by the loop below.
    float pending = 0;
    for (int s = 0; s < count; ++s)
    {
        pending += grad_weights[s] * weights[s];
    }

    float trans = 1.f;
    for (int s = 0; s < count; ++s)
    {
        const float alpha = alphas[s];
        grad_alphas[s] = (grad_weights[s] * trans - importance[s] * pending) / (importance[s] * fmaxf(1.f - alpha, 1e-10f));
        pending -= grad_weights[s] * weights[s];
        trans *= (1.f - alpha);
    }
}

// Gradient of weight_from_alpha_patch_based_forward_kernel with respect to
// alphas. Thread (i, k) handles ray k of patch i.
__global__ void weight_from_alpha_patch_based_backward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    const int *packed_info,
    const float *alphas,
    const float *weights,
    const float *grad_weights,
    // outputs
    float *grad_alphas)
{
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);

    const int *patch_info = packed_info + i * 2;
    const int first = patch_info[0];
    const int count = patch_info[1];
    if (count == 0)
        return;

    alphas += first * patch_size;
    weights += first * patch_size;
    grad_weights += first * patch_size;
    grad_alphas += first * patch_size;

    // Sum of grad * weight over the samples not yet visited by the loop below.
    float pending = 0;
    for (int s = 0; s < count; ++s)
    {
        const uint32_t at = s * patch_size + k;
        pending += grad_weights[at] * weights[at];
    }

    float trans = 1.f;
    for (int s = 0; s < count; ++s)
    {
        const uint32_t at = s * patch_size + k;
        const float alpha = alphas[at];
        grad_alphas[at] = (grad_weights[at] * trans - pending) / fmaxf(1.f - alpha, 1e-10f);
        pending -= grad_weights[at] * weights[at];
        trans *= (1.f - alpha);
    }
}

// Backward kernel paired with
// weight_and_transmittance_from_alpha_patch_based_forward_kernel. It takes
// only the weight gradients and performs the same computation as
// weight_from_alpha_patch_based_backward_kernel.
__global__ void weight_and_transmittance_from_alpha_patch_based_backward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    const int *packed_info,
    const float *alphas,
    const float *weights,
    const float *grad_weights,
    // outputs
    float *grad_alphas)
{
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);

    const int *patch_info = packed_info + i * 2;
    const int first = patch_info[0];
    const int count = patch_info[1];
    if (count == 0)
        return;

    alphas += first * patch_size;
    weights += first * patch_size;
    grad_weights += first * patch_size;
    grad_alphas += first * patch_size;

    // Sum of grad * weight over the samples not yet visited by the loop below.
    float pending = 0;
    for (int s = 0; s < count; ++s)
    {
        const uint32_t at = s * patch_size + k;
        pending += grad_weights[at] * weights[at];
    }

    float trans = 1.f;
    for (int s = 0; s < count; ++s)
    {
        const uint32_t at = s * patch_size + k;
        const float alpha = alphas[at];
        grad_alphas[at] = (grad_weights[at] * trans - pending) / fmaxf(1.f - alpha, 1e-10f);
        pending -= grad_weights[at] * weights[at];
        trans *= (1.f - alpha);
    }
}

torch::Tensor weight_from_sigma_forward_naive(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(sigmas);
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 & sigmas.size(1) == 1);

    const uint32_t n_samples = sigmas.size(0);
    const uint32_t n_rays = packed_info.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // outputs
    torch::Tensor weights = torch::empty_like(sigmas);

    weight_from_sigma_forward_kernel<<<
        blocks,
        threads,
        0,
        at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        // inputs
        packed_info.data_ptr(),
starts.data_ptr(), ends.data_ptr(), sigmas.data_ptr(), // outputs weights.data_ptr()); return weights; } torch::Tensor weight_from_sigma_backward_naive( torch::Tensor weights, torch::Tensor grad_weights, torch::Tensor packed_info, torch::Tensor starts, torch::Tensor ends, torch::Tensor sigmas) { DEVICE_GUARD(packed_info); CHECK_INPUT(weights); CHECK_INPUT(grad_weights); CHECK_INPUT(packed_info); CHECK_INPUT(starts); CHECK_INPUT(ends); CHECK_INPUT(sigmas); TORCH_CHECK(packed_info.ndimension() == 2); TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1); TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1); TORCH_CHECK(sigmas.ndimension() == 2 & sigmas.size(1) == 1); TORCH_CHECK(weights.ndimension() == 2 & weights.size(1) == 1); TORCH_CHECK(grad_weights.ndimension() == 2 & grad_weights.size(1) == 1); const uint32_t n_samples = sigmas.size(0); const uint32_t n_rays = packed_info.size(0); const int threads = 256; const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads); // outputs torch::Tensor grad_sigmas = torch::empty_like(sigmas); weight_from_sigma_backward_kernel<<< blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( n_rays, // inputs packed_info.data_ptr(), starts.data_ptr(), ends.data_ptr(), sigmas.data_ptr(), weights.data_ptr(), grad_weights.data_ptr(), // outputs grad_sigmas.data_ptr()); return grad_sigmas; } torch::Tensor weight_from_alpha_forward_naive( torch::Tensor packed_info, torch::Tensor alphas) { DEVICE_GUARD(packed_info); CHECK_INPUT(packed_info); CHECK_INPUT(alphas); TORCH_CHECK(packed_info.ndimension() == 2); TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1); const uint32_t n_samples = alphas.size(0); const uint32_t n_rays = packed_info.size(0); const int threads = 256; const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads); // outputs torch::Tensor weights = torch::empty_like(alphas); weight_from_alpha_forward_kernel<<< blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( n_rays, // inputs packed_info.data_ptr(), 
alphas.data_ptr(), // outputs weights.data_ptr()); return weights; } torch::Tensor weight_from_alpha_patch_based_forward_naive( torch::Tensor packed_info, // (n_patches, 2) torch::Tensor alphas // (n_samples, patches_size, 1) ) { DEVICE_GUARD(packed_info); CHECK_INPUT(packed_info); CHECK_INPUT(alphas); TORCH_CHECK(packed_info.ndimension() == 2); TORCH_CHECK(alphas.ndimension() == 3 & alphas.size(2) == 1); const uint32_t n_samples = alphas.size(0); const uint32_t n_patches = packed_info.size(0); const uint32_t patch_size = alphas.size(1); // compute the required number of thread.y from patch size // take the log2 of patch size and round up to the next power of 2 const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size))); const uint32_t thread_for_n_samples = 256 / thread_for_a_patch; // convert to uint // thread_for_a_patch = static_cast(thread_for_a_patch); // thread_for_n_samples = static_cast(thread_for_n_samples); const dim3 threads(thread_for_n_samples, thread_for_a_patch); // const dim3 blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads); const dim3 blocks((n_patches+threads.x-1)/threads.x, (patch_size+threads.y-1)/threads.y); // outputs torch::Tensor weights = torch::empty_like(alphas); torch::Tensor transmittance = torch::empty_like(alphas); weight_from_alpha_patch_based_forward_kernel<<< blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( n_patches, patch_size, // inputs packed_info.data_ptr(), alphas.data_ptr(), // outputs weights.data_ptr()); return weights; } torch::Tensor weight_from_alpha_backward_naive( torch::Tensor weights, torch::Tensor grad_weights, torch::Tensor packed_info, torch::Tensor alphas) { DEVICE_GUARD(packed_info); CHECK_INPUT(packed_info); CHECK_INPUT(alphas); CHECK_INPUT(weights); CHECK_INPUT(grad_weights); TORCH_CHECK(packed_info.ndimension() == 2); TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1); TORCH_CHECK(weights.ndimension() == 2 & weights.size(1) == 1); TORCH_CHECK(grad_weights.ndimension() == 2 & 
grad_weights.size(1) == 1); const uint32_t n_samples = alphas.size(0); const uint32_t n_rays = packed_info.size(0); const int threads = 256; const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads); // outputs torch::Tensor grad_alphas = torch::empty_like(alphas); weight_from_alpha_backward_kernel<<< blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( n_rays, // inputs packed_info.data_ptr(), alphas.data_ptr(), weights.data_ptr(), grad_weights.data_ptr(), // outputs grad_alphas.data_ptr()); return grad_alphas; } torch::Tensor weight_from_alpha_patch_based_backward_naive( torch::Tensor weights, torch::Tensor grad_weights, // (n_samples, patches_size, 1) torch::Tensor packed_info, torch::Tensor alphas) // (n_samples, patches_size, 1) { DEVICE_GUARD(packed_info); CHECK_INPUT(packed_info); CHECK_INPUT(alphas); CHECK_INPUT(weights); CHECK_INPUT(grad_weights); TORCH_CHECK(packed_info.ndimension() == 2); TORCH_CHECK(alphas.ndimension() == 3 & alphas.size(2) == 1); TORCH_CHECK(weights.ndimension() == 3 & weights.size(2) == 1); TORCH_CHECK(grad_weights.ndimension() == 3 & grad_weights.size(2) == 1); const uint32_t n_samples = alphas.size(0); const uint32_t n_patches = packed_info.size(0); const uint32_t patch_size = alphas.size(1); // compute the required number of thread.y from patch size // take the log2 of patch size and round up to the next power of 2 const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size))); const uint32_t thread_for_n_samples = 256 / thread_for_a_patch; const dim3 threads(thread_for_n_samples, thread_for_a_patch); const dim3 blocks((n_patches+threads.x-1)/threads.x, (patch_size+threads.y-1)/threads.y); // outputs torch::Tensor grad_alphas = torch::empty_like(alphas); weight_from_alpha_patch_based_backward_kernel<<< blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( n_patches, patch_size, // inputs packed_info.data_ptr(), alphas.data_ptr(), weights.data_ptr(), grad_weights.data_ptr(), // outputs grad_alphas.data_ptr()); return 
grad_alphas; } std::vector weight_and_transmittance_from_alpha_patch_based_forward_naive( torch::Tensor packed_info, // (n_patches, 2) torch::Tensor alphas // (n_samples, patches_size, 1) ) { DEVICE_GUARD(packed_info); CHECK_INPUT(packed_info); CHECK_INPUT(alphas); TORCH_CHECK(packed_info.ndimension() == 2); TORCH_CHECK(alphas.ndimension() == 3 & alphas.size(2) == 1); const uint32_t n_samples = alphas.size(0); const uint32_t n_patches = packed_info.size(0); const uint32_t patch_size = alphas.size(1); // compute the required number of thread.y from patch size // take the log2 of patch size and round up to the next power of 2 const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size))); const uint32_t thread_for_n_samples = 256 / thread_for_a_patch; const dim3 threads(thread_for_n_samples, thread_for_a_patch); const dim3 blocks((n_patches+threads.x-1)/threads.x, (patch_size+threads.y-1)/threads.y); // outputs torch::Tensor weights = torch::empty_like(alphas); torch::Tensor transmittance = torch::empty_like(alphas); weight_and_transmittance_from_alpha_patch_based_forward_kernel<<< blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( n_patches, patch_size, // inputs packed_info.data_ptr(), alphas.data_ptr(), // outputs weights.data_ptr(), transmittance.data_ptr()); return {weights, transmittance}; } torch::Tensor weight_and_transmittance_from_alpha_patch_based_backward_naive( torch::Tensor weights, torch::Tensor grad_weights, // (n_samples, patches_size, 1) torch::Tensor packed_info, torch::Tensor alphas) // (n_samples, patches_size, 1) { DEVICE_GUARD(packed_info); CHECK_INPUT(packed_info); CHECK_INPUT(alphas); CHECK_INPUT(weights); CHECK_INPUT(grad_weights); TORCH_CHECK(packed_info.ndimension() == 2); TORCH_CHECK(alphas.ndimension() == 3 & alphas.size(2) == 1); TORCH_CHECK(weights.ndimension() == 3 & weights.size(2) == 1); TORCH_CHECK(grad_weights.ndimension() == 3 & grad_weights.size(2) == 1); const uint32_t n_samples = alphas.size(0); const uint32_t 
n_patches = packed_info.size(0); const uint32_t patch_size = alphas.size(1); // compute the required number of thread.y from patch size // take the log2 of patch size and round up to the next power of 2 const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size))); const uint32_t thread_for_n_samples = 256 / thread_for_a_patch; const dim3 threads(thread_for_n_samples, thread_for_a_patch); const dim3 blocks((n_patches+threads.x-1)/threads.x, (patch_size+threads.y-1)/threads.y); // outputs torch::Tensor grad_alphas = torch::empty_like(alphas); weight_and_transmittance_from_alpha_patch_based_backward_kernel<<< blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( n_patches, patch_size, // inputs packed_info.data_ptr(), alphas.data_ptr(), weights.data_ptr(), grad_weights.data_ptr(), // outputs grad_alphas.data_ptr()); return grad_alphas; } // torch::Tensor weight_from_alpha_importance_sampling_forward_naive( // torch::Tensor packed_info, torch::Tensor alphas, torch::Tensor importance_pdfs) // { // DEVICE_GUARD(packed_info); // CHECK_INPUT(packed_info); // CHECK_INPUT(alphas); // CHECK_INPUT(importance_pdfs); // TORCH_CHECK(packed_info.ndimension() == 2); // TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1); // TORCH_CHECK(importance_pdfs.ndimension() == 2 & importance_pdfs.size(1) == 1); // // const uint32_t n_samples = alphas.size(0); // const uint32_t n_rays = packed_info.size(0); // // const int threads = 256; // const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads); // // // outputs // torch::Tensor weights = torch::empty_like(alphas); // // weight_from_alpha_forward_kernel<<< // blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>( // n_rays, // // inputs // packed_info.data_ptr(), // alphas.data_ptr(), // importance_pdfs.data_ptr(), // // outputs // weights.data_ptr()); // return weights; // } // // torch::Tensor weight_from_alpha_importance_sampling_backward_naive( // torch::Tensor weights, // torch::Tensor grad_weights, // torch::Tensor 
@torch.no_grad()
def query_grid(
    samples: torch.Tensor,
    grid_roi: torch.Tensor,
    grid_values: torch.Tensor,
    grid_type: ContractionType,
):
    """Look up grid values at the given sample coordinates.

    Args:
        samples: (n_samples, 3) tensor of coordinates.
        grid_roi: (6,) region of interest of the grid. Usually acquired
            from the grid itself using `grid.roi_aabb`.
        grid_values: A 3D tensor of grid values in the shape of
            (resx, resy, resz).
        grid_type: Contraction type of the grid. Usually acquired from
            the grid itself using `grid.contraction_type`.

    Returns:
        (n_samples) values for those samples queried from the grid.
    """
    # Validate shapes and the contraction type before calling the extension.
    assert samples.dim() == 2 and samples.size(-1) == 3
    assert grid_roi.dim() == 1 and grid_roi.size(0) == 6
    assert grid_values.dim() == 3
    assert isinstance(grid_type, ContractionType)

    # The extension call is given contiguous tensors.
    dense_inputs = [
        tensor.contiguous() for tensor in (samples, grid_roi, grid_values)
    ]
    return _C.grid_query(*dense_inputs, grid_type.to_cpp_version())
class OccupancyGrid(Grid):
    """Occupancy grid: whether each voxel area is occupied or not.

    Args:
        roi_aabb: The axis-aligned bounding box of the region of interest.
            Useful for mapping the 3D space to the grid.
        resolution: The resolution of the grid. If an integer is given, the
            grid is assumed to be a cube. Otherwise, a list or a tensor of
            shape (3,) is expected. Default: 128.
        contraction_type: The contraction type of the grid. See
            :class:`nerfacc.ContractionType` for more details.
            Default: :attr:`nerfacc.ContractionType.AABB`.
    """

    NUM_DIM: int = 3

    def __init__(
        self,
        roi_aabb: Union[List[int], torch.Tensor],
        resolution: Union[int, List[int], torch.Tensor] = 128,
        contraction_type: ContractionType = ContractionType.AABB,
    ) -> None:
        super().__init__()
        n_dim = self.NUM_DIM

        # An int is expanded to a cube; ints and sequences become an int32
        # tensor. A tensor argument is used as given.
        if isinstance(resolution, int):
            resolution = [resolution] * n_dim
        if isinstance(resolution, (list, tuple)):
            resolution = torch.tensor(resolution, dtype=torch.int32)
        assert isinstance(
            resolution, torch.Tensor
        ), f"Invalid type: {type(resolution)}"
        assert resolution.shape == (n_dim,), f"Invalid shape: {resolution.shape}"

        # Sequences become a float32 tensor; a tensor is used as given.
        if isinstance(roi_aabb, (list, tuple)):
            roi_aabb = torch.tensor(roi_aabb, dtype=torch.float32)
        assert isinstance(
            roi_aabb, torch.Tensor
        ), f"Invalid type: {type(roi_aabb)}"
        assert roi_aabb.shape == torch.Size(
            [n_dim * 2]
        ), f"Invalid shape: {roi_aabb.shape}"

        # Total number of voxels.
        self.num_cells = int(resolution.prod().item())

        # Attributes read by the `Grid` properties.
        self.register_buffer("_roi_aabb", roi_aabb)
        empty_mask = torch.zeros(resolution.tolist(), dtype=torch.bool)
        self.register_buffer("_binary", empty_mask)
        self._contraction_type = contraction_type

        # Helper state: per-axis resolution and one occupancy value per cell.
        self.register_buffer("resolution", resolution)
        self.register_buffer("occs", torch.zeros(self.num_cells))

        # Integer cell coordinates and flat cell indices. Both are registered
        # as non-persistent buffers.
        cell_coords = _meshgrid3d(resolution).reshape(self.num_cells, n_dim)
        self.register_buffer("grid_coords", cell_coords, persistent=False)
        cell_ids = torch.arange(self.num_cells)
        self.register_buffer("grid_indices", cell_ids, persistent=False)

    @torch.no_grad()
    def _get_all_cells(self) -> torch.Tensor:
        """Return the flat indices of every cell of the grid."""
        return self.grid_indices

    @torch.no_grad()
    def _sample_uniform_and_occupied_cells(self, n: int) -> torch.Tensor:
        """Sample n uniformly random cells plus at most n occupied cells.

        When more than n cells are occupied, n of them are drawn at random
        (with replacement); otherwise all occupied cells are returned.
        """
        random_cells = torch.randint(self.num_cells, (n,), device=self.device)
        occupied_cells = torch.nonzero(self._binary.flatten())[:, 0]
        n_occupied = len(occupied_cells)
        if n < n_occupied:
            pick = torch.randint(n_occupied, (n,), device=self.device)
            occupied_cells = occupied_cells[pick]
        return torch.cat([random_cells, occupied_cells], dim=0)

    @torch.no_grad()
    def _update(
        self,
        step: int,
        occ_eval_fn: Callable,
        occ_thre: float = 0.01,
        ema_decay: float = 0.95,
        warmup_steps: int = 256,
    ) -> None:
        """Update the occupancy values with an EMA and re-binarize the grid."""
        # During warmup every cell is refreshed; afterwards a random subset.
        if step < warmup_steps:
            indices = self._get_all_cells()
        else:
            indices = self._sample_uniform_and_occupied_cells(
                self.num_cells // 4
            )

        # One random point inside each selected voxel, in [0, 1]^3.
        cell_coords = self.grid_coords[indices]
        jitter = torch.rand_like(cell_coords, dtype=torch.float32)
        x = (cell_coords + jitter) / self.resolution

        if self._contraction_type == ContractionType.UN_BOUNDED_SPHERE:
            # Only the points inside the sphere are valid.
            inside = (x - 0.5).norm(dim=1) < 0.5
            x = x[inside]
            indices = indices[inside]

        # Voxel coordinates [0, 1]^3 -> world.
        x = contract_inv(
            x,
            roi=self._roi_aabb,
            type=self._contraction_type,
        )
        occ = occ_eval_fn(x).squeeze(-1)

        # EMA update: decay the stored value, keep the larger of old and new.
        # A scatter-max would be the exact operation; the original authors
        # note that empirically this is almost the same.
        decayed = self.occs[indices] * ema_decay
        self.occs[indices] = torch.maximum(decayed, occ)

        # Binarize against min(mean occupancy, occ_thre).
        threshold = torch.clamp(self.occs.mean(), max=occ_thre)
        self._binary = (self.occs > threshold).view(self._binary.shape)

    @torch.no_grad()
    def every_n_step(
        self,
        step: int,
        occ_eval_fn: Callable,
        occ_thre: float = 1e-2,
        ema_decay: float = 0.95,
        warmup_steps: int = 256,
        n: int = 16,
    ) -> None:
        """Update the grid every n steps during training.

        Args:
            step: Current training step.
            occ_eval_fn: A function that takes in sample locations
                :math:`(N, 3)` and returns the occupancy values
                :math:`(N, 1)` at those locations.
            occ_thre: Threshold used to binarize the occupancy grid.
                Default: 1e-2.
            ema_decay: The decay rate for EMA updates. Default: 0.95.
            warmup_steps: Sample all cells during the warmup stage. After
                the warmup stage the sampling strategy changes to 1/4 of
                the cells sampled uniformly together with up to 1/4
                occupied cells. Default: 256.
            n: Update the grid every n steps. Default: 16.

        Raises:
            RuntimeError: If the module is not in training mode.
        """
        if not self.training:
            raise RuntimeError(
                "You should only call this function only during training. "
                "Please call _update() directly if you want to update the "
                "field during inference."
            )
        if step % n != 0:
            return
        self._update(
            step=step,
            occ_eval_fn=occ_eval_fn,
            occ_thre=occ_thre,
            ema_decay=ema_decay,
            warmup_steps=warmup_steps,
        )

    @torch.no_grad()
    def query_occ(self, samples: torch.Tensor) -> torch.Tensor:
        """Query the occupancy field at the given samples.

        Args:
            samples: Samples in the world coordinates. (n_samples, 3)

        Returns:
            Occupancy values at the given samples. (n_samples,)
        """
        return query_grid(
            samples,
            self._roi_aabb,
            self.binary,
            self.contraction_type,
        )
def distortion(
    packed_info: Tensor, weights: Tensor, t_starts: Tensor, t_ends: Tensor
) -> Tensor:
    """Distortion loss from Mip-NeRF 360 paper, Equ. 15.

    Args:
        packed_info: Packed info for the samples. (n_rays, 2)
        weights: Weights for the samples. (all_samples,)
        t_starts: Per-sample start distance. Tensor with shape (all_samples, 1).
        t_ends: Per-sample end distance. Tensor with shape (all_samples, 1).

    Returns:
        Distortion loss. (n_rays,)
    """

    def _per_ray(packed: Tensor) -> Tensor:
        # (all_samples, 1) -> (n_rays, n_samples)
        return unpack_data(packed_info, packed).squeeze(-1)

    ray_w = _per_ray(weights[..., None])
    starts = _per_ray(t_starts)
    ends = _per_ray(t_ends)

    widths = ends - starts
    centers = (starts + ends) / 2

    # Single-interval term: one third of sum(width * w^2) per ray.
    self_term = (1 / 3) * (widths * ray_w.pow(2)).sum(-1)

    # Pairwise term: sum of w_i * w_j * |center_i - center_j| per ray.
    weight_pairs = ray_w.unsqueeze(-1) * ray_w.unsqueeze(-2)
    center_gaps = (centers.unsqueeze(-1) - centers.unsqueeze(-2)).abs()
    pair_term = (weight_pairs * center_gaps).sum((-1, -2))

    return self_term + pair_term
@torch.no_grad()
def pack_info(ray_indices: Tensor, n_rays: Optional[int] = None) -> Tensor:
    """Pack `ray_indices` to `packed_info`. Useful for converting per sample data to per ray data.

    Note:
        this function is not differentiable to any inputs.

    Args:
        ray_indices: Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional number of rays. If not provided, it is inferred as
            `ray_indices.max() + 1` (0 for an empty `ray_indices`). When
            provided it must be larger than every index in `ray_indices`;
            this is not validated here.

    Returns:
        packed_info: For each ray, the running offset of its first sample and
        its number of samples. IntTensor with shape (n_rays, 2), on the same
        device as `ray_indices`. The offsets index into the sample array only
        if samples of the same ray are stored contiguously in ray order.
    """
    assert (
        ray_indices.dim() == 1
    ), "ray_indices must be a 1D tensor with shape (n_samples)."

    if n_rays is None:
        # max() is undefined on an empty tensor: no samples means no rays.
        n_rays = int(ray_indices.max()) + 1 if ray_indices.numel() > 0 else 0

    # Only plain torch ops are used, so any device works.
    num_steps = torch.zeros(
        (n_rays,), device=ray_indices.device, dtype=torch.int
    )
    # Count the samples of each ray.
    ones = torch.ones_like(ray_indices, dtype=torch.int)
    num_steps.scatter_add_(0, ray_indices, ones)
    # Exclusive prefix sum of the counts gives each ray's start offset.
    cum_steps = num_steps.cumsum(dim=0, dtype=torch.int)
    return torch.stack([cum_steps - num_steps, num_steps], dim=-1)
def unpack_data(
    packed_info: Tensor,
    data: Tensor,
    n_samples: Optional[int] = None,
) -> Tensor:
    """Unpack packed data (all_samples, D) to per-ray data (n_rays, n_samples, D).

    Args:
        packed_info (Tensor): Stores information on which samples belong to
            the same ray. See :func:`nerfacc.ray_marching` for details.
            Tensor with shape (n_rays, 2).
        data: Packed data to unpack. Tensor with shape (n_samples, D).
        n_samples (int): Optional number of samples per ray. If not provided,
            the largest per-ray sample count in `packed_info` is used.

    Returns:
        Unpacked data (n_rays, n_samples, D).

    Examples:

    .. code-block:: python

        rays_o = torch.rand((128, 3), device="cuda:0")
        rays_d = torch.randn((128, 3), device="cuda:0")
        rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

        # Ray marching with aabb.
        scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device="cuda:0")
        packed_info, t_starts, t_ends = ray_marching(
            rays_o, rays_d, scene_aabb=scene_aabb, render_step_size=1e-2
        )
        print(t_starts.shape)  # torch.Size([all_samples, 1])

        t_starts = unpack_data(packed_info, t_starts, n_samples=1024)
        print(t_starts.shape)  # torch.Size([128, 1024, 1])

    """
    info_is_n_by_2 = packed_info.dim() == 2 and packed_info.shape[-1] == 2
    assert info_is_n_by_2, "packed_info must be a 2D tensor with shape (n_rays, 2)."
    data_is_2d = data.dim() == 2
    assert data_is_2d, "data must be a 2D tensor with shape (n_samples, D)."

    if n_samples is None:
        # Column 1 of packed_info holds the number of samples of each ray.
        steps_per_ray = packed_info[:, 1]
        n_samples = steps_per_ray.max().item()

    return _UnpackData.apply(packed_info, data, n_samples)
@torch.no_grad()
def ray_marching(
    # rays
    rays_o: torch.Tensor,
    rays_d: torch.Tensor,
    t_min: Optional[torch.Tensor] = None,
    t_max: Optional[torch.Tensor] = None,
    # bounding box of the scene
    scene_aabb: Optional[torch.Tensor] = None,
    # binarized grid for skipping empty space
    grid: Optional[Grid] = None,
    # sigma/alpha function for skipping invisible space
    sigma_fn: Optional[Callable] = None,
    alpha_fn: Optional[Callable] = None,
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
    # rendering options
    near_plane: Optional[float] = None,
    far_plane: Optional[float] = None,
    render_step_size: float = 1e-3,
    stratified: bool = False,
    cone_angle: float = 0.0,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Ray marching with space skipping.

    Note:
        The logic for computing `t_min` and `t_max`:

        1. If both `t_min` and `t_max` are given, use them with highest priority.
        2. If either of them is missing but `scene_aabb` is given, both are
           recomputed with :func:`ray_aabb_intersect` (a single provided bound
           is overwritten).
        3. If either of them is missing and `scene_aabb` is not given, `t_min`
           is set to 0.0 and `t_max` to 1e10 (the case of an unbounded scene).
        4. `t_min` is always clipped with `near_plane` and `t_max` with
           `far_plane` if those are given.

    Warning:
        This function is not differentiable to any inputs (it runs under
        ``torch.no_grad()``).

    Args:
        rays_o: Ray origins of shape (n_rays, 3). Must be a CUDA tensor.
        rays_d: Normalized ray directions of shape (n_rays, 3).
        t_min: Optional. Per-ray minimum distance. Tensor with shape (n_rays).
        t_max: Optional. Per-ray maximum distance. Tensor with shape (n_rays).
        scene_aabb: Optional. Scene bounding box for computing t_min and t_max.
            A tensor with shape (6,) {xmin, ymin, zmin, xmax, ymax, zmax}.
            `scene_aabb` is ignored if both `t_min` and `t_max` are provided.
        grid: Optional. Grid that indicates where to skip during marching.
            See :class:`nerfacc.Grid` for details.
        sigma_fn: Optional. If provided, the marching will skip the invisible
            space by evaluating the density along the ray with `sigma_fn`. It
            should be a function that takes in samples {t_starts (N, 1),
            t_ends (N, 1), ray indices (N,)} and returns the post-activation
            density values (N, 1). Provide either `sigma_fn` or `alpha_fn`,
            not both.
        alpha_fn: Optional. Same as `sigma_fn`, but the function returns the
            post-activation opacity values (N, 1). Provide either `sigma_fn`
            or `alpha_fn`, not both.
        early_stop_eps: Early stop threshold for skipping invisible space.
            Default: 1e-4.
        alpha_thre: Alpha threshold for skipping empty space. Default: 0.0.
        near_plane: Optional. Near plane distance, used to clip t_min.
        far_plane: Optional. Far plane distance, used to clip t_max.
        render_step_size: Step size for marching. Default: 1e-3.
        stratified: Whether to use stratified sampling. Default: False.
        cone_angle: Cone angle for linearly-increased step size. 0. means
            constant step size. Default: 0.0.

    Returns:
        A tuple of tensors.

            - **ray_indices**: Ray index of each sample. IntTensor with shape (n_samples).
            - **t_starts**: Per-sample start distance. Tensor with shape (n_samples, 1).
            - **t_ends**: Per-sample end distance. Tensor with shape (n_samples, 1).

    Raises:
        NotImplementedError: If `rays_o` is not a CUDA tensor.
        ValueError: If both `sigma_fn` and `alpha_fn` are provided.

    Examples:

    .. code-block:: python

        import torch
        from nerfacc import OccupancyGrid, ray_marching, unpack_info

        device = "cuda:0"
        batch_size = 128
        rays_o = torch.rand((batch_size, 3), device=device)
        rays_d = torch.randn((batch_size, 3), device=device)
        rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

        # Ray marching with near far plane.
        ray_indices, t_starts, t_ends = ray_marching(
            rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3
        )

        # Ray marching with aabb.
        scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=device)
        ray_indices, t_starts, t_ends = ray_marching(
            rays_o, rays_d, scene_aabb=scene_aabb, render_step_size=1e-3
        )

        # Ray marching with per-ray t_min and t_max.
        t_min = torch.zeros((batch_size,), device=device)
        t_max = torch.ones((batch_size,), device=device)
        ray_indices, t_starts, t_ends = ray_marching(
            rays_o, rays_d, t_min=t_min, t_max=t_max, render_step_size=1e-3
        )

        # Ray marching with aabb and skip areas based on occupancy grid.
        scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=device)
        grid = OccupancyGrid(roi_aabb=[0.0, 0.0, 0.0, 0.5, 0.5, 0.5]).to(device)
        ray_indices, t_starts, t_ends = ray_marching(
            rays_o, rays_d, scene_aabb=scene_aabb, grid=grid, render_step_size=1e-3
        )

        # Convert t_starts and t_ends to sample locations.
        t_mid = (t_starts + t_ends) / 2.0
        sample_locs = rays_o[ray_indices] + t_mid * rays_d[ray_indices]

    """
    if not rays_o.is_cuda:
        raise NotImplementedError("Only support cuda inputs.")
    if alpha_fn is not None and sigma_fn is not None:
        raise ValueError(
            "Only one of `alpha_fn` and `sigma_fn` should be provided."
        )

    # Resolve the per-ray marching interval (see the Note in the docstring).
    # A single provided bound is discarded here: both are recomputed whenever
    # either one is missing.
    if t_min is None or t_max is None:
        if scene_aabb is not None:
            t_min, t_max = ray_aabb_intersect(rays_o, rays_d, scene_aabb)
        else:
            t_min = torch.zeros_like(rays_o[..., 0])
            t_max = torch.ones_like(rays_o[..., 0]) * 1e10
    if near_plane is not None:
        t_min = torch.clamp(t_min, min=near_plane)
    if far_plane is not None:
        t_max = torch.clamp(t_max, max=far_plane)

    # stratified sampling: jitter the start by up to one step to prevent
    # overfitting during training
    if stratified:
        t_min = t_min + torch.rand_like(t_min) * render_step_size

    # use grid for skipping if given
    if grid is not None:
        grid_roi_aabb = grid.roi_aabb
        grid_binary = grid.binary
        contraction_type = grid.contraction_type.to_cpp_version()
    else:
        # No grid: substitute a single always-occupied cell over a huge
        # bounding box so the same kernel call can be used.
        grid_roi_aabb = torch.tensor(
            [-1e10, -1e10, -1e10, 1e10, 1e10, 1e10],
            dtype=torch.float32,
            device=rays_o.device,
        )
        grid_binary = torch.ones(
            [1, 1, 1], dtype=torch.bool, device=rays_o.device
        )
        contraction_type = ContractionType.AABB.to_cpp_version()

    # marching with grid-based skipping
    packed_info, ray_indices, t_starts, t_ends = _C.ray_marching(
        # rays
        rays_o.contiguous(),
        rays_d.contiguous(),
        t_min.contiguous(),
        t_max.contiguous(),
        # contraction and grid
        grid_roi_aabb.contiguous(),
        grid_binary.contiguous(),
        contraction_type,
        # sampling
        render_step_size,
        cone_angle,
    )

    # skip invisible space
    if sigma_fn is not None or alpha_fn is not None:
        # Query sigma/alpha without gradients (the whole function is no_grad)
        if sigma_fn is not None:
            sigmas = sigma_fn(t_starts, t_ends, ray_indices)
            assert (
                sigmas.shape == t_starts.shape
            ), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
            # Convert density to opacity over each sample interval.
            alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
        elif alpha_fn is not None:
            alphas = alpha_fn(t_starts, t_ends, ray_indices)
            assert (
                alphas.shape == t_starts.shape
            ), "alphas must have shape of (N, 1)! Got {}".format(alphas.shape)

        # Compute visibility of the samples, and filter out invisible samples
        masks = render_visibility(
            alphas,
            ray_indices=ray_indices,
            packed_info=packed_info,
            early_stop_eps=early_stop_eps,
            alpha_thre=alpha_thre,
            n_rays=rays_o.shape[0],
        )
        ray_indices, t_starts, t_ends = (
            ray_indices[masks],
            t_starts[masks],
            t_ends[masks],
        )

    return ray_indices, t_starts, t_ends
@torch.no_grad()
def sample_along_rays(
    rays_o: torch.Tensor,  # [n_rays, 3]
    rays_d: torch.Tensor,  # [n_rays, 3]
    t_min: Union[float, torch.Tensor],  # [n_rays,]
    t_max: Union[float, torch.Tensor],  # [n_rays,]
    step_size: float,
    cone_angle: float = 0.0,
    grid: Optional[Grid] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Sample intervals along rays.

    Two calling conventions are supported:

    * ``t_min`` / ``t_max`` are Python numbers (shared near/far planes): every
      ray gets the same ``floor((t_max - t_min) / step_size)`` uniform
      intervals, computed with plain torch ops on ``rays_o.device``.
    * ``t_min`` / ``t_max`` are per-ray tensors: sampling is delegated to the
      compiled extension, optionally skipping space with ``grid``.

    Args:
        rays_o: Ray origins, shape (n_rays, 3).
        rays_d: Ray directions, shape (n_rays, 3).
        t_min: Near bound, either one number or a tensor of shape (n_rays,).
        t_max: Far bound, same kind as ``t_min``.
        step_size: Marching step size.
        cone_angle: Cone angle forwarded to the extension (tensor path only).
        grid: Optional grid used for space skipping (tensor path only).

    Returns:
        Tuple ``(ray_indices, t_starts, t_ends)`` with shapes (n_samples,),
        (n_samples, 1) and (n_samples, 1).

    Raises:
        TypeError: If one bound is a number and the other is a tensor.
    """
    # Accept ints as well as floats so that e.g. ``t_min=0`` takes the
    # near/far-plane path instead of crashing on ``int.contiguous()``.
    min_is_scalar = isinstance(t_min, (int, float))
    max_is_scalar = isinstance(t_max, (int, float))
    if min_is_scalar != max_is_scalar:
        raise TypeError(
            "t_min and t_max must both be numbers or both be tensors, "
            f"but got {type(t_min).__name__} and {type(t_max).__name__}."
        )

    if min_is_scalar:
        n_rays = rays_o.shape[0]
        device = rays_o.device
        num_steps = math.floor((t_max - t_min) / step_size)
        # One row of interval starts, shared by every ray.
        offsets = torch.arange(0, num_steps, device=device) * step_size
        t_starts = (t_min + offsets).expand(n_rays, -1).reshape(-1, 1)
        t_ends = t_starts + step_size
        ray_indices = torch.arange(0, n_rays, device=device).repeat_interleave(
            num_steps, dim=0
        )
        return ray_indices, t_starts, t_ends

    if grid is None:
        # NOTE(review): ray_marching.py calls `_C.ray_marching` with ray,
        # grid and contraction arguments as well; this 4-argument call looks
        # inconsistent with that usage -- confirm against the extension
        # binding before relying on this path.
        packed_info, ray_indices, t_starts, t_ends = _C.ray_marching(
            # rays
            t_min.contiguous(),
            t_max.contiguous(),
            # sampling
            step_size,
            cone_angle,
        )
    else:
        (
            packed_info,
            ray_indices,
            t_starts,
            t_ends,
        ) = _C.ray_marching_with_grid(
            # rays
            rays_o.contiguous(),
            rays_d.contiguous(),
            t_min.contiguous(),
            t_max.contiguous(),
            # contraction and grid
            grid.roi_aabb.contiguous(),
            grid.binary.contiguous(),
            grid.contraction_type.to_cpp_version(),
            # sampling
            step_size,
            cone_angle,
        )
    return ray_indices, t_starts, t_ends
@torch.no_grad()
def proposal_sampling_with_filter(
    t_starts: torch.Tensor,  # [n_samples, 1]
    t_ends: torch.Tensor,  # [n_samples, 1]
    ray_indices: torch.Tensor,  # [n_samples,]
    n_rays: Optional[int] = None,
    # compute density of samples: {t_starts, t_ends, ray_indices} -> density
    sigma_fn: Optional[Callable] = None,
    # proposal density fns: {t_starts, t_ends, ray_indices} -> density
    proposal_sigma_fns: Tuple[Callable, ...] = (),
    proposal_n_samples: Tuple[int, ...] = (),
    proposal_require_grads: bool = False,
    # acceleration options
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, list]:
    """Heuristic marching with proposal functions.

    For every proposal function the current samples are weighted, optionally
    filtered by opacity/transmittance, and resampled with
    :func:`ray_resampling`. A final filtering pass with ``sigma_fn`` is applied
    when it is given and at least one threshold is positive.

    Args:
        t_starts: Per-sample start distance, shape (n_samples, 1).
        t_ends: Per-sample end distance, shape (n_samples, 1).
        ray_indices: Ray index of each sample, shape (n_samples,).
        n_rays: Total number of rays. Inferred as ``ray_indices.max() + 1``
            when omitted.
        sigma_fn: Optional density function used for the last filtering round.
        proposal_sigma_fns: Density functions, one per proposal round.
        proposal_n_samples: Number of samples to draw in each proposal round;
            must have the same length as ``proposal_sigma_fns``.
        proposal_require_grads: If True, each proposal function is re-evaluated
            with gradients enabled on the filtered samples and the result is
            recorded in the returned ``proposal_samples``.
        early_stop_eps: Samples with transmittance below this are dropped.
        alpha_thre: Samples with opacity below this are dropped.

    Returns:
        ``(ray_indices, t_starts, t_ends, proposal_samples)`` where
        ``proposal_samples`` is a list of
        ``(packed_info, t_starts, t_ends, weights)`` tuples (empty unless
        ``proposal_require_grads`` is True).
    """
    assert len(proposal_sigma_fns) == len(proposal_n_samples), (
        "proposal_sigma_fns and proposal_n_samples must have the same length, "
        f"but got {len(proposal_sigma_fns)} and {len(proposal_n_samples)}."
    )
    if n_rays is None:
        # Convert to a plain int: downstream helpers take `n_rays` as an
        # integer count, not a 0-d tensor.
        n_rays = int(ray_indices.max()) + 1

    filtering_enabled = alpha_thre > 0 or early_stop_eps > 0

    # compute density from proposal fns
    proposal_samples = []
    for proposal_fn, n_samples in zip(proposal_sigma_fns, proposal_n_samples):
        # compute weights for resampling
        sigmas = proposal_fn(t_starts, t_ends, ray_indices)
        assert (
            sigmas.shape == t_starts.shape
        ), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
        alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
        transmittance = render_transmittance_from_alpha(
            alphas, ray_indices=ray_indices, n_rays=n_rays
        )
        weights = alphas * transmittance

        # Compute visibility for filtering
        if filtering_enabled:
            vis = (alphas >= alpha_thre) & (transmittance >= early_stop_eps)
            vis = vis.squeeze(-1)
            ray_indices, t_starts, t_ends, weights = (
                ray_indices[vis],
                t_starts[vis],
                t_ends[vis],
                weights[vis],
            )
        packed_info = pack_info(ray_indices, n_rays=n_rays)

        # Rerun the proposal function **with** gradients on filtered samples.
        if proposal_require_grads:
            with torch.enable_grad():
                sigmas = proposal_fn(t_starts, t_ends, ray_indices)
                weights = render_weight_from_density(
                    t_starts, t_ends, sigmas, ray_indices=ray_indices
                )
                proposal_samples.append(
                    (packed_info, t_starts, t_ends, weights)
                )

        # resampling on filtered samples
        packed_info, t_starts, t_ends = ray_resampling(
            packed_info, t_starts, t_ends, weights, n_samples=n_samples
        )
        ray_indices = unpack_info(packed_info, t_starts.shape[0])

    # last round filtering with sigma_fn
    if filtering_enabled and (sigma_fn is not None):
        sigmas = sigma_fn(t_starts, t_ends, ray_indices)
        assert (
            sigmas.shape == t_starts.shape
        ), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
        alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
        transmittance = render_transmittance_from_alpha(
            alphas, ray_indices=ray_indices, n_rays=n_rays
        )
        vis = (alphas >= alpha_thre) & (transmittance >= early_stop_eps)
        vis = vis.squeeze(-1)
        ray_indices, t_starts, t_ends = (
            ray_indices[vis],
            t_starts[vis],
            t_ends[vis],
        )

    return ray_indices, t_starts, t_ends, proposal_samples
""" from typing import Callable, Optional, Tuple import torch from torch import Tensor import nerfacc.cuda as _C from .pack import pack_info def rendering( # ray marching results t_starts: torch.Tensor, t_ends: torch.Tensor, ray_indices: torch.Tensor, n_rays: int, # radiance field rgb_sigma_fn: Optional[Callable] = None, rgb_alpha_fn: Optional[Callable] = None, # rendering options render_bkgd: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """Render the rays through the radience field defined by `rgb_sigma_fn`. This function is differentiable to the outputs of `rgb_sigma_fn` so it can be used for gradient-based optimization. Note: Either `rgb_sigma_fn` or `rgb_alpha_fn` should be provided. Warning: This function is not differentiable to `t_starts`, `t_ends` and `ray_indices`. Args: t_starts: Per-sample start distance. Tensor with shape (n_samples, 1). t_ends: Per-sample end distance. Tensor with shape (n_samples, 1). ray_indices: Ray index of each sample. IntTensor with shape (n_samples). n_rays: Total number of rays. This will decide the shape of the ouputs. rgb_sigma_fn: A function that takes in samples {t_starts (N, 1), t_ends (N, 1), \ ray indices (N,)} and returns the post-activation rgb (N, 3) and density \ values (N, 1). rgb_alpha_fn: A function that takes in samples {t_starts (N, 1), t_ends (N, 1), \ ray indices (N,)} and returns the post-activation rgb (N, 3) and opacity \ values (N, 1). render_bkgd: Optional. Background color. Tensor with shape (3,). Returns: Ray colors (n_rays, 3), opacities (n_rays, 1) and depths (n_rays, 1). Examples: .. 
def rendering(
    # ray marching results
    t_starts: torch.Tensor,
    t_ends: torch.Tensor,
    ray_indices: torch.Tensor,
    n_rays: int,
    # radiance field
    rgb_sigma_fn: Optional[Callable] = None,
    rgb_alpha_fn: Optional[Callable] = None,
    # rendering options
    render_bkgd: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Render rays through a radiance field given as a callable.

    The field is queried once for all samples, turned into per-sample
    rendering weights, and then accumulated per ray into colors, opacities
    and depths. When both callables are supplied, `rgb_sigma_fn` is used.

    Warning:
        This function is not differentiable to `t_starts`, `t_ends` and
        `ray_indices`.

    Args:
        t_starts: Per-sample start distance. Tensor with shape (n_samples, 1).
        t_ends: Per-sample end distance. Tensor with shape (n_samples, 1).
        ray_indices: Ray index of each sample. Tensor with shape (n_samples).
        n_rays: Total number of rays; decides the first dimension of outputs.
        rgb_sigma_fn: Maps {t_starts (N, 1), t_ends (N, 1), ray indices (N,)}
            to post-activation rgb (N, 3) and density (N, 1).
        rgb_alpha_fn: Same inputs, returns rgb (N, 3) and opacity (N, 1).
        render_bkgd: Optional background color. Tensor with shape (3,).

    Returns:
        Ray colors (n_rays, 3), opacities (n_rays, 1) and depths (n_rays, 1).

    Raises:
        ValueError: If neither `rgb_sigma_fn` nor `rgb_alpha_fn` is given.
    """
    density_mode = rgb_sigma_fn is not None
    if not density_mode and rgb_alpha_fn is None:
        raise ValueError(
            "At least one of `rgb_sigma_fn` and `rgb_alpha_fn` should be specified."
        )

    # Query the field (with gradients) and derive the rendering weights.
    if density_mode:
        rgbs, sigmas = rgb_sigma_fn(t_starts, t_ends, ray_indices)
        assert rgbs.shape[-1] == 3, "rgbs must have 3 channels, got {}".format(
            rgbs.shape
        )
        assert (
            sigmas.shape == t_starts.shape
        ), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
        weights = render_weight_from_density(
            t_starts,
            t_ends,
            sigmas,
            ray_indices=ray_indices,
            n_rays=n_rays,
        )
    else:
        rgbs, alphas = rgb_alpha_fn(t_starts, t_ends, ray_indices)
        assert rgbs.shape[-1] == 3, "rgbs must have 3 channels, got {}".format(
            rgbs.shape
        )
        assert (
            alphas.shape == t_starts.shape
        ), "alphas must have shape of (N, 1)! Got {}".format(alphas.shape)
        weights = render_weight_from_alpha(
            alphas,
            ray_indices=ray_indices,
            n_rays=n_rays,
        )

    # Accumulate along each ray: color, total weight, and interval midpoint.
    sample_mid = (t_starts + t_ends) / 2.0
    colors = accumulate_along_rays(
        weights, ray_indices, values=rgbs, n_rays=n_rays
    )
    opacities = accumulate_along_rays(
        weights, ray_indices, values=None, n_rays=n_rays
    )
    depths = accumulate_along_rays(
        weights, ray_indices, values=sample_mid, n_rays=n_rays
    )

    # Composite the background behind whatever opacity is left.
    if render_bkgd is not None:
        colors = colors + render_bkgd * (1.0 - opacities)

    return colors, opacities, depths
def accumulate_along_rays(
    weights: Tensor,
    ray_indices: Tensor,
    values: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    """Accumulate volumetric values along the ray.

    Each sample contributes ``weights * values`` (or just ``weights`` when
    `values` is None) to the output row selected by its ray index.

    Note:
        This function is only differentiable to `weights` and `values`.
        It uses only device-agnostic torch ops, so CPU and CUDA tensors are
        both accepted.

    Args:
        weights: Volumetric rendering weights for the samples. Tensor with
            shape (n_samples, 1).
        ray_indices: Ray index of each sample. LongTensor with shape
            (n_samples).
        values: The values to be accumulated. Tensor with shape (n_samples, D).
            If None, the accumulated values are just the weights.
        n_rays: Total number of rays; decides the first output dimension. If
            None, it is inferred as ``ray_indices.max() + 1``. Must be given
            when there are no samples, and must be larger than
            ``ray_indices.max()`` otherwise.

    Returns:
        Accumulated values with shape (n_rays, D), with the dtype and device
        of ``weights * values``. If `values` is not given, D == 1.

    Examples:

    .. code-block:: python

        colors = accumulate_along_rays(weights, ray_indices, values=rgbs, n_rays=n_rays)
        opacities = accumulate_along_rays(weights, ray_indices, values=None, n_rays=n_rays)
        # (n_rays, 3), (n_rays, 1)
        print(colors.shape, opacities.shape)

    """
    assert ray_indices.dim() == 1 and weights.dim() == 2
    if values is not None:
        assert (
            values.dim() == 2 and values.shape[0] == weights.shape[0]
        ), "Invalid shapes: {} vs {}".format(values.shape, weights.shape)
        src = weights * values
    else:
        src = weights

    if ray_indices.numel() == 0:
        # Nothing to infer the ray count from, so it has to be explicit.
        assert n_rays is not None
        # Match the dtype/device of the non-empty path below.
        return torch.zeros(
            (n_rays, src.shape[-1]), device=src.device, dtype=src.dtype
        )

    if n_rays is None:
        n_rays = int(ray_indices.max()) + 1

    # Broadcast the ray index over the channel dimension for scatter_add_.
    index = ray_indices[:, None].expand(-1, src.shape[-1])
    outputs = torch.zeros(
        (n_rays, src.shape[-1]), device=src.device, dtype=src.dtype
    )
    outputs.scatter_add_(0, index, src)
    return outputs
def accumulate_along_rays_importance(
    weights: Tensor,
    ray_indices: Tensor,
    values: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    """Accumulate volumetric values along the ray.

    Sums ``weights * values`` (or ``weights`` alone when `values` is None)
    over all samples that share a ray index.

    Note:
        This function is only differentiable to `weights` and `values`.
        NOTE(review): the computation here is the same as in
        ``accumulate_along_rays``; what distinguishes the "importance"
        variant is not visible in this file -- confirm with the callers.

    Args:
        weights: Volumetric rendering weights for the samples. Tensor with
            shape (n_samples, 1).
        ray_indices: Ray index of each sample. LongTensor with shape
            (n_samples).
        values: The values to be accumulated. Tensor with shape (n_samples, D).
            If None, the accumulated values are just the weights.
        n_rays: Total number of rays; decides the first output dimension. If
            None, it is inferred as ``ray_indices.max() + 1``. Must be given
            when there are no samples.

    Returns:
        Accumulated values with shape (n_rays, D), sharing dtype and device
        with ``weights * values``. If `values` is not given, D == 1.
    """
    assert ray_indices.dim() == 1 and weights.dim() == 2
    if values is None:
        src = weights
    else:
        assert (
            values.dim() == 2 and values.shape[0] == weights.shape[0]
        ), "Invalid shapes: {} vs {}".format(values.shape, weights.shape)
        src = weights * values
    n_channels = src.shape[-1]

    if ray_indices.numel() == 0:
        # The ray count cannot be inferred from an empty index tensor.
        assert n_rays is not None
        # new_zeros keeps the dtype and device of `src`, like the path below.
        return src.new_zeros((n_rays, n_channels))

    if n_rays is None:
        n_rays = int(ray_indices.max()) + 1

    outputs = src.new_zeros((n_rays, n_channels))
    # Repeat each ray index across channels so every column is scattered.
    outputs.scatter_add_(0, ray_indices[:, None].expand(-1, n_channels), src)
    return outputs
def accumulate_along_rays_patch_based(
    weights: Tensor,
    ray_indices: Tensor,
    values: Optional[Tensor] = None,
    n_patches: Optional[int] = None,
) -> Tensor:
    """Accumulate volumetric values along the ray, one row per patch.

    Same reduction as ``accumulate_along_rays`` but with an extra patch
    dimension: samples are 3-D and are summed over the first dimension into
    the output slot selected by `ray_indices`.

    Note:
        This function is only differentiable to `weights` and `values`.

    Args:
        weights: Rendering weights. Tensor with shape
            (n_samples, patch_size, 1).
        ray_indices: Index of the output slot (patch) of each sample.
            LongTensor with shape (n_samples).
        values: The values to be accumulated. 3-D tensor whose first dimension
            is n_samples and that broadcasts against `weights`, e.g.
            (n_samples, patch_size, D). If None, the weights are accumulated.
        n_patches: Number of output slots. If None, it is inferred as
            ``ray_indices.max() + 1``. Must be given when there are no samples.

    Returns:
        Accumulated values with shape (n_patches, patch_size, D), sharing
        dtype and device with ``weights * values``. If `values` is not given,
        D == 1.
    """
    assert ray_indices.dim() == 1 and weights.dim() == 3  # (num_samples, patch_size, 1)
    if values is not None:
        assert (
            values.dim() == 3 and values.shape[0] == weights.shape[0]
        ), "Invalid shapes: {} vs {}".format(values.shape, weights.shape)
        src = weights * values
    else:
        src = weights
    out_shape_tail = (src.shape[1], src.shape[-1])

    if ray_indices.numel() == 0:
        # The slot count cannot be inferred from an empty index tensor.
        assert n_patches is not None
        # Match the dtype/device of the non-empty path below.
        return torch.zeros(
            (n_patches, *out_shape_tail), device=src.device, dtype=src.dtype
        )

    if n_patches is None:
        n_patches = int(ray_indices.max()) + 1

    # Broadcast the index over the patch and channel dimensions.
    index = ray_indices[:, None, None].expand(-1, *out_shape_tail)
    outputs = torch.zeros(
        (n_patches, *out_shape_tail), device=src.device, dtype=src.dtype
    )
    outputs.scatter_add_(0, index, src)
    return outputs
def render_transmittance_from_density(
    t_starts: Tensor,
    t_ends: Tensor,
    sigmas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    """Compute transmittance :math:`T_i` from density :math:`\\sigma_i`.

    .. math::
        T_i = exp(-\\sum_{j=1}^{i-1}\\sigma_j\\delta_j)

    Note:
        Either `ray_indices` or `packed_info` should be provided. If
        `ray_indices` is provided and ``_C.is_cub_available()`` is true, the
        CUB-based implementation is used. Otherwise the naive implementation
        with `packed_info` is used (built from `ray_indices` if necessary).

    Args:
        t_starts: Where the frustum-shape sample starts along a ray. Tensor
            with shape (n_samples, 1).
        t_ends: Where the frustum-shape sample ends along a ray. Tensor with
            shape (n_samples, 1).
        sigmas: The density values of the samples. Tensor with shape
            (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to
            the same ray. See :func:`nerfacc.ray_marching` for details.
            Tensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. Tensor with shape
            (n_samples).
        n_rays: Optional. Number of rays. Only used when `ray_indices` has to
            be converted to `packed_info` for the naive implementation.

    Returns:
        The rendering transmittance. Tensor with shape (n_samples, 1).

    Examples:

    .. code-block:: python

        >>> t_starts = torch.tensor(
        >>>     [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0], [6.0]], device="cuda")
        >>> t_ends = torch.tensor(
        >>>     [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0]], device="cuda")
        >>> sigmas = torch.tensor(
        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> transmittance = render_transmittance_from_density(
        >>>     t_starts, t_ends, sigmas, ray_indices=ray_indices)
        [[1.00], [0.67], [0.30], [1.00], [0.45], [1.00], [1.00]]

    """
    assert (
        ray_indices is not None or packed_info is not None
    ), "Either ray_indices or packed_info should be provided."
    if ray_indices is not None and _C.is_cub_available():
        transmittance = _RenderingTransmittanceFromDensityCUB.apply(
            ray_indices, t_starts, t_ends, sigmas
        )
    else:
        # The naive kernel works on packed_info; derive it when only
        # ray_indices was supplied.
        if packed_info is None:
            packed_info = pack_info(ray_indices, n_rays=n_rays)
        transmittance = _RenderingTransmittanceFromDensityNaive.apply(
            packed_info, t_starts, t_ends, sigmas
        )
    return transmittance
def render_transmittance_from_alpha(
    alphas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    """Compute transmittance :math:`T_i` from alpha :math:`\\alpha_i`.

    .. math::
        T_i = \\prod_{j=1}^{i-1}(1-\\alpha_j)

    Note:
        At least one of `ray_indices` and `packed_info` is required. The
        CUB-based implementation is chosen when `ray_indices` is given and
        ``_C.is_cub_available()`` is true; in every other case the naive
        implementation runs on `packed_info`, which is built from
        `ray_indices` when it was not passed in.

    Args:
        alphas: The opacity values of the samples. Tensor with shape
            (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to
            the same ray. See :func:`nerfacc.ray_marching` for details.
            Tensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. Tensor with shape
            (n_samples).
        n_rays: Optional. Number of rays, forwarded to ``pack_info`` when
            `ray_indices` must be converted to `packed_info`.

    Returns:
        The rendering transmittance. Tensor with shape (n_samples, 1).

    Examples:

    .. code-block:: python

        >>> alphas = torch.tensor(
        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> transmittance = render_transmittance_from_alpha(alphas, ray_indices=ray_indices)
        tensor([[1.0], [0.6], [0.12], [1.0], [0.2], [1.0], [1.0]])

    """
    has_indices = ray_indices is not None
    assert (
        has_indices or packed_info is not None
    ), "Either ray_indices or packed_info should be provided."

    # Fast path: CUB kernel keyed directly on ray indices.
    if has_indices and _C.is_cub_available():
        return _RenderingTransmittanceFromAlphaCUB.apply(ray_indices, alphas)

    # Fallback: naive kernel keyed on packed_info.
    info = (
        packed_info
        if packed_info is not None
        else pack_info(ray_indices, n_rays=n_rays)
    )
    return _RenderingTransmittanceFromAlphaNaive.apply(info, alphas)
def render_weight_from_density(
    t_starts: Tensor,
    t_ends: Tensor,
    sigmas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> torch.Tensor:
    """Compute rendering weights :math:`w_i` from density :math:`\\sigma_i` and interval :math:`\\delta_i`.

    .. math::
        w_i = T_i(1 - exp(-\\sigma_i\\delta_i)), \\quad\\textrm{where}\\quad T_i = exp(-\\sum_{j=1}^{i-1}\\sigma_j\\delta_j)

    Note:
        Either `ray_indices` or `packed_info` should be provided. If
        `ray_indices` is provided and ``_C.is_cub_available()`` is true, the
        transmittance comes from the CUB-based implementation and is
        multiplied by the per-sample opacity here. Otherwise the naive
        implementation with `packed_info` is used (built from `ray_indices`
        if necessary).

    Args:
        t_starts: Where the frustum-shape sample starts along a ray. Tensor
            with shape (n_samples, 1).
        t_ends: Where the frustum-shape sample ends along a ray. Tensor with
            shape (n_samples, 1).
        sigmas: The density values of the samples. Tensor with shape
            (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to
            the same ray. See :func:`nerfacc.ray_marching` for details.
            Tensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. Tensor with shape
            (n_samples).
        n_rays: Optional. Number of rays. Only used when `ray_indices` has to
            be converted to `packed_info` for the naive implementation.

    Returns:
        The rendering weights. Tensor with shape (n_samples, 1).

    Examples:

    .. code-block:: python

        >>> t_starts = torch.tensor(
        >>>     [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0], [6.0]], device="cuda")
        >>> t_ends = torch.tensor(
        >>>     [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0]], device="cuda")
        >>> sigmas = torch.tensor(
        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> weights = render_weight_from_density(
        >>>     t_starts, t_ends, sigmas, ray_indices=ray_indices)
        [[0.33], [0.37], [0.03], [0.55], [0.04], [0.00], [0.59]]

    """
    assert (
        ray_indices is not None or packed_info is not None
    ), "Either ray_indices or packed_info should be provided."
    if ray_indices is not None and _C.is_cub_available():
        transmittance = _RenderingTransmittanceFromDensityCUB.apply(
            ray_indices, t_starts, t_ends, sigmas
        )
        # Opacity of each interval: 1 - exp(-sigma * delta).
        alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
        weights = transmittance * alphas
    else:
        # The naive kernel works on packed_info; derive it when only
        # ray_indices was supplied.
        if packed_info is None:
            packed_info = pack_info(ray_indices, n_rays=n_rays)
        weights = _RenderingWeightFromDensityNaive.apply(
            packed_info, t_starts, t_ends, sigmas
        )
    return weights
def render_weight_from_alpha_patch_based(
    alphas: Tensor,
    ray_indices: Tensor,
    *,
    # packed_info: Optional[torch.Tensor] = None
    n_rays: Optional[int] = None,
) -> torch.Tensor:
    """Compute patch-based rendering weights :math:`w_i` from opacity :math:`\\alpha_i`.

    .. math::
        w_i = T_i\\alpha_i, \\quad\\textrm{where}\\quad T_i = \\prod_{j=1}^{i-1}(1-\\alpha_j)

    Note:
        Unlike :func:`render_weight_from_alpha`, `ray_indices` is mandatory
        here: it is always converted to `packed_info` with ``pack_info`` and
        the naive patch-based implementation is always used.

    Args:
        alphas: The opacity values of the samples.
            NOTE(review): presumably shaped (n_samples, patch_size, 1), to
            match ``accumulate_along_rays_patch_based`` -- confirm.
        ray_indices: Ray index of each sample. Tensor with shape (n_samples).
        n_rays: Optional. Number of rays, forwarded to ``pack_info``.

    Returns:
        The rendering weights produced by the patch-based kernel.
    """
    ray_packing = pack_info(ray_indices, n_rays=n_rays)
    return _RenderingWeightFromAlphaPatchBasedNaive.apply(ray_packing, alphas)
def render_weight_from_alpha(
    alphas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> torch.Tensor:
    """Compute rendering weights :math:`w_i` from opacity :math:`\\alpha_i`.

    .. math::
        w_i = T_i\\alpha_i, \\quad\\textrm{where}\\quad T_i = \\prod_{j=1}^{i-1}(1-\\alpha_j)

    Note:
        At least one of `ray_indices` and `packed_info` must be given.
        When `ray_indices` is given and CUB is available, the CUB
        transmittance kernel is used and multiplied by `alphas`. In every
        other case the naive kernel runs on `packed_info`, which is derived
        from `ray_indices` when it was not passed in.

    Args:
        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to the
            same ray. LongTensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional. Number of rays. Only consulted when `ray_indices` has to
            be converted to `packed_info` for the naive kernel.

    Returns:
        The rendering weights. Tensor with shape (n_sample, 1).

    Examples:

    .. code-block:: python

        >>> alphas = torch.tensor(
        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> weights = render_weight_from_alpha(alphas, ray_indices=ray_indices)
        tensor([[0.4], [0.48], [0.012], [0.8], [0.02], [0.0], [0.9]])

    """
    has_ray_indices = ray_indices is not None
    assert (
        has_ray_indices or packed_info is not None
    ), "Either ray_indices or packed_info should be provided."

    # Fast path: CUB computes the transmittance, the weights follow directly.
    if has_ray_indices and _C.is_cub_available():
        transmittance = _RenderingTransmittanceFromAlphaCUB.apply(
            ray_indices, alphas
        )
        return transmittance * alphas

    # Fallback: the naive kernel works on packed_info only.
    if packed_info is None:
        packed_info = pack_info(ray_indices, n_rays=n_rays)
    return _RenderingWeightFromAlphaNaive.apply(packed_info, alphas)
Args: alphas: The opacity values of the samples. Tensor with shape (n_samples, 1). packed_info: Optional. Stores information on which samples belong to the same ray. \ See :func:`nerfacc.ray_marching` for details. LongTensor with shape (n_rays, 2). ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample). n_rays: Optional. Number of rays. Only useful when `ray_indices` is provided yet \ CUB acceleration is not available. We will implicitly convert `ray_indices` to \ `packed_info` and use the naive implementation. If not provided, we will infer \ it from `ray_indices` but it will be slower. early_stop_eps: The early stopping threshold on transmittance. alpha_thre: The threshold on opacity. Returns: The visibility of each sample. Tensor with shape (n_samples, 1). Examples: .. code-block:: python >>> alphas = torch.tensor( >>> [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device="cuda") >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda") >>> transmittance = render_transmittance_from_alpha(alphas, ray_indices=ray_indices) tensor([[1.0], [0.6], [0.12], [1.0], [0.2], [1.0], [1.0]]) >>> visibility = render_visibility( >>> alphas, ray_indices=ray_indices, early_stop_eps=0.3, alpha_thre=0.2) tensor([True, True, False, True, False, False, True]) """ assert ( ray_indices is not None or packed_info is not None ), "Either ray_indices or packed_info should be provided." 
@torch.no_grad()
def render_visibility_patch_based(
    alphas: torch.Tensor,
    *,
    ray_indices: Optional[torch.Tensor] = None,
    packed_info: Optional[torch.Tensor] = None,
    n_patches: Optional[int] = None,
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
) -> torch.Tensor:
    """Filter out transparent and occluded samples (patch-based variant).

    The transmittance is computed from the sample opacity first. A sample is
    kept when *any* entry along dim 1 of its transmittance row satisfies
    `transmittance >= early_stop_eps`. When `alpha_thre > 0`, the sample must
    additionally satisfy `alphas >= alpha_thre`.

    Note:
        At least one of `ray_indices` and `packed_info` must be given.
        When `ray_indices` is given and CUB is available, the (non patch-based)
        CUB transmittance kernel is used. Otherwise the naive patch-based
        kernel runs on `packed_info`, which is derived from `ray_indices` when
        it was not passed in.

    Args:
        alphas: The opacity values of the samples.
            NOTE(review): presumably (n_samples, n_pixels_per_patch) --
            confirm against the CUDA kernel.
        ray_indices: Optional. Ray (patch) index of each sample.
        packed_info: Optional. Stores information on which samples belong to
            the same ray (patch).
        n_patches: Optional. Forwarded to :func:`pack_info` as `n_rays` when
            `ray_indices` has to be converted to `packed_info`.
        early_stop_eps: The early stopping threshold on transmittance.
        alpha_thre: The threshold on opacity.

    Returns:
        Boolean visibility tensor with the trailing singleton dimension
        removed.
    """
    assert (
        ray_indices is not None or packed_info is not None
    ), "Either ray_indices or packed_info should be provided."
    if ray_indices is not None and _C.is_cub_available():
        transmittance = _RenderingTransmittanceFromAlphaCUB.apply(
            ray_indices, alphas
        )
    else:
        if packed_info is None:
            packed_info = pack_info(ray_indices, n_rays=n_patches)
        transmittance = _RenderingTransmittanceFromAlphaPatchBasedNaive.apply(
            packed_info, alphas
        )
    # A sample is visible as soon as one entry of its row is still unoccluded.
    visibility = torch.any(transmittance >= early_stop_eps, dim=1, keepdim=True)
    if alpha_thre > 0:
        visibility = visibility & (alphas >= alpha_thre)
    # Only drop the trailing dim (as render_visibility does). A bare squeeze()
    # would also remove the sample dimension when there is exactly one sample,
    # yielding a 0-dim mask that no longer indexes per-sample tensors correctly.
    visibility = visibility.squeeze(-1)
    return visibility
class _RenderingTransmittanceFromAlphaCUB(torch.autograd.Function):
    """Rendering transmittance from opacity with CUB implementation."""

    @staticmethod
    def forward(ctx, ray_indices, alphas):
        """Run the CUB forward kernel on contiguous copies of the inputs."""
        ray_idx = ray_indices.contiguous()
        opacity = alphas.contiguous()
        result = _C.transmittance_from_alpha_forward_cub(ray_idx, opacity)
        # Tensors are only kept around when a gradient w.r.t. alphas is needed.
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(ray_idx, result, opacity)
        return result

    @staticmethod
    def backward(ctx, transmittance_grads):
        """Return gradients for (ray_indices, alphas); the former has none."""
        ray_idx, result, opacity = ctx.saved_tensors
        upstream = transmittance_grads.contiguous()
        d_alphas = _C.transmittance_from_alpha_backward_cub(
            ray_idx, opacity, result, upstream
        )
        return None, d_alphas
class _RenderingTransmittanceFromAlphaPatchBasedNaive(torch.autograd.Function):
    """Rendering transmittance from opacity (patch-based) with naive forloop."""

    @staticmethod
    def forward(ctx, packed_info, alphas):
        # The extension kernels are handed contiguous tensors.
        packed_info = packed_info.contiguous()
        alphas = alphas.contiguous()
        transmittance = _C.transmittance_from_alpha_patch_based_forward_naive(packed_info, alphas)
        # Save tensors only when a gradient w.r.t. `alphas` is requested.
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(packed_info, transmittance, alphas)
        return transmittance

    @staticmethod
    def backward(ctx, grad_transmittance):
        grad_transmittance = grad_transmittance.contiguous()
        packed_info, transmittance, alphas = ctx.saved_tensors
        # NOTE(review): this calls the *weight_and_transmittance* backward
        # kernel with (packed_info, alphas, transmittance, grad), whereas the
        # other call site of that kernel in this file passes
        # (weights, grad_weights, packed_info, alphas). Confirm that this is
        # the intended kernel and argument order. The only visible caller
        # (render_visibility_patch_based) runs under torch.no_grad().
        grad_alphas = _C.weight_and_transmittance_from_alpha_patch_based_backward_naive(
            packed_info, alphas, transmittance, grad_transmittance
        )
        # One gradient slot per forward input: packed_info gets none.
        return None, grad_alphas
class _RenderingWeightFromAlphaPatchBasedNaive(torch.autograd.Function):
    """Rendering weight from opacity (patch-based) with naive forloop."""

    @staticmethod
    def forward(ctx, packed_info, alphas):
        """Run the naive patch-based forward kernel on contiguous inputs."""
        info = packed_info.contiguous()
        opacity = alphas.contiguous()
        out = _C.weight_from_alpha_patch_based_forward_naive(info, opacity)
        # Tensors are only kept around when a gradient w.r.t. alphas is needed.
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(info, opacity, out)
        return out

    @staticmethod
    def backward(ctx, grad_weights):
        """Return gradients for (packed_info, alphas); the former has none."""
        info, opacity, out = ctx.saved_tensors
        upstream = grad_weights.contiguous()
        d_alphas = _C.weight_from_alpha_patch_based_backward_naive(
            out, upstream, info, opacity
        )
        return None, d_alphas
class _RenderingWeightFromAlphaImportanceSamplingNaive(torch.autograd.Function):
    """Rendering weight from opacity and importance pdfs with naive forloop."""

    @staticmethod
    def forward(ctx, packed_info, alphas, importance_pdfs):
        """Run the importance-sampling forward kernel on contiguous inputs."""
        packed_info = packed_info.contiguous()
        alphas = alphas.contiguous()
        importance_pdfs = importance_pdfs.contiguous()
        weights = _C.weight_from_alpha_importance_sampling_forward_naive(
            packed_info, alphas, importance_pdfs
        )
        # Save tensors only when a gradient w.r.t. `alphas` is requested.
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(packed_info, alphas, importance_pdfs, weights)
        return weights

    @staticmethod
    def backward(ctx, grad_weights):
        """Return one gradient slot per forward input.

        Only `alphas` receives a gradient; `packed_info` and
        `importance_pdfs` are treated as non-differentiable.
        """
        grad_weights = grad_weights.contiguous()
        packed_info, alphas, importance_pdfs, weights = ctx.saved_tensors
        # NOTE(review): `_RenderingWeightFromAlphaNaive` in this file calls the
        # same kernel with four arguments; confirm the binding accepts the
        # extra `importance_pdfs` argument (or that a dedicated
        # importance-sampling backward kernel should be used here).
        grad_alphas = _C.weight_from_alpha_backward_naive(
            weights, grad_weights, packed_info, alphas, importance_pdfs
        )
        # forward() takes three inputs, so autograd requires three return
        # values; returning only two raises "returned an incorrect number of
        # gradients" at backward time.
        return None, grad_alphas, None
= s3.list_objects_v2(Bucket=args.bucket, Prefix="whl/")["Contents"] subdirectories = {} for data in responses: splits = data["Key"].split("/") if len(splits) == 3: subdirectories[splits[1]] = [] for dir in subdirectories.keys(): responses = s3.list_objects_v2(Bucket=args.bucket, Prefix=f"whl/{dir}")[ "Contents" ] for data in responses: splits = data["Key"].split("/") if len(splits) == 3: subdirectories[dir].append(splits[2]) for dir, files in subdirectories.items(): lines = "" for file in files: href = os.path.join(URL, "whl", dir, file) lines += f"{file}\n
def run_command(command: str) -> bool:
    """Run a shell command and report whether it succeeded.

    The command string is executed through the shell. When it exits with a
    non-zero status, an error line is printed to the console.

    Args:
        command: command to run

    Returns:
        True when the command exited with status 0, False otherwise.
    """
    succeeded = subprocess.call(command, shell=True) == 0
    if not succeeded:
        console.print(f"[bold red]Error: `{command}` failed.")
    return succeeded
class Profiler:
    """Callable that profiles a function with `torch.profiler`.

    The function is first called `warmup` times, then called `repeat` times
    under the profiler with CPU and CUDA activities and memory profiling
    enabled.
    """

    def __init__(self, warmup=10, repeat=1000):
        self.warmup = warmup
        self.repeat = repeat

    def __call__(self, func: Callable):
        """Profile `func`.

        Returns:
            A tuple `(self_cpu_time_total, self_cuda_time_total,
            self_cuda_memory_usage)`: the two times are summed over all
            profiler events and divided by `repeat` (in us), the memory value
            is the maximum over all events (in bytes).
        """
        # warmup
        for _ in range(self.warmup):
            func()
        torch.cuda.synchronize()

        # profile
        tracked = [
            torch.profiler.ProfilerActivity.CPU,
            torch.profiler.ProfilerActivity.CUDA,
        ]
        with torch.profiler.profile(
            activities=tracked, profile_memory=True
        ) as prof:
            for _ in range(self.repeat):
                func()
            torch.cuda.synchronize()

        # aggregate
        events = prof.key_averages()
        cpu_us = sum(evt.self_cpu_time_total for evt in events) / self.repeat
        cuda_us = sum(evt.self_cuda_time_total for evt in events) / self.repeat
        peak_cuda_bytes = max(evt.self_cuda_memory_usage for evt in events)
        return (
            cpu_us,  # in us
            cuda_us,  # in us
            peak_cuda_bytes,  # in bytes
        )
def get_extensions():
    """Build the list of CUDA extensions to compile for the package.

    Collects the `*.cu` sources under `nerfacc/cuda/csrc`, assembles the
    compiler/linker flags for the current platform (OpenMP, macOS arm64,
    ROCm vs. CUDA) and returns a single `CUDAExtension` wrapped in a list.

    The `NVCC_FLAGS` environment variable may hold extra whitespace-separated
    flags for nvcc.
    """
    # Imported lazily so that `BUILD_NO_CUDA=1` installs do not need torch.
    import torch
    from torch.__config__ import parallel_info
    from torch.utils.cpp_extension import CUDAExtension

    extensions_dir = osp.join("nerfacc", "cuda", "csrc")
    sources = glob.glob(osp.join(extensions_dir, "*.cu"))
    # remove generated 'hip' files, in case of rebuilds
    sources = [path for path in sources if "hip" not in path]

    undef_macros = []
    define_macros = []

    if sys.platform == "win32":
        define_macros += [("nerfacc_EXPORTS", None)]

    extra_compile_args = {"cxx": ["-O3"]}
    if os.name != "nt":  # Not on Windows:
        extra_compile_args["cxx"] += ["-Wno-sign-compare"]
    extra_link_args = [] if WITH_SYMBOLS else ["-s"]

    info = parallel_info()
    if (
        "backend: OpenMP" in info
        and "OpenMP not found" not in info
        and sys.platform != "darwin"
    ):
        extra_compile_args["cxx"] += ["-DAT_PARALLEL_OPENMP"]
        if sys.platform == "win32":
            extra_compile_args["cxx"] += ["/openmp"]
        else:
            extra_compile_args["cxx"] += ["-fopenmp"]
    else:
        print("Compiling without OpenMP...")

    # Compile for mac arm64
    if sys.platform == "darwin" and platform.machine() == "arm64":
        extra_compile_args["cxx"] += ["-arch", "arm64"]
        extra_link_args += ["-arch", "arm64"]

    # str.split() without a separator drops empty fields, so an empty value,
    # padding or repeated spaces never turn into empty-string nvcc arguments
    # (which `split(" ")` would produce).
    nvcc_flags = os.getenv("NVCC_FLAGS", "").split()
    nvcc_flags += ["-O3"]

    if torch.version.hip:
        # USE_ROCM was added to later versions of PyTorch.
        # Define here to support older PyTorch versions as well:
        define_macros += [("USE_ROCM", None)]
        undef_macros += ["__HIP_NO_HALF_CONVERSIONS__"]
    else:
        nvcc_flags += ["--expt-relaxed-constexpr"]
    extra_compile_args["nvcc"] = nvcc_flags

    extension = CUDAExtension(
        "nerfacc.csrc",
        sources,
        include_dirs=[osp.join(extensions_dir, "include")],
        define_macros=define_macros,
        undef_macros=undef_macros,
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )

    return [extension]
# `torch.cuda.is_available` must be *called*: the bare function object is
# always truthy, so `not torch.cuda.is_available` is always False and the
# test would never be skipped on CUDA-less machines.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_identity():
    """AABB contraction with the unit-cube ROI leaves points unchanged."""
    x = torch.rand([batch_size, 3], device=device)
    roi = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)
    x_out = contract(x, roi=roi, type=ContractionType.AABB)
    assert torch.allclose(x_out, x, atol=eps)
    # The inverse contraction must map the result back to the input.
    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.AABB)
    assert torch.allclose(x_inv, x, atol=eps)
# `torch.cuda.is_available` must be *called*: the bare function object is
# always truthy, so `not torch.cuda.is_available` is always False and the
# test would never be skipped on CUDA-less machines.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_tanh():
    """UN_BOUNDED_TANH contraction matches the closed form and is invertible."""
    x = torch.randn([batch_size, 3], device=device)
    roi = torch.tensor(
        [-0.2, -0.3, -0.4, 0.7, 0.8, 0.6], dtype=torch.float32, device=device
    )
    x_out = contract(x, roi=roi, type=ContractionType.UN_BOUNDED_TANH)
    # Reference: normalise by the ROI, centre, squash with tanh, map to [0, 1].
    x_out_tgt = (
        torch.tanh((x - roi[:3]) / (roi[3:] - roi[:3]) - 0.5) * 0.5 + 0.5
    )
    assert torch.allclose(x_out, x_out_tgt, atol=eps)
    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.UN_BOUNDED_TANH)
    assert torch.allclose(x_inv, x, atol=eps)
# `torch.cuda.is_available` must be *called*: the bare function object is
# always truthy, so `not torch.cuda.is_available` is always False and the
# test would never be skipped on CUDA-less machines.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_intersection():
    """Rays starting inside the unit cube intersect it from t_min == 0."""
    rays_o = torch.rand([batch_size, 3], device=device)
    rays_d = torch.randn([batch_size, 3], device=device)
    aabb = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)
    t_min, t_max = ray_aabb_intersect(rays_o, rays_d, aabb)
    assert (t_min == 0).all()
    # Any point sampled between t_min and t_max must lie inside the box.
    t = torch.rand_like(t_min) * (t_max - t_min) + t_min
    x = rays_o + t.unsqueeze(-1) * rays_d
    assert (x >= 0).all() and (x <= 1).all()
device=device) loss = distortion(packed_info, weights, t_starts, t_ends) assert loss.shape == (batch_size,) if __name__ == "__main__": test_distortion() ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_pack.py ================================================ import pytest import torch from nerfacc import pack_data, pack_info, unpack_data, unpack_info device = "cuda:0" batch_size = 32 eps = 1e-6 @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_pack_data(): n_rays = 2 n_samples = 3 data = torch.rand((n_rays, n_samples, 2), device=device, requires_grad=True) mask = torch.rand((n_rays, n_samples), device=device) > 0.5 packed_data, packed_info = pack_data(data, mask) unpacked_data = unpack_data(packed_info, packed_data, n_samples) unpacked_data.sum().backward() assert (data.grad[mask] == 1).all() assert torch.allclose( unpacked_data.sum(dim=1), (data * mask[..., None]).sum(dim=1) ) @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_unpack_info(): packed_info = torch.tensor( [[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device ) ray_indices_tgt = torch.tensor( [0, 2, 2, 2, 2], dtype=torch.int64, device=device ) ray_indices = unpack_info(packed_info, n_samples=5) packed_info_2 = pack_info(ray_indices, n_rays=packed_info.shape[0]) assert torch.allclose(packed_info.int(), packed_info_2.int()) assert torch.allclose(ray_indices, ray_indices_tgt) if __name__ == "__main__": test_pack_data() test_unpack_info() ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_ray_marching.py ================================================ import pytest import torch from nerfacc import OccupancyGrid, ray_marching, unpack_info device = "cuda:0" batch_size = 128 @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_marching_with_near_far(): rays_o = torch.rand((batch_size, 3), 
device=device) rays_d = torch.randn((batch_size, 3), device=device) rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True) ray_indices, t_starts, t_ends = ray_marching( rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3, ) return @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_marching_with_grid(): rays_o = torch.rand((batch_size, 3), device=device) rays_d = torch.randn((batch_size, 3), device=device) rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True) grid = OccupancyGrid(roi_aabb=[0, 0, 0, 1, 1, 1]).to(device) grid._binary[:] = True ray_indices, t_starts, t_ends = ray_marching( rays_o, rays_d, grid=grid, near_plane=0.0, far_plane=1.0, render_step_size=1e-2, ) ray_indices = ray_indices samples = ( rays_o[ray_indices] + rays_d[ray_indices] * (t_starts + t_ends) / 2.0 ) assert (samples <= grid.roi_aabb[3:].unsqueeze(0)).all() assert (samples >= grid.roi_aabb[:3].unsqueeze(0)).all() return if __name__ == "__main__": test_marching_with_near_far() test_marching_with_grid() ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_rendering.py ================================================ import pytest import torch from nerfacc import ( accumulate_along_rays, render_transmittance_from_density, render_visibility, render_weight_from_alpha, render_weight_from_density, rendering, ) device = "cuda:0" batch_size = 32 eps = 1e-6 @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_render_visibility(): ray_indices = torch.tensor( [0, 2, 2, 2, 2], dtype=torch.int64, device=device ) # (samples,) alphas = torch.tensor( [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device ).unsqueeze( -1 ) # (n_samples, 1) # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028] vis = render_visibility( alphas, ray_indices=ray_indices, early_stop_eps=0.03, alpha_thre=0.0 ) vis_tgt = torch.tensor( [True, True, True, True, False], dtype=torch.bool, device=device ) assert 
torch.allclose(vis, vis_tgt) # transmittance: [1.0, 1.0, 1.0, 0.2, 0.04] vis = render_visibility( alphas, ray_indices=ray_indices, early_stop_eps=0.05, alpha_thre=0.35 ) vis_tgt = torch.tensor( [True, False, True, True, False], dtype=torch.bool, device=device ) assert torch.allclose(vis, vis_tgt) @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_render_weight_from_alpha(): ray_indices = torch.tensor( [0, 2, 2, 2, 2], dtype=torch.int64, device=device ) # (samples,) alphas = torch.tensor( [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device ).unsqueeze( -1 ) # (n_samples, 1) # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028] weights = render_weight_from_alpha( alphas, ray_indices=ray_indices, n_rays=3 ) weights_tgt = torch.tensor( [1.0 * 0.4, 1.0 * 0.3, 0.7 * 0.8, 0.14 * 0.8, 0.028 * 0.5], dtype=torch.float32, device=device, ).unsqueeze(-1) assert torch.allclose(weights, weights_tgt) @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_render_weight_from_density(): ray_indices = torch.tensor( [0, 2, 2, 2, 2], dtype=torch.int64, device=device ) # (samples,) sigmas = torch.rand( (ray_indices.shape[0], 1), device=device ) # (n_samples, 1) t_starts = torch.rand_like(sigmas) t_ends = torch.rand_like(sigmas) + 1.0 alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts)) weights = render_weight_from_density( t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 ) weights_tgt = render_weight_from_alpha( alphas, ray_indices=ray_indices, n_rays=3 ) assert torch.allclose(weights, weights_tgt) @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_accumulate_along_rays(): ray_indices = torch.tensor( [0, 2, 2, 2, 2], dtype=torch.int64, device=device ) # (n_rays,) weights = torch.tensor( [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device ).unsqueeze(-1) values = torch.rand((5, 2), device=device) # (n_samples, 1) ray_values = accumulate_along_rays( weights, ray_indices, 
values=values, n_rays=3 ) assert ray_values.shape == (3, 2) assert torch.allclose(ray_values[0, :], weights[0, :] * values[0, :]) assert (ray_values[1, :] == 0).all() assert torch.allclose( ray_values[2, :], (weights[1:, :] * values[1:]).sum(dim=0) ) @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_rendering(): def rgb_sigma_fn(t_starts, t_ends, ray_indices): return torch.hstack([t_starts] * 3), t_starts ray_indices = torch.tensor( [0, 2, 2, 2, 2], dtype=torch.int64, device=device ) # (samples,) sigmas = torch.rand( (ray_indices.shape[0], 1), device=device ) # (n_samples, 1) t_starts = torch.rand_like(sigmas) t_ends = torch.rand_like(sigmas) + 1.0 _, _, _ = rendering( t_starts, t_ends, ray_indices=ray_indices, n_rays=3, rgb_sigma_fn=rgb_sigma_fn, ) @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_grads(): ray_indices = torch.tensor( [0, 2, 2, 2, 2], dtype=torch.int64, device=device ) # (samples,) packed_info = torch.tensor( [[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device ) sigmas = torch.tensor([[0.4], [0.8], [0.1], [0.8], [0.1]], device="cuda") sigmas.requires_grad = True t_starts = torch.rand_like(sigmas) t_ends = t_starts + 1.0 weights_ref = torch.tensor( [[0.3297], [0.5507], [0.0428], [0.2239], [0.0174]], device="cuda" ) sigmas_grad_ref = torch.tensor( [[0.6703], [0.1653], [0.1653], [0.1653], [0.1653]], device="cuda" ) # naive impl. trans from sigma trans = render_transmittance_from_density( t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 ) weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts))) weights.sum().backward() sigmas_grad = sigmas.grad.clone() sigmas.grad.zero_() assert torch.allclose(weights_ref, weights, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) # naive impl. 
trans from alpha trans = render_transmittance_from_density( t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3 ) weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts))) weights.sum().backward() sigmas_grad = sigmas.grad.clone() sigmas.grad.zero_() assert torch.allclose(weights_ref, weights, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) weights = render_weight_from_density( t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 ) weights.sum().backward() sigmas_grad = sigmas.grad.clone() sigmas.grad.zero_() assert torch.allclose(weights_ref, weights, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) weights = render_weight_from_density( t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3 ) weights.sum().backward() sigmas_grad = sigmas.grad.clone() sigmas.grad.zero_() assert torch.allclose(weights_ref, weights, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts)) weights = render_weight_from_alpha( alphas, ray_indices=ray_indices, n_rays=3 ) weights.sum().backward() sigmas_grad = sigmas.grad.clone() sigmas.grad.zero_() assert torch.allclose(weights_ref, weights, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts)) weights = render_weight_from_alpha( alphas, packed_info=packed_info, n_rays=3 ) weights.sum().backward() sigmas_grad = sigmas.grad.clone() sigmas.grad.zero_() assert torch.allclose(weights_ref, weights, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) if __name__ == "__main__": test_render_visibility() test_render_weight_from_alpha() test_render_weight_from_density() test_accumulate_along_rays() test_rendering() test_grads() ================================================ FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_resampling.py ================================================ 
import pytest import torch from nerfacc import pack_info, ray_marching, ray_resampling device = "cuda:0" batch_size = 128 @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") def test_resampling(): rays_o = torch.rand((batch_size, 3), device=device) rays_d = torch.randn((batch_size, 3), device=device) rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True) ray_indices, t_starts, t_ends = ray_marching( rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3, ) packed_info = pack_info(ray_indices, n_rays=batch_size) weights = torch.rand((t_starts.shape[0],), device=device) packed_info, t_starts, t_ends = ray_resampling( packed_info, t_starts, t_ends, weights, n_samples=32 ) assert t_starts.shape == t_ends.shape == (batch_size * 32, 1) if __name__ == "__main__": test_resampling() ================================================ FILE: utilities/utils.py ================================================ import numpy as np import cv2 from PIL import Image, ImageChops import os import time import torch from PIL import Image, ImageDraw, ImageFont exp_time = str(time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(time.time()))) device = "cuda" if torch.cuda.is_available() else "cpu" print(f"Using {device} device") def crop_a_set_of_images(*image_path): from PIL import ImageChops, Image imgs = [] bboxes = [] for im_path in image_path: im = Image.open(im_path) bg = Image.new(im.mode, im.size, im.getpixel((0, 0))) diff = ImageChops.difference(im, bg) diff = ImageChops.add(diff, diff, 2.0, -5) bbox = diff.getbbox() imgs.append(im) bboxes.append(bbox) bbox_aggre = np.asarray(bboxes) bbox_min = np.min(bbox_aggre, 0) bbox_max = np.max(bbox_aggre, 0) bbox_common = (bbox_min[0], bbox_min[1], bbox_max[2], bbox_max[3]) for idx, img in enumerate(imgs): img = img.crop(bbox_common) img.save(image_path[idx]) pass def crop_image_based_on_ref_image(ref_img_path, *img_path): from PIL import ImageChops, Image ref_im = Image.open(ref_img_path) bg = 
Image.new(ref_im.mode, ref_im.size, ref_im.getpixel((0, 0))) diff = ImageChops.difference(ref_im, bg) diff = ImageChops.add(diff, diff, 2.0, -5) bbox = diff.getbbox() for idx, im_path in enumerate(img_path): img = Image.open(im_path) img = img.crop(bbox) img.save(im_path) def angular_error_map(N1, N2): dot = np.sum(np.multiply(N1, N2), axis=-1) dot = np.clip(dot, -1., 1.) return np.rad2deg(np.arccos(dot)) def crop_mask(mask): if mask.dtype is not np.uint8: mask = mask.astype(np.uint8) * 255 im = Image.fromarray(mask) bg = Image.new(im.mode, im.size, im.getpixel((0, 0))) diff = ImageChops.difference(im, bg) diff = ImageChops.add(diff, diff, 2.0, 0) bbox = diff.getbbox() return bbox def crop_image_by_mask(img, mask): bbox = crop_mask(mask) try: crop_img = img.copy()[bbox[1]:bbox[3], bbox[0]:bbox[2]] except: crop_img = img.copy() return crop_img def save_video(vpath, images, fps): height, width, _ = images[0].shape fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') video = cv2.VideoWriter(vpath, fourcc, fps, (width, height)) for image in images: video.write(image) cv2.destroyAllWindows() video.release() def toRGBA(img, mask): img = cv2.cvtColor(img, cv2.COLOR_RGB2RGBA) img[:, :, 3] = (mask.astype(bool)*255).astype(np.uint8) return img