### Update
- **2024/09/30**: Real-world raw data and step-by-step data pre-processing instructions are available. See [here](./data_capture_and_preprocessing/README.md).
Fast and fine-grained 3D reconstruction from multi-view surface normal maps.
### Quick Start
Code was tested on Ubuntu 18.04 (WSL2) using Python 3.8, PyTorch 2.1.0, and CUDA 11.8 on an Nvidia RTX4090Ti (24GB).
**Before you start, please ensure CUDA is installed in your environment ([11.8 can be found here](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_local)).**
It is required by [tiny-cuda-nn](https://github.com/NVlabs/tiny-cuda-nn).
You should see something like the following after typing `nvcc --version`
```commandline
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
```
Clone the repository and prepare the conda environment:
```commandline
git clone https://github.com/CyberAgentAILab/SuperNormal.git
cd SuperNormal
. ./create_env.sh
```
Download data (~1.8GB):
```commandline
./download_data.sh
```
Run on the DiLiGenT-MV benchmark objects or on our captured objects:
```commandline
./run_diligent.sh # Training should take about 50 seconds per object
```
or
```commandline
./run_own_object.sh # Training should take about 5 minutes per object
```
Results are saved under `./exp`.
NOTE: If a `RuntimeError` like the one below occurs, running `apt install ninja-build` may resolve it.
```
RuntimeError: Ninja is required to load C++ extensions
```
### Hyperparameter tuning tips
Training hyperparameters are defined in `./configs/*.conf`.
Some important hyperparameters are:
- `dataset.normal_dir`: You can choose normal maps estimated by different methods as input for DiLiGenT-MV benchmark objects.
- `train.end_iter`: The number of iterations for training. Should be adjusted according to the number of views and normal map resolutions.
- `train.increase_bindwidth_every`: A strategy used in [Neuralangelo](https://research.nvidia.com/labs/dir/neuralangelo/) to progressively activate finer hash grid during training. Less than `end_iter`/`model.encoding.n_levels` should be fine.
- `train.batch_size`: Number of patches in each batch for training. Should be adjusted according to the GPU memory.
- `train.patch_size`: Better to be fixed to 3, i.e., each patch is 3x3. A larger patch size will cause inaccurate volume rendering results for boundary pixels in a patch.
### Modifications to NerfAcc
We add several functions to the original [NerfAcc](https://www.nerfacc.com) to adapt it to patch-based volume rendering.
The key new functions (which are indicated by `patch_based` in function name) are in
[third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_weight.cu](./third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_weight.cu)
and [third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/vol_rendering.py](./third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/vol_rendering.py).
### Acknowledgement
This repo is built upon [NeuS](https://github.com/Totoro97/NeuS) and benefits from the amazing [tiny-cuda-nn](https://github.com/NVlabs/tiny-cuda-nn) and [NerfAcc](https://www.nerfacc.com).
We also learned a lot from [instant-nsr-pl](https://github.com/bennyguo/instant-nsr-pl).
### Citation
If you find our work useful in your research, please consider citing:
```bibtex
@inproceedings{supernormal2024cao,
title={SuperNormal: {N}eural Surface Reconstruction via Multi-View Normal Integration},
author={Cao Xu and Taketomi Takafumi},
booktitle={CVPR},
year={2024}
}
```
================================================
FILE: __init__.py
================================================
================================================
FILE: config/diligent.conf
================================================
general {
dataset_class = models.dataset_loader.Dataset
renderer_class = models.renderer.NeuSRenderer
base_exp_dir = ./exp/diligent_mv/CASE_NAME
recording = [
./,
./models
]
}
dataset {
data_dir = data/diligent_mv_normals/CASE_NAME/
normal_dir = normal_world_space_sdmunips # choose normal maps estimated by different methods, should be in the world space
cameras_name = cameras_sphere.npz
exclude_views = [0, 4, 8, 12, 16] # index of views to exclude for test purpose, 0-based
upsample_factor = 1
}
train {
learning_rate = 5e-4
learning_rate_alpha = 0.05
end_iter = 5000
increase_bindwidth_every = 350 # following neuralangelo's strategy
gradient_method = dfd # dfd or fd or ad, for directional finite difference, finite difference, and auto-differentiation
batch_size = 2048
patch_size = 3 # i.e., each training step samples 2048 patches of 3x3 pixels
warm_up_end = 50
use_white_bkgd = False
loss_type = l2 # for normal loss
normal_weight = 1
eikonal_weight = 1
mask_weight = 1
}
val {
save_freq = 1000
val_normal_freq = 5001
val_normal_resolution_level = 1
gradient_method = dfd # dfd or fd or ad, can be different from training
val_mesh_freq = 10000
val_mesh_res = 512
report_freq = 100
eval_metric_freq = 5000
}
model {
sdf_network {
d_out = 1
d_in = 3
d_hidden = 64
n_layers = 1
skip_in = [-1] # -1 for no skip connection
bias = 0.6
geometric_init = True
weight_norm = True
input_concat = True # concat input positions and encoded features
}
variance_network {
init_val = 0.5
}
ray_marching {
start_step_size = 1e-2
end_step_size = 1e-3
occ_threshold = 0.1
occ_sigmoid_k = 80.0
occ_resolution = 128
occ_update_freq = 8 # batches
}
encoding{
otype=HashGrid,
n_levels=14
n_features_per_level=2
log2_hashmap_size=19
base_resolution=32
per_level_scale=1.3195079107728942
}
}
================================================
FILE: config/own_objects.conf
================================================
general {
dataset_class = models.dataset_loader.Dataset
renderer_class = models.renderer.NeuSRenderer
base_exp_dir = ./exp/own_objects/CASE_NAME
recording = [
./,
./models
]
}
dataset {
data_dir = data/own_objects_normals/CASE_NAME/
normal_dir = normal_world_space_sdmunips
cameras_name = cameras_sphere.npz
exclude_views = [] # index of views to exclude, 0-based
upsample_factor = 1
}
train {
learning_rate = 5e-4
learning_rate_alpha = 0.05
end_iter = 30000
increase_bindwidth_every = 2000 # following neuralangelo's strategy
gradient_method = dfd # dfd or fd or ad, for directional finite difference, finite difference, and auto-differentiation
batch_size = 2048
patch_size = 3 # i.e., each training step samples 2048 patches of 3x3 pixels
warm_up_end = 500
use_white_bkgd = False
loss_type = l2 # for normal loss
normal_weight = 1
eikonal_weight = 1
mask_weight = 1
}
val {
save_freq = 10000
val_normal_freq = 30000
val_normal_resolution_level = 2
gradient_method = dfd # dfd or fd or ad, can be different from training
val_mesh_freq = 30000
val_mesh_res = 1024
report_freq = 100
eval_metric_freq = 30000
}
model {
sdf_network {
d_out = 1
d_in = 3
d_hidden = 64
n_layers = 1
skip_in = [-1]
bias = 0.8
geometric_init = True
weight_norm = True
input_concat = True # concat input positions and encoded features
}
variance_network {
init_val = 0.5
}
ray_marching
{
start_step_size = 1e-2
end_step_size = 1e-3
occ_threshold = 0.1
occ_sigmoid_k = 80.0
occ_resolution = 128
occ_update_freq = 8 # batches
}
encoding{
otype=HashGrid,
n_levels=14
n_features_per_level=2
log2_hashmap_size=19
base_resolution=32
per_level_scale=1.3195079107728942
}
}
================================================
FILE: create_env.sh
================================================
# Build the `sn` conda environment used by this repository.
# The top-level README invokes this file as `. ./create_env.sh`, i.e. it is
# sourced into the current shell rather than executed as a sub-process.
conda deactivate
# Remove any existing `sn` environment, then recreate it with Python 3.8.
conda remove -y -n sn --all
conda create -y -n sn python=3.8
conda activate sn
# PyTorch 2.1.0 and companions, taken from the CUDA 11.8 (cu118) wheel index.
pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
# install tiny-cuda-nn
# Prepend the CUDA binaries and stub libraries to the search paths first.
export PATH="/usr/local/cuda/bin:$PATH"
export LIBRARY_PATH="/usr/local/cuda/lib64/stubs:$LIBRARY_PATH"
# The PyTorch bindings are installed from a pinned tiny-cuda-nn commit.
pip install git+https://github.com/NVlabs/tiny-cuda-nn/@2ec562e853e6f482b5d09168705205f46358fb39#subdirectory=bindings/torch
# Editable install of the NerfAcc 0.3.5 copy bundled under third_parties.
pip install -e ./third_parties/nerfacc-0.3.5/nerfacc-0.3.5/
# Remaining Python dependencies, pinned to exact versions.
pip install opencv-python==4.8.1.78 trimesh==3.23.5 open3d==0.17 pyvista==0.42.3 scipy==1.10.1 scikit-image==0.21.0 pyhocon==0.3.59 pyexr==0.3.10 tensorboard==2.14.0 icecream==2.1.3 PyMCubes==0.1.4 pyembree==0.2.11
================================================
FILE: data_capture_and_preprocessing/README.md
================================================
This is a step-by-step guide to preprocess the raw images captured by an iPhone for the MVPS task.
You can download our raw images using the following command *(~6 GB per object)*.
```
gdown 'https://drive.google.com/file/d/1BcCuZR0C-snmCNf8iGhkFgkQ6arfcQ-L/view?usp=sharing' --fuzzy
unzip flower_girl.zip
rm flower_girl.zip
gdown 'https://drive.google.com/file/d/12QzgRbOjBSx295BS4zihnOjcdYh7ZaP9/view?usp=sharing' --fuzzy
unzip lion.zip
rm lion.zip
gdown 'https://drive.google.com/file/d/1cvKbI5VvDhsuA4a06rYqqoAtQd8GtyeI/view?usp=sharing' --fuzzy
unzip dog.zip
rm dog.zip
```
## File structure
You should have the following file structure under each object's folder:
```
- RAW
- mask
- cameras.xml
```
The `RAW` folder contains all the DNG images captured by an iPhone.
The `mask` folder contains the foreground masks for each view.
The `cameras.xml` contains the calibrated camera parameters using [Metashape](https://oakcorp.net/agisoft/download/).
## Step-by-step data pre-processing
First we convert the DNG images to PNG file format.
```
# pip install rawpy
python iPhone_mvps_data_preprocessing.py --data_dir <path/to/obj_folder>
```
Now the file structure looks like this
```
- RAW
- mvps_png_full
- sfm_png_full
- mask
- cameras.xml
```
The `mvps_png_full` folder contains the pre-processed images for photometric stereo, and the `sfm_png_full` folder contains the images for camera calibration using Structure from Motion.
In each view, we first take an image in ambient light and then additionally illuminate the object with an active light source.
So the first image in each view is collected in `sfm_png_full`.
### Mask preparation
Now we prepare the foreground masks for each view.
We used SAM to interactively segment the foreground objects.
Please install SAM according to the [official instructions](https://github.com/facebookresearch/segment-anything).
After installation, run the following command to segment the foreground objects for all views:
```
python sam_mvps.py --data_dir <path/to/obj_folder>/mvps_png_full/ --checkpoint <path/to/sam_vit_h_checkpoint.pth>
```
This will pop up a window where you can interactively segment the foreground objects.
Select points on the object to segment the foreground object, and press `Esc` to check the intermediate results.
Continue to select points until you are satisfied with the segmentation results, and press `Enter` to save the mask.
The process will be repeated for all views.
The same mask will be saved in two places: `obj_folder/mask` and the corresponding folder containing the image from the same viewpoint.
The latter will be used for normal map estimation.
### Camera calibration
In [MetaShape](https://oakcorp.net/agisoft/download/), import the images in the `sfm_png_full` folder and run the camera calibration process.
```
[Workflow] -> [Add Folder] -> select `sfm_png_full` -> select single cameras -> [Workflow] -> [Align Photos]
```
After camera calibration, export the camera parameters to `cameras.xml`.
```
[File] -> [Export] -> [Export Cameras]
```
The resulting `cameras.xml` file is what we have put in the object folder.
### Normal map estimation
Install [SDM-UniPS](https://github.com/satoshi-ikehata/SDM-UniPS-CVPR2023) and run the following command to generate the normal maps for each view:
```
python sdm_unips/main.py --session_name YOUR_SESSION_NAME --test_dir <path/to/obj_folder>/mvps_png_full --checkpoint <path/to/sdm_unips_checkpoint_dir> --scalable --target normal
```
Tips: Prepare the mask for each view to improve the normal estimation results. This should be done when you have completed the previous mask segmentation step.
The original SDM-UniPS code outputs normal maps in the PNG format. You can instead get EXR format by replacing [this line](https://github.com/satoshi-ikehata/SDM-UniPS-CVPR2023/blob/96e68f353173c2ae85bfe609e4728a19a2f8c92e/sdm_unips/modules/builder/builder.py#L162) with the following one:
```
pyexr.write(f'{testdata.data.data_workspace}/normal.exr', nout)
```
Remember to install the [pyexr](https://github.com/tvogels/pyexr) package and import it in the file.
After normal estimation, we collect the normal maps in the same folder.
Since SDM-UniPS estimates normal maps in camera space, we also convert them to the world space using the camera parameters from the previous step.
```
python gather_and_convert_normal_map.py --data_dir <path/to/obj_folder> --sdm_unips_result_dir <path/to/sdm_unips_results>
```
The file structure is now as follows:
```
- RAW
- mvps_png_full
- sfm_png_full
- mask
- normal_camera_space_sdmunips
- normal_world_space_sdmunips
- cameras.xml
- results # if your SDM-UniPS output is in this folder
```
### Convert camera parameters to NeuS format
The last step is to convert the camera parameters to the NeuS format.
```
python metashape2neus.py --xml_path <path/to/obj_folder>/cameras.xml
```
This will create a `cameras_sphere.npz` file in the same folder as `cameras.xml`.
We also provide the converter to NeuS2 format. Check `metashape2neus2_json_and_images.py` for more details.
## Tips for capturing your own data
We used the iPhone's built-in camera app to take the images. Here are some tips for successful reconstruction:
- Use a tripod to stabilize the camera.
- Use a remote shutter release to avoid camera shake.
- Keep the same focus point in each view. On iPhone, you can press and hold the screen to lock the focus point.
- Use a white/black background to simplify the segmentation process.
- Use a turntable to capture the object from different angles.
- Place the object on a textured surface to help the Structure from Motion process.
- Place the object in the center of the image.
- We used a [video light](https://www.ulanzi.com/collections/lighting/products/mini-led-video-light-ulanzi-vl49-1672) to illuminate the object from different angles in each view. Other light sources like a ring light/flashlight may also work.
- In each view, vary the light source's position sufficiently around the camera. We used 12 different light positions in our setup.
- Reduce the exposure if the captured images are overexposed.
The above capture process can be done with off-the-shelf equipment, but it is tedious.
It would be more convenient if you could build a custom rig to automate the capture process, such as [this example](https://youtu.be/zyEw-1QUlkU?si=8RvYC23emoP8TXrU).
================================================
FILE: data_capture_and_preprocessing/gather_and_convert_normal_map.py
================================================
import os
import cv2
import pyexr
from glob import glob
import numpy as np
import shutil
from bs4 import BeautifulSoup # $ pip install beautifulsoup4 lxml
import argparse
# Gather the per-view normal maps written by SDM-UniPS, copy them into
# `<data_dir>/normal_camera_space_sdmunips`, and write world-space versions
# into `<data_dir>/normal_world_space_sdmunips`, using the camera-to-world
# rotations stored in `<data_dir>/cameras.xml`.
parser = argparse.ArgumentParser(
    description="Collect SDM-UniPS normal maps and rotate them from camera space to world space.")
parser.add_argument("--sdm_unips_result_dir", type=str, default="../../SDM-UniPS-CVPR2023/flower_girl/results")
parser.add_argument("--data_dir", type=str, default="./flower_girl")
args = parser.parse_args()

xml_path = os.path.join(args.data_dir, "cameras.xml")
# The number of views is taken from the `view_*.data` folders in the result directory.
num_views = len(glob(os.path.join(args.sdm_unips_result_dir, "view_*.data")))
normal_map_camera_dir = os.path.join(args.data_dir, "normal_camera_space_sdmunips")
normal_map_world_dir = os.path.join(args.data_dir, "normal_world_space_sdmunips")

# create directories
os.makedirs(normal_map_camera_dir, exist_ok=True)
os.makedirs(normal_map_world_dir, exist_ok=True)

with open(xml_path, "r") as f:
    xml_data = f.read()
bs_data = BeautifulSoup(xml_data, "xml")

# Parse the XML once: view index -> 3x3 rotation block of the camera-to-world
# transform. The view index is the trailing "_"-separated token of the camera
# label. Cameras without a <transform> element are left out, and the first
# entry wins if a view index appears more than once.
rotation_by_view = {}
for tag in bs_data.find_all('camera'):
    transform_tag = tag.find("transform")
    if transform_tag is None:
        continue
    view_idx = int(tag.get("label").split("_")[-1])
    C2W = np.array([float(v) for v in transform_tag.text.split()]).reshape((4, 4))
    rotation_by_view.setdefault(view_idx, C2W[:3, :3])

for i in range(num_views):
    view_dir = os.path.join(args.sdm_unips_result_dir, f"view_{i:02d}.data")
    if not os.path.exists(view_dir):
        continue

    # copy normal map
    normal_map_file = os.path.join(view_dir, "normal.exr")
    new_normal_map_file = os.path.join(normal_map_camera_dir, f"{i:02d}.exr")
    shutil.copy(normal_map_file, new_normal_map_file)

    # Without a pose for this view there is nothing valid to rotate with, so
    # skip the world-space output instead of reusing another view's rotation.
    R = rotation_by_view.get(i)
    if R is None:
        print(f"[WARN] no camera transform for view {i:02d} in {xml_path}; world-space normal map skipped")
        continue

    # convert normal map to world space
    normal_map_camera = pyexr.read(new_normal_map_file)
    normal_map_camera[..., [1, 2]] *= -1  # flip y and z axes to match the OpenCV convention: X right, Y down, Z front
    H, W = normal_map_camera.shape[:2]
    normal_world = (R @ normal_map_camera.reshape(-1, 3).T).T.reshape([H, W, 3])
    pyexr.write(os.path.join(normal_map_world_dir, f"{i:02d}.exr"), normal_world)
================================================
FILE: data_capture_and_preprocessing/iPhone_mvps_data_preprocessing.py
================================================
import rawpy, os
from glob import glob
import cv2
import numpy as np
import os
from tqdm import tqdm
import argparse
# Convert the DNG captures under `<data_dir>/RAW` into 16-bit PNG files.
# Every `--num_img_per_view` consecutive files (in sorted name order) form one
# view. All images of a view go to `mvps_png_*/view_XX.data/LYY.png`, and the
# first image of each view is also written to `sfm_png_*/XX.png`.
parser = argparse.ArgumentParser(
    description="Convert iPhone DNG captures into per-view PNG folders for MVPS and SfM.")
parser.add_argument("--data_dir", type=str, default="./flower_girl")
parser.add_argument("--num_img_per_view", type=int, default=13)
arg = parser.parse_args()
if arg.num_img_per_view <= 0:
    parser.error("--num_img_per_view must be a positive integer")

dng_list = sorted(glob(os.path.join(arg.data_dir, "RAW", "*.DNG")))
num_image_per_view = arg.num_img_per_view
num_view = len(dng_list) // num_image_per_view
# Images that do not fill a complete view are not processed; report them.
num_leftover = len(dng_list) % num_image_per_view
if num_leftover:
    print(f"[WARN] {num_leftover} trailing DNG file(s) do not form a complete view "
          f"of {num_image_per_view} images and will be ignored")

resize_factor = 1  # resize the png image to 1/2, 1/4, or 1
suffix_by_factor = {1: "full", 1 / 2: "half", 1 / 4: "quarter"}
if resize_factor not in suffix_by_factor:
    raise ValueError(f"Unsupported resize_factor {resize_factor}; expected 1, 1/2 or 1/4")
size_suffix = suffix_by_factor[resize_factor]
sfm_data_dir = os.path.join(arg.data_dir, f"sfm_png_{size_suffix}")
mvps_data_dir = os.path.join(arg.data_dir, f"mvps_png_{size_suffix}")
os.makedirs(sfm_data_dir, exist_ok=True)
os.makedirs(mvps_data_dir, exist_ok=True)

for view_idx in tqdm(range(num_view)):
    view_dir = os.path.join(mvps_data_dir, f"view_{view_idx:02d}.data")
    if os.path.exists(view_dir):
        # An existing view folder is treated as already converted. Delete the
        # folder to force this view to be regenerated.
        continue
    os.makedirs(view_dir, exist_ok=True)

    view_dng_list = dng_list[view_idx * num_image_per_view: (view_idx + 1) * num_image_per_view]
    for dng_idx, dng_path in enumerate(view_dng_list):
        with rawpy.imread(dng_path) as raw:
            developed = raw.postprocess(no_auto_bright=True, output_bps=16)
            # Reverse the channel order for OpenCV and make the array a
            # contiguous uint16 buffer before resizing/writing.
            rgb = np.ascontiguousarray(developed[..., ::-1], dtype=np.uint16)
        rgb_resized = cv2.resize(rgb, (0, 0), fx=resize_factor, fy=resize_factor)

        # choose the first image in each view for SfM
        if dng_idx == 0:
            cv2.imwrite(os.path.join(sfm_data_dir, f"{view_idx:02d}.png"), rgb_resized)
        cv2.imwrite(os.path.join(view_dir, f"L{dng_idx:02d}.png"), rgb_resized)
================================================
FILE: data_capture_and_preprocessing/metashape2neus.py
================================================
import os.path
import xml
from bs4 import BeautifulSoup # pip install beautifulsoup4 lxml
import numpy as np
# details of camera normalization can be found in Sec. C.3 in https://openaccess.thecvf.com/content/CVPR2023/supplemental/Cao_Multi-View_Azimuth_Stereo_CVPR_2023_supplemental.pdf
def normalize_camera(R_list, t_list, camera2object_ratio=3):
    """Estimate the offset and scale used to normalize a set of cameras.

    For every view the camera center ``-R.T @ t`` and the principal axis
    (third row of ``R``) are computed. The offset is the least-squares
    solution of ``sum(I - v v^T) x = sum(I - v v^T) c`` over all views, i.e.
    the point closest to all principal-axis lines. The scale is the largest
    camera-center distance to that offset divided by ``camera2object_ratio``.

    Args:
        R_list: per-view world-to-camera rotations, each indexable as a 3x3 array.
        t_list: per-view world-to-camera translations, one per entry of ``R_list``.
        camera2object_ratio: divisor applied to the largest camera distance.

    Returns:
        Tuple ``(offset, scale)``.
    """
    identity = np.eye(3)
    lhs = 0
    rhs = 0
    centers = []
    for idx, rotation in enumerate(R_list):
        translation = t_list[idx]
        center = - rotation.T @ translation  # in world coordinate
        centers.append(center)

        axis = rotation[2][:, None]  # the camera's principal axis in the world coordinates
        off_axis_projector = identity - axis @ axis.T
        lhs += off_axis_projector
        rhs += center.T @ off_axis_projector

    offset = np.linalg.lstsq(lhs, np.squeeze(rhs), rcond=None)[0]

    distances = []
    for center in centers:
        delta = np.squeeze(center) - offset
        distances.append(np.sqrt(np.sum(delta ** 2)))

    scale = np.max(distances) / camera2object_ratio
    return offset, scale
def make4x4(P):
    """Embed a 2-D matrix with 3 or 4 rows and 3 or 4 columns into a 4x4 identity.

    The entries of ``P`` overwrite the top-left corner of ``np.eye(4)``; the
    remaining entries keep their identity values.
    """
    shape = P.shape
    assert shape[-1] in (3, 4)
    assert len(shape) == 2
    assert shape[0] in (3, 4)

    n_rows, n_cols = shape
    padded = np.eye(4)
    padded[:n_rows, :n_cols] = P
    return padded
class MetashapePoseLoader:
    """Convert a Metashape ``cameras.xml`` export into ``cameras_sphere.npz``.

    All work happens in the constructor: the XML is parsed, one
    ``world_mat_<view>`` (4x4 intrinsics times world-to-camera) is built per
    camera, a shared ``scale_mat_<view>`` is derived with ``normalize_camera``,
    and everything is saved next to the XML file.
    """

    def __init__(self, xml_path, camera2object_ratio):
        """Parse ``xml_path`` and write ``cameras_sphere.npz`` into its folder.

        Args:
            xml_path: path to the Metashape camera export.
            camera2object_ratio: forwarded to ``normalize_camera``.

        Raises:
            ValueError: if the file lists no camera, or a camera has no
                ``<transform>`` element.
        """
        with open(xml_path, "r") as xml_file:
            xml_data = xml_file.read()
        bs_data = BeautifulSoup(xml_data, "xml")

        # Only the first <resolution> and <calibration> entries are used.
        resolution = bs_data.find_all('resolution')[0]
        img_width = int(resolution.get("width"))
        img_height = int(resolution.get("height"))

        calibration = bs_data.find_all('calibration')[0]
        focal = float(calibration.find("f").text)
        cx_offset = float(calibration.find("cx").text)
        cy_offset = float(calibration.find("cy").text)

        # cx/cy are stored as offsets from the image center.
        K = np.array([[focal, 0, (img_width-1)/2 + cx_offset],
                      [0, focal, (img_height-1)/2 + cy_offset],
                      [0, 0, 1]])

        R_list = []
        t_list = []
        view_indices = []
        camera_sphere = dict()
        for tag in bs_data.find_all('camera'):
            img_name = tag.get("label")
            view_idx = int(img_name.split("_")[-1])

            transform_tag = tag.find("transform")
            if transform_tag is None:
                raise ValueError(
                    f"Camera '{img_name}' in {xml_path} has no <transform> element; "
                    "every camera needs a pose before conversion")

            # camera to world transform
            C2W = np.array([float(v) for v in transform_tag.text.split()]).reshape((4, 4))
            assert int(img_name) == view_idx

            W2C = np.linalg.inv(C2W)
            R_list.append(W2C[:3, :3])
            t_list.append(W2C[:3, 3])
            view_indices.append(view_idx)
            camera_sphere[f"world_mat_{view_idx}"] = make4x4(K) @ W2C

        if not view_indices:
            raise ValueError(f"No <camera> entries found in {xml_path}")

        offset, scale = normalize_camera(R_list, t_list, camera2object_ratio=camera2object_ratio)
        print("offset", offset, "scale", scale)

        scale_mat = np.eye(4)
        scale_mat[:3, :3] *= scale
        scale_mat[:3, 3] = offset
        # Key the scale matrices by the same view indices as the world
        # matrices so every `world_mat_k` has a matching `scale_mat_k`.
        for view_idx in view_indices:
            camera_sphere[f"scale_mat_{view_idx}"] = scale_mat

        data_dir = os.path.dirname(xml_path)
        np.savez(os.path.join(data_dir, 'cameras_sphere.npz'), **camera_sphere)
# Command-line entry point: convert the given Metashape XML export.
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--xml_path", type=str, required=True)
    cli.add_argument("--ratio", type=float, default=10)
    cli_args = cli.parse_args()

    MetashapePoseLoader(cli_args.xml_path, camera2object_ratio=cli_args.ratio)
================================================
FILE: data_capture_and_preprocessing/metashape2neus2_json_and_images.py
================================================
from glob import glob
import os
import numpy as np
import cv2
from bs4 import BeautifulSoup
from metashape2neus import normalize_camera, make4x4
import json
import argparse
def create_json_file(data, filename):
    """Write ``data`` to ``filename`` as JSON with a 4-space indent, overwriting the file."""
    with open(filename, 'w') as json_file:
        json.dump(data, json_file, indent=4)
# Build the `neus2_input` folder for one object:
#   1. merge every `sfm_png_full/*.png` image with its mask into an RGBA PNG
#      under `neus2_input/images`;
#   2. write `neus2_input/transform.json` with the per-camera poses and
#      intrinsics read from `cameras.xml`, plus a global scale and offset.
parser = argparse.ArgumentParser(
    description="Convert Metashape cameras and masked images into a NeuS2 input folder.")
parser.add_argument('--data_dir', type=str, default="./flower_girl")
arg = parser.parse_args()

data_dir = os.path.join(arg.data_dir, "sfm_png_full")
mask_dir = os.path.join(arg.data_dir, "mask")
xml_path = os.path.join(arg.data_dir, "cameras.xml")
target_dir = os.path.join(arg.data_dir, "neus2_input", "images")
os.makedirs(target_dir, exist_ok=True)

# load images and masks and save them as rgba images
img_list = sorted(glob(os.path.join(data_dir, "*.png")))
num_view = len(img_list)
print(num_view)
if num_view == 0:
    raise FileNotFoundError(f"No PNG images found in {data_dir}")

img_h = img_w = None
for i, img_path in enumerate(img_list):
    # The i-th image (in sorted order) is paired with mask `<i>.png`.
    mask_path = os.path.join(mask_dir, f"{i:02d}.png")
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image {img_path}")
    mask = cv2.imread(mask_path)
    if mask is None:
        raise FileNotFoundError(f"Could not read mask {mask_path}")
    if i == 0:
        # The size recorded in the JSON header comes from the first image.
        img_h, img_w = img.shape[:2]

    img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)
    img[..., 3] = mask[..., 0]  # first mask channel becomes the alpha channel
    new_img_path = os.path.join(target_dir, f"{i:02d}.png")
    cv2.imwrite(new_img_path, img)
    print(f"Saved {new_img_path}")

data = {
    "from_na": True,
    "w": img_w,
    "h": img_h,
    "aabb_scale": 1.0,
    "frames": [],
    "scale": 1,  # placeholder, replaced below
    "offset": [1, 1, 1],  # placeholder, replaced below
}

with open(xml_path, "r") as xml_file:
    xml_data = xml_file.read()
bs_data = BeautifulSoup(xml_data, "xml")

# Only the first <resolution> and <calibration> entries are used.
resolution = bs_data.find_all('resolution')[0]
img_width = int(resolution.get("width"))
img_height = int(resolution.get("height"))
calibration = bs_data.find_all('calibration')[0]
focal = float(calibration.find("f").text)
cx_offset = float(calibration.find("cx").text)
cy_offset = float(calibration.find("cy").text)
# cx/cy are stored as offsets from the image center.
K = np.array([[focal, 0, (img_width - 1) / 2 + cx_offset],
              [0, focal, (img_height - 1) / 2 + cy_offset],
              [0, 0, 1]])

R_list = []
t_list = []
for tag in bs_data.find_all('camera'):
    img_name = tag.get("label")
    view_idx = int(img_name.split("_")[-1])

    transform_tag = tag.find("transform")
    if transform_tag is None:
        raise ValueError(f"Camera '{img_name}' in {xml_path} has no <transform> element")

    # camera to world transform
    C2W = np.array([float(v) for v in transform_tag.text.split()]).reshape((4, 4))
    print(img_name, view_idx)

    W2C = np.linalg.inv(C2W)
    R_list.append(W2C[:3, :3])
    t_list.append(W2C[:3, 3])

    data["frames"].append({
        "file_path": f"images/{img_name}.png",
        "transform_matrix": C2W.tolist(),
        "intrinsic_matrix": make4x4(K).tolist()
    })

if not R_list:
    raise ValueError(f"No <camera> entries found in {xml_path}")

offset, scale = normalize_camera(R_list, t_list, camera2object_ratio=10)
data["scale"] = float(scale)
data["offset"] = (-offset * scale + 0.5).tolist()
create_json_file(data, os.path.join(arg.data_dir, "neus2_input", 'transform.json'))
================================================
FILE: data_capture_and_preprocessing/sam_mvps.py
================================================
import os.path
from glob import glob
import argparse
import torch.cuda
from segment_anything import SamPredictor, sam_model_registry
# Command-line setup: load the SAM ViT-H model and locate the view folders.
parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint", type=str, default=None)
parser.add_argument("--data_dir", type=str, default="./")
args = parser.parse_args()
sam = sam_model_registry["vit_h"](checkpoint=args.checkpoint)
sam.to(device="cuda")
predictor = SamPredictor(sam)
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
from IPython.display import display, clear_output
# Every entry of data_dir whose name contains ".data" is treated as one view
# folder. Sorting makes the interactive session visit views in a stable order.
obj_dir = sorted(os.path.join(args.data_dir, obj) for obj in os.listdir(args.data_dir) if ".data" in obj)
# Masks are collected in a `mask` folder next to data_dir. abspath() drops a
# trailing slash first, so the result is the same whether or not --data_dir
# ends with "/".
mask_dir = os.path.join(os.path.dirname(os.path.abspath(args.data_dir)), "mask")
os.makedirs(mask_dir, exist_ok=True)
def pick_point(event, x, y, flags, param):
    """OpenCV mouse callback: append each left-click position to the global ``points`` list."""
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    print(f'You selected point ({x}, {y})')
    clicked = np.array([[x, y]])
    points.append(clicked)
def show_mask(mask, ax, random_color=False):
    """Draw ``mask`` on ``ax`` as a semi-transparent colored overlay.

    The overlay color is a fixed blue with alpha 0.6, or a random RGB color
    with alpha 0.6 when ``random_color`` is true.
    """
    rgba = (
        np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
        if random_color
        else np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    )
    height, width = mask.shape[-2:]
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)
def show_points(coords, labels, ax, marker_size=375):
    """Scatter the prompt points on ``ax``: label 1 as green stars, label 0 as red stars."""
    positives = coords[labels == 1]
    negatives = coords[labels == 0]
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    for group, colour in ((positives, 'green'), (negatives, 'red')):
        ax.scatter(group[:, 0], group[:, 1], color=colour, **star_style)
def show_box(box, ax):
    """Draw ``box`` (read as ``[x0, y0, x1, y1]``) on ``ax`` as an unfilled green rectangle."""
    left, top = box[0], box[1]
    width = box[2] - box[0]
    height = box[3] - box[1]
    outline = plt.Rectangle((left, top), width, height, edgecolor='green', facecolor=(0, 0, 0, 0), lw=2)
    ax.add_patch(outline)
# Interactive segmentation, one view folder at a time. A view that already
# contains `mask.png` is skipped, so an interrupted session can be resumed.
for obj_dir_path in obj_dir:
    mask_path = os.path.join(obj_dir_path, "mask.png")
    if os.path.exists(mask_path):
        continue

    # pick one image from the view folder (whichever glob returns first)
    img_list = glob(os.path.join(obj_dir_path, "*.png")) + glob(os.path.join(obj_dir_path, "*.jpg"))
    if not img_list:
        print(f"No .png/.jpg image found in {obj_dir_path}, skipping")
        continue
    img_test_path = img_list[0]
    img_test = cv2.imread(img_test_path)
    if img_test is None:
        print(f"Could not read {img_test_path}, skipping")
        continue

    # cv2.imread returns BGR data, so tell the predictor about the channel order.
    predictor.set_image(img_test, image_format="BGR")
    torch.cuda.synchronize()

    # Module-level list filled by the `pick_point` mouse callback.
    points = []
    while True:
        # Create a window
        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        # Bind the callback function to the window
        cv2.setMouseCallback('image', pick_point)
        while True:
            cv2.imshow('image', img_test)
            if cv2.waitKey(20) & 0xFF == 27:  # Break the loop when 'ESC' is pressed
                break
        cv2.destroyAllWindows()

        if not points:
            # Nothing to prompt SAM with yet; reopen the window.
            print("No point selected yet; click on the object before pressing Esc.")
            continue

        print(f'Selected points: {points}')
        input_point = np.concatenate(points, axis=0).reshape(-1, 2)
        # Every clicked point is used as a foreground (label 1) prompt.
        input_label = np.ones(input_point.shape[0], dtype=np.int64)
        print(f'Input point: {input_point}')

        masks, scores, logits = predictor.predict(
            point_coords=input_point,
            point_labels=input_label,
            multimask_output=False,
        )

        # Preview each returned mask for a few seconds.
        for i, (mask, score) in enumerate(zip(masks, scores)):
            plt.figure(figsize=(10, 10))
            plt.imshow(img_test[:, :, ::-1])
            show_mask(mask, plt.gca())
            show_points(input_point, input_label, plt.gca())
            plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18)
            plt.axis('off')
            plt.show(block=False)
            plt.pause(3)
            plt.close()

        value = input("Press enter to save the mask, or c to continue selecting points: ")
        if value == "":
            break
        # Any other answer (including "c") goes back to point selection.

    # save the mask (the last one returned by the predictor)
    final_mask = masks[-1]
    base_dir = os.path.dirname(img_test_path)
    # Folder names look like `view_XX.data`; XX is the view index.
    view_name = os.path.basename(os.path.normpath(base_dir))
    view_idx = int(view_name.split(".")[0].split("_")[-1])
    mask_path1 = os.path.join(base_dir, "mask.png")
    mask_path2 = os.path.join(mask_dir, f"{view_idx:02d}.png")
    cv2.imwrite(mask_path1, final_mask.astype(np.uint8) * 255)
    cv2.imwrite(mask_path2, final_mask.astype(np.uint8) * 255)
    print(f"Mask saved at {mask_path1} and {mask_path2}")
================================================
FILE: download_data.sh
================================================
#!/usr/bin/env bash
# Download the data archive from Google Drive and unpack it in the current
# directory. The top-level README runs this file as `./download_data.sh`.
# Stop at the first failing step so a failed download is not followed by
# confusing unzip/rm errors.
set -euo pipefail

pip install gdown==5.1.0
gdown 'https://drive.google.com/file/d/1Y3-v5jo-IRyTsPh8srZxIc2v5WZdPly_/view?usp=sharing' --fuzzy
unzip data.zip
rm data.zip
================================================
FILE: exp_runner.py
================================================
import os
import logging
import argparse
import numpy as np
import cv2 as cv
import trimesh
import torch
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from shutil import copyfile
from tqdm.auto import tqdm
from pyhocon import ConfigFactory
from models.fields import SDFNetwork, SingleVarianceNetwork
import pyexr
import time
from utilities.utils import crop_image_by_mask, toRGBA
import open3d as o3d
import pyvista as pv
pv.set_plot_theme("document")
pv.global_theme.transparent_background = True
from models.cd_and_fscore import chamfer_distance_and_f1_score
import csv
from collections import OrderedDict
def get_class(kls):
    """Resolve a dotted path such as ``package.module.Name`` to the object it names.

    Everything before the last dot is imported as a module; the remaining
    components are then looked up attribute by attribute, starting from the
    top-level package returned by ``__import__``.
    """
    module_path, _, _ = kls.rpartition('.')
    target = __import__(module_path)
    for attr_name in kls.split('.')[1:]:
        target = getattr(target, attr_name)
    return target
class Runner:
def __init__(self, conf_text, mode='train', is_continue=False, datadir=None):
    """Set up the experiment directory, dataset, networks, renderer and optimizer.

    Args:
        conf_text: configuration text, parsed with ``ConfigFactory.parse_string``.
        mode: run mode; code/config backup is performed when it starts with ``'train'``.
        is_continue: when false, a new ``exp_<timestamp>`` folder is created
            under ``general.base_exp_dir``. When true, the most recent existing
            ``exp_*`` folder is reused and its latest checkpoint (with an
            iteration not beyond ``train.end_iter``) is loaded if one exists.
        datadir: unused in this constructor; kept for interface compatibility.

    Raises:
        FileNotFoundError: if ``is_continue`` is true but no previous
            ``exp_*`` folder exists under ``general.base_exp_dir``.
    """
    self.device = torch.device('cuda')
    self.conf_text = conf_text
    self.conf = ConfigFactory.parse_string(conf_text)

    exp_root = self.conf['general.base_exp_dir']
    if not is_continue:
        exp_time = str(time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(time.time())))
        exp_time_dir = f"exp_{exp_time}"
    else:
        # Resume from the newest run. The timestamp format sorts
        # chronologically when compared as plain strings.
        previous_runs = []
        if os.path.isdir(exp_root):
            previous_runs = sorted(
                name for name in os.listdir(exp_root)
                if name.startswith("exp_") and os.path.isdir(os.path.join(exp_root, name)))
        if not previous_runs:
            raise FileNotFoundError(f"is_continue=True but no previous 'exp_*' run found under {exp_root}")
        exp_time_dir = previous_runs[-1]

    self.base_exp_dir = os.path.join(exp_root, exp_time_dir)
    os.makedirs(self.base_exp_dir, exist_ok=True)
    self.dataset = get_class(self.conf['general.dataset_class'])(self.conf['dataset'])
    self.iter_step = 0

    # Training parameters
    self.end_iter = self.conf.get_int('train.end_iter')
    self.batch_size = self.conf.get_int('train.batch_size')
    self.patch_size = self.conf.get_int('train.patch_size', default=3)
    self.learning_rate = self.conf.get_float('train.learning_rate')
    self.learning_rate_alpha = self.conf.get_float('train.learning_rate_alpha')
    self.use_white_bkgd = self.conf.get_bool('train.use_white_bkgd')
    self.warm_up_end = self.conf.get_float('train.warm_up_end', default=0.0)
    self.loss_type = self.conf.get('train.loss_type', 'l1')
    self.normal_weight = self.conf.get_float('train.normal_weight')
    self.eikonal_weight = self.conf.get_float('train.eikonal_weight')
    self.mask_weight = self.conf.get_float('train.mask_weight')
    self.increase_bindwidth_every = self.conf.get_int('train.increase_bindwidth_every', default=350)

    # validation parameters
    self.val_normal_freq = self.conf.get_int('val.val_normal_freq')
    self.val_normal_resolution_level = self.conf.get_int('val.val_normal_resolution_level')
    self.val_gradient_method = self.conf.get('val.gradient_method', 'dfd')
    self.val_mesh_freq = self.conf.get_int('val.val_mesh_freq')
    self.val_mesh_res = self.conf.get_int('val.val_mesh_res')
    self.eval_metric_freq = self.conf.get_int('val.eval_metric_freq')
    self.report_freq = self.conf.get_int('val.report_freq')
    self.save_freq = self.conf.get_int('val.save_freq')

    # Ray marching parameters
    self.start_step_size = self.conf.get_float('model.ray_marching.start_step_size', default=1e-2)
    self.end_step_size = self.conf.get_float('model.ray_marching.end_step_size', default=5e-4)
    # Per-iteration decrement of log10(step size), so the step size goes from
    # start_step_size to end_step_size over end_iter iterations.
    self.slop_step = (np.log10(self.start_step_size) - np.log10(self.end_step_size)) / self.end_iter

    # Networks
    params_to_train = []
    self.sdf_network = SDFNetwork(**self.conf['model.sdf_network'], encoding_config=self.conf['model.encoding']).to(self.device)
    self.deviation_network = SingleVarianceNetwork(**self.conf['model.variance_network']).to(self.device)
    params_to_train += list(self.sdf_network.parameters())
    params_to_train += list(self.deviation_network.parameters())

    self.renderer = get_class(self.conf['general.renderer_class'])(self.sdf_network,
                                                                   self.deviation_network,
                                                                   self.conf["train"]["gradient_method"])

    self.optimizer = torch.optim.Adam(params_to_train, lr=self.learning_rate)

    self.is_continue = is_continue
    self.mode = mode

    # Load checkpoint
    latest_model_name = None
    if is_continue:
        checkpoint_dir = os.path.join(self.base_exp_dir, 'checkpoints')
        model_list = []
        if os.path.isdir(checkpoint_dir):
            for model_name in os.listdir(checkpoint_dir):
                # The iteration number is read from characters [5:-4] of the file name.
                if model_name[-3:] == 'pth' and int(model_name[5:-4]) <= self.end_iter:
                    model_list.append(model_name)
        # Order by iteration number rather than by string comparison.
        model_list.sort(key=lambda name: int(name[5:-4]))
        if model_list:
            latest_model_name = model_list[-1]
        else:
            logging.warning('No usable checkpoint found in {}'.format(checkpoint_dir))

    if latest_model_name is not None:
        logging.info('Find checkpoint: {}'.format(latest_model_name))
        self.load_checkpoint(latest_model_name)

    # Backup codes and configs for debug
    if self.mode[:5] == 'train':
        self.file_backup()
def train(self):
    """Run the optimisation loop from ``self.iter_step`` up to ``self.end_iter``.

    Every iteration samples random pixel patches, renders their normals and
    minimises ``normal_weight * normal_loss + mask_weight * mask_loss +
    eikonal_weight * eikonal_loss``. Whenever ``self.iter_step`` is a multiple
    of the matching frequency, a checkpoint is saved, a mesh is extracted,
    normal maps are rendered and evaluation metrics are computed. Metrics are
    appended to ``<base_exp_dir>/eval_metrics.csv``.

    Raises:
        ValueError: if ``self.loss_type`` is neither ``'l1'`` nor ``'l2'``.
    """
    # Fail fast instead of hitting an unbound ``normal_loss`` inside the loop.
    if self.loss_type not in ('l1', 'l2'):
        raise ValueError(f"Unsupported loss_type {self.loss_type!r}; expected 'l1' or 'l2'.")

    print("Start training...")
    self.writer = SummaryWriter(log_dir=os.path.join(self.base_exp_dir, 'logs'))
    self.writer.add_graph(self.sdf_network, verbose=False, input_to_model=torch.randn(1, 3))
    self.update_learning_rate()

    # Create the metrics CSV (with a header) only if it does not exist yet.
    has_test_views = len(self.dataset.exclude_view_list) > 0
    csv_file_path = os.path.join(self.base_exp_dir, "eval_metrics.csv")
    if not os.path.exists(csv_file_path):
        header = ['iter', 'mae_all_view']
        if has_test_views:
            header.append('mae_test_view')
        header += ['CD', 'fscore']
        # newline='' is what the csv module expects for files it writes to.
        with open(csv_file_path, 'w', newline='') as f:
            csv.writer(f).writerow(header)

    # The step-size decay is defined over the whole run (slop_step is divided by
    # end_iter), so offset the loop counter by the iteration we start from.
    # Without this offset a resumed run would restart at start_step_size.
    start_iter = self.iter_step
    log_start_step_size = np.log10(self.start_step_size)
    occ_threshold = self.conf["model.ray_marching"]["occ_threshold"]
    occ_update_freq = self.conf["model.ray_marching"]["occ_update_freq"]
    center = self.patch_size // 2

    pbar = tqdm(range(self.end_iter - start_iter))
    for iter_i in pbar:
        # update ray marching step size
        schedule_step = start_iter + iter_i
        self.renderer.sampling_step_size = 10 ** (log_start_step_size - self.slop_step * schedule_step)

        # update occupancy grid (driven by the loop counter, as before)
        self.renderer.occupancy_grid.every_n_step(step=iter_i,
                                                  occ_eval_fn=self.renderer.occ_eval_fn,
                                                  occ_thre=occ_threshold,
                                                  n=occ_update_freq)

        # following neuralangelo, gradually increase ingp bandwidth
        if self.iter_step % self.increase_bindwidth_every == 0:
            self.renderer.sdf_network.increase_bandwidth()

        # sample patches of pixels for training
        rays_o_patch_all, rays_d_patch_all, marching_plane_normal, V_inverse_patch_all, true_normal, mask = \
            self.dataset.gen_random_patches(self.batch_size, patch_H=self.patch_size, patch_W=self.patch_size)
        rays_o_patch_center = rays_o_patch_all[:, center, center]  # (num_patch, 3)
        rays_d_patch_center = rays_d_patch_all[:, center, center]  # (num_patch, 3)
        near, far = self.dataset.near_far_from_sphere(rays_o_patch_center, rays_d_patch_center)

        # Without mask supervision every pixel contributes to the normal loss.
        if self.mask_weight > 0.0:
            mask = (mask > 0.5).float()
        else:
            mask = torch.ones_like(mask)
        mask_sum = mask.sum() + 1e-5

        # forward rendering
        render_out = self.renderer.render(rays_o_patch_all,
                                          rays_d_patch_all,
                                          marching_plane_normal,
                                          near, far, V_inverse_patch_all)

        if render_out['gradients'] is None:  # all rays are in the zero region of the occupancy grid
            self.update_learning_rate()
            continue

        comp_normal = render_out['comp_normal']  # rendered normal at pixels
        gradients = render_out['gradients']  # gradients at all sampled 3D points
        comp_mask = render_out['weight_sum']  # rendered occupancy at pixels
        samples_per_ray = render_out['samples_per_ray']

        normal_error = (comp_normal - true_normal) * mask
        if self.loss_type == 'l1':
            normal_loss = F.l1_loss(normal_error, torch.zeros_like(normal_error), reduction='sum') / mask_sum
        else:  # 'l2', validated at the top of the method
            normal_loss = F.mse_loss(normal_error, torch.zeros_like(normal_error), reduction='sum') / mask_sum

        gradients_norm = torch.linalg.norm(gradients, ord=2, dim=-1)
        eikonal_loss = F.mse_loss(gradients_norm, torch.ones_like(gradients_norm), reduction='mean')
        mask_loss = F.binary_cross_entropy(comp_mask.clip(1e-5, 1.0 - 1e-5), mask)

        loss = self.normal_weight * normal_loss + \
               self.mask_weight * mask_loss + \
               self.eikonal_weight * eikonal_loss

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.iter_step += 1
        self.update_learning_rate()

        if self.iter_step % self.report_freq == 0:
            message_postfix = OrderedDict(loss=f"{loss:.3e}",
                                          s=f"{self.deviation_network.variance.item():.3e}",
                                          rm_step=f"{self.renderer.sampling_step_size.item():.3e}",
                                          samples_per_ray=f"{samples_per_ray:.1f}")
            pbar.set_postfix(ordered_dict=message_postfix)

        if self.iter_step % self.save_freq == 0:
            self.save_checkpoint()

        if self.iter_step % self.val_mesh_freq == 0:
            self.validate_mesh(resolution=self.val_mesh_res)

        if self.iter_step % self.val_normal_freq == 0:
            for val_idx in range(self.dataset.n_images):
                self.validate_normal_patch_based(idx=val_idx,
                                                 resolution_level=self.val_normal_resolution_level,
                                                 gradient_method=self.val_gradient_method)

        if self.iter_step % self.eval_metric_freq == 0:
            # no gt mesh, skip the evaluation
            if self.dataset.mesh_gt is None:
                continue

            # Lazily build the GT point set once: move the GT mesh into the
            # normalised space, keep only the points hit by camera rays, then
            # map those points back to world space.
            if self.dataset.points_gt is None:
                scale = self.dataset.scale_mats_np[0][0, 0]
                offset = self.dataset.scale_mats_np[0][:3, 3][None]
                self.dataset.mesh_gt.vertices = o3d.utility.Vector3dVector(
                    (np.asarray(self.dataset.mesh_gt.vertices) - offset) / scale)
                mesh = trimesh.Trimesh(np.asarray(self.dataset.mesh_gt.vertices),
                                       np.asarray(self.dataset.mesh_gt.triangles), process=False)
                self.dataset.points_gt = self.find_visible_points(mesh) * scale + offset

            cd, fscore = self.eval_geo(resolution=512)
            print(f'iter: {self.iter_step} cd: {cd:.3e}, fscore: {fscore:.3e}')

            if has_test_views:
                mae_allview, mae_test_view = self.eval_mae(gradient_method=self.val_gradient_method)
                print('MAE (all views) {0}: {1:.5f}'.format(self.val_gradient_method, mae_allview))
                print('MAE (test views) {0}: {1:.5f}'.format(self.val_gradient_method, mae_test_view))
                row = [self.iter_step, mae_allview, mae_test_view, cd, fscore]
            else:
                # Use the configured gradient method here too (it defaults to
                # 'dfd'), so both branches evaluate the same way.
                mae_allview = self.eval_mae(gradient_method=self.val_gradient_method)
                row = [self.iter_step, mae_allview, cd, fscore]

            # write to csv file
            with open(csv_file_path, 'a', newline='') as f:
                csv.writer(f).writerow(row)
def update_learning_rate(self):
    """Set the learning rate of every optimizer param group for the current step.

    Before ``self.warm_up_end`` the factor grows linearly as
    ``iter_step / warm_up_end``; afterwards it follows a cosine curve from 1
    down to ``self.learning_rate_alpha`` at ``self.end_iter``.
    """
    step = self.iter_step
    warm_up = self.warm_up_end
    if step >= warm_up:
        floor = self.learning_rate_alpha
        progress = (step - warm_up) / (self.end_iter - warm_up)
        learning_factor = (np.cos(np.pi * progress) + 1.0) * 0.5 * (1 - floor) + floor
    else:
        learning_factor = step / warm_up

    new_lr = self.learning_rate * learning_factor
    for group in self.optimizer.param_groups:
        group['lr'] = new_lr
def file_backup(self):
    """Snapshot source files and the active config under ``<base_exp_dir>/recording``.

    For every directory listed in ``self.conf['general.recording']`` the
    ``.py`` files directly inside it are copied to
    ``recording/<dir_name>``. The config is copied from ``self.conf_path``;
    if that fails for any reason (for example the attribute is missing),
    ``self.conf_text`` is written out instead.
    """
    recording_root = os.path.join(self.base_exp_dir, 'recording')
    os.makedirs(recording_root, exist_ok=True)

    for dir_name in self.conf['general.recording']:
        cur_dir = os.path.join(recording_root, dir_name)
        os.makedirs(cur_dir, exist_ok=True)
        for f_name in os.listdir(dir_name):
            if f_name.endswith('.py'):
                copyfile(os.path.join(dir_name, f_name), os.path.join(cur_dir, f_name))

    config_copy = os.path.join(recording_root, 'config.conf')
    try:
        copyfile(self.conf_path, config_copy)
    except Exception:
        # Best-effort fallback: save conf_text instead. ``Exception`` (rather
        # than a bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        with open(config_copy, 'w') as f:
            f.write(self.conf_text)
def load_checkpoint(self, checkpoint_name):
    """Restore networks, optimizer and iteration counter from a checkpoint file.

    Args:
        checkpoint_name: file name inside ``<base_exp_dir>/checkpoints``.
    """
    ckpt_path = os.path.join(self.base_exp_dir, 'checkpoints', checkpoint_name)
    state = torch.load(ckpt_path, map_location=self.device)

    # Restore each component from its entry in the checkpoint.
    restore_targets = (
        (self.sdf_network, 'sdf_network_fine'),
        (self.deviation_network, 'variance_network_fine'),
        (self.optimizer, 'optimizer'),
    )
    for target, key in restore_targets:
        target.load_state_dict(state[key])
    self.iter_step = state['iter_step']

    logging.info('End')
def save_checkpoint(self):
    """Write networks, optimizer state and ``iter_step`` to
    ``<base_exp_dir>/checkpoints/ckpt_<iter_step, zero-padded to 6 digits>.pth``.
    """
    ckpt_dir = os.path.join(self.base_exp_dir, 'checkpoints')
    os.makedirs(ckpt_dir, exist_ok=True)

    state = dict()
    state['sdf_network_fine'] = self.sdf_network.state_dict()
    state['variance_network_fine'] = self.deviation_network.state_dict()
    state['optimizer'] = self.optimizer.state_dict()
    state['iter_step'] = self.iter_step

    ckpt_name = 'ckpt_{:0>6d}.pth'.format(self.iter_step)
    torch.save(state, os.path.join(ckpt_dir, ckpt_name))
def validate_normal_pixel_based(self, idx=-1, resolution_level=-1):
    """Render a per-pixel normal map and depth map for one view and save them.

    Rays are rendered in chunks of 8192. The normals are rotated with the
    inverse of the view's pose rotation, their y/z components are flipped, and
    three PNGs (raw, unit-normalised, norm heat-map) are written to
    ``<base_exp_dir>/normals``; the depth map is saved as ``.npy`` under
    ``<base_exp_dir>/depth``.

    Args:
        idx: view index; a negative value picks a random view.
        resolution_level: down-sampling level passed to ``gen_rays_at``; a
            negative value falls back to ``self.validate_resolution_level``.

    Returns:
        ``(idx, normals)`` where ``normals`` is the saved raw normal image
        mapped back from [0, 255] via ``(img - 128) / 128``.
    """
    if idx < 0:
        idx = np.random.randint(self.dataset.n_images)
    print('Validate: iter: {}, camera: {}'.format(self.iter_step, idx))

    if resolution_level < 0:
        # NOTE(review): ``validate_resolution_level`` is not assigned in the
        # part of __init__ visible here -- confirm the attribute exists.
        resolution_level = self.validate_resolution_level

    rays_o, rays_d = self.dataset.gen_rays_at(idx, resolution_level=resolution_level, within_mask=False)
    H, W, _ = rays_o.shape
    rays_o = rays_o.reshape(-1, 3).split(8192)
    rays_d = rays_d.reshape(-1, 3).split(8192)

    out_normal_fine = []
    out_depth_fine = []

    # The dataset stores single-channel (H, W) masks, as the patch-based
    # validator also assumes. Only drop a channel axis if one is present;
    # unconditionally indexing ``[..., 0]`` would collapse an (H, W) mask to 1-D.
    mask_np = self.dataset.masks_np[idx].astype(bool)
    if mask_np.ndim == 3:
        mask_np = mask_np[..., 0]
    mask_np = cv.resize(mask_np.astype(np.uint8),
                        ((int(W), int(H))),
                        interpolation=cv.INTER_NEAREST).astype(bool)

    for rays_o_batch, rays_d_batch in tqdm(zip(rays_o, rays_d)):
        near, far = self.dataset.near_far_from_sphere(rays_o_batch, rays_d_batch)
        batch_normal, batch_depth = self.renderer.render_normal_pixel_based(rays_o_batch,
                                                                            rays_d_batch,
                                                                            near,
                                                                            far)
        out_normal_fine.append(batch_normal.detach().cpu().numpy())
        out_depth_fine.append(batch_depth.detach().cpu().numpy())

    if len(out_normal_fine) > 0:
        normal_img = np.concatenate(out_normal_fine, axis=0)
        rot = np.linalg.inv(self.dataset.pose_all[idx, :3, :3].detach().cpu().numpy())  # W2C rotation
        normal_img = np.matmul(rot[None, :, :], normal_img[:, :, None]).reshape([H, W, 3, -1])
        normal_img[:, :, [1, 2]] *= -1
        normal_img_norm = np.linalg.norm(np.squeeze(normal_img), axis=2, keepdims=True)
        normal_img_normalized = np.squeeze(normal_img) / (normal_img_norm + 1e-7)
        # Map [-1, 1] to [0, 255] for saving as an image.
        normal_img = (np.squeeze(normal_img) * 128 + 128).clip(0, 255)
        normal_img_normalized = (np.squeeze(normal_img_normalized) * 128 + 128).clip(0, 255)

    depth_img = np.concatenate(out_depth_fine, axis=0).reshape([H, W])

    os.makedirs(os.path.join(self.base_exp_dir, 'normals'), exist_ok=True)
    os.makedirs(os.path.join(self.base_exp_dir, "depth"), exist_ok=True)

    # Background pixels become NaN so they are ignored by nanmin/nanmax below.
    normal_img_norm[~mask_np] = np.nan
    depth_img[~mask_np] = np.nan

    # Visualise the normal length, clipped to [0.8, 1.2], as a JET heat-map.
    normal_img_norm = np.squeeze(normal_img_norm.clip(0.8, 1.2))
    normal_img_norm = (normal_img_norm - np.nanmin(normal_img_norm)) / (np.nanmax(normal_img_norm) - np.nanmin(normal_img_norm))
    normal_img_norm = np.nan_to_num(normal_img_norm)
    normal_img_norm = (normal_img_norm * 255).astype(np.uint8)
    normal_img_norm = cv.applyColorMap(normal_img_norm, cv.COLORMAP_JET)
    normal_img_norm[~mask_np] = 0

    cv.imwrite(os.path.join(self.base_exp_dir,
                            'normals',
                            '{:0>8d}_{}_{}_norm.png'.format(self.iter_step, 0, idx)),
               normal_img_norm[..., ::-1])
    cv.imwrite(os.path.join(self.base_exp_dir,
                            'normals',
                            '{:0>8d}_{}_{}.png'.format(self.iter_step, 0, idx)),
               normal_img[..., ::-1])
    cv.imwrite(os.path.join(self.base_exp_dir,
                            'normals',
                            '{:0>8d}_{}_{}_normalized.png'.format(self.iter_step, 0, idx)),
               normal_img_normalized[..., ::-1])
    np.save(os.path.join(self.base_exp_dir,
                         'depth',
                         '{:0>8d}_{}_{}.npy'.format(self.iter_step, 0, idx)),
            depth_img)

    return idx, (normal_img - 128) / 128.
def validate_normal_patch_based(self, idx=-1, resolution_level=-1, gradient_method="dfd"):
    """Render the normal map of one view patch by patch and save it as PNGs.

    Args:
        idx: view index; a negative value picks a random view.
        resolution_level: down-sampling level passed to ``gen_patches_at``; a
            negative value falls back to ``self.validate_resolution_level``.
        gradient_method: forwarded to the renderer and used in the output
            directory name.

    Returns:
        ``(normal_map_world, normal_dir)``: the assembled rendered normal map
        as returned by the renderer (before the camera rotation is applied)
        and the directory the images were written to.
    """
    if idx < 0:
        idx = np.random.randint(self.dataset.n_images)
    print('Rendering normal maps... iter: {}, camera: {}'.format(self.iter_step, idx))

    if resolution_level < 0:
        # NOTE(review): ``validate_resolution_level`` is not assigned in the
        # part of __init__ visible here -- confirm the attribute exists.
        resolution_level = self.validate_resolution_level

    tile = self.patch_size
    (centers_o,
     centers_d,
     patches_o,
     patches_v,
     plane_normals,
     patches_A_inv,
     horizontal_num_patch,
     vertical_num_patch) = self.dataset.gen_patches_at(idx, resolution_level=resolution_level,
                                                       patch_H=tile,
                                                       patch_W=tile)

    img_w = horizontal_num_patch * tile
    img_h = vertical_num_patch * tile

    # resize mask to the size of the image
    mask_np = self.dataset.masks_np[idx].astype(bool)  # (H, W)
    mask_np = cv.resize(mask_np.astype(np.uint8),
                        ((int(img_w), int(img_h))),
                        interpolation=cv.INTER_NEAREST).astype(bool)

    # Render in chunks of at most 1024 patches.
    chunk = 1024
    total = patches_o.shape[0]
    rendered_chunks = []
    for start in range(0, total, chunk):
        window = slice(start, start + chunk)
        near, far = self.dataset.near_far_from_sphere(centers_o[window],
                                                      centers_d[window])
        render_out = self.renderer.render(patches_o[window],
                                          patches_v[window],
                                          plane_normals[window],
                                          near, far,
                                          patches_A_inv[window], gradient_method, mode='eval')
        rendered_chunks.append(render_out['comp_normal'].detach().cpu().numpy())
    rendered_patches = np.concatenate(rendered_chunks, axis=0)

    # Paste the patches back in row-major order (rows outer, columns inner).
    normal_img_world = np.zeros([img_h, img_w, 3])
    for row in range(vertical_num_patch):
        top = row * tile
        for col in range(horizontal_num_patch):
            left = col * tile
            normal_img_world[top:top + tile, left:left + tile] = \
                rendered_patches[row * horizontal_num_patch + col]

    # Rotate with the inverse pose rotation and flip the y/z components.
    rot = np.linalg.inv(self.dataset.pose_all[idx, :3, :3].detach().cpu().numpy())  # W2C rotation
    normal_img = np.matmul(rot, normal_img_world[..., None]).squeeze()
    normal_img[..., [1, 2]] *= -1

    normal_img_png = (normal_img * 128 + 128).clip(0, 255)
    normal_img_norm = np.linalg.norm(normal_img, axis=2, keepdims=True)
    normal_img_normalized = normal_img / (normal_img_norm + 1e-7)
    normal_img_normalized = (np.squeeze(normal_img_normalized) * 128 + 128).clip(0, 255)

    normal_dir = os.path.join(self.base_exp_dir,
                              f'normals_validation_{gradient_method}',
                              'iter_{:0>6d}'.format(self.iter_step))
    os.makedirs(normal_dir, exist_ok=True)

    # Copy both images into zero-initialised (img_h, img_w, 3) canvases.
    normal_eval = np.zeros((img_h, img_w, 3))
    normal_eval[:normal_img_png.shape[0], :normal_img_png.shape[1]] = normal_img_png
    normal_eval_normalized = np.zeros((img_h, img_w, 3))
    normal_eval_normalized[:normal_img_normalized.shape[0], :normal_img_normalized.shape[1]] = normal_img_normalized

    rgba = toRGBA(normal_eval_normalized.astype(np.uint8)[..., ::-1], mask_np)
    normal_img_normalized = crop_image_by_mask(rgba, mask_np)

    rendered_path = os.path.join(normal_dir, '{:0>8d}_{}_{}_rendered.png'.format(self.iter_step, 0, idx))
    normalized_path = os.path.join(normal_dir, '{:0>8d}_{}_{}_normalized.png'.format(self.iter_step, 0, idx))
    cv.imwrite(rendered_path, normal_eval[..., ::-1])
    cv.imwrite(normalized_path, normal_img_normalized)

    return normal_img_world, normal_dir
def validate_mesh(self, world_space=True, resolution=256, threshold=0.0):
    """Extract a mesh from the renderer, log it to the summary writer and save it.

    Args:
        world_space: if true, vertices are mapped with the scale and
            translation of ``self.dataset.scale_mats_np[0]`` before saving.
        resolution: grid resolution forwarded to ``extract_geometry``.
        threshold: level-set value forwarded to ``extract_geometry``.

    The mesh is reduced to its largest connected cluster and written to
    ``<base_exp_dir>/meshes_validation/iter_<iter_step>.ply``. Requires
    ``self.writer``, which is created in ``train``.
    """
    print('Extracting mesh... iter: {}'.format(self.iter_step))

    bbox_lo = torch.tensor(self.dataset.object_bbox_min, dtype=torch.float32)
    bbox_hi = torch.tensor(self.dataset.object_bbox_max, dtype=torch.float32)
    raw_vertices, raw_faces = self.renderer.extract_geometry(
        bbox_lo, bbox_hi, resolution=resolution, threshold=threshold)

    # Round-trip through trimesh (default processing) and read the result back.
    processed = trimesh.Trimesh(raw_vertices, raw_faces)
    vertices = processed.vertices
    triangles = processed.faces

    save_dir = os.path.join(self.base_exp_dir, 'meshes_validation')
    os.makedirs(save_dir, exist_ok=True)

    if world_space:
        scale_mat = self.dataset.scale_mats_np[0]
        vertices = vertices * scale_mat[0, 0] + scale_mat[:3, 3][None]

    self.writer.add_mesh('mesh_eval',
                         vertices=vertices[None, ...],
                         faces=triangles[None, ...],
                         global_step=self.iter_step)

    cleaned = self.remove_isolated_clusters(trimesh.Trimesh(vertices, triangles))
    mesh_path = os.path.join(save_dir, 'iter_{:0>8d}.ply'.format(self.iter_step))
    o3d.io.write_triangle_mesh(mesh_path, cleaned)
    print(f'Mesh saved at {mesh_path}')
def remove_isolated_clusters(self, mesh):
    """Return a copy of ``mesh`` (as an Open3D mesh) keeping only its largest cluster.

    Args:
        mesh: object exposing ``as_open3d``; the converted mesh must provide
            ``cluster_connected_triangles``, ``remove_triangles_by_mask`` and
            ``remove_unreferenced_vertices``.

    Returns:
        A deep copy of the converted mesh in which every triangle outside the
        cluster with the most triangles has been removed.
    """
    # cleaning the marching cube extracted mesh
    import copy

    o3d_mesh = mesh.as_open3d
    labels, sizes, _areas = o3d_mesh.cluster_connected_triangles()
    labels = np.asarray(labels)
    sizes = np.asarray(sizes)

    # Work on a copy so the converted input mesh stays untouched.
    result = copy.deepcopy(o3d_mesh)
    keep_label = sizes.argmax()
    result.remove_triangles_by_mask(labels != keep_label)
    result.remove_unreferenced_vertices()
    return result
@torch.no_grad()
def eval_mae(self, gradient_method):
    """Compute the mean angular error (degrees) of rendered normals against GT.

    For every view the normal map is rendered with
    ``validate_normal_patch_based``, unit-normalised, compared with
    ``<data_dir>/normal_world_space_GT/<idx, 2 digits>.exr`` and a JET-coloured
    error image (scaled so 20 degrees saturates) is written next to the
    rendered normals. Pixels outside the mask are NaN and ignored by the mean.

    Args:
        gradient_method: forwarded to ``validate_normal_patch_based``.

    Returns:
        ``(mae, mae_test)`` if at least one view index is listed in
        ``self.dataset.exclude_view_list``, otherwise just ``mae``.
    """
    print("Computing mean angular errors...")
    normal_gt_dir = os.path.join(self.dataset.data_dir, "normal_world_space_GT")

    # Only the per-view error maps are kept. The original also stored every
    # normal map and colourised error image in lists that were never read.
    ae_map_list = []
    ae_map_test_list = []

    for idx in range(self.dataset.n_images):
        normal_gt = pyexr.read(os.path.join(normal_gt_dir, "{:02d}.exr".format(idx)))[..., :3]
        mask_np = self.dataset.masks_np[idx].astype(bool)

        normal_map_world, save_dir = self.validate_normal_patch_based(
            idx, resolution_level=self.val_normal_resolution_level, gradient_method=gradient_method)
        normal_map_world = normal_map_world / (1e-10 + np.linalg.norm(normal_map_world, axis=-1, keepdims=True))

        # The rendered map may be smaller than the full image; pad it with zeros.
        normal_eval = np.zeros((self.dataset.H, self.dataset.W, 3))
        normal_eval[:normal_map_world.shape[0], :normal_map_world.shape[1]] = normal_map_world
        normal_eval[~mask_np] = np.nan

        angular_error_map = np.rad2deg(np.arccos(np.clip(np.sum(normal_gt * normal_eval, axis=-1), -1, 1)))

        # Store copies: the map is modified in place below for visualisation.
        ae_map_list.append(angular_error_map.copy())
        if idx in self.dataset.exclude_view_list:
            ae_map_test_list.append(angular_error_map.copy())

        # apply jet to angular error map
        angular_error_map[~mask_np] = 0
        angular_error_map_jet = cv.applyColorMap((angular_error_map / 20 * 255).clip(0, 255).astype(np.uint8),
                                                 cv.COLORMAP_JET)
        angular_error_map_jet[~mask_np] = 255
        angular_error_map_jet = crop_image_by_mask(toRGBA(angular_error_map_jet, mask_np), mask_np)
        cv.imwrite(os.path.join(save_dir, '{:0>8d}_{}_{}_ae_up_{}.png'.format(self.iter_step, 0, idx, 20)),
                   angular_error_map_jet)

    mae = np.nanmean(np.stack(ae_map_list, axis=0))
    self.writer.add_scalar('Statistics/mae_allview', mae, self.iter_step)

    if len(ae_map_test_list) > 0:
        mae_test = np.nanmean(np.stack(ae_map_test_list, axis=0))
        self.writer.add_scalar('Statistics/mae_testview', mae_test, self.iter_step)
        return mae, mae_test

    return mae
@torch.no_grad()
def eval_geo(self, resolution=1024):
    """Extract a mesh and score its visible points against ``self.dataset.points_gt``.

    Files written under ``<base_exp_dir>/points_val``: ``pcd_gt.ply`` (only if
    missing), ``<iter_step>_world.obj`` and ``<iter_step>_points_eval.ply``.

    Args:
        resolution: grid resolution forwarded to ``extract_geometry``.

    Returns:
        ``(cd, fscore)`` as returned by ``chamfer_distance_and_f1_score``;
        both are also logged to ``self.writer``.
    """
    out_dir = os.path.join(self.base_exp_dir, 'points_val')
    os.makedirs(out_dir, exist_ok=True)

    scale_mat = self.dataset.scale_mats_np[0]
    scale = scale_mat[0, 0]
    shift = scale_mat[:3, 3][None]

    # save gt points
    gt_cloud = o3d.geometry.PointCloud()
    gt_cloud.points = o3d.utility.Vector3dVector(self.dataset.points_gt)
    gt_cloud_path = os.path.join(out_dir, "pcd_gt.ply")
    if not os.path.exists(gt_cloud_path):
        o3d.io.write_point_cloud(gt_cloud_path, gt_cloud)

    # marching cubes
    bbox_lo = torch.tensor(self.dataset.object_bbox_min, dtype=torch.float32)
    bbox_hi = torch.tensor(self.dataset.object_bbox_max, dtype=torch.float32)
    vertices, triangles = self.renderer.extract_geometry(
        bbox_lo, bbox_hi, resolution=resolution, threshold=0)

    # Un-transformed mesh for ray casting, world-space mesh for export.
    mesh = trimesh.Trimesh(np.asarray(vertices), np.asarray(triangles), process=False)
    vertices_world = vertices * scale + shift
    mesh_world = trimesh.Trimesh(np.asarray(vertices_world), np.asarray(triangles), process=False)
    mesh_world.export(os.path.join(out_dir, f"{self.iter_step}_world.obj"))

    points_eval = self.find_visible_points(mesh) * scale + shift

    # save the sampled points
    eval_cloud = o3d.geometry.PointCloud()
    eval_cloud.points = o3d.utility.Vector3dVector(points_eval)
    o3d.io.write_point_cloud(os.path.join(out_dir, f"{self.iter_step}_points_eval.ply"), eval_cloud)

    # NOTE(review): positional order here is (points_eval, points_gt) --
    # confirm against the callee's (ref_points, eval_points) signature.
    cd, fscore = chamfer_distance_and_f1_score(points_eval, self.dataset.points_gt)
    for tag, value in (('Statistics/cd', cd), ('Statistics/fscore', fscore)):
        self.writer.add_scalar(tag, value, self.iter_step)

    return cd, fscore
def find_visible_points(self, mesh):
    """Collect the first ray/mesh intersection points over all dataset views.

    Args:
        mesh: object exposing ``ray.intersects_location(ray_origins,
            ray_directions, multiple_hits)``.

    Returns:
        The hit locations of all views concatenated along axis 0.
    """
    hits_per_view = []
    for view_idx in range(self.dataset.n_images):
        origins, directions = self.dataset.gen_rays_at(view_idx, resolution_level=1, within_mask=True)
        origins = origins.cpu().detach().numpy()
        directions = directions.cpu().detach().numpy()
        # Re-normalise the directions before casting.
        directions = directions / np.linalg.norm(directions, axis=-1, keepdims=True)

        hit_locations, _ray_ids, _tri_ids = mesh.ray.intersects_location(
            ray_origins=origins,
            ray_directions=directions,
            multiple_hits=False)
        hits_per_view.append(hit_locations)

    return np.concatenate(hits_per_view, axis=0)
if __name__ == '__main__':
    # Script entry point: parse the CLI, substitute the object name into the
    # config text and train.
    import warnings

    warnings.filterwarnings("ignore")

    # Tensors created without an explicit device default to CUDA float tensors.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    parser = argparse.ArgumentParser()
    parser.add_argument('--conf', type=str, default='./confs/base.conf')
    parser.add_argument('--mode', type=str, default='eval_normal')
    parser.add_argument('--mcube_threshold', type=float, default=0.0)
    parser.add_argument('--is_continue', default=False, action="store_true")
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--obj_name', type=str, default='')
    args = parser.parse_args()

    torch.cuda.set_device(args.gpu)
    print(f'Running on the object: {args.obj_name}')

    # Read the config through a context manager so the handle is closed.
    with open(args.conf) as f:
        conf_text = f.read()
    # Every 'CASE_NAME' placeholder in the config is replaced by the object name.
    conf_text = conf_text.replace('CASE_NAME', args.obj_name)

    # NOTE: --mode is passed to the Runner, but this script always calls train();
    # --mcube_threshold is parsed and not used here.
    runner = Runner(conf_text, args.mode, args.is_continue)
    runner.train()
================================================
FILE: models/cd_and_fscore.py
================================================
from scipy.spatial import KDTree
import numpy as np
def chamfer_distance_and_f1_score(ref_points, eval_points, f_threshold=0.5):
    """
    This function calculates the chamfer distance and f1 score between two sets of points.

    Parameters:
    ref_points (numpy.ndarray): Reference points. A (p, 3) array representing points in the world space.
    eval_points (numpy.ndarray): Points to be evaluated. A (q, 3) array representing points in the world space.
    f_threshold (float, optional): Distance threshold for the f1 score, in the same unit as the points.
        Default is 0.5 (documented upstream as 0.5mm).

    Returns:
    chamfer_dist (float): The chamfer distance between ref_points and eval_points, i.e. the average of the
        two mean nearest-neighbour distances.
    f_score (float): The f1 score between ref_points and eval_points. It is 0.0 when no point of either set
        lies within f_threshold of the other set.
    """
    print("computing chamfer distance and f1 score...")

    # Nearest-neighbour distances in both directions.
    distance_eval2ref, _ = KDTree(ref_points).query(eval_points, k=1, p=2)  # p=2 for Euclidean distance
    distance_ref2eval, _ = KDTree(eval_points).query(ref_points, k=1, p=2)

    # following Uncertainty-aware deep multi-view photometric stereo
    chamfer_dist = (np.mean(distance_eval2ref) + np.mean(distance_ref2eval)) / 2

    precision = np.mean(distance_eval2ref < f_threshold)
    recall = np.mean(distance_ref2eval < f_threshold)

    # Guard against 0/0 (which would yield NaN) when nothing is within threshold.
    denominator = precision + recall
    f_score = 2 * precision * recall / denominator if denominator > 0 else 0.0

    return chamfer_dist, f_score
================================================
FILE: models/dataset_loader.py
================================================
import torch
import torch.nn.functional as F
import cv2 as cv
import numpy as np
import os
from glob import glob
from icecream import ic
import pyexr
import open3d as o3d
import time
from concurrent.futures import ThreadPoolExecutor
def load_K_Rt_from_P(filename, P=None):
    """Decompose a projection matrix into intrinsics and a camera-to-world pose.

    This function is borrowed from IDR: https://github.com/lioryariv/idr

    Args:
        filename: text file holding the matrix rows as space-separated values;
            only read when ``P`` is None. If the file has exactly four lines,
            the first one is skipped.
        P: projection matrix handed to ``cv.decomposeProjectionMatrix``.

    Returns:
        ``(intrinsics, C2W)``: two 4x4 arrays. ``intrinsics`` holds K
        normalised so that ``K[2, 2] == 1``; ``C2W`` holds the transposed
        rotation and the de-homogenised camera position.
    """
    if P is None:
        # Context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        # Keep the first four space-separated fields of every row.
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    K, R, t, *_ = cv.decomposeProjectionMatrix(P)
    # CAUTION: R is the W2C rotation matrix but t is the camera position in world coordinate.

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    C2W = np.eye(4, dtype=np.float32)
    C2W[:3, :3] = R.T
    C2W[:3, 3] = (t[:3] / t[3])[:, 0]  # homogeneous -> Euclidean

    return intrinsics, C2W
class Dataset:
def __init__(self, conf):
    """Load normal maps, masks and cameras, and precompute per-pixel ray data.

    Args:
        conf: dataset configuration. Read here: ``normal_dir``, ``data_dir``,
            ``cameras_name``, ``exclude_views`` and the optional
            ``upsample_factor`` (default 1).

    After construction the instance holds, among others, ``normals``,
    ``masks``, ``intrinsics_all``, ``intrinsics_all_inv``, ``pose_all`` and
    ``V_inverse_all`` as tensors on ``self.device``.
    """
    super(Dataset, self).__init__()
    print('Load data: Begin')
    self.device = torch.device('cuda')
    self.conf = conf

    normal_dir = conf.get_string('normal_dir')
    self.data_dir = conf.get_string('data_dir')
    self.cameras_name = conf.get_string('cameras_name')
    # list of views to exclude from training. Used in novel-view normal synthesis evaluation.
    self.exclude_view_list = conf['exclude_views']
    self.upsample_factor = conf.get_int('upsample_factor', default=1)
    ic(self.exclude_view_list)

    # load the GT mesh for evaluation if any
    mesh_path = os.path.join(self.data_dir, 'mesh_Gt.ply')
    if os.path.exists(mesh_path):
        self.mesh_gt = o3d.io.read_triangle_mesh(mesh_path)
    else:
        self.mesh_gt = None
    self.points_gt = None  # will be computed from the mesh at evaluation time

    camera_dict = np.load(os.path.join(self.data_dir, self.cameras_name))
    self.camera_dict = camera_dict
    self.normal_lis = sorted(glob(os.path.join(self.data_dir, normal_dir, '*.exr')))
    self.n_images = len(self.normal_lis)
    self.train_images = set(range(self.n_images)) - set(self.exclude_view_list)
    # The numeric file stem is used as the camera index in the camera dict.
    self.img_idx_list = [int(os.path.basename(x).split('.')[0]) for x in self.normal_lis]

    # os.cpu_count() may return None; fall back to a single CPU in that case.
    num_workers = min(64, (os.cpu_count() or 1) * 5)

    print("loading normal maps...")
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        def read_normal(im_name):
            return pyexr.read(im_name)[..., :3]
        self.normal_np = np.stack(list(executor.map(read_normal, self.normal_lis)))
    if self.upsample_factor > 1:
        # resize normal maps
        self.normal_np = F.interpolate(torch.from_numpy(self.normal_np).permute(0, 3, 1, 2),
                                       scale_factor=self.upsample_factor,
                                       mode='bilinear',
                                       align_corners=False).permute(0, 2, 3, 1).numpy()
    self.normals = torch.from_numpy(self.normal_np.astype(np.float32)).to(self.device)  # [n_images, H, W, 3]
    print("loading normal maps done.")

    self.masks_lis = sorted(glob(os.path.join(self.data_dir, 'mask/*.png')))
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        def read_mask(im_name):
            return cv.imread(im_name)
        self.masks_np = np.stack(list(executor.map(read_mask, self.masks_lis))) / 255.0
    if self.upsample_factor > 1:
        # resize mask
        self.masks_np = F.interpolate(torch.from_numpy(self.masks_np).permute(0, 3, 1, 2),
                                      scale_factor=self.upsample_factor,
                                      mode='nearest').permute(0, 2, 3, 1).numpy()
    self.masks_np = self.masks_np[..., 0]  # keep a single channel: [n_images, H, W]
    self.total_pixel = np.sum(self.masks_np)

    # set background of normal map to 0
    # NOTE(review): ``self.normals`` was created above from a copy, so this
    # zeroing does not reach it -- confirm whether that is intended.
    self.normal_np[self.masks_np == 0] = 0

    # world_mat is a projection matrix from world to image
    self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in self.img_idx_list]

    # scale_mat: used for coordinate normalization, we assume the scene to render is inside a unit sphere at origin.
    self.scale_mats_np = [camera_dict['scale_mat_%d' % idx].astype(np.float32) for idx in self.img_idx_list]

    self.intrinsics_all = []
    self.pose_all = []
    self.V_inverse_all = []

    self.H, self.W = self.normal_np.shape[1], self.normal_np.shape[2]

    # Homogeneous pixel coordinates (x, y, 1). They are the same for every
    # view, so build them once instead of once per camera.
    tx = torch.linspace(0, self.W - 1, int(self.W))
    ty = torch.linspace(0, self.H - 1, int(self.H))
    pixels_x, pixels_y = torch.meshgrid(tx, ty, indexing='ij')
    pixel_grid = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1)  # W, H, 3

    for scale_mat, world_mat in zip(self.scale_mats_np, self.world_mats_np):
        P = (world_mat @ scale_mat)[:3, :4]
        intrinsics, C2W = load_K_Rt_from_P(None, P)
        if self.upsample_factor > 1:
            # resize intrinsics
            intrinsics[0, 0] *= self.upsample_factor
            intrinsics[1, 1] *= self.upsample_factor
            intrinsics[0, 2] *= self.upsample_factor
            intrinsics[1, 2] *= self.upsample_factor

        intrinsics_t = torch.from_numpy(intrinsics).float()
        pose = torch.from_numpy(C2W).float()
        self.intrinsics_all.append(intrinsics_t)
        self.pose_all.append(pose)

        intrinsics_inverse = torch.inverse(intrinsics_t)

        # compute the V_inverse
        p = pixel_grid.to(intrinsics_inverse.device)  # W, H, 3
        p = torch.matmul(intrinsics_inverse[None, None, :3, :3],
                         p[:, :, :, None]).squeeze(-1)  # W, H, 3
        rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # W, H, 3
        rays_v = torch.matmul(pose[None, None, :3, :3],
                              rays_v[:, :, :, None]).squeeze(-1)  # W, H, 3
        rays_v = rays_v.transpose(0, 1).to(self.device)  # H, W, 3

        # the axis direction of the camera coordinate system in the world coordinate system
        rays_right = pose[None, :3, 0].expand(rays_v.shape).to(self.device)  # H, W, 3
        rays_down = pose[None, :3, 1].expand(rays_v.shape).to(self.device)  # H, W, 3

        # Rows of V: viewing direction, camera right axis, camera down axis.
        V_concat = torch.cat([rays_v[..., None, :],
                              rays_right[..., None, :],
                              rays_down[..., None, :]], dim=-2)  # (H, W, 3, 3)

        # computing the inverse may take a while if the resolution is high
        # For 512x612, it takes about 0.8ms
        V_inverse = torch.inverse(V_concat)  # (H, W, 3, 3)
        self.V_inverse_all.append(V_inverse)

    self.masks = torch.from_numpy(self.masks_np.astype(np.float32)).to(self.device)  # [n_images, H, W]
    self.intrinsics_all = torch.stack(self.intrinsics_all).to(self.device)  # [n_images, 4, 4]
    self.intrinsics_all_inv = torch.inverse(self.intrinsics_all)  # [n_images, 4, 4]
    self.focal_length = self.intrinsics_all[0][0, 0]
    self.pose_all = torch.stack(self.pose_all).to(self.device)  # [n_images, 4, 4]
    self.image_pixels = self.H * self.W
    self.V_inverse_all = torch.stack(self.V_inverse_all).to(self.device)  # [n_images, H, W, 3, 3]

    # for mesh extraction
    self.object_bbox_min = np.array([-1., -1., -1.])
    self.object_bbox_max = np.array([1., 1., 1.])
    print('Load data: End')
def gen_rays_at(self, img_idx, resolution_level=1, within_mask=False):
    """
    Generate all rays at world space from one camera.

    Parameters:
    img_idx (int): Index of the view.
    resolution_level (int, optional): Down-sampling factor of the ray grid. Default is 1.
    within_mask (bool, optional): If True, only rays at pixels inside the resized mask are returned.

    Returns:
    rays_o, rays_v: Ray origins and unit directions, laid out as (H, W, 3) grids, or the
    mask-selected subset of them when within_mask is True.
    """
    out_w = int(self.W // resolution_level)
    out_h = int(self.H // resolution_level)

    # resize the mask using resolution_level
    view_mask = self.masks_np[img_idx].astype(bool)
    view_mask = cv.resize(view_mask.astype(np.uint8) * 255,
                          (out_w, out_h),
                          interpolation=cv.INTER_NEAREST).astype(bool)

    xs = torch.linspace(0, self.W - 1, out_w)
    ys = torch.linspace(0, self.H - 1, out_h)
    grid_x, grid_y = torch.meshgrid(xs, ys)
    pix = torch.stack([grid_x, grid_y, torch.ones_like(grid_y)], dim=-1)  # W, H, 3

    # Back-project pixels with the inverse intrinsics, normalise, then rotate into world space.
    K_inv = self.intrinsics_all_inv[img_idx, None, None, :3, :3]
    cam_dirs = torch.matmul(K_inv, pix[:, :, :, None]).squeeze()  # W, H, 3
    cam_dirs = cam_dirs / torch.linalg.norm(cam_dirs, ord=2, dim=-1, keepdim=True)  # W, H, 3
    rotation = self.pose_all[img_idx, None, None, :3, :3]
    world_dirs = torch.matmul(rotation, cam_dirs[:, :, :, None]).squeeze()  # W, H, 3
    origins = self.pose_all[img_idx, None, None, :3, 3].expand(world_dirs.shape)  # W, H, 3

    # Swap the first two axes so the grids are (H, W, 3).
    origins = origins.transpose(0, 1)
    world_dirs = world_dirs.transpose(0, 1)

    if not within_mask:
        return origins, world_dirs
    return origins[view_mask], world_dirs[view_mask]
def gen_patches_at(self, img_idx, resolution_level=1, patch_H=3, patch_W=3):
    """
    Split the rays of one view into non-overlapping patches, in row-major order.

    Only complete patches are produced: trailing rows/columns that do not fill a whole
    patch are dropped, for every patch size.

    Parameters:
    img_idx (int): Index of the view.
    resolution_level (int, optional): Down-sampling factor of the ray grid. Default is 1.
    patch_H (int, optional): The height of the patches. Default is 3.
    patch_W (int, optional): The width of the patches. Default is 3.

    Returns:
    rays_o_patch_center (torch.Tensor): Ray origins at the patch centers, (num_patch, 3).
    rays_d_patch_center (torch.Tensor): Ray directions at the patch centers, (num_patch, 3).
    rays_o_patches_all (torch.Tensor): Ray origins, (num_patch, patch_H, patch_W, 3).
    rays_v_patches_all (torch.Tensor): Ray directions, (num_patch, patch_H, patch_W, 3).
    marching_plane_normal_patches_all (torch.Tensor): Third column of the view's pose rotation,
        repeated per patch, (num_patch, 3).
    rays_V_inverse_patches_all (torch.Tensor): V inverse per pixel, (num_patch, patch_H, patch_W, 3, 3).
    horizontal_num_patch (int): Number of patches per row.
    vertical_num_patch (int): Number of patches per column.
    """
    tx = torch.linspace(0, self.W - 1, int(self.W // resolution_level))
    ty = torch.linspace(0, self.H - 1, int(self.H // resolution_level))
    pixels_y, pixels_x = torch.meshgrid(ty, tx, indexing='ij')
    p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1)  # H, W, 3
    p = torch.matmul(self.intrinsics_all_inv[img_idx, :3, :3], p[..., None]).squeeze(-1)  # H, W, 3
    rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # H, W, 3
    rays_v = torch.matmul(self.pose_all[img_idx, :3, :3], rays_v[..., None]).squeeze(-1)  # H, W, 3

    # split rays_v into non-overlapping patches
    height, width, _ = rays_v.shape
    horizontal_num_patch = width // patch_W
    vertical_num_patch = height // patch_H
    used_h = vertical_num_patch * patch_H
    used_w = horizontal_num_patch * patch_W

    # (rows, patch_H, cols, patch_W, ...) -> (rows * cols, patch_H, patch_W, ...)
    # This yields exactly the patches the returned counts promise. The previous
    # loop bound (height - patch_H // 2 - 1) was only correct for 3x3 patches.
    rays_v_patches_all = rays_v[:used_h, :used_w] \
        .reshape(vertical_num_patch, patch_H, horizontal_num_patch, patch_W, 3) \
        .permute(0, 2, 1, 3, 4) \
        .reshape(-1, patch_H, patch_W, 3)  # (num_patch, patch_H, patch_W, 3)

    # NOTE(review): V_inverse_all is indexed with the down-sampled grid indices,
    # so for resolution_level > 1 it may not match rays_v pixel-for-pixel -- confirm.
    rays_V_inverse_patches_all = self.V_inverse_all[img_idx][:used_h, :used_w] \
        .reshape(vertical_num_patch, patch_H, horizontal_num_patch, patch_W, 3, 3) \
        .permute(0, 2, 1, 3, 4, 5) \
        .reshape(-1, patch_H, patch_W, 3, 3)  # (num_patch, patch_H, patch_W, 3, 3)

    rays_o_patches_all = self.pose_all[img_idx, :3, 3].expand(rays_v_patches_all.shape)  # (num_patch, patch_H, patch_W, 3)

    rays_o_patch_center = rays_o_patches_all[:, patch_H // 2, patch_W // 2]  # (num_patch, 3)
    rays_d_patch_center = rays_v_patches_all[:, patch_H // 2, patch_W // 2]  # (num_patch, 3)

    marching_plane_normal_patches_all = self.pose_all[img_idx, :3, 2].expand(rays_d_patch_center.shape)  # (num_patch, 3)

    return rays_o_patch_center, \
           rays_d_patch_center, \
           rays_o_patches_all, \
           rays_v_patches_all, \
           marching_plane_normal_patches_all, \
           rays_V_inverse_patches_all, horizontal_num_patch, vertical_num_patch
def gen_random_patches(self, num_patch, patch_H=3, patch_W=3):
    """
    Sample random patches of world-space rays across the training viewpoints.

    Image convention: X-axis right, Y-axis down. All images are assumed to share one resolution.
    Patch centres are drawn so that the whole patch stays inside the image.

    Parameters:
        num_patch (int): Number of patches to sample.
        patch_H (int, optional): Patch height in pixels. Default is 3.
        patch_W (int, optional): Patch width in pixels. Default is 3.

    Returns:
        rays_o_patch_all (torch.Tensor): Ray origins (translation column of the pose), shape (num_patch, patch_H, patch_W, 3).
        rays_d_patch_all (torch.Tensor): Ray directions, normalized in camera space and then rotated by the pose,
            shape (num_patch, patch_H, patch_W, 3).
        marching_plane_normal (torch.Tensor): Third column of the pose rotation of the view each patch was drawn from.
            Patches come from different views, so this normal is shared only within a patch. Shape (num_patch, 3).
        V_inverse_patch_all (torch.Tensor): Entries of ``self.V_inverse_all`` at the patch pixels,
            shape (num_patch, patch_H, patch_W, 3, 3).
        normal (torch.Tensor): Input normals at the patch pixels, shape (num_patch, patch_H, patch_W, 3).
        mask (torch.Tensor): Mask values at the patch pixels, shape (num_patch, patch_H, patch_W, 1).
    """
    dev = self.device
    half_h = patch_H // 2
    half_w = patch_W // 2

    # Patch centres; x is drawn before y so the random stream is consumed in a fixed order.
    center_x = torch.randint(low=half_w, high=self.W - 1 - half_w, size=[num_patch], device=dev)  # (num_patch, )
    center_y = torch.randint(low=half_h, high=self.H - 1 - half_h, size=[num_patch], device=dev)  # (num_patch, )

    # Per-pixel offsets relative to the centre, tiled to the full (patch_H, patch_W) grid.
    offset_x = torch.arange(-patch_W // 2 + 1, patch_W // 2 + 1, device=dev).repeat(patch_H, 1)
    offset_y = torch.arange(-patch_H // 2 + 1, patch_H // 2 + 1, device=dev).reshape(-1, 1).repeat(1, patch_W)
    pix_x = center_x[:, None, None] + offset_x  # (num_patch, patch_H, patch_W)
    pix_y = center_y[:, None, None] + offset_y  # (num_patch, patch_H, patch_W)

    # One random training view per patch, broadcast to every pixel of that patch.
    view_ids = np.random.choice(list(self.train_images), size=[num_patch])  # (num_patch, )
    view_ids = torch.tensor(view_ids, device=dev)
    view_ids_px = view_ids.view(-1, 1, 1).expand_as(pix_x)  # (num_patch, patch_H, patch_W)

    # Supervision signals gathered at the sampled pixels.
    normal = self.normals[view_ids_px, pix_y, pix_x]
    V_inverse_patch_all = self.V_inverse_all[view_ids_px, pix_y, pix_x]
    mask = self.masks[view_ids_px, pix_y, pix_x].unsqueeze(-1)

    # Homogeneous pixel coordinates -> camera-space directions -> unit length.
    homog = torch.stack([pix_x, pix_y, torch.ones_like(pix_y)], dim=-1).float().to(self.device)
    cam_dirs = torch.matmul(self.intrinsics_all_inv[view_ids_px, :3, :3], homog[..., None])[..., 0]
    cam_dirs = cam_dirs / torch.linalg.norm(cam_dirs, ord=2, dim=-1, keepdim=True)

    # Rotate into world space; the origin is the pose translation, shared by all rays of a view.
    rotation = self.pose_all[view_ids, None, None, :3, :3]
    rays_d_patch_all = torch.matmul(rotation, cam_dirs[..., None])[..., 0]
    rays_o_patch_all = self.pose_all[view_ids, None, None, :3, 3].expand(rays_d_patch_all.shape)

    # Normal of the image/marching plane: third column of the pose rotation.
    marching_plane_normal = self.pose_all[view_ids, :3, 2].expand((num_patch, 3))

    return rays_o_patch_all, \
           rays_d_patch_all, \
           marching_plane_normal, \
           V_inverse_patch_all, \
           normal, \
           mask
def near_far_from_sphere(self, rays_o, rays_d):
    """
    Intersect rays with the unit sphere centred at the origin.

    Solves the quadratic ``|rays_o + t * rays_d|^2 = 1`` for ``t`` and returns both roots.
    Rays that miss the sphere have a negative discriminant and yield NaN.

    Parameters:
        rays_o (torch.Tensor): Ray origins, shape (N, 3).
        rays_d (torch.Tensor): Ray directions, shape (N, 3).

    Returns:
        near (torch.Tensor): Smaller root (entry distance along the ray), shape (N, ).
        far (torch.Tensor): Larger root (exit distance along the ray), shape (N, ).
    """
    # Coefficients of a*t^2 + b*t + c = 0.
    quad_a = (rays_d ** 2).sum(dim=-1, keepdim=True)
    quad_b = 2.0 * (rays_o * rays_d).sum(dim=-1, keepdim=True)
    quad_c = (rays_o ** 2).sum(dim=-1, keepdim=True) - 1.0

    t_mid = 0.5 * (-quad_b) / quad_a
    half_span = torch.sqrt(quad_b ** 2 - 4 * quad_a * quad_c) / (2 * quad_a)

    t_near = t_mid - half_span
    t_far = t_mid + half_span
    return t_near[..., 0], t_far[..., 0]
def image_at(self, idx, resolution_level):
    """
    Load image ``idx`` from ``self.images_lis`` and downscale it by ``resolution_level``.

    The target size is passed to ``cv.resize`` as (W // resolution_level, H // resolution_level);
    the result is clipped to [0, 255].
    """
    raw = cv.imread(self.images_lis[idx])
    target_size = (self.W // resolution_level, self.H // resolution_level)
    resized = cv.resize(raw, target_size)
    return resized.clip(0, 255)
================================================
FILE: models/fields.py
================================================
import torch
import torch.nn as nn
import numpy as np
import tinycudann as tcnn
from icecream import ic
class SDFNetwork(nn.Module):
    """
    MLP signed-distance field with skip connections and an optional tiny-cuda-nn input encoding.

    Parameters:
        d_in (int): Dimension of the input points.
        d_out (int): Dimension of the output; column 0 is used as the SDF value.
        d_hidden (int): Width of every hidden layer.
        n_layers (int): Number of hidden layers.
        skip_in (tuple): Layer indices whose input is concatenated with the network input.
        bias (float): Bias magnitude used by the geometric initialization of the last layer.
        geometric_init (bool): Use the geometric initialization scheme for the linear layers.
        weight_norm (bool): Wrap every linear layer with weight normalization.
        inside_outside (bool): Flip the sign convention of the last-layer geometric initialization.
        encoding_config (dict or None): Config passed to ``tcnn.Encoding``; ``None`` disables the encoding.
        input_concat (bool): With an encoding, feed ``[inputs, encoded]`` instead of ``encoded`` alone.
    """

    def __init__(self,
                 d_in,
                 d_out,
                 d_hidden,
                 n_layers,
                 skip_in=(4,),
                 bias=0.5,
                 geometric_init=True,
                 weight_norm=True,
                 inside_outside=False,
                 encoding_config=None,
                 input_concat=False):
        super().__init__()
        self.input_concat = input_concat

        dims = [d_in] + [d_hidden for _ in range(n_layers)] + [d_out]
        if encoding_config is not None:
            self.encoding = tcnn.Encoding(d_in, encoding_config).to(torch.float32)
            dims[0] = self.encoding.n_output_dims
            if input_concat:
                dims[0] += d_in
        else:
            self.encoding = None

        self.num_layers = len(dims)
        self.skip_in = skip_in
        # Current bandwidth level; only the first 2 * bindwidth encoding channels are passed through.
        self.bindwidth = 0
        # Without an encoding there are no encoded channels (previously this line raised AttributeError).
        self.enc_dim = self.encoding.n_output_dims if self.encoding is not None else 0

        for l in range(0, self.num_layers - 1):
            # A layer feeding a skip connection outputs fewer channels so that the
            # concatenation with the network input restores the nominal width.
            if l + 1 in self.skip_in:
                out_dim = dims[l + 1] - dims[0]
            else:
                out_dim = dims[l + 1]
            lin = nn.Linear(dims[l], out_dim)

            if geometric_init:
                if l == self.num_layers - 2:
                    # Last layer: near-constant weights and a bias of magnitude `bias`.
                    if not inside_outside:
                        torch.nn.init.normal_(lin.weight, mean=np.sqrt(np.pi) / np.sqrt(dims[l]), std=0.0001)
                        torch.nn.init.constant_(lin.bias, -bias)
                    else:
                        torch.nn.init.normal_(lin.weight, mean=-np.sqrt(np.pi) / np.sqrt(dims[l]), std=0.0001)
                        torch.nn.init.constant_(lin.bias, bias)
                elif self.encoding is not None and l == 0:
                    # First layer with encoding: only the first three input channels start non-zero.
                    torch.nn.init.constant_(lin.bias, 0.0)
                    torch.nn.init.constant_(lin.weight[:, 3:], 0.0)
                    torch.nn.init.normal_(lin.weight[:, :3], 0.0, np.sqrt(2) / np.sqrt(out_dim))
                elif self.encoding is not None and l in self.skip_in:
                    # Skip layer with encoding: zero the weights that see the re-injected encoding channels.
                    torch.nn.init.constant_(lin.bias, 0.0)
                    torch.nn.init.normal_(lin.weight, 0.0, np.sqrt(2) / np.sqrt(out_dim))
                    torch.nn.init.constant_(lin.weight[:, -(dims[0] - 3):], 0.0)
                else:
                    torch.nn.init.constant_(lin.bias, 0.0)
                    torch.nn.init.normal_(lin.weight, 0.0, np.sqrt(2) / np.sqrt(out_dim))

            if weight_norm:
                lin = nn.utils.weight_norm(lin)

            setattr(self, "lin" + str(l), lin)

        self.activation = nn.Softplus(beta=100)

    def increase_bandwidth(self):
        """Unmask two more encoding channels on subsequent forward passes."""
        self.bindwidth += 1

    def forward(self, inputs):
        """
        Evaluate the network.

        Parameters:
            inputs (torch.Tensor): Points of shape (N, d_in).

        Returns:
            torch.Tensor: Output of shape (N, d_out).
        """
        if self.encoding is not None:
            encoded = self.encoding(inputs).to(torch.float32)

            # Zero every encoding channel beyond the current bandwidth.
            enc_mask = torch.ones(self.enc_dim, dtype=torch.bool, device=encoded.device, requires_grad=False)
            enc_mask[self.bindwidth*2:] = 0
            encoded = encoded * enc_mask

            if self.input_concat:
                inputs = torch.cat([inputs, encoded], dim=1)
            else:
                # The first layer is sized for the encoding alone in this configuration,
                # so the encoded features (not the raw points) must be fed to it.
                inputs = encoded

        x = inputs
        for l in range(0, self.num_layers - 1):
            lin = getattr(self, "lin" + str(l))

            if l in self.skip_in:
                x = torch.cat([x, inputs], 1) / np.sqrt(2)

            x = lin(x)

            # No activation after the last layer.
            if l < self.num_layers - 2:
                x = self.activation(x)
        return x

    def sdf(self, x):
        """Return only the SDF channel, shape (N, 1)."""
        return self.forward(x)[:, :1]

    def sdf_hidden_appearance(self, x):
        """Return the full network output, shape (N, d_out)."""
        return self.forward(x)

    @torch.enable_grad()
    def gradient(self, x):
        """
        Gradient of the SDF with respect to ``x`` via autograd.

        Note: sets ``requires_grad`` on ``x`` in place. Gradients are enabled even when
        called from a ``no_grad`` context.

        Returns:
            torch.Tensor: Gradients of shape (N, 1, d_in).
        """
        x.requires_grad_(True)
        y = self.sdf(x)
        d_output = torch.ones_like(y, requires_grad=False, device=y.device)
        gradients = torch.autograd.grad(
            outputs=y,
            inputs=x,
            grad_outputs=d_output,
            create_graph=True,
            retain_graph=True,
            only_inputs=True)[0]
        return gradients.unsqueeze(1)

    @torch.enable_grad()
    def divergence(self, y, x):
        """Sum over i of d y[..., i] / d x[..., i], computed with one autograd call per component."""
        div = 0.
        for i in range(y.shape[-1]):
            div += torch.autograd.grad(y[..., i], x, torch.ones_like(y[..., i]), create_graph=True)[0][..., i:i + 1]
        return div

    @torch.enable_grad()
    def laplace(self, x):
        """Laplacian of the SDF at ``x`` (divergence of its gradient)."""
        return self.divergence(self.gradient(x), x)
class SingleVarianceNetwork(nn.Module):
    """Holds one learnable scalar and exposes ``exp(10 * variance)`` broadcast per input row."""

    def __init__(self, init_val):
        """
        Parameters:
            init_val (float): Initial value of the learnable ``variance`` scalar.
        """
        super().__init__()
        self.register_parameter('variance', nn.Parameter(torch.tensor(init_val)))

    def forward(self, x):
        """
        Parameters:
            x: Any sized input; only ``len(x)`` is used.

        Returns:
            torch.Tensor: Tensor of shape (len(x), 1) filled with ``exp(variance * 10)``.
        """
        # Build the broadcast base on the parameter's device so the result does not
        # depend on the process-wide default tensor device.
        ones = torch.ones([len(x), 1], device=self.variance.device)
        return ones * torch.exp(self.variance * 10.0)
================================================
FILE: models/renderer.py
================================================
import torch
import numpy as np
import mcubes
from tqdm import tqdm
from nerfacc import ContractionType, OccupancyGrid, ray_marching, \
render_weight_from_alpha_patch_based, accumulate_along_rays_patch_based, \
render_weight_from_alpha, accumulate_along_rays
def extract_fields(bound_min, bound_max, resolution, query_func):
    """
    Sample ``query_func`` on a regular ``resolution``^3 grid spanning the given bounding box.

    The grid is evaluated in chunks of at most 64 samples per axis to bound peak memory.

    Parameters:
        bound_min: Lower corner of the box, indexable as [0], [1], [2].
        bound_max: Upper corner of the box, indexable as [0], [1], [2].
        resolution (int): Number of samples along each axis.
        query_func (callable): Maps an (M, 3) tensor of points to M values.

    Returns:
        np.ndarray: float32 array of shape (resolution, resolution, resolution), indexed [x, y, z].
    """
    N = 64
    X = torch.linspace(bound_min[0], bound_max[0], resolution).split(N)
    Y = torch.linspace(bound_min[1], bound_max[1], resolution).split(N)
    Z = torch.linspace(bound_min[2], bound_max[2], resolution).split(N)

    u = np.zeros([resolution, resolution, resolution], dtype=np.float32)
    with torch.no_grad():
        # `enumerate` has no length, so give tqdm the total explicitly.
        for xi, xs in tqdm(enumerate(X), total=len(X)):
            for yi, ys in enumerate(Y):
                for zi, zs in enumerate(Z):
                    # Explicit "ij" indexing matches the historical default and avoids
                    # the deprecation warning raised when `indexing` is omitted.
                    xx, yy, zz = torch.meshgrid(xs, ys, zs, indexing="ij")
                    pts = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1)
                    val = query_func(pts).reshape(len(xs), len(ys), len(zs)).detach().cpu().numpy()
                    u[xi * N: xi * N + len(xs), yi * N: yi * N + len(ys), zi * N: zi * N + len(zs)] = val
    return u
def extract_geometry(bound_min, bound_max, resolution, threshold, query_func):
    """
    Extract a triangle mesh at level ``threshold`` from ``query_func`` sampled on a grid.

    Parameters:
        bound_min (torch.Tensor): Lower corner of the bounding box.
        bound_max (torch.Tensor): Upper corner of the bounding box.
        resolution (int): Grid samples per axis.
        threshold (float): Iso-level passed to marching cubes.
        query_func (callable): Field evaluated by ``extract_fields``.

    Returns:
        (vertices, triangles): Vertices rescaled from grid indices to the bounding box, and the
        triangle array returned by ``mcubes.marching_cubes``.
    """
    field = extract_fields(bound_min, bound_max, resolution, query_func)
    verts, faces = mcubes.marching_cubes(field, threshold)

    upper = bound_max.detach().cpu().numpy()
    lower = bound_min.detach().cpu().numpy()
    extent = (upper - lower)[None, :]

    # Grid index in [0, resolution - 1] -> coordinate inside the bounding box.
    verts = verts / (resolution - 1.0) * extent + lower[None, :]
    return verts, faces
class NeuSRenderer:
def __init__(self, sdf_network, deviation_network,
             gradient_method="dfd"):
    """
    Parameters:
        sdf_network: Network queried for SDF values (and gradients).
        deviation_network: Network providing the inverse standard deviation ``inv_s``.
        gradient_method (str): Gradient scheme used in training mode: "dfd", "fd" or "ad".
    """
    self.sdf_network = sdf_network
    self.deviation_network = deviation_network
    self.gradient_method = gradient_method

    # Ray-marching step size; modified during training.
    self.sampling_step_size = 0.01

    # Occupancy grid over the [-1, 1]^3 cube (see NerfAcc for details).
    self.scene_aabb = torch.as_tensor([-1., -1., -1., 1., 1., 1.], dtype=torch.float32)
    self.contraction_type = ContractionType.AABB
    grid = OccupancyGrid(
        roi_aabb=self.scene_aabb,
        resolution=128,  # a per-axis list such as [256, 128, 64] is also possible
        contraction_type=self.contraction_type)
    self.occupancy_grid = grid.to("cuda")
def occ_eval_fn(self, x):
    """
    Occupancy estimate used to update the occupancy grid from the current SDF.

    Returns ``sigmoid(-80 * sdf)`` for the first output channel of the SDF network;
    grid cells whose value falls below the occupancy threshold are treated as empty.
    """
    sdf_vals = self.sdf_network(x)[..., :1]
    return torch.sigmoid(- sdf_vals * 80)
def render(self, rays_o_patch_all,  # (num_patch, patch_H, patch_W, 3)
rays_d_patch_all,  # (num_patch, patch_H, patch_W, 3)
marching_plane_normal,  # (num_patch, 3)
near,  # (num_patch,)
far,  # (num_patch,)
V_inverse_patch_all,  # (num_patch, patch_H, patch_W, 3, 3)
val_gradient_method='dfd',
mode='train'):
"""
Render per-pixel normals for patches of rays by accumulating SDF gradients along the rays.

Only the centre ray of each patch is ray-marched through the occupancy grid. The resulting
sample distances are rescaled for every other ray of the patch so that each sample has the same
projection onto ``marching_plane_normal`` as the centre sample, i.e. all samples of one marching
step lie on a common plane perpendicular to that normal.

Parameters:
rays_o_patch_all: ray origins, (num_patch, patch_H, patch_W, 3).
rays_d_patch_all: ray directions, (num_patch, patch_H, patch_W, 3).
marching_plane_normal: normal of the marching plane per patch, (num_patch, 3).
near, far: marching range per patch, (num_patch,).
V_inverse_patch_all: matrices mapping directional derivatives to gradients, (num_patch, patch_H, patch_W, 3, 3);
only used by the "dfd" method.
val_gradient_method: gradient method used when ``mode == 'eval'``.
mode: 'train' uses ``self.gradient_method``; 'eval' uses ``val_gradient_method``.
NOTE(review): any other value leaves ``gradient_method`` unassigned and fails at the first comparison.

Returns:
dict with keys 's_val', 'weight_sum', 'gradients', 'comp_normal', 'samples_per_ray'.
When ray marching produces no samples, the dict contains only a zero-filled 'comp_normal'.
"""
# patch size, should be odd
patch_H = rays_o_patch_all.shape[1]
patch_W = rays_o_patch_all.shape[2]
num_patch = rays_o_patch_all.shape[0]
# extract camera location and ray direction of the patches' center pixels
rays_o_patch_center = rays_o_patch_all[:, patch_H//2, patch_W//2]  # (num_patch, 3)
rays_d_patch_center = rays_d_patch_all[:, patch_H//2, patch_W//2]  # (num_patch, 3)
def alpha_fn_patch_center(t_starts, t_ends, ray_indices, ret_sdf=False):
# the function used in ray marching: opacity of each sample on the patch-centre rays
ray_indices = ray_indices.long()
t_origins = rays_o_patch_center[ray_indices]
t_dirs = rays_d_patch_center[ray_indices]
positions_starts = t_origins + t_dirs * t_starts
positions_ends = t_origins + t_dirs * t_ends
t_starts_shift_left = t_starts[1:]
# append the last element of t_starts so the shifted tensor keeps the original length
t_starts_shift_left = torch.cat([t_starts_shift_left, t_starts[-1:]], 0)
# compute the diff mask between t_ends and t_starts_shift_left:
# True where a sample's end is not the start of the following sample
diff_mask = ((t_ends - t_starts_shift_left) != 0).squeeze()
# only the end points flagged by diff_mask need their own SDF query;
# the other end points reuse the SDF of the next sample's start point
positions_ends_diff = positions_ends[diff_mask].reshape(-1, 3)
positions_all = torch.cat([positions_starts, positions_ends_diff], 0)
sdf_all = self.sdf_network(positions_all)
sdf_start = sdf_all[:positions_starts.shape[0]]
sdf_end_diff = sdf_all[positions_starts.shape[0]:]
sdf_start_shift_left = sdf_start[1:]
sdf_start_shift_left = torch.cat([sdf_start_shift_left, sdf_start[-1:]], 0)
sdf_start_shift_left[diff_mask] = sdf_end_diff
inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6)  # Single parameter
inv_s = inv_s.expand(sdf_start.shape[0], 1)
prev_cdf = torch.sigmoid(sdf_start * inv_s)
next_cdf = torch.sigmoid(sdf_start_shift_left * inv_s)
p = prev_cdf - next_cdf
c = prev_cdf
# opacity from the drop of the sigmoid CDF across the sample, clipped to [0, 1]
alpha = ((p + 1e-5) / (c + 1e-5)).view(-1).clip(0.0, 1.0)
alpha = alpha.reshape(-1, 1)
if ret_sdf:
return alpha, sdf_start, sdf_start_shift_left
else:
return alpha
# sample locations are chosen without tracking gradients
with torch.no_grad():
patch_indices, t_starts_patch_center, t_ends_patch_center = ray_marching(
rays_o_patch_center, rays_d_patch_center,
t_min=near,
t_max=far,
grid=self.occupancy_grid,
render_step_size=self.sampling_step_size,
stratified=True,
cone_angle=0.0,
early_stop_eps=1e-8,
alpha_fn=alpha_fn_patch_center,
)
# average number of samples per patch-centre ray (a float)
samples_per_ray = patch_indices.shape[0] / num_patch
if patch_indices.shape[0] == 0:  # all patch center rays are within the zero region of the occ grid. skip this iteration.
return {
"comp_normal": torch.zeros([num_patch, patch_H, patch_W, 3], device=rays_o_patch_center.device)
}
num_samples = patch_indices.shape[0]
patch_indices = patch_indices.long()
# compute the sampling distance on remaining rays:
# t_all = t_center * (d_center . n) / (d_all . n), with n the marching plane normal
t_starts_patch_all = t_starts_patch_center[:, None, None, :] * (rays_d_patch_center * marching_plane_normal).sum(-1, keepdim=True)[patch_indices][:, None, None, :] \
/(rays_d_patch_all * marching_plane_normal[:, None, None, :]).sum(-1, keepdim=True)[patch_indices]
t_ends_patch_all = t_ends_patch_center[:, None, None, :] * (rays_d_patch_center * marching_plane_normal).sum(-1, keepdim=True)[patch_indices][:, None, None, :] \
/(rays_d_patch_all * marching_plane_normal[:, None, None, :]).sum(-1, keepdim=True)[patch_indices]
t_starts_patch_center_shift_left = t_starts_patch_center[1:]
t_starts_patch_center_shift_left = torch.cat([t_starts_patch_center_shift_left, t_starts_patch_center[-1:]], 0)
# True where a sample's end does not coincide with the next sample's start (same scheme as in alpha_fn_patch_center)
diff_mask = ((t_ends_patch_center - t_starts_patch_center_shift_left) != 0)[..., 0]
positions_starts_patch_all = rays_o_patch_all[patch_indices] + rays_d_patch_all[patch_indices] * t_starts_patch_all
positions_ends_patch_all = rays_o_patch_all[patch_indices] + rays_d_patch_all[patch_indices] * t_ends_patch_all  # (num_samples, patch_H, patch_W, 3)
positions_ends_diff = positions_ends_patch_all[diff_mask]
# one batched SDF query for all start points plus the end points that cannot be reused
positions_all = torch.cat([positions_starts_patch_all, positions_ends_diff], 0)
positions_all_flat = positions_all.reshape(-1, 3)
sdf_all = self.sdf_network(positions_all_flat)
sdf_all = sdf_all.reshape(*positions_all.shape[:-1], 1)
sdf_starts_patch_all = sdf_all[:positions_starts_patch_all.shape[0]]
sdf_end_diff = sdf_all[positions_starts_patch_all.shape[0]:]
# end-point SDF defaults to the next sample's start SDF, overwritten where diff_mask is set
sdf_ends_patch_all = sdf_starts_patch_all[1:]
sdf_ends_patch_all = torch.cat([sdf_ends_patch_all, sdf_starts_patch_all[-1:]], 0)
sdf_ends_patch_all[diff_mask] = sdf_end_diff
inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6)  # Single parameter
prev_cdf = torch.sigmoid(sdf_starts_patch_all * inv_s)  # (num_samples, patch_H, patch_W, 1)
next_cdf = torch.sigmoid(sdf_ends_patch_all * inv_s)  # (num_samples, patch_H, patch_W, 1)
p = prev_cdf - next_cdf
c = prev_cdf
alpha = ((p + 1e-5) / (c + 1e-5)).clip(0.0, 1.0)  # (num_samples, patch_H, patch_W, 1)
weights_cuda = render_weight_from_alpha_patch_based(alpha.reshape(num_samples, patch_H*patch_W, 1), patch_indices)  # (num_samples, patch_H, patch_W, 1)
# choose the gradient scheme depending on the mode
if mode == 'train':
gradient_method = self.gradient_method
elif mode == 'eval':
gradient_method = val_gradient_method
if gradient_method == "dfd":
# "dfd": finite differences along the ray and along the image x/y directions within the patch,
# converted to a gradient with the precomputed V_inverse matrices
with torch.no_grad():
# distance between neighboring points on the same marching plane
dist_x = torch.norm(positions_starts_patch_all[:, :, 1:, :] -
positions_starts_patch_all[:, :, :-1, :], dim=-1, keepdim=True)  # (num_samples, patch_H, patch_W-1, 1)
dist_y = torch.norm(positions_starts_patch_all[:, 1:, :, :] -
positions_starts_patch_all[:, :-1, :, :], dim=-1, keepdim=True)  # (num_samples, patch_H-1, patch_W, 1)
# directional derivatives along the ray direction
# forward difference
df_dt = (sdf_ends_patch_all - sdf_starts_patch_all) / (t_ends_patch_all - t_starts_patch_all)  # (num_samples, patch_H, patch_W, 1)
# directional derivatives along the image's x-direction
# central difference
df_dx = (sdf_starts_patch_all[:, :, 2:] - sdf_starts_patch_all[:, :, :-2]) / (dist_x[:, :, :-1] + dist_x[:, :, 1:] )  # (num_samples, patch_H, patch_W-2, 1)
# directional derivatives along the image's y-direction
# central difference
df_dy = (sdf_starts_patch_all[:, 2:, :] - sdf_starts_patch_all[:, :-2, :]) / (dist_y[:, 1:, :] + dist_y[:, :-1, :])  # (num_samples, patch_H-2, patch_W, 1)
# for points only have one-side neighbor point,
# we use forward or backward difference correspondingly
df_dx_left_boundary = (sdf_starts_patch_all[:, :, 1:2] - sdf_starts_patch_all[:, :, 0:1]) / dist_x[:, :, 0:1]  # (num_samples, patch_H, 1)
df_dx_right_boundary = (sdf_starts_patch_all[:, :, -1:] - sdf_starts_patch_all[:, :, -2:-1]) / dist_x[:, :, -1:]  # (num_samples, patch_H, 1)
df_dy_top_boundary = (sdf_starts_patch_all[:, 1:2, :] - sdf_starts_patch_all[:, 0:1, :]) / dist_y[:, 0:1, :]  # (num_samples, 1, patch_W)
df_dy_bottom_boundary = (sdf_starts_patch_all[:, -1:, :] - sdf_starts_patch_all[:, -2:-1, :]) / dist_y[:, -1:, :]  # (num_samples, 1, patch_W)
# concat the directional derivatives for boundary points and central points
df_dx = torch.cat([df_dx_left_boundary, df_dx, df_dx_right_boundary], dim=2)  # (num_samples, patch_H, patch_W, 1)
df_dy = torch.cat([df_dy_top_boundary, df_dy, df_dy_bottom_boundary], dim=1)  # (num_samples, patch_H, patch_W, 1)
# concat the directional partial derivatives in three directions
projected_gradients = torch.cat([df_dt,
df_dx,
df_dy], dim=-1)  # (num_samples, patch_H, patch_W, 3)
# recover the gradients from directional partial derivatives using the inverse of known directions
V_inverse = V_inverse_patch_all[patch_indices]  # (num_samples, patch_H, patch_W, 3, 3)
gradients = (V_inverse @ projected_gradients[..., None])[..., 0]  # (num_samples, patch_H, patch_W, 3)
elif gradient_method == "ad":
# "ad": automatic differentiation of the SDF network at the sample start points
gradients = self.sdf_network.gradient(positions_starts_patch_all.reshape(-1, 3)).reshape(num_samples, patch_H, patch_W, 3)
elif gradient_method == "fd":
# 6-point finite difference: query the SDF at +/- fd_epsilon along each world axis
self.fd_epsilon = 1e-3
positions_xn = positions_starts_patch_all + torch.tensor([[[[-self.fd_epsilon, 0, 0]]]], device=positions_starts_patch_all.device).expand(
positions_starts_patch_all.shape)
positions_xp = positions_starts_patch_all + torch.tensor([[[[self.fd_epsilon, 0, 0]]]], device=positions_starts_patch_all.device).expand(
positions_starts_patch_all.shape)
positions_yn = positions_starts_patch_all + torch.tensor([[[[0, -self.fd_epsilon, 0]]]], device=positions_starts_patch_all.device).expand(
positions_starts_patch_all.shape)
positions_yp = positions_starts_patch_all + torch.tensor([[[[0, self.fd_epsilon, 0]]]], device=positions_starts_patch_all.device).expand(
positions_starts_patch_all.shape)
positions_zn = positions_starts_patch_all + torch.tensor([[[[0, 0, -self.fd_epsilon]]]], device=positions_starts_patch_all.device).expand(
positions_starts_patch_all.shape)
positions_zp = positions_starts_patch_all + torch.tensor([[[[0, 0, self.fd_epsilon]]]], device=positions_starts_patch_all.device).expand(
positions_starts_patch_all.shape)
# a single batched network call for all six offset copies
positions_concat = torch.cat(
[positions_xn, positions_xp, positions_yn, positions_yp, positions_zn, positions_zp], 0).to(
torch.float32).reshape(-1, 3)
sdf_concat = self.sdf_network(positions_concat).reshape(-1, patch_H, patch_W, 1)
num_samples = positions_starts_patch_all.shape[0]
sdf_xn = sdf_concat[:num_samples].reshape(num_samples, patch_H, patch_W, 1)
sdf_xp = sdf_concat[num_samples:2 * num_samples].reshape(num_samples, patch_H, patch_W, 1)
sdf_yn = sdf_concat[2 * num_samples:3 * num_samples].reshape(num_samples, patch_H, patch_W, 1)
sdf_yp = sdf_concat[3 * num_samples:4 * num_samples].reshape(num_samples, patch_H, patch_W, 1)
sdf_zn = sdf_concat[4 * num_samples:5 * num_samples].reshape(num_samples, patch_H, patch_W, 1)
sdf_zp = sdf_concat[5 * num_samples:].reshape(num_samples, patch_H, patch_W, 1)
df_dx = (sdf_xp - sdf_xn) / (2 * self.fd_epsilon)
df_dy = (sdf_yp - sdf_yn) / (2 * self.fd_epsilon)
df_dz = (sdf_zp - sdf_zn) / (2 * self.fd_epsilon)
# stacking on a new last axis yields (num_samples, patch_H, patch_W, 1, 3); the reshape below flattens it
gradients = torch.stack([df_dx, df_dy, df_dz], -1)
# accumulate weights and weighted gradients per patch; results are reshaped to num_patch leading entries below
weights_sum_cuda = accumulate_along_rays_patch_based(weights_cuda, patch_indices, n_patches=num_patch)
weights_sum = weights_sum_cuda.reshape(num_patch, patch_H, patch_W, 1)
comp_normals_cuda = accumulate_along_rays_patch_based(weights_cuda, patch_indices, values=gradients.reshape(num_samples,patch_H * patch_W, 3),n_patches=num_patch)
comp_normal = comp_normals_cuda.reshape(num_patch, patch_H, patch_W, 3)
inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6)  # Single parameter
return {
's_val': 1/inv_s,
'weight_sum': weights_sum,
'gradients': gradients,
"comp_normal": comp_normal,
"samples_per_ray": samples_per_ray,
}
@torch.no_grad()
def render_normal_pixel_based(self, rays_o, rays_d, near, far):
    """
    Render a normal and a depth value per ray by ray marching individual rays.

    Gradients are evaluated at the midpoint of every sample with the SDF network's
    autograd-based ``gradient`` and accumulated with the rendering weights.

    Parameters:
        rays_o: Ray origins, indexed by ray along the first axis.
        rays_d: Ray directions, same layout as ``rays_o``.
        near, far: Marching range per ray (squeezed before being passed to ``ray_marching``).

    Returns:
        (comp_normal, comp_depth): Weighted sums of SDF gradients and of sample midpoints per ray.
    """

    def alpha_fn(t_starts, t_ends, ray_indices, ret_sdf=False):
        # Opacity of each sample, used both inside ray marching and for the final weights.
        ray_ids = ray_indices.long()
        origins = rays_o[ray_ids]
        directions = rays_d[ray_ids]
        pts_start = origins + directions * t_starts
        pts_end = origins + directions * t_ends

        # Start of the following sample; the last sample is paired with itself.
        next_t_starts = torch.cat([t_starts[1:], t_starts[-1:]], 0)
        # True where a sample's end is not the start of the following sample.
        gap_mask = ((t_ends - next_t_starts) != 0).squeeze()
        pts_end_gap = pts_end[gap_mask].reshape(-1, 3)

        # One batched query: all start points, then the end points that cannot be reused.
        num_starts = pts_start.shape[0]
        sdf_vals = self.sdf_network(torch.cat([pts_start, pts_end_gap], 0))
        sdf_start = sdf_vals[:num_starts]
        sdf_next = torch.cat([sdf_start[1:], sdf_start[-1:]], 0)
        sdf_next[gap_mask] = sdf_vals[num_starts:]

        inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6)  # Single parameter
        inv_s = inv_s.expand(sdf_start.shape[0], 1)

        cdf_here = torch.sigmoid(sdf_start * inv_s)
        cdf_next = torch.sigmoid(sdf_next * inv_s)
        opacity = ((cdf_here - cdf_next + 1e-5) / (cdf_here + 1e-5)).view(-1).clip(0.0, 1.0)
        opacity = opacity.reshape(-1, 1)

        if ret_sdf:
            return opacity, sdf_start, sdf_next
        return opacity

    ray_indices, t_starts, t_ends = ray_marching(
        rays_o, rays_d,
        t_min=near.squeeze(),
        t_max=far.squeeze(),
        grid=self.occupancy_grid,
        render_step_size=self.sampling_step_size,
        stratified=True,
        cone_angle=0.0,
        alpha_thre=0.0,
        early_stop_eps=1e-3,
        alpha_fn=alpha_fn,
    )

    alpha = alpha_fn(t_starts, t_ends, ray_indices)
    ray_indices = ray_indices.long()

    # Evaluate the SDF gradient at the midpoint of every sample.
    midpoints = (t_starts + t_ends) / 2.
    positions = rays_o[ray_indices] + rays_d[ray_indices] * midpoints
    gradients = self.sdf_network.gradient(positions).reshape(-1, 3)

    n_rays = rays_o.shape[0]
    weights = render_weight_from_alpha(alpha, ray_indices=ray_indices, n_rays=n_rays)  # [n_samples, 1]
    comp_normal = accumulate_along_rays(weights, ray_indices, values=gradients, n_rays=n_rays)
    comp_depth = accumulate_along_rays(weights, ray_indices, values=midpoints, n_rays=n_rays)
    return comp_normal, comp_depth
def extract_geometry(self, bound_min, bound_max, resolution, threshold=0.0):
    """
    Extract a mesh from the SDF network inside the given bounding box.

    The module-level ``extract_geometry`` is called with the negated SDF as the field.
    """
    def negated_sdf(pts):
        return -self.sdf_network.sdf(pts)

    return extract_geometry(bound_min,
                            bound_max,
                            resolution=resolution,
                            threshold=threshold,
                            query_func=negated_sdf)
================================================
FILE: run_diligent.sh
================================================
#!/bin/bash
# Run exp_runner.py with the DiLiGenT-MV config once per benchmark object.
# The shebang lets the script be launched directly as ./run_diligent.sh.
for obj_name in buddha pot2 reading bear cow; do
    python exp_runner.py --conf config/diligent.conf --obj_name "$obj_name"
done
================================================
FILE: run_own_object.sh
================================================
#!/bin/bash
# Run exp_runner.py with the own-objects config once per captured object.
# The shebang lets the script be launched directly as ./run_own_object.sh.
for obj_name in lion dog1 woman; do
    python exp_runner.py --conf config/own_objects.conf --obj_name "$obj_name"
done
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/building.yml
================================================
name: Building Wheels
on: [workflow_dispatch]
jobs:
# Build one wheel per (os, python, torch, cuda) combination and upload it to S3.
wheel:
runs-on: ${{ matrix.os }}
environment: production
strategy:
# keep building the remaining combinations when one of them fails
fail-fast: false
matrix:
os: [ubuntu-18.04, windows-2019]
python-version: ['3.7', '3.8', '3.9']
torch-version: [1.10.0, 1.11.0, 1.12.0, 1.13.0]
cuda-version: ['cu102', 'cu113', 'cu116', 'cu117']
# os: [ubuntu-18.04]
# python-version: ['3.9']
# torch-version: [1.10.0]
# cuda-version: ['cu102']
# torch/CUDA (and OS) combinations that are removed from the matrix
exclude:
- torch-version: 1.10.0
cuda-version: 'cu116'
- torch-version: 1.10.0
cuda-version: 'cu117'
- torch-version: 1.11.0
cuda-version: 'cu116'
- torch-version: 1.11.0
cuda-version: 'cu117'
- torch-version: 1.12.0
cuda-version: 'cu117'
- torch-version: 1.13.0
cuda-version: 'cu102'
- torch-version: 1.13.0
cuda-version: 'cu113'
- os: windows-2019
torch-version: 1.11.0
cuda-version: 'cu102'
- os: windows-2019
torch-version: 1.12.0
cuda-version: 'cu102'
# - os: macos-10.15
#   cuda-version: 'cu102'
# - os: macos-10.15
#   cuda-version: 'cu113'
# - os: macos-10.15
#   cuda-version: 'cu116'
# - os: macos-10.15
#   cuda-version: 'cu117'
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Upgrade pip
run: |
pip install --upgrade setuptools
pip install ninja
- name: Free up disk space
if: ${{ runner.os == 'Linux' }}
run: |
sudo rm -rf /usr/share/dotnet
# runs the per-version installer script from .github/workflows/cuda/
- name: Install CUDA ${{ matrix.cuda-version }}
if: ${{ matrix.cuda-version != 'cpu' }}
run: |
bash .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}.sh
- name: Install PyTorch ${{ matrix.torch-version }}+${{ matrix.cuda-version }}
run: |
pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/${{ matrix.cuda-version }}
python -c "import torch; print('PyTorch:', torch.__version__)"
python -c "import torch; print('CUDA:', torch.version.cuda)"
python -c "import torch; print('CUDA Available:', torch.cuda.is_available())"
# replaces lines 31-38 of ATen/Parallel.h in the installed torch with a plain declaration
- name: Patch PyTorch static constexpr on Windows
if: ${{ runner.os == 'Windows' }}
run: |
Torch_DIR=`python -c 'import os; import torch; print(os.path.dirname(torch.__file__))'`
sed -i '31,38c\
TORCH_API void lazy_init_num_threads();' ${Torch_DIR}/include/ATen/Parallel.h
shell: bash
# appends a local version tag to nerfacc/version.py, e.g. torch 1.10.0 + cu102 -> "+pt110cu102"
- name: Set version
if: ${{ runner.os != 'macOS' }}
run: |
VERSION=`sed -n 's/^__version__ = "\(.*\)"/\1/p' nerfacc/version.py`
TORCH_VERSION=`echo "pt${{ matrix.torch-version }}" | sed "s/..$//" | sed "s/\.//g"`
CUDA_VERSION=`echo ${{ matrix.cuda-version }}`
echo "New version name: $VERSION+$TORCH_VERSION$CUDA_VERSION"
sed -i "s/$VERSION/$VERSION+$TORCH_VERSION$CUDA_VERSION/" nerfacc/version.py
shell:
bash
# only runs for a 'cpu' matrix entry; the matrix above currently lists none
- name: Install main package for CPU
if: ${{ matrix.cuda-version == 'cpu' }}
run: |
FORCE_ONLY_CPU=1 pip install -e .
shell:
bash
# the *-env.sh script sets the CUDA paths and build flags before the extension is compiled
- name: Install main package for GPU
if: ${{ matrix.cuda-version != 'cpu' }}
run: |
source .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}-env.sh
pip install .
shell:
bash
- name: Test installation
run: |
python -c "import nerfacc; print('nerfacc:', nerfacc.__version__)"
- name: Build wheel
run: |
pip install wheel
source .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}-env.sh
python setup.py bdist_wheel --dist-dir=dist
shell: bash
- name: Configure AWS
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-west-2
# wheels are grouped by torch/CUDA version in the bucket and granted public read access
- name: Upload wheel
run: |
aws s3 sync dist s3://nerfacc-bucket/whl/torch-${{ matrix.torch-version }}_${{ matrix.cuda-version }} --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers
# Runs scripts/run_aws_listing.py against the wheel bucket once every wheel job has finished.
update_aws_listing:
needs: [wheel]
runs-on: ubuntu-latest
environment: production
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Upgrade pip
run: |
pip install --upgrade setuptools
pip install boto3
- name: Configure AWS
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-west-2
# NOTE(review): the credentials are also passed as command-line arguments here,
# in addition to the "Configure AWS" step above — confirm the script needs both.
- name: Update AWS listing
run: |
python scripts/run_aws_listing.py \
--access_key_id=${{ secrets.AWS_ACCESS_KEY_ID }} \
--secret_access_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} \
--bucket="nerfacc-bucket" \
--region="us-west-2"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/code_checks.yml
================================================
# Formatting checks (isort + black) run on pushes and pull requests targeting master.
name: Core Tests.
on:
push:
branches: [master]
pull_request:
branches: [master]
# the workflow only needs to read the repository contents
permissions:
contents: read
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.8.12
uses: actions/setup-python@v4
with:
python-version: "3.8.12"
# formatter versions are pinned so the check result is reproducible
- name: Install dependencies
run: |
pip install isort==5.10.1 black[jupyter]==22.3.0
# --check only reports violations; no files are rewritten
- name: Run isort
run: isort docs/ nerfacc/ scripts/ examples/ tests/ --profile black --skip examples/pycolmap --line-length 80 --check
- name: Run Black
run: black docs/ nerfacc/ scripts/ examples/ tests/ --exclude examples/pycolmap --line-length 80 --check
# - name: Python Pylint
#   run: |
#     pylint nerfacc/ tests/ scripts/ examples/
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu101-Linux-env.sh
================================================
#!/bin/bash
# Environment for building against CUDA 10.1 on Linux; meant to be sourced before the build.

CUDA_HOME=/usr/local/cuda-10.1
PATH="${CUDA_HOME}/bin:${PATH}"
LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"

# Force a CUDA build and select the GPU architectures to compile for.
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu101-Linux.sh
================================================
#!/bin/bash
# Install the CUDA 10.1 compiler and development libraries on Ubuntu 18.04
# from NVIDIA's local repository package.

OS=ubuntu1804
CUDA_REPO_DEB=cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb

# Pin the CUDA repository.
wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600

# Download and register the local repository package and its signing key.
wget -nv https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/${CUDA_REPO_DEB}
sudo dpkg -i ${CUDA_REPO_DEB}
sudo apt-key add /var/cuda-repo-10-1-local-10.1.243-418.87.00/7fa2af80.pub

sudo apt-get -qq update
sudo apt install -y cuda-nvcc-10-1 cuda-libraries-dev-10-1
sudo apt clean

# Remove the downloaded package file (the local file, not the download URL).
rm -f "${CUDA_REPO_DEB}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu101-Windows-env.sh
================================================
#!/bin/bash
# Environment for building against CUDA 10.1 on Windows; meant to be sourced before the build.

CUDA_HOME="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1"

# MSBuild comes first on PATH, followed by the CUDA binaries, then the existing PATH.
PATH="/c/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/MSBuild/15.0/Bin:${CUDA_HOME}/bin:${PATH}"

# Force a CUDA build and select the GPU architectures to compile for.
export FORCE_CUDA=1
export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu101-Windows.sh
================================================
#!/bin/bash
# Provision CUDA 10.1 on Windows: unpack the NVIDIA driver DLLs into System32,
# then run the CUDA installer silently for a fixed set of toolkit components.

# Install NVIDIA drivers, see:
# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102
# NOTE(review): -k makes curl skip TLS certificate verification.
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip"
7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32"

export CUDA_SHORT=10.1
# No trailing slash: the download below joins URL and file name with "/", and a
# trailing slash here produced ".../local_installers//<file>".
export CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers"
export CUDA_FILE="cuda_${CUDA_SHORT}.243_426.00_win10.exe"

# Install CUDA:
curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}"
echo ""
echo "Installing from ${CUDA_FILE}..."
# -s runs the installer silently; every following argument is a component
# name suffixed with the toolkit version.
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow"
echo "Done!"
# The installer executable is no longer needed once the install has finished.
rm -f "${CUDA_FILE}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu102-Linux-env.sh
================================================
#!/bin/bash
# Build-time environment for the CUDA 10.2 toolkit on Linux.
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

# Prepend the toolkit's bin/ and lib64/ directories to the search paths.
CUDA_HOME="/usr/local/cuda-10.2"
PATH="${CUDA_HOME}/bin:${PATH}"
LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu102-Linux.sh
================================================
#!/bin/bash
# Install the CUDA 10.2 compiler and development libraries on an Ubuntu 18.04
# machine from NVIDIA's local-repository .deb package.
OS=ubuntu1804
CUDA_DEB="cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb"

# Fetch NVIDIA's apt pin file and place it in the apt preferences directory.
wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600

# Download and register the local repository package, then add its signing key.
wget -nv "https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
sudo apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub

# Install only nvcc and the development libraries, not the full toolkit.
sudo apt-get -qq update
sudo apt install -y cuda-nvcc-10-2 cuda-libraries-dev-10-2
sudo apt clean

# Delete the downloaded package. This used to be given the download URL, which
# names no local file, so the .deb was never removed.
rm -f "${CUDA_DEB}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu102-Windows-env.sh
================================================
#!/bin/bash
# Build-time environment for the CUDA 10.2 toolkit on Windows (POSIX-style /c/... paths).
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

CUDA_HOME="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2"
# Resulting search order: MSBuild 15.0 (VS 2017 Build Tools), CUDA bin, previous PATH.
PATH="/c/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/MSBuild/15.0/Bin:${CUDA_HOME}/bin:${PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu102-Windows.sh
================================================
#!/bin/bash
# Provision CUDA 10.2 on Windows: unpack the NVIDIA driver DLLs into System32,
# then run the CUDA installer silently for a fixed set of toolkit components.

# Install NVIDIA drivers, see:
# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102
# NOTE(review): -k makes curl skip TLS certificate verification.
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip"
7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32"

export CUDA_SHORT=10.2
export CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers"
export CUDA_FILE="cuda_${CUDA_SHORT}.89_441.22_win10.exe"

# Installer arguments: -s (silent) followed by each component name suffixed
# with the toolkit version.
installer_args="-s"
for component in nvcc cuobjdump nvprune cupti cublas_dev cudart cufft_dev curand_dev cusolver_dev cusparse_dev npp_dev nvrtc_dev nvml_dev; do
  installer_args="${installer_args} ${component}_${CUDA_SHORT}"
done

# Install CUDA:
curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}"
echo
echo "Installing from ${CUDA_FILE}..."
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"${installer_args}\" -Wait -NoNewWindow"
echo "Done!"
# The installer executable is no longer needed once the install has finished.
rm -f "${CUDA_FILE}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu111-Linux-env.sh
================================================
#!/bin/bash
# Build-time environment for the CUDA 11.1 toolkit on Linux.
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

# Prepend the toolkit's bin/ and lib64/ directories to the search paths.
CUDA_HOME="/usr/local/cuda-11.1"
PATH="${CUDA_HOME}/bin:${PATH}"
LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu111-Linux.sh
================================================
#!/bin/bash
# Install the CUDA 11.1 compiler and development libraries on an Ubuntu 18.04
# machine from NVIDIA's local-repository .deb package.
OS=ubuntu1804
CUDA_DEB="cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb"

# Fetch NVIDIA's apt pin file and place it in the apt preferences directory.
wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600

# Download and register the local repository package, then add its signing key.
wget -nv "https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
sudo apt-key add "/var/cuda-repo-${OS}-11-1-local/7fa2af80.pub"

# Install only nvcc and the development libraries, not the full toolkit.
sudo apt-get -qq update
sudo apt install -y cuda-nvcc-11-1 cuda-libraries-dev-11-1
sudo apt clean

# Delete the downloaded package. This used to be given the download URL, which
# names no local file, so the .deb was never removed.
rm -f "${CUDA_DEB}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu111-Windows-env.sh
================================================
#!/bin/bash
# Build-time environment for the CUDA 11.1 toolkit on Windows (POSIX-style /c/... paths).
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="6.0+PTX"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

CUDA_HOME="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.1"
# Resulting search order: MSBuild 15.0 (VS 2017 Build Tools), CUDA bin, previous PATH.
PATH="/c/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/MSBuild/15.0/Bin:${CUDA_HOME}/bin:${PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu111-Windows.sh
================================================
#!/bin/bash
# Provision CUDA 11.1 on Windows: unpack the NVIDIA driver DLLs into System32,
# then run the CUDA installer silently for a fixed set of toolkit components.

# Install NVIDIA drivers, see:
# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102
# NOTE(review): -k makes curl skip TLS certificate verification.
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip"
7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32"

export CUDA_SHORT=11.1
export CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers"
export CUDA_FILE="cuda_${CUDA_SHORT}.1_456.81_win10.exe"

# Installer arguments: -s (silent) followed by each component name suffixed
# with the toolkit version.
installer_args="-s"
for component in nvcc cuobjdump nvprune cupti cublas_dev cudart cufft_dev curand_dev cusolver_dev cusparse_dev npp_dev nvrtc_dev nvml_dev; do
  installer_args="${installer_args} ${component}_${CUDA_SHORT}"
done

# Install CUDA:
curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}"
echo
echo "Installing from ${CUDA_FILE}..."
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"${installer_args}\" -Wait -NoNewWindow"
echo "Done!"
# The installer executable is no longer needed once the install has finished.
rm -f "${CUDA_FILE}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu113-Linux-env.sh
================================================
#!/bin/bash
# Build-time environment for the CUDA 11.3 toolkit on Linux.
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

# Prepend the toolkit's bin/ and lib64/ directories to the search paths.
CUDA_HOME="/usr/local/cuda-11.3"
PATH="${CUDA_HOME}/bin:${PATH}"
LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu113-Linux.sh
================================================
#!/bin/bash
# Install the CUDA 11.3 compiler and development libraries on an Ubuntu 18.04
# machine from NVIDIA's local-repository .deb package.
OS=ubuntu1804
CUDA_DEB="cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb"

# Fetch NVIDIA's apt pin file and place it in the apt preferences directory.
wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600

# Download and register the local repository package, then add its signing key.
wget -nv "https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
sudo apt-key add "/var/cuda-repo-${OS}-11-3-local/7fa2af80.pub"

# Install only nvcc and the development libraries, not the full toolkit.
sudo apt-get -qq update
sudo apt install -y cuda-nvcc-11-3 cuda-libraries-dev-11-3
sudo apt clean

# Delete the downloaded package. This used to be given the download URL, which
# names no local file, so the .deb was never removed.
rm -f "${CUDA_DEB}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu113-Windows-env.sh
================================================
#!/bin/bash
# Build-time environment for the CUDA 11.3 toolkit on Windows (POSIX-style /c/... paths).
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="6.0+PTX"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

CUDA_HOME="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.3"
# Resulting search order: MSBuild 15.0 (VS 2017 Build Tools), CUDA bin, previous PATH.
PATH="/c/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/MSBuild/15.0/Bin:${CUDA_HOME}/bin:${PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu113-Windows.sh
================================================
#!/bin/bash
# Provision CUDA 11.3 on Windows: unpack the NVIDIA driver DLLs into System32,
# then run the CUDA installer silently for a fixed set of toolkit components.

# Install NVIDIA drivers, see:
# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102
# NOTE(review): -k makes curl skip TLS certificate verification.
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip"
7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32"

export CUDA_SHORT=11.3
export CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers"
export CUDA_FILE="cuda_${CUDA_SHORT}.0_465.89_win10.exe"

# Installer arguments: -s (silent) followed by each component name suffixed
# with the toolkit version.
installer_args="-s"
for component in nvcc cuobjdump nvprune cupti cublas_dev cudart cufft_dev curand_dev cusolver_dev cusparse_dev thrust npp_dev nvrtc_dev nvml_dev; do
  installer_args="${installer_args} ${component}_${CUDA_SHORT}"
done

# Install CUDA:
curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}"
echo
echo "Installing from ${CUDA_FILE}..."
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"${installer_args}\" -Wait -NoNewWindow"
echo "Done!"
# The installer executable is no longer needed once the install has finished.
rm -f "${CUDA_FILE}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu115-Linux-env.sh
================================================
#!/bin/bash
# Build-time environment for the CUDA 11.5 toolkit on Linux.
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

# Prepend the toolkit's bin/ and lib64/ directories to the search paths.
CUDA_HOME="/usr/local/cuda-11.5"
PATH="${CUDA_HOME}/bin:${PATH}"
LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu115-Linux.sh
================================================
#!/bin/bash
# Install the CUDA 11.5 compiler and development libraries on an Ubuntu 18.04
# machine from NVIDIA's local-repository .deb package.
OS=ubuntu1804
CUDA_DEB="cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb"

# Fetch NVIDIA's apt pin file and place it in the apt preferences directory.
wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600

# Download and register the local repository package, then add its signing key.
wget -nv "https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
sudo apt-key add "/var/cuda-repo-${OS}-11-5-local/7fa2af80.pub"

# Install only nvcc and the development libraries, not the full toolkit.
sudo apt-get -qq update
sudo apt install -y cuda-nvcc-11-5 cuda-libraries-dev-11-5
sudo apt clean

# Delete the downloaded package. This used to be given the download URL, which
# names no local file, so the .deb was never removed.
rm -f "${CUDA_DEB}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu115-Windows-env.sh
================================================
#!/bin/bash
# Build-time environment for the cu115 Windows build (POSIX-style /c/... paths).
# CUDA_HOME points at the v11.3 toolkit; the companion cu115-Windows.sh installer
# carries a TODO stating that CUDA 11.3 is used to build the 11.5 Windows wheels.
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="6.0+PTX"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

CUDA_HOME="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.3"
# Resulting search order: MSBuild 15.0 (VS 2017 Build Tools), CUDA bin, previous PATH.
PATH="/c/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/MSBuild/15.0/Bin:${CUDA_HOME}/bin:${PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu115-Windows.sh
================================================
#!/bin/bash
# TODO We currently use CUDA 11.3 to build CUDA 11.5 Windows wheels
#
# Provision CUDA on Windows: unpack the NVIDIA driver DLLs into System32, then
# run the CUDA 11.3 installer silently for a fixed set of toolkit components.

# Install NVIDIA drivers, see:
# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102
# NOTE(review): -k makes curl skip TLS certificate verification.
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip"
7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32"

export CUDA_SHORT=11.3
export CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers"
export CUDA_FILE="cuda_${CUDA_SHORT}.0_465.89_win10.exe"

# Installer arguments: -s (silent) followed by each component name suffixed
# with the toolkit version.
installer_args="-s"
for component in nvcc cuobjdump nvprune cupti cublas_dev cudart cufft_dev curand_dev cusolver_dev cusparse_dev thrust npp_dev nvrtc_dev nvml_dev; do
  installer_args="${installer_args} ${component}_${CUDA_SHORT}"
done

# Install CUDA:
curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}"
echo
echo "Installing from ${CUDA_FILE}..."
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"${installer_args}\" -Wait -NoNewWindow"
echo "Done!"
# The installer executable is no longer needed once the install has finished.
rm -f "${CUDA_FILE}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu116-Linux-env.sh
================================================
#!/bin/bash
# Build-time environment for the CUDA 11.6 toolkit on Linux.
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

# Prepend the toolkit's bin/ and lib64/ directories to the search paths.
CUDA_HOME="/usr/local/cuda-11.6"
PATH="${CUDA_HOME}/bin:${PATH}"
LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu116-Linux.sh
================================================
#!/bin/bash
# Install the CUDA 11.6 compiler and development libraries on an Ubuntu 18.04
# machine from NVIDIA's local-repository .deb package.
OS=ubuntu1804
CUDA_DEB="cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb"

# Fetch NVIDIA's apt pin file and place it in the apt preferences directory.
wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600

# Download and register the local repository package, then add its signing key.
wget -nv "https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
sudo apt-key add "/var/cuda-repo-${OS}-11-6-local/7fa2af80.pub"

# Install only nvcc and the development libraries, not the full toolkit.
sudo apt-get -qq update
sudo apt install -y cuda-nvcc-11-6 cuda-libraries-dev-11-6
sudo apt clean

# Delete the downloaded package. This used to be given a download URL (with a
# stale 11.5.2 path), which names no local file, so the .deb was never removed.
rm -f "${CUDA_DEB}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu116-Windows-env.sh
================================================
#!/bin/bash
# Build-time environment for the cu116 Windows build (POSIX-style /c/... paths).
# CUDA_HOME points at the v11.3 toolkit; the companion cu116-Windows.sh installer
# carries a TODO stating that CUDA 11.3 is used to build the 11.6 Windows wheels.
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="6.0+PTX"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

CUDA_HOME="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.3"
# Resulting search order: MSBuild 15.0 (VS 2017 Build Tools), CUDA bin, previous PATH.
PATH="/c/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/MSBuild/15.0/Bin:${CUDA_HOME}/bin:${PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu116-Windows.sh
================================================
#!/bin/bash
# TODO We currently use CUDA 11.3 to build CUDA 11.6 Windows wheels
#
# Provision CUDA on Windows: unpack the NVIDIA driver DLLs into System32, then
# run the CUDA 11.3 installer silently for a fixed set of toolkit components.

# Install NVIDIA drivers, see:
# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102
# NOTE(review): -k makes curl skip TLS certificate verification.
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip"
7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32"

export CUDA_SHORT=11.3
export CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers"
export CUDA_FILE="cuda_${CUDA_SHORT}.0_465.89_win10.exe"

# Installer arguments: -s (silent) followed by each component name suffixed
# with the toolkit version.
installer_args="-s"
for component in nvcc cuobjdump nvprune cupti cublas_dev cudart cufft_dev curand_dev cusolver_dev cusparse_dev thrust npp_dev nvrtc_dev nvml_dev; do
  installer_args="${installer_args} ${component}_${CUDA_SHORT}"
done

# Install CUDA:
curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}"
echo
echo "Installing from ${CUDA_FILE}..."
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"${installer_args}\" -Wait -NoNewWindow"
echo "Done!"
# The installer executable is no longer needed once the install has finished.
rm -f "${CUDA_FILE}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu117-Linux-env.sh
================================================
#!/bin/bash
# Build-time environment for the CUDA 11.7 toolkit on Linux.
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

# Prepend the toolkit's bin/ and lib64/ directories to the search paths.
CUDA_HOME="/usr/local/cuda-11.7"
PATH="${CUDA_HOME}/bin:${PATH}"
LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu117-Linux.sh
================================================
#!/bin/bash
# Install the CUDA 11.7 compiler and development libraries on an Ubuntu 18.04
# machine from NVIDIA's local-repository .deb package.
OS=ubuntu1804
CUDA_DEB="cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb"

# Fetch NVIDIA's apt pin file and place it in the apt preferences directory.
wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin"
sudo mv "cuda-${OS}.pin" /etc/apt/preferences.d/cuda-repository-pin-600

# Download and register the local repository package, then copy its keyring
# into /usr/share/keyrings/ (this script copies a keyring instead of using apt-key).
wget -nv "https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/${CUDA_DEB}"
sudo dpkg -i "${CUDA_DEB}"
# The glob stays outside the quotes so the shell still expands it.
sudo cp "/var/cuda-repo-${OS}-11-7-local"/cuda-*-keyring.gpg /usr/share/keyrings/

# Install only nvcc and the development libraries, not the full toolkit.
sudo apt-get -qq update
sudo apt install -y cuda-nvcc-11-7 cuda-libraries-dev-11-7
sudo apt clean

# Delete the downloaded package. This used to be given the download URL, which
# names no local file, so the .deb was never removed.
rm -f "${CUDA_DEB}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu117-Windows-env.sh
================================================
#!/bin/bash
# Build-time environment for the cu117 Windows build (POSIX-style /c/... paths).
# CUDA_HOME points at the v11.3 toolkit; the companion cu117-Windows.sh installer
# carries a TODO stating that CUDA 11.3 is used to build the 11.7 Windows wheels.
# NOTE(review): presumably sourced by the CI job rather than executed — confirm.

# Exported build switches. TORCH_CUDA_ARCH_LIST is PyTorch's list of compute
# capabilities to compile for; FORCE_CUDA is presumably read by the project's
# setup.py — confirm.
FORCE_CUDA=1
TORCH_CUDA_ARCH_LIST="6.0+PTX"
export FORCE_CUDA TORCH_CUDA_ARCH_LIST

CUDA_HOME="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.3"
# Resulting search order: MSBuild 15.0 (VS 2017 Build Tools), CUDA bin, previous PATH.
PATH="/c/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/MSBuild/15.0/Bin:${CUDA_HOME}/bin:${PATH}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu117-Windows.sh
================================================
#!/bin/bash
# TODO We currently use CUDA 11.3 to build CUDA 11.7 Windows wheels
#
# Provision CUDA on Windows: unpack the NVIDIA driver DLLs into System32, then
# run the CUDA 11.3 installer silently for a fixed set of toolkit components.

# Install NVIDIA drivers, see:
# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102
# NOTE(review): -k makes curl skip TLS certificate verification.
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip"
7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32"

export CUDA_SHORT=11.3
export CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers"
export CUDA_FILE="cuda_${CUDA_SHORT}.0_465.89_win10.exe"

# Installer arguments: -s (silent) followed by each component name suffixed
# with the toolkit version.
installer_args="-s"
for component in nvcc cuobjdump nvprune cupti cublas_dev cudart cufft_dev curand_dev cusolver_dev cusparse_dev thrust npp_dev nvrtc_dev nvml_dev; do
  installer_args="${installer_args} ${component}_${CUDA_SHORT}"
done

# Install CUDA:
curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}"
echo
echo "Installing from ${CUDA_FILE}..."
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"${installer_args}\" -Wait -NoNewWindow"
echo "Done!"
# The installer executable is no longer needed once the install has finished.
rm -f "${CUDA_FILE}"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/publish.yml
================================================
# This workflow uploads a Python package using twine when a release is created.
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
name: Upload Python Package
# Trigger: a GitHub release of type "created".
on:
release:
types: [created]
branches: [master]
jobs:
# Single job: check out the repository, build the distributions, upload them.
deploy:
runs-on: ubuntu-latest
environment: production
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: '3.7'
# `build` creates the distributions; `twine` uploads them.
- name: Install dependencies
run: |
python -m pip install build twine
# Deletes a range of lines from README.md before packaging.
# NOTE(review): both sed address patterns in the step below are empty (`//,//`),
# which looks like marker text (presumably HTML comment markers) was lost —
# confirm the intended start/end patterns against the upstream workflow.
- name: Strip unsupported tags in README
run: |
sed -i '//,//d' README.md
# Builds with BUILD_NO_CUDA=1 set, then uploads everything under dist/ using
# the PYPI_TOKEN repository secret as the password for the __token__ user.
- name: Build and publish
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: |
BUILD_NO_CUDA=1 python -m build
twine upload --username __token__ --password $PYPI_TOKEN dist/*
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.gitignore
================================================
# Visual Studio Code configs.
.vscode/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
# lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.DS_Store
# Direnv config.
.envrc
# line_profiler
*.lprof
# vscode
.vscode
benchmarks/
outputs/
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.gitmodules
================================================
[submodule "examples/pycolmap"]
path = examples/pycolmap
url = https://github.com/rmbrualla/pycolmap.git
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.pre-commit-config.yaml
================================================
# pre-commit hooks: generic file hygiene, black formatting, and isort import sorting.
repos:
# Whitespace/EOF fixers plus YAML, merge-conflict and requirements.txt checks.
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- id: check-yaml
- id: check-merge-conflict
- id: requirements-txt-fixer
# black, limited to 80 columns.
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black
language_version: python3.8.12
args: # arguments to configure black
- --line-length=80
# NOTE(review): the isort hook is given no arguments here, while the CI check runs
# isort with `--profile black --line-length 80`; confirm both pick up the same settings.
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.readthedocs.yaml
================================================
# Read the Docs build configuration (config file format version 2).
version: 2
# Build image: Ubuntu 20.04 with Python 3.9.
build:
os: ubuntu-20.04
tools:
python: "3.9"
# Sphinx build driven by docs/source/conf.py; warnings fail the build.
sphinx:
fail_on_warning: true
configuration: docs/source/conf.py
# Install the package itself, then the documentation requirements.
python:
install:
# Equivalent to 'pip install .'
- method: pip
path: .
# Equivalent to 'pip install -r docs/requirements.txt'
- requirements: docs/requirements.txt
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/CMakeLists.txt
================================================
# cmake_minimum_required(VERSION 3.3)
# project(nerfacc LANGUAGES CXX CUDA)
# find_package(pybind11 REQUIRED)
# find_package(Torch REQUIRED)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
# set(SOURCE_DIR nerfacc/cuda/csrc)
# set(INCLUDE_DIR nerfacc/cuda/csrc/include)
# file(GLOB SOURCES ${SOURCE_DIR}/*.cu)
# pybind11_add_module(${PROJECT_NAME} SHARED ${SOURCES})
# target_link_libraries(${PROJECT_NAME} PRIVATE "${TORCH_LIBRARIES}")
# target_include_directories(${PROJECT_NAME} PRIVATE "${INCLUDE_DIR}")
# # message(STATUS "CUDA enabled")
# # set( CMAKE_CUDA_STANDARD 14 )
# # set( CMAKE_CUDA_STANDARD_REQUIRED ON)
# # find_package(pybind11 REQUIRED)
# # # find_package(Python3 REQUIRED COMPONENTS Development)
# # # target_link_libraries(${PROJECT_NAME} PRIVATE Python3::Python)
# # find_package(Torch REQUIRED)
# # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
# # target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES})
# # set(CSRC nerfacc/cuda/csrc)
# # file(GLOB_RECURSE ALL_SOURCES ${ALL_SOURCES} ${CSRC}/*.cu)
# # file(GLOB_RECURSE ALL_HEADERS ${CSRC}/include/*.h)
# # add_library(${PROJECT_NAME} SHARED ${ALL_SOURCES})
# # target_include_directories(${PROJECT_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
# # set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
# # message("-- CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
# # message("-- CMAKE_CXX_FLAGS_DEBUG: ${CMAKE_CXX_FLAGS_DEBUG}")
# # message("-- CMAKE_CXX_FLAGS_RELEASE: ${CMAKE_CXX_FLAGS_RELEASE}")
# # set_target_properties(${PROJECT_NAME} PROPERTIES
# # EXPORT_NAME nerfacc
# # INSTALL_RPATH ${TORCH_INSTALL_PREFIX}/lib)
# # Cmake creates *.dylib by default, but python expects *.so by default
# # if (APPLE)
# # set_property(TARGET ${PROJECT_NAME} PROPERTY SUFFIX .so)
# # endif()
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/LICENSE
================================================
MIT License
Copyright (c) 2022 Ruilong Li
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/MANIFEST.in
================================================
include nerfacc/cuda/csrc/include/*
include nerfacc/cuda/csrc/*
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/README.md
================================================
[Code checks](https://github.com/KAIR-BAIR/nerfacc/actions/workflows/code_checks.yml)
[Documentation](https://www.nerfacc.com/en/latest/?badge=latest)
[Downloads](https://pepy.tech/project/nerfacc)
https://www.nerfacc.com/
NerfAcc is a PyTorch NeRF acceleration toolbox for both training and inference. It focuses on efficient volumetric rendering of radiance fields, which is universal and plug-and-play for most NeRFs.
Using NerfAcc,
- The `vanilla NeRF` model with 8-layer MLPs can be trained to *better quality* (+~0.5 PSNR)
in *1 hour* rather than *days* as in the paper.
- The `Instant-NGP NeRF` model can be trained to *equal quality* in *4.5 minutes*,
compared to the official pure-CUDA implementation.
- The `D-NeRF` model for *dynamic* objects can also be trained in *1 hour*
rather than *2 days* as in the paper, and with *better quality* (+~2.5 PSNR).
- Both *bounded* and *unbounded* scenes are supported.
**And it is a pure Python interface with flexible APIs!**
## Installation
**Dependency**: Please install [PyTorch](https://pytorch.org/get-started/locally/) first.
The easiest way is to install from PyPI. This way, the CUDA code is built **on the first run** (JIT).
```
pip install nerfacc
```
Or install from source. In this way it will build the CUDA code during installation.
```
pip install git+https://github.com/KAIR-BAIR/nerfacc.git
```
We also provide pre-built wheels covering major combinations of Pytorch + CUDA supported by [official Pytorch](https://pytorch.org/get-started/previous-versions/).
```
# e.g., torch 1.13.0 + cu117
pip install nerfacc -f https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl/torch-1.13.0_cu117.html
```
| Windows & Linux | `cu102` | `cu113` | `cu116` | `cu117` |
|-----------------|---------|---------|---------|---------|
| torch 1.10.0 | ✅ | ✅ | | |
| torch 1.11.0 | ✅* | ✅ | | |
| torch 1.12.0 | ✅* | ✅ | ✅ | |
| torch 1.13.0 | | | ✅ | ✅ |
\* PyTorch does not provide Windows pre-built wheels for those combinations, so we do not support them either.
## Usage
The idea of NerfAcc is to perform efficient ray marching and volumetric rendering. So NerfAcc can work with any user-defined radiance field. To plug the NerfAcc rendering pipeline into your code and enjoy the acceleration, you only need to define two functions with your radiance field.
- `sigma_fn`: Compute density at each sample. It will be used by `nerfacc.ray_marching()` to skip the empty and occluded space during ray marching, which is where the major speedup comes from.
- `rgb_sigma_fn`: Compute color and density at each sample. It will be used by `nerfacc.rendering()` to conduct differentiable volumetric rendering. This function will receive gradients to update your network.
A simple example is like this:
``` python
from typing import Tuple

import torch
import torch.nn.functional as F
from torch import Tensor

import nerfacc

radiance_field = ...  # network: a NeRF model
rays_o: Tensor = ...  # ray origins. (n_rays, 3)
rays_d: Tensor = ...  # ray normalized directions. (n_rays, 3)
color_gt: Tensor = ...  # ground-truth pixel colors. (n_rays, 3)
optimizer = ...  # optimizer


def sigma_fn(
    t_starts: Tensor, t_ends: Tensor, ray_indices: Tensor
) -> Tensor:
    """ Query density values from a user-defined radiance field.
    :params t_starts: Start of the sample interval along the ray. (n_samples, 1).
    :params t_ends: End of the sample interval along the ray. (n_samples, 1).
    :params ray_indices: Ray indices that each sample belongs to. (n_samples,).
    :returns The post-activation density values. (n_samples, 1).
    """
    t_origins = rays_o[ray_indices]  # (n_samples, 3)
    t_dirs = rays_d[ray_indices]  # (n_samples, 3)
    # Each sample is evaluated at the midpoint of its [t_start, t_end] interval.
    positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0
    sigmas = radiance_field.query_density(positions)
    return sigmas  # (n_samples, 1)


def rgb_sigma_fn(
    t_starts: Tensor, t_ends: Tensor, ray_indices: Tensor
) -> Tuple[Tensor, Tensor]:
    """ Query rgb and density values from a user-defined radiance field.
    :params t_starts: Start of the sample interval along the ray. (n_samples, 1).
    :params t_ends: End of the sample interval along the ray. (n_samples, 1).
    :params ray_indices: Ray indices that each sample belongs to. (n_samples,).
    :returns The post-activation rgb and density values.
        (n_samples, 3), (n_samples, 1).
    """
    t_origins = rays_o[ray_indices]  # (n_samples, 3)
    t_dirs = rays_d[ray_indices]  # (n_samples, 3)
    # Each sample is evaluated at the midpoint of its [t_start, t_end] interval.
    positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0
    rgbs, sigmas = radiance_field(positions, condition=t_dirs)
    return rgbs, sigmas  # (n_samples, 3), (n_samples, 1)


# Efficient Raymarching: Skip empty and occluded space, pack samples from all rays.
# ray_indices: (n_samples,). t_starts: (n_samples, 1). t_ends: (n_samples, 1).
with torch.no_grad():
    ray_indices, t_starts, t_ends = nerfacc.ray_marching(
        rays_o, rays_d, sigma_fn=sigma_fn, near_plane=0.2, far_plane=1.0,
        early_stop_eps=1e-4, alpha_thre=1e-2,
    )

# Differentiable Volumetric Rendering.
# color: (n_rays, 3). opacity: (n_rays, 1). depth: (n_rays, 1).
color, opacity, depth = nerfacc.rendering(
    t_starts, t_ends, ray_indices, n_rays=rays_o.shape[0], rgb_sigma_fn=rgb_sigma_fn
)

# Optimize: Both the network and rays will receive gradients
optimizer.zero_grad()
loss = F.mse_loss(color, color_gt)
loss.backward()
optimizer.step()
```
## Examples:
Before running the example scripts, please check which dataset each script needs and download it first.
```bash
# clone the repo with submodules.
git clone --recursive https://github.com/KAIR-BAIR/nerfacc.git
```
``` bash
# Instant-NGP NeRF in 4.5 minutes with reproduced performance!
# See results at here: https://www.nerfacc.com/en/latest/examples/ngp.html
python examples/train_ngp_nerf.py --train_split train --scene lego
```
``` bash
# Vanilla MLP NeRF in 1 hour with better performance!
# See results at here: https://www.nerfacc.com/en/latest/examples/vanilla.html
python examples/train_mlp_nerf.py --train_split train --scene lego
```
```bash
# D-NeRF for Dynamic objects in 1 hour with better performance!
# See results at here: https://www.nerfacc.com/en/latest/examples/dnerf.html
python examples/train_mlp_dnerf.py --train_split train --scene lego
```
```bash
# Instant-NGP on unbounded scenes in 20 minutes!
# See results at here: https://www.nerfacc.com/en/latest/examples/unbounded.html
python examples/train_ngp_nerf.py --train_split train --scene garden --auto_aabb --unbounded --cone_angle=0.004
```
Used by:
- [nerfstudio](https://github.com/nerfstudio-project/nerfstudio): A collaboration friendly studio for NeRFs.
- [instant-nsr-pl](https://github.com/bennyguo/instant-nsr-pl): NeuS in 10 minutes.
## Common Installation Issues
ImportError: .../csrc.so: undefined symbol
If you are installing a pre-built wheel, make sure the PyTorch and CUDA versions match the nerfacc version (`nerfacc.__version__`).
## Citation
```bibtex
@article{li2022nerfacc,
title={NerfAcc: A General NeRF Acceleration Toolbox.},
author={Li, Ruilong and Tancik, Matthew and Kanazawa, Angjoo},
journal={arXiv preprint arXiv:2210.04847},
year={2022}
}
```
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to `sphinx-build -M <target>`; this file only
# supplies the source/build directories and any extra options.

# You can set these variables from the command line, and also
# from the environment for the first two.
# SPHINXOPTS: extra options appended to every sphinx-build invocation.
SPHINXOPTS ?=
# SPHINXBUILD: the sphinx-build executable to run.
SPHINXBUILD ?= sphinx-build
# SOURCEDIR / BUILDDIR: the two directory arguments passed to sphinx-build.
SOURCEDIR = source
BUILDDIR = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "help" and "Makefile" are declared phony: neither is a file to be built here.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# $@ expands to the requested target name (e.g. "html").
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/requirements.txt
================================================
pytorch_sphinx_theme @ git+https://github.com/liruilong940607/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
sphinx==5.2.1
sphinx-copybutton==0.5.0
sphinx-design==0.2.0
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/_static/css/readthedocs.css
================================================
/* Header logo: a fixed 156x35 px box painted with logo4x.png, scaled to the box. */
.header-logo {
  background-image: url("../images/logo4x.png");
  background-size: 156px 35px;
  height: 35px;
  width: 156px;
}

/* Use the default word-breaking rule for code text
   (presumably overriding a theme default; verify against the theme CSS). */
code {
  word-break: normal;
}
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.accumulate_along_rays.rst
================================================
nerfacc.accumulate\_along\_rays
===============================
.. currentmodule:: nerfacc
.. autofunction:: accumulate_along_rays
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.pack_data.rst
================================================
nerfacc.pack\_data
==================
.. currentmodule:: nerfacc
.. autofunction:: pack_data
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.ray_aabb_intersect.rst
================================================
nerfacc.ray\_aabb\_intersect
============================
.. currentmodule:: nerfacc
.. autofunction:: ray_aabb_intersect
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.ray_resampling.rst
================================================
nerfacc.ray\_resampling
=======================
.. currentmodule:: nerfacc
.. autofunction:: ray_resampling
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_transmittance_from_alpha.rst
================================================
nerfacc.render\_transmittance\_from\_alpha
==========================================
.. currentmodule:: nerfacc
.. autofunction:: render_transmittance_from_alpha
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_transmittance_from_density.rst
================================================
nerfacc.render\_transmittance\_from\_density
============================================
.. currentmodule:: nerfacc
.. autofunction:: render_transmittance_from_density
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_visibility.rst
================================================
nerfacc.render\_visibility
==========================
.. currentmodule:: nerfacc
.. autofunction:: render_visibility
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_weight_from_alpha.rst
================================================
nerfacc.render\_weight\_from\_alpha
===================================
.. currentmodule:: nerfacc
.. autofunction:: render_weight_from_alpha
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_weight_from_density.rst
================================================
nerfacc.render\_weight\_from\_density
=====================================
.. currentmodule:: nerfacc
.. autofunction:: render_weight_from_density
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.unpack_data.rst
================================================
nerfacc.unpack\_data
====================
.. currentmodule:: nerfacc
.. autofunction:: unpack_data
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.unpack_info.rst
================================================
nerfacc.unpack\_info
====================
.. currentmodule:: nerfacc
.. autofunction:: unpack_info
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/grid.rst
================================================
.. _`Occupancy Grid`:
Occupancy Grid
===================================
.. currentmodule:: nerfacc
.. autoclass:: ContractionType
:members:
.. autoclass:: Grid
:members:
.. autoclass:: OccupancyGrid
:members:
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/rendering.rst
================================================
Volumetric Rendering
===================================
In `nerfacc`, the volumetric rendering pipeline is broken down into 2 steps:
1. **Raymarching**: This is the process of shooting a ray through the scene and
generate samples along the way. To perform efficient volumetric rendering, here we aim
at skipping as many areas as possible. The empty space is skipped by using the cached
occupancy grid (see :class:`nerfacc.OccupancyGrid`), and the invisible space is skipped by
checking the transmittance of the ray while marching. Almost in all cases, those skipping
won't result in a noticeable loss of quality as they would contribute very little to the
final rendered image. But they will bring a significant speedup.
2. **Rendering**: This is the process of accumulating samples along the rays into final image.
In this step we also need to query the attributes (a.k.a. color and density) of those samples
generated by raymarching. Early stopping is supported in this step.
|
.. currentmodule:: nerfacc
.. autofunction:: ray_marching
.. autofunction:: rendering
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/utils.rst
================================================
Utils
===================================
.. currentmodule:: nerfacc
.. autosummary::
:nosignatures:
:toctree: generated/
ray_aabb_intersect
unpack_info
accumulate_along_rays
render_transmittance_from_density
render_transmittance_from_alpha
render_weight_from_density
render_weight_from_alpha
render_visibility
ray_resampling
pack_data
unpack_data
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/conf.py
================================================
import pytorch_sphinx_theme
# Sphinx configuration for the nerfacc documentation.

# `__version__` is overwritten by executing nerfacc/version.py below; the
# placeholder keeps the name defined for linters and readers.
__version__ = None
# The relative path is resolved against the current working directory.
# Use a context manager so the file handle is closed deterministically.
with open("../../nerfacc/version.py", "r") as _version_file:
    exec(_version_file.read())

# -- Project information

project = "nerfacc"
copyright = "2022, Ruilong"
author = "Ruilong"

release = __version__

# -- General configuration

extensions = [
    "sphinx.ext.napoleon",
    "sphinx.ext.duration",
    "sphinx.ext.doctest",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.intersphinx",
]

intersphinx_mapping = {
    "python": ("https://docs.python.org/3/", None),
    "sphinx": ("https://www.sphinx-doc.org/en/master/", None),
}
intersphinx_disabled_domains = ["std"]

templates_path = ["_templates"]

# -- Options for HTML output

# html_theme = "furo"
html_theme = "pytorch_sphinx_theme"
html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
html_static_path = ["_static"]
html_css_files = ["css/readthedocs.css"]

# Ignore >>> when copying code
copybutton_prompt_text = r">>> |\.\.\. "
copybutton_prompt_is_regexp = True

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
html_theme_options = {
    # The target url that the logo directs to. Unset to do nothing
    "logo_url": "https://www.nerfacc.com/en/latest/index.html",
    # "menu" is a list of dictionaries where you can specify the content and the
    # behavior of each item in the menu. Each item can either be a link or a
    # dropdown menu containing a list of links.
    "menu": [
        # A link
        {"name": "GitHub", "url": "https://github.com/KAIR-BAIR/nerfacc"},
        # A dropdown menu
        # {
        #     "name": "Projects",
        #     "children": [
        #         # A vanilla dropdown item
        #         {
        #             "name": "nerfstudio",
        #             "url": "https://docs.nerf.studio/",
        #             "description": "The all-in-one repo for NeRFs",
        #         },
        #     ],
        #     # Optional, determining whether this dropdown menu will always be
        #     # highlighted.
        #     # "active": True,
        # },
    ],
}
# html_theme_options = {
#     "canonical_url": "",
#     "analytics_id": "",
#     "logo_only": False,
#     "display_version": True,
#     "prev_next_buttons_location": "bottom",
#     "style_external_links": False,
#     # Toc options
#     "collapse_navigation": True,
#     "sticky_navigation": True,
#     "navigation_depth": 4,
#     "includehidden": True,
#     "titles_only": False
# }

# -- Options for EPUB output
epub_show_urls = "footnote"

# typehints
autodoc_typehints = "description"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/dnerf.rst
================================================
Dynamic Scene
====================
See code `examples/train_mlp_dnerf.py` at our `github repository`_ for details.
Benchmarks
------------
*updated on 2022-10-08*
Here we trained an 8-layer MLP for the radiance field and a 4-layer MLP for the warping field
(similar to the T-Nerf model in the `D-Nerf`_ paper) on the `D-Nerf dataset`_. We used train
split for training and test split for evaluation. Our experiments are conducted on a
single NVIDIA TITAN RTX GPU. The training memory footprint is about 11GB.
.. note::
The :ref:`Occupancy Grid` used in this example is shared by all the frames. In other words,
instead of using it to indicate the opacity of an area at a single timestamp,
here we use it to indicate the `maximum` opacity at this area `over all the timestamps`.
It is not optimal but still makes the rendering very efficient.
+----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
| PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN |
| | balls | warrior | | jacks | | | | | |
+======================+==========+=========+=======+=========+=======+========+=========+=======+=======+
| D-Nerf (~ days) | 32.80 | 25.02 | 29.25 | 32.80 | 21.64 | 31.29 | 32.79 | 31.75 | 29.67 |
+----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
| Ours (~ 1 hr) | 39.49 | 25.58 | 31.86 | 32.73 | 24.32 | 35.55 | 35.90 | 32.33 | 32.22 |
+----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
| Ours (Training time)| 37min | 52min | 69min | 64min | 44min | 79min | 79min | 39min | 58min |
+----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
.. _`D-Nerf`: https://arxiv.org/abs/2011.13961
.. _`D-Nerf dataset`: https://www.dropbox.com/s/0bf6fl0ye2vz3vr/data.zip?dl=0
.. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/ngp.rst
================================================
.. _`Instant-NGP Example`:
Instant-NGP
====================
See code `examples/train_ngp_nerf.py` at our `github repository`_ for details.
Benchmarks
------------
*updated on 2022-10-12*
Here we trained an `Instant-NGP Nerf`_ model on the `Nerf-Synthetic dataset`_. We follow the same
settings as the Instant-NGP paper, which uses train split for training and test split for
evaluation. All experiments are conducted on a single NVIDIA TITAN RTX GPU. The training
memory footprint is about 3GB.
.. note::
The Instant-NGP paper makes use of the alpha channel in the images to apply random background
augmentation during training. For fair comparison, we rerun their code with a constant white
background during both training and testing. It is also worth mentioning that we didn't strictly
follow the training recipe in the Instant-NGP paper, such as the learning rate schedule, as
the purpose of this benchmark is to showcase nerfacc rather than to reproduce the paper.
+-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
| PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN |
| | | | | | | | | | |
+=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+
|Instant-NGP 35k steps | 35.87 | 36.22 | 29.08 | 35.10 | 37.48 | 30.61 | 23.85 | 30.62 | 32.35 |
+-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
|(training time) | 309s | 258s | 256s | 316s | 292s | 207s | 218s | 250s | 263s |
+-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
|Ours 20k steps | 35.50 | 36.16 | 29.14 | 35.23 | 37.15 | 31.71 | 24.88 | 29.91 | 32.46 |
+-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
|(training time) | 287s | 274s | 269s | 317s | 269s | 244s | 249s | 257s | 271s |
+-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
.. _`Instant-NGP Nerf`: https://github.com/NVlabs/instant-ngp/tree/51e4107edf48338e9ab0316d56a222e0adf87143
.. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75
.. _`Nerf-Synthetic dataset`: https://drive.google.com/drive/folders/1JDdLGDruGNXWnM1eqY1FNL9PlStjaKWi
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/unbounded.rst
================================================
Unbounded Scene
====================
See code `examples/train_ngp_nerf.py` at our `github repository`_ for details.
Benchmarks
------------
*updated on 2022-11-07*
Here we trained an `Instant-NGP Nerf`_ on the `MipNerf360`_ dataset. We used train
split for training and test split for evaluation. Our experiments are conducted on a
single NVIDIA TITAN RTX GPU. The training memory footprint is about 6-9GB.
The main difference between working with unbounded scenes and bounded scenes, is that
a contraction method is needed to map the infinite space to a finite :ref:`Occupancy Grid`.
We provide different options for this (see :ref:`Occupancy Grid`). The experiments
here are basically the Instant-NGP experiments (see :ref:`Instant-NGP Example`) with a contraction method
taken from `MipNerf360`_.
.. note::
Even though we are comparing with `Nerf++`_ and `MipNerf360`_, the model and everything are
totally different with them. There are plenty of ideas from those papers that would be very
helpful for the performance, but we didn't adopt them. As this is just a simple example to
show how to use the library, we didn't want to make it too complicated.
+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+
| PSNR |Garden |Bicycle|Bonsai |Counter|Kitchen| Room | Stump | MEAN |
| | | | | | | | | |
+======================+=======+=======+=======+=======+=======+=======+=======+=======+
| Nerf++ (~days) | 24.32 | 22.64 | 29.15 | 26.38 | 27.80 | 28.87 | 24.34 | 26.21 |
+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+
| MipNerf360 (~days) | 26.98 | 24.37 | 33.46 | 29.55 | 32.23 | 31.63 | 26.40 | 29.23 |
+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+
| Ours (~20 mins) | 25.41 | 22.97 | 30.71 | 27.34 | 30.32 | 31.00 | 23.43 | 27.31 |
+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+
| Ours (Training time) | 25min | 17min | 19min | 23min | 28min | 20min | 17min | 21min |
+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+
.. _`Instant-NGP Nerf`: https://arxiv.org/abs/2201.05989
.. _`MipNerf360`: https://arxiv.org/abs/2111.12077
.. _`Nerf++`: https://arxiv.org/abs/2010.07492
.. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/vanilla.rst
================================================
Vanilla Nerf
====================
See code `examples/train_mlp_nerf.py` at our `github repository`_ for details.
Benchmarks
------------
*updated on 2022-10-08*
Here we trained an 8-layer MLP for the radiance field as in the `vanilla Nerf`_. We used the
train split for training and test split for evaluation as in the Nerf paper. Our experiments are
conducted on a single NVIDIA TITAN RTX GPU. The training memory footprint is about 10GB.
.. note::
The vanilla Nerf paper uses two MLPs for coarse-to-fine sampling. Instead, here we only use a
single MLP with more samples (1024). Both ways share the same spirit of doing dense sampling
around the surface. Our fast rendering inherently skips samples away from the surface,
so we can simply increase the number of samples with a single MLP to achieve the same goal
as the coarse-to-fine sampling, without runtime or memory issues.
+----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
| PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN |
| | | | | | | | | | |
+======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+
| NeRF (~ days) | 32.54 | 32.91 | 29.62 | 33.00 | 36.18 | 30.13 | 25.01 | 28.65 | 31.00 |
+----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
| Ours (~ 50min) | 33.69 | 33.76 | 29.73 | 33.32 | 35.80 | 32.52 | 25.39 | 28.18 | 31.55 |
+----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
| Ours (Training time)| 58min | 53min | 46min | 62min | 56min | 42min | 52min | 49min | 52min |
+----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
.. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75
.. _`vanilla Nerf`: https://arxiv.org/abs/2003.08934
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/index.rst
================================================
NerfAcc Documentation
===================================
NerfAcc is a PyTorch Nerf acceleration toolbox for both training and inference. It focuses on
efficient volumetric rendering of radiance fields, which is universal and plug-and-play for most of the NeRFs.
Using NerfAcc,
- The `vanilla Nerf`_ model with 8-layer MLPs can be trained to *better quality* (+~0.5 PSNR) \
in *1 hour* rather than *1~2 days* as in the paper.
- The `Instant-NGP Nerf`_ model can be trained to *equal quality* in *4.5 minutes*, \
comparing to the official pure-CUDA implementation.
- The `D-Nerf`_ model for *dynamic* objects can also be trained in *1 hour* \
rather than *2 days* as in the paper, and with *better quality* (+~2.5 PSNR).
- Both *bounded* and *unbounded* scenes are supported.
**And it is a pure Python interface with flexible APIs!**
| Github: https://github.com/KAIR-BAIR/nerfacc
| Paper: https://arxiv.org/pdf/2210.04847.pdf
| Authors: `Ruilong Li`_, `Matthew Tancik`_, `Angjoo Kanazawa`_
.. note::
This repo is focusing on the single scene situation. Generalizable Nerfs across
multiple scenes is currently out of the scope of this repo. But you may still find
some useful tricks in this repo. :)
Installation:
-------------
.. code-block:: console
$ pip install nerfacc
Usage:
-------------
The idea of NerfAcc is to perform efficient ray marching and volumetric rendering.
So NerfAcc can work with any user-defined radiance field. To plug the NerfAcc rendering
pipeline into your code and enjoy the acceleration, you only need to define two functions
with your radiance field.
- `sigma_fn`: Compute density at each sample. It will be used by :func:`nerfacc.ray_marching` to skip the empty and occluded space during ray marching, which is where the major speedup comes from.
- `rgb_sigma_fn`: Compute color and density at each sample. It will be used by :func:`nerfacc.rendering` to conduct differentiable volumetric rendering. This function will receive gradients to update your network.
A simple example looks like this:
.. code-block:: python
import torch
from torch import Tensor
import nerfacc
radiance_field = ... # network: a NeRF model
rays_o: Tensor = ... # ray origins. (n_rays, 3)
rays_d: Tensor = ... # ray normalized directions. (n_rays, 3)
optimizer = ... # optimizer
def sigma_fn(
t_starts: Tensor, t_ends:Tensor, ray_indices: Tensor
) -> Tensor:
""" Query density values from a user-defined radiance field.
:params t_starts: Start of the sample interval along the ray. (n_samples, 1).
:params t_ends: End of the sample interval along the ray. (n_samples, 1).
:params ray_indices: Ray indices that each sample belongs to. (n_samples,).
:returns The post-activation density values. (n_samples, 1).
"""
t_origins = rays_o[ray_indices] # (n_samples, 3)
t_dirs = rays_d[ray_indices] # (n_samples, 3)
positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0
sigmas = radiance_field.query_density(positions)
return sigmas # (n_samples, 1)
def rgb_sigma_fn(
t_starts: Tensor, t_ends: Tensor, ray_indices: Tensor
) -> Tuple[Tensor, Tensor]:
""" Query rgb and density values from a user-defined radiance field.
:params t_starts: Start of the sample interval along the ray. (n_samples, 1).
:params t_ends: End of the sample interval along the ray. (n_samples, 1).
:params ray_indices: Ray indices that each sample belongs to. (n_samples,).
:returns The post-activation rgb and density values.
(n_samples, 3), (n_samples, 1).
"""
t_origins = rays_o[ray_indices] # (n_samples, 3)
t_dirs = rays_d[ray_indices] # (n_samples, 3)
positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0
rgbs, sigmas = radiance_field(positions, condition=t_dirs)
return rgbs, sigmas # (n_samples, 3), (n_samples, 1)
# Efficient Raymarching: Skip empty and occluded space, pack samples from all rays.
# ray_indices: (n_samples,). t_starts: (n_samples, 1). t_ends: (n_samples, 1).
with torch.no_grad():
ray_indices, t_starts, t_ends = nerfacc.ray_marching(
rays_o, rays_d, sigma_fn=sigma_fn, near_plane=0.2, far_plane=1.0,
early_stop_eps=1e-4, alpha_thre=1e-2,
)
# Differentiable Volumetric Rendering.
# colors: (n_rays, 3). opaicity: (n_rays, 1). depth: (n_rays, 1).
color, opacity, depth = nerfacc.rendering(
t_starts, t_ends, ray_indices, n_rays=rays_o.shape[0], rgb_sigma_fn=rgb_sigma_fn
)
# Optimize: Both the network and rays will receive gradients
optimizer.zero_grad()
loss = F.mse_loss(color, color_gt)
loss.backward()
optimizer.step()
Links:
-------------
.. toctree::
:glob:
:maxdepth: 1
:caption: Python API
apis/*
.. toctree::
:glob:
:maxdepth: 1
:caption: Example Usages
examples/*
.. toctree::
:maxdepth: 1
:caption: Projects
nerfstudio
.. _`vanilla Nerf`: https://arxiv.org/abs/2003.08934
.. _`Instant-NGP Nerf`: https://arxiv.org/abs/2201.05989
.. _`D-Nerf`: https://arxiv.org/abs/2011.13961
.. _`MipNerf360`: https://arxiv.org/abs/2111.12077
.. _`pixel-Nerf`: https://arxiv.org/abs/2012.02190
.. _`Nerf++`: https://arxiv.org/abs/2010.07492
.. _`Ruilong Li`: https://www.liruilong.cn/
.. _`Matthew Tancik`: https://www.matthewtancik.com/
.. _`Angjoo Kanazawa`: https://people.eecs.berkeley.edu/~kanazawa/
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/__init__.py
================================================
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/dnerf_synthetic.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import json
import os
import imageio.v2 as imageio
import numpy as np
import torch
import torch.nn.functional as F
from .utils import Rays
def _load_renderings(root_fp: str, subject_id: str, split: str):
    """Load the images, camera poses and timestamps of one split from disk.

    Args:
        root_fp: Dataset root directory. A path that does not start with "/"
            is resolved relative to the directory two levels above this file.
        subject_id: Name of the subject sub-directory inside ``root_fp``.
        split: Split name; selects the ``transforms_<split>.json`` file.

    Returns:
        A tuple ``(images, camtoworlds, focal, timestamps)``. The first,
        second and fourth entries are numpy arrays stacked over the frames
        along axis 0; ``focal`` is the focal length in pixels derived from
        the image width and ``camera_angle_x``.
    """
    if not root_fp.startswith("/"):
        # allow relative path. e.g., "./data/dnerf_synthetic/"
        root_fp = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "..",
            "..",
            root_fp,
        )

    data_dir = os.path.join(root_fp, subject_id)
    with open(
        os.path.join(data_dir, "transforms_{}.json".format(split)), "r"
    ) as fp:
        meta = json.load(fp)

    frames = meta["frames"]
    # Frames without an explicit "time" are spread evenly over [0, 1]. With a
    # single frame there is no span to divide by, so clamp the denominator
    # to 1 (timestamp 0.0) instead of raising ZeroDivisionError.
    time_denominator = max(len(frames) - 1, 1)

    images = []
    camtoworlds = []
    timestamps = []
    for i, frame in enumerate(frames):
        fname = os.path.join(data_dir, frame["file_path"] + ".png")
        rgba = imageio.imread(fname)
        timestamp = (
            frame["time"] if "time" in frame else float(i) / time_denominator
        )
        timestamps.append(timestamp)
        camtoworlds.append(frame["transform_matrix"])
        images.append(rgba)

    images = np.stack(images, axis=0)
    camtoworlds = np.stack(camtoworlds, axis=0)
    timestamps = np.stack(timestamps, axis=0)

    # Axis 2 of the stacked images is the image width.
    w = images.shape[2]
    camera_angle_x = float(meta["camera_angle_x"])
    focal = 0.5 * w / np.tan(0.5 * camera_angle_x)

    return images, camtoworlds, focal, timestamps
class SubjectLoader(torch.utils.data.Dataset):
"""Single subject data loader for training and evaluation."""
SPLITS = ["train", "val", "test"]
SUBJECT_IDS = [
"bouncingballs",
"hellwarrior",
"hook",
"jumpingjacks",
"lego",
"mutant",
"standup",
"trex",
]
WIDTH, HEIGHT = 800, 800
NEAR, FAR = 2.0, 6.0
OPENGL_CAMERA = True
def __init__(
self,
subject_id: str,
root_fp: str,
split: str,
color_bkgd_aug: str = "white",
num_rays: int = None,
near: float = None,
far: float = None,
batch_over_images: bool = True,
):
super().__init__()
assert split in self.SPLITS, "%s" % split
assert subject_id in self.SUBJECT_IDS, "%s" % subject_id
assert color_bkgd_aug in ["white", "black", "random"]
self.split = split
self.num_rays = num_rays
self.near = self.NEAR if near is None else near
self.far = self.FAR if far is None else far
self.training = (num_rays is not None) and (
split in ["train", "trainval"]
)
self.color_bkgd_aug = color_bkgd_aug
self.batch_over_images = batch_over_images
(
self.images,
self.camtoworlds,
self.focal,
self.timestamps,
) = _load_renderings(root_fp, subject_id, split)
self.images = torch.from_numpy(self.images).to(torch.uint8)
self.camtoworlds = torch.from_numpy(self.camtoworlds).to(torch.float32)
self.timestamps = torch.from_numpy(self.timestamps).to(torch.float32)[
:, None
]
self.K = torch.tensor(
[
[self.focal, 0, self.WIDTH / 2.0],
[0, self.focal, self.HEIGHT / 2.0],
[0, 0, 1],
],
dtype=torch.float32,
) # (3, 3)
assert self.images.shape[1:3] == (self.HEIGHT, self.WIDTH)
def __len__(self):
return len(self.images)
@torch.no_grad()
def __getitem__(self, index):
data = self.fetch_data(index)
data = self.preprocess(data)
return data
def preprocess(self, data):
"""Process the fetched / cached data with randomness."""
rgba, rays = data["rgba"], data["rays"]
pixels, alpha = torch.split(rgba, [3, 1], dim=-1)
if self.training:
if self.color_bkgd_aug == "random":
color_bkgd = torch.rand(3, device=self.images.device)
elif self.color_bkgd_aug == "white":
color_bkgd = torch.ones(3, device=self.images.device)
elif self.color_bkgd_aug == "black":
color_bkgd = torch.zeros(3, device=self.images.device)
else:
# just use white during inference
color_bkgd = torch.ones(3, device=self.images.device)
pixels = pixels * alpha + color_bkgd * (1.0 - alpha)
return {
"pixels": pixels, # [n_rays, 3] or [h, w, 3]
"rays": rays, # [n_rays,] or [h, w]
"color_bkgd": color_bkgd, # [3,]
**{k: v for k, v in data.items() if k not in ["rgba", "rays"]},
}
def update_num_rays(self, num_rays):
self.num_rays = num_rays
def fetch_data(self, index):
    """Fetch the data (it maybe cached for multiple batches).

    In training mode `self.num_rays` random pixels are drawn, either across
    all images (`batch_over_images`) or from image `index` only. Otherwise
    every pixel of image `index` is used.

    Returns:
        dict with "rgba" (colours scaled to [0, 1]), "rays" (a `Rays` tuple of
        origins and unit view directions) and "timestamps" of the sampled
        images.
    """
    num_rays = self.num_rays
    device = self.images.device

    if not self.training:
        # Evaluation: enumerate all pixels of one image, row by row.
        image_id = [index]
        x, y = torch.meshgrid(
            torch.arange(self.WIDTH, device=device),
            torch.arange(self.HEIGHT, device=device),
            indexing="xy",
        )
        x, y = x.flatten(), y.flatten()
    else:
        if self.batch_over_images:
            image_id = torch.randint(
                0, len(self.images), size=(num_rays,), device=device
            )
        else:
            image_id = [index]
        x = torch.randint(0, self.WIDTH, size=(num_rays,), device=device)
        y = torch.randint(0, self.HEIGHT, size=(num_rays,), device=device)

    # Gather colours and poses for the selected pixels.
    rgba = self.images[image_id, y, x] / 255.0  # (num_rays, 4)
    c2w = self.camtoworlds[image_id]  # (num_rays, 3, 4)

    # Pixel centres (hence the +0.5) back-projected through the intrinsics.
    # With an OpenGL camera the y axis is flipped and the camera looks
    # along -z, otherwise along +z.
    axis_sign = -1.0 if self.OPENGL_CAMERA else 1.0
    dirs_xy = torch.stack(
        [
            (x - self.K[0, 2] + 0.5) / self.K[0, 0],
            (y - self.K[1, 2] + 0.5) / self.K[1, 1] * axis_sign,
        ],
        dim=-1,
    )
    camera_dirs = F.pad(dirs_xy, (0, 1), value=axis_sign)  # [num_rays, 3]

    # Rotate the camera-space directions by the 3x3 block of each pose.
    directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1)
    origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape)
    viewdirs = directions / torch.linalg.norm(
        directions, dim=-1, keepdims=True
    )

    leading_shape = (num_rays,) if self.training else (self.HEIGHT, self.WIDTH)
    origins = torch.reshape(origins, leading_shape + (3,))
    viewdirs = torch.reshape(viewdirs, leading_shape + (3,))
    rgba = torch.reshape(rgba, leading_shape + (4,))

    return {
        "rgba": rgba,  # [h, w, 4] or [num_rays, 4]
        "rays": Rays(origins=origins, viewdirs=viewdirs),  # [h, w, 3] or [num_rays, 3]
        "timestamps": self.timestamps[image_id],  # [num_rays, 1]
    }
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/nerf_360_v2.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import collections
import os
import sys
import imageio
import numpy as np
import torch
import torch.nn.functional as F
import tqdm
from .utils import Rays
_PATH = os.path.abspath(__file__)
sys.path.insert(
0, os.path.join(os.path.dirname(_PATH), "..", "pycolmap", "pycolmap")
)
from scene_manager import SceneManager
def _load_colmap(root_fp: str, subject_id: str, split: str, factor: int = 1):
    """Load images, camera-to-world poses and intrinsics of a COLMAP scene.

    Args:
        root_fp: Directory containing one sub-directory per scene.
        subject_id: Name of the scene sub-directory.
        split: "train" or "test". After sorting by image name, every image
            whose position is a multiple of 8 is a test image and all the
            others are training images.
        factor: Downsampling factor (1, 2, 4 or 8). Images are read from
            "images_<factor>" ("images" when 1) and the first two rows of
            the intrinsics are divided by it.

    Returns:
        Tuple `(images, camtoworlds, K)`: the stacked images of the split,
        the matching 4x4 camera-to-world matrices and the 3x3 intrinsics.

    Raises:
        AssertionError: If `factor` is unsupported or the camera model has
            distortion parameters (only pinhole models are supported).
        ValueError: If the camera model is unknown or an image folder is
            missing.
        KeyError: If `split` is neither "train" nor "test".
    """
    assert factor in [1, 2, 4, 8]

    data_dir = os.path.join(root_fp, subject_id)
    colmap_dir = os.path.join(data_dir, "sparse/0/")

    manager = SceneManager(colmap_dir)
    manager.load_cameras()
    manager.load_images()

    # Assume shared intrinsics between all cameras.
    cam = manager.cameras[1]
    fx, fy, cx, cy = cam.fx, cam.fy, cam.cx, cam.cy
    # Force a float dtype so the in-place division below is always valid.
    K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float64)
    K[:2, :] /= factor

    # Extract extrinsic matrices in world-to-camera format.
    imdata = manager.images
    image_records = [imdata[k] for k in imdata]
    bottom = np.array([0, 0, 0, 1]).reshape(1, 4)
    w2c_mats = np.stack(
        [
            np.concatenate(
                [np.concatenate([im.R(), im.tvec.reshape(3, 1)], 1), bottom],
                axis=0,
            )
            for im in image_records
        ],
        axis=0,
    )

    # Convert extrinsics to camera-to-world. The poses are returned in
    # COLMAP's own camera frame; no axis flip is applied here.
    camtoworlds = np.linalg.inv(w2c_mats)

    # Image names from COLMAP. No need for permuting the poses according to
    # image names anymore.
    image_names = [im.name for im in image_records]

    # Only distortion-free camera models are supported. An unknown model is
    # reported explicitly instead of failing later on an unbound variable.
    pinhole_types = (0, "SIMPLE_PINHOLE", 1, "PINHOLE")
    distorted_types = (
        2,
        "SIMPLE_RADIAL",
        3,
        "RADIAL",
        4,
        "OPENCV",
        5,
        "OPENCV_FISHEYE",
    )
    type_ = cam.camera_type
    if type_ not in pinhole_types and type_ not in distorted_types:
        raise ValueError(f"Unknown COLMAP camera type: {type_!r}")
    assert type_ in pinhole_types, "Only support pinhole camera model."

    # Previous Nerf results were generated with images sorted by filename,
    # ensure metrics are reported on the same test set.
    inds = np.argsort(image_names)
    image_names = [image_names[i] for i in inds]
    camtoworlds = camtoworlds[inds]

    # Load images.
    image_dir_suffix = f"_{factor}" if factor > 1 else ""
    colmap_image_dir = os.path.join(data_dir, "images")
    image_dir = os.path.join(data_dir, "images" + image_dir_suffix)
    for d in [image_dir, colmap_image_dir]:
        if not os.path.exists(d):
            raise ValueError(f"Image folder {d} does not exist.")

    # Downsampled images may have different names vs images used for COLMAP,
    # so we need to map between the two sorted lists of files.
    colmap_files = sorted(os.listdir(colmap_image_dir))
    image_files = sorted(os.listdir(image_dir))
    colmap_to_image = dict(zip(colmap_files, image_files))
    image_paths = [
        os.path.join(image_dir, colmap_to_image[f]) for f in image_names
    ]
    print("loading images")
    images = np.stack(
        [imageio.imread(x) for x in tqdm.tqdm(image_paths)], axis=0
    )

    # Select the split.
    all_indices = np.arange(images.shape[0])
    split_indices = {
        "test": all_indices[all_indices % 8 == 0],
        "train": all_indices[all_indices % 8 != 0],
    }
    indices = split_indices[split]
    # All per-image quantities must be re-indexed using the split indices.
    return images[indices], camtoworlds[indices], K
class SubjectLoader(torch.utils.data.Dataset):
    """Single subject data loader for training and evaluation."""

    SPLITS = ["train", "test"]
    SUBJECT_IDS = [
        "garden",
        "bicycle",
        "bonsai",
        "counter",
        "kitchen",
        "room",
        "stump",
    ]

    OPENGL_CAMERA = False

    def __init__(
        self,
        subject_id: str,
        root_fp: str,
        split: str,
        color_bkgd_aug: str = "white",
        num_rays: int = None,
        near: float = None,
        far: float = None,
        batch_over_images: bool = True,
        factor: int = 1,
    ):
        """Load one scene through `_load_colmap` and keep it as tensors.

        Args:
            subject_id: Scene name; must be one of `SUBJECT_IDS`.
            root_fp: Directory containing the scene folders.
            split: One of `SPLITS`.
            color_bkgd_aug: "white", "black" or "random".
            num_rays: Rays per training batch; `None` disables random
                ray sampling.
            near: Stored as-is on the instance.
            far: Stored as-is on the instance.
            batch_over_images: Draw training rays across all images rather
                than from the indexed image only.
            factor: Image downsampling factor forwarded to `_load_colmap`.
        """
        super().__init__()
        assert split in self.SPLITS, "%s" % split
        assert subject_id in self.SUBJECT_IDS, "%s" % subject_id
        assert color_bkgd_aug in ["white", "black", "random"]
        self.split = split
        self.num_rays = num_rays
        self.near = near
        self.far = far
        # Random ray sampling needs both a ray budget and a training split.
        is_train_split = split in ["train", "trainval"]
        self.training = (num_rays is not None) and is_train_split
        self.color_bkgd_aug = color_bkgd_aug
        self.batch_over_images = batch_over_images

        images, camtoworlds, intrinsics = _load_colmap(
            root_fp, subject_id, split, factor
        )
        self.images = torch.from_numpy(images).to(torch.uint8)
        self.camtoworlds = torch.from_numpy(camtoworlds).to(torch.float32)
        self.K = torch.tensor(intrinsics).to(torch.float32)
        self.height, self.width = self.images.shape[1:3]

    def __len__(self):
        """Return the number of images in the split."""
        num_images = len(self.images)
        return num_images

    @torch.no_grad()
    def __getitem__(self, index):
        """Fetch and post-process the sample for `index` without autograd."""
        return self.preprocess(self.fetch_data(index))

    def preprocess(self, data):
        """Process the fetched / cached data with randomness.

        The RGB values are passed through unchanged; only a background
        colour is attached to the sample.
        """
        pixels, rays = data["rgb"], data["rays"]

        device = self.images.device
        # Outside of training the background is always white.
        bkgd_mode = self.color_bkgd_aug if self.training else "white"
        if bkgd_mode == "white":
            color_bkgd = torch.ones(3, device=device)
        elif bkgd_mode == "black":
            color_bkgd = torch.zeros(3, device=device)
        elif bkgd_mode == "random":
            color_bkgd = torch.rand(3, device=device)

        passthrough = {
            k: v for k, v in data.items() if k not in ["rgb", "rays"]
        }
        return {
            "pixels": pixels,  # [n_rays, 3] or [h, w, 3]
            "rays": rays,  # [n_rays,] or [h, w]
            "color_bkgd": color_bkgd,  # [3,]
            **passthrough,
        }

    def update_num_rays(self, num_rays):
        """Replace the ray budget that `fetch_data` reads."""
        self.num_rays = num_rays

    def fetch_data(self, index):
        """Fetch the data (it maybe cached for multiple batches).

        In training mode `self.num_rays` random pixels are drawn, either
        across all images (`batch_over_images`) or from image `index` only.
        Otherwise every pixel of image `index` is used.
        """
        num_rays = self.num_rays
        device = self.images.device

        if not self.training:
            # Evaluation: enumerate all pixels of one image, row by row.
            image_id = [index]
            x, y = torch.meshgrid(
                torch.arange(self.width, device=device),
                torch.arange(self.height, device=device),
                indexing="xy",
            )
            x, y = x.flatten(), y.flatten()
        else:
            if self.batch_over_images:
                image_id = torch.randint(
                    0, len(self.images), size=(num_rays,), device=device
                )
            else:
                image_id = [index]
            x = torch.randint(0, self.width, size=(num_rays,), device=device)
            y = torch.randint(0, self.height, size=(num_rays,), device=device)

        # Gather colours and poses for the selected pixels.
        rgb = self.images[image_id, y, x] / 255.0  # (num_rays, 3)
        c2w = self.camtoworlds[image_id]  # (num_rays, 3, 4)

        # Pixel centres (hence the +0.5) back-projected through K. With an
        # OpenGL camera the y axis is flipped and the camera looks along -z.
        axis_sign = -1.0 if self.OPENGL_CAMERA else 1.0
        dirs_xy = torch.stack(
            [
                (x - self.K[0, 2] + 0.5) / self.K[0, 0],
                (y - self.K[1, 2] + 0.5) / self.K[1, 1] * axis_sign,
            ],
            dim=-1,
        )
        camera_dirs = F.pad(dirs_xy, (0, 1), value=axis_sign)  # [num_rays, 3]

        # Rotate the camera-space directions by the 3x3 block of each pose.
        directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1)
        origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape)
        viewdirs = directions / torch.linalg.norm(
            directions, dim=-1, keepdims=True
        )

        if self.training:
            leading_shape = (num_rays,)
        else:
            leading_shape = (self.height, self.width)
        origins = torch.reshape(origins, leading_shape + (3,))
        viewdirs = torch.reshape(viewdirs, leading_shape + (3,))
        rgb = torch.reshape(rgb, leading_shape + (3,))

        return {
            "rgb": rgb,  # [h, w, 3] or [num_rays, 3]
            "rays": Rays(origins=origins, viewdirs=viewdirs),  # [h, w, 3] or [num_rays, 3]
        }
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/nerf_synthetic.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import collections
import json
import os
import imageio.v2 as imageio
import numpy as np
import torch
import torch.nn.functional as F
from .utils import Rays
def _load_renderings(root_fp: str, subject_id: str, split: str):
    """Load images from disk.

    Reads "transforms_<split>.json" of the scene, then every frame's PNG
    and transform matrix listed in it.

    Returns:
        Tuple `(images, camtoworlds, focal)`: the stacked images, the stacked
        per-frame transform matrices, and the focal length derived from the
        image width and the "camera_angle_x" field of the metadata.
    """
    if not root_fp.startswith("/"):
        # allow relative path. e.g., "./data/nerf_synthetic/"
        module_dir = os.path.dirname(os.path.abspath(__file__))
        root_fp = os.path.join(module_dir, "..", "..", root_fp)

    data_dir = os.path.join(root_fp, subject_id)
    meta_path = os.path.join(data_dir, "transforms_{}.json".format(split))
    with open(meta_path, "r") as fp:
        meta = json.load(fp)

    images = []
    camtoworlds = []
    for frame in meta["frames"]:
        image_path = os.path.join(data_dir, frame["file_path"] + ".png")
        rgba = imageio.imread(image_path)
        camtoworlds.append(frame["transform_matrix"])
        images.append(rgba)

    images = np.stack(images, axis=0)
    camtoworlds = np.stack(camtoworlds, axis=0)

    _, width = images.shape[1:3]
    half_fov_x = 0.5 * float(meta["camera_angle_x"])
    focal = 0.5 * width / np.tan(half_fov_x)

    return images, camtoworlds, focal
class SubjectLoader(torch.utils.data.Dataset):
    """Single subject data loader for training and evaluation."""

    SPLITS = ["train", "val", "trainval", "test"]
    SUBJECT_IDS = [
        "chair",
        "drums",
        "ficus",
        "hotdog",
        "lego",
        "materials",
        "mic",
        "ship",
    ]

    WIDTH, HEIGHT = 800, 800
    NEAR, FAR = 2.0, 6.0
    OPENGL_CAMERA = True

    def __init__(
        self,
        subject_id: str,
        root_fp: str,
        split: str,
        color_bkgd_aug: str = "white",
        num_rays: int = None,
        near: float = None,
        far: float = None,
        batch_over_images: bool = True,
    ):
        """Load one scene through `_load_renderings` and keep it as tensors.

        Args:
            subject_id: Scene name; must be one of `SUBJECT_IDS`.
            root_fp: Directory containing the scene folders.
            split: One of `SPLITS`; "trainval" concatenates the "train" and
                "val" renderings.
            color_bkgd_aug: "white", "black" or "random".
            num_rays: Rays per training batch; `None` disables random
                ray sampling.
            near: Near plane; falls back to `NEAR` when `None`.
            far: Far plane; falls back to `FAR` when `None`.
            batch_over_images: Draw training rays across all images rather
                than from the indexed image only.
        """
        super().__init__()
        assert split in self.SPLITS, "%s" % split
        assert subject_id in self.SUBJECT_IDS, "%s" % subject_id
        assert color_bkgd_aug in ["white", "black", "random"]
        self.split = split
        self.num_rays = num_rays
        self.near = self.NEAR if near is None else near
        self.far = self.FAR if far is None else far
        # Random ray sampling needs both a ray budget and a training split.
        is_train_split = split in ["train", "trainval"]
        self.training = (num_rays is not None) and is_train_split
        self.color_bkgd_aug = color_bkgd_aug
        self.batch_over_images = batch_over_images

        if split == "trainval":
            train_images, train_c2w, train_focal = _load_renderings(
                root_fp, subject_id, "train"
            )
            val_images, val_c2w, _ = _load_renderings(
                root_fp, subject_id, "val"
            )
            images = np.concatenate([train_images, val_images])
            camtoworlds = np.concatenate([train_c2w, val_c2w])
            # The focal length of the "train" renderings is used for both.
            focal = train_focal
        else:
            images, camtoworlds, focal = _load_renderings(
                root_fp, subject_id, split
            )

        self.focal = focal
        self.images = torch.from_numpy(images).to(torch.uint8)
        self.camtoworlds = torch.from_numpy(camtoworlds).to(torch.float32)
        # Pinhole intrinsics with the principal point at the image centre.
        self.K = torch.tensor(
            [
                [self.focal, 0, self.WIDTH / 2.0],
                [0, self.focal, self.HEIGHT / 2.0],
                [0, 0, 1],
            ],
            dtype=torch.float32,
        )  # (3, 3)
        assert self.images.shape[1:3] == (self.HEIGHT, self.WIDTH)

    def __len__(self):
        """Return the number of images in the split."""
        num_images = len(self.images)
        return num_images

    @torch.no_grad()
    def __getitem__(self, index):
        """Fetch and post-process the sample for `index` without autograd."""
        return self.preprocess(self.fetch_data(index))

    def preprocess(self, data):
        """Process the fetched / cached data with randomness.

        Alpha-blends the RGB part of "rgba" onto the chosen background
        colour and returns it as "pixels", together with "rays",
        "color_bkgd" and any other entries of `data`.
        """
        rgba, rays = data["rgba"], data["rays"]
        rgb, alpha = rgba.split([3, 1], dim=-1)

        device = self.images.device
        # Outside of training the background is always white.
        bkgd_mode = self.color_bkgd_aug if self.training else "white"
        if bkgd_mode == "white":
            color_bkgd = torch.ones(3, device=device)
        elif bkgd_mode == "black":
            color_bkgd = torch.zeros(3, device=device)
        elif bkgd_mode == "random":
            color_bkgd = torch.rand(3, device=device)

        blended = rgb * alpha + color_bkgd * (1.0 - alpha)

        passthrough = {
            k: v for k, v in data.items() if k not in ["rgba", "rays"]
        }
        return {
            "pixels": blended,  # [n_rays, 3] or [h, w, 3]
            "rays": rays,  # [n_rays,] or [h, w]
            "color_bkgd": color_bkgd,  # [3,]
            **passthrough,
        }

    def update_num_rays(self, num_rays):
        """Replace the ray budget that `fetch_data` reads."""
        self.num_rays = num_rays

    def fetch_data(self, index):
        """Fetch the data (it maybe cached for multiple batches).

        In training mode `self.num_rays` random pixels are drawn, either
        across all images (`batch_over_images`) or from image `index` only.
        Otherwise every pixel of image `index` is used.
        """
        num_rays = self.num_rays
        device = self.images.device

        if not self.training:
            # Evaluation: enumerate all pixels of one image, row by row.
            image_id = [index]
            x, y = torch.meshgrid(
                torch.arange(self.WIDTH, device=device),
                torch.arange(self.HEIGHT, device=device),
                indexing="xy",
            )
            x, y = x.flatten(), y.flatten()
        else:
            if self.batch_over_images:
                image_id = torch.randint(
                    0, len(self.images), size=(num_rays,), device=device
                )
            else:
                image_id = [index]
            x = torch.randint(0, self.WIDTH, size=(num_rays,), device=device)
            y = torch.randint(0, self.HEIGHT, size=(num_rays,), device=device)

        # Gather colours and poses for the selected pixels.
        rgba = self.images[image_id, y, x] / 255.0  # (num_rays, 4)
        c2w = self.camtoworlds[image_id]  # (num_rays, 3, 4)

        # Pixel centres (hence the +0.5) back-projected through K. With an
        # OpenGL camera the y axis is flipped and the camera looks along -z.
        axis_sign = -1.0 if self.OPENGL_CAMERA else 1.0
        dirs_xy = torch.stack(
            [
                (x - self.K[0, 2] + 0.5) / self.K[0, 0],
                (y - self.K[1, 2] + 0.5) / self.K[1, 1] * axis_sign,
            ],
            dim=-1,
        )
        camera_dirs = F.pad(dirs_xy, (0, 1), value=axis_sign)  # [num_rays, 3]

        # Rotate the camera-space directions by the 3x3 block of each pose.
        directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1)
        origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape)
        viewdirs = directions / torch.linalg.norm(
            directions, dim=-1, keepdims=True
        )

        if self.training:
            leading_shape = (num_rays,)
        else:
            leading_shape = (self.HEIGHT, self.WIDTH)
        origins = torch.reshape(origins, leading_shape + (3,))
        viewdirs = torch.reshape(viewdirs, leading_shape + (3,))
        rgba = torch.reshape(rgba, leading_shape + (4,))

        return {
            "rgba": rgba,  # [h, w, 4] or [num_rays, 4]
            "rays": Rays(origins=origins, viewdirs=viewdirs),  # [h, w, 3] or [num_rays, 3]
        }
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/utils.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import collections
# Lightweight container pairing ray origins with their viewing directions.
Rays = collections.namedtuple("Rays", ("origins", "viewdirs"))
def namedtuple_map(fn, tup):
    """Map `fn` over the fields of `tup`, keeping the namedtuple type.

    Fields that are `None` are left as `None` and `fn` is not called on them.
    """
    mapped = [fn(field) if field is not None else None for field in tup]
    return type(tup)(*mapped)
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/radiance_fields/__init__.py
================================================
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/radiance_fields/mlp.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import functools
import math
from typing import Callable, Optional
import torch
import torch.nn as nn
import torch.nn.functional as F
class MLP(nn.Module):
    """Stack of linear layers with optional skip connections to the input.

    After every `skip_layer`-th hidden layer (except the first one) the
    original input is concatenated to the activations. When
    `output_enabled` is False there is no output layer and `output_dim`
    is set to the width of the last hidden representation.
    """

    def __init__(
        self,
        input_dim: int,  # The number of input tensor channels.
        output_dim: int = None,  # The number of output tensor channels.
        net_depth: int = 8,  # The depth of the MLP.
        net_width: int = 256,  # The width of the MLP.
        skip_layer: int = 4,  # The layer to add skip layers to.
        hidden_init: Callable = nn.init.xavier_uniform_,
        hidden_activation: Callable = nn.ReLU(),
        output_enabled: bool = True,
        output_init: Optional[Callable] = nn.init.xavier_uniform_,
        output_activation: Optional[Callable] = nn.Identity(),
        bias_enabled: bool = True,
        bias_init: Callable = nn.init.zeros_,
    ):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.net_depth = net_depth
        self.net_width = net_width
        self.skip_layer = skip_layer
        self.hidden_init = hidden_init
        self.hidden_activation = hidden_activation
        self.output_enabled = output_enabled
        self.output_init = output_init
        self.output_activation = output_activation
        self.bias_enabled = bias_enabled
        self.bias_init = bias_init

        self.hidden_layers = nn.ModuleList()
        fan_in = self.input_dim
        for depth in range(self.net_depth):
            self.hidden_layers.append(
                nn.Linear(fan_in, self.net_width, bias=bias_enabled)
            )
            # The layer after a skip connection also sees the raw input.
            fan_in = self.net_width
            if self._is_skip(depth):
                fan_in += self.input_dim

        if self.output_enabled:
            self.output_layer = nn.Linear(
                fan_in, self.output_dim, bias=bias_enabled
            )
        else:
            self.output_dim = fan_in

        self.initialize()

    def _is_skip(self, depth):
        """Tell whether the input is concatenated after hidden layer `depth`."""
        return (
            (self.skip_layer is not None)
            and (depth % self.skip_layer == 0)
            and (depth > 0)
        )

    def _init_linear(self, layer, weight_init):
        """Initialise one linear layer's weight, then its bias if enabled."""
        if weight_init is not None:
            weight_init(layer.weight)
        if self.bias_enabled and self.bias_init is not None:
            self.bias_init(layer.bias)

    def initialize(self):
        """Apply the configured initialisers to the hidden and output layers."""
        for layer in self.hidden_layers:
            self._init_linear(layer, self.hidden_init)
        if self.output_enabled:
            self._init_linear(self.output_layer, self.output_init)

    def forward(self, x):
        """Run the network on `x`, whose last dimension is `input_dim`."""
        skip_input = x
        for depth in range(self.net_depth):
            x = self.hidden_activation(self.hidden_layers[depth](x))
            if self._is_skip(depth):
                x = torch.cat([x, skip_input], dim=-1)
        if self.output_enabled:
            x = self.output_activation(self.output_layer(x))
        return x
class DenseLayer(MLP):
    """A single linear layer, expressed as an `MLP` without hidden layers."""

    def __init__(self, input_dim, output_dim, **kwargs):
        # net_depth is pinned to 0, so only the output layer is created.
        # Remaining keyword arguments are forwarded to MLP unchanged.
        no_hidden_layers = 0
        super().__init__(
            input_dim=input_dim,
            output_dim=output_dim,
            net_depth=no_hidden_layers,
            **kwargs,
        )
class NerfMLP(nn.Module):
    """Trunk MLP with a density head and an (optionally conditioned) RGB head.

    When `condition_dim > 0` the trunk features go through a bottleneck
    layer, are concatenated with the condition and fed to a second MLP.
    Otherwise the RGB head is a single dense layer on the trunk features.
    """

    def __init__(
        self,
        input_dim: int,  # The number of input tensor channels.
        condition_dim: int,  # The number of condition tensor channels.
        net_depth: int = 8,  # The depth of the MLP.
        net_width: int = 256,  # The width of the MLP.
        skip_layer: int = 4,  # The layer to add skip layers to.
        net_depth_condition: int = 1,  # The depth of the second part of MLP.
        net_width_condition: int = 128,  # The width of the second part of MLP.
    ):
        super().__init__()
        self.base = MLP(
            input_dim=input_dim,
            net_depth=net_depth,
            net_width=net_width,
            skip_layer=skip_layer,
            output_enabled=False,
        )
        trunk_width = self.base.output_dim
        self.sigma_layer = DenseLayer(trunk_width, 1)

        if condition_dim > 0:
            self.bottleneck_layer = DenseLayer(trunk_width, net_width)
            self.rgb_layer = MLP(
                input_dim=net_width + condition_dim,
                output_dim=3,
                net_depth=net_depth_condition,
                net_width=net_width_condition,
                skip_layer=None,
            )
        else:
            self.rgb_layer = DenseLayer(trunk_width, 3)

    def query_density(self, x):
        """Return the raw (pre-activation) density for inputs `x`."""
        return self.sigma_layer(self.base(x))

    def forward(self, x, condition=None):
        """Return `(raw_rgb, raw_sigma)` for inputs `x`.

        If `condition` is given and its leading dimensions differ from
        those of the trunk features, it is treated as a 2-D
        `(num_rays, n_dim)` tensor and expanded to match them.
        """
        features = self.base(x)
        raw_sigma = self.sigma_layer(features)

        rgb_input = features
        if condition is not None:
            if condition.shape[:-1] != features.shape[:-1]:
                num_rays, n_dim = condition.shape
                extra_dims = features.dim() - condition.dim()
                condition = condition.view(
                    [num_rays] + [1] * extra_dims + [n_dim]
                ).expand(list(features.shape[:-1]) + [n_dim])
            bottleneck = self.bottleneck_layer(features)
            rgb_input = torch.cat([bottleneck, condition], dim=-1)

        raw_rgb = self.rgb_layer(rgb_input)
        return raw_rgb, raw_sigma
class SinusoidalEncoder(nn.Module):
    """Sinusoidal Positional Encoder used in Nerf.

    Each input channel is multiplied by `2**i` for `i` in
    `[min_deg, max_deg)` and encoded with a sine and a cosine. With
    `use_identity` the raw input is prepended to the encoding. When
    `max_deg == min_deg` the encoder is a no-op that returns its input.
    """

    def __init__(self, x_dim, min_deg, max_deg, use_identity: bool = True):
        super().__init__()
        self.x_dim = x_dim
        self.min_deg = min_deg
        self.max_deg = max_deg
        self.use_identity = use_identity
        self.register_buffer(
            "scales", torch.tensor([2**i for i in range(min_deg, max_deg)])
        )

    @property
    def latent_dim(self) -> int:
        """Size of the last dimension of the tensor returned by `forward`."""
        if self.max_deg == self.min_deg:
            # forward() returns the input untouched in this case, regardless
            # of `use_identity`, so the output width is simply `x_dim`.
            return self.x_dim
        num_freqs = self.max_deg - self.min_deg
        return (int(self.use_identity) + num_freqs * 2) * self.x_dim

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: [..., x_dim]
        Returns:
            latent: [..., latent_dim]
        """
        if self.max_deg == self.min_deg:
            return x
        num_freqs = self.max_deg - self.min_deg
        scaled = x[Ellipsis, None, :] * self.scales[:, None]
        xb = torch.reshape(
            scaled, list(x.shape[:-1]) + [num_freqs * self.x_dim]
        )
        # sin(t + pi/2) == cos(t): one sin call yields both halves.
        latent = torch.sin(torch.cat([xb, xb + 0.5 * math.pi], dim=-1))
        if self.use_identity:
            latent = torch.cat([x] + [latent], dim=-1)
        return latent
class VanillaNeRFRadianceField(nn.Module):
    """NeRF MLP on sinusoidally encoded positions and view directions.

    Positions use 10 frequency bands and view directions 4, both with the
    identity term included.
    """

    def __init__(
        self,
        net_depth: int = 8,  # The depth of the MLP.
        net_width: int = 256,  # The width of the MLP.
        skip_layer: int = 4,  # The layer to add skip layers to.
        net_depth_condition: int = 1,  # The depth of the second part of MLP.
        net_width_condition: int = 128,  # The width of the second part of MLP.
    ) -> None:
        super().__init__()
        self.posi_encoder = SinusoidalEncoder(3, 0, 10, True)
        self.view_encoder = SinusoidalEncoder(3, 0, 4, True)
        self.mlp = NerfMLP(
            input_dim=self.posi_encoder.latent_dim,
            condition_dim=self.view_encoder.latent_dim,
            net_depth=net_depth,
            net_width=net_width,
            skip_layer=skip_layer,
            net_depth_condition=net_depth_condition,
            net_width_condition=net_width_condition,
        )

    def query_opacity(self, x, step_size):
        """Approximate the opacity at `x` as `density * step_size`."""
        # For small densities this matches 1 - exp(-density * step_size).
        return self.query_density(x) * step_size

    def query_density(self, x):
        """Return the non-negative (ReLU-activated) density at `x`."""
        raw_sigma = self.mlp.query_density(self.posi_encoder(x))
        return F.relu(raw_sigma)

    def forward(self, x, condition=None):
        """Return `(rgb, density)` with sigmoid RGB and ReLU density."""
        encoded_x = self.posi_encoder(x)
        encoded_cond = None
        if condition is not None:
            encoded_cond = self.view_encoder(condition)
        raw_rgb, raw_sigma = self.mlp(encoded_x, condition=encoded_cond)
        return torch.sigmoid(raw_rgb), F.relu(raw_sigma)
class DNeRFRadianceField(nn.Module):
    """Time-dependent radiance field: a warp MLP in front of a vanilla NeRF.

    The warp network predicts a 3-D offset from the encoded position and
    timestamp; the offset position is then queried in the static NeRF.
    """

    def __init__(self) -> None:
        super().__init__()
        self.posi_encoder = SinusoidalEncoder(3, 0, 4, True)
        self.time_encoder = SinusoidalEncoder(1, 0, 4, True)
        warp_input_dim = (
            self.posi_encoder.latent_dim + self.time_encoder.latent_dim
        )
        self.warp = MLP(
            input_dim=warp_input_dim,
            output_dim=3,
            net_depth=4,
            net_width=64,
            skip_layer=2,
            output_init=functools.partial(torch.nn.init.uniform_, b=1e-4),
        )
        self.nerf = VanillaNeRFRadianceField()

    def _warp_points(self, x, t):
        """Return `x` shifted by the offset the warp network predicts at `t`."""
        warp_input = torch.cat(
            [self.posi_encoder(x), self.time_encoder(t)], dim=-1
        )
        return x + self.warp(warp_input)

    def query_opacity(self, x, timestamps, step_size):
        """Approximate opacity at `x`, each point at a randomly drawn timestamp."""
        picks = torch.randint(
            0, len(timestamps), (x.shape[0],), device=x.device
        )
        density = self.query_density(x, timestamps[picks])
        # For small densities this matches 1 - exp(-density * step_size).
        return density * step_size

    def query_density(self, x, t):
        """Return the density of the warped points at time `t`."""
        return self.nerf.query_density(self._warp_points(x, t))

    def forward(self, x, t, condition=None):
        """Return the NeRF output for the warped points at time `t`."""
        return self.nerf(self._warp_points(x, t), condition=condition)
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/radiance_fields/ngp.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from typing import Callable, List, Union
import torch
from torch.autograd import Function
from torch.cuda.amp import custom_bwd, custom_fwd
try:
import tinycudann as tcnn
except ImportError as e:
print(
f"Error: {e}! "
"Please install tinycudann by: "
"pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch"
)
exit()
class _TruncExp(Function):  # pylint: disable=abstract-method
    """Exponential whose gradient is computed from the input clamped at 15.

    The forward pass is a plain `exp` (inputs are cast to float32 under
    autocast); only the backward pass uses the clamped input.
    """

    # Implementation from torch-ngp:
    # https://github.com/ashawkey/torch-ngp/blob/93b08a0d4ec1cc6e69d85df7f0acdfb99603b628/activation.py
    @staticmethod
    @custom_fwd(cast_inputs=torch.float32)
    def forward(ctx, x):  # pylint: disable=arguments-differ
        ctx.save_for_backward(x)
        return x.exp()

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output):  # pylint: disable=arguments-differ
        (x,) = ctx.saved_tensors
        return grad_output * x.clamp(max=15).exp()


trunc_exp = _TruncExp.apply
def contract_to_unisphere(
    x: torch.Tensor,
    aabb: torch.Tensor,
    eps: float = 1e-6,
    derivative: bool = False,
):
    """Contract unbounded points into the unit cube [0, 1].

    Points are first normalised so that `aabb` maps to [-1, 1]. Points with
    norm above 1 are then squashed to `(2 - 1/|x|) * x/|x|`, and the result
    is rescaled from [-2, 2] to [0, 1].

    Args:
        x: Points with 3 channels in the last dimension.
        aabb: Box as `(min_xyz, max_xyz)`, split into chunks of 3 along the
            last dimension.
        eps: Lower clamp applied to the derivative.
        derivative: If True, return the per-channel derivative term of the
            contraction instead of the contracted points (1 for points with
            norm at most 1, clamped to at least `eps`).

    Returns:
        The contracted points, or the derivative term when `derivative` is set.
    """
    box_min, box_max = aabb.split(3, dim=-1)
    x = (x - box_min) / (box_max - box_min)
    x = x * 2 - 1  # aabb is at [-1, 1]

    norm = x.norm(dim=-1, keepdim=True)
    outside = norm.squeeze(-1) > 1

    if not derivative:
        x[outside] = (2 - 1 / norm[outside]) * (x[outside] / norm[outside])
        return x / 4 + 0.5  # [-inf, inf] is at [0, 1]

    dev = (2 * norm - 1) / norm**2 + 2 * x**2 * (
        1 / norm**3 - (2 * norm - 1) / norm**4
    )
    # Inside the unit ball the mapping is the identity.
    dev[~outside] = 1.0
    return torch.clamp(dev, min=eps)
class NGPradianceField(torch.nn.Module):
    """Instance-NGP radiance Field"""

    def __init__(
        self,
        aabb: Union[torch.Tensor, List[float]],
        num_dim: int = 3,
        use_viewdirs: bool = True,
        density_activation: Callable = lambda x: trunc_exp(x - 1),
        unbounded: bool = False,
        geo_feat_dim: int = 15,
        n_levels: int = 16,
        log2_hashmap_size: int = 19,
    ) -> None:
        """Build the hash-grid density network and the colour head.

        Args:
            aabb: Scene box `(min..., max...)`; lists are converted to a
                float32 tensor and stored as a buffer.
            num_dim: Number of input position channels.
            use_viewdirs: Encode view directions and feed them to the
                colour head.
            density_activation: Maps the raw density output to density.
            unbounded: Contract positions with `contract_to_unisphere`
                instead of normalising them by `aabb`.
            geo_feat_dim: Number of feature channels the base network
                outputs next to the density.
            n_levels: Number of hash-grid levels.
            log2_hashmap_size: Log2 of the hash table size per level.
        """
        super().__init__()
        if not isinstance(aabb, torch.Tensor):
            aabb = torch.tensor(aabb, dtype=torch.float32)
        self.register_buffer("aabb", aabb)
        self.num_dim = num_dim
        self.use_viewdirs = use_viewdirs
        self.density_activation = density_activation
        self.unbounded = unbounded

        self.geo_feat_dim = geo_feat_dim
        per_level_scale = 1.4472692012786865

        if self.use_viewdirs:
            spherical_harmonics = {
                "n_dims_to_encode": 3,
                "otype": "SphericalHarmonics",
                "degree": 4,
            }
            self.direction_encoding = tcnn.Encoding(
                n_input_dims=num_dim,
                encoding_config={
                    "otype": "Composite",
                    "nested": [spherical_harmonics],
                },
            )

        hash_grid_config = {
            "otype": "HashGrid",
            "n_levels": n_levels,
            "n_features_per_level": 2,
            "log2_hashmap_size": log2_hashmap_size,
            "base_resolution": 16,
            "per_level_scale": per_level_scale,
        }
        base_network_config = {
            "otype": "FullyFusedMLP",
            "activation": "ReLU",
            "output_activation": "None",
            "n_neurons": 64,
            "n_hidden_layers": 1,
        }
        # One density channel followed by `geo_feat_dim` feature channels.
        self.mlp_base = tcnn.NetworkWithInputEncoding(
            n_input_dims=num_dim,
            n_output_dims=1 + self.geo_feat_dim,
            encoding_config=hash_grid_config,
            network_config=base_network_config,
        )

        if self.geo_feat_dim > 0:
            head_input_dims = self.geo_feat_dim
            if self.use_viewdirs:
                head_input_dims += self.direction_encoding.n_output_dims
            head_network_config = {
                "otype": "FullyFusedMLP",
                "activation": "ReLU",
                "output_activation": "Sigmoid",
                "n_neurons": 64,
                "n_hidden_layers": 2,
            }
            self.mlp_head = tcnn.Network(
                n_input_dims=head_input_dims,
                n_output_dims=3,
                network_config=head_network_config,
            )

    def query_density(self, x, return_feat: bool = False):
        """Return the density at `x`, optionally with the base features.

        Points whose normalised coordinates are not strictly inside (0, 1)
        get zero density.
        """
        if self.unbounded:
            x = contract_to_unisphere(x, self.aabb)
        else:
            aabb_min, aabb_max = torch.split(self.aabb, self.num_dim, dim=-1)
            x = (x - aabb_min) / (aabb_max - aabb_min)
        inside = ((x > 0.0) & (x < 1.0)).all(dim=-1)

        out_shape = list(x.shape[:-1]) + [1 + self.geo_feat_dim]
        base_out = self.mlp_base(x.view(-1, self.num_dim)).view(out_shape).to(x)
        raw_density, geo_feat = torch.split(
            base_out, [1, self.geo_feat_dim], dim=-1
        )
        density = self.density_activation(raw_density) * inside[..., None]

        if return_feat:
            return density, geo_feat
        return density

    def _query_rgb(self, dir, embedding):
        """Return RGB from the colour head given directions and features."""
        if self.use_viewdirs:
            # tcnn requires directions in the range [0, 1]
            dir = (dir + 1.0) / 2.0
            encoded_dir = self.direction_encoding(dir.view(-1, dir.shape[-1]))
            flat_feat = embedding.view(-1, self.geo_feat_dim)
            head_input = torch.cat([encoded_dir, flat_feat], dim=-1)
        else:
            head_input = embedding.view(-1, self.geo_feat_dim)

        rgb_shape = list(embedding.shape[:-1]) + [3]
        return self.mlp_head(head_input).view(rgb_shape).to(embedding)

    def forward(
        self,
        positions: torch.Tensor,
        directions: torch.Tensor = None,
    ):
        """Return `(rgb, density)` for the given positions and directions."""
        if self.use_viewdirs and (directions is not None):
            assert (
                positions.shape == directions.shape
            ), f"{positions.shape} v.s. {directions.shape}"
        density, embedding = self.query_density(positions, return_feat=True)
        rgb = self._query_rgb(directions, embedding=embedding)
        return rgb, density
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/requirements.txt
================================================
git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
opencv-python
imageio
numpy
tqdm
scipy
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/train_mlp_dnerf.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import argparse
import math
import os
import time
import imageio
import numpy as np
import torch
import torch.nn.functional as F
import tqdm
from datasets.dnerf_synthetic import SubjectLoader
from radiance_fields.mlp import DNeRFRadianceField
from utils import render_image, set_random_seed
from nerfacc import ContractionType, OccupancyGrid
if __name__ == "__main__":
    # Script entry point: trains a DNeRFRadianceField on one D-NeRF scene,
    # using an OccupancyGrid during rendering, prints progress every 5000
    # steps and evaluates PSNR on the test split once `max_steps` is reached.
    device = "cuda:0"
    set_random_seed(42)

    # ---- command-line options ----
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train_split",
        type=str,
        default="train",
        choices=["train"],
        help="which train split to use",
    )
    parser.add_argument(
        "--scene",
        type=str,
        default="lego",
        choices=[
            # dnerf
            "bouncingballs",
            "hellwarrior",
            "hook",
            "jumpingjacks",
            "lego",
            "mutant",
            "standup",
            "trex",
        ],
        help="which scene to use",
    )
    # Six comma-separated floats; the code below treats the first three as the
    # box minimum and the last three as the box maximum.
    parser.add_argument(
        "--aabb",
        type=lambda s: [float(item) for item in s.split(",")],
        default="-1.5,-1.5,-1.5,1.5,1.5,1.5",
        help="delimited list input",
    )
    parser.add_argument(
        "--test_chunk_size",
        type=int,
        default=8192,
    )
    parser.add_argument("--cone_angle", type=float, default=0.0)
    args = parser.parse_args()

    render_n_samples = 1024

    # setup the scene bounding box.
    contraction_type = ContractionType.AABB
    scene_aabb = torch.tensor(args.aabb, dtype=torch.float32, device=device)
    near_plane = None
    far_plane = None
    # Step size = longest box edge * sqrt(3) / render_n_samples.
    render_step_size = (
        (scene_aabb[3:] - scene_aabb[:3]).max()
        * math.sqrt(3)
        / render_n_samples
    ).item()

    # setup the radiance field we want to train.
    max_steps = 30000
    grad_scaler = torch.cuda.amp.GradScaler(1)
    radiance_field = DNeRFRadianceField().to(device)
    optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4)
    # Learning-rate milestones at 1/2, 3/4, 5/6 and 9/10 of max_steps.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[
            max_steps // 2,
            max_steps * 3 // 4,
            max_steps * 5 // 6,
            max_steps * 9 // 10,
        ],
        gamma=0.33,
    )

    # setup the dataset
    # NOTE(review): hard-coded, machine-specific dataset location.
    data_root_fp = "/home/ruilongli/data/dnerf/"
    target_sample_batch_size = 1 << 16
    grid_resolution = 128

    train_dataset = SubjectLoader(
        subject_id=args.scene,
        root_fp=data_root_fp,
        split=args.train_split,
        num_rays=target_sample_batch_size // render_n_samples,
    )
    # Move the dataset tensors to the training device up front.
    train_dataset.images = train_dataset.images.to(device)
    train_dataset.camtoworlds = train_dataset.camtoworlds.to(device)
    train_dataset.K = train_dataset.K.to(device)
    train_dataset.timestamps = train_dataset.timestamps.to(device)

    test_dataset = SubjectLoader(
        subject_id=args.scene,
        root_fp=data_root_fp,
        split="test",
        num_rays=None,
    )
    test_dataset.images = test_dataset.images.to(device)
    test_dataset.camtoworlds = test_dataset.camtoworlds.to(device)
    test_dataset.K = test_dataset.K.to(device)
    test_dataset.timestamps = test_dataset.timestamps.to(device)

    occupancy_grid = OccupancyGrid(
        roi_aabb=args.aabb,
        resolution=grid_resolution,
        contraction_type=contraction_type,
    ).to(device)

    # training
    step = 0
    tic = time.time()
    # The outer loop is effectively unbounded; the script leaves through
    # exit() once `step == max_steps`.
    for epoch in range(10000000):
        for i in range(len(train_dataset)):
            radiance_field.train()
            data = train_dataset[i]

            render_bkgd = data["color_bkgd"]
            rays = data["rays"]
            pixels = data["pixels"]
            timestamps = data["timestamps"]

            # update occupancy grid
            # The lambda is handed to every_n_step within this iteration, so it
            # sees this iteration's `timestamps`.
            occupancy_grid.every_n_step(
                step=step,
                occ_eval_fn=lambda x: radiance_field.query_opacity(
                    x, timestamps, render_step_size
                ),
            )

            # render
            rgb, acc, depth, n_rendering_samples = render_image(
                radiance_field,
                occupancy_grid,
                rays,
                scene_aabb,
                # rendering options
                near_plane=near_plane,
                far_plane=far_plane,
                render_step_size=render_step_size,
                render_bkgd=render_bkgd,
                cone_angle=args.cone_angle,
                # alpha threshold is 0 for the first 1000 steps, 0.01 afterwards
                alpha_thre=0.01 if step > 1000 else 0.00,
                # dnerf options
                timestamps=timestamps,
            )
            # Nothing was sampled: skip this batch (note that `step` is not
            # incremented on this path).
            if n_rendering_samples == 0:
                continue

            # dynamic batch size for rays to keep sample batch size constant.
            num_rays = len(pixels)
            num_rays = int(
                num_rays
                * (target_sample_batch_size / float(n_rendering_samples))
            )
            train_dataset.update_num_rays(num_rays)
            # Only rays with non-zero accumulated opacity contribute to the loss.
            alive_ray_mask = acc.squeeze(-1) > 0

            # compute loss
            loss = F.smooth_l1_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])

            optimizer.zero_grad()
            # do not unscale it because we are using Adam.
            grad_scaler.scale(loss).backward()
            optimizer.step()
            scheduler.step()

            if step % 5000 == 0:
                elapsed_time = time.time() - tic
                # The logged value is the MSE, not the smooth-L1 training loss.
                loss = F.mse_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])
                print(
                    f"elapsed_time={elapsed_time:.2f}s | step={step} | "
                    f"loss={loss:.5f} | "
                    f"alive_ray_mask={alive_ray_mask.long().sum():d} | "
                    f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} |"
                )

            # Evaluate when `step` is a positive multiple of max_steps; because
            # the script exits at step == max_steps this runs once.
            if step >= 0 and step % max_steps == 0 and step > 0:
                # evaluation
                radiance_field.eval()

                psnrs = []
                with torch.no_grad():
                    for i in tqdm.tqdm(range(len(test_dataset))):
                        data = test_dataset[i]
                        render_bkgd = data["color_bkgd"]
                        rays = data["rays"]
                        pixels = data["pixels"]
                        timestamps = data["timestamps"]

                        # rendering
                        rgb, acc, depth, _ = render_image(
                            radiance_field,
                            occupancy_grid,
                            rays,
                            scene_aabb,
                            # rendering options
                            near_plane=None,
                            far_plane=None,
                            render_step_size=render_step_size,
                            render_bkgd=render_bkgd,
                            cone_angle=args.cone_angle,
                            alpha_thre=0.01,
                            # test options
                            test_chunk_size=args.test_chunk_size,
                            # dnerf options
                            timestamps=timestamps,
                        )
                        mse = F.mse_loss(rgb, pixels)
                        # PSNR = -10 * log10(mse)
                        psnr = -10.0 * torch.log(mse) / np.log(10.0)
                        psnrs.append(psnr.item())
                        # imageio.imwrite(
                        #     "acc_binary_test.png",
                        #     ((acc > 0).float().cpu().numpy() * 255).astype(np.uint8),
                        # )
                        # imageio.imwrite(
                        #     "rgb_test.png",
                        #     (rgb.cpu().numpy() * 255).astype(np.uint8),
                        # )
                        # break
                psnr_avg = sum(psnrs) / len(psnrs)
                print(f"evaluation: psnr_avg={psnr_avg}")
                # NOTE(review): the meaning of this flag is defined by
                # SubjectLoader, which is not visible here.
                train_dataset.training = True

            if step == max_steps:
                print("training stops")
                exit()

            step += 1
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/train_mlp_nerf.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import argparse
import math
import os
import time
import imageio
import numpy as np
import torch
import torch.nn.functional as F
import tqdm
from radiance_fields.mlp import VanillaNeRFRadianceField
from utils import render_image, set_random_seed
from nerfacc import ContractionType, OccupancyGrid
if __name__ == "__main__":
    # Script entry point: trains a VanillaNeRFRadianceField on one scene
    # (bounded NeRF-synthetic, or the unbounded "garden" scene), using an
    # OccupancyGrid during rendering, prints progress every 5000 steps and
    # evaluates PSNR on the test split once `max_steps` is reached.
    device = "cuda:0"
    set_random_seed(42)

    # ---- command-line options ----
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train_split",
        type=str,
        default="trainval",
        choices=["train", "trainval"],
        help="which train split to use",
    )
    parser.add_argument(
        "--scene",
        type=str,
        default="lego",
        choices=[
            # nerf synthetic
            "chair",
            "drums",
            "ficus",
            "hotdog",
            "lego",
            "materials",
            "mic",
            "ship",
            # mipnerf360 unbounded
            "garden",
        ],
        help="which scene to use",
    )
    # Six comma-separated floats; the code below treats the first three as the
    # box minimum and the last three as the box maximum.
    parser.add_argument(
        "--aabb",
        type=lambda s: [float(item) for item in s.split(",")],
        default="-1.5,-1.5,-1.5,1.5,1.5,1.5",
        help="delimited list input",
    )
    parser.add_argument(
        "--test_chunk_size",
        type=int,
        default=8192,
    )
    parser.add_argument(
        "--unbounded",
        action="store_true",
        help="whether to use unbounded rendering",
    )
    parser.add_argument("--cone_angle", type=float, default=0.0)
    args = parser.parse_args()

    render_n_samples = 1024

    # setup the scene bounding box.
    if args.unbounded:
        print("Using unbounded rendering")
        contraction_type = ContractionType.UN_BOUNDED_SPHERE
        # contraction_type = ContractionType.UN_BOUNDED_TANH
        scene_aabb = None
        near_plane = 0.2
        far_plane = 1e4
        render_step_size = 1e-2
    else:
        contraction_type = ContractionType.AABB
        scene_aabb = torch.tensor(args.aabb, dtype=torch.float32, device=device)
        near_plane = None
        far_plane = None
        # Step size = longest box edge * sqrt(3) / render_n_samples.
        render_step_size = (
            (scene_aabb[3:] - scene_aabb[:3]).max()
            * math.sqrt(3)
            / render_n_samples
        ).item()

    # setup the radiance field we want to train.
    max_steps = 50000
    grad_scaler = torch.cuda.amp.GradScaler(1)
    radiance_field = VanillaNeRFRadianceField().to(device)
    optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4)
    # Learning-rate milestones at 1/2, 3/4, 5/6 and 9/10 of max_steps.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[
            max_steps // 2,
            max_steps * 3 // 4,
            max_steps * 5 // 6,
            max_steps * 9 // 10,
        ],
        gamma=0.33,
    )

    # setup the dataset
    train_dataset_kwargs = {}
    test_dataset_kwargs = {}
    if args.scene == "garden":
        from datasets.nerf_360_v2 import SubjectLoader

        # NOTE(review): hard-coded, machine-specific dataset location.
        data_root_fp = "/home/ruilongli/data/360_v2/"
        target_sample_batch_size = 1 << 16
        train_dataset_kwargs = {"color_bkgd_aug": "random", "factor": 4}
        test_dataset_kwargs = {"factor": 4}
        grid_resolution = 128
    else:
        from datasets.nerf_synthetic import SubjectLoader

        # NOTE(review): hard-coded, machine-specific dataset location.
        data_root_fp = "/home/ruilongli/data/nerf_synthetic/"
        target_sample_batch_size = 1 << 16
        grid_resolution = 128

    train_dataset = SubjectLoader(
        subject_id=args.scene,
        root_fp=data_root_fp,
        split=args.train_split,
        num_rays=target_sample_batch_size // render_n_samples,
        **train_dataset_kwargs,
    )
    # Move the dataset tensors to the training device up front.
    train_dataset.images = train_dataset.images.to(device)
    train_dataset.camtoworlds = train_dataset.camtoworlds.to(device)
    train_dataset.K = train_dataset.K.to(device)

    test_dataset = SubjectLoader(
        subject_id=args.scene,
        root_fp=data_root_fp,
        split="test",
        num_rays=None,
        **test_dataset_kwargs,
    )
    test_dataset.images = test_dataset.images.to(device)
    test_dataset.camtoworlds = test_dataset.camtoworlds.to(device)
    test_dataset.K = test_dataset.K.to(device)

    occupancy_grid = OccupancyGrid(
        roi_aabb=args.aabb,
        resolution=grid_resolution,
        contraction_type=contraction_type,
    ).to(device)

    # training
    step = 0
    tic = time.time()
    # The outer loop is effectively unbounded; the script leaves through
    # exit() once `step == max_steps`.
    for epoch in range(10000000):
        for i in range(len(train_dataset)):
            radiance_field.train()
            data = train_dataset[i]

            render_bkgd = data["color_bkgd"]
            rays = data["rays"]
            pixels = data["pixels"]

            # update occupancy grid
            occupancy_grid.every_n_step(
                step=step,
                occ_eval_fn=lambda x: radiance_field.query_opacity(
                    x, render_step_size
                ),
            )

            # render
            rgb, acc, depth, n_rendering_samples = render_image(
                radiance_field,
                occupancy_grid,
                rays,
                scene_aabb,
                # rendering options
                near_plane=near_plane,
                far_plane=far_plane,
                render_step_size=render_step_size,
                render_bkgd=render_bkgd,
                cone_angle=args.cone_angle,
            )
            # Nothing was sampled: skip this batch (note that `step` is not
            # incremented on this path).
            if n_rendering_samples == 0:
                continue

            # dynamic batch size for rays to keep sample batch size constant.
            num_rays = len(pixels)
            num_rays = int(
                num_rays
                * (target_sample_batch_size / float(n_rendering_samples))
            )
            train_dataset.update_num_rays(num_rays)
            # Only rays with non-zero accumulated opacity contribute to the loss.
            alive_ray_mask = acc.squeeze(-1) > 0

            # compute loss
            loss = F.smooth_l1_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])

            optimizer.zero_grad()
            # do not unscale it because we are using Adam.
            grad_scaler.scale(loss).backward()
            optimizer.step()
            scheduler.step()

            if step % 5000 == 0:
                elapsed_time = time.time() - tic
                # The logged value is the MSE, not the smooth-L1 training loss.
                loss = F.mse_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])
                print(
                    f"elapsed_time={elapsed_time:.2f}s | step={step} | "
                    f"loss={loss:.5f} | "
                    f"alive_ray_mask={alive_ray_mask.long().sum():d} | "
                    f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} |"
                )

            # Evaluate when `step` is a positive multiple of max_steps; because
            # the script exits at step == max_steps this runs once.
            if step > 0 and step % max_steps == 0:
                # evaluation
                radiance_field.eval()

                psnrs = []
                with torch.no_grad():
                    for i in tqdm.tqdm(range(len(test_dataset))):
                        data = test_dataset[i]
                        render_bkgd = data["color_bkgd"]
                        rays = data["rays"]
                        pixels = data["pixels"]

                        # rendering
                        rgb, acc, depth, _ = render_image(
                            radiance_field,
                            occupancy_grid,
                            rays,
                            scene_aabb,
                            # rendering options
                            # Use the same near/far planes as training so that
                            # unbounded scenes are evaluated over the range the
                            # model was trained on (both are None when bounded).
                            near_plane=near_plane,
                            far_plane=far_plane,
                            render_step_size=render_step_size,
                            render_bkgd=render_bkgd,
                            cone_angle=args.cone_angle,
                            # test options
                            test_chunk_size=args.test_chunk_size,
                        )
                        mse = F.mse_loss(rgb, pixels)
                        # PSNR = -10 * log10(mse)
                        psnr = -10.0 * torch.log(mse) / np.log(10.0)
                        psnrs.append(psnr.item())
                        # imageio.imwrite(
                        #     "acc_binary_test.png",
                        #     ((acc > 0).float().cpu().numpy() * 255).astype(np.uint8),
                        # )
                        # imageio.imwrite(
                        #     "rgb_test.png",
                        #     (rgb.cpu().numpy() * 255).astype(np.uint8),
                        # )
                        # break
                psnr_avg = sum(psnrs) / len(psnrs)
                print(f"evaluation: psnr_avg={psnr_avg}")
                # NOTE(review): the meaning of this flag is defined by
                # SubjectLoader, which is not visible here.
                train_dataset.training = True

            if step == max_steps:
                print("training stops")
                exit()

            step += 1
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/train_ngp_nerf.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import argparse
import math
import os
import time
import imageio
import numpy as np
import torch
import torch.nn.functional as F
import tqdm
from radiance_fields.ngp import NGPradianceField
from utils import render_image, set_random_seed
from nerfacc import ContractionType, OccupancyGrid
if __name__ == "__main__":
    # Script entry point: trains an NGPradianceField on one scene (bounded
    # NeRF-synthetic or unbounded mip-NeRF 360), using an OccupancyGrid during
    # rendering, prints progress every 10000 steps and evaluates PSNR on the
    # test split once `max_steps` is reached.
    device = "cuda:0"
    set_random_seed(42)

    # ---- command-line options ----
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--train_split",
        type=str,
        default="trainval",
        choices=["train", "trainval"],
        help="which train split to use",
    )
    parser.add_argument(
        "--scene",
        type=str,
        default="lego",
        choices=[
            # nerf synthetic
            "chair",
            "drums",
            "ficus",
            "hotdog",
            "lego",
            "materials",
            "mic",
            "ship",
            # mipnerf360 unbounded
            "garden",
            "bicycle",
            "bonsai",
            "counter",
            "kitchen",
            "room",
            "stump",
        ],
        help="which scene to use",
    )
    # Six comma-separated floats; the code below treats the first three as the
    # box minimum and the last three as the box maximum.
    parser.add_argument(
        "--aabb",
        type=lambda s: [float(item) for item in s.split(",")],
        default="-1.5,-1.5,-1.5,1.5,1.5,1.5",
        help="delimited list input",
    )
    parser.add_argument(
        "--test_chunk_size",
        type=int,
        default=8192,
    )
    parser.add_argument(
        "--unbounded",
        action="store_true",
        help="whether to use unbounded rendering",
    )
    parser.add_argument(
        "--auto_aabb",
        action="store_true",
        help="whether to automatically compute the aabb",
    )
    parser.add_argument("--cone_angle", type=float, default=0.0)
    args = parser.parse_args()

    render_n_samples = 1024

    # setup the dataset
    train_dataset_kwargs = {}
    test_dataset_kwargs = {}
    if args.unbounded:
        from datasets.nerf_360_v2 import SubjectLoader

        # NOTE(review): hard-coded, machine-specific dataset location.
        data_root_fp = "/home/ruilongli/data/360_v2/"
        target_sample_batch_size = 1 << 20
        train_dataset_kwargs = {"color_bkgd_aug": "random", "factor": 4}
        test_dataset_kwargs = {"factor": 4}
        grid_resolution = 256
    else:
        from datasets.nerf_synthetic import SubjectLoader

        # NOTE(review): hard-coded, machine-specific dataset location.
        data_root_fp = "/home/ruilongli/data/nerf_synthetic/"
        target_sample_batch_size = 1 << 18
        grid_resolution = 128

    train_dataset = SubjectLoader(
        subject_id=args.scene,
        root_fp=data_root_fp,
        split=args.train_split,
        num_rays=target_sample_batch_size // render_n_samples,
        **train_dataset_kwargs,
    )

    # Move the dataset tensors to the training device up front.
    train_dataset.images = train_dataset.images.to(device)
    train_dataset.camtoworlds = train_dataset.camtoworlds.to(device)
    train_dataset.K = train_dataset.K.to(device)

    test_dataset = SubjectLoader(
        subject_id=args.scene,
        root_fp=data_root_fp,
        split="test",
        num_rays=None,
        **test_dataset_kwargs,
    )
    test_dataset.images = test_dataset.images.to(device)
    test_dataset.camtoworlds = test_dataset.camtoworlds.to(device)
    test_dataset.K = test_dataset.K.to(device)

    if args.auto_aabb:
        # Replace --aabb with the per-axis min/max of the last column of the
        # top three rows of every train and test camera-to-world matrix.
        camera_locs = torch.cat(
            [train_dataset.camtoworlds, test_dataset.camtoworlds]
        )[:, :3, -1]
        args.aabb = torch.cat(
            [camera_locs.min(dim=0).values, camera_locs.max(dim=0).values]
        ).tolist()
        print("Using auto aabb", args.aabb)

    # setup the scene bounding box.
    if args.unbounded:
        print("Using unbounded rendering")
        contraction_type = ContractionType.UN_BOUNDED_SPHERE
        # contraction_type = ContractionType.UN_BOUNDED_TANH
        scene_aabb = None
        near_plane = 0.2
        far_plane = 1e4
        render_step_size = 1e-2
        alpha_thre = 1e-2
    else:
        contraction_type = ContractionType.AABB
        scene_aabb = torch.tensor(args.aabb, dtype=torch.float32, device=device)
        near_plane = None
        far_plane = None
        # Step size = longest box edge * sqrt(3) / render_n_samples.
        render_step_size = (
            (scene_aabb[3:] - scene_aabb[:3]).max()
            * math.sqrt(3)
            / render_n_samples
        ).item()
        alpha_thre = 0.0

    # setup the radiance field we want to train.
    max_steps = 20000
    grad_scaler = torch.cuda.amp.GradScaler(2**10)
    radiance_field = NGPradianceField(
        aabb=args.aabb,
        unbounded=args.unbounded,
    ).to(device)
    optimizer = torch.optim.Adam(
        radiance_field.parameters(), lr=1e-2, eps=1e-15
    )
    # Learning-rate milestones at 1/2, 3/4 and 9/10 of max_steps.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[max_steps // 2, max_steps * 3 // 4, max_steps * 9 // 10],
        gamma=0.33,
    )

    occupancy_grid = OccupancyGrid(
        roi_aabb=args.aabb,
        resolution=grid_resolution,
        contraction_type=contraction_type,
    ).to(device)

    # training
    step = 0
    tic = time.time()
    # The outer loop is effectively unbounded; the script leaves through
    # exit() once `step == max_steps`.
    for epoch in range(10000000):
        for i in range(len(train_dataset)):
            radiance_field.train()
            data = train_dataset[i]

            render_bkgd = data["color_bkgd"]
            rays = data["rays"]
            pixels = data["pixels"]

            def occ_eval_fn(x):
                # Returns density * step size for the query points `x`; passed
                # to the occupancy grid below as its evaluation function.
                if args.cone_angle > 0.0:
                    # randomly sample a camera for computing step size.
                    camera_ids = torch.randint(
                        0, len(train_dataset), (x.shape[0],), device=device
                    )
                    origins = train_dataset.camtoworlds[camera_ids, :3, -1]
                    t = (origins - x).norm(dim=-1, keepdim=True)
                    # compute actual step size used in marching, based on the distance to the camera.
                    step_size = torch.clamp(
                        t * args.cone_angle, min=render_step_size
                    )
                    # filter out the points that are not in the near far plane.
                    if (near_plane is not None) and (far_plane is not None):
                        step_size = torch.where(
                            (t > near_plane) & (t < far_plane),
                            step_size,
                            torch.zeros_like(step_size),
                        )
                else:
                    step_size = render_step_size
                # compute occupancy
                density = radiance_field.query_density(x)
                return density * step_size

            # update occupancy grid
            occupancy_grid.every_n_step(step=step, occ_eval_fn=occ_eval_fn)

            # render
            rgb, acc, depth, n_rendering_samples = render_image(
                radiance_field,
                occupancy_grid,
                rays,
                scene_aabb,
                # rendering options
                near_plane=near_plane,
                far_plane=far_plane,
                render_step_size=render_step_size,
                render_bkgd=render_bkgd,
                cone_angle=args.cone_angle,
                alpha_thre=alpha_thre,
            )
            # Nothing was sampled: skip this batch (note that `step` is not
            # incremented on this path).
            if n_rendering_samples == 0:
                continue

            # dynamic batch size for rays to keep sample batch size constant.
            num_rays = len(pixels)
            num_rays = int(
                num_rays
                * (target_sample_batch_size / float(n_rendering_samples))
            )
            train_dataset.update_num_rays(num_rays)
            # Only rays with non-zero accumulated opacity contribute to the loss.
            alive_ray_mask = acc.squeeze(-1) > 0

            # compute loss
            loss = F.smooth_l1_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])

            optimizer.zero_grad()
            # do not unscale it because we are using Adam.
            grad_scaler.scale(loss).backward()
            optimizer.step()
            scheduler.step()

            if step % 10000 == 0:
                elapsed_time = time.time() - tic
                # The logged value is the MSE, not the smooth-L1 training loss.
                loss = F.mse_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])
                print(
                    f"elapsed_time={elapsed_time:.2f}s | step={step} | "
                    f"loss={loss:.5f} | "
                    f"alive_ray_mask={alive_ray_mask.long().sum():d} | "
                    f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} |"
                )

            # Evaluate when `step` is a positive multiple of max_steps; because
            # the script exits at step == max_steps this runs once.
            if step >= 0 and step % max_steps == 0 and step > 0:
                # evaluation
                radiance_field.eval()

                psnrs = []
                with torch.no_grad():
                    for i in tqdm.tqdm(range(len(test_dataset))):
                        data = test_dataset[i]
                        render_bkgd = data["color_bkgd"]
                        rays = data["rays"]
                        pixels = data["pixels"]

                        # rendering
                        rgb, acc, depth, _ = render_image(
                            radiance_field,
                            occupancy_grid,
                            rays,
                            scene_aabb,
                            # rendering options
                            near_plane=near_plane,
                            far_plane=far_plane,
                            render_step_size=render_step_size,
                            render_bkgd=render_bkgd,
                            cone_angle=args.cone_angle,
                            alpha_thre=alpha_thre,
                            # test options
                            test_chunk_size=args.test_chunk_size,
                        )
                        mse = F.mse_loss(rgb, pixels)
                        # PSNR = -10 * log10(mse)
                        psnr = -10.0 * torch.log(mse) / np.log(10.0)
                        psnrs.append(psnr.item())
                        # imageio.imwrite(
                        #     "acc_binary_test.png",
                        #     ((acc > 0).float().cpu().numpy() * 255).astype(np.uint8),
                        # )
                        # imageio.imwrite(
                        #     "rgb_test.png",
                        #     (rgb.cpu().numpy() * 255).astype(np.uint8),
                        # )
                        # break
                psnr_avg = sum(psnrs) / len(psnrs)
                print(f"evaluation: psnr_avg={psnr_avg}")
                # NOTE(review): the meaning of this flag is defined by
                # SubjectLoader, which is not visible here.
                train_dataset.training = True

            if step == max_steps:
                print("training stops")
                exit()

            step += 1
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/utils.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import random
from typing import Optional
import numpy as np
import torch
from datasets.utils import Rays, namedtuple_map
from nerfacc import OccupancyGrid, ray_marching, rendering
def set_random_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch with the same ``seed``."""
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
def render_image(
    # scene
    radiance_field: torch.nn.Module,
    occupancy_grid: OccupancyGrid,
    rays: Rays,
    scene_aabb: torch.Tensor,
    # rendering options
    near_plane: Optional[float] = None,
    far_plane: Optional[float] = None,
    render_step_size: float = 1e-3,
    render_bkgd: Optional[torch.Tensor] = None,
    cone_angle: float = 0.0,
    alpha_thre: float = 0.0,
    # test options
    test_chunk_size: int = 8192,
    # only useful for dnerf
    timestamps: Optional[torch.Tensor] = None,
):
    """Render the pixels of an image.

    Rays are marched with ``ray_marching`` and composited with ``rendering``,
    in chunks of ``test_chunk_size`` rays when ``radiance_field.training`` is
    false and in a single chunk (int32 max) otherwise.

    Args:
        radiance_field: Module providing ``query_density`` and ``__call__``.
        occupancy_grid: Grid forwarded to ``ray_marching``.
        rays: Ray bundle with ``origins`` and ``viewdirs``; ``origins`` is
            either 3-dimensional (flattened over its first two dimensions
            here) or 2-dimensional.
        scene_aabb: Forwarded to ``ray_marching``.
        near_plane, far_plane, render_step_size, cone_angle, alpha_thre:
            Forwarded to ``ray_marching``.
        render_bkgd: Forwarded to ``rendering``.
        test_chunk_size: Rays per chunk when not training.
        timestamps: When given, also passed to the radiance field (dnerf).

    Returns:
        ``(colors, opacities, depths, n_rendering_samples)`` where the three
        tensors are viewed as ``rays.origins.shape[:-1] + (-1,)`` and the last
        item is the total number of samples over all chunks.
    """
    rays_shape = rays.origins.shape
    if len(rays_shape) != 3:
        num_rays, _ = rays_shape
    else:
        height, width, _ = rays_shape
        num_rays = height * width
        rays = namedtuple_map(
            lambda r: r.reshape([num_rays] + list(r.shape[2:])), rays
        )

    def _sample_inputs(t_starts, t_ends, ray_indices):
        # Shared by both callbacks: interval mid-points, per-sample view
        # directions and (for dnerf) per-sample timestamps, else None.
        sample_origins = chunk_rays.origins[ray_indices]
        sample_dirs = chunk_rays.viewdirs[ray_indices]
        positions = sample_origins + sample_dirs * (t_starts + t_ends) / 2.0
        if timestamps is None:
            return positions, sample_dirs, None
        # dnerf
        if radiance_field.training:
            sample_times = timestamps[ray_indices]
        else:
            sample_times = timestamps.expand_as(positions[:, :1])
        return positions, sample_dirs, sample_times

    def sigma_fn(t_starts, t_ends, ray_indices):
        positions, _, sample_times = _sample_inputs(
            t_starts, t_ends, ray_indices
        )
        if timestamps is None:
            return radiance_field.query_density(positions)
        return radiance_field.query_density(positions, sample_times)

    def rgb_sigma_fn(t_starts, t_ends, ray_indices):
        positions, sample_dirs, sample_times = _sample_inputs(
            t_starts, t_ends, ray_indices
        )
        if timestamps is None:
            return radiance_field(positions, sample_dirs)
        return radiance_field(positions, sample_times, sample_dirs)

    if radiance_field.training:
        chunk = torch.iinfo(torch.int32).max
    else:
        chunk = test_chunk_size

    results = []
    for i in range(0, num_rays, chunk):
        # The callbacks above read `chunk_rays` from this enclosing scope.
        chunk_rays = namedtuple_map(lambda r: r[i : i + chunk], rays)
        ray_indices, t_starts, t_ends = ray_marching(
            chunk_rays.origins,
            chunk_rays.viewdirs,
            scene_aabb=scene_aabb,
            grid=occupancy_grid,
            sigma_fn=sigma_fn,
            near_plane=near_plane,
            far_plane=far_plane,
            render_step_size=render_step_size,
            stratified=radiance_field.training,
            cone_angle=cone_angle,
            alpha_thre=alpha_thre,
        )
        rgb, opacity, depth = rendering(
            t_starts,
            t_ends,
            ray_indices,
            n_rays=chunk_rays.origins.shape[0],
            rgb_sigma_fn=rgb_sigma_fn,
            render_bkgd=render_bkgd,
        )
        results.append([rgb, opacity, depth, len(t_starts)])

    # Transpose the per-chunk results: tensor columns are concatenated, the
    # sample-count column stays a tuple of ints.
    colors, opacities, depths, n_rendering_samples = [
        torch.cat(column, dim=0)
        if isinstance(column[0], torch.Tensor)
        else column
        for column in zip(*results)
    ]
    out_shape = (*rays_shape[:-1], -1)
    return (
        colors.view(out_shape),
        opacities.view(out_shape),
        depths.view(out_shape),
        sum(n_rendering_samples),
    )
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/__init__.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import warnings
from .cdf import ray_resampling
from .contraction import ContractionType, contract, contract_inv
from .grid import Grid, OccupancyGrid, query_grid
from .intersection import ray_aabb_intersect
from .losses import distortion as loss_distortion
from .pack import pack_data, pack_info, unpack_data, unpack_info
from .ray_marching import ray_marching
from .version import __version__
from .vol_rendering import (
accumulate_along_rays,
accumulate_along_rays_patch_based,
render_transmittance_from_alpha,
render_transmittance_from_density,
render_visibility,
render_visibility_patch_based,
render_weight_from_alpha,
render_weight_from_density,
render_weight_from_alpha_patch_based,
render_weight_and_transmittance_from_alpha_patch_based,
rendering,
)
# About to be deprecated
def unpack_to_ray_indices(*args, **kwargs):
    """Forward to :func:`unpack_info` after emitting a ``DeprecationWarning``.

    All positional and keyword arguments are passed through unchanged and the
    result of ``unpack_info`` is returned.
    """
    message = "`unpack_to_ray_indices` will be deprecated. Please use `unpack_info` instead."
    # stacklevel=2 attributes the warning to the caller of this wrapper.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return unpack_info(*args, **kwargs)
# Public API of the package: every name listed here is imported (or defined)
# at the top of this module.
__all__ = [
    "__version__",
    # grids
    "Grid",
    "OccupancyGrid",
    "query_grid",
    # space contraction
    "ContractionType",
    "contract",
    "contract_inv",
    # ray / box intersection and marching
    "ray_aabb_intersect",
    "ray_marching",
    # volumetric rendering
    "accumulate_along_rays",
    "accumulate_along_rays_patch_based",
    "render_visibility",
    "render_visibility_patch_based",
    "render_weight_from_alpha",
    "render_weight_from_alpha_patch_based",
    "render_weight_from_density",
    "rendering",
    # packed-sample helpers
    "pack_data",
    "unpack_data",
    "unpack_info",
    "pack_info",
    # resampling and losses
    "ray_resampling",
    "loss_distortion",
    # deprecated alias of unpack_info
    "unpack_to_ray_indices",
    # transmittance
    "render_transmittance_from_density",
    "render_transmittance_from_alpha",
    "render_weight_and_transmittance_from_alpha_patch_based"
]
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cdf.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from typing import Tuple
from torch import Tensor
import nerfacc.cuda as _C
def ray_resampling(
    packed_info: Tensor,
    t_starts: Tensor,
    t_ends: Tensor,
    weights: Tensor,
    n_samples: int,
) -> Tuple[Tensor, Tensor, Tensor]:
    """Resample a set of rays based on the CDF of the weights.

    Every tensor argument is made contiguous before being handed to the CUDA
    backend function of the same name.

    Args:
        packed_info (Tensor): Stores information on which samples belong to
            the same ray. See :func:`nerfacc.ray_marching` for details.
            Tensor with shape (n_rays, 2).
        t_starts: Where the frustum-shape sample starts along a ray. Tensor
            with shape (n_samples, 1).
        t_ends: Where the frustum-shape sample ends along a ray. Tensor with
            shape (n_samples, 1).
        weights: Volumetric rendering weights for those samples. Tensor with
            shape (n_samples,).
        n_samples (int): Number of samples per ray to resample.

    Returns:
        Resampled packed info (n_rays, 2), t_starts (n_samples, 1), and
        t_ends (n_samples, 1).
    """
    contiguous_inputs = [
        tensor.contiguous()
        for tensor in (packed_info, t_starts, t_ends, weights)
    ]
    new_packed_info, new_t_starts, new_t_ends = _C.ray_resampling(
        *contiguous_inputs, n_samples
    )
    return new_packed_info, new_t_starts, new_t_ends
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/contraction.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from enum import Enum
import torch
import nerfacc.cuda as _C
class ContractionType(Enum):
    """Space contraction options.

    This is an enum class that describes how a :class:`nerfacc.Grid` covers the 3D space.
    It is also used by :func:`nerfacc.ray_marching` to determine how to perform ray marching
    within the grid.

    The options in this enum class are:

    Attributes:
        AABB: Linearly map the region of interest :math:`[x_0, x_1]` to a
            unit cube in :math:`[0, 1]`.

            .. math:: f(x) = \\frac{x - x_0}{x_1 - x_0}

        UN_BOUNDED_TANH: Contract an unbounded space into a unit cube in :math:`[0, 1]`
            using tanh. The region of interest :math:`[x_0, x_1]` is first
            mapped into :math:`[-0.5, +0.5]` before applying tanh.

            .. math:: f(x) = \\frac{1}{2}(tanh(\\frac{x - x_0}{x_1 - x_0} - \\frac{1}{2}) + 1)

        UN_BOUNDED_SPHERE: Contract an unbounded space into a unit sphere. Used in
            `Mip-Nerf 360: Unbounded Anti-Aliased Neural Radiance Fields`_.

            .. math::
                f(x) =
                \\begin{cases}
                z(x) & ||z(x)|| \\leq 1 \\\\
                (2 - \\frac{1}{||z(x)||})(\\frac{z(x)}{||z(x)||}) & ||z(x)|| > 1
                \\end{cases}

            .. math::
                z(x) = \\frac{x - x_0}{x_1 - x_0} * 2 - 1

    .. _Mip-Nerf 360\: Unbounded Anti-Aliased Neural Radiance Fields:
        https://arxiv.org/abs/2111.12077

    """

    # The integer values are what `to_cpp_version` passes to the backend, so
    # they must not be renumbered independently of it.
    AABB = 0
    UN_BOUNDED_TANH = 1
    UN_BOUNDED_SPHERE = 2

    def to_cpp_version(self):
        """Convert to the C++ version of the enum class.

        The conversion calls the backend's ``ContractionTypeGetter`` with this
        member's integer ``value``.

        Returns:
            The C++ version of the enum class.
        """
        return _C.ContractionTypeGetter(self.value)
@torch.no_grad()
def contract(
    x: torch.Tensor,
    roi: torch.Tensor,
    type: ContractionType = ContractionType.AABB,
) -> torch.Tensor:
    """Contract the space into [0, 1]^3.

    Runs under ``torch.no_grad()``; both tensors are made contiguous before
    the backend call.

    Args:
        x (torch.Tensor): Un-contracted points.
        roi (torch.Tensor): Region of interest.
        type (ContractionType): Contraction type.

    Returns:
        torch.Tensor: Contracted points ([0, 1]^3).
    """
    cpp_type = type.to_cpp_version()
    points = x.contiguous()
    region = roi.contiguous()
    return _C.contract(points, region, cpp_type)
@torch.no_grad()
def contract_inv(
    x: torch.Tensor,
    roi: torch.Tensor,
    type: ContractionType = ContractionType.AABB,
) -> torch.Tensor:
    """Recover the space from [0, 1]^3 by inverse contraction.

    Runs under ``torch.no_grad()``; both tensors are made contiguous before
    the backend call.

    Args:
        x (torch.Tensor): Contracted points ([0, 1]^3).
        roi (torch.Tensor): Region of interest.
        type (ContractionType): Contraction type.

    Returns:
        torch.Tensor: Un-contracted points.
    """
    cpp_type = type.to_cpp_version()
    points = x.contiguous()
    region = roi.contiguous()
    return _C.contract_inv(points, region, cpp_type)
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/__init__.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from typing import Any, Callable
def _make_lazy_cuda_func(name: str) -> Callable:
    """Return a wrapper that resolves ``name`` on the CUDA backend when called.

    The ``._backend`` import happens inside the wrapper, so nothing is
    imported until the returned function is first invoked.

    Args:
        name: Attribute name to look up on the backend module ``_C``.

    Returns:
        A callable forwarding ``*args`` / ``**kwargs`` to ``_C.<name>``.
    """

    def call_cuda(*args, **kwargs):
        # pylint: disable=import-outside-toplevel
        from ._backend import _C as backend

        target = getattr(backend, name)
        return target(*args, **kwargs)

    return call_cuda
# Module-level lazy bindings: each name below is a wrapper that looks up the
# backend attribute given by the string argument when it is called.

# Note: exposed as `ContractionTypeGetter` but resolves the backend attribute
# named "ContractionType".
ContractionTypeGetter = _make_lazy_cuda_func("ContractionType")
contract = _make_lazy_cuda_func("contract")
contract_inv = _make_lazy_cuda_func("contract_inv")

grid_query = _make_lazy_cuda_func("grid_query")

ray_aabb_intersect = _make_lazy_cuda_func("ray_aabb_intersect")
ray_marching = _make_lazy_cuda_func("ray_marching")
ray_resampling = _make_lazy_cuda_func("ray_resampling")

is_cub_available = _make_lazy_cuda_func("is_cub_available")

# transmittance: "cub" variants
transmittance_from_sigma_forward_cub = _make_lazy_cuda_func(
    "transmittance_from_sigma_forward_cub"
)
transmittance_from_sigma_backward_cub = _make_lazy_cuda_func(
    "transmittance_from_sigma_backward_cub"
)
transmittance_from_alpha_forward_cub = _make_lazy_cuda_func(
    "transmittance_from_alpha_forward_cub"
)
transmittance_from_alpha_backward_cub = _make_lazy_cuda_func(
    "transmittance_from_alpha_backward_cub"
)

# transmittance: "naive" variants
transmittance_from_sigma_forward_naive = _make_lazy_cuda_func(
    "transmittance_from_sigma_forward_naive"
)
transmittance_from_sigma_backward_naive = _make_lazy_cuda_func(
    "transmittance_from_sigma_backward_naive"
)
transmittance_from_alpha_forward_naive = _make_lazy_cuda_func(
    "transmittance_from_alpha_forward_naive"
)
transmittance_from_alpha_backward_naive = _make_lazy_cuda_func(
    "transmittance_from_alpha_backward_naive"
)
transmittance_from_alpha_patch_based_forward_naive = _make_lazy_cuda_func(
    "transmittance_from_alpha_patch_based_forward_naive"
)
transmittance_from_alpha_patch_based_backward_naive = _make_lazy_cuda_func(
    "transmittance_from_alpha_patch_based_backward_naive"
)

# rendering weights
weight_from_sigma_forward_naive = _make_lazy_cuda_func(
    "weight_from_sigma_forward_naive"
)
weight_from_sigma_backward_naive = _make_lazy_cuda_func(
    "weight_from_sigma_backward_naive"
)
weight_from_alpha_forward_naive = _make_lazy_cuda_func(
    "weight_from_alpha_forward_naive"
)
weight_from_alpha_backward_naive = _make_lazy_cuda_func(
    "weight_from_alpha_backward_naive"
)
# weight_from_alpha_importance_sampling_forward_naive = _make_lazy_cuda_func(
#     "weight_from_alpha_importance_sampling_forward_naive"
# )
#
# weight_from_alpha_importance_sampling_backward_naive = _make_lazy_cuda_func(
#     "weight_from_alpha_importance_sampling_backward_naive"
# )
weight_from_alpha_patch_based_forward_naive = _make_lazy_cuda_func(
    "weight_from_alpha_patch_based_forward_naive"
)
weight_from_alpha_patch_based_backward_naive = _make_lazy_cuda_func(
    "weight_from_alpha_patch_based_backward_naive"
)
weight_and_transmittance_from_alpha_patch_based_forward_naive = _make_lazy_cuda_func(
    "weight_and_transmittance_from_alpha_patch_based_forward_naive"
)
weight_and_transmittance_from_alpha_patch_based_backward_naive = _make_lazy_cuda_func(
    "weight_and_transmittance_from_alpha_patch_based_backward_naive"
)

# packed-sample helpers
unpack_data = _make_lazy_cuda_func("unpack_data")
unpack_info = _make_lazy_cuda_func("unpack_info")
unpack_info_to_mask = _make_lazy_cuda_func("unpack_info_to_mask")
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/_backend.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
import glob
import json
import os
import shutil
from subprocess import DEVNULL, call
from rich.console import Console
from torch.utils.cpp_extension import _get_build_directory, load
PATH = os.path.dirname(os.path.abspath(__file__))
def cuda_toolkit_available():
    """Check if nvcc is available on the machine.

    Tries to launch ``nvcc`` with its output discarded.

    Returns:
        ``True`` if the process could be started, ``False`` if launching it
        raised ``FileNotFoundError``.
    """
    try:
        call(["nvcc"], stdout=DEVNULL, stderr=DEVNULL)
    except FileNotFoundError:
        return False
    else:
        return True
def cuda_toolkit_version():
    """Get the cuda toolkit version.

    Locates ``nvcc`` on ``PATH`` and looks in the parent of the directory
    containing it for ``version.txt`` (the last whitespace-separated token is
    returned) or, failing that, ``version.json`` (``["cuda"]["version"]`` is
    returned).

    Returns:
        The version string read from the version file.

    Raises:
        RuntimeError: If ``nvcc`` is not on ``PATH`` or neither version file
            exists.
    """
    nvcc_path = shutil.which("nvcc")
    if nvcc_path is None:
        # Without this guard os.path.dirname(None) raises an opaque TypeError.
        raise RuntimeError(
            "Cannot find the cuda version: nvcc is not on PATH."
        )
    cuda_home = os.path.join(os.path.dirname(nvcc_path), "..")
    version_txt = os.path.join(cuda_home, "version.txt")
    version_json = os.path.join(cuda_home, "version.json")
    if os.path.exists(version_txt):
        with open(version_txt) as f:
            cuda_version = f.read().strip().split()[-1]
    elif os.path.exists(version_json):
        with open(version_json) as f:
            cuda_version = json.load(f)["cuda"]["version"]
    else:
        raise RuntimeError("Cannot find the cuda version.")
    return cuda_version
# Module-level loading of the CUDA extension. After this block `_C` is either
# the loaded extension module or None (when no CUDA toolkit is found).
name = "nerfacc_cuda"
build_dir = _get_build_directory(name, verbose=False)
extra_include_paths = []
extra_cflags = ["-O3"]
extra_cuda_cflags = ["-O3"]

_C = None

try:
    # try to import the compiled module (via setup.py)
    from nerfacc import csrc as _C
except ImportError:
    # if failed, try with JIT compilation
    if cuda_toolkit_available():
        if os.listdir(build_dir) != []:
            # If the build exists, we assume the extension has been built
            # and we can load it.

            _C = load(
                name=name,
                sources=glob.glob(os.path.join(PATH, "csrc/*.cu")),
                extra_cflags=extra_cflags,
                extra_cuda_cflags=extra_cuda_cflags,
                extra_include_paths=extra_include_paths,
            )
        else:
            # Build from scratch. Remove the build directory just to be safe: pytorch jit might stuck
            # if the build directory exists.
            shutil.rmtree(build_dir)
            # Show a console status spinner while `load` runs.
            with Console().status(
                "[bold yellow]NerfAcc: Setting up CUDA (This may take a few minutes the first time)",
                spinner="bouncingBall",
            ):
                _C = load(
                    name=name,
                    sources=glob.glob(os.path.join(PATH, "csrc/*.cu")),
                    extra_cflags=extra_cflags,
                    extra_cuda_cflags=extra_cuda_cflags,
                    extra_include_paths=extra_include_paths,
                )
    else:
        # No nvcc: leave `_C` as None and only print a notice.
        Console().print(
            "[yellow]NerfAcc: No CUDA toolkit found. NerfAcc will be disabled.[/yellow]"
        )


__all__ = ["_C"]
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/cdf.cu
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
#include "include/helpers_cuda.h"
// Per-ray inverse-CDF resampling: each thread handles one ray `i` and turns
// that ray's per-sample weights into `resample_steps` new (start, end)
// intervals.
//
// packed_info / resample_packed_info hold two ints per ray: the offset of the
// ray's first sample in the flat arrays and the number of samples it owns.
// The loop below evaluates num_bins = resample_steps + 1 CDF positions; each
// resulting t is written as the start of interval j and the end of interval
// j - 1, so consecutive output intervals share their edges.
//
// NOTE(review): the template parameter list (presumably `<typename scalar_t>`)
// is missing from the next line in this copy — confirm against upstream.
template
__global__ void cdf_resampling_kernel(
const uint32_t n_rays,
const int *packed_info, // input ray & point indices.
const scalar_t *starts, // input start t
const scalar_t *ends, // input end t
const scalar_t *weights, // transmittance weights
const int *resample_packed_info,
scalar_t *resample_starts,
scalar_t *resample_ends)
{
// Declares thread index `i` and returns early for threads past n_rays.
CUDA_GET_THREAD_ID(i, n_rays);
// locate
const int base = packed_info[i * 2 + 0]; // point idx start.
const int steps = packed_info[i * 2 + 1]; // point idx shift.
const int resample_base = resample_packed_info[i * 2 + 0];
const int resample_steps = resample_packed_info[i * 2 + 1];
// Rays without input samples produce no output.
if (steps == 0)
return;
// Rebase all pointers so index 0 is this ray's first sample.
starts += base;
ends += base;
weights += base;
resample_starts += resample_base;
resample_ends += resample_base;
// normalize weights **per ray**
scalar_t weights_sum = 0.0f;
for (int j = 0; j < steps; j++)
weights_sum += weights[j];
// If the weights sum to less than 1e-5, the shortfall is spread evenly over
// the samples so that the normalisation below never divides by ~0.
scalar_t padding = fmaxf(1e-5f - weights_sum, 0.0f);
scalar_t padding_step = padding / steps;
weights_sum += padding;
int num_bins = resample_steps + 1;
// CDF query positions run from 1/(2*num_bins) to 1 - 1/(2*num_bins) in
// resample_steps equal increments.
scalar_t cdf_step_size = (1.0f - 1.0 / num_bins) / resample_steps;
// idx walks the input samples, j counts the query positions emitted so far.
int idx = 0, j = 0;
// [cdf_prev, cdf_next) is the CDF range covered by input sample idx.
scalar_t cdf_prev = 0.0f, cdf_next = (weights[idx] + padding_step) / weights_sum;
scalar_t cdf_u = 1.0 / (2 * num_bins);
while (j < num_bins)
{
if (cdf_u < cdf_next)
{
// printf("cdf_u: %f, cdf_next: %f\n", cdf_u, cdf_next);
// resample in this interval
// Linear interpolation of t inside input sample idx.
scalar_t scaling = (ends[idx] - starts[idx]) / (cdf_next - cdf_prev);
scalar_t t = (cdf_u - cdf_prev) * scaling + starts[idx];
if (j < num_bins - 1)
resample_starts[j] = t;
if (j > 0)
resample_ends[j - 1] = t;
// going further to next resample
cdf_u += cdf_step_size;
j += 1;
}
else
{
// going to next interval
// NOTE(review): idx is advanced without a check against `steps`; if
// cdf_u < cdf_next never becomes true (e.g. NaN weights or accumulated
// rounding) weights[idx] reads past this ray's samples — confirm.
idx += 1;
cdf_prev = cdf_next;
cdf_next += (weights[idx] + padding_step) / weights_sum;
}
}
// NOTE(review): the loop above only exits once j == num_bins (there is no
// break), so this diagnostic cannot fire as written.
if (j != num_bins)
{
printf("Error: %d %d %f\n", j, num_bins, weights_sum);
}
return;
}
// template
// __global__ void cdf_resampling_kernel(
// const uint32_t n_rays,
// const int *packed_info, // input ray & point indices.
// const scalar_t *starts, // input start t
// const scalar_t *ends, // input end t
// const scalar_t *weights, // transmittance weights
// const int *resample_packed_info,
// scalar_t *resample_starts,
// scalar_t *resample_ends)
// {
// CUDA_GET_THREAD_ID(i, n_rays);
// // locate
// const int base = packed_info[i * 2 + 0]; // point idx start.
// const int steps = packed_info[i * 2 + 1]; // point idx shift.
// const int resample_base = resample_packed_info[i * 2 + 0];
// const int resample_steps = resample_packed_info[i * 2 + 1];
// if (steps == 0)
// return;
// starts += base;
// ends += base;
// weights += base;
// resample_starts += resample_base;
// resample_ends += resample_base;
// scalar_t cdf_step_size = 1.0f / resample_steps;
// // normalize weights **per ray**
// scalar_t weights_sum = 0.0f;
// for (int j = 0; j < steps; j++)
// weights_sum += weights[j];
// scalar_t padding = fmaxf(1e-5f - weights_sum, 0.0f);
// scalar_t padding_step = padding / steps;
// weights_sum += padding;
// int idx = 0, j = 0;
// scalar_t cdf_prev = 0.0f, cdf_next = (weights[idx] + padding_step) / weights_sum;
// scalar_t cdf_u = 0.5f * cdf_step_size;
// while (cdf_u < 1.0f)
// {
// if (cdf_u < cdf_next)
// {
// // resample in this interval
// scalar_t scaling = (ends[idx] - starts[idx]) / (cdf_next - cdf_prev);
// scalar_t resample_mid = (cdf_u - cdf_prev) * scaling + starts[idx];
// scalar_t resample_half_size = cdf_step_size * scaling * 0.5;
// resample_starts[j] = fmaxf(resample_mid - resample_half_size, starts[idx]);
// resample_ends[j] = fminf(resample_mid + resample_half_size, ends[idx]);
// // going further to next resample
// cdf_u += cdf_step_size;
// j += 1;
// }
// else
// {
// // go to next interval
// idx += 1;
// if (idx == steps)
// break;
// cdf_prev = cdf_next;
// cdf_next += (weights[idx] + padding_step) / weights_sum;
// }
// }
// if (j != resample_steps)
// {
// printf("Error: %d %d %f\n", j, resample_steps, weights_sum);
// }
// return;
// }
/**
 * Resample `steps` intervals on every non-empty ray from per-sample weights.
 *
 * @param packed_info (n_rays, 2) int tensor: per ray, the offset of its first
 *                    sample and its number of samples.
 * @param starts      (n_samples, 1) interval starts.
 * @param ends        (n_samples, 1) interval ends.
 * @param weights     (n_samples,) per-sample weights.
 * @param steps       number of output intervals for each ray that has at
 *                    least one input sample; rays without samples get none.
 * @return {resample_packed_info, resample_starts, resample_ends}, where
 *         resample_packed_info is (n_rays, 2) in the same offset/count layout
 *         and the other two are (total_steps, 1).
 *
 * All tensors must be contiguous CUDA tensors; violations raise via
 * TORCH_CHECK.
 */
std::vector<torch::Tensor> ray_resampling(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor weights,
    const int steps)
{
    DEVICE_GUARD(packed_info);

    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(weights);

    // Logical && (not bitwise &): size(1) must only be queried once the
    // tensor is known to be 2-D, otherwise the lookup itself throws.
    TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 2);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(weights.ndimension() == 1);

    const uint32_t n_rays = packed_info.size(0);
    if (n_rays == 0)
    {
        // Nothing to resample. Returning here also avoids indexing the last
        // element of an empty cumulative sum and the unsigned wrap-around of
        // (n_rays - 1) in the block-count computation.
        return {torch::zeros({0, 2}, packed_info.options()),
                torch::zeros({0, 1}, starts.options()),
                torch::zeros({0, 1}, ends.options())};
    }

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // Per-ray output count: `steps` where the ray has samples, else 0.
    torch::Tensor num_steps = torch::split(packed_info, 1, 1)[1];
    torch::Tensor resample_num_steps = (num_steps > 0).to(num_steps.options()) * steps;
    torch::Tensor resample_cum_steps = resample_num_steps.cumsum(0, torch::kInt32);
    // Exclusive prefix sum gives each ray's output offset.
    torch::Tensor resample_packed_info = torch::cat(
        {resample_cum_steps - resample_num_steps, resample_num_steps}, 1);

    int total_steps = resample_cum_steps[resample_cum_steps.size(0) - 1].item<int>();
    torch::Tensor resample_starts = torch::zeros({total_steps, 1}, starts.options());
    torch::Tensor resample_ends = torch::zeros({total_steps, 1}, ends.options());

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
        weights.scalar_type(),
        "ray_resampling",
        ([&]
         { cdf_resampling_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
               n_rays,
               // inputs
               packed_info.data_ptr<int>(),
               starts.data_ptr<scalar_t>(),
               ends.data_ptr<scalar_t>(),
               weights.data_ptr<scalar_t>(),
               resample_packed_info.data_ptr<int>(),
               // outputs
               resample_starts.data_ptr<scalar_t>(),
               resample_ends.data_ptr<scalar_t>()); }));

    return {resample_packed_info, resample_starts, resample_ends};
}
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/contraction.cu
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
#include "include/helpers_cuda.h"
#include "include/helpers_math.h"
#include "include/helpers_contraction.h"
// Applies the forward contraction to every sample: thread `i` reads the i-th
// xyz triple from `samples`, maps it with apply_contraction using the box
// stored in roi[0..5] (min xyz, then max xyz), and writes the result to the
// same position in `out_samples`.
__global__ void contract_kernel(
    // samples info
    const uint32_t n_samples,
    const float *samples, // (n_samples, 3)
    // contraction
    const float *roi,
    const ContractionType type,
    // outputs
    float *out_samples)
{
    CUDA_GET_THREAD_ID(i, n_samples);

    // This thread's triple inside the flat input / output arrays.
    const float *src = samples + i * 3;
    float *dst = out_samples + i * 3;

    const float3 box_lo = make_float3(roi[0], roi[1], roi[2]);
    const float3 box_hi = make_float3(roi[3], roi[4], roi[5]);
    const float3 point = make_float3(src[0], src[1], src[2]);

    const float3 mapped = apply_contraction(point, box_lo, box_hi, type);
    dst[0] = mapped.x;
    dst[1] = mapped.y;
    dst[2] = mapped.z;
}
// Applies the inverse contraction to every sample: thread `i` reads the i-th
// xyz triple from `samples`, maps it with apply_contraction_inv using the box
// stored in roi[0..5] (min xyz, then max xyz), and writes the result to the
// same position in `out_samples`.
__global__ void contract_inv_kernel(
    // samples info
    const uint32_t n_samples,
    const float *samples, // (n_samples, 3)
    // contraction
    const float *roi,
    const ContractionType type,
    // outputs
    float *out_samples)
{
    CUDA_GET_THREAD_ID(i, n_samples);

    // This thread's triple inside the flat input / output arrays.
    const float *src = samples + i * 3;
    float *dst = out_samples + i * 3;

    const float3 box_lo = make_float3(roi[0], roi[1], roi[2]);
    const float3 box_hi = make_float3(roi[3], roi[4], roi[5]);
    const float3 unit_point = make_float3(src[0], src[1], src[2]);

    const float3 restored = apply_contraction_inv(unit_point, box_lo, box_hi, type);
    dst[0] = restored.x;
    dst[1] = restored.y;
    dst[2] = restored.z;
}
/**
 * Contract samples into unit space on the GPU.
 *
 * @param samples contiguous CUDA float tensor whose first dimension is the
 *                sample count, read as 3 floats per sample.
 * @param roi     contiguous CUDA float tensor with at least 6 values
 *                (min xyz followed by max xyz).
 * @param type    which contraction to apply.
 * @return (n_samples, 3) tensor with the contracted coordinates.
 */
torch::Tensor contract(
    const torch::Tensor samples,
    // contraction
    const torch::Tensor roi,
    const ContractionType type)
{
    DEVICE_GUARD(samples);
    CHECK_INPUT(samples);
    // The kernel dereferences roi[0..5] on the device, so a host, strided or
    // too-short tensor would read invalid memory instead of failing cleanly.
    CHECK_INPUT(roi);
    TORCH_CHECK(roi.numel() >= 6, "roi must hold at least 6 values");

    const int n_samples = samples.size(0);
    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads);

    torch::Tensor out_samples = torch::empty({n_samples, 3}, samples.options());

    contract_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_samples,
        samples.data_ptr<float>(),
        // contraction
        roi.data_ptr<float>(),
        type,
        // outputs
        out_samples.data_ptr<float>());
    return out_samples;
}
/**
 * Map contracted unit-space samples back to world space on the GPU.
 *
 * @param samples contiguous CUDA float tensor whose first dimension is the
 *                sample count, read as 3 floats per sample.
 * @param roi     contiguous CUDA float tensor with at least 6 values
 *                (min xyz followed by max xyz).
 * @param type    which contraction to invert.
 * @return (n_samples, 3) tensor with the un-contracted coordinates.
 */
torch::Tensor contract_inv(
    const torch::Tensor samples,
    // contraction
    const torch::Tensor roi,
    const ContractionType type)
{
    DEVICE_GUARD(samples);
    CHECK_INPUT(samples);
    // The kernel dereferences roi[0..5] on the device, so a host, strided or
    // too-short tensor would read invalid memory instead of failing cleanly.
    CHECK_INPUT(roi);
    TORCH_CHECK(roi.numel() >= 6, "roi must hold at least 6 values");

    const int n_samples = samples.size(0);
    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads);

    torch::Tensor out_samples = torch::empty({n_samples, 3}, samples.options());

    contract_inv_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_samples,
        samples.data_ptr<float>(),
        // contraction
        roi.data_ptr<float>(),
        type,
        // outputs
        out_samples.data_ptr<float>());
    return out_samples;
}
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/include/helpers_contraction.h
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
#pragma once
#include "helpers_math.h"
// Selects which space mapping apply_contraction / apply_contraction_inv use.
enum ContractionType
{
// Plain affine map between the roi box and [0, 1]^3 (roi_to_unit / unit_to_roi).
AABB = 0,
// tanh-based map (inf_to_unit_tanh / unit_to_inf_tanh).
UN_BOUNDED_TANH = 1,
// Spherical map (inf_to_unit_sphere / unit_sphere_to_inf).
UN_BOUNDED_SPHERE = 2,
};
// Affine map from the roi box to unit coordinates: per axis, roi_min lands on
// 0 and roi_max on 1. Points outside the box map outside [0, 1].
inline __device__ __host__ float3 roi_to_unit(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    const float3 offset = xyz - roi_min;
    const float3 extent = roi_max - roi_min;
    return offset / extent;
}
// Inverse of roi_to_unit: per axis, 0 lands on roi_min and 1 on roi_max.
inline __device__ __host__ float3 unit_to_roi(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    const float3 extent = roi_max - roi_min;
    return xyz * extent + roi_min;
}
// Squash all of space into the unit cube with a per-axis tanh.
//   [-inf, inf]^3 -> [0, 1]^3
// The roi is first centred on the origin (edges at +-0.5), then each axis is
// passed through tanh and rescaled by 0.5 * t + 0.5, so the roi itself ends up
// in about [0.27, 0.73]^3 (0.5 +- 0.5 * tanh(0.5)).
inline __device__ __host__ float3 inf_to_unit_tanh(
    const float3 xyz, float3 roi_min, const float3 roi_max)
{
    const float3 centered = roi_to_unit(xyz, roi_min, roi_max) - 0.5f;
    const float3 squashed = make_float3(
        tanhf(centered.x), tanhf(centered.y), tanhf(centered.z));
    return squashed * 0.5f + 0.5f;
}
// Inverse of inf_to_unit_tanh.
//   [0, 1]^3 -> [-inf, inf]^3
// Each axis is stretched to [-1, 1], passed through atanh, and clamped to
// +-1e10 so the infinities produced at exactly 0 or 1 stay finite. The result
// is shifted by 0.5 and mapped back through unit_to_roi.
inline __device__ __host__ float3 unit_to_inf_tanh(
    const float3 xyz, float3 roi_min, const float3 roi_max)
{
    const float3 stretched = make_float3(
        atanhf(xyz.x * 2.0f - 1.0f),
        atanhf(xyz.y * 2.0f - 1.0f),
        atanhf(xyz.z * 2.0f - 1.0f));
    const float3 bounded = clamp(stretched, -1e10f, 1e10f);
    const float3 in_unit = bounded + 0.5f;
    return unit_to_roi(in_unit, roi_min, roi_max);
}
// Spherical contraction (from MipNeRF360).
//   [-inf, inf]^3 -> [0, 1]^3
// The roi is rescaled to [-1, 1]^3. Points of norm <= 1 are kept as they are;
// a point of norm n > 1 is pulled in to norm (2 - 1/n), i.e. below 2. The
// final 0.25 * p + 0.5 places norm-1 points within [0.25, 0.75] per axis and
// everything else within [0, 1].
inline __device__ __host__ float3 inf_to_unit_sphere(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    float3 p = roi_to_unit(xyz, roi_min, roi_max) * 2.0f - 1.0f;
    const float len_sq = dot(p, p);
    const float len = sqrt(len_sq);
    if (len > 1.0f)
    {
        p = (2.0f - 1.0f / len) * (p / len);
    }
    return p * 0.25f + 0.5f;
}
// Inverse of inf_to_unit_sphere (from MipNeRF360).
//   [0, 1]^3 -> [-inf, inf]^3
// The input is first expanded by (xyz - 0.5) * 4. Points of norm n > 1 are
// pushed back out by dividing by (2n - n^2); the divisor is floored at 1e-10
// so points at norm 2 stay finite. The result goes from [-1, 1] coordinates
// to [0, 1] and then through unit_to_roi.
inline __device__ __host__ float3 unit_sphere_to_inf(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    float3 p = (xyz - 0.5f) * 4.0f;
    const float len_sq = dot(p, p);
    const float len = sqrt(len_sq);
    if (len > 1.0f)
    {
        const float divisor = fmaxf((2.0f * len - 1.0f * len_sq), 1e-10f);
        p = p / divisor;
    }
    const float3 in_unit = p * 0.5f + 0.5f;
    return unit_to_roi(in_unit, roi_min, roi_max);
}
// Dispatch to the forward mapping selected by `type`:
//   AABB              -> roi_to_unit
//   UN_BOUNDED_TANH   -> inf_to_unit_tanh
//   UN_BOUNDED_SPHERE -> inf_to_unit_sphere
// For a value outside the enum the input point is returned unchanged.
inline __device__ __host__ float3 apply_contraction(
    const float3 xyz, const float3 roi_min, const float3 roi_max,
    const ContractionType type)
{
    switch (type)
    {
    case AABB:
        return roi_to_unit(xyz, roi_min, roi_max);
    case UN_BOUNDED_TANH:
        return inf_to_unit_tanh(xyz, roi_min, roi_max);
    case UN_BOUNDED_SPHERE:
        return inf_to_unit_sphere(xyz, roi_min, roi_max);
    }
    // Not reached for valid enumerators. Without this return, an unexpected
    // value would flow off the end of a non-void function (undefined
    // behaviour). Kept outside the switch so compilers can still warn about
    // enumerators that are not handled above.
    return xyz;
}
// Dispatch to the inverse mapping selected by `type`:
//   AABB              -> unit_to_roi
//   UN_BOUNDED_TANH   -> unit_to_inf_tanh
//   UN_BOUNDED_SPHERE -> unit_sphere_to_inf
// For a value outside the enum the input point is returned unchanged.
inline __device__ __host__ float3 apply_contraction_inv(
    const float3 xyz, const float3 roi_min, const float3 roi_max,
    const ContractionType type)
{
    switch (type)
    {
    case AABB:
        return unit_to_roi(xyz, roi_min, roi_max);
    case UN_BOUNDED_TANH:
        return unit_to_inf_tanh(xyz, roi_min, roi_max);
    case UN_BOUNDED_SPHERE:
        return unit_sphere_to_inf(xyz, roi_min, roi_max);
    }
    // Not reached for valid enumerators. Without this return, an unexpected
    // value would flow off the end of a non-void function (undefined
    // behaviour). Kept outside the switch so compilers can still warn about
    // enumerators that are not handled above.
    return xyz;
}
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/include/helpers_cuda.h
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
#pragma once
#include
#include
#include
#include
// #include
// cub support for scan by key is added to cub 1.15
// in https://github.com/NVIDIA/cub/pull/376
// CUB_SUPPORTS_SCAN_BY_KEY() expands to 1 when CUB_VERSION is at least 101500
// and to 0 otherwise. An undefined CUB_VERSION evaluates as 0 inside #if, so
// the macro is also 0 when no CUB header has been included before this point.
#if CUB_VERSION >= 101500
#define CUB_SUPPORTS_SCAN_BY_KEY() 1
#else
#define CUB_SUPPORTS_SCAN_BY_KEY() 0
#endif
// Raise (via TORCH_CHECK) unless tensor `x` is a CUDA tensor.
#define CHECK_CUDA(x) TORCH_CHECK((x).is_cuda(), #x " must be a CUDA tensor")
// Raise (via TORCH_CHECK) unless tensor `x` is contiguous.
#define CHECK_CONTIGUOUS(x) \
    TORCH_CHECK((x).is_contiguous(), #x " must be contiguous")
// Run both checks on `x`. Wrapped in do/while so the macro expands to exactly
// one statement and stays correct under an unbraced `if`/`else`; use it with
// a trailing semicolon.
#define CHECK_INPUT(x)       \
    do                       \
    {                        \
        CHECK_CUDA(x);       \
        CHECK_CONTIGUOUS(x); \
    } while (false)
// Declares `const int tid` as the global 1-D thread index and returns from
// the enclosing kernel when it is not below Q. Macro arguments are
// parenthesised so expressions such as `a + b` compare as a whole. Use with a
// trailing semicolon.
#define CUDA_GET_THREAD_ID(tid, Q)                         \
    const int tid = blockIdx.x * blockDim.x + threadIdx.x; \
    if (tid >= (Q))                                        \
    return
// 2-D variant: declares `tidx` / `tidy` and returns when either index is out
// of range (tidx >= P or tidy >= Q).
#define CUDA_GET_THREAD_ID_2D(tidx, tidy, P, Q)             \
    const int tidx = blockIdx.x * blockDim.x + threadIdx.x; \
    const int tidy = blockIdx.y * blockDim.y + threadIdx.y; \
    if (tidx >= (P) || tidy >= (Q))                         \
    return
// Number of blocks of CUDA_N_THREADS threads needed to cover Q items,
// computed as (Q - 1) / CUDA_N_THREADS + 1. For a signed Q of 0 this yields
// 1; for an unsigned Q of 0 the subtraction wraps around, so callers should
// handle the empty case themselves.
#define CUDA_N_BLOCKS_NEEDED(Q, CUDA_N_THREADS) (((Q) - 1) / (CUDA_N_THREADS) + 1)
// Declares a scoped `device_guard` object built from the device of tensor
// `_ten`. The expansion already ends with a semicolon, and it introduces a
// fixed variable name, so it can appear only once per scope.
#define DEVICE_GUARD(_ten) \
const at::cuda::OptionalCUDAGuard device_guard(device_of(_ten));
// https://github.com/pytorch/pytorch/blob/233305a852e1cd7f319b15b5137074c9eac455f6/aten/src/ATen/cuda/cub.cuh#L38-L46
// Calls `func` twice: first with a null buffer so it reports the required
// temporary storage size in temp_storage_bytes, then with a buffer of that
// size obtained from the CUDA caching allocator. The last CUDA error is
// checked afterwards with AT_CUDA_CHECK.
#define CUB_WRAPPER(func, ...) do { \
size_t temp_storage_bytes = 0; \
func(nullptr, temp_storage_bytes, __VA_ARGS__); \
auto& caching_allocator = *::c10::cuda::CUDACachingAllocator::get(); \
auto temp_storage = caching_allocator.allocate(temp_storage_bytes); \
func(temp_storage.get(), temp_storage_bytes, __VA_ARGS__); \
AT_CUDA_CHECK(cudaGetLastError()); \
} while (false)
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/include/helpers_math.h
================================================
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
* Modified by Ruilong Li, 2022
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This file implements common mathematical operations on vector types
* (float3, float4 etc.) since these are not provided as standard by CUDA.
*
* The syntax is modeled on the Cg standard library.
*
* This is part of the Helper library includes
*
* Thanks to Linh Hah for additions and fixes.
*/
#ifndef HELPER_MATH_H
#define HELPER_MATH_H
#include "cuda_runtime.h"
// Short aliases; `uint` is used as the scalar type of the uint2/uint3/uint4
// overloads in this header.
typedef unsigned int uint;
typedef unsigned short ushort;
// Defined only if not already provided elsewhere.
// NOTE(review): presumably an exit status inherited from NVIDIA's helper
// headers; it is not used in the visible part of this file — confirm.
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
#ifndef __CUDACC__
#include
////////////////////////////////////////////////////////////////////////////////
// host implementations of CUDA functions
////////////////////////////////////////////////////////////////////////////////
// Smaller of two floats; returns b when a is not strictly less than b.
inline float fminf(float a, float b)
{
    if (a < b)
        return a;
    return b;
}
// Larger of two floats; returns b when a is not strictly greater than b.
inline float fmaxf(float a, float b)
{
    if (a > b)
        return a;
    return b;
}
// Larger of two ints.
inline int max(int a, int b)
{
    if (a > b)
        return a;
    return b;
}
// Smaller of two ints.
inline int min(int a, int b)
{
    if (a < b)
        return a;
    return b;
}
// Reciprocal square root, computed as 1 / sqrtf(x).
inline float rsqrtf(float x)
{
    const float root = sqrtf(x);
    return 1.0f / root;
}
#endif
////////////////////////////////////////////////////////////////////////////////
// constructors
////////////////////////////////////////////////////////////////////////////////
// ---- float2: broadcast a scalar, truncate a float3, or convert an integer pair.
inline __host__ __device__ float2 make_float2(float s) { return make_float2(s, s); }
inline __host__ __device__ float2 make_float2(float3 a) { return make_float2(a.x, a.y); }
inline __host__ __device__ float2 make_float2(int2 a)
{
    return make_float2(static_cast<float>(a.x), static_cast<float>(a.y));
}
inline __host__ __device__ float2 make_float2(uint2 a)
{
    return make_float2(static_cast<float>(a.x), static_cast<float>(a.y));
}

// ---- int2: broadcast a scalar, truncate an int3, or convert a uint2 / float2.
inline __host__ __device__ int2 make_int2(int s) { return make_int2(s, s); }
inline __host__ __device__ int2 make_int2(int3 a) { return make_int2(a.x, a.y); }
inline __host__ __device__ int2 make_int2(uint2 a)
{
    return make_int2(static_cast<int>(a.x), static_cast<int>(a.y));
}
inline __host__ __device__ int2 make_int2(float2 a)
{
    return make_int2(static_cast<int>(a.x), static_cast<int>(a.y));
}

// ---- uint2: broadcast a scalar, truncate a uint3, or convert an int2.
inline __host__ __device__ uint2 make_uint2(uint s) { return make_uint2(s, s); }
inline __host__ __device__ uint2 make_uint2(uint3 a) { return make_uint2(a.x, a.y); }
inline __host__ __device__ uint2 make_uint2(int2 a)
{
    return make_uint2(static_cast<uint>(a.x), static_cast<uint>(a.y));
}
// ---- float3: broadcast, extend a float2 (z = 0 or given), truncate a float4,
// or convert an integer triple.
inline __host__ __device__ float3 make_float3(float s) { return make_float3(s, s, s); }
inline __host__ __device__ float3 make_float3(float2 a) { return make_float3(a.x, a.y, 0.0f); }
inline __host__ __device__ float3 make_float3(float2 a, float s) { return make_float3(a.x, a.y, s); }
inline __host__ __device__ float3 make_float3(float4 a) { return make_float3(a.x, a.y, a.z); }
inline __host__ __device__ float3 make_float3(int3 a)
{
    return make_float3(static_cast<float>(a.x), static_cast<float>(a.y), static_cast<float>(a.z));
}
inline __host__ __device__ float3 make_float3(uint3 a)
{
    return make_float3(static_cast<float>(a.x), static_cast<float>(a.y), static_cast<float>(a.z));
}

// ---- int3: broadcast, extend an int2 (z = 0 or given), or convert a uint3 / float3.
inline __host__ __device__ int3 make_int3(int s) { return make_int3(s, s, s); }
inline __host__ __device__ int3 make_int3(int2 a) { return make_int3(a.x, a.y, 0); }
inline __host__ __device__ int3 make_int3(int2 a, int s) { return make_int3(a.x, a.y, s); }
inline __host__ __device__ int3 make_int3(uint3 a)
{
    return make_int3(static_cast<int>(a.x), static_cast<int>(a.y), static_cast<int>(a.z));
}
inline __host__ __device__ int3 make_int3(float3 a)
{
    return make_int3(static_cast<int>(a.x), static_cast<int>(a.y), static_cast<int>(a.z));
}

// ---- uint3: broadcast, extend a uint2 (z = 0 or given), truncate a uint4,
// or convert an int3.
inline __host__ __device__ uint3 make_uint3(uint s) { return make_uint3(s, s, s); }
inline __host__ __device__ uint3 make_uint3(uint2 a) { return make_uint3(a.x, a.y, 0); }
inline __host__ __device__ uint3 make_uint3(uint2 a, uint s) { return make_uint3(a.x, a.y, s); }
inline __host__ __device__ uint3 make_uint3(uint4 a) { return make_uint3(a.x, a.y, a.z); }
inline __host__ __device__ uint3 make_uint3(int3 a)
{
    return make_uint3(static_cast<uint>(a.x), static_cast<uint>(a.y), static_cast<uint>(a.z));
}
// ---- float4: broadcast, extend a float3 (w = 0 or given), or convert an
// integer quadruple.
inline __host__ __device__ float4 make_float4(float s) { return make_float4(s, s, s, s); }
inline __host__ __device__ float4 make_float4(float3 a) { return make_float4(a.x, a.y, a.z, 0.0f); }
inline __host__ __device__ float4 make_float4(float3 a, float w) { return make_float4(a.x, a.y, a.z, w); }
inline __host__ __device__ float4 make_float4(int4 a)
{
    return make_float4(static_cast<float>(a.x), static_cast<float>(a.y),
                       static_cast<float>(a.z), static_cast<float>(a.w));
}
inline __host__ __device__ float4 make_float4(uint4 a)
{
    return make_float4(static_cast<float>(a.x), static_cast<float>(a.y),
                       static_cast<float>(a.z), static_cast<float>(a.w));
}

// ---- int4: broadcast, extend an int3 (w = 0 or given), or convert a uint4 / float4.
inline __host__ __device__ int4 make_int4(int s) { return make_int4(s, s, s, s); }
inline __host__ __device__ int4 make_int4(int3 a) { return make_int4(a.x, a.y, a.z, 0); }
inline __host__ __device__ int4 make_int4(int3 a, int w) { return make_int4(a.x, a.y, a.z, w); }
inline __host__ __device__ int4 make_int4(uint4 a)
{
    return make_int4(static_cast<int>(a.x), static_cast<int>(a.y),
                     static_cast<int>(a.z), static_cast<int>(a.w));
}
inline __host__ __device__ int4 make_int4(float4 a)
{
    return make_int4(static_cast<int>(a.x), static_cast<int>(a.y),
                     static_cast<int>(a.z), static_cast<int>(a.w));
}

// ---- uint4: broadcast, extend a uint3 (w = 0 or given), or convert an int4.
inline __host__ __device__ uint4 make_uint4(uint s) { return make_uint4(s, s, s, s); }
inline __host__ __device__ uint4 make_uint4(uint3 a) { return make_uint4(a.x, a.y, a.z, 0); }
inline __host__ __device__ uint4 make_uint4(uint3 a, uint w) { return make_uint4(a.x, a.y, a.z, w); }
inline __host__ __device__ uint4 make_uint4(int4 a)
{
    return make_uint4(static_cast<uint>(a.x), static_cast<uint>(a.y),
                      static_cast<uint>(a.z), static_cast<uint>(a.w));
}
////////////////////////////////////////////////////////////////////////////////
// negate
////////////////////////////////////////////////////////////////////////////////
// Unary minus: returns a new vector with every component negated.
// The operand is taken by const reference so that const values and
// temporaries (e.g. `-(a + b)`) can be negated too; a non-const lvalue
// reference parameter accepts neither.
inline __host__ __device__ float2 operator-(const float2 &a)
{
    return make_float2(-a.x, -a.y);
}
inline __host__ __device__ int2 operator-(const int2 &a)
{
    return make_int2(-a.x, -a.y);
}
inline __host__ __device__ float3 operator-(const float3 &a)
{
    return make_float3(-a.x, -a.y, -a.z);
}
inline __host__ __device__ int3 operator-(const int3 &a)
{
    return make_int3(-a.x, -a.y, -a.z);
}
inline __host__ __device__ float4 operator-(const float4 &a)
{
    return make_float4(-a.x, -a.y, -a.z, -a.w);
}
inline __host__ __device__ int4 operator-(const int4 &a)
{
    return make_int4(-a.x, -a.y, -a.z, -a.w);
}
////////////////////////////////////////////////////////////////////////////////
// addition
////////////////////////////////////////////////////////////////////////////////
// Component-wise addition for 2-component vectors. Every type gets:
// vector + vector, vector += vector, vector + scalar, scalar + vector and
// vector += scalar (a scalar is added to each component).

// ---- float2
inline __host__ __device__ float2 operator+(float2 a, float2 b) { return make_float2(a.x + b.x, a.y + b.y); }
inline __host__ __device__ void operator+=(float2 &a, float2 b) { a.x += b.x; a.y += b.y; }
inline __host__ __device__ float2 operator+(float2 a, float b) { return make_float2(a.x + b, a.y + b); }
inline __host__ __device__ float2 operator+(float b, float2 a) { return make_float2(a.x + b, a.y + b); }
inline __host__ __device__ void operator+=(float2 &a, float b) { a.x += b; a.y += b; }

// ---- int2
inline __host__ __device__ int2 operator+(int2 a, int2 b) { return make_int2(a.x + b.x, a.y + b.y); }
inline __host__ __device__ void operator+=(int2 &a, int2 b) { a.x += b.x; a.y += b.y; }
inline __host__ __device__ int2 operator+(int2 a, int b) { return make_int2(a.x + b, a.y + b); }
inline __host__ __device__ int2 operator+(int b, int2 a) { return make_int2(a.x + b, a.y + b); }
inline __host__ __device__ void operator+=(int2 &a, int b) { a.x += b; a.y += b; }

// ---- uint2
inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) { return make_uint2(a.x + b.x, a.y + b.y); }
inline __host__ __device__ void operator+=(uint2 &a, uint2 b) { a.x += b.x; a.y += b.y; }
inline __host__ __device__ uint2 operator+(uint2 a, uint b) { return make_uint2(a.x + b, a.y + b); }
inline __host__ __device__ uint2 operator+(uint b, uint2 a) { return make_uint2(a.x + b, a.y + b); }
inline __host__ __device__ void operator+=(uint2 &a, uint b) { a.x += b; a.y += b; }
// Component-wise addition for 3-component vectors. Every type gets:
// vector + vector, vector += vector, vector + scalar and vector += scalar;
// the scalar-first forms are collected at the end.

// ---- float3
inline __host__ __device__ float3 operator+(float3 a, float3 b) { return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); }
inline __host__ __device__ void operator+=(float3 &a, float3 b) { a.x += b.x; a.y += b.y; a.z += b.z; }
inline __host__ __device__ float3 operator+(float3 a, float b) { return make_float3(a.x + b, a.y + b, a.z + b); }
inline __host__ __device__ void operator+=(float3 &a, float b) { a.x += b; a.y += b; a.z += b; }

// ---- int3
inline __host__ __device__ int3 operator+(int3 a, int3 b) { return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); }
inline __host__ __device__ void operator+=(int3 &a, int3 b) { a.x += b.x; a.y += b.y; a.z += b.z; }
inline __host__ __device__ int3 operator+(int3 a, int b) { return make_int3(a.x + b, a.y + b, a.z + b); }
inline __host__ __device__ void operator+=(int3 &a, int b) { a.x += b; a.y += b; a.z += b; }

// ---- uint3
inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) { return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z); }
inline __host__ __device__ void operator+=(uint3 &a, uint3 b) { a.x += b.x; a.y += b.y; a.z += b.z; }
inline __host__ __device__ uint3 operator+(uint3 a, uint b) { return make_uint3(a.x + b, a.y + b, a.z + b); }
inline __host__ __device__ void operator+=(uint3 &a, uint b) { a.x += b; a.y += b; a.z += b; }

// ---- scalar + vector
inline __host__ __device__ int3 operator+(int b, int3 a) { return make_int3(a.x + b, a.y + b, a.z + b); }
inline __host__ __device__ uint3 operator+(uint b, uint3 a) { return make_uint3(a.x + b, a.y + b, a.z + b); }
inline __host__ __device__ float3 operator+(float b, float3 a) { return make_float3(a.x + b, a.y + b, a.z + b); }
// Component-wise addition for 4-component vectors. Every type gets:
// vector + vector, vector += vector, vector + scalar, scalar + vector and
// vector += scalar (a scalar is added to each component).

// ---- float4
inline __host__ __device__ float4 operator+(float4 a, float4 b) { return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
inline __host__ __device__ void operator+=(float4 &a, float4 b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; }
inline __host__ __device__ float4 operator+(float4 a, float b) { return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ float4 operator+(float b, float4 a) { return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ void operator+=(float4 &a, float b) { a.x += b; a.y += b; a.z += b; a.w += b; }

// ---- int4
inline __host__ __device__ int4 operator+(int4 a, int4 b) { return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
inline __host__ __device__ void operator+=(int4 &a, int4 b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; }
inline __host__ __device__ int4 operator+(int4 a, int b) { return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ int4 operator+(int b, int4 a) { return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ void operator+=(int4 &a, int b) { a.x += b; a.y += b; a.z += b; a.w += b; }

// ---- uint4
inline __host__ __device__ uint4 operator+(uint4 a, uint4 b) { return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); }
inline __host__ __device__ void operator+=(uint4 &a, uint4 b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; }
inline __host__ __device__ uint4 operator+(uint4 a, uint b) { return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ uint4 operator+(uint b, uint4 a) { return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); }
inline __host__ __device__ void operator+=(uint4 &a, uint b) { a.x += b; a.y += b; a.z += b; a.w += b; }
////////////////////////////////////////////////////////////////////////////////
// subtract
////////////////////////////////////////////////////////////////////////////////
// Component-wise subtraction for 2-component vectors. Every type gets:
// vector - vector, vector -= vector, vector - scalar, scalar - vector
// (computes b - a.x, b - a.y) and vector -= scalar.

// ---- float2
inline __host__ __device__ float2 operator-(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
inline __host__ __device__ void operator-=(float2 &a, float2 b) { a.x -= b.x; a.y -= b.y; }
inline __host__ __device__ float2 operator-(float2 a, float b) { return make_float2(a.x - b, a.y - b); }
inline __host__ __device__ float2 operator-(float b, float2 a) { return make_float2(b - a.x, b - a.y); }
inline __host__ __device__ void operator-=(float2 &a, float b) { a.x -= b; a.y -= b; }

// ---- int2
inline __host__ __device__ int2 operator-(int2 a, int2 b) { return make_int2(a.x - b.x, a.y - b.y); }
inline __host__ __device__ void operator-=(int2 &a, int2 b) { a.x -= b.x; a.y -= b.y; }
inline __host__ __device__ int2 operator-(int2 a, int b) { return make_int2(a.x - b, a.y - b); }
inline __host__ __device__ int2 operator-(int b, int2 a) { return make_int2(b - a.x, b - a.y); }
inline __host__ __device__ void operator-=(int2 &a, int b) { a.x -= b; a.y -= b; }

// ---- uint2
inline __host__ __device__ uint2 operator-(uint2 a, uint2 b) { return make_uint2(a.x - b.x, a.y - b.y); }
inline __host__ __device__ void operator-=(uint2 &a, uint2 b) { a.x -= b.x; a.y -= b.y; }
inline __host__ __device__ uint2 operator-(uint2 a, uint b) { return make_uint2(a.x - b, a.y - b); }
inline __host__ __device__ uint2 operator-(uint b, uint2 a) { return make_uint2(b - a.x, b - a.y); }
inline __host__ __device__ void operator-=(uint2 &a, uint b) { a.x -= b; a.y -= b; }
// Component-wise subtraction for 3-component vectors. Every type gets:
// vector - vector, vector -= vector, vector - scalar, scalar - vector
// (computes b - a.x, b - a.y, b - a.z) and vector -= scalar.

// ---- float3
inline __host__ __device__ float3 operator-(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
inline __host__ __device__ void operator-=(float3 &a, float3 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; }
inline __host__ __device__ float3 operator-(float3 a, float b) { return make_float3(a.x - b, a.y - b, a.z - b); }
inline __host__ __device__ float3 operator-(float b, float3 a) { return make_float3(b - a.x, b - a.y, b - a.z); }
inline __host__ __device__ void operator-=(float3 &a, float b) { a.x -= b; a.y -= b; a.z -= b; }

// ---- int3
inline __host__ __device__ int3 operator-(int3 a, int3 b) { return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); }
inline __host__ __device__ void operator-=(int3 &a, int3 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; }
inline __host__ __device__ int3 operator-(int3 a, int b) { return make_int3(a.x - b, a.y - b, a.z - b); }
inline __host__ __device__ int3 operator-(int b, int3 a) { return make_int3(b - a.x, b - a.y, b - a.z); }
inline __host__ __device__ void operator-=(int3 &a, int b) { a.x -= b; a.y -= b; a.z -= b; }

// ---- uint3
inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) { return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z); }
inline __host__ __device__ void operator-=(uint3 &a, uint3 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; }
inline __host__ __device__ uint3 operator-(uint3 a, uint b) { return make_uint3(a.x - b, a.y - b, a.z - b); }
inline __host__ __device__ uint3 operator-(uint b, uint3 a) { return make_uint3(b - a.x, b - a.y, b - a.z); }
inline __host__ __device__ void operator-=(uint3 &a, uint b) { a.x -= b; a.y -= b; a.z -= b; }
// Component-wise subtraction for 4-component vectors: vector - vector,
// vector -= vector, vector - scalar and vector -= scalar for every type.
// The scalar - vector form exists for int4 and uint4 only; float4 has none.

// ---- float4
inline __host__ __device__ float4 operator-(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
inline __host__ __device__ void operator-=(float4 &a, float4 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; }
inline __host__ __device__ float4 operator-(float4 a, float b) { return make_float4(a.x - b, a.y - b, a.z - b, a.w - b); }
inline __host__ __device__ void operator-=(float4 &a, float b) { a.x -= b; a.y -= b; a.z -= b; a.w -= b; }

// ---- int4
inline __host__ __device__ int4 operator-(int4 a, int4 b) { return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
inline __host__ __device__ void operator-=(int4 &a, int4 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; }
inline __host__ __device__ int4 operator-(int4 a, int b) { return make_int4(a.x - b, a.y - b, a.z - b, a.w - b); }
inline __host__ __device__ int4 operator-(int b, int4 a) { return make_int4(b - a.x, b - a.y, b - a.z, b - a.w); }
inline __host__ __device__ void operator-=(int4 &a, int b) { a.x -= b; a.y -= b; a.z -= b; a.w -= b; }

// ---- uint4
inline __host__ __device__ uint4 operator-(uint4 a, uint4 b) { return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
inline __host__ __device__ void operator-=(uint4 &a, uint4 b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; }
inline __host__ __device__ uint4 operator-(uint4 a, uint b) { return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b); }
inline __host__ __device__ uint4 operator-(uint b, uint4 a) { return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w); }
inline __host__ __device__ void operator-=(uint4 &a, uint b) { a.x -= b; a.y -= b; a.z -= b; a.w -= b; }
////////////////////////////////////////////////////////////////////////////////
// multiply
////////////////////////////////////////////////////////////////////////////////
// Component-wise product of two float2 vectors.
inline __host__ __device__ float2 operator*(float2 a, float2 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x *= b.x;
    a.y *= b.y;
    return a;
}

// In-place component-wise product: a <- a * b.
inline __host__ __device__ void operator*=(float2 &a, float2 b)
{
    a = make_float2(a.x * b.x, a.y * b.y);
}

// Scales each component of a by the scalar b.
inline __host__ __device__ float2 operator*(float2 a, float b)
{
    a.x *= b;
    a.y *= b;
    return a;
}

// Scalar-on-the-left variant: b * a, component by component.
inline __host__ __device__ float2 operator*(float b, float2 a)
{
    a.x = b * a.x;
    a.y = b * a.y;
    return a;
}

// In-place scaling of a by the scalar b.
inline __host__ __device__ void operator*=(float2 &a, float b)
{
    a = make_float2(a.x * b, a.y * b);
}
// Component-wise product of two int2 vectors.
inline __host__ __device__ int2 operator*(int2 a, int2 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x *= b.x;
    a.y *= b.y;
    return a;
}

// In-place component-wise product: a <- a * b.
inline __host__ __device__ void operator*=(int2 &a, int2 b)
{
    a = make_int2(a.x * b.x, a.y * b.y);
}

// Scales each component of a by the scalar b.
inline __host__ __device__ int2 operator*(int2 a, int b)
{
    a.x *= b;
    a.y *= b;
    return a;
}

// Scalar-on-the-left variant: b * a, component by component.
inline __host__ __device__ int2 operator*(int b, int2 a)
{
    a.x = b * a.x;
    a.y = b * a.y;
    return a;
}

// In-place scaling of a by the scalar b.
inline __host__ __device__ void operator*=(int2 &a, int b)
{
    a = make_int2(a.x * b, a.y * b);
}
// Component-wise product of two uint2 vectors.
inline __host__ __device__ uint2 operator*(uint2 a, uint2 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x *= b.x;
    a.y *= b.y;
    return a;
}

// In-place component-wise product: a <- a * b.
inline __host__ __device__ void operator*=(uint2 &a, uint2 b)
{
    a = make_uint2(a.x * b.x, a.y * b.y);
}

// Scales each component of a by the scalar b.
inline __host__ __device__ uint2 operator*(uint2 a, uint b)
{
    a.x *= b;
    a.y *= b;
    return a;
}

// Scalar-on-the-left variant: b * a, component by component.
inline __host__ __device__ uint2 operator*(uint b, uint2 a)
{
    a.x = b * a.x;
    a.y = b * a.y;
    return a;
}

// In-place scaling of a by the scalar b.
inline __host__ __device__ void operator*=(uint2 &a, uint b)
{
    a = make_uint2(a.x * b, a.y * b);
}
// Component-wise product of two float3 vectors.
inline __host__ __device__ float3 operator*(float3 a, float3 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    return a;
}

// In-place component-wise product: a <- a * b.
inline __host__ __device__ void operator*=(float3 &a, float3 b)
{
    a = make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
}

// Scales each component of a by the scalar b.
inline __host__ __device__ float3 operator*(float3 a, float b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
    return a;
}

// Scalar-on-the-left variant: b * a, component by component.
inline __host__ __device__ float3 operator*(float b, float3 a)
{
    a.x = b * a.x;
    a.y = b * a.y;
    a.z = b * a.z;
    return a;
}

// In-place scaling of a by the scalar b.
inline __host__ __device__ void operator*=(float3 &a, float b)
{
    a = make_float3(a.x * b, a.y * b, a.z * b);
}
// Component-wise product of two int3 vectors.
inline __host__ __device__ int3 operator*(int3 a, int3 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    return a;
}

// In-place component-wise product: a <- a * b.
inline __host__ __device__ void operator*=(int3 &a, int3 b)
{
    a = make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
}

// Scales each component of a by the scalar b.
inline __host__ __device__ int3 operator*(int3 a, int b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
    return a;
}

// Scalar-on-the-left variant: b * a, component by component.
inline __host__ __device__ int3 operator*(int b, int3 a)
{
    a.x = b * a.x;
    a.y = b * a.y;
    a.z = b * a.z;
    return a;
}

// In-place scaling of a by the scalar b.
inline __host__ __device__ void operator*=(int3 &a, int b)
{
    a = make_int3(a.x * b, a.y * b, a.z * b);
}
// Component-wise product of two uint3 vectors.
inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    return a;
}

// In-place component-wise product: a <- a * b.
inline __host__ __device__ void operator*=(uint3 &a, uint3 b)
{
    a = make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
}

// Scales each component of a by the scalar b.
inline __host__ __device__ uint3 operator*(uint3 a, uint b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
    return a;
}

// Scalar-on-the-left variant: b * a, component by component.
inline __host__ __device__ uint3 operator*(uint b, uint3 a)
{
    a.x = b * a.x;
    a.y = b * a.y;
    a.z = b * a.z;
    return a;
}

// In-place scaling of a by the scalar b.
inline __host__ __device__ void operator*=(uint3 &a, uint b)
{
    a = make_uint3(a.x * b, a.y * b, a.z * b);
}
// Component-wise product of two float4 vectors.
inline __host__ __device__ float4 operator*(float4 a, float4 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    a.w *= b.w;
    return a;
}

// In-place component-wise product: a <- a * b.
inline __host__ __device__ void operator*=(float4 &a, float4 b)
{
    a = make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}

// Scales each component of a by the scalar b.
inline __host__ __device__ float4 operator*(float4 a, float b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
    a.w *= b;
    return a;
}

// Scalar-on-the-left variant: b * a, component by component.
inline __host__ __device__ float4 operator*(float b, float4 a)
{
    a.x = b * a.x;
    a.y = b * a.y;
    a.z = b * a.z;
    a.w = b * a.w;
    return a;
}

// In-place scaling of a by the scalar b.
inline __host__ __device__ void operator*=(float4 &a, float b)
{
    a = make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
}
// Component-wise product of two int4 vectors.
inline __host__ __device__ int4 operator*(int4 a, int4 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    a.w *= b.w;
    return a;
}

// In-place component-wise product: a <- a * b.
inline __host__ __device__ void operator*=(int4 &a, int4 b)
{
    a = make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}

// Scales each component of a by the scalar b.
inline __host__ __device__ int4 operator*(int4 a, int b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
    a.w *= b;
    return a;
}

// Scalar-on-the-left variant: b * a, component by component.
inline __host__ __device__ int4 operator*(int b, int4 a)
{
    a.x = b * a.x;
    a.y = b * a.y;
    a.z = b * a.z;
    a.w = b * a.w;
    return a;
}

// In-place scaling of a by the scalar b.
inline __host__ __device__ void operator*=(int4 &a, int b)
{
    a = make_int4(a.x * b, a.y * b, a.z * b, a.w * b);
}
// Component-wise product of two uint4 vectors.
inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    a.w *= b.w;
    return a;
}

// In-place component-wise product: a <- a * b.
inline __host__ __device__ void operator*=(uint4 &a, uint4 b)
{
    a = make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}

// Scales each component of a by the scalar b.
inline __host__ __device__ uint4 operator*(uint4 a, uint b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
    a.w *= b;
    return a;
}

// Scalar-on-the-left variant: b * a, component by component.
inline __host__ __device__ uint4 operator*(uint b, uint4 a)
{
    a.x = b * a.x;
    a.y = b * a.y;
    a.z = b * a.z;
    a.w = b * a.w;
    return a;
}

// In-place scaling of a by the scalar b.
inline __host__ __device__ void operator*=(uint4 &a, uint b)
{
    a = make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);
}
////////////////////////////////////////////////////////////////////////////////
// divide
////////////////////////////////////////////////////////////////////////////////
// Component-wise quotient of two float2 vectors (no zero-divisor guard).
inline __host__ __device__ float2 operator/(float2 a, float2 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x /= b.x;
    a.y /= b.y;
    return a;
}

// In-place component-wise division: a <- a / b.
inline __host__ __device__ void operator/=(float2 &a, float2 b)
{
    a = make_float2(a.x / b.x, a.y / b.y);
}

// Divides each component of a by the scalar b.
inline __host__ __device__ float2 operator/(float2 a, float b)
{
    a.x /= b;
    a.y /= b;
    return a;
}

// In-place division of each component of a by the scalar b.
inline __host__ __device__ void operator/=(float2 &a, float b)
{
    a = make_float2(a.x / b, a.y / b);
}

// Divides the scalar b by each component of a.
inline __host__ __device__ float2 operator/(float b, float2 a)
{
    a.x = b / a.x;
    a.y = b / a.y;
    return a;
}
// Component-wise quotient of two float3 vectors (no zero-divisor guard).
inline __host__ __device__ float3 operator/(float3 a, float3 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x /= b.x;
    a.y /= b.y;
    a.z /= b.z;
    return a;
}

// In-place component-wise division: a <- a / b.
inline __host__ __device__ void operator/=(float3 &a, float3 b)
{
    a = make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
}

// Divides each component of a by the scalar b.
inline __host__ __device__ float3 operator/(float3 a, float b)
{
    a.x /= b;
    a.y /= b;
    a.z /= b;
    return a;
}

// In-place division of each component of a by the scalar b.
inline __host__ __device__ void operator/=(float3 &a, float b)
{
    a = make_float3(a.x / b, a.y / b, a.z / b);
}

// Divides the scalar b by each component of a.
inline __host__ __device__ float3 operator/(float b, float3 a)
{
    a.x = b / a.x;
    a.y = b / a.y;
    a.z = b / a.z;
    return a;
}
// Component-wise quotient of two float4 vectors (no zero-divisor guard).
inline __host__ __device__ float4 operator/(float4 a, float4 b)
{
    // a is a by-value copy, so it doubles as the result.
    a.x /= b.x;
    a.y /= b.y;
    a.z /= b.z;
    a.w /= b.w;
    return a;
}

// In-place component-wise division: a <- a / b.
inline __host__ __device__ void operator/=(float4 &a, float4 b)
{
    a = make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
}

// Divides each component of a by the scalar b.
inline __host__ __device__ float4 operator/(float4 a, float b)
{
    a.x /= b;
    a.y /= b;
    a.z /= b;
    a.w /= b;
    return a;
}

// In-place division of each component of a by the scalar b.
inline __host__ __device__ void operator/=(float4 &a, float b)
{
    a = make_float4(a.x / b, a.y / b, a.z / b, a.w / b);
}

// Divides the scalar b by each component of a.
inline __host__ __device__ float4 operator/(float b, float4 a)
{
    a.x = b / a.x;
    a.y = b / a.y;
    a.z = b / a.z;
    a.w = b / a.w;
    return a;
}
////////////////////////////////////////////////////////////////////////////////
// min
////////////////////////////////////////////////////////////////////////////////
// Component-wise minimum of two float2 vectors via scalar fminf.
inline __host__ __device__ float2 fminf(float2 a, float2 b)
{
    // a is a by-value copy; each component is overwritten with the minimum.
    a.x = fminf(a.x, b.x);
    a.y = fminf(a.y, b.y);
    return a;
}

// Component-wise minimum of two float3 vectors via scalar fminf.
inline __host__ __device__ float3 fminf(float3 a, float3 b)
{
    a.x = fminf(a.x, b.x);
    a.y = fminf(a.y, b.y);
    a.z = fminf(a.z, b.z);
    return a;
}

// Component-wise minimum of two float4 vectors via scalar fminf.
inline __host__ __device__ float4 fminf(float4 a, float4 b)
{
    a.x = fminf(a.x, b.x);
    a.y = fminf(a.y, b.y);
    a.z = fminf(a.z, b.z);
    a.w = fminf(a.w, b.w);
    return a;
}
// Component-wise minimum of two int2 vectors via scalar min.
inline __host__ __device__ int2 min(int2 a, int2 b)
{
    // a is a by-value copy; each component is overwritten with the minimum.
    a.x = min(a.x, b.x);
    a.y = min(a.y, b.y);
    return a;
}

// Component-wise minimum of two int3 vectors via scalar min.
inline __host__ __device__ int3 min(int3 a, int3 b)
{
    a.x = min(a.x, b.x);
    a.y = min(a.y, b.y);
    a.z = min(a.z, b.z);
    return a;
}

// Component-wise minimum of two int4 vectors via scalar min.
inline __host__ __device__ int4 min(int4 a, int4 b)
{
    a.x = min(a.x, b.x);
    a.y = min(a.y, b.y);
    a.z = min(a.z, b.z);
    a.w = min(a.w, b.w);
    return a;
}

// Component-wise minimum of two uint2 vectors via scalar min.
inline __host__ __device__ uint2 min(uint2 a, uint2 b)
{
    a.x = min(a.x, b.x);
    a.y = min(a.y, b.y);
    return a;
}

// Component-wise minimum of two uint3 vectors via scalar min.
inline __host__ __device__ uint3 min(uint3 a, uint3 b)
{
    a.x = min(a.x, b.x);
    a.y = min(a.y, b.y);
    a.z = min(a.z, b.z);
    return a;
}

// Component-wise minimum of two uint4 vectors via scalar min.
inline __host__ __device__ uint4 min(uint4 a, uint4 b)
{
    a.x = min(a.x, b.x);
    a.y = min(a.y, b.y);
    a.z = min(a.z, b.z);
    a.w = min(a.w, b.w);
    return a;
}
////////////////////////////////////////////////////////////////////////////////
// max
////////////////////////////////////////////////////////////////////////////////
// Component-wise maximum of two float2 vectors via scalar fmaxf.
inline __host__ __device__ float2 fmaxf(float2 a, float2 b)
{
    // a is a by-value copy; each component is overwritten with the maximum.
    a.x = fmaxf(a.x, b.x);
    a.y = fmaxf(a.y, b.y);
    return a;
}

// Component-wise maximum of two float3 vectors via scalar fmaxf.
inline __host__ __device__ float3 fmaxf(float3 a, float3 b)
{
    a.x = fmaxf(a.x, b.x);
    a.y = fmaxf(a.y, b.y);
    a.z = fmaxf(a.z, b.z);
    return a;
}

// Component-wise maximum of two float4 vectors via scalar fmaxf.
inline __host__ __device__ float4 fmaxf(float4 a, float4 b)
{
    a.x = fmaxf(a.x, b.x);
    a.y = fmaxf(a.y, b.y);
    a.z = fmaxf(a.z, b.z);
    a.w = fmaxf(a.w, b.w);
    return a;
}
// Component-wise maximum of two int2 vectors via scalar max.
inline __host__ __device__ int2 max(int2 a, int2 b)
{
    // a is a by-value copy; each component is overwritten with the maximum.
    a.x = max(a.x, b.x);
    a.y = max(a.y, b.y);
    return a;
}

// Component-wise maximum of two int3 vectors via scalar max.
inline __host__ __device__ int3 max(int3 a, int3 b)
{
    a.x = max(a.x, b.x);
    a.y = max(a.y, b.y);
    a.z = max(a.z, b.z);
    return a;
}

// Component-wise maximum of two int4 vectors via scalar max.
inline __host__ __device__ int4 max(int4 a, int4 b)
{
    a.x = max(a.x, b.x);
    a.y = max(a.y, b.y);
    a.z = max(a.z, b.z);
    a.w = max(a.w, b.w);
    return a;
}

// Component-wise maximum of two uint2 vectors via scalar max.
inline __host__ __device__ uint2 max(uint2 a, uint2 b)
{
    a.x = max(a.x, b.x);
    a.y = max(a.y, b.y);
    return a;
}

// Component-wise maximum of two uint3 vectors via scalar max.
inline __host__ __device__ uint3 max(uint3 a, uint3 b)
{
    a.x = max(a.x, b.x);
    a.y = max(a.y, b.y);
    a.z = max(a.z, b.z);
    return a;
}

// Component-wise maximum of two uint4 vectors via scalar max.
inline __host__ __device__ uint4 max(uint4 a, uint4 b)
{
    a.x = max(a.x, b.x);
    a.y = max(a.y, b.y);
    a.z = max(a.z, b.z);
    a.w = max(a.w, b.w);
    return a;
}
////////////////////////////////////////////////////////////////////////////////
// lerp
// - linear interpolation between a and b, based on value t in [0, 1] range
////////////////////////////////////////////////////////////////////////////////
// Scalar linear interpolation: a at t == 0, moving towards b as t grows.
// t is not clamped, so values outside [0, 1] extrapolate.
inline __device__ __host__ float lerp(float a, float b, float t)
{
    const float span = b - a;
    return a + t * span;
}

// float2 linear interpolation with a single scalar weight t.
inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
{
    const float2 span = b - a;
    return a + t * span;
}

// float3 linear interpolation with a single scalar weight t.
inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
{
    const float3 span = b - a;
    return a + t * span;
}

// float4 linear interpolation with a single scalar weight t.
inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
{
    const float4 span = b - a;
    return a + t * span;
}
////////////////////////////////////////////////////////////////////////////////
// clamp
// - clamp the value v to be in the range [a, b]
////////////////////////////////////////////////////////////////////////////////
// Clamps f to [a, b]: the upper bound is applied first, then the lower bound.
inline __device__ __host__ float clamp(float f, float a, float b)
{
    const float capped = fminf(f, b);
    return fmaxf(a, capped);
}

// Integer clamp of f to [a, b]; upper bound first, then lower bound.
inline __device__ __host__ int clamp(int f, int a, int b)
{
    const int capped = min(f, b);
    return max(a, capped);
}

// Unsigned clamp of f to [a, b]; upper bound first, then lower bound.
inline __device__ __host__ uint clamp(uint f, uint a, uint b)
{
    const uint capped = min(f, b);
    return max(a, capped);
}
// Clamps every component of v to the shared scalar range [a, b].
inline __device__ __host__ float2 clamp(float2 v, float a, float b)
{
    // v is a by-value copy; each component is clamped in place.
    v.x = clamp(v.x, a, b);
    v.y = clamp(v.y, a, b);
    return v;
}

// Clamps each component of v to the matching per-component range [a, b].
inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
{
    v.x = clamp(v.x, a.x, b.x);
    v.y = clamp(v.y, a.y, b.y);
    return v;
}

// Clamps every component of v to the shared scalar range [a, b].
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
    v.x = clamp(v.x, a, b);
    v.y = clamp(v.y, a, b);
    v.z = clamp(v.z, a, b);
    return v;
}

// Clamps each component of v to the matching per-component range [a, b].
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
    v.x = clamp(v.x, a.x, b.x);
    v.y = clamp(v.y, a.y, b.y);
    v.z = clamp(v.z, a.z, b.z);
    return v;
}

// Clamps every component of v to the shared scalar range [a, b].
inline __device__ __host__ float4 clamp(float4 v, float a, float b)
{
    v.x = clamp(v.x, a, b);
    v.y = clamp(v.y, a, b);
    v.z = clamp(v.z, a, b);
    v.w = clamp(v.w, a, b);
    return v;
}

// Clamps each component of v to the matching per-component range [a, b].
inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
{
    v.x = clamp(v.x, a.x, b.x);
    v.y = clamp(v.y, a.y, b.y);
    v.z = clamp(v.z, a.z, b.z);
    v.w = clamp(v.w, a.w, b.w);
    return v;
}
// Clamps every component of v to the shared scalar range [a, b].
inline __device__ __host__ int2 clamp(int2 v, int a, int b)
{
    // v is a by-value copy; each component is clamped in place.
    v.x = clamp(v.x, a, b);
    v.y = clamp(v.y, a, b);
    return v;
}

// Clamps each component of v to the matching per-component range [a, b].
inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b)
{
    v.x = clamp(v.x, a.x, b.x);
    v.y = clamp(v.y, a.y, b.y);
    return v;
}

// Clamps every component of v to the shared scalar range [a, b].
inline __device__ __host__ int3 clamp(int3 v, int a, int b)
{
    v.x = clamp(v.x, a, b);
    v.y = clamp(v.y, a, b);
    v.z = clamp(v.z, a, b);
    return v;
}

// Clamps each component of v to the matching per-component range [a, b].
inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
{
    v.x = clamp(v.x, a.x, b.x);
    v.y = clamp(v.y, a.y, b.y);
    v.z = clamp(v.z, a.z, b.z);
    return v;
}

// Clamps every component of v to the shared scalar range [a, b].
inline __device__ __host__ int4 clamp(int4 v, int a, int b)
{
    v.x = clamp(v.x, a, b);
    v.y = clamp(v.y, a, b);
    v.z = clamp(v.z, a, b);
    v.w = clamp(v.w, a, b);
    return v;
}

// Clamps each component of v to the matching per-component range [a, b].
inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
{
    v.x = clamp(v.x, a.x, b.x);
    v.y = clamp(v.y, a.y, b.y);
    v.z = clamp(v.z, a.z, b.z);
    v.w = clamp(v.w, a.w, b.w);
    return v;
}
// Clamps every component of v to the shared scalar range [a, b].
inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)
{
    // v is a by-value copy; each component is clamped in place.
    v.x = clamp(v.x, a, b);
    v.y = clamp(v.y, a, b);
    return v;
}

// Clamps each component of v to the matching per-component range [a, b].
inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)
{
    v.x = clamp(v.x, a.x, b.x);
    v.y = clamp(v.y, a.y, b.y);
    return v;
}

// Clamps every component of v to the shared scalar range [a, b].
inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
{
    v.x = clamp(v.x, a, b);
    v.y = clamp(v.y, a, b);
    v.z = clamp(v.z, a, b);
    return v;
}

// Clamps each component of v to the matching per-component range [a, b].
inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
{
    v.x = clamp(v.x, a.x, b.x);
    v.y = clamp(v.y, a.y, b.y);
    v.z = clamp(v.z, a.z, b.z);
    return v;
}

// Clamps every component of v to the shared scalar range [a, b].
inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)
{
    v.x = clamp(v.x, a, b);
    v.y = clamp(v.y, a, b);
    v.z = clamp(v.z, a, b);
    v.w = clamp(v.w, a, b);
    return v;
}

// Clamps each component of v to the matching per-component range [a, b].
inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
{
    v.x = clamp(v.x, a.x, b.x);
    v.y = clamp(v.y, a.y, b.y);
    v.z = clamp(v.z, a.z, b.z);
    v.w = clamp(v.w, a.w, b.w);
    return v;
}
////////////////////////////////////////////////////////////////////////////////
// dot product
////////////////////////////////////////////////////////////////////////////////
// Dot product of two float2 vectors; terms are summed in x, y order.
inline __host__ __device__ float dot(float2 a, float2 b)
{
    float acc = a.x * b.x;
    acc += a.y * b.y;
    return acc;
}

// Dot product of two float3 vectors; terms are summed in x, y, z order.
inline __host__ __device__ float dot(float3 a, float3 b)
{
    float acc = a.x * b.x;
    acc += a.y * b.y;
    acc += a.z * b.z;
    return acc;
}

// Dot product of two float4 vectors; terms are summed in x, y, z, w order.
inline __host__ __device__ float dot(float4 a, float4 b)
{
    float acc = a.x * b.x;
    acc += a.y * b.y;
    acc += a.z * b.z;
    acc += a.w * b.w;
    return acc;
}

// Dot product of two int2 vectors.
inline __host__ __device__ int dot(int2 a, int2 b)
{
    int acc = a.x * b.x;
    acc += a.y * b.y;
    return acc;
}

// Dot product of two int3 vectors.
inline __host__ __device__ int dot(int3 a, int3 b)
{
    int acc = a.x * b.x;
    acc += a.y * b.y;
    acc += a.z * b.z;
    return acc;
}

// Dot product of two int4 vectors.
inline __host__ __device__ int dot(int4 a, int4 b)
{
    int acc = a.x * b.x;
    acc += a.y * b.y;
    acc += a.z * b.z;
    acc += a.w * b.w;
    return acc;
}

// Dot product of two uint2 vectors.
inline __host__ __device__ uint dot(uint2 a, uint2 b)
{
    uint acc = a.x * b.x;
    acc += a.y * b.y;
    return acc;
}

// Dot product of two uint3 vectors.
inline __host__ __device__ uint dot(uint3 a, uint3 b)
{
    uint acc = a.x * b.x;
    acc += a.y * b.y;
    acc += a.z * b.z;
    return acc;
}

// Dot product of two uint4 vectors.
inline __host__ __device__ uint dot(uint4 a, uint4 b)
{
    uint acc = a.x * b.x;
    acc += a.y * b.y;
    acc += a.z * b.z;
    acc += a.w * b.w;
    return acc;
}
////////////////////////////////////////////////////////////////////////////////
// length
////////////////////////////////////////////////////////////////////////////////
// Euclidean length of a float2: square root of the vector dotted with itself.
inline __host__ __device__ float length(float2 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}

// Euclidean length of a float3.
inline __host__ __device__ float length(float3 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}

// Euclidean length of a float4.
inline __host__ __device__ float length(float4 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}
////////////////////////////////////////////////////////////////////////////////
// normalize
////////////////////////////////////////////////////////////////////////////////
// Scales v by the reciprocal square root of its squared length.
// NOTE(review): there is no guard for a zero-length input.
inline __host__ __device__ float2 normalize(float2 v)
{
    return v * rsqrtf(dot(v, v));
}

// float3 variant; same unguarded reciprocal-square-root scaling.
inline __host__ __device__ float3 normalize(float3 v)
{
    return v * rsqrtf(dot(v, v));
}

// float4 variant; same unguarded reciprocal-square-root scaling.
inline __host__ __device__ float4 normalize(float4 v)
{
    return v * rsqrtf(dot(v, v));
}
////////////////////////////////////////////////////////////////////////////////
// floor
////////////////////////////////////////////////////////////////////////////////
// Applies scalar floorf to each component of a float2.
inline __host__ __device__ float2 floorf(float2 v)
{
    // v is a by-value copy; each component is floored in place.
    v.x = floorf(v.x);
    v.y = floorf(v.y);
    return v;
}

// Applies scalar floorf to each component of a float3.
inline __host__ __device__ float3 floorf(float3 v)
{
    v.x = floorf(v.x);
    v.y = floorf(v.y);
    v.z = floorf(v.z);
    return v;
}

// Applies scalar floorf to each component of a float4.
inline __host__ __device__ float4 floorf(float4 v)
{
    v.x = floorf(v.x);
    v.y = floorf(v.y);
    v.z = floorf(v.z);
    v.w = floorf(v.w);
    return v;
}
////////////////////////////////////////////////////////////////////////////////
// frac - returns the fractional portion of a scalar or each vector component
////////////////////////////////////////////////////////////////////////////////
// Fractional part of v, computed as v minus its floor.
inline __host__ __device__ float fracf(float v)
{
    const float whole = floorf(v);
    return v - whole;
}

// Fractional part of each component of a float2.
inline __host__ __device__ float2 fracf(float2 v)
{
    v.x = fracf(v.x);
    v.y = fracf(v.y);
    return v;
}

// Fractional part of each component of a float3.
inline __host__ __device__ float3 fracf(float3 v)
{
    v.x = fracf(v.x);
    v.y = fracf(v.y);
    v.z = fracf(v.z);
    return v;
}

// Fractional part of each component of a float4.
inline __host__ __device__ float4 fracf(float4 v)
{
    v.x = fracf(v.x);
    v.y = fracf(v.y);
    v.z = fracf(v.z);
    v.w = fracf(v.w);
    return v;
}
////////////////////////////////////////////////////////////////////////////////
// fmod
////////////////////////////////////////////////////////////////////////////////
// Applies scalar fmodf(a, b) to each pair of float2 components.
inline __host__ __device__ float2 fmodf(float2 a, float2 b)
{
    // a is a by-value copy; each component is replaced by its remainder.
    a.x = fmodf(a.x, b.x);
    a.y = fmodf(a.y, b.y);
    return a;
}

// Applies scalar fmodf(a, b) to each pair of float3 components.
inline __host__ __device__ float3 fmodf(float3 a, float3 b)
{
    a.x = fmodf(a.x, b.x);
    a.y = fmodf(a.y, b.y);
    a.z = fmodf(a.z, b.z);
    return a;
}

// Applies scalar fmodf(a, b) to each pair of float4 components.
inline __host__ __device__ float4 fmodf(float4 a, float4 b)
{
    a.x = fmodf(a.x, b.x);
    a.y = fmodf(a.y, b.y);
    a.z = fmodf(a.z, b.z);
    a.w = fmodf(a.w, b.w);
    return a;
}
////////////////////////////////////////////////////////////////////////////////
// absolute value
////////////////////////////////////////////////////////////////////////////////
// Absolute value of each component of a float2.
inline __host__ __device__ float2 fabs(float2 v)
{
    // v is a by-value copy; each component is replaced by its magnitude.
    v.x = fabs(v.x);
    v.y = fabs(v.y);
    return v;
}

// Absolute value of each component of a float3.
inline __host__ __device__ float3 fabs(float3 v)
{
    v.x = fabs(v.x);
    v.y = fabs(v.y);
    v.z = fabs(v.z);
    return v;
}

// Absolute value of each component of a float4.
inline __host__ __device__ float4 fabs(float4 v)
{
    v.x = fabs(v.x);
    v.y = fabs(v.y);
    v.z = fabs(v.z);
    v.w = fabs(v.w);
    return v;
}
// Absolute value of each component of an int2.
inline __host__ __device__ int2 abs(int2 v)
{
    // v is a by-value copy; each component is replaced by its magnitude.
    v.x = abs(v.x);
    v.y = abs(v.y);
    return v;
}

// Absolute value of each component of an int3.
inline __host__ __device__ int3 abs(int3 v)
{
    v.x = abs(v.x);
    v.y = abs(v.y);
    v.z = abs(v.z);
    return v;
}

// Absolute value of each component of an int4.
inline __host__ __device__ int4 abs(int4 v)
{
    v.x = abs(v.x);
    v.y = abs(v.y);
    v.z = abs(v.z);
    v.w = abs(v.w);
    return v;
}
////////////////////////////////////////////////////////////////////////////////
// reflect
// - returns reflection of incident ray I around surface normal N
// - N should be normalized, reflected vector's length is equal to length of I
////////////////////////////////////////////////////////////////////////////////
// Mirrors the incident vector i about the normal n: i - (2 n) * dot(n, i).
// n is used as given; it is not normalized here.
inline __host__ __device__ float3 reflect(float3 i, float3 n)
{
    const float3 doubled_normal = 2.0f * n;
    const float projection = dot(n, i);
    return i - doubled_normal * projection;
}
////////////////////////////////////////////////////////////////////////////////
// cross product
////////////////////////////////////////////////////////////////////////////////
// Cross product a x b of two float3 vectors.
inline __host__ __device__ float3 cross(float3 a, float3 b)
{
    const float cx = a.y * b.z - a.z * b.y;
    const float cy = a.z * b.x - a.x * b.z;
    const float cz = a.x * b.y - a.y * b.x;
    return make_float3(cx, cy, cz);
}
////////////////////////////////////////////////////////////////////////////////
// smoothstep
// - returns 0 if x < a
// - returns 1 if x > b
// - otherwise returns smooth interpolation between 0 and 1 based on x
////////////////////////////////////////////////////////////////////////////////
// Scalar smoothstep: normalizes x against [a, b], clamps to [0, 1], then
// evaluates t * t * (3 - 2 t). There is no guard for a == b.
inline __device__ __host__ float smoothstep(float a, float b, float x)
{
    const float t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    const float t_squared = t * t;
    const float blend = 3.0f - (2.0f * t);
    return t_squared * blend;
}

// Component-wise smoothstep for float2 edges and input.
inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
{
    const float2 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    const float2 t_squared = t * t;
    const float2 blend = make_float2(3.0f) - (make_float2(2.0f) * t);
    return t_squared * blend;
}

// Component-wise smoothstep for float3 edges and input.
inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
{
    const float3 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    const float3 t_squared = t * t;
    const float3 blend = make_float3(3.0f) - (make_float3(2.0f) * t);
    return t_squared * blend;
}

// Component-wise smoothstep for float4 edges and input.
inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
{
    const float4 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    const float4 t_squared = t * t;
    const float4 blend = make_float4(3.0f) - (make_float4(2.0f) * t);
    return t_squared * blend;
}
////////////////////////////////////////////////////////////////////////////////
// sign
////////////////////////////////////////////////////////////////////////////////
// Per-component sign: each output is 1.0f carrying the sign of the matching
// input component (via copysignf), so it is never 0.
inline __device__ __host__ float3 sign(float3 a)
{
    a.x = copysignf(1.0f, a.x);
    a.y = copysignf(1.0f, a.y);
    a.z = copysignf(1.0f, a.z);
    return a;
}
#endif
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/intersection.cu
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
#include "include/helpers_cuda.h"
template
inline __host__ __device__ void _swap(scalar_t &a, scalar_t &b)
{
scalar_t c = a;
a = b;
b = c;
}
template
inline __host__ __device__ void _ray_aabb_intersect(
const scalar_t *rays_o,
const scalar_t *rays_d,
const scalar_t *aabb,
scalar_t *near,
scalar_t *far)
{
// aabb is [xmin, ymin, zmin, xmax, ymax, zmax]
scalar_t tmin = (aabb[0] - rays_o[0]) / rays_d[0];
scalar_t tmax = (aabb[3] - rays_o[0]) / rays_d[0];
if (tmin > tmax)
_swap(tmin, tmax);
scalar_t tymin = (aabb[1] - rays_o[1]) / rays_d[1];
scalar_t tymax = (aabb[4] - rays_o[1]) / rays_d[1];
if (tymin > tymax)
_swap(tymin, tymax);
if (tmin > tymax || tymin > tmax)
{
*near = 1e10;
*far = 1e10;
return;
}
if (tymin > tmin)
tmin = tymin;
if (tymax < tmax)
tmax = tymax;
scalar_t tzmin = (aabb[2] - rays_o[2]) / rays_d[2];
scalar_t tzmax = (aabb[5] - rays_o[2]) / rays_d[2];
if (tzmin > tzmax)
_swap(tzmin, tzmax);
if (tmin > tzmax || tzmin > tmax)
{
*near = 1e10;
*far = 1e10;
return;
}
if (tzmin > tmin)
tmin = tzmin;
if (tzmax < tmax)
tmax = tzmax;
*near = tmin;
*far = tmax;
return;
}
template
__global__ void ray_aabb_intersect_kernel(
const int N,
const scalar_t *rays_o,
const scalar_t *rays_d,
const scalar_t *aabb,
scalar_t *t_min,
scalar_t *t_max)
{
// aabb is [xmin, ymin, zmin, xmax, ymax, zmax]
CUDA_GET_THREAD_ID(thread_id, N);
// locate
rays_o += thread_id * 3;
rays_d += thread_id * 3;
t_min += thread_id;
t_max += thread_id;
_ray_aabb_intersect(rays_o, rays_d, aabb, t_min, t_max);
scalar_t zero = static_cast(0.f);
*t_min = *t_min > zero ? *t_min : zero;
return;
}
/**
* @brief Ray AABB Test
*
* @param rays_o Ray origins. Tensor with shape [N, 3].
* @param rays_d Normalized ray directions. Tensor with shape [N, 3].
* @param aabb Scene AABB [xmin, ymin, zmin, xmax, ymax, zmax]. Tensor with shape [6].
* @return std::vector
* Ray AABB intersection {t_min, t_max} with shape [N] respectively. Note the t_min is
* clipped to minimum zero. 1e10 is returned if no intersection.
*/
std::vector ray_aabb_intersect(
const torch::Tensor rays_o, const torch::Tensor rays_d, const torch::Tensor aabb)
{
DEVICE_GUARD(rays_o);
CHECK_INPUT(rays_o);
CHECK_INPUT(rays_d);
CHECK_INPUT(aabb);
TORCH_CHECK(rays_o.ndimension() == 2 & rays_o.size(1) == 3)
TORCH_CHECK(rays_d.ndimension() == 2 & rays_d.size(1) == 3)
TORCH_CHECK(aabb.ndimension() == 1 & aabb.size(0) == 6)
const int N = rays_o.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(N, threads);
torch::Tensor t_min = torch::empty({N}, rays_o.options());
torch::Tensor t_max = torch::empty({N}, rays_o.options());
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
rays_o.scalar_type(), "ray_aabb_intersect",
([&]
{ ray_aabb_intersect_kernel<<>>(
N,
rays_o.data_ptr(),
rays_d.data_ptr(),
aabb.data_ptr(),
t_min.data_ptr(),
t_max.data_ptr()); }));
return {t_min, t_max};
}
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/pack.cu
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
#include "include/helpers_cuda.h"
// Expands packed per-ray (start, count) pairs into a per-sample ray index:
// thread i writes the value i into ray_indices[start .. start + count).
__global__ void unpack_info_kernel(
    // input
    const int n_rays,
    const int *packed_info,
    // output
    int64_t *ray_indices)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // Each ray owns two consecutive ints: first sample index, sample count.
    const int *ray_entry = packed_info + i * 2;
    const int first_sample = ray_entry[0];
    const int sample_count = ray_entry[1];

    if (sample_count != 0)
    {
        int64_t *dst = ray_indices + first_sample;
        for (int s = 0; s < sample_count; ++s)
        {
            dst[s] = i;
        }
    }
}
// Marks, for each ray, the first `steps` entries of its mask row as true.
// Only the sample count from packed_info is used; the start offset is not
// needed because every ray writes to its own row of `masks`.
// NOTE(review): steps is not checked against n_samples here.
__global__ void unpack_info_to_mask_kernel(
    // input
    const int n_rays,
    const int *packed_info,
    const int n_samples,
    // output
    bool *masks) // [n_rays, n_samples]
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // locate
    const int steps = packed_info[i * 2 + 1]; // number of samples on ray i.
    if (steps == 0)
        return;

    masks += i * n_samples;

    for (int j = 0; j < steps; ++j)
    {
        masks[j] = true;
    }
}
template
__global__ void unpack_data_kernel(
const uint32_t n_rays,
const int *packed_info, // input ray & point indices.
const int data_dim,
const scalar_t *data,
const int n_sampler_per_ray,
scalar_t *unpacked_data) // (n_rays, n_sampler_per_ray, data_dim)
{
CUDA_GET_THREAD_ID(i, n_rays);
// locate
const int base = packed_info[i * 2 + 0]; // point idx start.
const int steps = packed_info[i * 2 + 1]; // point idx shift.
if (steps == 0)
return;
data += base * data_dim;
unpacked_data += i * n_sampler_per_ray * data_dim;
for (int j = 0; j < steps; j++)
{
for (int k = 0; k < data_dim; k++)
{
unpacked_data[j * data_dim + k] = data[j * data_dim + k];
}
}
return;
}
/**
 * Expands packed per-ray info into one ray index per sample.
 *
 * @param packed_info Per-ray (start, count) pairs; read as int32 with two
 *                    values per ray.
 * @param n_samples   Length of the returned tensor.
 * @return int64 tensor of shape [n_samples] on the same device as packed_info.
 *
 * The output is allocated uninitialized, so any entry not covered by some
 * ray's (start, count) range is left unspecified.
 */
torch::Tensor unpack_info(const torch::Tensor packed_info, const int n_samples)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);

    const int n_rays = packed_info.size(0);
    // One thread per ray.
    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    torch::Tensor ray_indices = torch::empty(
        {n_samples}, packed_info.options().dtype(torch::kLong));

    unpack_info_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        packed_info.data_ptr<int>(),
        ray_indices.data_ptr<int64_t>());
    return ray_indices;
}
/**
 * Builds a dense boolean mask of which per-ray sample slots are in use.
 *
 * @param packed_info Per-ray (start, count) pairs; read as int32 with two
 *                    values per ray.
 * @param n_samples   Row length of the mask.
 * @return bool tensor of shape [n_rays, n_samples], zero-initialized, with
 *         the first `count` entries of each row set to true.
 */
torch::Tensor unpack_info_to_mask(
    const torch::Tensor packed_info, const int n_samples)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);

    const int n_rays = packed_info.size(0);
    // One thread per ray.
    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    torch::Tensor masks = torch::zeros(
        {n_rays, n_samples}, packed_info.options().dtype(torch::kBool));

    unpack_info_to_mask_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        packed_info.data_ptr<int>(),
        n_samples,
        masks.data_ptr<bool>());
    return masks;
}
/**
 * Scatters packed per-sample data into a dense, zero-padded per-ray tensor.
 *
 * @param packed_info       Tensor of shape [n_rays, 2] holding per-ray
 *                          (start, count) pairs; read as int32.
 * @param data              Tensor of shape [n_samples, data_dim].
 * @param n_samples_per_ray Row length of the dense output.
 * @return Tensor of shape [n_rays, n_samples_per_ray, data_dim] with the same
 *         options as data; slots beyond a ray's sample count stay zero.
 */
torch::Tensor unpack_data(
    torch::Tensor packed_info,
    torch::Tensor data,
    int n_samples_per_ray)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(data);
    // Logical && so the size() lookup is skipped when the rank is already wrong.
    TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 2);
    TORCH_CHECK(data.ndimension() == 2);

    const int n_rays = packed_info.size(0);
    const int data_dim = data.size(1);

    // One thread per ray.
    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    torch::Tensor unpacked_data = torch::zeros(
        {n_rays, n_samples_per_ray, data_dim}, data.options());

    AT_DISPATCH_ALL_TYPES(
        data.scalar_type(),
        "unpack_data",
        ([&]
         { unpack_data_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
               n_rays,
               // inputs
               packed_info.data_ptr<int>(),
               data_dim,
               data.data_ptr<scalar_t>(),
               n_samples_per_ray,
               // outputs
               unpacked_data.data_ptr<scalar_t>()); }));

    return unpacked_data;
}
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/pybind.cu
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
#include "include/helpers_cuda.h"
#include "include/helpers_math.h"
#include "include/helpers_contraction.h"
std::vector ray_aabb_intersect(
const torch::Tensor rays_o,
const torch::Tensor rays_d,
const torch::Tensor aabb);
std::vector ray_marching(
// rays
const torch::Tensor rays_o,
const torch::Tensor rays_d,
const torch::Tensor t_min,
const torch::Tensor t_max,
// occupancy grid & contraction
const torch::Tensor roi,
const torch::Tensor grid_binary,
const ContractionType type,
// sampling
const float step_size,
const float cone_angle);
torch::Tensor unpack_info(
const torch::Tensor packed_info, const int n_samples);
torch::Tensor unpack_info_to_mask(
const torch::Tensor packed_info, const int n_samples);
torch::Tensor grid_query(
const torch::Tensor samples,
// occupancy grid & contraction
const torch::Tensor roi,
const torch::Tensor grid_value,
const ContractionType type);
torch::Tensor contract(
const torch::Tensor samples,
// contraction
const torch::Tensor roi,
const ContractionType type);
torch::Tensor contract_inv(
const torch::Tensor samples,
// contraction
const torch::Tensor roi,
const ContractionType type);
std::vector ray_resampling(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor weights,
const int steps);
torch::Tensor unpack_data(
torch::Tensor packed_info,
torch::Tensor data,
int n_samples_per_ray);
// cub implementations: parallel across samples
// Returns the value of CUB_SUPPORTS_SCAN_BY_KEY() converted to bool.
bool is_cub_available()
{
    const bool scan_by_key_supported = (bool) CUB_SUPPORTS_SCAN_BY_KEY();
    return scan_by_key_supported;
}
torch::Tensor transmittance_from_sigma_forward_cub(
torch::Tensor ray_indices,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas);
torch::Tensor transmittance_from_sigma_backward_cub(
torch::Tensor ray_indices,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor transmittance,
torch::Tensor transmittance_grad);
torch::Tensor transmittance_from_alpha_forward_cub(
torch::Tensor ray_indices, torch::Tensor alphas);
torch::Tensor transmittance_from_alpha_backward_cub(
torch::Tensor ray_indices,
torch::Tensor alphas,
torch::Tensor transmittance,
torch::Tensor transmittance_grad);
// naive implementations: parallel across rays
torch::Tensor transmittance_from_sigma_forward_naive(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas);
torch::Tensor transmittance_from_sigma_backward_naive(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor transmittance,
torch::Tensor transmittance_grad);
torch::Tensor transmittance_from_alpha_forward_naive(
torch::Tensor packed_info,
torch::Tensor alphas);
torch::Tensor transmittance_from_alpha_backward_naive(
torch::Tensor packed_info,
torch::Tensor alphas,
torch::Tensor transmittance,
torch::Tensor transmittance_grad);
torch::Tensor weight_from_sigma_forward_naive(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas);
torch::Tensor weight_from_sigma_backward_naive(
torch::Tensor weights,
torch::Tensor grad_weights,
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas);
torch::Tensor weight_from_alpha_forward_naive(
torch::Tensor packed_info,
torch::Tensor alphas);
torch::Tensor weight_from_alpha_backward_naive(
torch::Tensor weights,
torch::Tensor grad_weights,
torch::Tensor packed_info,
torch::Tensor alphas);
torch::Tensor weight_from_alpha_patch_based_forward_naive(
torch::Tensor packed_info,
torch::Tensor alphas);
torch::Tensor weight_from_alpha_patch_based_backward_naive(
torch::Tensor weights,
torch::Tensor grad_weights,
torch::Tensor packed_info,
torch::Tensor alphas);
std::vector weight_and_transmittance_from_alpha_patch_based_forward_naive(
torch::Tensor packed_info, // (n_patches, 2)
torch::Tensor alphas // (n_samples, patches_size, 1)
);
torch::Tensor weight_and_transmittance_from_alpha_patch_based_backward_naive(
torch::Tensor weights,
torch::Tensor grad_weights, // (n_samples, patches_size, 1)
torch::Tensor packed_info,
torch::Tensor alphas);
torch::Tensor transmittance_from_alpha_patch_based_forward_naive(
torch::Tensor packed_info, torch::Tensor alphas);
torch::Tensor transmittance_from_alpha_patch_based_backward_naive(
torch::Tensor packed_info,
torch::Tensor alphas,
torch::Tensor transmittance,
torch::Tensor transmittance_grad);
// Python bindings: exposes the ContractionType enum and every function
// declared above under its C++ name.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
    // contraction
    py::enum_<ContractionType>(m, "ContractionType")
        .value("AABB", ContractionType::AABB)
        .value("UN_BOUNDED_TANH", ContractionType::UN_BOUNDED_TANH)
        .value("UN_BOUNDED_SPHERE", ContractionType::UN_BOUNDED_SPHERE);
    m.def("contract", &contract);
    m.def("contract_inv", &contract_inv);

    // grid
    m.def("grid_query", &grid_query);

    // marching
    m.def("ray_aabb_intersect", &ray_aabb_intersect);
    m.def("ray_marching", &ray_marching);
    m.def("ray_resampling", &ray_resampling);

    // rendering
    m.def("is_cub_available", &is_cub_available);
    m.def("transmittance_from_sigma_forward_cub", &transmittance_from_sigma_forward_cub);
    m.def("transmittance_from_sigma_backward_cub", &transmittance_from_sigma_backward_cub);
    m.def("transmittance_from_alpha_forward_cub", &transmittance_from_alpha_forward_cub);
    m.def("transmittance_from_alpha_backward_cub", &transmittance_from_alpha_backward_cub);

    m.def("transmittance_from_sigma_forward_naive", &transmittance_from_sigma_forward_naive);
    m.def("transmittance_from_sigma_backward_naive", &transmittance_from_sigma_backward_naive);
    m.def("transmittance_from_alpha_forward_naive", &transmittance_from_alpha_forward_naive);
    m.def("transmittance_from_alpha_backward_naive", &transmittance_from_alpha_backward_naive);

    m.def("weight_from_sigma_forward_naive", &weight_from_sigma_forward_naive);
    m.def("weight_from_sigma_backward_naive", &weight_from_sigma_backward_naive);
    m.def("weight_from_alpha_forward_naive", &weight_from_alpha_forward_naive);
    m.def("weight_from_alpha_backward_naive", &weight_from_alpha_backward_naive);

    // rendering, patch-based variants
    m.def("weight_from_alpha_patch_based_forward_naive", &weight_from_alpha_patch_based_forward_naive);
    m.def("weight_from_alpha_patch_based_backward_naive", &weight_from_alpha_patch_based_backward_naive);
    m.def("weight_and_transmittance_from_alpha_patch_based_forward_naive", &weight_and_transmittance_from_alpha_patch_based_forward_naive);
    m.def("weight_and_transmittance_from_alpha_patch_based_backward_naive", &weight_and_transmittance_from_alpha_patch_based_backward_naive);
    m.def("transmittance_from_alpha_patch_based_forward_naive", &transmittance_from_alpha_patch_based_forward_naive);
    m.def("transmittance_from_alpha_patch_based_backward_naive", &transmittance_from_alpha_patch_based_backward_naive);

    // pack & unpack
    m.def("unpack_data", &unpack_data);
    m.def("unpack_info", &unpack_info);
    m.def("unpack_info_to_mask", &unpack_info_to_mask);
}
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/ray_marching.cu
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
#include "include/helpers_cuda.h"
#include "include/helpers_math.h"
#include "include/helpers_contraction.h"
// Step length to use at ray distance `t`: the cone-proportional step
// `t * cone_angle`, passed through the project's clamp() helper with
// bounds dt_min and dt_max.
inline __device__ __host__ float calc_dt(
    const float t, const float cone_angle,
    const float dt_min, const float dt_max)
{
    const float cone_step = t * cone_angle;
    return clamp(cone_step, dt_min, dt_max);
}
// Flat index of the grid cell containing `xyz_unit`.
//
// `xyz_unit` is expected to lie in [0, 1]^3; the integer cell coordinate is
// clamped to [0, grid_res - 1] per axis so slightly out-of-range inputs still
// map to a valid cell. The grid is laid out with z varying fastest.
inline __device__ __host__ int grid_idx_at(
    const float3 xyz_unit, const int3 grid_res)
{
    const float3 scaled = xyz_unit * make_float3(grid_res);
    const int3 cell = clamp(make_int3(scaled), make_int3(0, 0, 0), grid_res - 1);
    // Strides of a (res.x, res.y, res.z) array stored in row-major order.
    const int3 strides = make_int3(grid_res.y * grid_res.z, grid_res.z, 1);
    return dot(cell, strides);
}
// Look up the grid value stored for world position `xyz`.
//
// For ContractionType::AABB, a point outside [roi_min, roi_max] on any axis
// yields `false` (converted to scalar_t) without touching the grid. Otherwise
// the point is mapped through apply_contraction() and the value of the cell
// selected by grid_idx_at() is returned.
// NOTE(review): the template parameter list after `template` appears to be
// missing (scalar_t is used below but never declared here) - confirm.
template
inline __device__ __host__ scalar_t grid_occupied_at(
const float3 xyz,
const float3 roi_min, const float3 roi_max,
ContractionType type,
const int3 grid_res, const scalar_t *grid_value)
{
// Only the AABB type rejects points outside the region of interest.
if (type == ContractionType::AABB &&
(xyz.x < roi_min.x || xyz.x > roi_max.x ||
xyz.y < roi_min.y || xyz.y > roi_max.y ||
xyz.z < roi_min.z || xyz.z > roi_max.z))
{
return false;
}
// presumably maps xyz into [0, 1]^3 (grid_idx_at expects that) - confirm
float3 xyz_unit = apply_contraction(
xyz, roi_min, roi_max, type);
int idx = grid_idx_at(xyz_unit, grid_res);
return grid_value[idx];
}
// dda like step
// Returns how far (in ray-parameter units, the caller adds it to `t`) the ray
// must travel from `xyz` along `dir` to reach the closest of the three
// axis-aligned voxel boundaries ahead of it. The result is never negative.
// `inv_dir` is the component-wise reciprocal of `dir` supplied by the caller.
inline __device__ __host__ float distance_to_next_voxel(
const float3 xyz, const float3 dir, const float3 inv_dir,
const float3 roi_min, const float3 roi_max, const int3 grid_res)
{
float3 _occ_res = make_float3(grid_res);
// Position expressed in voxel units.
// presumably roi_to_unit maps the ROI box onto [0, 1]^3 - confirm
float3 _xyz = roi_to_unit(xyz, roi_min, roi_max) * _occ_res;
// Per axis: (boundary in the travel direction - position) * inv_dir, then
// converted from voxel units back to world units via (roi_max - roi_min) / res.
// assumes sign() returns +1 / -1 so the floorf picks the boundary ahead - TODO confirm
float3 txyz = ((floorf(_xyz + 0.5f + 0.5f * sign(dir)) - _xyz) * inv_dir) / _occ_res * (roi_max - roi_min);
float t = min(min(txyz.x, txyz.y), txyz.z);
// Guard against small negative values.
return fmaxf(t, 0.0f);
}
// Skip an empty voxel: starting from `t`, advance in increments of `dt_min`
// until the voxel exit (or `far`, whichever comes first) has been reached or
// passed. At least one increment is always taken, so the result is > t for
// positive dt_min. Stepping in dt_min multiples (instead of jumping straight
// to the boundary) keeps samples aligned with the regular stepping used in
// occupied space.
inline __device__ __host__ float advance_to_next_voxel(
    const float t, const float dt_min,
    const float3 xyz, const float3 dir, const float3 inv_dir,
    const float3 roi_min, const float3 roi_max, const int3 grid_res, const float far)
{
    const float voxel_exit = t + distance_to_next_voxel(
                                     xyz, dir, inv_dir, roi_min, roi_max, grid_res);
    const float t_limit = min(voxel_exit, far);

    float t_next = t + dt_min;
    while (t_next < t_limit)
    {
        t_next += dt_min;
    }
    return t_next;
}
// -------------------------------------------------------------------------------
// Raymarching
// -------------------------------------------------------------------------------
// Marches one ray per thread (ray index `i` comes from CUDA_GET_THREAD_ID)
// through the occupancy grid. ray_marching() launches it twice:
//  - first round (packed_info == nullptr): only counts the samples of ray i
//    and stores the count in num_steps[i];
//  - second round (packed_info != nullptr): repeats exactly the same march and
//    writes each sample's [t0, t1] interval and ray index, starting at the
//    offset read from packed_info[i * 2 + 0].
// Both rounds must visit the same samples, otherwise the second round would
// write outside the range reserved for the ray.
__global__ void ray_marching_kernel(
// rays info
const uint32_t n_rays,
const float *rays_o, // shape (n_rays, 3)
const float *rays_d, // shape (n_rays, 3)
const float *t_min, // shape (n_rays,)
const float *t_max, // shape (n_rays,)
// occupancy grid & contraction
const float *roi,
const int3 grid_res,
const bool *grid_binary, // shape (reso_x, reso_y, reso_z)
const ContractionType type,
// sampling
const float step_size,
const float cone_angle,
const int *packed_info,
// first round outputs
int *num_steps,
// second round outputs
int64_t *ray_indices,
float *t_starts,
float *t_ends)
{
CUDA_GET_THREAD_ID(i, n_rays);
bool is_first_round = (packed_info == nullptr);
// locate
// Move every per-ray pointer to this thread's ray.
rays_o += i * 3;
rays_d += i * 3;
t_min += i;
t_max += i;
if (is_first_round)
{
num_steps += i;
}
else
{
int base = packed_info[i * 2 + 0];
// NOTE(review): `steps` is read but never used in this kernel.
int steps = packed_info[i * 2 + 1];
t_starts += base;
t_ends += base;
ray_indices += base;
}
const float3 origin = make_float3(rays_o[0], rays_o[1], rays_o[2]);
const float3 dir = make_float3(rays_d[0], rays_d[1], rays_d[2]);
const float3 inv_dir = 1.0f / dir;
const float near = t_min[0], far = t_max[0];
// roi holds six floats: min corner followed by max corner.
const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);
const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);
// TODO: compute dt_max from occ resolution.
float dt_min = step_size;
float dt_max = 1e10f;
// j counts the samples emitted so far for this ray.
int j = 0;
// Current candidate interval [t0, t1] and its midpoint.
float t0 = near;
float dt = calc_dt(t0, cone_angle, dt_min, dt_max);
float t1 = t0 + dt;
float t_mid = (t0 + t1) * 0.5f;
// Occupancy is tested at the interval midpoint; marching stops once the
// midpoint reaches `far`.
while (t_mid < far)
{
// current center
const float3 xyz = origin + t_mid * dir;
if (grid_occupied_at(xyz, roi_min, roi_max, type, grid_res, grid_binary))
{
// Occupied: keep the interval (written only in the second round).
if (!is_first_round)
{
t_starts[j] = t0;
t_ends[j] = t1;
ray_indices[j] = i;
}
++j;
// march to next sample
// The next interval starts where this one ended.
t0 = t1;
t1 = t0 + calc_dt(t0, cone_angle, dt_min, dt_max);
t_mid = (t0 + t1) * 0.5f;
}
else
{
// march to next sample
switch (type)
{
case ContractionType::AABB:
// no contraction
// Empty cell: jump the midpoint past the current voxel, then rebuild
// an interval of width dt centred on the new midpoint.
t_mid = advance_to_next_voxel(
t_mid, dt_min, xyz, dir, inv_dir, roi_min, roi_max, grid_res, far);
dt = calc_dt(t_mid, cone_angle, dt_min, dt_max);
t0 = t_mid - dt * 0.5f;
t1 = t_mid + dt * 0.5f;
break;
default:
// any type of scene contraction does not work with DDA.
// Fall back to a regular step.
t0 = t1;
t1 = t0 + calc_dt(t0, cone_angle, dt_min, dt_max);
t_mid = (t0 + t1) * 0.5f;
break;
}
}
}
if (is_first_round)
{
*num_steps = j;
}
return;
}
// Host entry point for occupancy-grid ray marching.
//
// Runs ray_marching_kernel twice: a counting pass that yields the number of
// samples per ray, then (after a cumulative sum turns the counts into
// offsets) a writing pass that fills the packed sample buffers.
//
// Inputs:
//   rays_o, rays_d : (n_rays, 3) ray origins and directions
//   t_min, t_max   : (n_rays,) marching range per ray
//   roi            : (6,) region of interest, min corner then max corner
//   grid_binary    : 3-D boolean occupancy grid
//   type           : scene contraction type
//   step_size      : minimum step length
//   cone_angle     : step growth factor (step = t * cone_angle, at least step_size)
// Returns {packed_info, ray_indices, t_starts, t_ends} where
//   packed_info : (n_rays, 2) int32, [first sample offset, sample count]
//   ray_indices : (total_samples,) int64
//   t_starts    : (total_samples, 1)
//   t_ends      : (total_samples, 1)
std::vector<torch::Tensor> ray_marching(
    // rays
    const torch::Tensor rays_o,
    const torch::Tensor rays_d,
    const torch::Tensor t_min,
    const torch::Tensor t_max,
    // occupancy grid & contraction
    const torch::Tensor roi,
    const torch::Tensor grid_binary,
    const ContractionType type,
    // sampling
    const float step_size,
    const float cone_angle)
{
    DEVICE_GUARD(rays_o);
    CHECK_INPUT(rays_o);
    CHECK_INPUT(rays_d);
    CHECK_INPUT(t_min);
    CHECK_INPUT(t_max);
    CHECK_INPUT(roi);
    CHECK_INPUT(grid_binary);
    TORCH_CHECK(rays_o.ndimension() == 2 && rays_o.size(1) == 3);
    TORCH_CHECK(rays_d.ndimension() == 2 && rays_d.size(1) == 3);
    TORCH_CHECK(t_min.ndimension() == 1);
    TORCH_CHECK(t_max.ndimension() == 1);
    TORCH_CHECK(roi.ndimension() == 1 && roi.size(0) == 6);
    TORCH_CHECK(grid_binary.ndimension() == 3);

    const int n_rays = rays_o.size(0);

    // Nothing to march: return correctly shaped empty outputs instead of
    // launching an empty grid and indexing cum_steps[-1] below.
    if (n_rays == 0)
    {
        return {
            torch::empty({0, 2}, rays_o.options().dtype(torch::kInt32)),
            torch::empty({0}, rays_o.options().dtype(torch::kLong)),
            torch::empty({0, 1}, rays_o.options()),
            torch::empty({0, 1}, rays_o.options())};
    }

    const int3 grid_res = make_int3(
        grid_binary.size(0), grid_binary.size(1), grid_binary.size(2));

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // helper counter
    torch::Tensor num_steps = torch::empty(
        {n_rays}, rays_o.options().dtype(torch::kInt32));

    // First round: count number of samples per ray.
    ray_marching_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        // rays
        n_rays,
        rays_o.data_ptr<float>(),
        rays_d.data_ptr<float>(),
        t_min.data_ptr<float>(),
        t_max.data_ptr<float>(),
        // occupancy grid & contraction
        roi.data_ptr<float>(),
        grid_res,
        grid_binary.data_ptr<bool>(),
        type,
        // sampling
        step_size,
        cone_angle,
        nullptr, /* packed_info */
        // outputs
        num_steps.data_ptr<int>(),
        nullptr, /* ray_indices */
        nullptr, /* t_starts */
        nullptr /* t_ends */);

    // Exclusive offsets = inclusive cumsum - own count.
    torch::Tensor cum_steps = num_steps.cumsum(0, torch::kInt32);
    torch::Tensor packed_info = torch::stack({cum_steps - num_steps, num_steps}, 1);

    // output samples starts and ends
    // .item() synchronises with the device, so the counts are final here.
    int total_steps = cum_steps[cum_steps.size(0) - 1].item<int>();
    torch::Tensor t_starts = torch::empty({total_steps, 1}, rays_o.options());
    torch::Tensor t_ends = torch::empty({total_steps, 1}, rays_o.options());
    torch::Tensor ray_indices = torch::empty({total_steps}, cum_steps.options().dtype(torch::kLong));

    // Second round: same march, now writing the samples.
    ray_marching_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        // rays
        n_rays,
        rays_o.data_ptr<float>(),
        rays_d.data_ptr<float>(),
        t_min.data_ptr<float>(),
        t_max.data_ptr<float>(),
        // occupancy grid & contraction
        roi.data_ptr<float>(),
        grid_res,
        grid_binary.data_ptr<bool>(),
        type,
        // sampling
        step_size,
        cone_angle,
        packed_info.data_ptr<int>(),
        // outputs
        nullptr, /* num_steps */
        ray_indices.data_ptr<int64_t>(),
        t_starts.data_ptr<float>(),
        t_ends.data_ptr<float>());

    return {packed_info, ray_indices, t_starts, t_ends};
}
// ----------------------------------------------------------------------------
// Query the occupancy grid
// ----------------------------------------------------------------------------
// One thread per sample: writes the grid value found at samples[i] (via
// grid_occupied_at) to occs[i].
// NOTE(review): the template parameter list after `template` appears to be
// missing (scalar_t is used below but never declared here) - confirm.
template
__global__ void query_occ_kernel(
// rays info
const uint32_t n_samples,
const float *samples, // shape (n_samples, 3)
// occupancy grid & contraction
const float *roi,
const int3 grid_res,
const scalar_t *grid_value, // shape (reso_x, reso_y, reso_z)
const ContractionType type,
// outputs
scalar_t *occs)
{
CUDA_GET_THREAD_ID(i, n_samples);
// locate
samples += i * 3;
occs += i;
// roi holds six floats: min corner followed by max corner.
const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);
const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);
const float3 xyz = make_float3(samples[0], samples[1], samples[2]);
*occs = grid_occupied_at(xyz, roi_min, roi_max, type, grid_res, grid_value);
return;
}
// Host entry point: returns, for every row of `samples`, the value of the
// grid cell it falls into. The result has shape (n_samples,) and the dtype
// of `grid_value`; floating types and Bool are dispatched.
// NOTE(review): only `samples` goes through CHECK_INPUT; `roi` and
// `grid_value` are handed to the kernel unvalidated, and the shape of
// `samples` is not checked - confirm callers guarantee this.
// NOTE(review): the kernel launch configuration (`<<>>`) and the data_ptr
// template arguments look stripped in this view - confirm against the real file.
torch::Tensor grid_query(
const torch::Tensor samples,
// occupancy grid & contraction
const torch::Tensor roi,
const torch::Tensor grid_value,
const ContractionType type)
{
DEVICE_GUARD(samples);
CHECK_INPUT(samples);
const int n_samples = samples.size(0);
const int3 grid_res = make_int3(
grid_value.size(0), grid_value.size(1), grid_value.size(2));
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads);
torch::Tensor occs = torch::empty({n_samples}, grid_value.options());
// Instantiate the kernel for the grid's scalar type.
AT_DISPATCH_FLOATING_TYPES_AND(
at::ScalarType::Bool,
occs.scalar_type(),
"grid_query",
([&]
{ query_occ_kernel<<>>(
n_samples,
samples.data_ptr(),
// grid
roi.data_ptr(),
grid_res,
grid_value.data_ptr(),
type,
// outputs
occs.data_ptr()); }));
return occs;
}
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_transmittance.cu
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
#include "include/helpers_cuda.h"
// One thread per ray: transmittance at the start of every sample.
//
// packed_info holds, per ray, [offset of first sample, number of samples].
// For sample j the output is exp(-sum_{k<j} sigma_k * (end_k - start_k)),
// so the first sample of each ray always gets 1.
__global__ void transmittance_from_sigma_forward_kernel(
    const uint32_t n_rays,
    // inputs
    const int *packed_info,
    const float *starts,
    const float *ends,
    const float *sigmas,
    // outputs
    float *transmittance)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int offset = packed_info[i * 2 + 0];
    const int n_steps = packed_info[i * 2 + 1];
    if (n_steps == 0)
        return;

    // Views onto this ray's slice of the packed sample buffers.
    const float *ray_starts = starts + offset;
    const float *ray_ends = ends + offset;
    const float *ray_sigmas = sigmas + offset;
    float *ray_trans = transmittance + offset;

    // Optical depth accumulated over the samples before the current one.
    float optical_depth = 0.0f;
    for (int s = 0; s < n_steps; ++s)
    {
        ray_trans[s] = __expf(-optical_depth);
        optical_depth += ray_sigmas[s] * (ray_ends[s] - ray_starts[s]);
    }
}
// One thread per ray: gradient of the loss w.r.t. sigmas, given the gradient
// w.r.t. the transmittance produced by the forward kernel.
//
// Sample j only influences the transmittance of later samples, so the ray is
// walked back to front while accumulating -dL/dT_k * T_k over k > j.
__global__ void transmittance_from_sigma_backward_kernel(
    const uint32_t n_rays,
    // inputs
    const int *packed_info,
    const float *starts,
    const float *ends,
    const float *transmittance,
    const float *transmittance_grad,
    // outputs
    float *sigmas_grad)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int offset = packed_info[i * 2 + 0];
    const int n_steps = packed_info[i * 2 + 1];
    if (n_steps == 0)
        return;

    // Views onto this ray's slice of the packed sample buffers.
    const float *ray_trans = transmittance + offset;
    const float *ray_trans_grad = transmittance_grad + offset;
    const float *ray_starts = starts + offset;
    const float *ray_ends = ends + offset;
    float *ray_sigmas_grad = sigmas_grad + offset;

    // Contribution of all samples strictly behind the current one.
    float suffix = 0.0f;
    for (int s = n_steps - 1; s >= 0; --s)
    {
        ray_sigmas_grad[s] = suffix * (ray_ends[s] - ray_starts[s]);
        suffix += -ray_trans_grad[s] * ray_trans[s];
    }
}
// One thread per ray: transmittance at the start of every sample, computed
// as the running product of (1 - alpha) over the preceding samples of the
// same ray. The first sample of each ray gets 1.
__global__ void transmittance_from_alpha_forward_kernel(
    const uint32_t n_rays,
    // inputs
    const int *packed_info,
    const float *alphas,
    // outputs
    float *transmittance)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // packed_info row: [offset of first sample, number of samples].
    const int offset = packed_info[i * 2 + 0];
    const int n_steps = packed_info[i * 2 + 1];
    if (n_steps == 0)
        return;

    const float *ray_alphas = alphas + offset;
    float *ray_trans = transmittance + offset;

    float remaining = 1.0f;
    for (int s = 0; s < n_steps; ++s)
    {
        ray_trans[s] = remaining;
        remaining *= (1.0f - ray_alphas[s]);
    }
}
// One thread per ray: gradient of the loss w.r.t. alphas, given the gradient
// w.r.t. the transmittance produced by the forward kernel.
//
// The ray is walked back to front, accumulating -dL/dT_k * T_k over the
// samples behind the current one; the accumulated value is divided by
// (1 - alpha_j), floored at 1e-10 to avoid dividing by zero.
__global__ void transmittance_from_alpha_backward_kernel(
    const uint32_t n_rays,
    // inputs
    const int *packed_info,
    const float *alphas,
    const float *transmittance,
    const float *transmittance_grad,
    // outputs
    float *alphas_grad)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // packed_info row: [offset of first sample, number of samples].
    const int offset = packed_info[i * 2 + 0];
    const int n_steps = packed_info[i * 2 + 1];
    if (n_steps == 0)
        return;

    const float *ray_alphas = alphas + offset;
    const float *ray_trans = transmittance + offset;
    const float *ray_trans_grad = transmittance_grad + offset;
    float *ray_alphas_grad = alphas_grad + offset;

    // Contribution of all samples strictly behind the current one.
    float suffix = 0.0f;
    for (int s = n_steps - 1; s >= 0; --s)
    {
        ray_alphas_grad[s] = suffix / fmax(1.0f - ray_alphas[s], 1e-10f);
        suffix += -ray_trans_grad[s] * ray_trans[s];
    }
}
// Patch-based transmittance, forward pass.
//
// Thread (i, k) handles ray k of patch i. All rays of a patch share the same
// packed_info row [offset of first sample, number of samples]; sample j of
// ray k lives at flat index (offset + j) * patch_size + k of `alphas` and
// `transmittance`. The output for each sample is the running product of
// (1 - alpha) over the preceding samples of the SAME ray.
__global__ void transmittance_from_alpha_patch_based_forward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    // inputs
    const int *packed_info,
    const float *alphas,
    // outputs
    float *transmittance)
{
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size); // i is the patch id, k is the ray id within the patch

    // locate
    const int base = packed_info[i * 2 + 0];  // first sample of the patch
    const int steps = packed_info[i * 2 + 1]; // number of samples of the patch
    if (steps == 0)
        return;

    alphas += base * patch_size;        // move to the patch's first sample
    transmittance += base * patch_size; // move to the patch's first sample

    // accumulation
    float T = 1.0f;
    for (int j = 0; j < steps; ++j)
    {
        const uint32_t sample_idx = j * patch_size + k;
        transmittance[sample_idx] = T;
        // Use this ray's own alpha. Indexing with plain `j` would read the
        // first `steps` flat entries of the patch, i.e. other rays' alphas,
        // and would disagree with the backward kernel's indexing.
        T *= (1.0f - alphas[sample_idx]);
    }
}
// Patch-based transmittance, backward pass.
//
// Thread (i, k) handles ray k of patch i and produces the gradient w.r.t.
// that ray's alphas. Sample j of ray k sits at flat index
// (offset + j) * patch_size + k. The ray is walked back to front while
// accumulating -dL/dT * T of the samples behind the current one; the result
// is divided by (1 - alpha), floored at 1e-10.
__global__ void transmittance_from_alpha_patch_based_backward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    // inputs
    const int *packed_info,
    const float *alphas,
    const float *transmittance,
    const float *transmittance_grad,
    // outputs
    float *alphas_grad)
{
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);

    // packed_info row: [offset of first sample, number of samples].
    const int offset = packed_info[i * 2 + 0];
    const int n_steps = packed_info[i * 2 + 1];
    if (n_steps == 0)
        return;

    // Views onto this patch's block of the packed buffers.
    const float *patch_alphas = alphas + offset * patch_size;
    const float *patch_trans = transmittance + offset * patch_size;
    const float *patch_trans_grad = transmittance_grad + offset * patch_size;
    float *patch_alphas_grad = alphas_grad + offset * patch_size;

    float suffix = 0.0f;
    for (int s = n_steps - 1; s >= 0; --s)
    {
        const uint32_t flat = s * patch_size + k;
        patch_alphas_grad[flat] = suffix / fmax(1.0f - patch_alphas[flat], 1e-10f);
        suffix += -patch_trans_grad[flat] * patch_trans[flat];
    }
}
// Host wrapper around transmittance_from_sigma_forward_kernel.
//
// packed_info must be 2-D (one row per ray); starts, ends and sigmas must be
// (n_samples, 1). Returns a tensor shaped like `sigmas`.
// The output comes from torch::empty_like, and the kernel returns early for
// rays with zero samples.
// NOTE(review): the data_ptr() calls carry no element type in this view
// (the kernel expects int* for packed_info and float* otherwise) - confirm.
torch::Tensor transmittance_from_sigma_forward_naive(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas)
{
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
CHECK_INPUT(starts);
CHECK_INPUT(ends);
CHECK_INPUT(sigmas);
TORCH_CHECK(packed_info.ndimension() == 2);
// NOTE(review): `&` is a bitwise AND on two bools here; it gives the same
// truth value as `&&` but always evaluates both sides.
TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);
TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);
TORCH_CHECK(sigmas.ndimension() == 2 & sigmas.size(1) == 1);
// NOTE(review): n_samples is not used below.
const uint32_t n_samples = sigmas.size(0);
const uint32_t n_rays = packed_info.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// outputs
torch::Tensor transmittance = torch::empty_like(sigmas);
// parallel across rays
transmittance_from_sigma_forward_kernel<<<
blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
// inputs
packed_info.data_ptr(),
starts.data_ptr(),
ends.data_ptr(),
sigmas.data_ptr(),
// outputs
transmittance.data_ptr());
return transmittance;
}
// Host wrapper around transmittance_from_sigma_backward_kernel.
//
// Takes the forward result (`transmittance`) and its incoming gradient and
// returns the gradient w.r.t. sigmas, shaped like `transmittance`.
// packed_info must be 2-D; all other inputs must be (n_samples, 1).
// NOTE(review): the data_ptr() calls carry no element type in this view
// (the kernel expects int* for packed_info and float* otherwise) - confirm.
torch::Tensor transmittance_from_sigma_backward_naive(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor transmittance,
torch::Tensor transmittance_grad)
{
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
CHECK_INPUT(starts);
CHECK_INPUT(ends);
CHECK_INPUT(transmittance);
CHECK_INPUT(transmittance_grad);
TORCH_CHECK(packed_info.ndimension() == 2);
// NOTE(review): `&` is a bitwise AND on two bools here; same truth value
// as `&&`, but both sides are always evaluated.
TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);
TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);
TORCH_CHECK(transmittance.ndimension() == 2 & transmittance.size(1) == 1);
TORCH_CHECK(transmittance_grad.ndimension() == 2 & transmittance_grad.size(1) == 1);
// NOTE(review): n_samples is not used below.
const uint32_t n_samples = transmittance.size(0);
const uint32_t n_rays = packed_info.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// outputs
torch::Tensor sigmas_grad = torch::empty_like(transmittance);
// parallel across rays
transmittance_from_sigma_backward_kernel<<<
blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
// inputs
packed_info.data_ptr(),
starts.data_ptr(),
ends.data_ptr(),
transmittance.data_ptr(),
transmittance_grad.data_ptr(),
// outputs
sigmas_grad.data_ptr());
return sigmas_grad;
}
// Host wrapper around transmittance_from_alpha_forward_kernel.
//
// packed_info must be 2-D (one row per ray) and alphas (n_samples, 1).
// Returns a tensor shaped like `alphas`, allocated with torch::empty_like.
// NOTE(review): the data_ptr() calls carry no element type in this view
// (the kernel expects int* for packed_info and float* otherwise) - confirm.
torch::Tensor transmittance_from_alpha_forward_naive(
torch::Tensor packed_info, torch::Tensor alphas)
{
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
CHECK_INPUT(alphas);
// NOTE(review): `&` is a bitwise AND on two bools here; same truth value
// as `&&`, but both sides are always evaluated.
TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1);
TORCH_CHECK(packed_info.ndimension() == 2);
// NOTE(review): n_samples is not used below.
const uint32_t n_samples = alphas.size(0);
const uint32_t n_rays = packed_info.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// outputs
torch::Tensor transmittance = torch::empty_like(alphas);
// parallel across rays
transmittance_from_alpha_forward_kernel<<<
blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
// inputs
packed_info.data_ptr(),
alphas.data_ptr(),
// outputs
transmittance.data_ptr());
return transmittance;
}
// Host wrapper around transmittance_from_alpha_backward_kernel.
//
// Inputs:
//   packed_info        : 2-D, one row per ray
//   alphas             : (n_samples, 1), the forward input
//   transmittance      : (n_samples, 1), the forward output
//   transmittance_grad : (n_samples, 1), gradient w.r.t. the forward output
// Returns the gradient w.r.t. alphas, shaped like `alphas`.
torch::Tensor transmittance_from_alpha_backward_naive(
    torch::Tensor packed_info,
    torch::Tensor alphas,
    torch::Tensor transmittance,
    torch::Tensor transmittance_grad)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    // alphas is read through a raw pointer by the kernel, so it needs the
    // same validation as every other tensor (the forward wrapper does this).
    CHECK_INPUT(alphas);
    CHECK_INPUT(transmittance);
    CHECK_INPUT(transmittance_grad);
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 2 && alphas.size(1) == 1);
    TORCH_CHECK(transmittance.ndimension() == 2 && transmittance.size(1) == 1);
    TORCH_CHECK(transmittance_grad.ndimension() == 2 && transmittance_grad.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // outputs
    torch::Tensor alphas_grad = torch::empty_like(alphas);

    // parallel across rays
    transmittance_from_alpha_backward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        transmittance.data_ptr<float>(),
        transmittance_grad.data_ptr<float>(),
        // outputs
        alphas_grad.data_ptr<float>());
    return alphas_grad;
}
// Host wrapper around transmittance_from_alpha_patch_based_forward_kernel.
//
// Inputs:
//   packed_info : 2-D, one row per patch
//   alphas      : (n_samples, patch_size, 1)
// Returns the transmittance, shaped like `alphas`.
//
// The launch uses a 2-D block of 256 threads: the y extent is the patch size
// rounded up to a power of two (capped at 256), the x extent covers as many
// patches as fit in the remaining threads.
torch::Tensor transmittance_from_alpha_patch_based_forward_naive(
    torch::Tensor packed_info, torch::Tensor alphas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(alphas);
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 3 && alphas.size(2) == 1);

    const uint32_t n_patches = packed_info.size(0);
    const uint32_t patch_size = alphas.size(1);

    // outputs
    torch::Tensor transmittance = torch::empty_like(alphas);

    // Nothing to compute; also avoids a zero-sized launch configuration.
    if (n_patches == 0 || alphas.numel() == 0)
        return transmittance;

    // Smallest power of two >= patch_size, capped at the 256-thread budget.
    // Without the cap, patch sizes above 256 would yield 256 / 512 == 0
    // threads along x and a division by zero in the grid computation below.
    // Larger patches are covered by several blocks along y; this assumes
    // CUDA_GET_THREAD_ID_2D derives the in-patch index from blockIdx.y as
    // well as threadIdx.y - TODO confirm.
    uint32_t thread_for_a_patch = 1;
    while (thread_for_a_patch < patch_size && thread_for_a_patch < 256)
        thread_for_a_patch <<= 1;
    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;

    const dim3 threads(thread_for_n_samples, thread_for_a_patch);
    const dim3 blocks((n_patches + threads.x - 1) / threads.x,
                      (patch_size + threads.y - 1) / threads.y);

    // parallel across patches (x) and rays within a patch (y)
    transmittance_from_alpha_patch_based_forward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_patches,
        patch_size,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        // outputs
        transmittance.data_ptr<float>());
    return transmittance;
}
// Host wrapper around transmittance_from_alpha_patch_based_backward_kernel.
//
// Inputs:
//   packed_info        : 2-D, one row per patch
//   alphas             : (n_samples, patch_size, 1), the forward input
//   transmittance      : (n_samples, patch_size, 1), the forward output
//   transmittance_grad : (n_samples, patch_size, 1), gradient w.r.t. the
//                        forward output
// Returns the gradient w.r.t. alphas, shaped like `alphas`.
//
// The launch uses a 2-D block of 256 threads: the y extent is the patch size
// rounded up to a power of two (capped at 256), the x extent covers as many
// patches as fit in the remaining threads.
torch::Tensor transmittance_from_alpha_patch_based_backward_naive(
    torch::Tensor packed_info,
    torch::Tensor alphas,
    torch::Tensor transmittance,
    torch::Tensor transmittance_grad)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    // alphas is read through a raw pointer by the kernel, so validate it
    // like the other tensors.
    CHECK_INPUT(alphas);
    CHECK_INPUT(transmittance);
    CHECK_INPUT(transmittance_grad);
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 3 && alphas.size(2) == 1);
    TORCH_CHECK(transmittance.ndimension() == 3 && transmittance.size(2) == 1);
    TORCH_CHECK(transmittance_grad.ndimension() == 3 && transmittance_grad.size(2) == 1);

    const uint32_t n_patches = packed_info.size(0);
    const uint32_t patch_size = alphas.size(1);

    // outputs
    torch::Tensor alphas_grad = torch::empty_like(alphas);

    // Nothing to compute; also avoids a zero-sized launch configuration.
    if (n_patches == 0 || alphas.numel() == 0)
        return alphas_grad;

    // Smallest power of two >= patch_size, capped at the 256-thread budget.
    // Without the cap, patch sizes above 256 would yield 256 / 512 == 0
    // threads along x and a division by zero in the grid computation below.
    // Larger patches are covered by several blocks along y; this assumes
    // CUDA_GET_THREAD_ID_2D derives the in-patch index from blockIdx.y as
    // well as threadIdx.y - TODO confirm.
    uint32_t thread_for_a_patch = 1;
    while (thread_for_a_patch < patch_size && thread_for_a_patch < 256)
        thread_for_a_patch <<= 1;
    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;

    const dim3 threads(thread_for_n_samples, thread_for_a_patch);
    const dim3 blocks((n_patches + threads.x - 1) / threads.x,
                      (patch_size + threads.y - 1) / threads.y);

    // parallel across patches (x) and rays within a patch (y)
    transmittance_from_alpha_patch_based_backward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_patches,
        patch_size,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        transmittance.data_ptr<float>(),
        transmittance_grad.data_ptr<float>(),
        // outputs
        alphas_grad.data_ptr<float>());
    return alphas_grad;
}
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_transmittance_cub.cu
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
// CUB is supported in CUDA >= 11.0
// ExclusiveScanByKey is supported in CUB >= 1.15.0 (CUDA >= 11.6)
// See: https://github.com/NVIDIA/cub/tree/main#releases
#include "include/helpers_cuda.h"
#if CUB_SUPPORTS_SCAN_BY_KEY()
#include
#endif
// Binary functor returning a * b; passed as the scan operator to
// cub::DeviceScan::ExclusiveScanByKey in exclusive_prod_by_key.
struct Product
{
// NOTE(review): the template parameter list after `template` appears to be
// missing (T is used below but never declared here) - confirm.
template
__host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const { return a * b; }
};
#if CUB_SUPPORTS_SCAN_BY_KEY()
// Exclusive prefix sum of `input` into `output`, restarted whenever the key
// changes (keys are compared with cub::Equality). Runs on the current CUDA
// stream through cub::DeviceScan::ExclusiveSumByKey.
// NOTE(review): the template parameter list after `template` and the type
// argument of std::numeric_limits appear to be missing in this view - confirm.
template
inline void exclusive_sum_by_key(
KeysInputIteratorT keys, ValuesInputIteratorT input, ValuesOutputIteratorT output, int64_t num_items)
{
// NOTE(review): the message mentions LONG_MAX; verify that it matches the
// limit actually compared against on the line below.
TORCH_CHECK(num_items <= std::numeric_limits::max(),
"cub ExclusiveSumByKey does not support more than LONG_MAX elements");
CUB_WRAPPER(cub::DeviceScan::ExclusiveSumByKey, keys, input, output,
num_items, cub::Equality(), at::cuda::getCurrentCUDAStream());
}
// Exclusive prefix product of `input` into `output`, restarted whenever the
// key changes (keys are compared with cub::Equality). Uses
// cub::DeviceScan::ExclusiveScanByKey with Product() as the operator and
// 1.0f as the initial value, on the current CUDA stream.
// NOTE(review): the template parameter list after `template` and the type
// argument of std::numeric_limits appear to be missing in this view - confirm.
template
inline void exclusive_prod_by_key(
KeysInputIteratorT keys, ValuesInputIteratorT input, ValuesOutputIteratorT output, int64_t num_items)
{
// NOTE(review): the message mentions LONG_MAX; verify that it matches the
// limit actually compared against on the line below.
TORCH_CHECK(num_items <= std::numeric_limits::max(),
"cub ExclusiveScanByKey does not support more than LONG_MAX elements");
CUB_WRAPPER(cub::DeviceScan::ExclusiveScanByKey, keys, input, output, Product(), 1.0f,
num_items, cub::Equality(), at::cuda::getCurrentCUDAStream());
}
#endif
// Transmittance from sigmas using a CUB scan-by-key (parallel across samples).
//
// Inputs:
//   ray_indices : (n_samples,) ray id of every sample; used as the scan key,
//                 so samples of one ray must be contiguous
//   starts, ends, sigmas : (n_samples, 1)
// Returns exp(-exclusive_cumsum(sigma * (end - start))) per ray, shaped like
// `sigmas`.
// Throws std::runtime_error when the extension was built without CUB
// scan-by-key support.
torch::Tensor transmittance_from_sigma_forward_cub(
    torch::Tensor ray_indices,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas)
{
    DEVICE_GUARD(ray_indices);
    CHECK_INPUT(ray_indices);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(sigmas);
    TORCH_CHECK(ray_indices.ndimension() == 1);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);

    const uint32_t n_samples = sigmas.size(0);

    // parallel across samples
    torch::Tensor sigmas_dt = sigmas * (ends - starts);
    torch::Tensor sigmas_dt_cumsum = torch::empty_like(sigmas);
#if CUB_SUPPORTS_SCAN_BY_KEY()
    // ray_indices are int64 (ray_marching emits kLong indices).
    exclusive_sum_by_key(
        ray_indices.data_ptr<int64_t>(),
        sigmas_dt.data_ptr<float>(),
        sigmas_dt_cumsum.data_ptr<float>(),
        n_samples);
#else
    // The exception must actually be thrown; otherwise the uninitialised
    // buffer above would be returned as if it were a valid result.
    throw std::runtime_error("CUB functions are only supported in CUDA >= 11.6.");
#endif
    torch::Tensor transmittance = (-sigmas_dt_cumsum).exp();
    return transmittance;
}
// Gradient of transmittance_from_sigma_forward_cub w.r.t. sigmas.
//
// Inputs:
//   ray_indices        : (n_samples,) ray id of every sample (scan key)
//   starts, ends       : (n_samples, 1)
//   transmittance      : (n_samples, 1), the forward output
//   transmittance_grad : (n_samples, 1), gradient w.r.t. the forward output
// The per-ray suffix sum of -grad * T is obtained by running the exclusive
// scan over reverse iterators, then scaled by (end - start).
// Throws std::runtime_error when the extension was built without CUB
// scan-by-key support.
torch::Tensor transmittance_from_sigma_backward_cub(
    torch::Tensor ray_indices,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor transmittance,
    torch::Tensor transmittance_grad)
{
    DEVICE_GUARD(ray_indices);
    CHECK_INPUT(ray_indices);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(transmittance);
    CHECK_INPUT(transmittance_grad);
    TORCH_CHECK(ray_indices.ndimension() == 1);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(transmittance.ndimension() == 2 && transmittance.size(1) == 1);
    TORCH_CHECK(transmittance_grad.ndimension() == 2 && transmittance_grad.size(1) == 1);

    const uint32_t n_samples = transmittance.size(0);

    // parallel across samples
    torch::Tensor sigmas_dt_cumsum_grad = -transmittance_grad * transmittance;
    torch::Tensor sigmas_dt_grad = torch::empty_like(transmittance_grad);
#if CUB_SUPPORTS_SCAN_BY_KEY()
    // Reverse iterators turn the exclusive prefix scan into a suffix scan.
    exclusive_sum_by_key(
        thrust::make_reverse_iterator(ray_indices.data_ptr<int64_t>() + n_samples),
        thrust::make_reverse_iterator(sigmas_dt_cumsum_grad.data_ptr<float>() + n_samples),
        thrust::make_reverse_iterator(sigmas_dt_grad.data_ptr<float>() + n_samples),
        n_samples);
#else
    // The exception must actually be thrown; otherwise the uninitialised
    // buffer above would be used as if it were a valid result.
    throw std::runtime_error("CUB functions are only supported in CUDA >= 11.6.");
#endif
    torch::Tensor sigmas_grad = sigmas_dt_grad * (ends - starts);
    return sigmas_grad;
}
// Transmittance from alphas using a CUB scan-by-key (parallel across samples).
//
// Inputs:
//   ray_indices : (n_samples,) ray id of every sample; used as the scan key
//   alphas      : (n_samples, 1)
// Returns the per-ray exclusive running product of (1 - alpha), shaped like
// `alphas`.
// Throws std::runtime_error when the extension was built without CUB
// scan-by-key support.
torch::Tensor transmittance_from_alpha_forward_cub(
    torch::Tensor ray_indices, torch::Tensor alphas)
{
    DEVICE_GUARD(ray_indices);
    CHECK_INPUT(ray_indices);
    CHECK_INPUT(alphas);
    TORCH_CHECK(alphas.ndimension() == 2 && alphas.size(1) == 1);
    TORCH_CHECK(ray_indices.ndimension() == 1);

    const uint32_t n_samples = alphas.size(0);

    // parallel across samples
    torch::Tensor transmittance = torch::empty_like(alphas);
#if CUB_SUPPORTS_SCAN_BY_KEY()
    // Keep the scan input in a named tensor so its storage outlives the
    // call and the expression is evaluated once, however the wrapper macro
    // expands its arguments.
    torch::Tensor one_minus_alphas = 1.0f - alphas;
    exclusive_prod_by_key(
        ray_indices.data_ptr<int64_t>(),
        one_minus_alphas.data_ptr<float>(),
        transmittance.data_ptr<float>(),
        n_samples);
#else
    // The exception must actually be thrown; otherwise the uninitialised
    // buffer above would be returned as if it were a valid result.
    throw std::runtime_error("CUB functions are only supported in CUDA >= 11.6.");
#endif
    return transmittance;
}
// Gradient of transmittance_from_alpha_forward_cub w.r.t. alphas.
//
// Inputs:
//   ray_indices        : (n_samples,) ray id of every sample (scan key)
//   alphas             : the forward input
//   transmittance      : (n_samples, 1), the forward output
//   transmittance_grad : (n_samples, 1), gradient w.r.t. the forward output
// The per-ray suffix sum of -grad * T is obtained by running the exclusive
// scan over reverse iterators, then divided by (1 - alpha), floored at 1e-10.
// Throws std::runtime_error when the extension was built without CUB
// scan-by-key support.
torch::Tensor transmittance_from_alpha_backward_cub(
    torch::Tensor ray_indices,
    torch::Tensor alphas,
    torch::Tensor transmittance,
    torch::Tensor transmittance_grad)
{
    DEVICE_GUARD(ray_indices);
    CHECK_INPUT(ray_indices);
    CHECK_INPUT(transmittance);
    CHECK_INPUT(transmittance_grad);
    TORCH_CHECK(ray_indices.ndimension() == 1);
    TORCH_CHECK(transmittance.ndimension() == 2 && transmittance.size(1) == 1);
    TORCH_CHECK(transmittance_grad.ndimension() == 2 && transmittance_grad.size(1) == 1);

    const uint32_t n_samples = transmittance.size(0);

    // parallel across samples
    torch::Tensor sigmas_dt_cumsum_grad = -transmittance_grad * transmittance;
    torch::Tensor sigmas_dt_grad = torch::empty_like(transmittance_grad);
#if CUB_SUPPORTS_SCAN_BY_KEY()
    // Reverse iterators turn the exclusive prefix scan into a suffix scan.
    exclusive_sum_by_key(
        thrust::make_reverse_iterator(ray_indices.data_ptr<int64_t>() + n_samples),
        thrust::make_reverse_iterator(sigmas_dt_cumsum_grad.data_ptr<float>() + n_samples),
        thrust::make_reverse_iterator(sigmas_dt_grad.data_ptr<float>() + n_samples),
        n_samples);
#else
    // The exception must actually be thrown; otherwise the uninitialised
    // buffer above would be used as if it were a valid result.
    throw std::runtime_error("CUB functions are only supported in CUDA >= 11.6.");
#endif
    torch::Tensor alphas_grad = sigmas_dt_grad / (1.0f - alphas).clamp_min(1e-10f);
    return alphas_grad;
}
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_weight.cu
================================================
/*
* Copyright (c) 2022 Ruilong Li, UC Berkeley.
*/
#include "include/helpers_cuda.h"
// One thread per ray: rendering weight of every sample from its sigma.
//
// For sample j: alpha_j = 1 - exp(-sigma_j * (end_j - start_j)) and
// weight_j = alpha_j * T_j, where T_j is the product of (1 - alpha) over the
// preceding samples of the ray.
__global__ void weight_from_sigma_forward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *starts,
    const float *ends,
    const float *sigmas,
    // outputs
    float *weights)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // packed_info row: [offset of first sample, number of samples].
    const int offset = packed_info[i * 2 + 0];
    const int n_steps = packed_info[i * 2 + 1];
    if (n_steps == 0)
        return;

    // Views onto this ray's slice of the packed sample buffers.
    const float *ray_starts = starts + offset;
    const float *ray_ends = ends + offset;
    const float *ray_sigmas = sigmas + offset;
    float *ray_weights = weights + offset;

    float remaining = 1.f;
    for (int s = 0; s < n_steps; ++s)
    {
        const float length = ray_ends[s] - ray_starts[s];
        const float opacity = 1.f - __expf(-ray_sigmas[s] * length);
        ray_weights[s] = opacity * remaining;
        remaining *= (1.f - opacity);
    }
}
// One thread per ray: gradient of the loss w.r.t. sigmas, given the weights
// from the forward pass and the gradient w.r.t. those weights.
//
// A first pass sums grad_weight * weight over the whole ray. The second pass
// walks front to back; at sample j the running sum still includes sample j
// itself and everything behind it, and is reduced after the sample has been
// processed.
__global__ void weight_from_sigma_backward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *starts,
    const float *ends,
    const float *sigmas,
    const float *weights,
    const float *grad_weights,
    // outputs
    float *grad_sigmas)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // packed_info row: [offset of first sample, number of samples].
    const int offset = packed_info[i * 2 + 0];
    const int n_steps = packed_info[i * 2 + 1];
    if (n_steps == 0)
        return;

    // Views onto this ray's slice of the packed sample buffers.
    const float *ray_starts = starts + offset;
    const float *ray_ends = ends + offset;
    const float *ray_sigmas = sigmas + offset;
    const float *ray_weights = weights + offset;
    const float *ray_grad_weights = grad_weights + offset;
    float *ray_grad_sigmas = grad_sigmas + offset;

    // Pass 1: total of grad_weight * weight along the ray.
    float pending = 0;
    for (int s = 0; s < n_steps; ++s)
    {
        pending += ray_grad_weights[s] * ray_weights[s];
    }

    // Pass 2: per-sample gradient.
    float remaining = 1.f;
    for (int s = 0; s < n_steps; ++s)
    {
        const float length = ray_ends[s] - ray_starts[s];
        const float opacity = 1.f - __expf(-ray_sigmas[s] * length);
        ray_grad_sigmas[s] = (ray_grad_weights[s] * remaining - pending) * length;
        pending -= ray_grad_weights[s] * ray_weights[s];
        remaining *= (1.f - opacity);
    }
}
// template
// Patch-based rendering weights, forward pass.
//
// Thread (i, k) handles ray k of patch i. All rays of a patch share one
// packed_info row; sample j of ray k sits at flat index
// (offset + j) * patch_size + k. weight = alpha * T, with T the product of
// (1 - alpha) over the preceding samples of the same ray.
__global__ void weight_from_alpha_patch_based_forward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    const int *packed_info, // (n_patches, 2)
    const float *alphas,    // (n_samples, patch_size, 1)
    // outputs
    float *weights)
{
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);

    // packed_info row: [offset of first sample, number of samples].
    const int offset = packed_info[i * 2 + 0];
    const int n_steps = packed_info[i * 2 + 1];
    if (n_steps == 0)
        return;

    // Views onto this patch's block of the packed buffers.
    const float *patch_alphas = alphas + offset * patch_size;
    float *patch_weights = weights + offset * patch_size;

    float remaining = 1.f;
    for (int s = 0; s < n_steps; ++s)
    {
        const uint32_t flat = s * patch_size + k;
        const float opacity = patch_alphas[flat];
        patch_weights[flat] = opacity * remaining;
        remaining *= (1.f - opacity);
    }
}
// Patch-based rendering weights and transmittance in a single pass.
//
// Thread (i, k) handles ray k of patch i; sample j of ray k sits at flat
// index (offset + j) * patch_size + k. For every sample the kernel stores
// the transmittance T reaching it and the weight alpha * T, where T is the
// product of (1 - alpha) over the preceding samples of the same ray.
__global__ void weight_and_transmittance_from_alpha_patch_based_forward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    const int *packed_info, // (n_patches, 2)
    const float *alphas,    // (n_samples, patch_size, 1)
    // outputs
    float *weights,
    float *transmittance)
{
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);

    // packed_info row: [offset of first sample, number of samples].
    const int offset = packed_info[i * 2 + 0];
    const int n_steps = packed_info[i * 2 + 1];
    if (n_steps == 0)
        return;

    // Views onto this patch's block of the packed buffers.
    const float *patch_alphas = alphas + offset * patch_size;
    float *patch_weights = weights + offset * patch_size;
    float *patch_trans = transmittance + offset * patch_size;

    float remaining = 1.f;
    for (int s = 0; s < n_steps; ++s)
    {
        const uint32_t flat = s * patch_size + k;
        const float opacity = patch_alphas[flat];
        patch_trans[flat] = remaining;
        patch_weights[flat] = opacity * remaining;
        remaining *= (1.f - opacity);
    }
}
// One thread per ray: rendering weight of every sample from its alpha.
// weight_j = alpha_j * T_j, with T_j the product of (1 - alpha) over the
// preceding samples of the ray.
__global__ void weight_from_alpha_forward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *alphas,
    // outputs
    float *weights)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // packed_info row: [offset of first sample, number of samples].
    const int offset = packed_info[i * 2 + 0];
    const int n_steps = packed_info[i * 2 + 1];
    if (n_steps == 0)
        return;

    const float *ray_alphas = alphas + offset;
    float *ray_weights = weights + offset;

    float remaining = 1.f;
    for (int s = 0; s < n_steps; ++s)
    {
        const float opacity = ray_alphas[s];
        ray_weights[s] = opacity * remaining;
        remaining *= (1.f - opacity);
    }
}
// Backward pass of weight_from_alpha_forward_kernel: gradient w.r.t. alphas.
//
// Thread i processes ray i in two passes:
//   1. sum grad_weights * weights over all samples of the ray;
//   2. walk the samples in order, emitting
//        grad_alpha_j = (grad_weight_j * T_j - S_j) / max(1 - alpha_j, 1e-10)
//      where T_j is the running product of (1 - alpha) before sample j and S_j is
//      the sum from pass 1 restricted to samples j and later.
//
// packed_info: row i = {index of the ray's first sample, number of samples}.
// grad_alphas: output, addressed like alphas.
__global__ void weight_from_alpha_backward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *alphas,
    const float *weights,
    const float *grad_weights,
    // outputs
    float *grad_alphas)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int first_sample = packed_info[2 * i];
    const int n_steps = packed_info[2 * i + 1];
    if (n_steps == 0)
        return;

    // Views starting at this ray's first sample.
    const float *ray_alphas = alphas + first_sample;
    const float *ray_weights = weights + first_sample;
    const float *ray_grad_w = grad_weights + first_sample;
    float *ray_grad_a = grad_alphas + first_sample;

    // Pass 1: total of grad_weights * weights along the ray.
    float tail_sum = 0;
    int step = 0;
    while (step < n_steps)
    {
        tail_sum += ray_grad_w[step] * ray_weights[step];
        ++step;
    }

    // Pass 2: emit gradients while peeling the current sample off tail_sum.
    float remaining = 1.f;
    for (step = 0; step < n_steps; ++step)
    {
        const float a = ray_alphas[step];
        // The clamp keeps the division finite when alpha reaches 1.
        ray_grad_a[step] = (ray_grad_w[step] * remaining - tail_sum) / fmaxf(1.f - a, 1e-10f);
        tail_sum -= ray_grad_w[step] * ray_weights[step];
        remaining *= (1.f - a);
    }
}
// Forward pass: rendering weights from opacities, divided by a per-sample
// importance value.
//
// Thread i processes ray i. Walking the ray's samples in order it writes
//     weight_j = alpha_j * prod_{m < j} (1 - alpha_m) / importance_j
//
// packed_info: row i = {index of the ray's first sample, number of samples}.
// alphas, importance: flat per-sample inputs.
// weights:     output, addressed like alphas.
__global__ void weight_from_alpha_importance_sampling_forward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *alphas,
    const float *importance,
    // outputs
    float *weights)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int first_sample = packed_info[2 * i];
    const int n_steps = packed_info[2 * i + 1];
    if (n_steps == 0)
        return;

    // Views starting at this ray's first sample.
    const float *ray_alphas = alphas + first_sample;
    const float *ray_importance = importance + first_sample;
    float *ray_weights = weights + first_sample;

    // Fraction of light that has not been absorbed before the current sample.
    float remaining = 1.f;
    int step = 0;
    while (step < n_steps)
    {
        const float a = ray_alphas[step];
        ray_weights[step] = a * remaining / ray_importance[step];
        remaining *= (1.f - a);
        ++step;
    }
}
// Backward pass of weight_from_alpha_importance_sampling_forward_kernel:
// gradient w.r.t. alphas.
//
// Thread i processes ray i in two passes:
//   1. sum grad_weights * weights over all samples of the ray;
//   2. walk the samples in order, emitting
//        grad_alpha_j = (grad_weight_j * T_j - importance_j * S_j)
//                       / (importance_j * max(1 - alpha_j, 1e-10))
//      where T_j is the running product of (1 - alpha) before sample j and S_j is
//      the sum from pass 1 restricted to samples j and later.
//
// packed_info: row i = {index of the ray's first sample, number of samples}.
// grad_alphas: output, addressed like alphas.
__global__ void weight_from_alpha_importance_sampling_backward_kernel(
    const uint32_t n_rays,
    const int *packed_info,
    const float *alphas,
    const float *weights,
    const float *grad_weights,
    const float *importance,
    // outputs
    float *grad_alphas)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    const int first_sample = packed_info[2 * i];
    const int n_steps = packed_info[2 * i + 1];
    if (n_steps == 0)
        return;

    // Views starting at this ray's first sample.
    const float *ray_alphas = alphas + first_sample;
    const float *ray_weights = weights + first_sample;
    const float *ray_grad_w = grad_weights + first_sample;
    const float *ray_importance = importance + first_sample;
    float *ray_grad_a = grad_alphas + first_sample;

    // Pass 1: total of grad_weights * weights along the ray.
    float tail_sum = 0;
    int step = 0;
    while (step < n_steps)
    {
        tail_sum += ray_grad_w[step] * ray_weights[step];
        ++step;
    }

    // Pass 2: emit gradients while peeling the current sample off tail_sum.
    float remaining = 1.f;
    for (step = 0; step < n_steps; ++step)
    {
        const float a = ray_alphas[step];
        // The clamp keeps the division finite when alpha reaches 1.
        ray_grad_a[step] = (ray_grad_w[step] * remaining - ray_importance[step] * tail_sum) / (ray_importance[step] * fmaxf(1.f - a, 1e-10f));
        tail_sum -= ray_grad_w[step] * ray_weights[step];
        remaining *= (1.f - a);
    }
}
// Backward pass for patch-packed samples: gradient of the rendering weights
// w.r.t. alphas.
//
// Each thread handles one (patch i, in-patch ray k) pair in two passes:
//   1. sum grad_weights * weights over the ray's samples;
//   2. walk the samples in order, emitting
//        grad_alpha_j = (grad_weight_j * T_j - S_j) / max(1 - alpha_j, 1e-10)
//      where T_j is the running product of (1 - alpha) before sample j and S_j is
//      the sum from pass 1 restricted to samples j and later.
//
// packed_info: row i = {first sample of the patch, number of samples}.
// Sample j of ray k lives at [j * patch_size + k] relative to the patch's first
// sample in every per-sample buffer.
__global__ void weight_from_alpha_patch_based_backward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    const int *packed_info,
    const float *alphas,
    const float *weights,
    const float *grad_weights,
    // outputs
    float *grad_alphas)
{
    // i: patch id, k: ray id inside the patch.
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);

    const int first_sample = packed_info[2 * i];
    const int n_steps = packed_info[2 * i + 1];
    if (n_steps == 0)
        return;

    // Views starting at the first sample row that belongs to this patch.
    const float *patch_alphas = alphas + first_sample * patch_size;
    const float *patch_weights = weights + first_sample * patch_size;
    const float *patch_grad_w = grad_weights + first_sample * patch_size;
    float *patch_grad_a = grad_alphas + first_sample * patch_size;

    // Pass 1: total of grad_weights * weights along this ray.
    float tail_sum = 0;
    int step = 0;
    while (step < n_steps)
    {
        const uint32_t idx = step * patch_size + k;
        tail_sum += patch_grad_w[idx] * patch_weights[idx];
        ++step;
    }

    // Pass 2: emit gradients while peeling the current sample off tail_sum.
    float remaining = 1.f;
    for (step = 0; step < n_steps; ++step)
    {
        const uint32_t idx = step * patch_size + k;
        const float a = patch_alphas[idx];
        // The clamp keeps the division finite when alpha reaches 1.
        patch_grad_a[idx] = (patch_grad_w[idx] * remaining - tail_sum) / fmaxf(1.f - a, 1e-10f);
        tail_sum -= patch_grad_w[idx] * patch_weights[idx];
        remaining *= (1.f - a);
    }
}
// Backward pass paired with the weight-and-transmittance patch-based forward
// kernel: gradient w.r.t. alphas.
//
// The signature takes only the gradient flowing into the weights; there is no
// input for a gradient flowing into the transmittance output, so only the
// weight term contributes to grad_alphas here.
//
// Each thread handles one (patch i, in-patch ray k) pair in two passes:
//   1. sum grad_weights * weights over the ray's samples;
//   2. walk the samples in order, emitting
//        grad_alpha_j = (grad_weight_j * T_j - S_j) / max(1 - alpha_j, 1e-10)
//      where T_j is the running product of (1 - alpha) before sample j and S_j is
//      the sum from pass 1 restricted to samples j and later.
//
// packed_info: row i = {first sample of the patch, number of samples}.
// Sample j of ray k lives at [j * patch_size + k] relative to the patch's first
// sample in every per-sample buffer.
__global__ void weight_and_transmittance_from_alpha_patch_based_backward_kernel(
    const uint32_t n_patches,
    const uint32_t patch_size,
    const int *packed_info,
    const float *alphas,
    const float *weights,
    const float *grad_weights,
    // outputs
    float *grad_alphas)
{
    // i: patch id, k: ray id inside the patch.
    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);

    const int first_sample = packed_info[2 * i];
    const int n_steps = packed_info[2 * i + 1];
    if (n_steps == 0)
        return;

    // Views starting at the first sample row that belongs to this patch.
    const float *patch_alphas = alphas + first_sample * patch_size;
    const float *patch_weights = weights + first_sample * patch_size;
    const float *patch_grad_w = grad_weights + first_sample * patch_size;
    float *patch_grad_a = grad_alphas + first_sample * patch_size;

    // Pass 1: total of grad_weights * weights along this ray.
    float tail_sum = 0;
    int step = 0;
    while (step < n_steps)
    {
        const uint32_t idx = step * patch_size + k;
        tail_sum += patch_grad_w[idx] * patch_weights[idx];
        ++step;
    }

    // Pass 2: emit gradients while peeling the current sample off tail_sum.
    float remaining = 1.f;
    for (step = 0; step < n_steps; ++step)
    {
        const uint32_t idx = step * patch_size + k;
        const float a = patch_alphas[idx];
        // The clamp keeps the division finite when alpha reaches 1.
        patch_grad_a[idx] = (patch_grad_w[idx] * remaining - tail_sum) / fmaxf(1.f - a, 1e-10f);
        tail_sum -= patch_grad_w[idx] * patch_weights[idx];
        remaining *= (1.f - a);
    }
}
// Host launcher for weight_from_sigma_forward_kernel.
//
// Validates the inputs, allocates an output shaped like `sigmas` and launches
// the kernel with one thread per row of `packed_info`.
//
// packed_info: rank-2 tensor, one row per ray.
// starts, ends, sigmas: rank-2 tensors whose second dimension is 1.
// Returns: weights, same shape and dtype as `sigmas`.
torch::Tensor weight_from_sigma_forward_naive(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(sigmas);
    // && short-circuits, so size(1) is only queried once the rank check passed.
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);

    // outputs
    torch::Tensor weights = torch::empty_like(sigmas);
    if (n_rays == 0)
        return weights; // a grid with zero blocks is not a valid launch configuration

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // NOTE(review): the sigma kernel is declared outside this view; the element
    // types below mirror the alpha kernels (int packed_info, float data) - confirm.
    weight_from_sigma_forward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        // inputs
        packed_info.data_ptr<int>(),
        starts.data_ptr<float>(),
        ends.data_ptr<float>(),
        sigmas.data_ptr<float>(),
        // outputs
        weights.data_ptr<float>());
    return weights;
}
// Host launcher for weight_from_sigma_backward_kernel.
//
// Validates the inputs, allocates an output shaped like `sigmas` and launches
// the kernel with one thread per row of `packed_info`.
//
// weights, grad_weights, starts, ends, sigmas: rank-2 tensors whose second
//     dimension is 1.
// packed_info: rank-2 tensor, one row per ray.
// Returns: grad_sigmas, same shape and dtype as `sigmas`.
torch::Tensor weight_from_sigma_backward_naive(
    torch::Tensor weights,
    torch::Tensor grad_weights,
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(weights);
    CHECK_INPUT(grad_weights);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(sigmas);
    // && short-circuits, so size(1) is only queried once the rank check passed.
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);
    TORCH_CHECK(weights.ndimension() == 2 && weights.size(1) == 1);
    TORCH_CHECK(grad_weights.ndimension() == 2 && grad_weights.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);

    // outputs
    torch::Tensor grad_sigmas = torch::empty_like(sigmas);
    if (n_rays == 0)
        return grad_sigmas; // a grid with zero blocks is not a valid launch configuration

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // NOTE(review): the sigma kernel is declared outside this view; the element
    // types below mirror the alpha kernels (int packed_info, float data) - confirm.
    weight_from_sigma_backward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        // inputs
        packed_info.data_ptr<int>(),
        starts.data_ptr<float>(),
        ends.data_ptr<float>(),
        sigmas.data_ptr<float>(),
        weights.data_ptr<float>(),
        grad_weights.data_ptr<float>(),
        // outputs
        grad_sigmas.data_ptr<float>());
    return grad_sigmas;
}
// Host launcher for weight_from_alpha_forward_kernel.
//
// Validates the inputs, allocates an output shaped like `alphas` and launches
// the kernel with one thread per row of `packed_info`.
//
// packed_info: rank-2 int tensor, one row per ray.
// alphas:      rank-2 float tensor whose second dimension is 1.
// Returns: weights, same shape and dtype as `alphas`.
torch::Tensor weight_from_alpha_forward_naive(
    torch::Tensor packed_info, torch::Tensor alphas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(alphas);
    // && short-circuits, so size(1) is only queried once the rank check passed.
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 2 && alphas.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);

    // outputs
    torch::Tensor weights = torch::empty_like(alphas);
    if (n_rays == 0)
        return weights; // a grid with zero blocks is not a valid launch configuration

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    weight_from_alpha_forward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        // outputs
        weights.data_ptr<float>());
    return weights;
}
// Host launcher for weight_from_alpha_patch_based_forward_kernel.
//
// Validates the inputs, allocates an output shaped like `alphas` and launches
// the kernel on a 2D grid: x covers the patches, y covers the rays of a patch.
//
// packed_info: (n_patches, 2) int tensor.
// alphas:      (n_samples, patch_size, 1) float tensor; patch_size must not
//              exceed 256.
// Returns: weights, same shape and dtype as `alphas`.
torch::Tensor weight_from_alpha_patch_based_forward_naive(
    torch::Tensor packed_info, // (n_patches, 2)
    torch::Tensor alphas       // (n_samples, patches_size, 1)
)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(alphas);
    // && short-circuits, so size(2) is only queried once the rank check passed.
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 3 && alphas.size(2) == 1);

    const uint32_t n_patches = packed_info.size(0);
    const uint32_t patch_size = alphas.size(1);
    // A block holds 256 threads arranged as (256 / y, y), with y = patch_size
    // rounded up to a power of two; a larger patch would make the x extent zero.
    TORCH_CHECK(patch_size <= 256, "patch_size must be at most 256, got ", patch_size);

    // outputs
    torch::Tensor weights = torch::empty_like(alphas);
    if (n_patches == 0 || patch_size == 0)
        return weights; // nothing to compute; a zero-sized grid cannot be launched

    // Round patch_size up to the next power of two with integer arithmetic.
    uint32_t thread_for_a_patch = 1;
    while (thread_for_a_patch < patch_size)
        thread_for_a_patch <<= 1;
    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;

    const dim3 threads(thread_for_n_samples, thread_for_a_patch);
    const dim3 blocks(
        (n_patches + threads.x - 1) / threads.x,
        (patch_size + threads.y - 1) / threads.y);

    weight_from_alpha_patch_based_forward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_patches,
        patch_size,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        // outputs
        weights.data_ptr<float>());
    return weights;
}
// Host launcher for weight_from_alpha_backward_kernel.
//
// Validates the inputs, allocates an output shaped like `alphas` and launches
// the kernel with one thread per row of `packed_info`.
//
// weights, grad_weights, alphas: rank-2 float tensors whose second dimension is 1.
// packed_info: rank-2 int tensor, one row per ray.
// Returns: grad_alphas, same shape and dtype as `alphas`.
torch::Tensor weight_from_alpha_backward_naive(
    torch::Tensor weights,
    torch::Tensor grad_weights,
    torch::Tensor packed_info,
    torch::Tensor alphas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(alphas);
    CHECK_INPUT(weights);
    CHECK_INPUT(grad_weights);
    // && short-circuits, so size(1) is only queried once the rank check passed.
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 2 && alphas.size(1) == 1);
    TORCH_CHECK(weights.ndimension() == 2 && weights.size(1) == 1);
    TORCH_CHECK(grad_weights.ndimension() == 2 && grad_weights.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);

    // outputs
    torch::Tensor grad_alphas = torch::empty_like(alphas);
    if (n_rays == 0)
        return grad_alphas; // a grid with zero blocks is not a valid launch configuration

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    weight_from_alpha_backward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_rays,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        weights.data_ptr<float>(),
        grad_weights.data_ptr<float>(),
        // outputs
        grad_alphas.data_ptr<float>());
    return grad_alphas;
}
// Host launcher for weight_from_alpha_patch_based_backward_kernel.
//
// Validates the inputs, allocates an output shaped like `alphas` and launches
// the kernel on a 2D grid: x covers the patches, y covers the rays of a patch.
//
// weights, grad_weights, alphas: (n_samples, patch_size, 1) float tensors;
//     patch_size must not exceed 256.
// packed_info: rank-2 int tensor, one row per patch.
// Returns: grad_alphas, same shape and dtype as `alphas`.
torch::Tensor weight_from_alpha_patch_based_backward_naive(
    torch::Tensor weights,
    torch::Tensor grad_weights, // (n_samples, patches_size, 1)
    torch::Tensor packed_info,
    torch::Tensor alphas)       // (n_samples, patches_size, 1)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(alphas);
    CHECK_INPUT(weights);
    CHECK_INPUT(grad_weights);
    // && short-circuits, so size(2) is only queried once the rank check passed.
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 3 && alphas.size(2) == 1);
    TORCH_CHECK(weights.ndimension() == 3 && weights.size(2) == 1);
    TORCH_CHECK(grad_weights.ndimension() == 3 && grad_weights.size(2) == 1);

    const uint32_t n_patches = packed_info.size(0);
    const uint32_t patch_size = alphas.size(1);
    // A block holds 256 threads arranged as (256 / y, y), with y = patch_size
    // rounded up to a power of two; a larger patch would make the x extent zero.
    TORCH_CHECK(patch_size <= 256, "patch_size must be at most 256, got ", patch_size);

    // outputs
    torch::Tensor grad_alphas = torch::empty_like(alphas);
    if (n_patches == 0 || patch_size == 0)
        return grad_alphas; // nothing to compute; a zero-sized grid cannot be launched

    // Round patch_size up to the next power of two with integer arithmetic.
    uint32_t thread_for_a_patch = 1;
    while (thread_for_a_patch < patch_size)
        thread_for_a_patch <<= 1;
    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;

    const dim3 threads(thread_for_n_samples, thread_for_a_patch);
    const dim3 blocks(
        (n_patches + threads.x - 1) / threads.x,
        (patch_size + threads.y - 1) / threads.y);

    weight_from_alpha_patch_based_backward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_patches,
        patch_size,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        weights.data_ptr<float>(),
        grad_weights.data_ptr<float>(),
        // outputs
        grad_alphas.data_ptr<float>());
    return grad_alphas;
}
// Host launcher for weight_and_transmittance_from_alpha_patch_based_forward_kernel.
//
// Validates the inputs, allocates two outputs shaped like `alphas` and launches
// the kernel on a 2D grid: x covers the patches, y covers the rays of a patch.
//
// packed_info: (n_patches, 2) int tensor.
// alphas:      (n_samples, patch_size, 1) float tensor; patch_size must not
//              exceed 256.
// Returns: {weights, transmittance}, each with the shape and dtype of `alphas`.
std::vector<torch::Tensor> weight_and_transmittance_from_alpha_patch_based_forward_naive(
    torch::Tensor packed_info, // (n_patches, 2)
    torch::Tensor alphas       // (n_samples, patches_size, 1)
)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(alphas);
    // && short-circuits, so size(2) is only queried once the rank check passed.
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 3 && alphas.size(2) == 1);

    const uint32_t n_patches = packed_info.size(0);
    const uint32_t patch_size = alphas.size(1);
    // A block holds 256 threads arranged as (256 / y, y), with y = patch_size
    // rounded up to a power of two; a larger patch would make the x extent zero.
    TORCH_CHECK(patch_size <= 256, "patch_size must be at most 256, got ", patch_size);

    // outputs
    torch::Tensor weights = torch::empty_like(alphas);
    torch::Tensor transmittance = torch::empty_like(alphas);
    if (n_patches == 0 || patch_size == 0)
        return {weights, transmittance}; // nothing to compute; a zero-sized grid cannot be launched

    // Round patch_size up to the next power of two with integer arithmetic.
    uint32_t thread_for_a_patch = 1;
    while (thread_for_a_patch < patch_size)
        thread_for_a_patch <<= 1;
    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;

    const dim3 threads(thread_for_n_samples, thread_for_a_patch);
    const dim3 blocks(
        (n_patches + threads.x - 1) / threads.x,
        (patch_size + threads.y - 1) / threads.y);

    weight_and_transmittance_from_alpha_patch_based_forward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_patches,
        patch_size,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        // outputs
        weights.data_ptr<float>(),
        transmittance.data_ptr<float>());
    return {weights, transmittance};
}
// Host launcher for weight_and_transmittance_from_alpha_patch_based_backward_kernel.
//
// Validates the inputs, allocates an output shaped like `alphas` and launches
// the kernel on a 2D grid: x covers the patches, y covers the rays of a patch.
// Only the gradient w.r.t. the weights is accepted; no transmittance gradient
// is passed to the kernel.
//
// weights, grad_weights, alphas: (n_samples, patch_size, 1) float tensors;
//     patch_size must not exceed 256.
// packed_info: rank-2 int tensor, one row per patch.
// Returns: grad_alphas, same shape and dtype as `alphas`.
torch::Tensor weight_and_transmittance_from_alpha_patch_based_backward_naive(
    torch::Tensor weights,
    torch::Tensor grad_weights, // (n_samples, patches_size, 1)
    torch::Tensor packed_info,
    torch::Tensor alphas)       // (n_samples, patches_size, 1)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(alphas);
    CHECK_INPUT(weights);
    CHECK_INPUT(grad_weights);
    // && short-circuits, so size(2) is only queried once the rank check passed.
    TORCH_CHECK(packed_info.ndimension() == 2);
    TORCH_CHECK(alphas.ndimension() == 3 && alphas.size(2) == 1);
    TORCH_CHECK(weights.ndimension() == 3 && weights.size(2) == 1);
    TORCH_CHECK(grad_weights.ndimension() == 3 && grad_weights.size(2) == 1);

    const uint32_t n_patches = packed_info.size(0);
    const uint32_t patch_size = alphas.size(1);
    // A block holds 256 threads arranged as (256 / y, y), with y = patch_size
    // rounded up to a power of two; a larger patch would make the x extent zero.
    TORCH_CHECK(patch_size <= 256, "patch_size must be at most 256, got ", patch_size);

    // outputs
    torch::Tensor grad_alphas = torch::empty_like(alphas);
    if (n_patches == 0 || patch_size == 0)
        return grad_alphas; // nothing to compute; a zero-sized grid cannot be launched

    // Round patch_size up to the next power of two with integer arithmetic.
    uint32_t thread_for_a_patch = 1;
    while (thread_for_a_patch < patch_size)
        thread_for_a_patch <<= 1;
    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;

    const dim3 threads(thread_for_n_samples, thread_for_a_patch);
    const dim3 blocks(
        (n_patches + threads.x - 1) / threads.x,
        (patch_size + threads.y - 1) / threads.y);

    weight_and_transmittance_from_alpha_patch_based_backward_kernel<<<
        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
        n_patches,
        patch_size,
        // inputs
        packed_info.data_ptr<int>(),
        alphas.data_ptr<float>(),
        weights.data_ptr<float>(),
        grad_weights.data_ptr<float>(),
        // outputs
        grad_alphas.data_ptr<float>());
    return grad_alphas;
}
// torch::Tensor weight_from_alpha_importance_sampling_forward_naive(
// torch::Tensor packed_info, torch::Tensor alphas, torch::Tensor importance_pdfs)
// {
// DEVICE_GUARD(packed_info);
// CHECK_INPUT(packed_info);
// CHECK_INPUT(alphas);
// CHECK_INPUT(importance_pdfs);
// TORCH_CHECK(packed_info.ndimension() == 2);
// TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1);
// TORCH_CHECK(importance_pdfs.ndimension() == 2 & importance_pdfs.size(1) == 1);
//
// const uint32_t n_samples = alphas.size(0);
// const uint32_t n_rays = packed_info.size(0);
//
// const int threads = 256;
// const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
//
// // outputs
// torch::Tensor weights = torch::empty_like(alphas);
//
// weight_from_alpha_forward_kernel<<<
// blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
// n_rays,
// // inputs
// packed_info.data_ptr(),
// alphas.data_ptr(),
// importance_pdfs.data_ptr(),
// // outputs
// weights.data_ptr());
// return weights;
// }
//
// torch::Tensor weight_from_alpha_importance_sampling_backward_naive(
// torch::Tensor weights,
// torch::Tensor grad_weights,
// torch::Tensor packed_info,
// torch::Tensor alphas,
// torch::Tensor importance_pdfs)
// {
// DEVICE_GUARD(packed_info);
// CHECK_INPUT(packed_info);
// CHECK_INPUT(alphas);
// CHECK_INPUT(weights);
// CHECK_INPUT(grad_weights);
// CHECK_INPUT(importance_pdfs);
// TORCH_CHECK(packed_info.ndimension() == 2);
// TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1);
// TORCH_CHECK(weights.ndimension() == 2 & weights.size(1) == 1);
// TORCH_CHECK(importance_pdfs.ndimension() == 2 & importance_pdfs.size(1) == 1);
// TORCH_CHECK(grad_weights.ndimension() == 2 & grad_weights.size(1) == 1);
//
//
// const uint32_t n_samples = alphas.size(0);
// const uint32_t n_rays = packed_info.size(0);
//
// const int threads = 256;
// const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
//
// // outputs
// torch::Tensor grad_alphas = torch::empty_like(alphas);
//
// weight_from_alpha_backward_kernel<<<
// blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
// n_rays,
// // inputs
// packed_info.data_ptr(),
// alphas.data_ptr(),
// weights.data_ptr(),
// grad_weights.data_ptr(),
// importance_pdfs.data_ptr(),
// // outputs
// grad_alphas.data_ptr());
// return grad_alphas;
// }
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/grid.py
================================================
"""
Copyright (c) 2022 Ruilong Li @ UC Berkeley
"""
from typing import Callable, List, Union
import torch
import torch.nn as nn
import nerfacc.cuda as _C
from .contraction import ContractionType, contract_inv
# TODO: check torch.scatter_reduce_
# from torch_scatter import scatter_max
@torch.no_grad()
def query_grid(
    samples: torch.Tensor,
    grid_roi: torch.Tensor,
    grid_values: torch.Tensor,
    grid_type: ContractionType,
):
    """Look up grid values at the given coordinates.

    Args:
        samples: (n_samples, 3) tensor of coordinates.
        grid_roi: (6,) region of interest of the grid. Usually acquired from
            the grid itself via `grid.roi_aabb`.
        grid_values: A 3D tensor of grid values with shape (resx, resy, resz).
        grid_type: Contraction type of the grid. Usually acquired from the
            grid itself via `grid.contraction_type`.

    Returns:
        (n_samples) values for those samples queried from the grid.
    """
    assert samples.dim() == 2 and samples.size(-1) == 3
    assert grid_roi.dim() == 1 and grid_roi.size(0) == 6
    assert grid_values.dim() == 3
    assert isinstance(grid_type, ContractionType)

    # The extension is handed contiguous tensors and the C++-side enum value.
    contiguous_samples = samples.contiguous()
    contiguous_roi = grid_roi.contiguous()
    contiguous_values = grid_values.contiguous()
    cpp_grid_type = grid_type.to_cpp_version()
    return _C.grid_query(
        contiguous_samples, contiguous_roi, contiguous_values, cpp_grid_type
    )
class Grid(nn.Module):
    """Abstract base class for grids.

    A grid caches, per voxel of the 3D space, whether that area matters for
    differentiable rendering. The ray marching function (see
    :func:`nerfacc.ray_marching`) uses the grid to skip unimportant voxels.

    To work with :func:`nerfacc.ray_marching`, three attributes must exist:

        - :attr:`roi_aabb`: The axis-aligned bounding box of the region of interest.
        - :attr:`binary`: A 3D binarized tensor of shape {resx, resy, resz}, \
            with torch.bool data type.
        - :attr:`contraction_type`: The contraction type of the grid, indicating how \
            the 3D space is mapped to the grid.

    Subclasses provide them by setting `_roi_aabb`, `_binary` and
    `_contraction_type`.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        # Empty non-persistent buffer whose only job is to follow the module
        # across devices so that `device` can report where the module lives.
        self.register_buffer("_dummy", torch.empty(0), persistent=False)

    @property
    def device(self) -> torch.device:
        """Device of the module, read from the internal dummy buffer."""
        return self._dummy.device

    @property
    def roi_aabb(self) -> torch.Tensor:
        """The axis-aligned bounding box of the region of interest.

        It is a shape (6,) tensor in the format of
        {minx, miny, minz, maxx, maxy, maxz}.

        Raises:
            NotImplementedError: If `_roi_aabb` has not been set.
        """
        if not hasattr(self, "_roi_aabb"):
            raise NotImplementedError("please set an attribute named _roi_aabb")
        return getattr(self, "_roi_aabb")

    @property
    def binary(self) -> torch.Tensor:
        """A 3D binarized tensor with torch.bool data type.

        The tensor has shape (resx, resy, resz); each boolean value tells
        whether the corresponding voxel should be kept or not.

        Raises:
            NotImplementedError: If `_binary` has not been set.
        """
        if not hasattr(self, "_binary"):
            raise NotImplementedError("please set an attribute named _binary")
        return getattr(self, "_binary")

    @property
    def contraction_type(self) -> ContractionType:
        """The contraction type of the grid.

        The contraction type indicates how the 3D space is contracted to this
        voxel grid. See :class:`nerfacc.ContractionType` for more details.

        Raises:
            NotImplementedError: If `_contraction_type` has not been set.
        """
        if not hasattr(self, "_contraction_type"):
            raise NotImplementedError(
                "please set an attribute named _contraction_type"
            )
        return getattr(self, "_contraction_type")
class OccupancyGrid(Grid):
    """Occupancy grid: records for each voxel whether it is occupied or not.

    Args:
        roi_aabb: The axis-aligned bounding box of the region of interest. Useful for mapping
            the 3D space to the grid.
        resolution: The resolution of the grid. If an integer is given, the grid is assumed to
            be a cube. Otherwise, a list or a tensor of shape (3,) is expected. Default: 128.
        contraction_type: The contraction type of the grid. See :class:`nerfacc.ContractionType`
            for more details. Default: :attr:`nerfacc.ContractionType.AABB`.
    """

    NUM_DIM: int = 3

    def __init__(
        self,
        roi_aabb: Union[List[int], torch.Tensor],
        resolution: Union[int, List[int], torch.Tensor] = 128,
        contraction_type: ContractionType = ContractionType.AABB,
    ) -> None:
        super().__init__()

        # Normalise `resolution`: scalar -> list -> int32 tensor of shape (NUM_DIM,).
        if isinstance(resolution, int):
            resolution = [resolution] * self.NUM_DIM
        if isinstance(resolution, (list, tuple)):
            resolution = torch.tensor(resolution, dtype=torch.int32)
        assert isinstance(
            resolution, torch.Tensor
        ), f"Invalid type: {type(resolution)}"
        expected_resolution_shape = (self.NUM_DIM,)
        assert (
            resolution.shape == expected_resolution_shape
        ), f"Invalid shape: {resolution.shape}"

        # Normalise `roi_aabb` to a float32 tensor of shape (2 * NUM_DIM,).
        if isinstance(roi_aabb, (list, tuple)):
            roi_aabb = torch.tensor(roi_aabb, dtype=torch.float32)
        assert isinstance(
            roi_aabb, torch.Tensor
        ), f"Invalid type: {type(roi_aabb)}"
        expected_aabb_shape = torch.Size([self.NUM_DIM * 2])
        assert (
            roi_aabb.shape == expected_aabb_shape
        ), f"Invalid shape: {roi_aabb.shape}"

        # Total number of voxels.
        self.num_cells = int(resolution.prod().item())

        # Attributes required by the `Grid` interface.
        self.register_buffer("_roi_aabb", roi_aabb)
        empty_binary = torch.zeros(resolution.tolist(), dtype=torch.bool)
        self.register_buffer("_binary", empty_binary)
        self._contraction_type = contraction_type

        # Helper state: grid resolution and the per-cell occupancy values.
        self.register_buffer("resolution", resolution)
        self.register_buffer("occs", torch.zeros(self.num_cells))

        # Integer coordinates and flat indices of all cells; these can be
        # rebuilt from the resolution, so they are kept out of the state dict.
        cell_coords = _meshgrid3d(resolution).reshape(
            self.num_cells, self.NUM_DIM
        )
        self.register_buffer("grid_coords", cell_coords, persistent=False)
        cell_indices = torch.arange(self.num_cells)
        self.register_buffer("grid_indices", cell_indices, persistent=False)

    @torch.no_grad()
    def _get_all_cells(self) -> torch.Tensor:
        """Return the flat indices of all cells of the grid."""
        return self.grid_indices

    @torch.no_grad()
    def _sample_uniform_and_occupied_cells(self, n: int) -> torch.Tensor:
        """Sample n cells uniformly plus up to n currently occupied cells.

        When more than n cells are occupied, n of them are drawn at random
        (with replacement); otherwise all occupied cells are used.
        """
        uniform = torch.randint(self.num_cells, (n,), device=self.device)
        occupied = torch.nonzero(self._binary.flatten())[:, 0]
        n_occupied = len(occupied)
        if n < n_occupied:
            chosen = torch.randint(n_occupied, (n,), device=self.device)
            occupied = occupied[chosen]
        return torch.cat([uniform, occupied], dim=0)

    @torch.no_grad()
    def _update(
        self,
        step: int,
        occ_eval_fn: Callable,
        occ_thre: float = 0.01,
        ema_decay: float = 0.95,
        warmup_steps: int = 256,
    ) -> None:
        """Update the occupancy field in the EMA way and re-binarize it."""
        # Pick the cells to refresh: everything during warmup, a sample afterwards.
        if step < warmup_steps:
            cells = self._get_all_cells()
        else:
            cells = self._sample_uniform_and_occupied_cells(self.num_cells // 4)

        # Jitter a random point inside each chosen cell, in [0, 1]^3 grid space.
        cell_coords = self.grid_coords[cells]
        jitter = torch.rand_like(cell_coords, dtype=torch.float32)
        points = (cell_coords + jitter) / self.resolution
        if self._contraction_type == ContractionType.UN_BOUNDED_SPHERE:
            # Only the points inside the sphere are valid.
            inside = (points - 0.5).norm(dim=1) < 0.5
            points = points[inside]
            cells = cells[inside]

        # Voxel coordinates [0, 1]^3 -> world.
        points = contract_inv(
            points,
            roi=self._roi_aabb,
            type=self._contraction_type,
        )
        occ = occ_eval_fn(points).squeeze(-1)

        # EMA update: decay the stored value and keep the larger of old and new.
        # A scatter-max would be the exact formulation for repeated indices;
        # empirically the result is almost the same.
        decayed = self.occs[cells] * ema_decay
        self.occs[cells] = torch.maximum(decayed, occ)

        # Binarize against the smaller of the mean occupancy and `occ_thre`.
        threshold = torch.clamp(self.occs.mean(), max=occ_thre)
        self._binary = (self.occs > threshold).view(self._binary.shape)

    @torch.no_grad()
    def every_n_step(
        self,
        step: int,
        occ_eval_fn: Callable,
        occ_thre: float = 1e-2,
        ema_decay: float = 0.95,
        warmup_steps: int = 256,
        n: int = 16,
    ) -> None:
        """Update the grid every n steps during training.

        Args:
            step: Current training step.
            occ_eval_fn: A function that takes in sample locations :math:`(N, 3)` and
                returns the occupancy values :math:`(N, 1)` at those locations.
            occ_thre: Threshold used to binarize the occupancy grid. Default: 1e-2.
            ema_decay: The decay rate for EMA updates. Default: 0.95.
            warmup_steps: Sample all cells during the warmup stage. After the warmup
                stage we change the sampling strategy to 1/4 uniformly sampled cells
                together with 1/4 occupied cells. Default: 256.
            n: Update the grid every n steps. Default: 16.

        Raises:
            RuntimeError: If the module is not in training mode.
        """
        if not self.training:
            raise RuntimeError(
                "You should only call this function only during training. "
                "Please call _update() directly if you want to update the "
                "field during inference."
            )
        if step % n != 0:
            return
        self._update(
            step=step,
            occ_eval_fn=occ_eval_fn,
            occ_thre=occ_thre,
            ema_decay=ema_decay,
            warmup_steps=warmup_steps,
        )

    @torch.no_grad()
    def query_occ(self, samples: torch.Tensor) -> torch.Tensor:
        """Query the occupancy field at the given samples.

        Args:
            samples: Samples in the world coordinates. (n_samples, 3)

        Returns:
            Occupancy values at the given samples. (n_samples,)
        """
        roi = self._roi_aabb
        binary_grid = self.binary
        contraction = self.contraction_type
        return query_grid(samples, roi, binary_grid, contraction)
def _meshgrid3d(
res: torch.Tensor, device: Union[torch.device, str] = "cpu"
) -> torch.Tensor:
"""Create 3D grid coordinates."""
assert len(res) == 3
res = res.tolist()
return torch.stack(
torch.meshgrid(
[
torch.arange(res[0], dtype=torch.long),
torch.arange(res[1], dtype=torch.long),
torch.arange(res[2], dtype=torch.long),
],
indexing="ij",
),
dim=-1,
).to(device)
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/intersection.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from typing import Tuple
import torch
from torch import Tensor
import nerfacc.cuda as _C
@torch.no_grad()
def ray_aabb_intersect(
    rays_o: Tensor, rays_d: Tensor, aabb: Tensor
) -> Tuple[Tensor, Tensor]:
    """Intersect rays with an axis-aligned bounding box.

    Note:
        this function is not differentiable to any inputs.

    Args:
        rays_o: Ray origins of shape (n_rays, 3).
        rays_d: Normalized ray directions of shape (n_rays, 3).
        aabb: Scene bounding box {xmin, ymin, zmin, xmax, ymax, zmax}. \
            Tensor with shape (6)

    Returns:
        Ray AABB intersection {t_min, t_max} with shape (n_rays) respectively. \
        Note the t_min is clipped to minimum zero. 1e10 means no intersection.

    Raises:
        NotImplementedError: If any of the inputs is not a CUDA tensor.

    Examples:

    .. code-block:: python

        aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device="cuda:0")
        rays_o = torch.rand((128, 3), device="cuda:0")
        rays_d = torch.randn((128, 3), device="cuda:0")
        rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
        t_min, t_max = ray_aabb_intersect(rays_o, rays_d, aabb)

    """
    all_on_cuda = rays_o.is_cuda and rays_d.is_cuda and aabb.is_cuda
    if not all_on_cuda:
        raise NotImplementedError("Only support cuda inputs.")

    # The extension is handed contiguous tensors.
    origins = rays_o.contiguous()
    directions = rays_d.contiguous()
    box = aabb.contiguous()
    t_min, t_max = _C.ray_aabb_intersect(origins, directions, box)
    return t_min, t_max
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/losses.py
================================================
from torch import Tensor
from .pack import unpack_data
def distortion(
    packed_info: Tensor, weights: Tensor, t_starts: Tensor, t_ends: Tensor
) -> Tensor:
    """Distortion loss from Mip-NeRF 360 paper, Equ. 15.

    The loss is the sum of a per-interval term, (1/3) * sum_i w_i^2 * (t2_i - t1_i),
    and a pairwise term, sum_{i,j} w_i * w_j * |mid_i - mid_j|, both evaluated
    per ray on the unpacked samples.

    Args:
        packed_info: Packed info for the samples. (n_rays, 2)
        weights: Weights for the samples. (all_samples,)
        t_starts: Per-sample start distance. Tensor with shape (all_samples, 1).
        t_ends: Per-sample end distance. Tensor with shape (all_samples, 1).

    Returns:
        Distortion loss. (n_rays,)
    """

    def _per_ray(packed: Tensor) -> Tensor:
        # (all_samples, 1) -> (n_rays, n_samples)
        return unpack_data(packed_info, packed).squeeze(-1)

    ray_w = _per_ray(weights[..., None])
    seg_start = _per_ray(t_starts)
    seg_end = _per_ray(t_ends)

    seg_len = seg_end - seg_start
    seg_mid = (seg_start + seg_end) / 2

    # Per-interval term.
    uni_term = (1 / 3) * (seg_len * ray_w.pow(2)).sum(-1)

    # Pairwise term over all sample pairs of a ray.
    pair_weight = ray_w.unsqueeze(-1) * ray_w.unsqueeze(-2)
    pair_dist = (seg_mid.unsqueeze(-1) - seg_mid.unsqueeze(-2)).abs()
    bi_term = (pair_weight * pair_dist).sum((-1, -2))

    return uni_term + bi_term
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/pack.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from typing import Optional, Tuple
import torch
from torch import Tensor
import nerfacc.cuda as _C
def pack_data(data: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:
    """Pack per-ray data (n_rays, n_samples, D) to (all_samples, D) based on mask.

    Args:
        data: Tensor with shape (n_rays, n_samples, D).
        mask: Boolean tensor with shape (n_rays, n_samples).

    Returns:
        Tuple of Tensors including packed data (all_samples, D), \
        and packed_info (n_rays, 2) which stores the start index of the sample,
        and the number of samples kept for each ray. \

    Examples:

    .. code-block:: python

        data = torch.rand((10, 3, 4), device="cuda:0")
        mask = torch.rand((10, 3), device="cuda:0") > 0.5
        packed_data, packed_info = pack_data(data, mask)
        print(packed_data.shape, packed_info.shape)

    """
    assert data.dim() == 3, "data must be with shape of (n_rays, n_samples, D)."
    assert (
        mask.shape == data.shape[:2]
    ), "mask must be with shape of (n_rays, n_samples)."
    assert mask.dtype == torch.bool, "mask must be a boolean tensor."

    kept = data[mask]
    # Samples kept per ray, and the running total up to and including each ray.
    kept_per_ray = mask.sum(dim=-1, dtype=torch.int32)
    kept_running = kept_per_ray.cumsum(dim=0, dtype=torch.int32)
    # A ray's first packed sample sits right after all samples of earlier rays.
    first_index = kept_running - kept_per_ray
    return kept, torch.stack([first_index, kept_per_ray], dim=-1)
@torch.no_grad()
def pack_info(ray_indices: Tensor, n_rays: Optional[int] = None) -> Tensor:
    """Pack `ray_indices` to `packed_info`. Useful for converting per sample data to per ray data.

    The computation uses plain tensor operations only, so the input may live
    on any device; the result is created on the same device.

    Note:
        this function is not differentiable to any inputs.

    Args:
        ray_indices: Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Total number of rays. If not provided, it is inferred as
            `ray_indices.max() + 1` (0 when there are no samples).

    Returns:
        packed_info: Stores information on which samples belong to the same ray. \
            See :func:`nerfacc.ray_marching` for details. IntTensor with shape (n_rays, 2).
    """
    assert (
        ray_indices.dim() == 1
    ), "ray_indices must be a 1D tensor with shape (n_samples)."
    if n_rays is None:
        # max() is undefined on an empty tensor: no samples means no rays.
        n_rays = int(ray_indices.max()) + 1 if ray_indices.numel() > 0 else 0

    # Count the samples of each ray by scattering a one per sample.
    ones = torch.ones_like(ray_indices, dtype=torch.int)
    num_steps = torch.zeros(
        (n_rays,), device=ray_indices.device, dtype=torch.int
    )
    num_steps.scatter_add_(0, ray_indices, ones)
    # A ray's first sample sits right after all samples of earlier rays.
    cum_steps = num_steps.cumsum(dim=0, dtype=torch.int)
    return torch.stack([cum_steps - num_steps, num_steps], dim=-1)
@torch.no_grad()
def unpack_info(packed_info: Tensor, n_samples: int) -> Tensor:
    """Unpack `packed_info` to `ray_indices`. Useful for converting per ray data to per sample data.

    Note:
        this function is not differentiable to any inputs.

    Args:
        packed_info: Stores information on which samples belong to the same ray. \
            See :func:`nerfacc.ray_marching` for details. IntTensor with shape (n_rays, 2).
        n_samples: Total number of samples.

    Returns:
        Ray index of each sample. LongTensor with shape (n_sample).

    Raises:
        NotImplementedError: If `packed_info` is not a CUDA tensor.

    Examples:

    .. code-block:: python

        rays_o = torch.rand((128, 3), device="cuda:0")
        rays_d = torch.randn((128, 3), device="cuda:0")
        rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
        # Ray marching with near far plane.
        packed_info, t_starts, t_ends = ray_marching(
            rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3
        )
        # torch.Size([128, 2]) torch.Size([115200, 1]) torch.Size([115200, 1])
        print(packed_info.shape, t_starts.shape, t_ends.shape)
        # Unpack per-ray info to per-sample info.
        ray_indices = unpack_info(packed_info, t_starts.shape[0])
        # torch.Size([115200]) torch.int64
        print(ray_indices.shape, ray_indices.dtype)

    """
    assert (
        packed_info.dim() == 2 and packed_info.shape[-1] == 2
    ), "packed_info must be a 2D tensor with shape (n_rays, 2)."
    if not packed_info.is_cuda:
        raise NotImplementedError("Only support cuda inputs.")
    # The extension is handed a contiguous tensor.
    return _C.unpack_info(packed_info.contiguous(), n_samples)
def unpack_data(
    packed_info: Tensor,
    data: Tensor,
    n_samples: Optional[int] = None,
) -> Tensor:
    """Spread packed per-sample data into a dense per-ray layout.

    Converts `data` from the packed layout (all_samples, D) into the layout
    (n_rays, n_samples, D), where `n_samples` is the number of slots reserved
    for every ray.

    Args:
        packed_info: Describes which samples belong to which ray. Tensor with
            shape (n_rays, 2).
        data: Packed data to unpack. Tensor with shape (all_samples, D).
        n_samples: Optional. Number of samples per ray in the output. When
            omitted, the maximum of the second column of `packed_info` is used.

    Returns:
        Unpacked data with shape (n_rays, n_samples, D).

    Examples:

    .. code-block:: python

        ray_indices = torch.tensor([0, 0, 1, 2, 2, 2], device="cuda:0")
        packed_info = pack_info(ray_indices, n_rays=3)
        t_starts = torch.rand((6, 1), device="cuda:0")
        dense = unpack_data(packed_info, t_starts, n_samples=4)
        print(dense.shape)  # torch.Size([3, 4, 1])

    """
    info_is_n_by_2 = packed_info.dim() == 2 and packed_info.shape[-1] == 2
    assert (
        info_is_n_by_2
    ), "packed_info must be a 2D tensor with shape (n_rays, 2)."
    data_is_2d = data.dim() == 2
    assert data_is_2d, "data must be a 2D tensor with shape (n_samples, D)."
    if n_samples is None:
        # The largest entry of column 1 decides how many slots each ray gets.
        second_column = packed_info[:, 1]
        n_samples = second_column.max().item()
    return _UnpackData.apply(packed_info, data, n_samples)
class _UnpackData(torch.autograd.Function):
    """Autograd function turning packed data (all_samples, D) into per-ray data (n_rays, n_samples, D)."""

    @staticmethod
    def forward(ctx, packed_info: Tensor, data: Tensor, n_samples: int):
        # `data` arrives in the packed layout (all_samples, D).
        info = packed_info.contiguous()
        packed = data.contiguous()
        wants_data_grad = ctx.needs_input_grad[1]
        if wants_data_grad:
            # Stash only what backward needs, and only when `data` needs a gradient.
            ctx.save_for_backward(info)
            ctx.n_samples = n_samples
        return _C.unpack_data(info, packed, n_samples)

    @staticmethod
    def backward(ctx, grad: Tensor):
        # `grad` arrives in the dense layout (n_rays, n_samples, D).
        (info,) = ctx.saved_tensors
        valid_slots = _C.unpack_info_to_mask(info, ctx.n_samples)
        grad_data = grad[valid_slots].contiguous()
        # Only `data` receives a gradient; `packed_info` and `n_samples` do not.
        return None, grad_data, None
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/ray_marching.py
================================================
from typing import Callable, Optional, Tuple
import torch
import nerfacc.cuda as _C
from .contraction import ContractionType
from .grid import Grid
from .intersection import ray_aabb_intersect
from .vol_rendering import render_visibility
@torch.no_grad()
def ray_marching(
    # rays
    rays_o: torch.Tensor,
    rays_d: torch.Tensor,
    t_min: Optional[torch.Tensor] = None,
    t_max: Optional[torch.Tensor] = None,
    # bounding box of the scene
    scene_aabb: Optional[torch.Tensor] = None,
    # binarized grid for skipping empty space
    grid: Optional[Grid] = None,
    # sigma/alpha function for skipping invisible space
    sigma_fn: Optional[Callable] = None,
    alpha_fn: Optional[Callable] = None,
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
    # rendering options
    near_plane: Optional[float] = None,
    far_plane: Optional[float] = None,
    render_step_size: float = 1e-3,
    stratified: bool = False,
    cone_angle: float = 0.0,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """March along rays and emit sample intervals, skipping empty/invisible space.

    Note:
        How the marching range `[t_min, t_max]` is resolved:

        1. When both `t_min` and `t_max` are passed they are used as is.
        2. When either one is missing and `scene_aabb` is passed, both are
           computed with :func:`ray_aabb_intersect`.
        3. When either one is missing and there is no `scene_aabb`, `t_min`
           becomes 0.0 and `t_max` becomes 1e10 (unbounded scene).
        4. Afterwards `t_min` is clamped from below by `near_plane` and
           `t_max` from above by `far_plane`, whenever those are passed.

    Warning:
        The function runs under `torch.no_grad()`; it is not differentiable
        with respect to any input.

    Args:
        rays_o: Ray origins of shape (n_rays, 3).
        rays_d: Normalized ray directions of shape (n_rays, 3).
        t_min: Optional. Per-ray minimum distance. Tensor with shape (n_rays).
        t_max: Optional. Per-ray maximum distance. Tensor with shape (n_rays).
        scene_aabb: Optional. Scene bounding box used to derive the marching
            range. Tensor with shape (6,) {xmin, ymin, zmin, xmax, ymax, zmax}.
            Ignored when both `t_min` and `t_max` are passed.
        grid: Optional. Grid that indicates where to skip during marching.
            See :class:`nerfacc.Grid` for details.
        sigma_fn: Optional. Callable taking {t_starts (N, 1), t_ends (N, 1),
            ray indices (N,)} and returning post-activation densities (N, 1).
            Used to drop invisible samples. Mutually exclusive with `alpha_fn`.
        alpha_fn: Optional. Callable taking {t_starts (N, 1), t_ends (N, 1),
            ray indices (N,)} and returning post-activation opacities (N, 1).
            Used to drop invisible samples. Mutually exclusive with `sigma_fn`.
        early_stop_eps: Early stop threshold forwarded to the visibility test.
            Default: 1e-4.
        alpha_thre: Alpha threshold forwarded to the visibility test.
            Default: 0.0.
        near_plane: Optional. Lower clamp for `t_min`.
        far_plane: Optional. Upper clamp for `t_max`.
        render_step_size: Step size for marching. Default: 1e-3.
        stratified: When True, `t_min` is shifted by a random amount in
            `[0, render_step_size)` per ray. Default: False.
        cone_angle: Cone angle for linearly-increased step size. 0. means
            constant step size. Default: 0.0.

    Returns:
        A tuple of tensors.

        - **ray_indices**: Ray index of each sample. Tensor with shape (n_samples).
        - **t_starts**: Per-sample start distance. Tensor with shape (n_samples, 1).
        - **t_ends**: Per-sample end distance. Tensor with shape (n_samples, 1).

    Raises:
        NotImplementedError: If `rays_o` is not a CUDA tensor.
        ValueError: If both `sigma_fn` and `alpha_fn` are passed.

    Examples:

    .. code-block:: python

        import torch
        from nerfacc import OccupancyGrid, ray_marching

        device = "cuda:0"
        batch_size = 128
        rays_o = torch.rand((batch_size, 3), device=device)
        rays_d = torch.randn((batch_size, 3), device=device)
        rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

        # Near/far planes only.
        ray_indices, t_starts, t_ends = ray_marching(
            rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3
        )

        # Scene bounding box plus an occupancy grid for skipping.
        scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=device)
        grid = OccupancyGrid(roi_aabb=[0.0, 0.0, 0.0, 0.5, 0.5, 0.5]).to(device)
        ray_indices, t_starts, t_ends = ray_marching(
            rays_o, rays_d, scene_aabb=scene_aabb, grid=grid, render_step_size=1e-3
        )

        # Turn intervals into sample locations.
        t_mid = (t_starts + t_ends) / 2.0
        sample_locs = rays_o[ray_indices] + t_mid * rays_d[ray_indices]

    """
    if not rays_o.is_cuda:
        raise NotImplementedError("Only support cuda inputs.")
    if sigma_fn is not None and alpha_fn is not None:
        raise ValueError(
            "Only one of `alpha_fn` and `sigma_fn` should be provided."
        )

    device = rays_o.device

    # Resolve the marching range; a partially specified range is recomputed.
    if t_min is None or t_max is None:
        if scene_aabb is None:
            per_ray_template = rays_o[..., 0]
            t_min = torch.zeros_like(per_ray_template)
            t_max = torch.ones_like(per_ray_template) * 1e10
        else:
            t_min, t_max = ray_aabb_intersect(rays_o, rays_d, scene_aabb)
    if near_plane is not None:
        t_min = torch.clamp(t_min, min=near_plane)
    if far_plane is not None:
        t_max = torch.clamp(t_max, max=far_plane)

    # Stratified sampling: jitter the start of every ray by less than one step.
    if stratified:
        jitter = torch.rand_like(t_min) * render_step_size
        t_min = t_min + jitter

    # Without a grid, march through a single always-occupied cell that spans
    # a huge box, so nothing is skipped at this stage.
    if grid is None:
        roi_aabb = torch.tensor(
            [-1e10, -1e10, -1e10, 1e10, 1e10, 1e10],
            dtype=torch.float32,
            device=device,
        )
        occupancy = torch.ones([1, 1, 1], dtype=torch.bool, device=device)
        contraction = ContractionType.AABB.to_cpp_version()
    else:
        roi_aabb = grid.roi_aabb
        occupancy = grid.binary
        contraction = grid.contraction_type.to_cpp_version()

    # Grid-based marching in the extension.
    packed_info, ray_indices, t_starts, t_ends = _C.ray_marching(
        # rays
        rays_o.contiguous(),
        rays_d.contiguous(),
        t_min.contiguous(),
        t_max.contiguous(),
        # contraction and grid
        roi_aabb.contiguous(),
        occupancy.contiguous(),
        contraction,
        # sampling
        render_step_size,
        cone_angle,
    )

    if sigma_fn is None and alpha_fn is None:
        return ray_indices, t_starts, t_ends

    # Evaluate opacity of every sample (no gradients are recorded here).
    if sigma_fn is not None:
        sigmas = sigma_fn(t_starts, t_ends, ray_indices)
        assert (
            sigmas.shape == t_starts.shape
        ), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
        alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
    else:
        alphas = alpha_fn(t_starts, t_ends, ray_indices)
        assert (
            alphas.shape == t_starts.shape
        ), "alphas must have shape of (N, 1)! Got {}".format(alphas.shape)

    # Keep only the samples that pass the visibility test.
    visible = render_visibility(
        alphas,
        ray_indices=ray_indices,
        packed_info=packed_info,
        early_stop_eps=early_stop_eps,
        alpha_thre=alpha_thre,
        n_rays=rays_o.shape[0],
    )
    return ray_indices[visible], t_starts[visible], t_ends[visible]
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/sampling.py
================================================
import math
from typing import Callable, Optional, Tuple, Union, overload
import torch
import nerfacc.cuda as _C
from .cdf import ray_resampling
from .grid import Grid
from .pack import pack_info, unpack_info
from .vol_rendering import (
render_transmittance_from_alpha,
render_weight_from_density,
)
@overload
def sample_along_rays(
    rays_o: torch.Tensor,  # [n_rays, 3]
    rays_d: torch.Tensor,  # [n_rays, 3]
    t_min: torch.Tensor,  # [n_rays,]
    t_max: torch.Tensor,  # [n_rays,]
    step_size: float,
    cone_angle: float = 0.0,
    grid: Optional[Grid] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Sample along rays with per-ray min max."""
    ...


@overload
def sample_along_rays(
    rays_o: torch.Tensor,  # [n_rays, 3]
    rays_d: torch.Tensor,  # [n_rays, 3]
    t_min: float,
    t_max: float,
    step_size: float,
    cone_angle: float = 0.0,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Sample along rays with near far plane."""
    ...


@torch.no_grad()
def sample_along_rays(
    rays_o: torch.Tensor,  # [n_rays, 3]
    rays_d: torch.Tensor,  # [n_rays, 3]
    t_min: Union[float, torch.Tensor],  # [n_rays,]
    t_max: Union[float, torch.Tensor],  # [n_rays,]
    step_size: float,
    cone_angle: float = 0.0,
    grid: Optional[Grid] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Sample intervals along rays.

    Two modes are supported:

    - Scalar `t_min` / `t_max` (Python ``int`` or ``float``): every ray gets
      the same `floor((t_max - t_min) / step_size)` uniform intervals starting
      at `t_min`. `rays_d`, `cone_angle` and `grid` are not used in this mode.
    - Tensor `t_min` / `t_max`: marching is delegated to the extension, with
      or without `grid`.

    Args:
        rays_o: Ray origins. Tensor with shape (n_rays, 3).
        rays_d: Ray directions. Tensor with shape (n_rays, 3).
        t_min: Near bound, either one scalar for all rays or a tensor with
            shape (n_rays,).
        t_max: Far bound, either one scalar for all rays or a tensor with
            shape (n_rays,).
        step_size: Length of every interval.
        cone_angle: Forwarded to the extension in tensor mode.
        grid: Optional. Grid forwarded to the extension in tensor mode.

    Returns:
        A tuple `(ray_indices, t_starts, t_ends)` with shapes (n_samples,),
        (n_samples, 1) and (n_samples, 1).
    """
    # Accept ints as well as floats: `t_min=0, t_max=1` is a valid scalar range
    # and must not fall through to the tensor branch (ints have no
    # `.contiguous()`).
    if isinstance(t_min, (int, float)) and isinstance(t_max, (int, float)):
        n_rays = rays_o.shape[0]
        device = rays_o.device
        num_steps = math.floor((t_max - t_min) / step_size)
        # One row of interval starts, shared by all rays, then flattened
        # ray-major to (n_rays * num_steps, 1).
        t_starts = (
            (t_min + torch.arange(0, num_steps, device=device) * step_size)
            .expand(n_rays, -1)
            .reshape(-1, 1)
        )
        t_ends = t_starts + step_size
        ray_indices = torch.arange(0, n_rays, device=device).repeat_interleave(
            num_steps, dim=0
        )
    else:
        if grid is None:
            # NOTE(review): this call passes 4 arguments, whereas the call in
            # ray_marching.py passes 9 to the same `_C.ray_marching`. One of
            # the two is presumably out of date - confirm against the extension.
            packed_info, ray_indices, t_starts, t_ends = _C.ray_marching(
                # rays
                t_min.contiguous(),
                t_max.contiguous(),
                # sampling
                step_size,
                cone_angle,
            )
        else:
            # NOTE(review): verify that the extension actually exports
            # `ray_marching_with_grid`.
            (
                packed_info,
                ray_indices,
                t_starts,
                t_ends,
            ) = _C.ray_marching_with_grid(
                # rays
                rays_o.contiguous(),
                rays_d.contiguous(),
                t_min.contiguous(),
                t_max.contiguous(),
                # contraction and grid
                grid.roi_aabb.contiguous(),
                grid.binary.contiguous(),
                grid.contraction_type.to_cpp_version(),
                # sampling
                step_size,
                cone_angle,
            )
    return ray_indices, t_starts, t_ends
@torch.no_grad()
def proposal_sampling_with_filter(
    t_starts: torch.Tensor,  # [n_samples, 1]
    t_ends: torch.Tensor,  # [n_samples, 1]
    ray_indices: torch.Tensor,  # [n_samples,]
    n_rays: Optional[int] = None,
    # compute density of samples: {t_starts, t_ends, ray_indices} -> density
    sigma_fn: Optional[Callable] = None,
    # proposal density fns: {t_starts, t_ends, ray_indices} -> density
    proposal_sigma_fns: Tuple[Callable, ...] = (),
    proposal_n_samples: Tuple[int, ...] = (),
    proposal_require_grads: bool = False,
    # acceleration options
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, list]:
    """Heuristic marching with proposal functions.

    For every `(proposal_fn, n_samples)` pair, in order: evaluate the proposal
    density on the current samples, turn it into weights, optionally drop
    samples that fail the alpha / transmittance thresholds, and resample
    `n_samples` intervals per ray from the surviving ones. After all proposals,
    `sigma_fn` (if given) is used for one last visibility filtering.

    Args:
        t_starts: Per-sample start distance. Tensor with shape (n_samples, 1).
        t_ends: Per-sample end distance. Tensor with shape (n_samples, 1).
        ray_indices: Ray index of each sample. Tensor with shape (n_samples,).
        n_rays: Optional. Total number of rays. When omitted it is inferred as
            `ray_indices.max() + 1` (0 when there are no samples).
        sigma_fn: Optional. Callable {t_starts, t_ends, ray_indices} -> density
            (N, 1) used for the final filtering.
        proposal_sigma_fns: Proposal density callables with the same signature
            as `sigma_fn`.
        proposal_n_samples: Number of samples to draw after each proposal; must
            have the same length as `proposal_sigma_fns`.
        proposal_require_grads: When True, each proposal is re-evaluated with
            gradients enabled on the filtered samples and recorded in the
            returned `proposal_samples`.
        early_stop_eps: Samples whose transmittance is below this are dropped.
            Default: 1e-4.
        alpha_thre: Samples whose alpha is below this are dropped. Default: 0.0.

    Returns:
        `(ray_indices, t_starts, t_ends, proposal_samples)`. `proposal_samples`
        is a list of `(packed_info, t_starts, t_ends, weights)` tuples, one per
        proposal, and is only populated when `proposal_require_grads` is True.

    Raises:
        AssertionError: If the two proposal sequences differ in length, or a
            density callable returns a tensor whose shape differs from
            `t_starts`.
    """
    assert len(proposal_sigma_fns) == len(proposal_n_samples), (
        "proposal_sigma_fns and proposal_n_samples must have the same length, "
        f"but got {len(proposal_sigma_fns)} and {len(proposal_n_samples)}."
    )
    if n_rays is None:
        # Use a plain int (not a 0-dim tensor), and do not call max() on an
        # empty tensor, which would raise.
        n_rays = int(ray_indices.max()) + 1 if ray_indices.numel() > 0 else 0

    # compute density from proposal fns
    proposal_samples = []
    for proposal_fn, n_samples in zip(proposal_sigma_fns, proposal_n_samples):
        # compute weights for resampling
        sigmas = proposal_fn(t_starts, t_ends, ray_indices)
        assert (
            sigmas.shape == t_starts.shape
        ), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
        alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
        transmittance = render_transmittance_from_alpha(
            alphas, ray_indices=ray_indices, n_rays=n_rays
        )
        weights = alphas * transmittance

        # Compute visibility for filtering
        if alpha_thre > 0 or early_stop_eps > 0:
            vis = (alphas >= alpha_thre) & (transmittance >= early_stop_eps)
            vis = vis.squeeze(-1)
            ray_indices, t_starts, t_ends, weights = (
                ray_indices[vis],
                t_starts[vis],
                t_ends[vis],
                weights[vis],
            )
        packed_info = pack_info(ray_indices, n_rays=n_rays)

        # Rerun the proposal function **with** gradients on filtered samples.
        if proposal_require_grads:
            with torch.enable_grad():
                sigmas = proposal_fn(t_starts, t_ends, ray_indices)
                weights = render_weight_from_density(
                    t_starts, t_ends, sigmas, ray_indices=ray_indices
                )
                proposal_samples.append(
                    (packed_info, t_starts, t_ends, weights)
                )

        # resampling on filtered samples
        packed_info, t_starts, t_ends = ray_resampling(
            packed_info, t_starts, t_ends, weights, n_samples=n_samples
        )
        ray_indices = unpack_info(packed_info, t_starts.shape[0])

    # last round filtering with sigma_fn
    if (alpha_thre > 0 or early_stop_eps > 0) and (sigma_fn is not None):
        sigmas = sigma_fn(t_starts, t_ends, ray_indices)
        assert (
            sigmas.shape == t_starts.shape
        ), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
        alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
        transmittance = render_transmittance_from_alpha(
            alphas, ray_indices=ray_indices, n_rays=n_rays
        )
        vis = (alphas >= alpha_thre) & (transmittance >= early_stop_eps)
        vis = vis.squeeze(-1)
        ray_indices, t_starts, t_ends = (
            ray_indices[vis],
            t_starts[vis],
            t_ends[vis],
        )
    return ray_indices, t_starts, t_ends, proposal_samples
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/version.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
# Version identifier of this vendored nerfacc package.
__version__ = "0.3.5"
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/vol_rendering.py
================================================
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from typing import Callable, Optional, Tuple
import torch
from torch import Tensor
import nerfacc.cuda as _C
from .pack import pack_info
def rendering(
    # ray marching results
    t_starts: torch.Tensor,
    t_ends: torch.Tensor,
    ray_indices: torch.Tensor,
    n_rays: int,
    # radiance field
    rgb_sigma_fn: Optional[Callable] = None,
    rgb_alpha_fn: Optional[Callable] = None,
    # rendering options
    render_bkgd: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Volume-render rays from a radiance field queried at the given samples.

    The result is differentiable with respect to whatever the field callable
    returns, so it can be used for gradient-based optimization.

    Note:
        Pass either `rgb_sigma_fn` or `rgb_alpha_fn`. When both are passed,
        `rgb_sigma_fn` is the one that is used.

    Warning:
        This function is not differentiable to `t_starts`, `t_ends` and `ray_indices`.

    Args:
        t_starts: Per-sample start distance. Tensor with shape (n_samples, 1).
        t_ends: Per-sample end distance. Tensor with shape (n_samples, 1).
        ray_indices: Ray index of each sample. Tensor with shape (n_samples).
        n_rays: Total number of rays; fixes the first dimension of the outputs.
        rgb_sigma_fn: Callable taking {t_starts (N, 1), t_ends (N, 1),
            ray indices (N,)} and returning post-activation rgb (N, 3) and
            density (N, 1).
        rgb_alpha_fn: Callable taking {t_starts (N, 1), t_ends (N, 1),
            ray indices (N,)} and returning post-activation rgb (N, 3) and
            opacity (N, 1).
        render_bkgd: Optional. Background color. Tensor with shape (3,).

    Returns:
        Ray colors (n_rays, 3), opacities (n_rays, 1) and depths (n_rays, 1).

    Raises:
        ValueError: If neither `rgb_sigma_fn` nor `rgb_alpha_fn` is given.

    Examples:

    .. code-block:: python

        >>> rays_o = torch.rand((128, 3), device="cuda:0")
        >>> rays_d = torch.randn((128, 3), device="cuda:0")
        >>> rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
        >>> ray_indices, t_starts, t_ends = ray_marching(
        >>>     rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3)
        >>> def rgb_sigma_fn(t_starts, t_ends, ray_indices):
        >>>     # Dummy field returning random values.
        >>>     rgbs = torch.rand((t_starts.shape[0], 3), device="cuda:0")
        >>>     sigmas = torch.rand((t_starts.shape[0], 1), device="cuda:0")
        >>>     return rgbs, sigmas
        >>> colors, opacities, depths = rendering(
        >>>     t_starts, t_ends, ray_indices, n_rays=128, rgb_sigma_fn=rgb_sigma_fn)
        >>> print(colors.shape, opacities.shape, depths.shape)
        torch.Size([128, 3]) torch.Size([128, 1]) torch.Size([128, 1])

    """
    if rgb_sigma_fn is None and rgb_alpha_fn is None:
        raise ValueError(
            "At least one of `rgb_sigma_fn` and `rgb_alpha_fn` should be specified."
        )

    # Query the field (with gradients) and convert its output to weights.
    if rgb_sigma_fn is not None:
        rgbs, sigmas = rgb_sigma_fn(t_starts, t_ends, ray_indices)
        assert rgbs.shape[-1] == 3, "rgbs must have 3 channels, got {}".format(
            rgbs.shape
        )
        assert (
            sigmas.shape == t_starts.shape
        ), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
        weights = render_weight_from_density(
            t_starts,
            t_ends,
            sigmas,
            ray_indices=ray_indices,
            n_rays=n_rays,
        )
    else:
        # The guard above guarantees that `rgb_alpha_fn` is set here.
        rgbs, alphas = rgb_alpha_fn(t_starts, t_ends, ray_indices)
        assert rgbs.shape[-1] == 3, "rgbs must have 3 channels, got {}".format(
            rgbs.shape
        )
        assert (
            alphas.shape == t_starts.shape
        ), "alphas must have shape of (N, 1)! Got {}".format(alphas.shape)
        weights = render_weight_from_alpha(
            alphas,
            ray_indices=ray_indices,
            n_rays=n_rays,
        )

    # Accumulate color, opacity and depth (interval midpoints) per ray.
    colors = accumulate_along_rays(
        weights, ray_indices, values=rgbs, n_rays=n_rays
    )
    opacities = accumulate_along_rays(
        weights, ray_indices, values=None, n_rays=n_rays
    )
    interval_mid = (t_starts + t_ends) / 2.0
    depths = accumulate_along_rays(
        weights, ray_indices, values=interval_mid, n_rays=n_rays
    )

    # Composite onto the background where the ray is not fully opaque.
    if render_bkgd is not None:
        colors = colors + render_bkgd * (1.0 - opacities)
    return colors, opacities, depths
def accumulate_along_rays(
    weights: Tensor,
    ray_indices: Tensor,
    values: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    """Accumulate volumetric values along the ray.

    Computes, for every ray, the sum over its samples of `weights * values`
    (or of `weights` alone when `values` is None). The computation uses only
    standard tensor operations, so it works on any device.

    Note:
        This function is only differentiable to `weights` and `values`.

    Args:
        weights: Volumetric rendering weights of the samples. 2D tensor,
            typically with shape (n_samples, 1).
        ray_indices: Ray index of each sample. LongTensor with shape (n_samples).
        values: The values to be accumulated. Tensor with shape (n_samples, D).
            If None, the accumulated values are just the weights. Default is None.
        n_rays: Total number of rays; fixes the first dimension of the output.
            If None, it is inferred as `ray_indices.max() + 1`. If specified it
            must be larger than `ray_indices.max()`. Required when there are
            no samples. Default is None.

    Returns:
        Accumulated values with shape (n_rays, D) and the dtype of
        `weights * values`. If `values` is not given, the accumulated weights
        are returned, with D equal to the last dimension of `weights`.

    Examples:

    .. code-block:: python

        # Rendering: accumulate rgbs, opacities, and depths along the rays.
        colors = accumulate_along_rays(weights, ray_indices, values=rgbs, n_rays=n_rays)
        opacities = accumulate_along_rays(weights, ray_indices, values=None, n_rays=n_rays)
        depths = accumulate_along_rays(
            weights,
            ray_indices,
            values=(t_starts + t_ends) / 2.0,
            n_rays=n_rays,
        )
        # (n_rays, 3), (n_rays, 1), (n_rays, 1)
        print(colors.shape, opacities.shape, depths.shape)

    """
    assert ray_indices.dim() == 1 and weights.dim() == 2
    if values is not None:
        assert (
            values.dim() == 2 and values.shape[0] == weights.shape[0]
        ), "Invalid shapes: {} vs {}".format(values.shape, weights.shape)
        src = weights * values
    else:
        src = weights

    n_channels = src.shape[-1]
    if ray_indices.numel() == 0:
        # Nothing to infer the ray count from when there are no samples.
        assert n_rays is not None
        # Match the dtype of the non-empty path instead of the global default.
        return torch.zeros(
            (n_rays, n_channels), device=src.device, dtype=src.dtype
        )

    if n_rays is None:
        n_rays = int(ray_indices.max()) + 1

    # Scatter-add every sample row into the row of the ray it belongs to.
    index = ray_indices[:, None].expand(-1, n_channels)
    outputs = torch.zeros(
        (n_rays, n_channels), device=src.device, dtype=src.dtype
    )
    outputs.scatter_add_(0, index, src)
    return outputs
def accumulate_along_rays_importance(
    weights: Tensor,
    ray_indices: Tensor,
    values: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    """Accumulate volumetric values along the ray.

    Computes, for every ray, the sum over its samples of `weights * values`
    (or of `weights` alone when `values` is None). The computation uses only
    standard tensor operations, so it works on any device.

    Note:
        This function is only differentiable to `weights` and `values`.

    Args:
        weights: Volumetric rendering weights of the samples. 2D tensor,
            typically with shape (n_samples, 1).
        ray_indices: Ray index of each sample. LongTensor with shape (n_samples).
        values: The values to be accumulated. Tensor with shape (n_samples, D).
            If None, the accumulated values are just the weights. Default is None.
        n_rays: Total number of rays; fixes the first dimension of the output.
            If None, it is inferred as `ray_indices.max() + 1`. If specified it
            must be larger than `ray_indices.max()`. Required when there are
            no samples. Default is None.

    Returns:
        Accumulated values with shape (n_rays, D) and the dtype of
        `weights * values`. If `values` is not given, the accumulated weights
        are returned, with D equal to the last dimension of `weights`.

    Examples:

    .. code-block:: python

        colors = accumulate_along_rays_importance(
            weights, ray_indices, values=rgbs, n_rays=n_rays
        )
        opacities = accumulate_along_rays_importance(
            weights, ray_indices, values=None, n_rays=n_rays
        )
        # (n_rays, 3), (n_rays, 1)
        print(colors.shape, opacities.shape)

    """
    assert ray_indices.dim() == 1 and weights.dim() == 2
    if values is not None:
        assert (
            values.dim() == 2 and values.shape[0] == weights.shape[0]
        ), "Invalid shapes: {} vs {}".format(values.shape, weights.shape)
        src = weights * values
    else:
        src = weights

    n_channels = src.shape[-1]
    if ray_indices.numel() == 0:
        # Nothing to infer the ray count from when there are no samples.
        assert n_rays is not None
        # Match the dtype of the non-empty path instead of the global default.
        return torch.zeros(
            (n_rays, n_channels), device=src.device, dtype=src.dtype
        )

    if n_rays is None:
        n_rays = int(ray_indices.max()) + 1

    # Scatter-add every sample row into the row of the ray it belongs to.
    index = ray_indices[:, None].expand(-1, n_channels)
    outputs = torch.zeros(
        (n_rays, n_channels), device=src.device, dtype=src.dtype
    )
    outputs.scatter_add_(0, index, src)
    return outputs
def accumulate_along_rays_patch_based(
    weights: Tensor,
    ray_indices: Tensor,
    values: Optional[Tensor] = None,
    n_patches: Optional[int] = None,
) -> Tensor:
    """Accumulate volumetric values along the ray, one patch of pixels per index.

    Every sample carries a whole patch of entries. For every index in
    `ray_indices`, the samples that share it are summed element-wise as
    `weights * values` (or `weights` alone when `values` is None). The
    computation uses only standard tensor operations, so it works on any
    device.

    Note:
        This function is only differentiable to `weights` and `values`.

    Args:
        weights: Volumetric rendering weights of the samples. 3D tensor with
            shape (n_samples, patch_size, 1).
        ray_indices: Index of the patch each sample contributes to. LongTensor
            with shape (n_samples).
        values: The values to be accumulated. 3D tensor with shape
            (n_samples, patch_size, D). If None, the accumulated values are
            just the weights. Default is None.
        n_patches: Total number of patches; fixes the first dimension of the
            output. If None, it is inferred as `ray_indices.max() + 1`. If
            specified it must be larger than `ray_indices.max()`. Required
            when there are no samples. Default is None.

    Returns:
        Accumulated values with shape (n_patches, patch_size, D) and the dtype
        of `weights * values`. If `values` is not given, the accumulated
        weights are returned, with D equal to the last dimension of `weights`.

    Examples:

    .. code-block:: python

        # weights: (n_samples, patch_size, 1), rgbs: (n_samples, patch_size, 3)
        colors = accumulate_along_rays_patch_based(
            weights, ray_indices, values=rgbs, n_patches=n_patches
        )
        opacities = accumulate_along_rays_patch_based(
            weights, ray_indices, values=None, n_patches=n_patches
        )
        # (n_patches, patch_size, 3), (n_patches, patch_size, 1)
        print(colors.shape, opacities.shape)

    """
    assert ray_indices.dim() == 1 and weights.dim() == 3  # (num_samples, patch_size, 1)
    if values is not None:
        assert (
            values.dim() == 3 and values.shape[0] == weights.shape[0]
        ), "Invalid shapes: {} vs {}".format(values.shape, weights.shape)
        src = weights * values
    else:
        src = weights

    patch_size, n_channels = src.shape[1], src.shape[-1]
    if ray_indices.numel() == 0:
        # Nothing to infer the patch count from when there are no samples.
        assert n_patches is not None
        # Match the dtype of the non-empty path instead of the global default.
        return torch.zeros(
            (n_patches, patch_size, n_channels),
            device=src.device,
            dtype=src.dtype,
        )

    if n_patches is None:
        n_patches = int(ray_indices.max()) + 1

    # Scatter-add every sample's patch into the slot it belongs to.
    index = ray_indices[:, None, None].expand(-1, patch_size, n_channels)
    outputs = torch.zeros(
        (n_patches, patch_size, n_channels), device=src.device, dtype=src.dtype
    )
    outputs.scatter_add_(0, index, src)
    return outputs
def render_transmittance_from_density(
    t_starts: Tensor,
    t_ends: Tensor,
    sigmas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    r"""Compute transmittance :math:`T_i` from density :math:`\sigma_i`.

    .. math::
        T_i = exp(-\sum_{j=1}^{i-1}\sigma_j\delta_j)

    Note:
        At least one of `ray_indices` and `packed_info` must be given. With
        `ray_indices` and CUB available in the extension, the CUB-accelerated
        implementation is used. In every other case the naive implementation
        runs on `packed_info`, which is built from `ray_indices` when it was
        not passed in.

    Args:
        t_starts: Where the frustum-shape sample starts along a ray. Tensor with
            shape (n_samples, 1).
        t_ends: Where the frustum-shape sample ends along a ray. Tensor with
            shape (n_samples, 1).
        sigmas: The density values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Describes which samples belong to which ray.
            Tensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional. Number of rays. Only consulted when `packed_info` has
            to be derived from `ray_indices`; if omitted it is inferred from
            `ray_indices`.

    Returns:
        The rendering transmittance. Tensor with shape (n_sample, 1).

    Examples:

    .. code-block:: python

        >>> t_starts = torch.tensor(
        >>>     [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0], [6.0]], device="cuda")
        >>> t_ends = torch.tensor(
        >>>     [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0]], device="cuda")
        >>> sigmas = torch.tensor(
        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> transmittance = render_transmittance_from_density(
        >>>     t_starts, t_ends, sigmas, ray_indices=ray_indices)
        [[1.00], [0.67], [0.30], [1.00], [0.45], [1.00], [1.00]]

    """
    has_ray_indices = ray_indices is not None
    assert (
        has_ray_indices or packed_info is not None
    ), "Either ray_indices or packed_info should be provided."

    # Fast path: per-sample ray indices plus CUB support in the extension.
    if has_ray_indices and _C.is_cub_available():
        return _RenderingTransmittanceFromDensityCUB.apply(
            ray_indices, t_starts, t_ends, sigmas
        )

    # Fallback: the naive implementation works on the per-ray packed layout.
    if packed_info is None:
        packed_info = pack_info(ray_indices, n_rays=n_rays)
    return _RenderingTransmittanceFromDensityNaive.apply(
        packed_info, t_starts, t_ends, sigmas
    )
def render_transmittance_from_alpha(
    alphas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    r"""Compute transmittance :math:`T_i` from alpha :math:`\alpha_i`.

    .. math::
        T_i = \prod_{j=1}^{i-1}(1-\alpha_j)

    Note:
        At least one of `ray_indices` and `packed_info` must be given. With
        `ray_indices` and CUB available in the extension, the CUB-accelerated
        implementation is used. In every other case the naive implementation
        runs on `packed_info`, which is built from `ray_indices` when it was
        not passed in.

    Args:
        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Describes which samples belong to which ray.
            Tensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional. Number of rays. Only consulted when `packed_info` has
            to be derived from `ray_indices`; if omitted it is inferred from
            `ray_indices`.

    Returns:
        The rendering transmittance. Tensor with shape (n_sample, 1).

    Examples:

    .. code-block:: python

        >>> alphas = torch.tensor(
        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> transmittance = render_transmittance_from_alpha(alphas, ray_indices=ray_indices)
        tensor([[1.0], [0.6], [0.12], [1.0], [0.2], [1.0], [1.0]])

    """
    has_ray_indices = ray_indices is not None
    assert (
        has_ray_indices or packed_info is not None
    ), "Either ray_indices or packed_info should be provided."

    # Fast path: per-sample ray indices plus CUB support in the extension.
    if has_ray_indices and _C.is_cub_available():
        return _RenderingTransmittanceFromAlphaCUB.apply(ray_indices, alphas)

    # Fallback: the naive implementation works on the per-ray packed layout.
    if packed_info is None:
        packed_info = pack_info(ray_indices, n_rays=n_rays)
    return _RenderingTransmittanceFromAlphaNaive.apply(packed_info, alphas)
def render_weight_from_density(
    t_starts: Tensor,
    t_ends: Tensor,
    sigmas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> torch.Tensor:
    """Compute rendering weights :math:`w_i` from density :math:`\\sigma_i` and interval :math:`\\delta_i`.

    .. math::
        w_i = T_i(1 - exp(-\\sigma_i\\delta_i)), \\quad\\textrm{where}\\quad T_i = exp(-\\sum_{j=1}^{i-1}\\sigma_j\\delta_j)

    Note:
        At least one of `ray_indices` and `packed_info` must be given. With
        `ray_indices` and CUB available (CUDA >= 11.6) the transmittance comes
        from the CUB kernel and is multiplied by the per-sample opacity here.
        Otherwise the naive kernel computes the weights from `packed_info`.

    Args:
        t_starts: Where the frustum-shape sample starts along a ray. Tensor with
            shape (n_samples, 1).
        t_ends: Where the frustum-shape sample ends along a ray. Tensor with
            shape (n_samples, 1).
        sigmas: The density values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to the
            same ray. See :func:`nerfacc.ray_marching` for details. LongTensor
            with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape
            (n_sample).
        n_rays: Optional. Number of rays. Only consulted when `ray_indices` has
            to be converted to `packed_info` for the naive kernel. Leaving it
            out makes the conversion infer it from `ray_indices`, which is
            slower.

    Returns:
        The rendering weights. Tensor with shape (n_sample, 1).

    Examples:

    .. code-block:: python

        >>> t_starts = torch.tensor(
        >>>     [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0], [6.0]], device="cuda")
        >>> t_ends = torch.tensor(
        >>>     [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0]], device="cuda")
        >>> sigmas = torch.tensor(
        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> weights = render_weight_from_density(
        >>>     t_starts, t_ends, sigmas, ray_indices=ray_indices)
        [[0.33], [0.37], [0.03], [0.55], [0.04], [0.00], [0.59]]

    """
    assert (
        packed_info is not None or ray_indices is not None
    ), "Either ray_indices or packed_info should be provided."

    if ray_indices is not None and _C.is_cub_available():
        transmittance = _RenderingTransmittanceFromDensityCUB.apply(
            ray_indices, t_starts, t_ends, sigmas
        )
        # alpha_i = 1 - exp(-sigma_i * delta_i), with delta_i = t_end - t_start.
        opacity = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
        return transmittance * opacity

    if packed_info is None:
        packed_info = pack_info(ray_indices, n_rays=n_rays)
    return _RenderingWeightFromDensityNaive.apply(
        packed_info, t_starts, t_ends, sigmas
    )
def render_weight_from_alpha_patch_based(
    alphas: Tensor,
    ray_indices: Tensor,
    *,
    n_rays: Optional[int] = None,
) -> torch.Tensor:
    """Compute patch-based rendering weights :math:`w_i` from opacity :math:`\\alpha_i`.

    .. math::
        w_i = T_i\\alpha_i, \\quad\\textrm{where}\\quad T_i = \\prod_{j=1}^{i-1}(1-\\alpha_j)

    Note:
        This variant always converts `ray_indices` to `packed_info` with
        `pack_info` and runs the naive patch-based kernel. It has no CUB path
        and does not accept a precomputed `packed_info`.

    Args:
        alphas: The opacity values of the samples.
            NOTE(review): the non-patch functions document shape (n_samples, 1);
            the layout expected by the patch-based kernel is not visible here,
            so confirm against the extension.
        ray_indices: Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional. Number of rays, forwarded to `pack_info`. Leaving it
            out makes `pack_info` infer it from `ray_indices`.

    Returns:
        The rendering weights produced by the patch-based kernel.
    """
    return _RenderingWeightFromAlphaPatchBasedNaive.apply(
        pack_info(ray_indices, n_rays=n_rays), alphas
    )
def render_weight_and_transmittance_from_alpha_patch_based(
    alphas: Tensor,
    ray_indices: Tensor,
    *,
    n_rays: Optional[int] = None,
) -> tuple:
    """Compute patch-based rendering weights and transmittance from opacity.

    .. math::
        w_i = T_i\\alpha_i, \\quad\\textrm{where}\\quad T_i = \\prod_{j=1}^{i-1}(1-\\alpha_j)

    Note:
        This variant always converts `ray_indices` to `packed_info` with
        `pack_info` and runs the naive patch-based kernel. It has no CUB path
        and does not accept a precomputed `packed_info`.

    Args:
        alphas: The opacity values of the samples.
            NOTE(review): the non-patch functions document shape (n_samples, 1);
            the layout expected by the patch-based kernel is not visible here,
            so confirm against the extension.
        ray_indices: Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional. Number of rays, forwarded to `pack_info`. Leaving it
            out makes `pack_info` infer it from `ray_indices`.

    Returns:
        A ``(weights, transmittance)`` pair of tensors, in that order. The
        previous ``-> torch.Tensor`` annotation did not match the two values
        actually returned.
    """
    packed_info = pack_info(ray_indices, n_rays=n_rays)
    weights, transmittance = (
        _RenderingWeightAndTransmittanceFromAlphaPatchBasedNaive.apply(
            packed_info, alphas
        )
    )
    return weights, transmittance
def render_weight_from_alpha(
    alphas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> torch.Tensor:
    """Compute rendering weights :math:`w_i` from opacity :math:`\\alpha_i`.

    .. math::
        w_i = T_i\\alpha_i, \\quad\\textrm{where}\\quad T_i = \\prod_{j=1}^{i-1}(1-\\alpha_j)

    Note:
        At least one of `ray_indices` and `packed_info` must be given. With
        `ray_indices` and CUB available (CUDA >= 11.6) the transmittance comes
        from the CUB kernel and is multiplied by `alphas` here. Otherwise the
        naive kernel computes the weights from `packed_info`.

    Args:
        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to the
            same ray. See :func:`nerfacc.ray_marching` for details. LongTensor
            with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape
            (n_sample).
        n_rays: Optional. Number of rays. Only consulted when `ray_indices` has
            to be converted to `packed_info` for the naive kernel. Leaving it
            out makes the conversion infer it from `ray_indices`, which is
            slower.

    Returns:
        The rendering weights. Tensor with shape (n_sample, 1).

    Examples:

    .. code-block:: python

        >>> alphas = torch.tensor(
        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> weights = render_weight_from_alpha(alphas, ray_indices=ray_indices)
        tensor([[0.4], [0.48], [0.012], [0.8], [0.02], [0.0], [0.9]])

    """
    assert (
        packed_info is not None or ray_indices is not None
    ), "Either ray_indices or packed_info should be provided."

    if ray_indices is not None and _C.is_cub_available():
        transmittance = _RenderingTransmittanceFromAlphaCUB.apply(
            ray_indices, alphas
        )
        return transmittance * alphas

    if packed_info is None:
        packed_info = pack_info(ray_indices, n_rays=n_rays)
    return _RenderingWeightFromAlphaNaive.apply(packed_info, alphas)
@torch.no_grad()
def render_visibility(
    alphas: torch.Tensor,
    *,
    ray_indices: Optional[torch.Tensor] = None,
    packed_info: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
) -> torch.Tensor:
    """Filter out transparent and occluded samples.

    The transmittance is computed from the sample opacity and compared with
    `early_stop_eps` to drop occluded samples. When `alpha_thre` is positive,
    the opacity is additionally compared with it to drop transparent samples.
    A sample is reported visible when `transmittance >= early_stop_eps` and,
    if the opacity test is active, `opacity >= alpha_thre`. Runs under
    `torch.no_grad()`.

    Note:
        At least one of `ray_indices` and `packed_info` must be given. The CUB
        kernel is used when `ray_indices` is given and CUB is available
        (CUDA >= 11.6). In every other case the naive kernel runs on
        `packed_info`, which is derived from `ray_indices` when not supplied.

    Args:
        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to the
            same ray. See :func:`nerfacc.ray_marching` for details. LongTensor
            with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape
            (n_sample).
        n_rays: Optional. Number of rays. Only consulted when `ray_indices` has
            to be converted to `packed_info` for the naive kernel. Leaving it
            out makes the conversion infer it from `ray_indices`, which is
            slower.
        early_stop_eps: The early stopping threshold on transmittance.
        alpha_thre: The threshold on opacity. The opacity test is skipped
            unless this is greater than zero.

    Returns:
        The visibility of each sample as a boolean tensor with the trailing
        dimension squeezed away.

    Examples:

    .. code-block:: python

        >>> alphas = torch.tensor(
        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> transmittance = render_transmittance_from_alpha(alphas, ray_indices=ray_indices)
        tensor([[1.0], [0.6], [0.12], [1.0], [0.2], [1.0], [1.0]])
        >>> visibility = render_visibility(
        >>>     alphas, ray_indices=ray_indices, early_stop_eps=0.3, alpha_thre=0.2)
        tensor([True,  True, False,  True, False, False,  True])

    """
    assert (
        packed_info is not None or ray_indices is not None
    ), "Either ray_indices or packed_info should be provided."

    use_cub = ray_indices is not None and _C.is_cub_available()
    if use_cub:
        transmittance = _RenderingTransmittanceFromAlphaCUB.apply(
            ray_indices, alphas
        )
    else:
        if packed_info is None:
            packed_info = pack_info(ray_indices, n_rays=n_rays)
        transmittance = _RenderingTransmittanceFromAlphaNaive.apply(
            packed_info, alphas
        )

    visible = transmittance >= early_stop_eps
    if alpha_thre > 0:
        visible = visible & (alphas >= alpha_thre)
    return visible.squeeze(-1)
@torch.no_grad()
def render_visibility_patch_based(
    alphas: torch.Tensor,
    *,
    ray_indices: Optional[torch.Tensor] = None,
    packed_info: Optional[torch.Tensor] = None,
    n_patches: Optional[int] = None,
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
) -> torch.Tensor:
    """Filter out transparent and occluded samples (patch-based variant).

    The transmittance is computed from the sample opacity. A sample passes the
    occlusion test when *any* entry along dimension 1 of its transmittance is
    `>= early_stop_eps`. When `alpha_thre` is positive, the result is further
    combined with `alphas >= alpha_thre`. Runs under `torch.no_grad()`.

    Note:
        At least one of `ray_indices` and `packed_info` must be given. When
        `ray_indices` is given and CUB is available, the (non patch-based) CUB
        transmittance kernel is used. Otherwise the naive patch-based kernel
        runs on `packed_info`, derived from `ray_indices` when not supplied.
        NOTE(review): the CUB branch reuses the per-ray kernel rather than a
        patch-based one; confirm that is intended for patch-shaped `alphas`.

    Args:
        alphas: The opacity values of the samples.
            NOTE(review): the per-ray functions document shape (n_samples, 1);
            the patch-based layout is not visible here, so confirm.
        packed_info: Optional. Stores information on which samples belong to the
            same ray. See :func:`nerfacc.ray_marching` for details.
        ray_indices: Optional. Ray index of each sample. LongTensor with shape
            (n_sample).
        n_patches: Optional. Passed to `pack_info` as its `n_rays` argument when
            `ray_indices` has to be converted to `packed_info`.
        early_stop_eps: The early stopping threshold on transmittance.
        alpha_thre: The threshold on opacity. The opacity test is skipped
            unless this is greater than zero.

    Returns:
        The visibility as a boolean tensor with only the trailing singleton
        dimension removed, so a single-sample input still yields a 1-D mask.
    """
    assert (
        packed_info is not None or ray_indices is not None
    ), "Either ray_indices or packed_info should be provided."

    if ray_indices is not None and _C.is_cub_available():
        transmittance = _RenderingTransmittanceFromAlphaCUB.apply(
            ray_indices, alphas
        )
    else:
        if packed_info is None:
            packed_info = pack_info(ray_indices, n_rays=n_patches)
        transmittance = _RenderingTransmittanceFromAlphaPatchBasedNaive.apply(
            packed_info, alphas
        )

    # A sample stays visible as long as at least one column still transmits.
    visibility = torch.any(transmittance >= early_stop_eps, dim=1, keepdim=True)
    if alpha_thre > 0:
        visibility = visibility & (alphas >= alpha_thre)
    # squeeze(-1), as in render_visibility: a bare squeeze() would also drop
    # the sample dimension when there is exactly one sample, returning a 0-d
    # tensor that no longer works as a per-sample mask.
    return visibility.squeeze(-1)
class _RenderingTransmittanceFromDensityCUB(torch.autograd.Function):
    """Autograd wrapper around the CUB transmittance-from-density kernels.

    Only `sigmas` (the fourth forward input) receives a gradient.
    """

    @staticmethod
    def forward(ctx, ray_indices, t_starts, t_ends, sigmas):
        # The extension is always handed contiguous tensors.
        inputs = [
            t.contiguous() for t in (ray_indices, t_starts, t_ends, sigmas)
        ]
        transmittance = _C.transmittance_from_sigma_forward_cub(*inputs)
        if ctx.needs_input_grad[3]:
            # The backward kernel takes the first three inputs plus the result.
            ctx.save_for_backward(*inputs[:3], transmittance)
        return transmittance

    @staticmethod
    def backward(ctx, transmittance_grads):
        upstream = transmittance_grads.contiguous()
        ray_indices, t_starts, t_ends, transmittance = ctx.saved_tensors
        grad_sigmas = _C.transmittance_from_sigma_backward_cub(
            ray_indices, t_starts, t_ends, transmittance, upstream
        )
        # One slot per forward input: ray_indices, t_starts, t_ends, sigmas.
        return None, None, None, grad_sigmas
class _RenderingTransmittanceFromDensityNaive(torch.autograd.Function):
    """Autograd wrapper around the naive transmittance-from-density kernels.

    Only `sigmas` (the fourth forward input) receives a gradient.
    """

    @staticmethod
    def forward(ctx, packed_info, t_starts, t_ends, sigmas):
        # The extension is always handed contiguous tensors.
        inputs = [
            t.contiguous() for t in (packed_info, t_starts, t_ends, sigmas)
        ]
        transmittance = _C.transmittance_from_sigma_forward_naive(*inputs)
        if ctx.needs_input_grad[3]:
            # The backward kernel takes the first three inputs plus the result.
            ctx.save_for_backward(*inputs[:3], transmittance)
        return transmittance

    @staticmethod
    def backward(ctx, transmittance_grads):
        upstream = transmittance_grads.contiguous()
        packed_info, t_starts, t_ends, transmittance = ctx.saved_tensors
        grad_sigmas = _C.transmittance_from_sigma_backward_naive(
            packed_info, t_starts, t_ends, transmittance, upstream
        )
        # One slot per forward input: packed_info, t_starts, t_ends, sigmas.
        return None, None, None, grad_sigmas
class _RenderingTransmittanceFromAlphaCUB(torch.autograd.Function):
    """Autograd wrapper around the CUB transmittance-from-opacity kernels.

    Only `alphas` (the second forward input) receives a gradient.
    """

    @staticmethod
    def forward(ctx, ray_indices, alphas):
        ray_indices, alphas = ray_indices.contiguous(), alphas.contiguous()
        transmittance = _C.transmittance_from_alpha_forward_cub(
            ray_indices, alphas
        )
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(ray_indices, transmittance, alphas)
        return transmittance

    @staticmethod
    def backward(ctx, transmittance_grads):
        upstream = transmittance_grads.contiguous()
        ray_indices, transmittance, alphas = ctx.saved_tensors
        grad_alphas = _C.transmittance_from_alpha_backward_cub(
            ray_indices, alphas, transmittance, upstream
        )
        # One slot per forward input: ray_indices, alphas.
        return None, grad_alphas
class _RenderingTransmittanceFromAlphaNaive(torch.autograd.Function):
    """Autograd wrapper around the naive transmittance-from-opacity kernels.

    Only `alphas` (the second forward input) receives a gradient.
    """

    @staticmethod
    def forward(ctx, packed_info, alphas):
        packed_info, alphas = packed_info.contiguous(), alphas.contiguous()
        transmittance = _C.transmittance_from_alpha_forward_naive(
            packed_info, alphas
        )
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(packed_info, transmittance, alphas)
        return transmittance

    @staticmethod
    def backward(ctx, transmittance_grads):
        upstream = transmittance_grads.contiguous()
        packed_info, transmittance, alphas = ctx.saved_tensors
        grad_alphas = _C.transmittance_from_alpha_backward_naive(
            packed_info, alphas, transmittance, upstream
        )
        # One slot per forward input: packed_info, alphas.
        return None, grad_alphas
class _RenderingTransmittanceFromAlphaPatchBasedNaive(torch.autograd.Function):
    """Autograd wrapper around the naive patch-based transmittance kernel.

    Only `alphas` (the second forward input) receives a gradient.
    """

    @staticmethod
    def forward(ctx, packed_info, alphas):
        packed_info, alphas = packed_info.contiguous(), alphas.contiguous()
        transmittance = _C.transmittance_from_alpha_patch_based_forward_naive(
            packed_info, alphas
        )
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(packed_info, transmittance, alphas)
        return transmittance

    @staticmethod
    def backward(ctx, grad_transmittance):
        upstream = grad_transmittance.contiguous()
        packed_info, transmittance, alphas = ctx.saved_tensors
        # NOTE(review): this calls the *weight_and_transmittance* backward
        # kernel with (packed_info, alphas, transmittance, grad), whereas
        # _RenderingWeightAndTransmittanceFromAlphaPatchBasedNaive calls the
        # same kernel with (weights, grad_weights, packed_info, alphas).
        # One of the two looks like a copy-paste slip; confirm against the
        # extension before relying on this gradient.
        grad_alphas = (
            _C.weight_and_transmittance_from_alpha_patch_based_backward_naive(
                packed_info, alphas, transmittance, upstream
            )
        )
        # One slot per forward input: packed_info, alphas.
        return None, grad_alphas
class _RenderingWeightFromDensityNaive(torch.autograd.Function):
    """Autograd wrapper around the naive weight-from-density kernels.

    Only `sigmas` (the fourth forward input) receives a gradient.
    """

    @staticmethod
    def forward(ctx, packed_info, t_starts, t_ends, sigmas):
        # The extension is always handed contiguous tensors.
        inputs = [
            t.contiguous() for t in (packed_info, t_starts, t_ends, sigmas)
        ]
        weights = _C.weight_from_sigma_forward_naive(*inputs)
        if ctx.needs_input_grad[3]:
            # All four inputs and the result are needed by the backward kernel.
            ctx.save_for_backward(*inputs, weights)
        return weights

    @staticmethod
    def backward(ctx, grad_weights):
        upstream = grad_weights.contiguous()
        packed_info, t_starts, t_ends, sigmas, weights = ctx.saved_tensors
        grad_sigmas = _C.weight_from_sigma_backward_naive(
            weights, upstream, packed_info, t_starts, t_ends, sigmas
        )
        # One slot per forward input: packed_info, t_starts, t_ends, sigmas.
        return None, None, None, grad_sigmas
class _RenderingWeightFromAlphaNaive(torch.autograd.Function):
    """Autograd wrapper around the naive weight-from-opacity kernels.

    Only `alphas` (the second forward input) receives a gradient.
    """

    @staticmethod
    def forward(ctx, packed_info, alphas):
        packed_info, alphas = packed_info.contiguous(), alphas.contiguous()
        weights = _C.weight_from_alpha_forward_naive(packed_info, alphas)
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(packed_info, alphas, weights)
        return weights

    @staticmethod
    def backward(ctx, grad_weights):
        upstream = grad_weights.contiguous()
        packed_info, alphas, weights = ctx.saved_tensors
        grad_alphas = _C.weight_from_alpha_backward_naive(
            weights, upstream, packed_info, alphas
        )
        # One slot per forward input: packed_info, alphas.
        return None, grad_alphas
class _RenderingWeightFromAlphaPatchBasedNaive(torch.autograd.Function):
    """Autograd wrapper around the naive patch-based weight-from-opacity kernels.

    Only `alphas` (the second forward input) receives a gradient.
    """

    @staticmethod
    def forward(ctx, packed_info, alphas):
        packed_info, alphas = packed_info.contiguous(), alphas.contiguous()
        weights = _C.weight_from_alpha_patch_based_forward_naive(
            packed_info, alphas
        )
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(packed_info, alphas, weights)
        return weights

    @staticmethod
    def backward(ctx, grad_weights):
        upstream = grad_weights.contiguous()
        packed_info, alphas, weights = ctx.saved_tensors
        grad_alphas = _C.weight_from_alpha_patch_based_backward_naive(
            weights, upstream, packed_info, alphas
        )
        # One slot per forward input: packed_info, alphas.
        return None, grad_alphas
class _RenderingWeightAndTransmittanceFromAlphaPatchBasedNaive(torch.autograd.Function):
    """Autograd wrapper returning patch-based weights and transmittance.

    The forward pass yields ``(weights, transmittance)``. Only `alphas` (the
    second forward input) receives a gradient, and that gradient is computed
    from the upstream gradient of `weights` alone.
    """

    @staticmethod
    def forward(ctx, packed_info, alphas):
        packed_info, alphas = packed_info.contiguous(), alphas.contiguous()
        outputs = _C.weight_and_transmittance_from_alpha_patch_based_forward_naive(
            packed_info, alphas
        )
        weights, transmittance = outputs
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(packed_info, alphas, weights)
        return weights, transmittance

    @staticmethod
    def backward(ctx, grad_weights, grad_transmittance):
        # NOTE(review): grad_transmittance is accepted but never used, so any
        # loss term that depends on the returned transmittance contributes
        # nothing to grad_alphas. Confirm this is intended.
        upstream = grad_weights.contiguous()
        packed_info, alphas, weights = ctx.saved_tensors
        grad_alphas = (
            _C.weight_and_transmittance_from_alpha_patch_based_backward_naive(
                weights, upstream, packed_info, alphas
            )
        )
        # One slot per forward input: packed_info, alphas.
        return None, grad_alphas
class _RenderingWeightFromAlphaImportanceSamplingNaive(torch.autograd.Function):
    """Autograd wrapper around the naive importance-sampling weight kernel.

    The forward pass takes `packed_info`, `alphas` and `importance_pdfs`.
    Only `alphas` receives a gradient.
    """

    @staticmethod
    def forward(ctx, packed_info, alphas, importance_pdfs):
        packed_info = packed_info.contiguous()
        alphas = alphas.contiguous()
        importance_pdfs = importance_pdfs.contiguous()
        weights = _C.weight_from_alpha_importance_sampling_forward_naive(
            packed_info, alphas, importance_pdfs
        )
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(packed_info, alphas, importance_pdfs, weights)
        return weights

    @staticmethod
    def backward(ctx, grad_weights):
        grad_weights = grad_weights.contiguous()
        packed_info, alphas, importance_pdfs, weights = ctx.saved_tensors
        # NOTE(review): this reuses `weight_from_alpha_backward_naive` with an
        # extra `importance_pdfs` argument, while _RenderingWeightFromAlphaNaive
        # calls the same entry point with four arguments. Confirm the extension
        # accepts this call (a dedicated importance-sampling backward kernel
        # may have been intended).
        grad_alphas = _C.weight_from_alpha_backward_naive(
            weights, grad_weights, packed_info, alphas, importance_pdfs
        )
        # autograd requires exactly one gradient per forward input
        # (packed_info, alphas, importance_pdfs); returning only two values
        # makes backward fail with "incorrect number of gradients".
        return None, grad_alphas, None
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/scripts/run_aws_listing.py
================================================
"""Publish one ``whl/<directory>.html`` link page per wheel directory in S3.

The bucket is scanned for keys of the form ``whl/<directory>/<file>``. For each
``<directory>`` an HTML page linking to its files is uploaded to
``whl/<directory>.html`` with content type ``text/html``.
"""
import argparse
import html
import os
import tempfile
from urllib.parse import quote

from boto3 import client


def group_wheels_by_directory(keys):
    """Group ``whl/<directory>/<file>`` keys by ``<directory>``.

    Keys that do not have exactly three ``/``-separated parts are ignored.
    A key with an empty file part (a directory placeholder) registers the
    directory without adding a file.

    Grouping is done on the exact directory name in a single pass. Re-listing
    with ``Prefix=f"whl/{directory}"`` (no trailing slash) would also return
    files of sibling directories whose names merely start with ``directory``.
    """
    grouped = {}
    for key in keys:
        parts = key.split("/")
        if len(parts) != 3:
            continue
        _, directory, filename = parts
        files = grouped.setdefault(directory, [])
        if filename:
            files.append(filename)
    return grouped


def render_index_page(base_url, directory, filenames):
    """Return the HTML page that links to every file of ``directory``.

    ``base_url`` must end with ``/``. Path segments are percent-encoded so
    characters such as ``+`` survive in the URL, and all interpolated text is
    HTML-escaped.
    """
    links = []
    for filename in filenames:
        href = f"{base_url}whl/{quote(directory)}/{quote(filename)}"
        links.append(
            f'<a href="{html.escape(href, quote=True)}">'
            f"{html.escape(filename)}</a>\n<br/>\n"
        )
    body = "".join(links)
    return f"<html>\n<head></head>\n<body>\n{body}\n</body>\n</html>\n"


def main():
    """Parse the command line, list the bucket and upload the link pages."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--access_key_id", type=str, required=True)
    parser.add_argument("--secret_access_key", type=str, required=True)
    parser.add_argument("--bucket", type=str, required=True)
    parser.add_argument("--region", type=str, required=True)
    args = parser.parse_args()

    base_url = f"https://{args.bucket}.s3.{args.region}.amazonaws.com/"

    s3 = client(
        "s3",
        aws_access_key_id=args.access_key_id,
        aws_secret_access_key=args.secret_access_key,
    )

    # A single list_objects_v2 call returns a limited number of keys and omits
    # "Contents" entirely when nothing matches, so paginate and default to [].
    paginator = s3.get_paginator("list_objects_v2")
    keys = []
    for page in paginator.paginate(Bucket=args.bucket, Prefix="whl/"):
        keys.extend(obj["Key"] for obj in page.get("Contents", []))

    subdirectories = group_wheels_by_directory(keys)

    # upload_file needs a path on disk; a temporary directory avoids leaving
    # files behind in a fixed, shared location.
    with tempfile.TemporaryDirectory() as tmp_dir:
        for directory, filenames in subdirectories.items():
            page_path = os.path.join(tmp_dir, f"{directory}.html")
            with open(page_path, "w", encoding="utf-8") as f:
                f.write(render_index_page(base_url, directory, filenames))
            s3.upload_file(
                page_path,
                args.bucket,
                f"whl/{directory}.html",
                ExtraArgs={"ContentType": "text/html"},
            )


if __name__ == "__main__":
    main()
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/scripts/run_dev_checks.py
================================================
#!/usr/bin/env python
"""Simple yaml debugger"""
import subprocess
import yaml
from rich.console import Console
from rich.style import Style
# Shared rich console used for all output of this script; width fixed at 120.
console = Console(width=120)
# Names of the workflow steps that run_github_actions_file executes locally.
# Steps whose name is not listed here are reported as skipped.
LOCAL_TESTS = [
    "Run license checks",
    "Run isort",
    "Run Black",
    "Python Pylint",
    "Test with pytest",
]
def run_command(command: str) -> bool:
    """Run `command` through the shell and report whether it succeeded.

    A non-zero exit status is reported on the console in red. No exception is
    raised and the process keeps running.

    Args:
        command: command to run
    Returns:
        True when the command exited with status 0, False otherwise.
    """
    succeeded = subprocess.call(command, shell=True) == 0
    if not succeeded:
        console.print(f"[bold red]Error: `{command}` failed.")
    return succeeded
def run_github_actions_file(filename: str):
    """Run a github actions file locally.

    Every step of the ``build`` job whose name is listed in `LOCAL_TESTS` is
    executed as a shell command; all other steps are reported as skipped.
    Afterwards `pytest` and the documentation build are run, and a summary
    banner is printed. Once any command has failed, the remaining commands are
    not executed.

    Args:
        filename: Which yml github actions file to run.
    """
    with open(filename, "rb") as f:
        workflow = yaml.safe_load(f)
    steps = workflow["jobs"]["build"]["steps"]

    success = True
    for step in steps:
        has_name = "name" in step
        if not (has_name and step["name"] in LOCAL_TESTS):
            skipped = step["name"] if has_name else step["uses"]
            console.print(f"Skipping {skipped}")
            continue

        # Collapse the multi-line script into one shell line, drop line
        # continuations, and remove "--check" from the command.
        command = (
            step["run"]
            .replace("\n", ";")
            .replace("\\", "")
            .replace("--check", "")
        )
        console.line()
        console.rule(f"[bold green]Running: {command}")
        if success:
            success = run_command(command)

    # Code Testing
    console.line()
    console.rule("[bold green]Running pytest")
    if success:
        success = run_command("pytest")

    # Add checks for building documentation
    console.line()
    console.rule("[bold green]Building Documentation")
    if success:
        success = run_command(
            "cd docs/; make clean; make html SPHINXOPTS='-W;'"
        )

    console.line()
    if success:
        console.rule(characters="=")
        console.print(
            "[bold green]:TADA: :TADA: :TADA: ALL CHECKS PASSED :TADA: :TADA: :TADA:",
            justify="center",
        )
        console.rule(characters="=")
        return

    console.rule(characters="=", style=Style(color="red"))
    console.print(
        "[bold red]:skull: :skull: :skull: ERRORS FOUND :skull: :skull: :skull:",
        justify="center",
    )
    console.rule(characters="=", style=Style(color="red"))
# Script entry point: run the checks of the repository's code_checks workflow.
# The path is relative to the current working directory.
if __name__ == "__main__":
    run_github_actions_file(filename=".github/workflows/code_checks.yml")
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/scripts/run_profiler.py
================================================
from typing import Callable
import torch
import tqdm
import nerfacc
# timing
# https://github.com/pytorch/pytorch/commit/d2784c233bfc57a1d836d961694bcc8ec4ed45e4
class Profiler:
    """Callable that profiles a zero-argument function with `torch.profiler`.

    Args:
        warmup: Number of un-profiled calls made before measuring.
        repeat: Number of profiled calls; the reported times are divided by it.
    """

    def __init__(self, warmup=10, repeat=1000):
        self.warmup = warmup
        self.repeat = repeat

    def __call__(self, func: Callable):
        """Profile `func` and return ``(cpu_us, cuda_us, cuda_bytes)``.

        Returns:
            The summed self CPU time and summed self CUDA time of all profiler
            events, each divided by `repeat` (in us), and the largest self CUDA
            memory usage reported by any single event (in bytes).
        """
        # warmup
        for _ in range(self.warmup):
            func()
        torch.cuda.synchronize()

        # profile
        activities = [
            torch.profiler.ProfilerActivity.CPU,
            torch.profiler.ProfilerActivity.CUDA,
        ]
        with torch.profiler.profile(
            activities=activities, profile_memory=True
        ) as prof:
            for _ in range(self.repeat):
                func()
            torch.cuda.synchronize()

        # aggregate
        events = prof.key_averages()
        cpu_time_us = (
            sum(event.self_cpu_time_total for event in events) / self.repeat
        )
        cuda_time_us = (
            sum(event.self_cuda_time_total for event in events) / self.repeat
        )
        cuda_memory_bytes = max(
            event.self_cuda_memory_usage for event in events
        )
        return cpu_time_us, cuda_time_us, cuda_memory_bytes
def main():
    """Profile forward+backward of `nerfacc.render_weight_from_density`.

    Two configurations are measured on ``cuda:0``: one driven by `ray_indices`
    and one driven by `packed_info`. For each, the CPU time, CUDA time and CUDA
    memory reported by `Profiler` are printed.
    """
    device = "cuda:0"
    torch.manual_seed(42)
    profiler = Profiler(warmup=10, repeat=100)

    def report(fn):
        """Warm `fn` up, then print the Profiler measurements for it."""
        fn()
        torch.cuda.synchronize()
        for _ in tqdm.tqdm(range(100)):
            fn()
            torch.cuda.synchronize()
        cpu_t, cuda_t, cuda_bytes = profiler(fn)
        print(
            f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB"
        )

    # # contract
    # print("* contract")
    # x = torch.rand([1024, 3], device=device)
    # roi = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)
    # fn = lambda: nerfacc.contract(
    #     x, roi=roi, type=nerfacc.ContractionType.UN_BOUNDED_TANH
    # )
    # cpu_t, cuda_t, cuda_bytes = profiler(fn)
    # print(f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB")

    # rendering
    print("* rendering")
    batch_size = 81920
    rays_o = torch.rand((batch_size, 3), device=device)
    rays_d = torch.randn((batch_size, 3), device=device)
    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

    ray_indices, t_starts, t_ends = nerfacc.ray_marching(
        rays_o,
        rays_d,
        near_plane=0.1,
        far_plane=1.0,
        render_step_size=1e-1,
    )
    sigmas = torch.randn_like(t_starts, requires_grad=True)

    # render_weight_from_density takes (t_starts, t_ends, sigmas) positionally;
    # ray_indices / packed_info are keyword-only.
    report(
        lambda: nerfacc.render_weight_from_density(
            t_starts, t_ends, sigmas, ray_indices=ray_indices
        )
        .sum()
        .backward()
    )

    # Supplying only packed_info selects the naive (non-CUB) implementation.
    packed_info = nerfacc.pack_info(ray_indices, n_rays=batch_size)
    report(
        lambda: nerfacc.render_weight_from_density(
            t_starts, t_ends, sigmas, packed_info=packed_info
        )
        .sum()
        .backward()
    )
# Script entry point: run the profiling session defined in main().
if __name__ == "__main__":
    main()
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/setup.cfg
================================================
[isort]
multi_line_output = 3
line_length = 80
include_trailing_comma = true
skip=./examples/pycolmap
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/setup.py
================================================
import glob
import os
import os.path as osp
import platform
import sys
from setuptools import find_packages, setup
__version__ = None
# Execute nerfacc/version.py in this module's namespace; it is expected to
# assign __version__. The path is relative to the current working directory.
# The context manager closes the file handle deterministically.
with open("nerfacc/version.py", "r") as version_file:
    exec(version_file.read())

URL = "https://github.com/KAIR-BAIR/nerfacc"

# Environment switches, enabled by setting the variable to "1":
# BUILD_NO_CUDA skips building the extension module altogether;
# WITH_SYMBOLS keeps symbols by not passing "-s" to the linker.
BUILD_NO_CUDA = os.getenv("BUILD_NO_CUDA", "0") == "1"
WITH_SYMBOLS = os.getenv("WITH_SYMBOLS", "0") == "1"
def get_ext():
    """Return torch's `BuildExtension` command class configured for this build.

    The class is created with ``no_python_abi_suffix=True`` and
    ``use_ninja=False``. torch is imported lazily so that importing this module
    does not require it.
    """
    from torch.utils.cpp_extension import BuildExtension

    build_options = {"no_python_abi_suffix": True, "use_ninja": False}
    return BuildExtension.with_options(**build_options)
def get_extensions():
    """Build the list of extension modules (a single `CUDAExtension`).

    Sources are the ``*.cu`` files under ``nerfacc/cuda/csrc``, excluding any
    path containing ``hip``. Compiler and linker flags depend on the platform,
    on whether torch reports an OpenMP backend, on whether torch is a HIP
    build, and on the ``NVCC_FLAGS`` / ``WITH_SYMBOLS`` environment settings.

    Returns:
        A one-element list holding the configured ``nerfacc.csrc`` extension.
    """
    import torch
    from torch.__config__ import parallel_info
    from torch.utils.cpp_extension import CUDAExtension

    extensions_dir = osp.join("nerfacc", "cuda", "csrc")
    # Sorted so the compile/link order does not depend on directory order.
    sources = sorted(glob.glob(osp.join(extensions_dir, "*.cu")))
    # remove generated 'hip' files, in case of rebuilds
    sources = [path for path in sources if "hip" not in path]

    undef_macros = []
    define_macros = []

    if sys.platform == "win32":
        define_macros += [("nerfacc_EXPORTS", None)]

    extra_compile_args = {"cxx": ["-O3"]}
    if os.name != "nt":  # Not on Windows:
        extra_compile_args["cxx"] += ["-Wno-sign-compare"]
    extra_link_args = [] if WITH_SYMBOLS else ["-s"]

    info = parallel_info()
    if (
        "backend: OpenMP" in info
        and "OpenMP not found" not in info
        and sys.platform != "darwin"
    ):
        extra_compile_args["cxx"] += ["-DAT_PARALLEL_OPENMP"]
        if sys.platform == "win32":
            extra_compile_args["cxx"] += ["/openmp"]
        else:
            extra_compile_args["cxx"] += ["-fopenmp"]
    else:
        print("Compiling without OpenMP...")

    # Compile for mac arm64
    if sys.platform == "darwin" and platform.machine() == "arm64":
        extra_compile_args["cxx"] += ["-arch", "arm64"]
        extra_link_args += ["-arch", "arm64"]

    # split() without an argument ignores leading, trailing and repeated
    # whitespace, so no empty-string argument can reach nvcc; an unset or
    # empty variable yields [].
    nvcc_flags = os.getenv("NVCC_FLAGS", "").split()
    nvcc_flags += ["-O3"]

    if torch.version.hip:
        # USE_ROCM was added to later versions of PyTorch.
        # Define here to support older PyTorch versions as well:
        define_macros += [("USE_ROCM", None)]
        undef_macros += ["__HIP_NO_HALF_CONVERSIONS__"]
    else:
        nvcc_flags += ["--expt-relaxed-constexpr"]
    extra_compile_args["nvcc"] = nvcc_flags

    extension = CUDAExtension(
        "nerfacc.csrc",
        sources,
        include_dirs=[osp.join(extensions_dir, "include")],
        define_macros=define_macros,
        undef_macros=undef_macros,
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )

    return [extension]
# work-around hipify abs paths
include_package_data = True
# if torch.cuda.is_available() and torch.version.hip:
#     include_package_data = False

# Package definition. When BUILD_NO_CUDA is set, no extension module is built
# and the custom build_ext command is not registered, so torch is not imported
# by this script.
setup(
    name="nerfacc",
    version=__version__,
    description="A General NeRF Acceleration Toolbox",
    author="Ruilong",
    author_email="ruilongli94@gmail.com",
    url=URL,
    download_url=f"{URL}/archive/{__version__}.tar.gz",
    keywords=[],
    python_requires=">=3.7",
    install_requires=["rich>=12", "torch"],
    extras_require={
        # dev dependencies. Install them by `pip install nerfacc[dev]`
        "dev": [
            "black[jupyter]==22.3.0",
            "isort==5.10.1",
            "pylint==2.13.4",
            "pytest==7.1.2",
            "pytest-xdist==2.5.0",
            "typeguard>=2.13.3",
            "pyyaml==6.0",
            "build",
            "twine",
        ],
    },
    # get_extensions() / get_ext() are only called for CUDA builds.
    ext_modules=get_extensions() if not BUILD_NO_CUDA else [],
    cmdclass={"build_ext": get_ext()} if not BUILD_NO_CUDA else {},
    packages=find_packages(),
    include_package_data=include_package_data,
)
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_contraction.py
================================================
import pytest
import torch
import nerfacc.cuda as _C
from nerfacc import ContractionType, contract, contract_inv
# Device on which every tensor in this test module is created.
device = "cuda:0"
# Number of random points generated per test.
batch_size = 32
# Absolute tolerance passed to torch.allclose in the assertions below.
eps = 1e-6
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_ContractionType():
    """Check the Python -> C++ mapping of every ``ContractionType`` member.

    Each member's ``to_cpp_version()`` must equal the value returned by
    ``_C.ContractionTypeGetter`` for the matching integer id.
    """
    expected_ids = (
        (ContractionType.AABB, 0),
        (ContractionType.UN_BOUNDED_TANH, 1),
        (ContractionType.UN_BOUNDED_SPHERE, 2),
    )
    for member, cpp_id in expected_ids:
        ctype = member.to_cpp_version()
        assert ctype == _C.ContractionTypeGetter(cpp_id)
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_identity():
    """AABB contraction with roi ``[0, 0, 0, 1, 1, 1]`` must act as identity.

    Points are drawn with ``torch.rand`` (so they lie in [0, 1)); both
    ``contract`` and ``contract_inv`` are expected to return them unchanged
    up to ``eps``.
    """
    x = torch.rand([batch_size, 3], device=device)
    roi = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)

    x_out = contract(x, roi=roi, type=ContractionType.AABB)
    assert torch.allclose(x_out, x, atol=eps)

    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.AABB)
    assert torch.allclose(x_inv, x, atol=eps)
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_aabb():
    """AABB contraction with roi ``[-1, -1, -1, 1, 1, 1]``.

    The contracted points are compared against ``x * 0.5 + 0.5`` and
    ``contract_inv`` must recover the original points, both up to ``eps``.
    """
    x = torch.rand([batch_size, 3], device=device)
    roi = torch.tensor(
        [-1, -1, -1, 1, 1, 1], dtype=torch.float32, device=device
    )

    x_out = contract(x, roi=roi, type=ContractionType.AABB)
    x_out_tgt = x * 0.5 + 0.5
    assert torch.allclose(x_out, x_out_tgt, atol=eps)

    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.AABB)
    assert torch.allclose(x_inv, x, atol=eps)
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_tanh():
    """UN_BOUNDED_TANH contraction on normally distributed points.

    The output is compared against
    ``tanh((x - roi_min) / (roi_max - roi_min) - 0.5) * 0.5 + 0.5`` and
    ``contract_inv`` must recover the input, both up to ``eps``.
    """
    x = torch.randn([batch_size, 3], device=device)
    roi = torch.tensor(
        [-0.2, -0.3, -0.4, 0.7, 0.8, 0.6], dtype=torch.float32, device=device
    )

    x_out = contract(x, roi=roi, type=ContractionType.UN_BOUNDED_TANH)
    x_out_tgt = (
        torch.tanh((x - roi[:3]) / (roi[3:] - roi[:3]) - 0.5) * 0.5 + 0.5
    )
    assert torch.allclose(x_out, x_out_tgt, atol=eps)

    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.UN_BOUNDED_TANH)
    assert torch.allclose(x_inv, x, atol=eps)
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_sphere():
    """UN_BOUNDED_SPHERE contraction on normally distributed points.

    Every contracted point must lie strictly within distance 0.5 of
    (0.5, 0.5, 0.5), and ``contract_inv`` must recover the input up to
    ``eps``.
    """
    x = torch.randn([batch_size, 3], device=device)
    roi = torch.tensor(
        [-0.2, -0.3, -0.4, 0.7, 0.8, 0.6], dtype=torch.float32, device=device
    )

    x_out = contract(x, roi=roi, type=ContractionType.UN_BOUNDED_SPHERE)
    assert ((x_out - 0.5).norm(dim=-1) < 0.5).all()

    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.UN_BOUNDED_SPHERE)
    assert torch.allclose(x_inv, x, atol=eps)
if __name__ == "__main__":
    # Run every test in definition order when executed as a script.
    for _test_fn in (
        test_ContractionType,
        test_identity,
        test_aabb,
        test_tanh,
        test_sphere,
    ):
        _test_fn()
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_grid.py
================================================
import pytest
import torch
from nerfacc import OccupancyGrid
# Device on which the occupancy grid and sample tensors are placed.
device = "cuda:0"
def occ_eval_fn(x: torch.Tensor) -> torch.Tensor:
    """Pseudo occupancy function: (N, 3) -> (N, 1).

    Returns 1.0 for points whose distance to (0.5, 0.5, 0.5) is strictly
    less than 0.5, and 0.0 otherwise.

    This is a helper handed to the occupancy grid, not a test, so it carries
    no ``skipif`` mark.
    """
    return ((x - 0.5).norm(dim=-1, keepdim=True) < 0.5).float()
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_occ_grid():
    """Build a 128-resolution ``OccupancyGrid``, update it once, check shapes.

    After one ``every_n_step`` call with ``occ_eval_fn``, ``roi_aabb`` must
    have shape (6,) and ``binary`` shape (128, 128, 128).
    """
    roi_aabb = [0, 0, 0, 1, 1, 1]
    occ_grid = OccupancyGrid(roi_aabb=roi_aabb, resolution=128).to(device)
    occ_grid.every_n_step(0, occ_eval_fn, occ_thre=0.1)

    assert occ_grid.roi_aabb.shape == (6,)
    assert occ_grid.binary.shape == (128, 128, 128)
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_query_grid():
    """Query an updated ``OccupancyGrid`` at 100 random points.

    ``query_occ`` must return one value per sample, i.e. shape (100,).
    """
    roi_aabb = [0, 0, 0, 1, 1, 1]
    occ_grid = OccupancyGrid(roi_aabb=roi_aabb, resolution=128).to(device)
    occ_grid.every_n_step(0, occ_eval_fn, occ_thre=0.1)

    samples = torch.rand((100, 3), device=device)
    occs = occ_grid.query_occ(samples)
    assert occs.shape == (100,)
if __name__ == "__main__":
    # Run every test in definition order when executed as a script.
    for _test_fn in (test_occ_grid, test_query_grid):
        _test_fn()
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_intersection.py
================================================
import pytest
import torch
from nerfacc import ray_aabb_intersect
# Device on which every tensor in this test module is created.
device = "cuda:0"
# Number of rays generated by the test.
batch_size = 32
# Not referenced by the test in this file.
eps = 1e-6
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_intersection():
    """Check ``ray_aabb_intersect`` for rays that start inside the box.

    Ray origins are drawn with ``torch.rand`` and the box is
    ``[0, 0, 0, 1, 1, 1]``, so ``t_min`` is expected to be 0 for every ray.
    A random point between ``t_min`` and ``t_max`` on each ray must lie
    within the box.
    """
    rays_o = torch.rand([batch_size, 3], device=device)
    rays_d = torch.randn([batch_size, 3], device=device)
    aabb = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)

    t_min, t_max = ray_aabb_intersect(rays_o, rays_d, aabb)
    assert (t_min == 0).all()

    # Sample one distance per ray inside [t_min, t_max] and evaluate the point.
    t = torch.rand_like(t_min) * (t_max - t_min) + t_min
    x = rays_o + t.unsqueeze(-1) * rays_d
    assert (x >= 0).all() and (x <= 1).all()
if __name__ == "__main__":
    # Run the test directly when this file is executed as a script.
    test_intersection()
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_loss.py
================================================
import pytest
import torch
from nerfacc import pack_info, ray_marching
from nerfacc.losses import distortion
# Device on which every tensor in this test module is created.
device = "cuda:0"
# Number of rays generated by the test.
batch_size = 32
# Not referenced by the test in this file.
eps = 1e-6
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_distortion():
    """Shape check for the ``distortion`` loss.

    Rays with unit-length directions are marched between the near and far
    planes, random weights are assigned to the samples, and the loss must
    come back with one value per ray, i.e. shape (batch_size,).
    """
    rays_o = torch.rand((batch_size, 3), device=device)
    rays_d = torch.randn((batch_size, 3), device=device)
    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

    ray_indices, t_starts, t_ends = ray_marching(
        rays_o,
        rays_d,
        near_plane=0.1,
        far_plane=1.0,
        render_step_size=1e-3,
    )
    packed_info = pack_info(ray_indices, n_rays=batch_size)
    weights = torch.rand((t_starts.shape[0],), device=device)

    loss = distortion(packed_info, weights, t_starts, t_ends)
    assert loss.shape == (batch_size,)
if __name__ == "__main__":
    # Run the test directly when this file is executed as a script.
    test_distortion()
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_pack.py
================================================
import pytest
import torch
from nerfacc import pack_data, pack_info, unpack_data, unpack_info
# Device on which every tensor in this test module is created.
device = "cuda:0"
# Not referenced by the tests in this file.
batch_size = 32
# Not referenced by the tests in this file.
eps = 1e-6
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_pack_data():
    """Round-trip ``pack_data`` / ``unpack_data`` with a random boolean mask.

    Checks that back-propagating the sum of the unpacked data gives a
    gradient of 1 at every masked-in position, and that the per-ray sums of
    the unpacked data equal the per-ray sums of the masked input.
    """
    n_rays = 2
    n_samples = 3
    data = torch.rand((n_rays, n_samples, 2), device=device, requires_grad=True)
    mask = torch.rand((n_rays, n_samples), device=device) > 0.5

    packed_data, packed_info = pack_data(data, mask)
    unpacked_data = unpack_data(packed_info, packed_data, n_samples)
    unpacked_data.sum().backward()

    assert (data.grad[mask] == 1).all()
    assert torch.allclose(
        unpacked_data.sum(dim=1), (data * mask[..., None]).sum(dim=1)
    )
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_unpack_info():
    """``unpack_info`` and ``pack_info`` must be inverses on a fixed example.

    The hand-written ``packed_info`` (three rows, one per ray) must unpack to
    ray indices ``[0, 2, 2, 2, 2]``, and packing those indices again must
    reproduce the original ``packed_info``.
    """
    # presumably each row is (first sample index, sample count) -- TODO confirm
    packed_info = torch.tensor(
        [[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device
    )
    ray_indices_tgt = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )

    ray_indices = unpack_info(packed_info, n_samples=5)
    packed_info_2 = pack_info(ray_indices, n_rays=packed_info.shape[0])

    assert torch.allclose(packed_info.int(), packed_info_2.int())
    assert torch.allclose(ray_indices, ray_indices_tgt)
if __name__ == "__main__":
    # Run every test in definition order when executed as a script.
    for _test_fn in (test_pack_data, test_unpack_info):
        _test_fn()
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_ray_marching.py
================================================
import pytest
import torch
from nerfacc import OccupancyGrid, ray_marching, unpack_info
# Device on which every tensor in this test module is created.
device = "cuda:0"
# Number of rays generated per test.
batch_size = 128
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_marching_with_near_far():
    """Smoke test: ``ray_marching`` with only near/far planes must run.

    No values are asserted; the test passes when the call completes and
    returns three results.
    """
    rays_o = torch.rand((batch_size, 3), device=device)
    rays_d = torch.randn((batch_size, 3), device=device)
    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

    # Unpacking doubles as a check that exactly three values are returned.
    ray_indices, t_starts, t_ends = ray_marching(
        rays_o,
        rays_d,
        near_plane=0.1,
        far_plane=1.0,
        render_step_size=1e-3,
    )
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_marching_with_grid():
    """``ray_marching`` with a fully occupied grid keeps samples in the ROI.

    Every cell of the grid is marked occupied, rays are marched through it,
    and the midpoint of every returned interval must lie inside the grid's
    ``roi_aabb``.
    """
    rays_o = torch.rand((batch_size, 3), device=device)
    rays_d = torch.randn((batch_size, 3), device=device)
    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

    grid = OccupancyGrid(roi_aabb=[0, 0, 0, 1, 1, 1]).to(device)
    grid._binary[:] = True

    ray_indices, t_starts, t_ends = ray_marching(
        rays_o,
        rays_d,
        grid=grid,
        near_plane=0.0,
        far_plane=1.0,
        render_step_size=1e-2,
    )

    # Midpoint of each marched interval, in world coordinates.
    samples = (
        rays_o[ray_indices] + rays_d[ray_indices] * (t_starts + t_ends) / 2.0
    )
    assert (samples <= grid.roi_aabb[3:].unsqueeze(0)).all()
    assert (samples >= grid.roi_aabb[:3].unsqueeze(0)).all()
if __name__ == "__main__":
    # Run every test in definition order when executed as a script.
    for _test_fn in (test_marching_with_near_far, test_marching_with_grid):
        _test_fn()
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_rendering.py
================================================
import pytest
import torch
from nerfacc import (
accumulate_along_rays,
render_transmittance_from_density,
render_visibility,
render_weight_from_alpha,
render_weight_from_density,
rendering,
)
# Device on which the tensors in this test module are created.
device = "cuda:0"
# Not referenced by the tests in this file.
batch_size = 32
# Not referenced by the tests in this file.
eps = 1e-6
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_render_visibility():
    """Check ``render_visibility`` against hand-computed visibility masks.

    Five samples are spread over rays 0 and 2 (ray 1 has none).  Two
    settings of ``early_stop_eps`` / ``alpha_thre`` are evaluated and the
    returned boolean mask is compared with the expected one.
    """
    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (n_samples,)
    alphas = torch.tensor(
        [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device
    ).unsqueeze(-1)  # (n_samples, 1)

    # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028]
    vis = render_visibility(
        alphas, ray_indices=ray_indices, early_stop_eps=0.03, alpha_thre=0.0
    )
    vis_tgt = torch.tensor(
        [True, True, True, True, False], dtype=torch.bool, device=device
    )
    assert torch.allclose(vis, vis_tgt)

    # transmittance: [1.0, 1.0, 1.0, 0.2, 0.04]
    vis = render_visibility(
        alphas, ray_indices=ray_indices, early_stop_eps=0.05, alpha_thre=0.35
    )
    vis_tgt = torch.tensor(
        [True, False, True, True, False], dtype=torch.bool, device=device
    )
    assert torch.allclose(vis, vis_tgt)
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_render_weight_from_alpha():
    """Check ``render_weight_from_alpha`` against hand-computed weights.

    The expected weight of each sample is its transmittance multiplied by
    its alpha, with the transmittance values listed in the comment below.
    """
    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (n_samples,)
    alphas = torch.tensor(
        [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device
    ).unsqueeze(-1)  # (n_samples, 1)

    # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028]
    weights = render_weight_from_alpha(
        alphas, ray_indices=ray_indices, n_rays=3
    )
    weights_tgt = torch.tensor(
        [1.0 * 0.4, 1.0 * 0.3, 0.7 * 0.8, 0.14 * 0.8, 0.028 * 0.5],
        dtype=torch.float32,
        device=device,
    ).unsqueeze(-1)
    assert torch.allclose(weights, weights_tgt)
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_render_weight_from_density():
    """``render_weight_from_density`` must agree with the alpha-based path.

    Random densities and intervals are converted to alphas with
    ``1 - exp(-sigma * (t_end - t_start))``; the weights rendered from the
    densities must match those rendered from these alphas.
    """
    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (n_samples,)
    sigmas = torch.rand(
        (ray_indices.shape[0], 1), device=device
    )  # (n_samples, 1)
    t_starts = torch.rand_like(sigmas)
    t_ends = torch.rand_like(sigmas) + 1.0
    alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))

    weights = render_weight_from_density(
        t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3
    )
    weights_tgt = render_weight_from_alpha(
        alphas, ray_indices=ray_indices, n_rays=3
    )
    assert torch.allclose(weights, weights_tgt)
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_accumulate_along_rays():
    """Check ``accumulate_along_rays`` on three rays, one of them empty.

    Ray 0 owns the first sample, ray 1 owns none and ray 2 owns the other
    four.  The per-ray result must be the weighted sum of the ray's sample
    values, and all zeros for the empty ray.
    """
    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (n_samples,)
    weights = torch.tensor(
        [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device
    ).unsqueeze(-1)  # (n_samples, 1)
    values = torch.rand((5, 2), device=device)  # (n_samples, 2)

    ray_values = accumulate_along_rays(
        weights, ray_indices, values=values, n_rays=3
    )

    assert ray_values.shape == (3, 2)
    assert torch.allclose(ray_values[0, :], weights[0, :] * values[0, :])
    assert (ray_values[1, :] == 0).all()
    assert torch.allclose(
        ray_values[2, :], (weights[1:, :] * values[1:]).sum(dim=0)
    )
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_rendering():
    """Smoke test: ``rendering`` runs with a user-supplied ``rgb_sigma_fn``.

    No values are asserted; the call must complete and return three results.
    """

    def rgb_sigma_fn(t_starts, t_ends, ray_indices):
        # Dummy field: ``t_starts`` repeated three times as colour, and
        # ``t_starts`` itself as density.
        return torch.hstack([t_starts] * 3), t_starts

    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (n_samples,)
    # Only used as a shape/device template for the random intervals below.
    sigmas = torch.rand(
        (ray_indices.shape[0], 1), device=device
    )  # (n_samples, 1)
    t_starts = torch.rand_like(sigmas)
    t_ends = torch.rand_like(sigmas) + 1.0

    _, _, _ = rendering(
        t_starts,
        t_ends,
        ray_indices=ray_indices,
        n_rays=3,
        rgb_sigma_fn=rgb_sigma_fn,
    )
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_grads():
    """Compare weights and density gradients of all rendering paths.

    The same five samples are rendered six ways: transmittance-from-density,
    weight-from-density and weight-from-alpha, each indexed once by
    ``ray_indices`` and once by ``packed_info``.  Every path must reproduce
    the hard-coded reference weights and the reference gradient of
    ``weights.sum()`` with respect to ``sigmas`` (``atol=1e-4``).
    """
    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (n_samples,)
    packed_info = torch.tensor(
        [[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device
    )
    # Created on the module-level ``device`` so that every tensor in the
    # test lives on the same device.
    sigmas = torch.tensor([[0.4], [0.8], [0.1], [0.8], [0.1]], device=device)
    sigmas.requires_grad = True
    t_starts = torch.rand_like(sigmas)
    t_ends = t_starts + 1.0

    weights_ref = torch.tensor(
        [[0.3297], [0.5507], [0.0428], [0.2239], [0.0174]], device=device
    )
    sigmas_grad_ref = torch.tensor(
        [[0.6703], [0.1653], [0.1653], [0.1653], [0.1653]], device=device
    )

    def alphas_from_sigmas():
        # Opacity of each interval given its density and length.
        return 1.0 - torch.exp(-sigmas * (t_ends - t_starts))

    def check(weights):
        # Back-propagate, snapshot the gradient, then reset it so that the
        # next rendering path starts from a clean ``sigmas.grad``.
        weights.sum().backward()
        sigmas_grad = sigmas.grad.clone()
        sigmas.grad.zero_()
        assert torch.allclose(weights_ref, weights, atol=1e-4)
        assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)

    # The two equivalent ways of telling the renderer which ray owns a sample.
    indexings = ({"ray_indices": ray_indices}, {"packed_info": packed_info})

    # naive impl.: transmittance from density, weights assembled by hand
    for indexing in indexings:
        trans = render_transmittance_from_density(
            t_starts, t_ends, sigmas, n_rays=3, **indexing
        )
        check(trans * alphas_from_sigmas())

    # weights directly from density
    for indexing in indexings:
        check(
            render_weight_from_density(
                t_starts, t_ends, sigmas, n_rays=3, **indexing
            )
        )

    # weights from alpha
    for indexing in indexings:
        check(
            render_weight_from_alpha(alphas_from_sigmas(), n_rays=3, **indexing)
        )
if __name__ == "__main__":
    # Run every test in definition order when executed as a script.
    for _test_fn in (
        test_render_visibility,
        test_render_weight_from_alpha,
        test_render_weight_from_density,
        test_accumulate_along_rays,
        test_rendering,
        test_grads,
    ):
        _test_fn()
================================================
FILE: third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_resampling.py
================================================
import pytest
import torch
from nerfacc import pack_info, ray_marching, ray_resampling
# Device on which every tensor in this test module is created.
device = "cuda:0"
# Number of rays generated by the test.
batch_size = 128
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would make the skip condition permanently False.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_resampling():
    """Shape check for ``ray_resampling``.

    Rays are marched between the near and far planes, random weights are
    assigned to the samples, and resampling with ``n_samples=32`` must yield
    ``t_starts`` / ``t_ends`` of shape (batch_size * 32, 1).
    """
    rays_o = torch.rand((batch_size, 3), device=device)
    rays_d = torch.randn((batch_size, 3), device=device)
    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

    ray_indices, t_starts, t_ends = ray_marching(
        rays_o,
        rays_d,
        near_plane=0.1,
        far_plane=1.0,
        render_step_size=1e-3,
    )
    packed_info = pack_info(ray_indices, n_rays=batch_size)
    weights = torch.rand((t_starts.shape[0],), device=device)

    packed_info, t_starts, t_ends = ray_resampling(
        packed_info, t_starts, t_ends, weights, n_samples=32
    )
    assert t_starts.shape == t_ends.shape == (batch_size * 32, 1)
if __name__ == "__main__":
    # Run the test directly when this file is executed as a script.
    test_resampling()
================================================
FILE: utilities/utils.py
================================================
import numpy as np
import cv2
from PIL import Image, ImageChops
import os
import time
import torch
from PIL import Image, ImageDraw, ImageFont
# Local-time stamp taken once at import, formatted YYYY_MM_DD_HH_MM_SS.
exp_time = time.strftime('%Y_%m_%d_%H_%M_%S')

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")
def crop_a_set_of_images(*image_path):
    """Crop several image files to one shared bounding box, in place.

    For each path, the colour of the top-left pixel is taken as background
    and the bounding box of the pixels that differ from it by more than a
    small tolerance is computed.  All images are then cropped to the union
    of those boxes and saved back to the paths they were read from.

    Images that consist of background only contribute no box.  If no path is
    given, or no image contains any foreground, nothing is written.

    Args:
        *image_path: Paths of the image files to crop and overwrite.
    """
    from PIL import ImageChops, Image

    imgs = []
    bboxes = []
    for im_path in image_path:
        im = Image.open(im_path)
        bg = Image.new(im.mode, im.size, im.getpixel((0, 0)))
        diff = ImageChops.difference(im, bg)
        # (diff + diff) / 2.0 - 5: differences of 5 or less become zero.
        diff = ImageChops.add(diff, diff, 2.0, -5)
        bbox = diff.getbbox()
        imgs.append(im)
        # getbbox() returns None for an all-zero image; such an image has no
        # foreground and must not take part in the min/max below.
        if bbox is not None:
            bboxes.append(bbox)

    if not bboxes:
        return

    bbox_aggre = np.asarray(bboxes)
    bbox_min = np.min(bbox_aggre, 0)
    bbox_max = np.max(bbox_aggre, 0)
    # Union of all boxes: smallest left/upper, largest right/lower.
    bbox_common = (
        int(bbox_min[0]),
        int(bbox_min[1]),
        int(bbox_max[2]),
        int(bbox_max[3]),
    )

    for im_path, img in zip(image_path, imgs):
        img.crop(bbox_common).save(im_path)
def crop_image_based_on_ref_image(ref_img_path, *img_path):
    """Crop image files in place using the content box of a reference image.

    The reference image's top-left pixel is taken as the background colour;
    the bounding box of the pixels that differ from it by more than a small
    tolerance is applied to every other image, which is then saved back to
    its own path.

    Args:
        ref_img_path: Path of the image from which the box is computed.
        *img_path: Paths of the image files to crop and overwrite.
    """
    from PIL import ImageChops, Image

    reference = Image.open(ref_img_path)
    corner_colour = reference.getpixel((0, 0))
    background = Image.new(reference.mode, reference.size, corner_colour)

    delta = ImageChops.difference(reference, background)
    # (delta + delta) / 2.0 - 5: differences of 5 or less become zero.
    delta = ImageChops.add(delta, delta, 2.0, -5)
    crop_box = delta.getbbox()

    for target_path in img_path:
        cropped = Image.open(target_path).crop(crop_box)
        cropped.save(target_path)
def angular_error_map(N1, N2):
    """Return the angle in degrees between corresponding vectors.

    The dot product along the last axis is clipped to [-1, 1] and passed to
    ``arccos``, so it is used directly as a cosine.

    Args:
        N1: Array of vectors (presumably unit-length normals -- the code does
            not normalise them).
        N2: Array of vectors, multiplied element-wise with ``N1``.

    Returns:
        Array of angles in degrees, with the last axis reduced.
    """
    cosine = np.multiply(N1, N2).sum(axis=-1)
    # Guard arccos against values just outside its domain.
    cosine = np.clip(cosine, -1., 1.)
    angle_rad = np.arccos(cosine)
    return np.rad2deg(angle_rad)
def crop_mask(mask):
    """Return the bounding box of the mask pixels that differ from its corner.

    The top-left pixel is taken as the background value.

    Args:
        mask: NumPy array.  Arrays that are not already ``uint8`` are cast to
            ``uint8`` and multiplied by 255 (so a boolean mask becomes
            0 / 255); ``uint8`` arrays are used as they are.

    Returns:
        The ``(left, upper, right, lower)`` tuple produced by PIL's
        ``getbbox()``, or ``None`` when every pixel equals the top-left one.
    """
    # Compare dtypes by equality: ``mask.dtype`` is a dtype instance and can
    # never be the same object as the ``np.uint8`` type.
    if mask.dtype != np.uint8:
        mask = mask.astype(np.uint8) * 255
    im = Image.fromarray(mask)
    bg = Image.new(im.mode, im.size, im.getpixel((0, 0)))
    diff = ImageChops.difference(im, bg)
    # (diff + diff) / 2.0 + 0 leaves the difference image unchanged.
    diff = ImageChops.add(diff, diff, 2.0, 0)
    bbox = diff.getbbox()
    return bbox
def crop_image_by_mask(img, mask):
    """Crop ``img`` to the bounding box that ``crop_mask`` finds in ``mask``.

    Args:
        img: Array indexed as ``img[row, column]`` (extra trailing axes are
            kept).  It is never modified.
        mask: Mask passed to ``crop_mask``; presumably the same height and
            width as ``img`` -- this is not checked.

    Returns:
        A copy of the cropped region, or a copy of the whole image when the
        mask yields no bounding box.
    """
    bbox = crop_mask(mask)
    # crop_mask returns None when the mask is uniform: nothing to crop.
    if bbox is None:
        return img.copy()
    left, upper, right, lower = bbox
    return img[upper:lower, left:right].copy()
def save_video(vpath, images, fps):
    """Write a sequence of frames to ``vpath`` with the 'mp4v' codec.

    Args:
        vpath: Output path handed to ``cv2.VideoWriter``.
        images: Non-empty sequence of frames.  The frame size is read from
            the first one, whose shape must have exactly three entries
            (height, width, channels).
        fps: Frame rate handed to ``cv2.VideoWriter``.
    """
    height, width, _ = images[0].shape
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    video = cv2.VideoWriter(vpath, fourcc, fps, (width, height))
    # No GUI call is made here: this function opens no windows, and
    # cv2.destroyAllWindows() can raise on OpenCV builds without GUI support.
    try:
        for image in images:
            video.write(image)
    finally:
        # Always release the writer, even if a frame fails to write.
        video.release()
def toRGBA(img, mask):
    """Convert an RGB image to RGBA with the alpha channel taken from ``mask``.

    Args:
        img: Image passed to ``cv2.cvtColor`` with ``COLOR_RGB2RGBA``.
        mask: Array whose non-zero entries become alpha 255 and whose zero
            entries become alpha 0.

    Returns:
        The four-channel image.
    """
    rgba = cv2.cvtColor(img, cv2.COLOR_RGB2RGBA)
    opaque = mask.astype(bool)
    alpha = np.where(opaque, 255, 0).astype(np.uint8)
    rgba[:, :, 3] = alpha
    return rgba