[
  {
    "path": ".gitignore",
    "content": "data/\r\nexp/\r\n\r\n# Byte-compiled / optimized / DLL files\r\n__pycache__/\r\n*.py[cod]\r\n*$py.class\r\n\r\n# C extensions\r\n*.so\r\n\r\n# Distribution / packaging\r\n.Python\r\nbuild/\r\ndevelop-eggs/\r\ndist/\r\ndownloads/\r\neggs/\r\n.eggs/\r\nlib/\r\nlib64/\r\nparts/\r\nsdist/\r\nvar/\r\nwheels/\r\nshare/python-wheels/\r\n*.egg-info/\r\n.installed.cfg\r\n*.egg\r\nMANIFEST\r\n\r\n# PyInstaller\r\n#  Usually these files are written by a python script from a template\r\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\r\n*.manifest\r\n*.spec\r\n\r\n# Installer logs\r\npip-log.txt\r\npip-delete-this-directory.txt\r\n\r\n# Unit test / coverage reports\r\nhtmlcov/\r\n.tox/\r\n.nox/\r\n.coverage\r\n.coverage.*\r\n.cache\r\nnosetests.xml\r\ncoverage.xml\r\n*.cover\r\n*.py,cover\r\n.hypothesis/\r\n.pytest_cache/\r\ncover/\r\n\r\n# Translations\r\n*.mo\r\n*.pot\r\n\r\n# Django stuff:\r\n*.log\r\nlocal_settings.py\r\ndb.sqlite3\r\ndb.sqlite3-journal\r\n\r\n# Flask stuff:\r\ninstance/\r\n.webassets-cache\r\n\r\n# Scrapy stuff:\r\n.scrapy\r\n\r\n# Sphinx documentation\r\ndocs/_build/\r\n\r\n# PyBuilder\r\n.pybuilder/\r\ntarget/\r\n\r\n# Jupyter Notebook\r\n.ipynb_checkpoints\r\n\r\n# IPython\r\nprofile_default/\r\nipython_config.py\r\n\r\n# pyenv\r\n#   For a library or package, you might want to ignore these files since the code is\r\n#   intended to run in multiple environments; otherwise, check them in:\r\n# .python-version\r\n\r\n# pipenv\r\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\r\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\r\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\r\n#   install all needed dependencies.\r\n#Pipfile.lock\r\n\r\n# poetry\r\n#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.\r\n#   This is 
especially recommended for binary packages to ensure reproducibility, and is more\r\n#   commonly ignored for libraries.\r\n#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control\r\n#poetry.lock\r\n\r\n# pdm\r\n#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.\r\n#pdm.lock\r\n#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it\r\n#   in version control.\r\n#   https://pdm.fming.dev/#use-with-ide\r\n.pdm.toml\r\n\r\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm\r\n__pypackages__/\r\n\r\n# Celery stuff\r\ncelerybeat-schedule\r\ncelerybeat.pid\r\n\r\n# SageMath parsed files\r\n*.sage.py\r\n\r\n# Environments\r\n.env\r\n.venv\r\nenv/\r\nvenv/\r\nENV/\r\nenv.bak/\r\nvenv.bak/\r\n\r\n# Spyder project settings\r\n.spyderproject\r\n.spyproject\r\n\r\n# Rope project settings\r\n.ropeproject\r\n\r\n# mkdocs documentation\r\n/site\r\n\r\n# mypy\r\n.mypy_cache/\r\n.dmypy.json\r\ndmypy.json\r\n\r\n# Pyre type checker\r\n.pyre/\r\n\r\n# pytype static type analyzer\r\n.pytype/\r\n\r\n# Cython debug symbols\r\ncython_debug/\r\n\r\n# PyCharm\r\n#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can\r\n#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore\r\n#  and can be added to the global gitignore or merged into this file.  For a more nuclear\r\n#  option (not recommended) you can uncomment the following to ignore the entire idea folder.\r\n.idea/"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2024 CyberAgent AI Lab\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "<h2 align=\"center\">SuperNormal: Neural Surface Reconstruction via Multi-View Normal Integration</h2>\n<h4 align=\"center\">\n    <a href=\"https://xucao-42.github.io/homepage/\"><strong>Xu Cao</strong></a>\n    ·\n    <a href=\"https://taketomitakafumi.sakura.ne.jp/web/en/\"><strong>Takafumi Taketomi</strong></a>\n<br>\nCyberAgent </h4>\n<h4 align=\"center\"><a href=\"https://cvpr.thecvf.com/\">CVPR 2024 </a></h3>\n<p align=\"center\">\n  <br>\n    <a href=\"https://arxiv.org/abs/2312.04803\">\n      <img src='https://img.shields.io/badge/arXiv-Paper-981E32?style=for-the-badge&Color=B31B1B' alt='arXiv PDF'>\n    </a>\n\n[//]: # (    <a href='https://xucao-42.github.io/mvas_homepage/'>)\n\n[//]: # (      <img src='https://img.shields.io/badge/MVAS-Project Page-5468FF?style=for-the-badge' alt='Project Page'></a>)\n</p>\n\n\n### Update\n- **2024/09/30**: Real-world raw data and step-by-step data pre-processing instructions are available. See [here](./data_capture_and_preprocessing/README.md).\n\n<div align=\"center\">\n<img src=\"./media/teaser.png\" alt=\"Teaser\" width=\"100%\">\nFast and fine-grained 3D reconstruction from multi-view surface normal maps. \n</div>\n\n### Quick Start\nCode was tested on Ubuntu 18.04 (WSL2) using Python 3.8, PyTorch 2.1.0, and CUDA 11.8 on an Nvidia RTX4090Ti (24GB). 
\n\n**Before starting, please ensure CUDA is installed in your environment ([11.8 can be found here](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_local)).**\nIt is required by [tiny-cuda-nn](https://github.com/NVlabs/tiny-cuda-nn).\n\n<details><summary> You should see something like the following after typing `nvcc --version` </summary>\n\n```commandline\nnvcc: NVIDIA (R) Cuda compiler driver\nCopyright (c) 2005-2022 NVIDIA Corporation\nBuilt on Wed_Sep_21_10:33:58_PDT_2022\nCuda compilation tools, release 11.8, V11.8.89\nBuild cuda_11.8.r11.8/compiler.31833905_0\n```\n</details>\n\nClone the repository and prepare the conda environment:\n```commandline\ngit clone https://github.com/CyberAgentAILab/SuperNormal.git\ncd SuperNormal\n. ./create_env.sh\n```\n\nDownload data (~1.8GB):\n```commandline\n./download_data.sh\n```\n\nRun on the DiLiGenT-MV benchmark objects or on our captured objects:\n```commandline\n./run_diligent.sh  # Training should take about 50 seconds per object\n```\nor \n```commandline\n./run_own_object.sh  # Training should take about 5 minutes per object\n```\nResults are saved under `./exp`.\n\nNOTE: If a RuntimeError like the one below occurs, `apt install ninja-build` may resolve the error.\n```\nRuntimeError: Ninja is required to load C++ extensions\n```\n\n### Hyperparameter tuning tips\nTraining hyperparameters are defined in `./config/*.conf`.\nSome important hyperparameters are:\n- `dataset.normal_dir`: You can choose normal maps estimated by different methods as input for DiLiGenT-MV benchmark objects.\n- `train.end_iter`: The number of iterations for training. Should be adjusted according to the number of views and normal map resolutions.\n- `train.increase_bindwidth_every`: A strategy used in [Neuralangelo](https://research.nvidia.com/labs/dir/neuralangelo/) to progressively activate finer hash grid during training. 
Less than `end_iter`/`model.encoding.n_levels` should be fine.\n- `train.batch_size`: Number of patches in each batch for training. Should be adjusted according to the GPU memory.\n- `train.patch_size`: Better to be fixed to 3, i.e., each patch is 3x3. A larger patch size will cause inaccurate volume rendering results for boundary pixels in a patch.\n\n### Modifications to NerfAcc\nWe add several functions to the original [NerfAcc](https://www.nerfacc.com) to adapt it to patch-based volume rendering.\nThe key new functions (which are indicated by `patch_based` in the function name) are in \n[third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_weight.cu](./third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_weight.cu) \nand [third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/vol_rendering.py](./third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/vol_rendering.py).\n\n\n### Acknowledgement\nThis repo is built upon [NeuS](https://github.com/Totoro97/NeuS) and benefits from the amazing [tiny-cuda-nn](https://github.com/NVlabs/tiny-cuda-nn) and [NerfAcc](https://www.nerfacc.com).\nWe also learned a lot from [instant-nsr-pl](https://github.com/bennyguo/instant-nsr-pl).\n\n### Citation\nIf you find our work useful in your research, please consider citing:\n```bibtex\n@inproceedings{supernormal2024cao,\n  title={SuperNormal: {N}eural Surface Reconstruction via Multi-View Normal Integration},\n  author={Cao, Xu and Taketomi, Takafumi},\n  booktitle={CVPR},\n  year={2024}\n}\n```\n\n\n"
  },
  {
    "path": "__init__.py",
    "content": ""
  },
  {
    "path": "config/diligent.conf",
    "content": "general {\r\n    dataset_class = models.dataset_loader.Dataset\r\n    renderer_class = models.renderer.NeuSRenderer\r\n\r\n    base_exp_dir = ./exp/diligent_mv/CASE_NAME\r\n    recording = [\r\n        ./,\r\n        ./models\r\n    ]\r\n}\r\n\r\ndataset {\r\n    data_dir = data/diligent_mv_normals/CASE_NAME/\r\n    normal_dir = normal_world_space_sdmunips # choose normal maps estimated by different methods, should be in the world space\r\n    cameras_name = cameras_sphere.npz\r\n    exclude_views = [0, 4, 8, 12, 16]  # index of views to exclude for test purpose, 0-based\r\n    upsample_factor = 1\r\n}\r\n\r\ntrain {\r\n    learning_rate = 5e-4\r\n    learning_rate_alpha = 0.05\r\n    end_iter = 5000\r\n    increase_bindwidth_every = 350  # following neuralangelo's strategy\r\n\r\n    gradient_method = dfd  # dfd or fd or ad, for directional finite difference, finite difference, and auto-differentiation\r\n\r\n    batch_size = 2048\r\n    patch_size = 3  # i.e., each training step samples 2048 patches of 3x3 pixels\r\n\r\n    warm_up_end = 50\r\n    use_white_bkgd = False\r\n\r\n    loss_type = l2  # for normal loss\r\n    normal_weight = 1\r\n    eikonal_weight = 1\r\n    mask_weight = 1\r\n}\r\n\r\nval {\r\n    save_freq = 1000\r\n\r\n    val_normal_freq = 5001\r\n    val_normal_resolution_level = 1\r\n    gradient_method = dfd  # dfd or fd or ad, can be different from training\r\n\r\n    val_mesh_freq = 10000\r\n    val_mesh_res = 512\r\n\r\n    report_freq = 100\r\n    eval_metric_freq = 5000\r\n}\r\n\r\nmodel {\r\n    sdf_network {\r\n        d_out = 1\r\n        d_in = 3\r\n        d_hidden = 64\r\n        n_layers = 1\r\n        skip_in = [-1]  # -1 for no skip connection\r\n        bias = 0.6\r\n        geometric_init = True\r\n        weight_norm = True\r\n        input_concat = True  # concat input positions and encoded features\r\n    }\r\n\r\n    variance_network {\r\n        init_val = 0.5\r\n    }\r\n\r\n    ray_marching {\r\n        
start_step_size = 1e-2\r\n        end_step_size = 1e-3\r\n        occ_threshold = 0.1\r\n        occ_sigmoid_k = 80.0\r\n        occ_resolution = 128\r\n        occ_update_freq = 8  # batches\r\n    }\r\n\r\n    encoding{\r\n        otype=HashGrid,\r\n\t\tn_levels=14\r\n\t\tn_features_per_level=2\r\n\t\tlog2_hashmap_size=19\r\n\t\tbase_resolution=32\r\n\t\tper_level_scale=1.3195079107728942\r\n   }\r\n}"
  },
  {
    "path": "config/own_objects.conf",
    "content": "general {\r\n    dataset_class = models.dataset_loader.Dataset\r\n    renderer_class = models.renderer.NeuSRenderer\r\n\r\n    base_exp_dir = ./exp/own_objects/CASE_NAME\r\n    recording = [\r\n        ./,\r\n        ./models\r\n    ]\r\n}\r\n\r\ndataset {\r\n    data_dir = data/own_objects_normals/CASE_NAME/\r\n    normal_dir = normal_world_space_sdmunips\r\n    cameras_name = cameras_sphere.npz\r\n    exclude_views = []  # index of views to exclude, 0-based\r\n    upsample_factor = 1\r\n}\r\n\r\ntrain {\r\n    learning_rate = 5e-4\r\n    learning_rate_alpha = 0.05\r\n    end_iter = 30000\r\n    increase_bindwidth_every = 2000  # following neuralangelo's strategy\r\n\r\n    gradient_method = dfd  # dfd or fd or ad, for directional finite difference, finite difference, and auto-differentiation\r\n\r\n    batch_size = 2048\r\n    patch_size = 3  # i.e., each training step samples 2048 patches of 3x3 pixels\r\n\r\n    warm_up_end = 500\r\n    use_white_bkgd = False\r\n\r\n    loss_type = l2  # for normal loss\r\n    normal_weight = 1\r\n    eikonal_weight = 1\r\n    mask_weight = 1\r\n}\r\n\r\nval {\r\n    save_freq = 10000\r\n\r\n    val_normal_freq = 30000\r\n    val_normal_resolution_level = 2\r\n    gradient_method = dfd  # dfd or fd or ad, can be different from training\r\n\r\n    val_mesh_freq = 30000\r\n    val_mesh_res = 1024\r\n\r\n    report_freq = 100\r\n    eval_metric_freq = 30000\r\n}\r\n\r\nmodel {\r\n    sdf_network {\r\n        d_out = 1\r\n        d_in = 3\r\n        d_hidden = 64\r\n        n_layers = 1\r\n        skip_in = [-1]\r\n        bias = 0.8\r\n        geometric_init = True\r\n        weight_norm = True\r\n        input_concat = True  # concat input positions and encoded features\r\n    }\r\n\r\n    variance_network {\r\n        init_val = 0.5\r\n    }\r\n\r\n    ray_marching\r\n    {\r\n        start_step_size = 1e-2\r\n        end_step_size = 1e-3\r\n        occ_threshold = 0.1\r\n        occ_sigmoid_k = 80.0\r\n        
occ_resolution = 128\r\n        occ_update_freq = 8  # batches\r\n    }\r\n\r\n\r\n    encoding{\r\n        otype=HashGrid,\r\n\t\tn_levels=14\r\n\t\tn_features_per_level=2\r\n\t\tlog2_hashmap_size=19\r\n\t\tbase_resolution=32\r\n\t\tper_level_scale=1.3195079107728942\r\n    }\r\n}"
  },
  {
    "path": "create_env.sh",
    "content": "conda deactivate\nconda remove -y -n sn --all\nconda create -y -n sn python=3.8\nconda activate sn\n\npip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118\n\n# install tiny-cuda-nn\nexport PATH=\"/usr/local/cuda/bin:$PATH\"\nexport LIBRARY_PATH=\"/usr/local/cuda/lib64/stubs:$LIBRARY_PATH\"\npip install git+https://github.com/NVlabs/tiny-cuda-nn/@2ec562e853e6f482b5d09168705205f46358fb39#subdirectory=bindings/torch\n\npip install -e ./third_parties/nerfacc-0.3.5/nerfacc-0.3.5/\npip install opencv-python==4.8.1.78 trimesh==3.23.5 open3d==0.17 pyvista==0.42.3 scipy==1.10.1 scikit-image==0.21.0 pyhocon==0.3.59 pyexr==0.3.10 tensorboard==2.14.0 icecream==2.1.3 PyMCubes==0.1.4 pyembree==0.2.11"
  },
  {
    "path": "data_capture_and_preprocessing/README.md",
    "content": "\nThis is a step-by-step guide to preprocess the raw images captured by an iPhone for the MVPS task.\nYou can download our raw images using the following command *(~6 GB per object)*.\n\n```\ngdown 'https://drive.google.com/file/d/1BcCuZR0C-snmCNf8iGhkFgkQ6arfcQ-L/view?usp=sharing' --fuzzy\nunzip flower_girl.zip\nrm flower_girl.zip\n\ngdown 'https://drive.google.com/file/d/12QzgRbOjBSx295BS4zihnOjcdYh7ZaP9/view?usp=sharing' --fuzzy\nunzip lion.zip\nrm lion.zip\n\ngdown 'https://drive.google.com/file/d/1cvKbI5VvDhsuA4a06rYqqoAtQd8GtyeI/view?usp=sharing'  --fuzzy\nunzip dog.zip\nrm dog.zip\n```\n\n## File structure\nYou should have the following file structure under each object's folder:\n```\n - RAW\n - mask\n - cameras.xml\n```\n\nThe `RAW` folder contains all the DNG images captured by an iPhone. \nThe `mask` folder contains the foreground masks for each view.\nThe `cameras.xml` contains the calibrated camera parameters using [Metashape](https://oakcorp.net/agisoft/download/). 
\n\n## Step-by-step data pre-processing\nFirst we convert the DNG images to PNG file format.\n```\n# pip install rawpy\npython iPhone_mvps_data_preprocessing.py --data_dir <path/to/obj_folder>\n```\nNow the file structure looks like this\n```\n    - RAW\n    - mvps_png_full\n    - sfm_png_full\n    - mask\n    - cameras.xml\n```\nThe `mvps_png_full` folder contains the pre-processed images for photometric stereo, and the `sfm_png_full` folder contains the images for camera calibration using Structure from Motion.\nIn each view, we first take an image in ambient light and then additionally illuminate the object with an active light source.\nSo the first image in each view is collected in `sfm_png_full`.\n\n### Mask preparation\nNow we prepare the foreground masks for each view.\nWe used SAM to interactively segment the foreground objects.\nPlease install SAM according to the [official instructions](https://github.com/facebookresearch/segment-anything).\nAfter installation, run the following command to segment the foreground objects for all views:\n\n```\npython sam_mvps.py --data_dir <path/to/obj_folder/mvps_png_full> --checkpoint <path/to/sam_vit_h_4b8939.pth>\n```\nThis will pop up a window where you can interactively segment the foreground objects.\nSelect points on the object to segment the foreground object, and press `Esc` to check the intermediate results.\nContinue to select points until you are satisfied with the segmentation results, and press `Enter` to save the mask.\nThe process will be repeated for all views.\n\nThe same mask will be saved in two places: `obj_folder/mask` and the corresponding folder containing the image from the same viewpoint. 
\nThe latter will be used for normal map estimation.\n\n### Camera calibration\nIn [MetaShape](https://oakcorp.net/agisoft/download/), import the images in the `sfm_png_full` folder and run the camera calibration process.\n```\n[Workflow] -> [Add Folder] -> select `sfm_png_full` -> select single cameras -> [Workflow] -> [Align Photos]\n```\n\nAfter camera calibration, export the camera parameters to `cameras.xml`.\n```\n[File] -> [Export] -> [Export Cameras]\n```\n\nThe resulting `cameras.xml` file is what we have put in the object folder.\n\n\n### Normal map estimation\nInstall [SDM-UniPS](https://github.com/satoshi-ikehata/SDM-UniPS-CVPR2023) and run the following command to generate the normal maps for each view:\n```\npython <path/to/sdm_unips/main.py> --session_name YOUR_SESSION_NAME --test_dir <path/to/obj_folder/mvps_png_full> --checkpoint <path/to/sdm_unips_checkpoint_dir> --scalable --target normal\n```\nTips: Prepare the mask for each view to improve the normal estimation results. This should be done when you have completed the previous mask segmentation step.\n\nThe original SDM-UniPS code outputs normal maps in the PNG format. 
You can instead get EXR format by replacing [this line](https://github.com/satoshi-ikehata/SDM-UniPS-CVPR2023/blob/96e68f353173c2ae85bfe609e4728a19a2f8c92e/sdm_unips/modules/builder/builder.py#L162) with the following one:\n```\npyexr.write(f'{testdata.data.data_workspace}/normal.exr', nout)\n```\nRemember to install the [pyexr](https://github.com/tvogels/pyexr) package and import it in the file.\nAfter normal estimation, we collect the normal maps in the same folder.\nSince SDM-UniPS estimates normal maps in camera space, we also convert them to the world space using the camera parameters from the previous step.\n\n```\npython gather_and_convert_normal_map.py --data_dir <path/to/obj_folder> --sdm_unips_result_dir <path/to/YOUR_SESSION_NAME/results>\n```\nThe file structure is now as follows:\n```\n    - RAW\n    - mvps_png_full\n    - sfm_png_full\n    - mask\n    - normal_camera_space_sdmunips\n    - normal_world_space_sdmunips\n    - cameras.xml\n    - results # if your SDM-UniPS output is in this folder\n```\n\n### Convert camera parameters to NeuS format\nThe last step is to convert the camera parameters to the NeuS format.\n```\npython metashape2neus.py --xml_path <path/to/obj_folder/cameras.xml>\n```\nThis will create a `cameras_sphere.npz` file in the same folder as `cameras.xml`.\nWe also provide the converter to NeuS2 format. Check `metashape2neus2_json_and_images.py` for more details.\n\n## Tips for capturing your own data\nWe used the iPhone's built-in camera app to take the images. Here are some tips for successful reconstruction:\n- Use a tripod to stabilize the camera.\n- Use a remote shutter release to avoid camera shake.\n- Keep the same focus point in each view. On iPhone, you can press and hold the screen to lock the focus point.\n- Use a white/black background to simplify the segmentation process.\n- Use a turntable to capture the object from different angles. 
\n- Place the object on a textured surface to help the Structure from Motion process.\n- Place the object in the center of the image.\n- We used a [video light](https://www.ulanzi.com/collections/lighting/products/mini-led-video-light-ulanzi-vl49-1672) to illuminate the object from different angles in each view. Other light sources like a ring light/flashlight may also work.\n- In each view, vary the light source's position sufficiently around the camera. We used 12 different light positions in our setup. \n- Reduce the exposure if the captured images are overexposed.\n\nThe above capture process can be done with off-the-shelf equipment, but it is tedious. \nIt would be more convenient if you could build a custom rig to automate the capture process, such as [this example](https://youtu.be/zyEw-1QUlkU?si=8RvYC23emoP8TXrU)."
  },
  {
    "path": "data_capture_and_preprocessing/gather_and_convert_normal_map.py",
    "content": "import os\nimport cv2\nimport pyexr\nfrom glob import glob\nimport numpy as np\nimport shutil\nfrom bs4 import BeautifulSoup  # $ pip install beautifulsoup4 lxml\nimport argparse\n\nparser = argparse.ArgumentParser()\nparser.add_argument(\"--sdm_unips_result_dir\", type=str, default=\"../../SDM-UniPS-CVPR2023/flower_girl/results\")\nparser.add_argument(\"--data_dir\", type=str, default=\"./flower_girl\")\nargs = parser.parse_args()\n\nxml_path = os.path.join(args.data_dir, \"cameras.xml\")\nobj_name = os.path.basename(args.data_dir)\nnum_views = len(glob(os.path.join(args.sdm_unips_result_dir, \"view_*.data\")))\n\nnormal_map_camera_dir = os.path.join(args.data_dir, \"normal_camera_space_sdmunips\")\nnormal_map_world_dir = os.path.join(args.data_dir, \"normal_world_space_sdmunips\")\n\n# create directories\nos.makedirs(normal_map_camera_dir, exist_ok=True)\nos.makedirs(normal_map_world_dir, exist_ok=True)\n\nwith open(xml_path, \"r\") as f:\n    xml_data = f.read()\nbs_data = BeautifulSoup(xml_data, \"xml\")\nb_unique = bs_data.find_all('camera')\n\nfor tag in b_unique:\n    img_name = tag.get(\"label\")\n    view_idx = int(img_name.split(\"_\")[-1])\n    # camera to world transform\n    C2W = np.array([float(i) for i in tag.find(\"transform\").text.split(\" \")]).reshape((4, 4))\n\n\nnormal_map_all = []\nnormal_map_path_all = []\nfor i in range(num_views):\n    view_dir = os.path.join(args.sdm_unips_result_dir, f\"view_{i:02d}.data\")\n    for tag in b_unique:\n        img_name = tag.get(\"label\")\n        view_idx = int(img_name.split(\"_\")[-1])\n        # camera to world transform\n        if view_idx == i:\n            C2W = np.array([float(i) for i in tag.find(\"transform\").text.split(\" \")]).reshape((4, 4))\n            R = C2W[:3, :3]\n            break\n    if os.path.exists(view_dir):\n        # copy normal map\n        normal_map_file = os.path.join(view_dir, \"normal.exr\")\n        new_normal_map_file = 
os.path.join(normal_map_camera_dir, f\"{i:02d}.exr\")\n        shutil.copy(normal_map_file, new_normal_map_file)\n\n        # convert normal map to world space\n        normal_map_camera = pyexr.read(new_normal_map_file)\n        normal_map_camera[..., [1, 2]] *= -1  # revert y and z axis to match opencv conversion, X right, Y down, Z front\n        H, W = normal_map_camera.shape[:2]\n        normal_world = (R @ normal_map_camera.reshape(-1, 3).T).T.reshape([H, W, 3])\n        pyexr.write(os.path.join(normal_map_world_dir, f\"{i:02d}.exr\"), normal_world)\n"
  },
  {
    "path": "data_capture_and_preprocessing/iPhone_mvps_data_preprocessing.py",
    "content": "import rawpy, os\nfrom glob import glob\nimport cv2\nimport numpy as np\nimport os\nfrom tqdm import tqdm\nimport argparse\n\nparser = argparse.ArgumentParser()\nparser.add_argument(\"--data_dir\", type=str, default=\"./flower_girl\")\nparser.add_argument(\"--num_img_per_view\", type=int, default=13)\narg = parser.parse_args()\n\ndng_list = glob(os.path.join(arg.data_dir, \"RAW\", \"*.DNG\"))\ndng_list.sort()\nnum_image_per_view = arg.num_img_per_view\nnum_view = len(dng_list) // num_image_per_view\n\nresize_factor = 1  # resize the png image to 1/2, 1/4, or 1\n\nif resize_factor == 1/2:\n    sfm_data_dir = os.path.join(arg.data_dir, \"sfm_png_half\")\n    mvps_data_dir = os.path.join(arg.data_dir, \"mvps_png_half\")\nelif resize_factor == 1/4:\n    sfm_data_dir = os.path.join(arg.data_dir, \"sfm_png_quarter\")\n    mvps_data_dir = os.path.join(arg.data_dir, \"mvps_png_quarter\")\nelif resize_factor == 1:\n    mvps_data_dir = os.path.join(arg.data_dir, \"mvps_png_full\")\n    sfm_data_dir = os.path.join(arg.data_dir, \"sfm_png_full\")\n\nos.makedirs(sfm_data_dir, exist_ok=True)\nos.makedirs(mvps_data_dir, exist_ok=True)\n\nfor view_idx in tqdm(range(num_view)):\n    view_dir = os.path.join(mvps_data_dir, f\"view_{view_idx:02d}.data\")\n    if os.path.exists(view_dir):\n        continue\n    os.makedirs(view_dir, exist_ok=True)\n    view_dng_list = dng_list[view_idx * num_image_per_view: (view_idx + 1) * num_image_per_view]\n\n    for dng_idx, dng_path in enumerate(view_dng_list):\n        with rawpy.imread(dng_path) as raw:\n            rgb = raw.postprocess(no_auto_bright=True, output_bps=16)[..., ::-1].astype(np.float32)\n            rgb = rgb.astype(np.uint16)\n        rgb_resized = cv2.resize(rgb, (0, 0), fx=resize_factor, fy=resize_factor)\n\n        # choose the first image in each view for SfM\n        if dng_idx == 0:\n            cv2.imwrite(os.path.join(sfm_data_dir, f\"{view_idx:02d}.png\"), rgb_resized)\n\n        
cv2.imwrite(os.path.join(view_dir, f\"L{dng_idx:02d}.png\"), rgb_resized)\n\n\n"
  },
  {
    "path": "data_capture_and_preprocessing/metashape2neus.py",
    "content": "import os.path\nimport xml\nfrom bs4 import BeautifulSoup  # pip install beautifulsoup4 lxml\nimport numpy as np\n\n# details of camera normalization can be found in Sec. C.3 in https://openaccess.thecvf.com/content/CVPR2023/supplemental/Cao_Multi-View_Azimuth_Stereo_CVPR_2023_supplemental.pdf\ndef normalize_camera(R_list, t_list, camera2object_ratio=3):\n    A_camera_normalize = 0\n    b_camera_normalize = 0\n    camera_center_list = []\n    for view_idx in range(len(R_list)):\n        R = R_list[view_idx]\n        t = t_list[view_idx]\n        camera_center = - R.T @ t  # in world coordinate\n        camera_center_list.append(camera_center)\n        vi = R[2][:, None]  # the camera's principal axis in the world coordinates\n        Vi = vi @ vi.T\n        A_camera_normalize += np.eye(3) - Vi\n        b_camera_normalize += camera_center.T @ (np.eye(3) - Vi)\n    offset = np.linalg.lstsq(A_camera_normalize, np.squeeze(b_camera_normalize), rcond=None)[0]\n    camera_center_dist_list = [np.sqrt(np.sum((np.squeeze(c) - offset) ** 2))\n                               for c in camera_center_list]\n    scale = np.max(camera_center_dist_list) / camera2object_ratio\n    return offset, scale\n\ndef make4x4(P):\n    assert P.shape[-1] == 4 or P.shape[-1] == 3\n    assert len(P.shape) == 2\n    assert P.shape[0] == 3 or P.shape[0] == 4\n    ret = np.eye(4)\n    ret[:P.shape[0], :P.shape[1]] = P\n    return ret\n\nclass MetashapePoseLoader:\n    def __init__(self, xml_path, camera2object_ratio):\n        with open(xml_path, \"r\") as f:\n            xml_data = f.read()\n        bs_data = BeautifulSoup(xml_data, \"xml\")\n        c_unique = bs_data.find_all('resolution')\n        img_width = int(c_unique[0].get(\"width\"))\n        img_height = int(c_unique[0].get(\"height\"))\n        c_intrinsics = bs_data.find_all('calibration')\n        f = float(c_intrinsics[0].find(\"f\").text)\n        cx_offset = float(c_intrinsics[0].find(\"cx\").text)\n        cy_offset 
= float(c_intrinsics[0].find(\"cy\").text)\n        K = np.array([[f, 0, (img_width-1)/2 + cx_offset],\n                        [0, f, (img_height-1)/2 + cy_offset],\n                        [0, 0, 1]])\n\n        b_unique = bs_data.find_all('camera')\n        R_list = []\n        t_list = []\n        C2W_list = []\n        camera_sphere = dict()\n        for tag in b_unique:\n            img_name = tag.get(\"label\")\n            view_idx = int(img_name.split(\"_\")[-1])\n            # camera to world transform\n            C2W = np.array([float(i) for i in tag.find(\"transform\").text.split(\" \")]).reshape((4, 4))\n            C2W_list.append(C2W)\n\n            assert int(img_name) == view_idx\n\n            W2C = np.linalg.inv(C2W)\n            R_list.append(W2C[:3, :3])\n            t_list.append(W2C[:3, 3])\n\n            camera_sphere[f\"world_mat_{view_idx}\"] = make4x4(K) @ W2C\n\n        offset, scale = normalize_camera(R_list, t_list, camera2object_ratio=camera2object_ratio)\n        print(\"offset\", offset, \"scale\", scale)\n        num_views = len(C2W_list)\n\n        scale_mat = np.eye(4)\n        scale_mat[:3, :3] *= scale\n        scale_mat[:3, 3] = offset\n        for im_idx in range(num_views):\n            camera_sphere[f\"scale_mat_{im_idx}\"] = scale_mat\n\n        data_dir = os.path.dirname(xml_path)\n        np.savez(os.path.join(data_dir, 'cameras_sphere.npz'), **camera_sphere)\n\n\nif __name__==\"__main__\":\n    import argparse\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--xml_path\", type=str, required=True)\n    parser.add_argument(\"--ratio\", type=float, default=10)\n    args = parser.parse_args()\n\n    MetashapePoseLoader(args.xml_path, camera2object_ratio=args.ratio)"
  },
  {
    "path": "data_capture_and_preprocessing/metashape2neus2_json_and_images.py",
    "content": "from glob import glob\nimport os\nimport numpy as np\nimport cv2\nfrom bs4 import BeautifulSoup\nfrom metashape2neus import normalize_camera, make4x4\nimport json\nimport argparse\n\ndef create_json_file(data, filename):\n    with open(filename, 'w') as f:\n        json.dump(data, f, indent=4)\n\nparser = argparse.ArgumentParser()\nparser.add_argument('--data_dir', type=str, default=\"./flower_girl\")\narg = parser.parse_args()\n\ndata_dir = os.path.join(arg.data_dir, \"sfm_png_full\")\nmask_dir = os.path.join(arg.data_dir, \"mask\")\nxml_path = os.path.join(arg.data_dir, \"cameras.xml\")\nobj_name = os.path.basename(arg.data_dir)\n\ntarget_dir = os.path.join(arg.data_dir, \"neus2_input\", \"images\")\nos.makedirs(target_dir, exist_ok=True)\n\n# load images and masks and save them as rgba images\nimg_list = glob(os.path.join(data_dir, \"*.png\"))\nimg_list.sort()\nnum_view = len(img_list)\nprint(num_view)\nimg_h, img_w = cv2.imread(img_list[0]).shape[:2]\n\nfor i in range(num_view):\n    img_path = img_list[i]\n    mask_path = os.path.join(mask_dir, f\"{i:02d}.png\")\n    img = cv2.imread(img_path)\n    mask = cv2.imread(mask_path)\n    img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)\n    img[..., 3] = mask[..., 0]\n    new_img_path = os.path.join(target_dir, f\"{i:02d}.png\")\n    cv2.imwrite(new_img_path, img)\n    print(f\"Saved {new_img_path}\")\n\ndata = {\n    \"from_na\": True,\n    \"w\": img_w,\n    \"h\": img_h,\n    \"aabb_scale\": 1.0,\n    \"frames\": [],\n    \"scale\": 1,\n    \"offset\": [1, 1, 1],\n}\n\nwith open(xml_path, \"r\") as f:\n    xml_data = f.read()\nbs_data = BeautifulSoup(xml_data, \"xml\")\nc_unique = bs_data.find_all('resolution')\nimg_width = int(c_unique[0].get(\"width\"))\nimg_height = int(c_unique[0].get(\"height\"))\nc_intrinsics = bs_data.find_all('calibration')\nf = float(c_intrinsics[0].find(\"f\").text)\ncx_offset = float(c_intrinsics[0].find(\"cx\").text)\ncy_offset = float(c_intrinsics[0].find(\"cy\").text)\nK 
= np.array([[f, 0, (img_width - 1) / 2 + cx_offset],\n              [0, f, (img_height - 1) / 2 + cy_offset],\n              [0, 0, 1]])\n\nb_unique = bs_data.find_all('camera')\nR_list = []\nt_list = []\nC2W_list = []\ncamera_sphere = dict()\nfor tag in b_unique:\n    img_name = tag.get(\"label\")\n    view_idx = int(img_name.split(\"_\")[-1])\n    # camera to world transform\n    C2W = np.array([float(i) for i in tag.find(\"transform\").text.split(\" \")]).reshape((4, 4))\n    C2W_list.append(C2W)\n\n    print(img_name, view_idx)\n    W2C = np.linalg.inv(C2W)\n    R_list.append(W2C[:3, :3])\n    t_list.append(W2C[:3, 3])\n\n    camera_sphere[f\"world_mat_{view_idx}\"] = make4x4(K) @ W2C\n    print(img_name)\n    data[\"frames\"].append({\n        \"file_path\": f\"images/{img_name}.png\",\n        \"transform_matrix\": C2W.tolist(),\n        \"intrinsic_matrix\": make4x4(K).tolist()\n    })\n\noffset, scale = normalize_camera(R_list, t_list, camera2object_ratio=10)\ndata[\"scale\"] = scale\ndata[\"offset\"] = list((-offset*scale + 0.5))\n\n\ncreate_json_file(data, os.path.join(arg.data_dir, \"neus2_input\", 'transform.json'))"
  },
  {
    "path": "data_capture_and_preprocessing/sam_mvps.py",
    "content": "import os.path\nfrom glob import glob\nimport argparse\nimport torch.cuda\nfrom segment_anything import SamPredictor, sam_model_registry\n\nparser = argparse.ArgumentParser()\nparser.add_argument(\"--checkpoint\", type=str, default=None)\nparser.add_argument(\"--data_dir\", type=str, default=\"./\")\nargs = parser.parse_args()\n\nsam = sam_model_registry[\"vit_h\"](checkpoint=args.checkpoint)\nsam.to(device=\"cuda\")\npredictor = SamPredictor(sam)\n\nimport cv2\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport time\nfrom IPython.display import display, clear_output\n\nobj_dir = os.listdir(args.data_dir)\nobj_dir = [os.path.join(args.data_dir, obj) for obj in obj_dir if \".data\" in obj]\nmask_dir = os.path.join(os.path.dirname(os.path.dirname(args.data_dir)), \"mask\")\nos.makedirs(mask_dir, exist_ok=True)\n\ndef pick_point(event, x, y, flags, param):\n    if event == cv2.EVENT_LBUTTONDOWN:\n        print(f'You selected point ({x}, {y})')\n        points.append(np.array([[x, y]]))\n\ndef show_mask(mask, ax, random_color=False):\n    if random_color:\n        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)\n    else:\n        color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])\n    h, w = mask.shape[-2:]\n    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)\n    ax.imshow(mask_image)\n\n\ndef show_points(coords, labels, ax, marker_size=375):\n    pos_points = coords[labels == 1]\n    neg_points = coords[labels == 0]\n    ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white',\n               linewidth=1.25)\n    ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white',\n               linewidth=1.25)\n\n\ndef show_box(box, ax):\n    x0, y0 = box[0], box[1]\n    w, h = box[2] - box[0], box[3] - box[1]\n    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0, 0, 0, 0), 
lw=2))\n\n\n\nfor obj_dir_path in obj_dir:\n    mask_path = os.path.join(obj_dir_path, \"mask.png\")\n    if os.path.exists(mask_path):\n        continue\n    # randomly pick an image from the object directory\n    img_list = glob(os.path.join(obj_dir_path, \"*.png\")) + glob(os.path.join(obj_dir_path, \"*.jpg\"))\n    img_test_path = img_list[0]\n    img_test = cv2.imread(img_test_path)\n\n    predictor.set_image(img_test)\n    torch.cuda.synchronize()\n\n    points = []\n\n    while True:\n        # Create a window\n        cv2.namedWindow('image', cv2.WINDOW_NORMAL)\n\n        # Bind the callback function to the window\n        cv2.setMouseCallback('image', pick_point)\n\n        while(1):\n            cv2.imshow('image', img_test)\n            if cv2.waitKey(20) & 0xFF == 27:  # Break the loop when 'ESC' is pressed\n                break\n\n        cv2.destroyAllWindows()\n        print(f'Selected points: {points}')\n\n        input_point = np.concatenate(points, axis=0).reshape(-1, 2)\n        input_label = np.ones(input_point.shape[0], dtype=np.int64)\n        print(f'Input point: {input_point}')\n\n        masks, scores, logits = predictor.predict(\n            point_coords=input_point,\n            point_labels=input_label,\n            multimask_output=False,\n        )\n\n        for i, (mask, score) in enumerate(zip(masks, scores)):\n            plt.figure(figsize=(10,10))\n            plt.imshow(img_test[:, :, ::-1])\n            show_mask(mask, plt.gca())\n            show_points(input_point, input_label, plt.gca())\n            plt.title(f\"Mask {i+1}, Score: {score:.3f}\", fontsize=18)\n            plt.axis('off')\n            plt.show(block=False)\n            plt.pause(3)\n            plt.close()\n\n        value = input(\"Press enter to save the mask, or c to continue selecting points: \")\n        if value == \"c\":\n            continue\n        elif value == \"\":\n            break\n\n    # save the mask\n    base_dir = 
os.path.dirname(img_test_path)\n    view_idx = int(base_dir.split(\"/\")[-1].split(\".\")[0].split(\"_\")[-1])\n    mask_path1 = os.path.join(base_dir, \"mask.png\")\n    mask_path2 = os.path.join(mask_dir, f\"{view_idx:02d}.png\")\n    cv2.imwrite(mask_path1, mask.astype(np.uint8) * 255)\n    cv2.imwrite(mask_path2, mask.astype(np.uint8) * 255)\n    print(f\"Mask saved at {mask_path1} and {mask_path2}\")\n\n"
  },
  {
    "path": "download_data.sh",
    "content": "pip install gdown==5.1.0\ngdown 'https://drive.google.com/file/d/1Y3-v5jo-IRyTsPh8srZxIc2v5WZdPly_/view?usp=sharing' --fuzzy\nunzip data.zip\nrm data.zip"
  },
  {
    "path": "exp_runner.py",
    "content": "import os\r\nimport logging\r\nimport argparse\r\nimport numpy as np\r\nimport cv2 as cv\r\nimport trimesh\r\nimport torch\r\nimport torch.nn.functional as F\r\nfrom torch.utils.tensorboard import SummaryWriter\r\nfrom shutil import copyfile\r\nfrom tqdm.auto import tqdm\r\nfrom pyhocon import ConfigFactory\r\nfrom models.fields import SDFNetwork, SingleVarianceNetwork\r\n\r\nimport pyexr\r\nimport time\r\nfrom utilities.utils import crop_image_by_mask, toRGBA\r\n\r\nimport open3d as o3d\r\nimport pyvista as pv\r\npv.set_plot_theme(\"document\")\r\npv.global_theme.transparent_background = True\r\nfrom models.cd_and_fscore import chamfer_distance_and_f1_score\r\nimport csv\r\nfrom collections import OrderedDict\r\n\r\n\r\ndef get_class(kls):\r\n    parts = kls.split('.')\r\n    module = \".\".join(parts[:-1])\r\n    m = __import__(module)\r\n    for comp in parts[1:]:\r\n        m = getattr(m, comp)\r\n    return m\r\n\r\nclass Runner:\r\n    def __init__(self, conf_text, mode='train', is_continue=False, datadir=None):\r\n        self.device = torch.device('cuda')\r\n        self.conf_text = conf_text\r\n\r\n        if not is_continue:\r\n            exp_time = str(time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(time.time())))\r\n            exp_time_dir = f\"exp_{exp_time}\"\r\n\r\n        self.conf = ConfigFactory.parse_string(conf_text)\r\n        self.base_exp_dir = os.path.join(self.conf['general.base_exp_dir'], exp_time_dir)\r\n        os.makedirs(self.base_exp_dir, exist_ok=True)\r\n        self.dataset = get_class(self.conf['general.dataset_class'])(self.conf['dataset'])\r\n        self.iter_step = 0\r\n\r\n        # Training parameters\r\n        self.end_iter = self.conf.get_int('train.end_iter')\r\n        self.batch_size = self.conf.get_int('train.batch_size')\r\n        self.patch_size = self.conf.get_int('train.patch_size', default=3)\r\n\r\n        self.learning_rate = self.conf.get_float('train.learning_rate')\r\n        
self.learning_rate_alpha = self.conf.get_float('train.learning_rate_alpha')\r\n        self.use_white_bkgd = self.conf.get_bool('train.use_white_bkgd')\r\n        self.warm_up_end = self.conf.get_float('train.warm_up_end', default=0.0)\r\n\r\n        self.loss_type = self.conf.get('train.loss_type', 'l1')\r\n        self.normal_weight = self.conf.get_float('train.normal_weight')\r\n        self.eikonal_weight = self.conf.get_float('train.eikonal_weight')\r\n        self.mask_weight = self.conf.get_float('train.mask_weight')\r\n\r\n        self.increase_bindwidth_every = self.conf.get_int('train.increase_bindwidth_every', default=350)\r\n\r\n        # validation parameters\r\n        self.val_normal_freq = self.conf.get_int('val.val_normal_freq')\r\n        self.val_normal_resolution_level = self.conf.get_int('val.val_normal_resolution_level')\r\n        self.val_gradient_method = self.conf.get('val.gradient_method', 'dfd')\r\n\r\n        self.val_mesh_freq = self.conf.get_int('val.val_mesh_freq')\r\n        self.val_mesh_res = self.conf.get_int('val.val_mesh_res')\r\n\r\n        self.eval_metric_freq = self.conf.get_int('val.eval_metric_freq')\r\n        self.report_freq = self.conf.get_int('val.report_freq')\r\n        self.save_freq = self.conf.get_int('val.save_freq')\r\n\r\n        # Ray marching parameters\r\n        self.start_step_size = self.conf.get_float('model.ray_marching.start_step_size', default=1e-2)\r\n        self.end_step_size = self.conf.get_float('model.ray_marching.end_step_size', default=5e-4)\r\n        self.slop_step = (np.log10(self.start_step_size) - np.log10(self.end_step_size)) / self.end_iter\r\n\r\n        # Networks\r\n        params_to_train = []\r\n        self.sdf_network = SDFNetwork(**self.conf['model.sdf_network'], encoding_config=self.conf['model.encoding']).to(self.device)\r\n        self.deviation_network = SingleVarianceNetwork(**self.conf['model.variance_network']).to(self.device)\r\n\r\n        params_to_train += 
list(self.sdf_network.parameters())\r\n        params_to_train += list(self.deviation_network.parameters())\r\n\r\n        self.renderer = get_class(self.conf['general.renderer_class'])(self.sdf_network,\r\n                                                                       self.deviation_network,\r\n                                                                       self.conf[\"train\"][\"gradient_method\"])\r\n\r\n        self.optimizer = torch.optim.Adam(params_to_train, lr=self.learning_rate)\r\n\r\n        self.is_continue = is_continue\r\n        self.mode = mode\r\n\r\n        # Load checkpoint\r\n        latest_model_name = None\r\n        if is_continue:\r\n            model_list_raw = os.listdir(os.path.join(self.base_exp_dir, 'checkpoints'))\r\n            model_list = []\r\n            for model_name in model_list_raw:\r\n                if model_name[-3:] == 'pth' and int(model_name[5:-4]) <= self.end_iter:\r\n                    model_list.append(model_name)\r\n            model_list.sort()\r\n            latest_model_name = model_list[-1]\r\n\r\n        if latest_model_name is not None:\r\n            logging.info('Find checkpoint: {}'.format(latest_model_name))\r\n            self.load_checkpoint(latest_model_name)\r\n\r\n        # Backup codes and configs for debug\r\n        if self.mode[:5] == 'train':\r\n            self.file_backup()\r\n\r\n    def train(self):\r\n        print(\"Start training...\")\r\n        self.writer = SummaryWriter(log_dir=os.path.join(self.base_exp_dir, 'logs'))\r\n        self.writer.add_graph(self.sdf_network, verbose=False, input_to_model=torch.randn(1, 3))\r\n        self.update_learning_rate()\r\n\r\n        # create a csv file to save the evaluation metrics\r\n        csv_file_name = f\"eval_metrics.csv\"\r\n        csv_file_path = os.path.join(self.base_exp_dir, csv_file_name)\r\n        if not os.path.exists(csv_file_path):\r\n            with open(csv_file_path, 'w') as f:\r\n                writer = 
csv.writer(f)\r\n                if len(self.dataset.exclude_view_list)>0:\r\n                    writer.writerow(['iter',\r\n                                     'mae_all_view',\r\n                                     'mae_test_view',\r\n                                     'CD',\r\n                                     'fscore'])\r\n                else:\r\n                    writer.writerow(['iter',\r\n                                     'mae_all_view',\r\n                                     'CD',\r\n                                     'fscore'])\r\n\r\n        res_step = self.end_iter - self.iter_step\r\n        pbar = tqdm(range(res_step))\r\n        for iter_i in pbar:\r\n            # update ray marching step size\r\n            self.renderer.sampling_step_size = 10 ** (np.log10(self.start_step_size) - self.slop_step*iter_i)\r\n\r\n            # update occupancy grid\r\n            self.renderer.occupancy_grid.every_n_step(step=iter_i,\r\n                                                      occ_eval_fn=self.renderer.occ_eval_fn,\r\n                                                      occ_thre=self.conf[\"model.ray_marching\"][\"occ_threshold\"],\r\n                                                      n=self.conf[\"model.ray_marching\"][\"occ_update_freq\"])\r\n\r\n            # following neuralangelo, gradually increase ingp bandwidth\r\n            if self.iter_step % self.increase_bindwidth_every == 0:\r\n                self.renderer.sdf_network.increase_bandwidth()\r\n\r\n            # sample patches of pixels for training\r\n            rays_o_patch_all, rays_d_patch_all, marching_plane_normal, V_inverse_patch_all, true_normal, mask = \\\r\n                self.dataset.gen_random_patches(self.batch_size, patch_H=self.patch_size, patch_W=self.patch_size)\r\n\r\n            rays_o_patch_center = rays_o_patch_all[:, self.patch_size // 2, self.patch_size // 2]  # (num_patch, 3)\r\n            rays_d_patch_center = rays_d_patch_all[:, self.patch_size 
// 2, self.patch_size// 2]  # (num_patch, 3)\r\n            near, far = self.dataset.near_far_from_sphere(rays_o_patch_center, rays_d_patch_center)\r\n\r\n            if self.mask_weight > 0.0:\r\n                mask = (mask > 0.5).float()\r\n            else:\r\n                mask = torch.ones_like(mask)\r\n\r\n            mask_sum = mask.sum() + 1e-5\r\n\r\n            # forward rendering\r\n            render_out = self.renderer.render(rays_o_patch_all,\r\n                                              rays_d_patch_all,\r\n                                              marching_plane_normal,\r\n                                              near, far, V_inverse_patch_all)\r\n\r\n            if render_out['gradients'] is None:  # all rays are in the zero region of the occupancy grid\r\n                self.update_learning_rate()\r\n                continue\r\n\r\n            comp_normal = render_out['comp_normal']  # rendered normal at pixels\r\n            gradients = render_out['gradients']  # gradients at all sampled 3D points\r\n            comp_mask = render_out['weight_sum']  # rendered occupancy at pixels\r\n            samples_per_ray = render_out['samples_per_ray']\r\n\r\n            normal_error = (comp_normal - true_normal) * mask\r\n            if self.loss_type == 'l1':\r\n                normal_loss = F.l1_loss(normal_error, torch.zeros_like(normal_error), reduction='sum') / mask_sum\r\n            elif self.loss_type == 'l2':\r\n                normal_loss = F.mse_loss(normal_error, torch.zeros_like(normal_error), reduction='sum') / mask_sum\r\n\r\n            gradients_norm = torch.linalg.norm(gradients, ord=2, dim=-1)\r\n            eikonal_loss = F.mse_loss(gradients_norm, torch.ones_like(gradients_norm), reduction='mean')\r\n            mask_loss = F.binary_cross_entropy(comp_mask.clip(1e-5, 1.0 - 1e-5), mask)\r\n\r\n            loss = self.normal_weight * normal_loss + \\\r\n                   self.mask_weight * mask_loss + \\\r\n              
     self.eikonal_weight * eikonal_loss\r\n\r\n            self.optimizer.zero_grad()\r\n            loss.backward()\r\n            self.optimizer.step()\r\n\r\n            self.iter_step += 1\r\n            self.update_learning_rate()\r\n\r\n            if self.iter_step % self.report_freq == 0:\r\n                message_postfix = OrderedDict(loss=f\"{loss:.3e}\",\r\n                                              s=f\"{self.deviation_network.variance.item():.3e}\",\r\n                                              rm_step=f\"{self.renderer.sampling_step_size.item():.3e}\",\r\n                                              samples_per_ray=f\"{samples_per_ray:.1f}\")\r\n                pbar.set_postfix(ordered_dict=message_postfix)\r\n\r\n            if self.iter_step % self.save_freq == 0:\r\n                self.save_checkpoint()\r\n\r\n            if self.iter_step % self.val_mesh_freq == 0:\r\n                self.validate_mesh(resolution=self.val_mesh_res)\r\n\r\n            if self.iter_step % self.val_normal_freq == 0:\r\n                for val_idx in range(self.dataset.n_images):\r\n                    self.validate_normal_patch_based(idx=val_idx, resolution_level=self.val_normal_resolution_level,\r\n                                                     gradient_method=self.val_gradient_method)\r\n\r\n            if self.iter_step % self.eval_metric_freq == 0:\r\n                # no gt mesh, skip the evaluation\r\n                if self.dataset.mesh_gt is None:\r\n                    continue\r\n\r\n                # remove invisible faces in the gt mesh\r\n                if self.dataset.mesh_gt is not None and self.dataset.points_gt is None:\r\n                    self.dataset.mesh_gt.vertices = o3d.utility.Vector3dVector(\r\n                        (np.asarray(self.dataset.mesh_gt.vertices) -\r\n                         self.dataset.scale_mats_np[0][:3, 3][None]) /\r\n                        self.dataset.scale_mats_np[0][0, 0])\r\n                    mesh 
= trimesh.Trimesh(np.asarray(self.dataset.mesh_gt.vertices),\r\n                                           np.asarray(self.dataset.mesh_gt.triangles), process=False)\r\n                    self.dataset.points_gt = self.find_visible_points(mesh) * self.dataset.scale_mats_np[0][0, 0] + \\\r\n                                             self.dataset.scale_mats_np[0][:3, 3][None]\r\n\r\n                cd, fscore = self.eval_geo(resolution=512)\r\n                print(f'iter: {self.iter_step} cd: {cd:.3e}, fscore: {fscore:.3e}')\r\n                if len(self.dataset.exclude_view_list)>0:\r\n                    mae_allview, mae_test_view = self.eval_mae(gradient_method=self.val_gradient_method)\r\n\r\n                    print('MAE (all views) {0}: {1:.5f}'.format(self.val_gradient_method, mae_allview))\r\n                    print('MAE (test views) {0}: {1:.5f}'.format(self.val_gradient_method, mae_test_view))\r\n\r\n                    with open(csv_file_path, 'a') as f:\r\n                        writer = csv.writer(f)\r\n                        writer.writerow([self.iter_step,\r\n                                         mae_allview,\r\n                                         mae_test_view,\r\n                                         cd, fscore])\r\n\r\n                else:\r\n                    mae_allview = self.eval_mae(gradient_method=\"dfd\")\r\n                    # write to csv file\r\n                    with open(csv_file_path, 'a') as f:\r\n                        writer = csv.writer(f)\r\n                        writer.writerow([self.iter_step,\r\n                                         mae_allview,\r\n                                         cd, fscore])\r\n\r\n    def update_learning_rate(self):\r\n        if self.iter_step < self.warm_up_end:\r\n            learning_factor = self.iter_step / self.warm_up_end\r\n        else:\r\n            alpha = self.learning_rate_alpha\r\n            progress = (self.iter_step - self.warm_up_end) / 
(self.end_iter - self.warm_up_end)\r\n            learning_factor = (np.cos(np.pi * progress) + 1.0) * 0.5 * (1 - alpha) + alpha\r\n\r\n        for g in self.optimizer.param_groups:\r\n            g['lr'] = self.learning_rate * learning_factor\r\n\r\n    def file_backup(self):\r\n        dir_lis = self.conf['general.recording']\r\n        os.makedirs(os.path.join(self.base_exp_dir, 'recording'), exist_ok=True)\r\n        for dir_name in dir_lis:\r\n            cur_dir = os.path.join(self.base_exp_dir, 'recording', dir_name)\r\n            os.makedirs(cur_dir, exist_ok=True)\r\n            files = os.listdir(dir_name)\r\n            for f_name in files:\r\n                if f_name[-3:] == '.py':\r\n                    copyfile(os.path.join(dir_name, f_name), os.path.join(cur_dir, f_name))\r\n        try:\r\n            copyfile(self.conf_path, os.path.join(self.base_exp_dir, 'recording', 'config.conf'))\r\n        except:\r\n            # save conf_text into a txt file\r\n            with open(os.path.join(self.base_exp_dir, 'recording', 'config.conf'), 'w') as f:\r\n                f.write(self.conf_text)\r\n\r\n    def load_checkpoint(self, checkpoint_name):\r\n        checkpoint = torch.load(os.path.join(self.base_exp_dir, 'checkpoints', checkpoint_name), map_location=self.device)\r\n        self.sdf_network.load_state_dict(checkpoint['sdf_network_fine'])\r\n        self.deviation_network.load_state_dict(checkpoint['variance_network_fine'])\r\n        self.optimizer.load_state_dict(checkpoint['optimizer'])\r\n        self.iter_step = checkpoint['iter_step']\r\n        logging.info('End')\r\n\r\n    def save_checkpoint(self):\r\n        checkpoint = {\r\n            'sdf_network_fine': self.sdf_network.state_dict(),\r\n            'variance_network_fine': self.deviation_network.state_dict(),\r\n            'optimizer': self.optimizer.state_dict(),\r\n            'iter_step': self.iter_step,\r\n        }\r\n\r\n        os.makedirs(os.path.join(self.base_exp_dir, 
'checkpoints'), exist_ok=True)\r\n        torch.save(checkpoint, os.path.join(self.base_exp_dir, 'checkpoints', 'ckpt_{:0>6d}.pth'.format(self.iter_step)))\r\n\r\n    def validate_normal_pixel_based(self, idx=-1, resolution_level=-1):\r\n        if idx < 0:\r\n            idx = np.random.randint(self.dataset.n_images)\r\n\r\n        print('Validate: iter: {}, camera: {}'.format(self.iter_step, idx))\r\n\r\n        if resolution_level < 0:\r\n            resolution_level = self.validate_resolution_level\r\n        rays_o, rays_d = self.dataset.gen_rays_at(idx, resolution_level=resolution_level, within_mask=False)\r\n        H, W, _ = rays_o.shape\r\n        rays_o = rays_o.reshape(-1, 3).split(8192)\r\n        rays_d = rays_d.reshape(-1, 3).split(8192)\r\n\r\n        out_normal_fine = []\r\n        out_depth_fine = []\r\n\r\n        mask_np = self.dataset.masks_np[idx].astype(bool)[..., 0]\r\n        mask_np = cv.resize(mask_np.astype(np.uint8),\r\n                            ((int(W), int(H))),\r\n                            interpolation=cv.INTER_NEAREST).astype(bool)\r\n\r\n        for rays_o_batch, rays_d_batch in tqdm(zip(rays_o, rays_d)):\r\n            near, far = self.dataset.near_far_from_sphere(rays_o_batch, rays_d_batch)\r\n            # background_rgb = torch.ones([1, 3]) if self.use_white_bkgd else None\r\n\r\n            batch_normal, batch_depth = self.renderer.render_normal_pixel_based(rays_o_batch,\r\n                                              rays_d_batch,\r\n                                              near,\r\n                                              far)\r\n\r\n            out_normal_fine.append(batch_normal.detach().cpu().numpy())\r\n            out_depth_fine.append(batch_depth.detach().cpu().numpy())\r\n\r\n        if len(out_normal_fine) > 0:\r\n            normal_img = np.concatenate(out_normal_fine, axis=0)\r\n            rot = np.linalg.inv(self.dataset.pose_all[idx, :3, :3].detach().cpu().numpy())  # W2C rotation\r\n            
# normal_img_world = (normal_img.reshape([H, W, 3]) * 128 + 128).clip(0, 255)\r\n            normal_img = np.matmul(rot[None, :, :], normal_img[:, :, None]).reshape([H, W, 3, -1])\r\n            normal_img[:,:, [1, 2]] *= -1\r\n            normal_img_norm = np.linalg.norm(np.squeeze(normal_img), axis=2, keepdims=True)\r\n            normal_img_normalized = np.squeeze(normal_img) / (normal_img_norm+1e-7)\r\n\r\n            # normal_img = ((np.squeeze(normal_img)/normal_img_norm) * 128 + 128).clip(0, 255)\r\n            normal_img = (np.squeeze(normal_img) * 128 + 128).clip(0, 255)\r\n            normal_img_normalized = (np.squeeze(normal_img_normalized) * 128 + 128).clip(0, 255)\r\n\r\n\r\n            depth_img = np.concatenate(out_depth_fine, axis=0).reshape([H, W])\r\n\r\n        os.makedirs(os.path.join(self.base_exp_dir, 'normals'), exist_ok=True)\r\n        os.makedirs(os.path.join(self.base_exp_dir, \"depth\"), exist_ok=True)\r\n\r\n        normal_img_norm[~mask_np] = np.nan\r\n        depth_img[~mask_np] = np.nan\r\n\r\n        normal_img_norm = np.squeeze(normal_img_norm.clip(0.8, 1.2))\r\n        normal_img_norm = (normal_img_norm - np.nanmin(normal_img_norm)) / (np.nanmax(normal_img_norm) - np.nanmin(normal_img_norm))\r\n        normal_img_norm = np.nan_to_num(normal_img_norm)\r\n        normal_img_norm = (normal_img_norm * 255).astype(np.uint8)\r\n        normal_img_norm = cv.applyColorMap(normal_img_norm, cv.COLORMAP_JET)\r\n        normal_img_norm[~mask_np] = 0\r\n        cv.imwrite(os.path.join(self.base_exp_dir,\r\n                                        'normals',\r\n                                        '{:0>8d}_{}_{}_norm.png'.format(self.iter_step, 0, idx)),\r\n                           normal_img_norm[..., ::-1])\r\n\r\n        cv.imwrite(os.path.join(self.base_exp_dir,\r\n                                        'normals',\r\n                                        '{:0>8d}_{}_{}.png'.format(self.iter_step, 0, idx)),\r\n                        
   normal_img[..., ::-1])\r\n        cv.imwrite(os.path.join(self.base_exp_dir,\r\n                                        'normals',\r\n                                        '{:0>8d}_{}_{}_normalized.png'.format(self.iter_step, 0, idx)),\r\n                            normal_img_normalized[..., ::-1])\r\n        np.save(os.path.join(self.base_exp_dir,\r\n                                'depth',\r\n                                '{:0>8d}_{}_{}.npy'.format(self.iter_step, 0, idx)),\r\n                    depth_img)\r\n        return idx, (normal_img - 128) / 128.\r\n\r\n    def validate_normal_patch_based(self, idx=-1, resolution_level=-1, gradient_method=\"dfd\"):\r\n        if idx < 0:\r\n            idx = np.random.randint(self.dataset.n_images)\r\n\r\n        print('Rendering normal maps...  iter: {}, camera: {}'.format(self.iter_step, idx))\r\n\r\n        if resolution_level < 0:\r\n            resolution_level = self.validate_resolution_level\r\n        rays_o_patch_center, \\\r\n            rays_d_patch_center, \\\r\n            rays_o_patches_all, \\\r\n            rays_v_patches_all, \\\r\n            rays_ez, \\\r\n            rays_A_inverse, horizontal_num_patch, vertical_num_patch = self.dataset.gen_patches_at(idx, resolution_level=resolution_level,\r\n                                                                                                   patch_H=self.patch_size,\r\n                                                                                                   patch_W=self.patch_size)\r\n        mask_np = self.dataset.masks_np[idx].astype(bool)  # (H, W)\r\n\r\n        img_w = horizontal_num_patch * self.patch_size\r\n        img_h = vertical_num_patch * self.patch_size\r\n        # resize mask to the size of the image\r\n        mask_np = cv.resize(mask_np.astype(np.uint8),\r\n                            ((int(img_w), int(img_h))),\r\n                            interpolation=cv.INTER_NEAREST).astype(bool)\r\n\r\n        num_patches = 
rays_o_patches_all.shape[0]\r\n        eval_patch_size = 1024\r\n        comp_normal_map = np.zeros([img_h, img_w, 3])\r\n        comp_normal_list = []\r\n\r\n        for patch_idx in range(0, num_patches, eval_patch_size):\r\n            rays_o_patch_center_batch = rays_o_patch_center[patch_idx:patch_idx+eval_patch_size]\r\n            rays_d_patch_center_batch = rays_d_patch_center[patch_idx:patch_idx+eval_patch_size]\r\n            rays_o_patches_all_batch = rays_o_patches_all[patch_idx:patch_idx+eval_patch_size]\r\n            rays_v_patches_all_batch = rays_v_patches_all[patch_idx:patch_idx+eval_patch_size]\r\n            rays_ez_batch = rays_ez[patch_idx:patch_idx+eval_patch_size]\r\n            rays_A_inverse_batch = rays_A_inverse[patch_idx:patch_idx+eval_patch_size]\r\n\r\n            near, far = self.dataset.near_far_from_sphere(rays_o_patch_center_batch,\r\n                                                          rays_d_patch_center_batch)\r\n            render_out = self.renderer.render(rays_o_patches_all_batch,\r\n                                                    rays_v_patches_all_batch,\r\n                                                    rays_ez_batch,\r\n                                                    near, far,\r\n                                                    rays_A_inverse_batch, gradient_method, mode='eval')\r\n\r\n            comp_normal = render_out['comp_normal']\r\n            comp_normal = comp_normal.detach().cpu().numpy()\r\n            comp_normal_list.append(comp_normal)\r\n\r\n        comp_normal_list = np.concatenate(comp_normal_list, axis=0)\r\n\r\n        count = 0\r\n        for i in range(0, img_h, self.patch_size):\r\n            for j in range(0, img_w, self.patch_size):\r\n                comp_normal_map[i:i+self.patch_size, j:j+self.patch_size] = comp_normal_list[count]\r\n                count += 1\r\n        normal_img_world = comp_normal_map\r\n\r\n        rot = np.linalg.inv(self.dataset.pose_all[idx, :3, 
:3].detach().cpu().numpy())  # W2C rotation\r\n\r\n        normal_img = np.matmul(rot, normal_img_world[..., None]).squeeze()\r\n        normal_img[..., [1, 2]] *= -1\r\n        normal_img_png = (np.squeeze(normal_img) * 128 + 128).clip(0, 255)\r\n        normal_img_norm = np.linalg.norm(np.squeeze(normal_img), axis=2, keepdims=True)\r\n        normal_dir = os.path.join(self.base_exp_dir, f'normals_validation_{gradient_method}', 'iter_{:0>6d}'.format(self.iter_step))\r\n        os.makedirs(normal_dir, exist_ok=True)\r\n\r\n        normal_img_normalized = np.squeeze(normal_img) / (normal_img_norm + 1e-7)\r\n        normal_img_normalized = (np.squeeze(normal_img_normalized) * 128 + 128).clip(0, 255)\r\n\r\n        normal_eval = np.zeros((img_h, img_w, 3))\r\n        normal_eval[:normal_img_png.shape[0], :normal_img_png.shape[1]] = normal_img_png\r\n\r\n        normal_eval_normalized = np.zeros((img_h, img_w, 3))\r\n        normal_eval_normalized[:normal_img_normalized.shape[0], :normal_img_normalized.shape[1]] = normal_img_normalized\r\n\r\n        normal_img_normalized = crop_image_by_mask(toRGBA(normal_eval_normalized.astype(np.uint8)[...,::-1], mask_np), mask_np)\r\n\r\n        cv.imwrite(os.path.join(normal_dir, '{:0>8d}_{}_{}_rendered.png'.format(self.iter_step, 0, idx)),\r\n                           normal_eval[..., ::-1])\r\n\r\n        cv.imwrite(os.path.join(normal_dir, '{:0>8d}_{}_{}_normalized.png'.format(self.iter_step, 0, idx)),\r\n                            normal_img_normalized)\r\n        return normal_img_world, normal_dir\r\n\r\n    def validate_mesh(self, world_space=True, resolution=256, threshold=0.0):\r\n        print('Extracting mesh...  
iter: {}'.format(self.iter_step))\r\n        bound_min = torch.tensor(self.dataset.object_bbox_min, dtype=torch.float32)\r\n        bound_max = torch.tensor(self.dataset.object_bbox_max, dtype=torch.float32)\r\n\r\n        vertices, triangles =\\\r\n            self.renderer.extract_geometry(bound_min, bound_max, resolution=resolution, threshold=threshold)\r\n\r\n        mesh = trimesh.Trimesh(vertices, triangles)\r\n        vertices, triangles = mesh.vertices, mesh.faces\r\n\r\n        save_dir = os.path.join(self.base_exp_dir, 'meshes_validation')\r\n        os.makedirs(save_dir, exist_ok=True)\r\n\r\n        if world_space:\r\n            vertices = vertices * self.dataset.scale_mats_np[0][0, 0] + self.dataset.scale_mats_np[0][:3, 3][None]\r\n\r\n        self.writer.add_mesh('mesh_eval', vertices=vertices[None,...], faces=triangles[None,...], global_step=self.iter_step)\r\n\r\n        mesh = self.remove_isolated_clusters(trimesh.Trimesh(vertices, triangles))\r\n        mesh_path = os.path.join(save_dir, 'iter_{:0>8d}.ply'.format(self.iter_step))\r\n        o3d.io.write_triangle_mesh((mesh_path), mesh)\r\n\r\n        print(f'Mesh saved at {mesh_path}')\r\n\r\n    def remove_isolated_clusters(self, mesh):\r\n        # cleaning the marching cube extracted mesh\r\n        import copy\r\n        mesh = mesh.as_open3d\r\n        # with o3d.utility.VerbosityContextManager(\r\n        #         o3d.utility.VerbosityLevel.Debug) as cm:\r\n        triangle_clusters, cluster_n_triangles, cluster_area = (\r\n            mesh.cluster_connected_triangles())\r\n        triangle_clusters = np.asarray(triangle_clusters)\r\n        cluster_n_triangles = np.asarray(cluster_n_triangles)\r\n\r\n        mesh_eval = copy.deepcopy(mesh)\r\n        largest_cluster_idx = cluster_n_triangles.argmax()\r\n        triangles_to_remove = triangle_clusters != largest_cluster_idx\r\n        mesh_eval.remove_triangles_by_mask(triangles_to_remove)\r\n        
mesh_eval.remove_unreferenced_vertices()\r\n        return mesh_eval\r\n\r\n    @torch.no_grad()\r\n    def eval_mae(self, gradient_method):\r\n        print(\"Computing mean angular errors...\")\r\n        normal_gt_dir = os.path.join(self.dataset.data_dir, \"normal_world_space_GT\")\r\n\r\n        ae_map_list = []\r\n        normal_map_eval_list = []\r\n        ae_map_eval_list = []\r\n        ae_map_test_list = []\r\n        for idx in range(self.dataset.n_images):\r\n            normal_gt = pyexr.read(os.path.join(normal_gt_dir, \"{:02d}.exr\".format(idx)))[..., :3]\r\n\r\n            mask_np = self.dataset.masks_np[idx].astype(bool)\r\n\r\n            normal_map_world, save_dir = self.validate_normal_patch_based(idx, resolution_level=self.val_normal_resolution_level, gradient_method=gradient_method)\r\n\r\n            normal_map_world = normal_map_world / (1e-10 + np.linalg.norm(normal_map_world, axis=-1, keepdims=True))\r\n\r\n            normal_eval = np.zeros((self.dataset.H, self.dataset.W, 3))\r\n            normal_eval[:normal_map_world.shape[0], :normal_map_world.shape[1]] = normal_map_world\r\n            normal_eval[~mask_np] = np.nan\r\n            normal_map_eval_list.append(normal_eval)\r\n            # self.writer.add_image(step=self.iter_step, data=(normal_eval + 1) / 2, name=(\"normal_eval_{:02d}\".format(idx)))\r\n            # pyexr.write(os.path.join(normal_save_dir, \"{:02d}.exr\".format(idx)), normal_img)\r\n\r\n            angular_error_map = np.rad2deg(np.arccos(np.clip(np.sum(normal_gt * normal_eval, axis=-1), -1, 1)))\r\n            # save angular error map\r\n\r\n            ae_map_list.append(angular_error_map.copy())\r\n            if idx in self.dataset.exclude_view_list:\r\n                ae_map_test_list.append(angular_error_map.copy())\r\n\r\n            # apply jet to angular error map\r\n            angular_error_map[~mask_np] = 0\r\n            angular_error_map_jet = cv.applyColorMap((angular_error_map / 20 * 255).clip(0, 
255).astype(np.uint8),\r\n                                                     cv.COLORMAP_JET)\r\n            angular_error_map_jet[~mask_np] = 255\r\n            angular_error_map_jet = crop_image_by_mask(toRGBA(angular_error_map_jet, mask_np), mask_np)\r\n            cv.imwrite(os.path.join(save_dir, '{:0>8d}_{}_{}_ae_up_{}.png'.format(self.iter_step, 0, idx, 20)), angular_error_map_jet)\r\n\r\n\r\n            ae_map_eval_list.append(angular_error_map_jet)\r\n\r\n        mae = np.nanmean(np.stack(ae_map_list, axis=0))\r\n        self.writer.add_scalar('Statistics/mae_allview', mae, self.iter_step)\r\n\r\n        if len(ae_map_test_list) > 0:\r\n            mae_test = np.nanmean(np.stack(ae_map_test_list, axis=0))\r\n            self.writer.add_scalar('Statistics/mae_testview', mae_test, self.iter_step)\r\n            return mae, mae_test\r\n\r\n        return mae\r\n\r\n    @torch.no_grad()\r\n    def eval_geo(self, resolution=1024):\r\n        # save the mesh\r\n        save_dir = os.path.join(self.base_exp_dir, 'points_val')\r\n        os.makedirs(save_dir, exist_ok=True)\r\n\r\n        # save gt points\r\n        pcd_gt = o3d.geometry.PointCloud()\r\n        pcd_gt.points = o3d.utility.Vector3dVector(self.dataset.points_gt)\r\n        if not os.path.exists(os.path.join(save_dir, f\"pcd_gt.ply\")):\r\n            o3d.io.write_point_cloud(os.path.join(save_dir, f\"pcd_gt.ply\"), pcd_gt)\r\n\r\n        # marching cubes\r\n        bound_min = torch.tensor(self.dataset.object_bbox_min, dtype=torch.float32)\r\n        bound_max = torch.tensor(self.dataset.object_bbox_max, dtype=torch.float32)\r\n\r\n        vertices, triangles = \\\r\n            self.renderer.extract_geometry(bound_min, bound_max, resolution=resolution, threshold=0)\r\n\r\n        # vertices = vertices * self.dataset.scale_mats_np[0][0, 0] + self.dataset.scale_mats_np[0][:3, 3][None]\r\n        mesh = trimesh.Trimesh(np.asarray(vertices), np.asarray(triangles), process=False)\r\n        
vertices_world = vertices * self.dataset.scale_mats_np[0][0, 0] + self.dataset.scale_mats_np[0][:3, 3][None]\r\n        mesh_world = trimesh.Trimesh(np.asarray(vertices_world), np.asarray(triangles), process=False)\r\n        mesh_world_path = os.path.join(save_dir, f\"{self.iter_step}_world.obj\")\r\n        mesh_world.export(mesh_world_path)\r\n\r\n        points_eval = self.find_visible_points(mesh)*self.dataset.scale_mats_np[0][0, 0] + self.dataset.scale_mats_np[0][:3, 3][None]\r\n\r\n        # save the sampled points\r\n        sampled_points_path = os.path.join(save_dir, f\"{self.iter_step}_points_eval.ply\")\r\n        pcd_eval = o3d.geometry.PointCloud()\r\n        pcd_eval.points = o3d.utility.Vector3dVector(points_eval)\r\n        o3d.io.write_point_cloud(sampled_points_path, pcd_eval)\r\n\r\n        cd, fscore = chamfer_distance_and_f1_score(points_eval, self.dataset.points_gt)\r\n        self.writer.add_scalar('Statistics/cd', cd, self.iter_step)\r\n        self.writer.add_scalar('Statistics/fscore', fscore, self.iter_step)\r\n        return cd, fscore\r\n\r\n    def find_visible_points(self, mesh):\r\n        num_view = self.dataset.n_images\r\n        points_list = []\r\n        for view_idx in range(num_view):\r\n            rays_o, rays_v = self.dataset.gen_rays_at(view_idx, resolution_level=1, within_mask=True)\r\n            rays_o, rays_v = rays_o.cpu().detach().numpy(), rays_v.cpu().detach().numpy()\r\n            rays_v = rays_v / np.linalg.norm(rays_v, axis=-1, keepdims=True)\r\n            locations, index_ray, index_tri = mesh.ray.intersects_location(\r\n                ray_origins=rays_o,\r\n                ray_directions=rays_v,\r\n                multiple_hits=False)\r\n            points_list.append(locations)\r\n        return np.concatenate(points_list, axis=0)\r\n\r\n\r\nif __name__ == '__main__':\r\n    import warnings\r\n    warnings.filterwarnings(\"ignore\")\r\n\r\n    
torch.set_default_tensor_type('torch.cuda.FloatTensor')\r\n\r\n    parser = argparse.ArgumentParser()\r\n    parser.add_argument('--conf', type=str, default='./confs/base.conf')\r\n    parser.add_argument('--mode', type=str, default='eval_normal')\r\n    parser.add_argument('--mcube_threshold', type=float, default=0.0)\r\n    parser.add_argument('--is_continue', default=False, action=\"store_true\")\r\n    parser.add_argument('--gpu', type=int, default=0)\r\n    parser.add_argument('--obj_name', type=str, default='')\r\n\r\n    args = parser.parse_args()\r\n    torch.cuda.set_device(args.gpu)\r\n\r\n    print(f'Running on the object: {args.obj_name}')\r\n\r\n    f = open(args.conf)\r\n    conf_text = f.read()\r\n    conf_text = conf_text.replace('CASE_NAME', args.obj_name)\r\n\r\n    runner = Runner(conf_text, args.mode, args.is_continue)\r\n    runner.train()\r\n\r\n"
  },
  {
    "path": "models/cd_and_fscore.py",
    "content": "from scipy.spatial import KDTree\r\nimport numpy as np\r\n\r\n\r\ndef chamfer_distance_and_f1_score(ref_points, eval_points, f_threshold=0.5):\r\n    \"\"\"\r\n    This function calculates the chamfer distance and f1 score between two sets of points.\r\n\r\n    Parameters:\r\n    ref_points (numpy.ndarray): Reference points. A (p, 3) array representing points in the world space.\r\n    eval_points (numpy.ndarray): Points to be evaluated. A (q, 3) array representing points in the world space.\r\n    f_threshold (float, optional): Distance threshold for f1 score calculation. Default is 0.5mm.\r\n\r\n    Returns:\r\n    chamfer_dist (float): The chamfer distance between ref_points and eval_points, i.e. the average of the two\r\n        directed mean nearest-neighbour distances.\r\n    f_score (float): The f1 score between ref_points and eval_points. It is 0.0 when no point of either set has a\r\n        neighbour in the other set closer than f_threshold.\r\n    \"\"\"\r\n    print(\"computing chamfer distance and f1 score...\")\r\n    distance_eval2gt, _ = KDTree(ref_points).query(eval_points, k=1, p=2)   # p=2 for Euclidean distance\r\n    distance_gt2eval, _ = KDTree(eval_points).query(ref_points, k=1, p=2)\r\n\r\n    # following Uncertainty-aware deep multi-view photometric stereo\r\n    chamfer_dist = (np.mean(distance_eval2gt) + np.mean(distance_gt2eval))/2\r\n\r\n    precision = np.mean(distance_eval2gt < f_threshold)\r\n    recall = np.mean(distance_gt2eval < f_threshold)\r\n\r\n    # precision + recall is 0 when nothing lies within the threshold; report 0 instead of 0/0 = NaN\r\n    pr_sum = precision + recall\r\n    f_score = 2 * precision * recall / pr_sum if pr_sum > 0 else 0.0\r\n\r\n    return chamfer_dist, f_score\r\n"
  },
  {
    "path": "models/dataset_loader.py",
    "content": "import torch\r\nimport torch.nn.functional as F\r\nimport cv2 as cv\r\nimport numpy as np\r\nimport os\r\nfrom glob import glob\r\nfrom icecream import ic\r\nimport pyexr\r\nimport open3d as o3d\r\nimport time\r\nfrom concurrent.futures import ThreadPoolExecutor\r\n\r\n\r\ndef load_K_Rt_from_P(filename, P=None):\r\n    \"\"\"\r\n    Decompose a projection matrix into a 4x4 intrinsic matrix and a 4x4 camera-to-world pose.\r\n\r\n    Parameters:\r\n    filename (str): Text file holding the matrix rows (space-separated values); only read when P is None.\r\n    P (numpy.ndarray, optional): Projection matrix handed to cv.decomposeProjectionMatrix.\r\n\r\n    Returns:\r\n    intrinsics (numpy.ndarray): 4x4 matrix whose top-left 3x3 block is K scaled so that K[2, 2] = 1.\r\n    C2W (numpy.ndarray): 4x4 float32 matrix holding R.T and the camera position in world coordinates.\r\n    \"\"\"\r\n    # This function is borrowed from IDR: https://github.com/lioryariv/idr\r\n    if P is None:\r\n        # use a context manager so the file handle is always closed\r\n        with open(filename) as f:\r\n            lines = f.read().splitlines()\r\n        if len(lines) == 4:\r\n            lines = lines[1:]\r\n        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(\" \") for x in lines)]\r\n        P = np.asarray(lines).astype(np.float32).squeeze()\r\n\r\n    K, R, t, *_ = cv.decomposeProjectionMatrix(P)\r\n    # CAUTION: R is the W2C rotation matrix but t is the camera position in world coordinate.\r\n    K = K / K[2, 2]\r\n\r\n    intrinsics = np.eye(4)\r\n    intrinsics[:3, :3] = K\r\n\r\n    C2W = np.eye(4, dtype=np.float32)\r\n    C2W[:3, :3] = R.T\r\n    C2W[:3, 3] = (t[:3] / t[3])[:, 0]\r\n\r\n    return intrinsics, C2W\r\n\r\n\r\nclass Dataset:\r\n    \"\"\"\r\n    Loads per-view normal maps, masks and camera matrices from `data_dir` and generates rays and ray patches.\r\n    \"\"\"\r\n    def __init__(self, conf):\r\n        super(Dataset, self).__init__()\r\n        print('Load data: Begin')\r\n        self.device = torch.device('cuda')\r\n        self.conf = conf\r\n        normal_dir = conf.get_string('normal_dir')\r\n\r\n        self.data_dir = conf.get_string('data_dir')\r\n        self.cameras_name = conf.get_string('cameras_name')\r\n        self.exclude_view_list = conf['exclude_views']  # list of views to exclude from training. Used in novel-view normal synthesis evaluation.\r\n        self.upsample_factor = conf.get_int('upsample_factor', default=1)\r\n        ic(self.exclude_view_list)\r\n\r\n        # load the GT mesh for evaluation if any\r\n        mesh_path = os.path.join(self.data_dir, 'mesh_Gt.ply')\r\n        if os.path.exists(mesh_path):\r\n            self.mesh_gt = o3d.io.read_triangle_mesh(mesh_path)\r\n        else:\r\n            self.mesh_gt = None\r\n        self.points_gt = None  # will be computed from the mesh at evaluation time\r\n\r\n        camera_dict = np.load(os.path.join(self.data_dir, self.cameras_name))\r\n        self.camera_dict = camera_dict\r\n        self.normal_lis = sorted(glob(os.path.join(self.data_dir, normal_dir, '*.exr')))\r\n        self.n_images = len(self.normal_lis)\r\n        self.train_images = set(range(self.n_images)) - set(self.exclude_view_list)\r\n        self.img_idx_list = [int(os.path.basename(x).split('.')[0]) for x in self.normal_lis]\r\n\r\n        # os.cpu_count() may return None, so fall back to 1 before scaling the worker count\r\n        num_io_workers = min(64, (os.cpu_count() or 1)*5)\r\n\r\n        print(\"loading normal maps...\")\r\n        with ThreadPoolExecutor(max_workers=num_io_workers) as executor:\r\n            def read_normal(im_name):\r\n                return pyexr.read(im_name)[..., :3]\r\n            self.normal_np = np.stack(list(executor.map(read_normal, self.normal_lis)))\r\n\r\n        if self.upsample_factor > 1:\r\n            # resize normal maps\r\n            self.normal_np = F.interpolate(torch.from_numpy(self.normal_np).permute(0, 3, 1, 2), scale_factor=self.upsample_factor, mode='bilinear', align_corners=False).permute(0, 2, 3, 1).numpy()\r\n        self.normals = torch.from_numpy(self.normal_np.astype(np.float32)).to(self.device)  # [n_images, H, W, 3]\r\n        print(\"loading normal maps done.\")\r\n\r\n        self.masks_lis = sorted(glob(os.path.join(self.data_dir, 'mask/*.png')))\r\n        with ThreadPoolExecutor(max_workers=num_io_workers) as executor:\r\n            def read_mask(im_name):\r\n                return cv.imread(im_name)\r\n            self.masks_np = np.stack(list(executor.map(read_mask, self.masks_lis))) / 255.0\r\n\r\n        if self.upsample_factor > 1:\r\n            # resize mask\r\n            self.masks_np = F.interpolate(torch.from_numpy(self.masks_np).permute(0, 3, 1, 2), scale_factor=self.upsample_factor, mode='nearest').permute(0, 2, 3, 1).numpy()\r\n        self.masks_np = self.masks_np[..., 0]\r\n        self.total_pixel = np.sum(self.masks_np)\r\n\r\n        # set background of normal map to 0\r\n        self.normal_np[self.masks_np == 0] = 0\r\n\r\n        # world_mat is a projection matrix from world to image\r\n        self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in self.img_idx_list]\r\n\r\n        # scale_mat: used for coordinate normalization, we assume the scene to render is inside a unit sphere at origin.\r\n        self.scale_mats_np = [camera_dict['scale_mat_%d' % idx].astype(np.float32) for idx in self.img_idx_list]\r\n\r\n        self.intrinsics_all = []\r\n        self.pose_all = []\r\n        self.V_inverse_all = []\r\n\r\n        self.H, self.W = self.normal_np.shape[1], self.normal_np.shape[2]\r\n        for scale_mat, world_mat, normal_map, mask in zip(self.scale_mats_np, self.world_mats_np, self.normals, self.masks_np):\r\n            P = world_mat @ scale_mat\r\n            P = P[:3, :4]\r\n            intrinsics, C2W = load_K_Rt_from_P(None, P)\r\n            if self.upsample_factor > 1:\r\n                # resize intrinsics\r\n                intrinsics[0, 0] *= self.upsample_factor\r\n                intrinsics[1, 1] *= self.upsample_factor\r\n                intrinsics[0, 2] *= self.upsample_factor\r\n                intrinsics[1, 2] *= self.upsample_factor\r\n            self.intrinsics_all.append(torch.from_numpy(intrinsics).float())\r\n            self.pose_all.append(torch.from_numpy(C2W).float())\r\n\r\n            intrinsics_inverse = torch.inverse(torch.from_numpy(intrinsics).float())\r\n            pose = torch.from_numpy(C2W).float()\r\n            # compute the V_inverse\r\n            tx = torch.linspace(0, self.W - 1, int(self.W))\r\n            ty = torch.linspace(0, self.H - 1, int(self.H))\r\n            pixels_x, pixels_y = torch.meshgrid(tx, ty)\r\n            p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1).to(intrinsics_inverse.device)  # W, H, 3\r\n            p = torch.matmul(intrinsics_inverse[None, None, :3, :3],\r\n                             p[:, :, :, None]).squeeze()  # W, H, 3\r\n            rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # W, H, 3\r\n            rays_v = torch.matmul(pose[None, None, :3, :3],\r\n                                  rays_v[:, :, :, None]).squeeze()  # W, H, 3\r\n            rays_v = rays_v.transpose(0, 1).to(self.device) # H, W, 3\r\n\r\n            # the axis direction of the camera coordinate system in the world coordinate system\r\n            rays_right = pose[None, :3, 0].expand(rays_v.shape).to(self.device)  # H, W, 3\r\n            rays_down = pose[None, :3, 1].expand(rays_v.shape).to(self.device)  # H, W, 3\r\n\r\n            V_concat = torch.cat([rays_v[..., None, :],\r\n                                  rays_right[..., None, :],\r\n                                  rays_down[..., None, :]], dim=-2)  # (H, W, 3, 3)\r\n\r\n            # computing the inverse may take a while if the resolution is high\r\n            # For 512x612, it takes about 0.8ms\r\n            V_inverse = torch.inverse(V_concat)  # (H, W, 3, 3)\r\n            self.V_inverse_all.append(V_inverse)\r\n\r\n        self.masks = torch.from_numpy(self.masks_np.astype(np.float32)).to(self.device) # [n_images, H, W]\r\n        self.intrinsics_all = torch.stack(self.intrinsics_all).to(self.device)   # [n_images, 4, 4]\r\n        self.intrinsics_all_inv = torch.inverse(self.intrinsics_all)  # [n_images, 4, 4]\r\n        self.focal_length = self.intrinsics_all[0][0, 0]\r\n        self.pose_all = torch.stack(self.pose_all).to(self.device)  # [n_images, 4, 4]\r\n        self.image_pixels = self.H * self.W\r\n        self.V_inverse_all = torch.stack(self.V_inverse_all).to(self.device)  # [n_images, H, W, 3, 3]\r\n\r\n        # for mesh extraction\r\n        self.object_bbox_min = np.array([-1., -1., -1.])\r\n        self.object_bbox_max = np.array([1.,  1.,  1.])\r\n        print('Load data: End')\r\n\r\n    def gen_rays_at(self, img_idx, resolution_level=1, within_mask=False):\r\n        \"\"\"\r\n        Generate all rays at world space from one camera.\r\n\r\n        When within_mask is True only the rays of pixels inside the (resized) mask of view img_idx are returned.\r\n        \"\"\"\r\n        mask_np = self.masks_np[img_idx].astype(bool)\r\n        # resize the mask using resolution_level\r\n        mask_np = cv.resize(mask_np.astype(np.uint8)*255, (int(self.W // resolution_level), int(self.H // resolution_level)), interpolation=cv.INTER_NEAREST).astype(bool)\r\n\r\n        l = resolution_level\r\n        tx = torch.linspace(0, self.W - 1, int(self.W // l))\r\n        ty = torch.linspace(0, self.H - 1, int(self.H // l))\r\n        pixels_x, pixels_y = torch.meshgrid(tx, ty)\r\n        p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1) # W, H, 3\r\n        p = torch.matmul(self.intrinsics_all_inv[img_idx, None, None, :3, :3], p[:, :, :, None]).squeeze()  # W, H, 3\r\n        rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # W, H, 3\r\n        rays_v = torch.matmul(self.pose_all[img_idx, None, None, :3, :3], rays_v[:, :, :, None]).squeeze()  # W, H, 3\r\n        rays_o = self.pose_all[img_idx, None, None, :3, 3].expand(rays_v.shape)  # W, H, 3\r\n        rays_o = rays_o.transpose(0, 1)\r\n        rays_v = rays_v.transpose(0, 1)\r\n\r\n        if within_mask:\r\n            return rays_o[mask_np], rays_v[mask_np]\r\n        else:\r\n            return rays_o, rays_v\r\n\r\n    def gen_patches_at(self, img_idx, resolution_level=1, patch_H=3, patch_W=3):\r\n        \"\"\"\r\n        Split the rays of view img_idx into non-overlapping patches of size (patch_H, patch_W).\r\n\r\n        Returns the center-ray origins and directions, the per-patch ray origins and directions, the marching plane\r\n        normal of every patch, the per-patch V_inverse matrices, and the number of patches along the horizontal and\r\n        vertical image directions.\r\n        \"\"\"\r\n        tx = torch.linspace(0, self.W - 1, int(self.W // resolution_level))\r\n        ty = torch.linspace(0, self.H - 1, int(self.H // resolution_level))\r\n        pixels_y, pixels_x = torch.meshgrid(ty, tx)\r\n\r\n        p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1) # H, W, 3\r\n        p = torch.matmul(self.intrinsics_all_inv[img_idx, :3, :3], p[..., None]).squeeze()  # H, W, 3\r\n        rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # H, W, 3\r\n        rays_v = torch.matmul(self.pose_all[img_idx, :3, :3], rays_v[:, :, :, None]).squeeze()  # H, W, 3\r\n\r\n        # split rays_v into non-overlapping patches\r\n        height, width, _ = rays_v.shape\r\n        horizontal_num_patch = width // patch_W\r\n        vertical_num_patch = height // patch_H\r\n        rays_v_patches_all = []\r\n        rays_V_inverse_patches_all = []\r\n        # NOTE(review): V_inverse_all is stored at full resolution (H, W), whereas rays_v is sampled with\r\n        # resolution_level, so the two patch slices cover the same pixels only when resolution_level == 1.\r\n        for i in range(0, height-patch_H//2-1, patch_H):\r\n            for j in range(0, width-patch_W//2-1, patch_W):\r\n                rays_v_patch = rays_v[i:i + patch_H, j:j + patch_W]\r\n                rays_v_patches_all.append(rays_v_patch)\r\n\r\n                rays_V_inverse_patch = self.V_inverse_all[img_idx][i:i + patch_H, j:j + patch_W]\r\n                rays_V_inverse_patches_all.append(rays_V_inverse_patch)\r\n        rays_v_patches_all = torch.stack(rays_v_patches_all, dim=0)  # (num_patch, patch_H, patch_W, 3)\r\n        rays_V_inverse_patches_all = torch.stack(rays_V_inverse_patches_all, dim=0)  # (num_patch, patch_H, patch_W, 3, 3)\r\n        rays_o_patches_all = self.pose_all[img_idx, :3, 3].expand(rays_v_patches_all.shape)  # (num_patch, patch_H, patch_W, 3)\r\n\r\n        rays_o_patch_center = rays_o_patches_all[:, patch_H//2, patch_W//2]  # (num_patch, 3)\r\n        rays_d_patch_center = rays_v_patches_all[:, patch_H//2, patch_W//2]  # (num_patch, 3)\r\n\r\n        marching_plane_normal_patches_all = self.pose_all[img_idx, :3, 2].expand(rays_d_patch_center.shape)  # (num_patch, 3)\r\n\r\n        return rays_o_patch_center, \\\r\n                rays_d_patch_center, \\\r\n            rays_o_patches_all, \\\r\n            rays_v_patches_all, \\\r\n            marching_plane_normal_patches_all, \\\r\n            rays_V_inverse_patches_all, horizontal_num_patch, vertical_num_patch\r\n\r\n    def gen_random_patches(self, num_patch, patch_H=3, patch_W=3):\r\n        \"\"\"\r\n        Generate random patches of rays at world space from the training viewpoints.\r\n        X-axis right, Y-axis down\r\n\r\n        Parameters:\r\n        num_patch (int): The number of patches to generate.\r\n        patch_H (int, optional): The height of the patches. Default is 3.\r\n        patch_W (int, optional): The width of the patches. Default is 3.\r\n\r\n        Returns:\r\n        rays_o_patch_all (torch.Tensor): The origins of the rays in each patch. A tensor of shape (num_patch, patch_H, patch_W, 3).\r\n        rays_d_patch_all (torch.Tensor): The directions of the rays in each patch. A tensor of shape (num_patch, patch_H, patch_W, 3).\r\n        marching_plane_normal (torch.Tensor): The normal direction of the image/marching plane.\r\n                Since patches are sampled from different viewpoints, this normal is only constant within each patch. A tensor of shape (num_patch, 3).\r\n        V_inverse_patch_all (torch.Tensor): The inverse of the V matrix at patches of pixels. A tensor of shape (num_patch, patch_H, patch_W, 3, 3).\r\n        normal (torch.Tensor): The normals at patches of pixels. A tensor of shape (num_patch, patch_H, patch_W, 3).\r\n        mask (torch.Tensor): The mask values at patches of pixels. A tensor of shape (num_patch, patch_H, patch_W, 1).\r\n        \"\"\"\r\n        # randomly sample center pixel locations of patches\r\n        # assume all images have the same resolution\r\n        patch_center_x = torch.randint(low=0+patch_W//2, high=self.W-1-patch_W//2, size=[num_patch], device=self.device)  # (num_patch, )\r\n        patch_center_y = torch.randint(low=0+patch_H//2, high=self.H-1-patch_H//2, size=[num_patch], device=self.device)  # (num_patch, )\r\n\r\n        # compute all pixel locations within the patches given patch size (patch_H, patch_W)\r\n        patch_center_x_all = patch_center_x[:, None, None] + torch.arange(-patch_W//2+1, patch_W//2+1, device=self.device).repeat(patch_H, 1)   # (num_patch, patch_H, patch_W)\r\n        patch_center_y_all = patch_center_y[:, None, None] + torch.arange(-patch_H//2+1, patch_H//2+1, device=self.device).reshape(-1, 1).repeat(1, patch_W)   # (num_patch, patch_H, patch_W)\r\n\r\n        # randomly sample viewpoints\r\n        img_idx = np.random.choice(list(self.train_images), size=[num_patch])  # (num_patch, )\r\n        img_idx = torch.tensor(img_idx, device=self.device)\r\n        img_idx_expand = img_idx.view(-1, 1, 1).expand_as(patch_center_x_all)  # (num_patch, patch_H, patch_W)\r\n\r\n        # input normals and mask values for supervision\r\n        normal = self.normals[img_idx_expand, patch_center_y_all, patch_center_x_all]  # (num_patch, patch_H, patch_W, 3)\r\n        V_inverse_patch_all = self.V_inverse_all[img_idx_expand, patch_center_y_all, patch_center_x_all]  # (num_patch, patch_H, patch_W, 3, 3)\r\n        mask = self.masks[img_idx_expand, patch_center_y_all, patch_center_x_all].unsqueeze(-1)#[..., :1]     # (num_patch, patch_H, patch_W, 1)\r\n\r\n        # compute all ray directions within patches\r\n        p_all = torch.stack([patch_center_x_all, patch_center_y_all, torch.ones_like(patch_center_y_all)], dim=-1).float().to(self.device)  # (num_patch, patch_H, patch_W, 3)\r\n        p_all = torch.matmul(self.intrinsics_all_inv[img_idx_expand, :3, :3], p_all[..., None])[..., 0]  # (num_patch, patch_H, patch_W, 3)\r\n        p_norm_all = torch.linalg.norm(p_all, ord=2, dim=-1, keepdim=True)  # (num_patch, patch_H, patch_W, 1)\r\n        rays_d_patch_all = p_all / p_norm_all  # (num_patch, patch_H, patch_W, 3)\r\n        rays_d_patch_all = torch.matmul(self.pose_all[img_idx, None, None, :3, :3], rays_d_patch_all[..., None])[..., 0]  # (num_patch, patch_H, patch_W, 3)\r\n        rays_o_patch_all = self.pose_all[img_idx, None, None, :3, 3].expand(rays_d_patch_all.shape)  # (num_patch, patch_H, patch_W, 3)\r\n\r\n        # the normal direction of the image/marching plane is the 3rd column of the camera-to-world rotation\r\n        marching_plane_normal = self.pose_all[img_idx, :3, 2].expand((num_patch, 3))  # (num_patch, 3)\r\n\r\n        return rays_o_patch_all, \\\r\n                rays_d_patch_all, \\\r\n                marching_plane_normal, \\\r\n                V_inverse_patch_all, \\\r\n                normal,\\\r\n                mask\r\n\r\n    def near_far_from_sphere(self, rays_o, rays_d):\r\n        \"\"\"\r\n        This function calculates the near and far intersection points of rays with a unit sphere.\r\n\r\n        Parameters:\r\n        rays_o (torch.Tensor): Origin of the rays. A tensor of shape (N, 3) where N is the number of rays.\r\n        rays_d (torch.Tensor): Direction of the rays. A tensor of shape (N, 3) where N is the number of rays.\r\n\r\n        Returns:\r\n        near (torch.Tensor): Near intersection points of the rays with the unit sphere. A tensor of shape (N, ).\r\n        far (torch.Tensor): Far intersection points of the rays with the unit sphere. A tensor of shape (N, ).\r\n                Both values are NaN for rays that miss the sphere (negative discriminant under the square root).\r\n        \"\"\"\r\n        a = torch.sum(rays_d**2, dim=-1, keepdim=True)\r\n        b = 2.0 * torch.sum(rays_o * rays_d, dim=-1, keepdim=True)\r\n        c = torch.sum(rays_o**2, dim=-1, keepdim=True) - 1.0\r\n        mid = 0.5 * (-b) / a\r\n        near = mid - torch.sqrt(b ** 2 - 4 * a * c) / (2 * a)\r\n        far = mid + torch.sqrt(b ** 2 - 4 * a * c) / (2 * a)\r\n        return near[..., 0], far[..., 0]\r\n\r\n    def image_at(self, idx, resolution_level):\r\n        \"\"\"\r\n        Read image idx from self.images_lis and resize it by resolution_level.\r\n\r\n        Raises:\r\n        AttributeError: If self.images_lis has not been set; __init__ only loads normal maps and masks.\r\n        \"\"\"\r\n        images_lis = getattr(self, 'images_lis', None)\r\n        if images_lis is None:\r\n            raise AttributeError(\"Dataset.images_lis is not set; assign a list of image paths before calling image_at()\")\r\n        img = cv.imread(images_lis[idx])\r\n        return (cv.resize(img, (self.W // resolution_level, self.H // resolution_level))).clip(0, 255)\r\n\r\n"
  },
  {
    "path": "models/fields.py",
    "content": "import torch\r\nimport torch.nn as nn\r\nimport numpy as np\r\nimport tinycudann as tcnn\r\nfrom icecream import ic\r\n\r\nclass SDFNetwork(nn.Module):\r\n    \"\"\"\r\n    MLP with an optional tiny-cuda-nn input encoding. The first output channel is the SDF value (see `sdf`).\r\n\r\n    The layers are stored as attributes lin0, lin1, ...; layers listed in skip_in receive the network input\r\n    concatenated to their own input.\r\n    \"\"\"\r\n    def __init__(self,\r\n                 d_in,\r\n                 d_out,\r\n                 d_hidden,\r\n                 n_layers,\r\n                 skip_in=(4,),\r\n                 bias=0.5,\r\n                 geometric_init=True,\r\n                 weight_norm=True,\r\n                 inside_outside=False,\r\n                 encoding_config=None,\r\n                 input_concat=False):\r\n        super(SDFNetwork, self).__init__()\r\n        self.input_concat = input_concat\r\n\r\n        dims = [d_in] + [d_hidden for _ in range(n_layers)] + [d_out]\r\n\r\n        if encoding_config is not None:\r\n            self.encoding = tcnn.Encoding(d_in, encoding_config).to(torch.float32)\r\n            dims[0] = self.encoding.n_output_dims\r\n            if input_concat:\r\n                dims[0] += d_in\r\n        else:\r\n            self.encoding = None\r\n\r\n        self.num_layers = len(dims)\r\n        self.skip_in = skip_in\r\n\r\n        self.bindwidth = 0\r\n        # there are no encoding channels when the network is built without an encoding\r\n        self.enc_dim = self.encoding.n_output_dims if self.encoding is not None else 0\r\n\r\n        for l in range(0, self.num_layers - 1):\r\n            if l + 1 in self.skip_in:\r\n                # leave room for the network input that forward() concatenates before layer l + 1\r\n                out_dim = dims[l + 1] - dims[0]\r\n            else:\r\n                out_dim = dims[l + 1]\r\n\r\n            lin = nn.Linear(dims[l], out_dim)\r\n\r\n            if geometric_init:\r\n                if l == self.num_layers - 2:\r\n                    if not inside_outside:\r\n                        torch.nn.init.normal_(lin.weight, mean=np.sqrt(np.pi) / np.sqrt(dims[l]), std=0.0001)\r\n                        torch.nn.init.constant_(lin.bias, -bias)\r\n                    else:\r\n                        torch.nn.init.normal_(lin.weight, mean=-np.sqrt(np.pi) / np.sqrt(dims[l]), std=0.0001)\r\n                        torch.nn.init.constant_(lin.bias, bias)\r\n                elif self.encoding is not None and l == 0:\r\n                    # only the first 3 input columns get non-zero initial weights\r\n                    torch.nn.init.constant_(lin.bias, 0.0)\r\n                    torch.nn.init.constant_(lin.weight[:, 3:], 0.0)\r\n                    torch.nn.init.normal_(lin.weight[:, :3], 0.0, np.sqrt(2) / np.sqrt(out_dim))\r\n                elif self.encoding is not None and l in self.skip_in:\r\n                    torch.nn.init.constant_(lin.bias, 0.0)\r\n                    torch.nn.init.normal_(lin.weight, 0.0, np.sqrt(2) / np.sqrt(out_dim))\r\n                    torch.nn.init.constant_(lin.weight[:, -(dims[0] - 3):], 0.0)\r\n                else:\r\n                    torch.nn.init.constant_(lin.bias, 0.0)\r\n                    torch.nn.init.normal_(lin.weight, 0.0, np.sqrt(2) / np.sqrt(out_dim))\r\n            if weight_norm:\r\n                lin = nn.utils.weight_norm(lin)\r\n\r\n            setattr(self, \"lin\" + str(l), lin)\r\n        self.activation = nn.Softplus(beta=100)\r\n        # self.activation = nn.ReLU()\r\n\r\n    def increase_bandwidth(self):\r\n        \"\"\"Enable two more encoding channels in forward() (the mask keeps the first 2 * bindwidth channels).\"\"\"\r\n        self.bindwidth += 1\r\n\r\n    def forward(self, inputs):\r\n        \"\"\"Run the MLP on inputs and return the raw output of the last layer.\"\"\"\r\n        if self.encoding is not None:\r\n            encoded = self.encoding(inputs).to(torch.float32)\r\n\r\n            # zero every encoding channel from index 2 * bindwidth onward\r\n            enc_mask = torch.ones(self.enc_dim, dtype=torch.bool, device=encoded.device, requires_grad=False)\r\n            enc_mask[self.bindwidth*2:] = 0\r\n            encoded = encoded * enc_mask\r\n\r\n            if self.input_concat:\r\n                inputs = torch.cat([inputs, encoded], dim=1)\r\n            else:\r\n                # lin0 was sized for the encoding alone (dims[0] = n_output_dims), so feed it the encoding\r\n                inputs = encoded\r\n\r\n        x = inputs\r\n        for l in range(0, self.num_layers - 1):\r\n            lin = getattr(self, \"lin\" + str(l))\r\n\r\n            if l in self.skip_in:\r\n                x = torch.cat([x, inputs], 1) / np.sqrt(2)\r\n\r\n            x = lin(x)\r\n\r\n            if l < self.num_layers - 2:\r\n                x = self.activation(x)\r\n        return x\r\n\r\n    def sdf(self, x):\r\n        \"\"\"Return only the first output channel of forward(x), keeping the channel axis.\"\"\"\r\n        return self.forward(x)[:, :1]\r\n\r\n    def sdf_hidden_appearance(self, x):\r\n        \"\"\"Return every output channel of forward(x).\"\"\"\r\n        return self.forward(x)\r\n\r\n    @torch.enable_grad()\r\n    def gradient(self, x):\r\n        \"\"\"\r\n        Return the gradient of sdf(x) with respect to x, with an extra axis inserted at dim 1.\r\n\r\n        Note that x is switched to requires_grad in place, and the graph is kept so that the result can be\r\n        differentiated again.\r\n        \"\"\"\r\n        x.requires_grad_(True)\r\n        y = self.sdf(x)\r\n        d_output = torch.ones_like(y, requires_grad=False, device=y.device)\r\n        gradients = torch.autograd.grad(\r\n            outputs=y,\r\n            inputs=x,\r\n            grad_outputs=d_output,\r\n            create_graph=True,\r\n            retain_graph=True,\r\n            only_inputs=True)[0]\r\n        return gradients.unsqueeze(1)\r\n\r\n    @torch.enable_grad()\r\n    def divergence(self, y, x):\r\n        \"\"\"Sum over i of d y[..., i] / d x[..., i], computed with one autograd call per component.\"\"\"\r\n        div = 0.\r\n        for i in range(y.shape[-1]):\r\n            div += torch.autograd.grad(y[..., i], x, torch.ones_like(y[..., i]), create_graph=True)[0][..., i:i + 1]\r\n        return div\r\n\r\n    @torch.enable_grad()\r\n    def laplace(self, x):\r\n        \"\"\"Divergence of gradient(x) with respect to x.\"\"\"\r\n        return self.divergence(self.gradient(x), x)\r\n\r\n\r\nclass SingleVarianceNetwork(nn.Module):\r\n    \"\"\"Holds a single learnable scalar `variance`.\"\"\"\r\n    def __init__(self, init_val):\r\n        super(SingleVarianceNetwork, self).__init__()\r\n        self.register_parameter('variance', nn.Parameter(torch.tensor(init_val)))\r\n\r\n    def forward(self, x):\r\n        \"\"\"Return a (len(x), 1) tensor filled with exp(variance * 10); only the length of x is used.\"\"\"\r\n        return torch.ones([len(x), 1]) * torch.exp(self.variance * 10.0)"
  },
  {
    "path": "models/renderer.py",
    "content": "import torch\r\nimport numpy as np\r\nimport mcubes\r\nfrom tqdm import tqdm\r\nfrom nerfacc import ContractionType, OccupancyGrid, ray_marching, \\\r\n    render_weight_from_alpha_patch_based, accumulate_along_rays_patch_based, \\\r\n    render_weight_from_alpha, accumulate_along_rays\r\n\r\ndef extract_fields(bound_min, bound_max, resolution, query_func):\r\n    N = 64\r\n    X = torch.linspace(bound_min[0], bound_max[0], resolution).split(N)\r\n    Y = torch.linspace(bound_min[1], bound_max[1], resolution).split(N)\r\n    Z = torch.linspace(bound_min[2], bound_max[2], resolution).split(N)\r\n\r\n    u = np.zeros([resolution, resolution, resolution], dtype=np.float32)\r\n    with torch.no_grad():\r\n        for xi, xs in tqdm(enumerate(X)):\r\n            for yi, ys in enumerate(Y):\r\n                for zi, zs in enumerate(Z):\r\n                    xx, yy, zz = torch.meshgrid(xs, ys, zs)\r\n                    pts = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1)\r\n                    val = query_func(pts).reshape(len(xs), len(ys), len(zs)).detach().cpu().numpy()\r\n                    u[xi * N: xi * N + len(xs), yi * N: yi * N + len(ys), zi * N: zi * N + len(zs)] = val\r\n    return u\r\n\r\n\r\ndef extract_geometry(bound_min, bound_max, resolution, threshold, query_func):\r\n    u = extract_fields(bound_min, bound_max, resolution, query_func)\r\n    vertices, triangles = mcubes.marching_cubes(u, threshold)\r\n    b_max_np = bound_max.detach().cpu().numpy()\r\n    b_min_np = bound_min.detach().cpu().numpy()\r\n\r\n    vertices = vertices / (resolution - 1.0) * (b_max_np - b_min_np)[None, :] + b_min_np[None, :]\r\n    return vertices, triangles\r\n\r\n\r\nclass NeuSRenderer:\r\n    def __init__(self, sdf_network, deviation_network,\r\n                 gradient_method=\"dfd\"):\r\n        self.sdf_network = sdf_network\r\n        self.deviation_network = deviation_network\r\n\r\n        # define the occ grid, see 
NerfAcc for more details\r\n        self.scene_aabb = torch.as_tensor([-1., -1., -1., 1., 1., 1.], dtype=torch.float32)\r\n        # define the contraction_type for scene contraction\r\n        self.contraction_type = ContractionType.AABB\r\n        # create Occupancy Grid\r\n        self.occupancy_grid = OccupancyGrid(\r\n            roi_aabb=self.scene_aabb,\r\n            resolution=128,  # if res is different along different axis, use [256,128,64]\r\n            contraction_type=self.contraction_type).to(\"cuda\")\r\n        self.sampling_step_size = 0.01  # ray marching step size, will be modified during training\r\n        self.gradient_method = gradient_method   # dfd or fd or ad\r\n\r\n\r\n    def occ_eval_fn(self, x):\r\n        # function for updating the occ grid given the current sdf\r\n        sdf = self.sdf_network(x)[..., :1]\r\n        alpha = torch.sigmoid(- sdf * 80)  # occ grids with alpha below the occ threshold will be set as 0\r\n        return alpha\r\n\r\n\r\n    def render(self, rays_o_patch_all,  # (num_patch, patch_H, patch_W, 3)\r\n                     rays_d_patch_all,  # (num_patch, patch_H, patch_W, 3)\r\n                     marching_plane_normal,  # (num_patch, 3)\r\n                     near,  # (num_patch,)\r\n                     far,  # (num_patch,)\r\n                     V_inverse_patch_all,  # (num_patch, patch_H, patch_W, 3, 3)\r\n                     val_gradient_method='dfd',\r\n                     mode='train'):\r\n        # patch size, should be odd\r\n        patch_H = rays_o_patch_all.shape[1]\r\n        patch_W = rays_o_patch_all.shape[2]\r\n        num_patch = rays_o_patch_all.shape[0]\r\n\r\n        # extract camera location and ray direction of the patches' center pixels\r\n        rays_o_patch_center = rays_o_patch_all[:, patch_H//2, patch_W//2]  # (num_patch, 3)\r\n        rays_d_patch_center = rays_d_patch_all[:, patch_H//2, patch_W//2]  # (num_patch, 3)\r\n\r\n        def alpha_fn_patch_center(t_starts, 
t_ends, ray_indices, ret_sdf=False):\r\n            # the function used in ray marching\r\n            ray_indices = ray_indices.long()\r\n            t_origins = rays_o_patch_center[ray_indices]\r\n            t_dirs = rays_d_patch_center[ray_indices]\r\n            positions_starts = t_origins + t_dirs * t_starts\r\n            positions_ends = t_origins + t_dirs * t_ends\r\n\r\n            t_starts_shift_left = t_starts[1:]\r\n            # attach the last element of t_ends to the end of t_starts_shift_left\r\n            t_starts_shift_left = torch.cat([t_starts_shift_left, t_starts[-1:]], 0)\r\n\r\n            # compute the diff mask between t_ends and t_starts_shift_left\r\n            diff_mask = ((t_ends - t_starts_shift_left) != 0).squeeze()\r\n            # if the diff maks is empty, return\r\n            positions_ends_diff = positions_ends[diff_mask].reshape(-1, 3)\r\n\r\n            positions_all = torch.cat([positions_starts, positions_ends_diff], 0)\r\n\r\n            sdf_all = self.sdf_network(positions_all)\r\n            sdf_start = sdf_all[:positions_starts.shape[0]]\r\n            sdf_end_diff = sdf_all[positions_starts.shape[0]:]\r\n\r\n            sdf_start_shift_left = sdf_start[1:]\r\n            sdf_start_shift_left = torch.cat([sdf_start_shift_left, sdf_start[-1:]], 0)\r\n\r\n            sdf_start_shift_left[diff_mask] = sdf_end_diff\r\n\r\n            inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6)  # Single parameter\r\n            inv_s = inv_s.expand(sdf_start.shape[0], 1)\r\n\r\n            prev_cdf = torch.sigmoid(sdf_start * inv_s)\r\n            next_cdf = torch.sigmoid(sdf_start_shift_left * inv_s)\r\n\r\n            p = prev_cdf - next_cdf\r\n            c = prev_cdf\r\n\r\n            alpha = ((p + 1e-5) / (c + 1e-5)).view(-1).clip(0.0, 1.0)\r\n            alpha = alpha.reshape(-1, 1)\r\n            if ret_sdf:\r\n                return alpha, sdf_start, sdf_start_shift_left\r\n            else:\r\n    
            return alpha\r\n\r\n        with torch.no_grad():\r\n            patch_indices, t_starts_patch_center, t_ends_patch_center = ray_marching(\r\n                rays_o_patch_center, rays_d_patch_center,\r\n                t_min=near,\r\n                t_max=far,\r\n                grid=self.occupancy_grid,\r\n                render_step_size=self.sampling_step_size,\r\n                stratified=True,\r\n                cone_angle=0.0,\r\n                early_stop_eps=1e-8,\r\n                alpha_fn=alpha_fn_patch_center,\r\n            )\r\n            samples_per_ray = patch_indices.shape[0] / num_patch\r\n            if patch_indices.shape[0] == 0:  # all patch center rays are within the zero region of the occ grid. skip this iteration.\r\n                return {\r\n                    \"comp_normal\": torch.zeros([num_patch, patch_H, patch_W, 3], device=rays_o_patch_center.device)\r\n                }\r\n\r\n            num_samples = patch_indices.shape[0]\r\n            patch_indices = patch_indices.long()\r\n\r\n            # compute the sampling distance on remaining rays\r\n            t_starts_patch_all = t_starts_patch_center[:, None, None, :] * (rays_d_patch_center * marching_plane_normal).sum(-1, keepdim=True)[patch_indices][:, None, None, :] \\\r\n                                 /(rays_d_patch_all * marching_plane_normal[:, None, None, :]).sum(-1, keepdim=True)[patch_indices]\r\n            t_ends_patch_all = t_ends_patch_center[:, None, None, :] * (rays_d_patch_center * marching_plane_normal).sum(-1, keepdim=True)[patch_indices][:, None, None, :] \\\r\n                               /(rays_d_patch_all * marching_plane_normal[:, None, None, :]).sum(-1, keepdim=True)[patch_indices]\r\n\r\n\r\n            t_starts_patch_center_shift_left = t_starts_patch_center[1:]\r\n            t_starts_patch_center_shift_left = torch.cat([t_starts_patch_center_shift_left, t_starts_patch_center[-1:]], 0)\r\n            diff_mask = ((t_ends_patch_center - 
t_starts_patch_center_shift_left) != 0)[..., 0]\r\n            positions_starts_patch_all = rays_o_patch_all[patch_indices] + rays_d_patch_all[patch_indices] * t_starts_patch_all\r\n            positions_ends_patch_all = rays_o_patch_all[patch_indices] + rays_d_patch_all[patch_indices] * t_ends_patch_all  # (num_samples, patch_H, patch_W, 3)\r\n            positions_ends_diff = positions_ends_patch_all[diff_mask]\r\n            positions_all = torch.cat([positions_starts_patch_all, positions_ends_diff], 0)\r\n            positions_all_flat = positions_all.reshape(-1, 3)\r\n\r\n        sdf_all = self.sdf_network(positions_all_flat)\r\n        sdf_all = sdf_all.reshape(*positions_all.shape[:-1], 1)\r\n\r\n        sdf_starts_patch_all = sdf_all[:positions_starts_patch_all.shape[0]]\r\n\r\n        sdf_end_diff = sdf_all[positions_starts_patch_all.shape[0]:]\r\n        sdf_ends_patch_all = sdf_starts_patch_all[1:]\r\n        sdf_ends_patch_all = torch.cat([sdf_ends_patch_all, sdf_starts_patch_all[-1:]], 0)\r\n        sdf_ends_patch_all[diff_mask] = sdf_end_diff\r\n\r\n        inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6)  # Single parameter\r\n\r\n        prev_cdf = torch.sigmoid(sdf_starts_patch_all * inv_s)  # (num_samples, patch_H, patch_W, 1)\r\n        next_cdf = torch.sigmoid(sdf_ends_patch_all * inv_s)   # (num_samples, patch_H, patch_W, 1)\r\n\r\n        p = prev_cdf - next_cdf\r\n        c = prev_cdf\r\n\r\n        alpha = ((p + 1e-5) / (c + 1e-5)).clip(0.0, 1.0)  # (num_samples, patch_H, patch_W, 1)\r\n        weights_cuda = render_weight_from_alpha_patch_based(alpha.reshape(num_samples, patch_H*patch_W, 1), patch_indices)  # (num_samples, patch_H, patch_W, 1)\r\n\r\n        if mode == 'train':\r\n            gradient_method = self.gradient_method\r\n        elif mode == 'eval':\r\n            gradient_method = val_gradient_method\r\n\r\n        if gradient_method == \"dfd\":\r\n            with torch.no_grad():\r\n                
# distance between neighboring points on the same marching plane\r\n                dist_x = torch.norm(positions_starts_patch_all[:, :, 1:, :] -\r\n                                    positions_starts_patch_all[:, :, :-1, :], dim=-1, keepdim=True)  # (num_samples, patch_H, patch_W-1, 1)\r\n                dist_y = torch.norm(positions_starts_patch_all[:, 1:, :, :] -\r\n                                    positions_starts_patch_all[:, :-1, :, :], dim=-1, keepdim=True)  # (num_samples, patch_H-1, patch_W, 1)\r\n\r\n            # directional derivatives along the ray direction\r\n            # forward difference\r\n            df_dt = (sdf_ends_patch_all - sdf_starts_patch_all) / (t_ends_patch_all - t_starts_patch_all)  # (num_samples, patch_H, patch_W, 1)\r\n            # directional derivatives along the image's x-direction\r\n            # central difference\r\n            df_dx = (sdf_starts_patch_all[:, :, 2:] - sdf_starts_patch_all[:, :, :-2]) / (dist_x[:, :, :-1] + dist_x[:, :, 1:] )  # (num_samples, patch_H, patch_W-2, 1)\r\n            # directional derivatives along the image's y-direction\r\n            # central difference\r\n            df_dy = (sdf_starts_patch_all[:, 2:, :] - sdf_starts_patch_all[:, :-2, :]) / (dist_y[:, 1:, :] + dist_y[:, :-1, :])  # (num_samples, patch_H-2, patch_W, 1)\r\n\r\n            # for points only have one-side neighbor point,\r\n            # we use forward or backward difference correspondingly\r\n            df_dx_left_boundary = (sdf_starts_patch_all[:, :, 1:2] - sdf_starts_patch_all[:, :, 0:1]) / dist_x[:, :, 0:1]  # (num_samples, patch_H, 1)\r\n            df_dx_right_boundary = (sdf_starts_patch_all[:, :, -1:] - sdf_starts_patch_all[:, :, -2:-1]) / dist_x[:, :, -1:]  # (num_samples, patch_H, 1)\r\n            df_dy_top_boundary = (sdf_starts_patch_all[:, 1:2, :] - sdf_starts_patch_all[:, 0:1, :]) / dist_y[:, 0:1, :]  # (num_samples, 1, patch_W)\r\n            df_dy_bottom_boundary = (sdf_starts_patch_all[:, -1:, :] - 
sdf_starts_patch_all[:, -2:-1, :]) / dist_y[:, -1:, :]  # (num_samples, 1, patch_W)\r\n\r\n            # concat the directional derivatives for boundary points and central points\r\n            df_dx = torch.cat([df_dx_left_boundary, df_dx, df_dx_right_boundary], dim=2)  # (num_samples, patch_H, patch_W, 1)\r\n            df_dy = torch.cat([df_dy_top_boundary, df_dy, df_dy_bottom_boundary], dim=1)  # (num_samples, patch_H, patch_W, 1)\r\n\r\n            # concat the directional partial derivatives in three directions\r\n            projected_gradients = torch.cat([df_dt,\r\n                                             df_dx,\r\n                                             df_dy], dim=-1)  # (num_patches, patch_H, patch_W, 3)\r\n\r\n            # recover the gradients from directional partial derivatives using the inverse of known directions\r\n            V_inverse = V_inverse_patch_all[patch_indices]  # (num_patches, patch_H, patch_W, 3, 3)\r\n            gradients = (V_inverse @ projected_gradients[..., None])[..., 0]  # (num_samples, patch_H, patch_W, 3)\r\n\r\n        elif gradient_method == \"ad\":\r\n            gradients = self.sdf_network.gradient(positions_starts_patch_all.reshape(-1, 3)).reshape(num_samples, patch_H, patch_W, 3)\r\n\r\n        elif gradient_method == \"fd\":\r\n            # 6-point finite difference\r\n            self.fd_epsilon = 1e-3\r\n            positions_xn = positions_starts_patch_all + torch.tensor([[[[-self.fd_epsilon, 0, 0]]]], device=positions_starts_patch_all.device).expand(\r\n                positions_starts_patch_all.shape)\r\n            positions_xp = positions_starts_patch_all + torch.tensor([[[[self.fd_epsilon, 0, 0]]]], device=positions_starts_patch_all.device).expand(\r\n                positions_starts_patch_all.shape)\r\n            positions_yn = positions_starts_patch_all + torch.tensor([[[[0, -self.fd_epsilon, 0]]]], device=positions_starts_patch_all.device).expand(\r\n                
positions_starts_patch_all.shape)\r\n            positions_yp = positions_starts_patch_all + torch.tensor([[[[0, self.fd_epsilon, 0]]]], device=positions_starts_patch_all.device).expand(\r\n                positions_starts_patch_all.shape)\r\n            positions_zn = positions_starts_patch_all + torch.tensor([[[[0, 0, -self.fd_epsilon]]]], device=positions_starts_patch_all.device).expand(\r\n                positions_starts_patch_all.shape)\r\n            positions_zp = positions_starts_patch_all + torch.tensor([[[[0, 0, self.fd_epsilon]]]], device=positions_starts_patch_all.device).expand(\r\n                positions_starts_patch_all.shape)\r\n\r\n            positions_concat = torch.cat(\r\n                [positions_xn, positions_xp, positions_yn, positions_yp, positions_zn, positions_zp], 0).to(\r\n                torch.float32).reshape(-1, 3)\r\n\r\n            sdf_concat = self.sdf_network(positions_concat).reshape(-1, patch_H, patch_W, 1)\r\n            num_samples = positions_starts_patch_all.shape[0]\r\n            sdf_xn = sdf_concat[:num_samples].reshape(num_samples, patch_H, patch_W, 1)\r\n            sdf_xp = sdf_concat[num_samples:2 * num_samples].reshape(num_samples, patch_H, patch_W, 1)\r\n            sdf_yn = sdf_concat[2 * num_samples:3 * num_samples].reshape(num_samples, patch_H, patch_W, 1)\r\n            sdf_yp = sdf_concat[3 * num_samples:4 * num_samples].reshape(num_samples, patch_H, patch_W, 1)\r\n            sdf_zn = sdf_concat[4 * num_samples:5 * num_samples].reshape(num_samples, patch_H, patch_W, 1)\r\n            sdf_zp = sdf_concat[5 * num_samples:].reshape(num_samples, patch_H, patch_W, 1)\r\n\r\n            df_dx = (sdf_xp - sdf_xn) / (2 * self.fd_epsilon)\r\n            df_dy = (sdf_yp - sdf_yn) / (2 * self.fd_epsilon)\r\n            df_dz = (sdf_zp - sdf_zn) / (2 * self.fd_epsilon)\r\n\r\n            gradients = torch.stack([df_dx, df_dy, df_dz], -1)\r\n\r\n        weights_sum_cuda = 
accumulate_along_rays_patch_based(weights_cuda, patch_indices, n_patches=num_patch)  # (num_samples, patch_H, patch_W, 1)\r\n        weights_sum = weights_sum_cuda.reshape(num_patch, patch_H, patch_W, 1)\r\n\r\n        comp_normals_cuda = accumulate_along_rays_patch_based(weights_cuda, patch_indices, values=gradients.reshape(num_samples,patch_H * patch_W, 3),n_patches=num_patch)  # (num_samples, patch_H, patch_W, 3)\r\n        comp_normal = comp_normals_cuda.reshape(num_patch, patch_H, patch_W, 3)\r\n        inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6)  # Single parameter\r\n\r\n        return {\r\n            's_val': 1/inv_s,\r\n            'weight_sum': weights_sum,\r\n            'gradients': gradients,\r\n            \"comp_normal\": comp_normal,\r\n            \"samples_per_ray\": samples_per_ray,\r\n        }\r\n\r\n    @torch.no_grad()\r\n    def render_normal_pixel_based(self, rays_o, rays_d, near, far):\r\n        def alpha_fn(t_starts, t_ends, ray_indices, ret_sdf=False):\r\n            ray_indices = ray_indices.long()\r\n            t_origins = rays_o[ray_indices]\r\n            t_dirs = rays_d[ray_indices]\r\n            positions_starts = t_origins + t_dirs * t_starts\r\n            positions_ends = t_origins + t_dirs * t_ends\r\n\r\n            t_starts_shift_left = t_starts[1:]\r\n            # attach the last element of t_ends to the end of t_starts_shift_left\r\n            t_starts_shift_left = torch.cat([t_starts_shift_left, t_starts[-1:]], 0)\r\n\r\n            # compute the diff mask between t_ends and t_starts_shift_left\r\n            diff_mask = ((t_ends - t_starts_shift_left) != 0).squeeze()\r\n            # if the diff maks is empty, return\r\n\r\n            positions_ends_diff = positions_ends[diff_mask].reshape(-1, 3)\r\n\r\n            # ic(diff_mask.shape, positions_ends_diff.shape, positions_starts.shape)\r\n            positions_all = torch.cat([positions_starts, positions_ends_diff], 0)\r\n\r\n        
    sdf_all = self.sdf_network(positions_all)\r\n            sdf_start = sdf_all[:positions_starts.shape[0]]\r\n            sdf_end_diff = sdf_all[positions_starts.shape[0]:]\r\n\r\n            sdf_start_shift_left = sdf_start[1:]\r\n            sdf_start_shift_left = torch.cat([sdf_start_shift_left, sdf_start[-1:]], 0)\r\n\r\n            sdf_start_shift_left[diff_mask] = sdf_end_diff\r\n\r\n            inv_s = self.deviation_network(torch.zeros([1, 3]))[:, :1].clip(1e-6, 1e6)  # Single parameter\r\n            inv_s = inv_s.expand(sdf_start.shape[0], 1)\r\n\r\n            prev_cdf = torch.sigmoid(sdf_start * inv_s)\r\n            next_cdf = torch.sigmoid(sdf_start_shift_left * inv_s)\r\n\r\n            p = prev_cdf - next_cdf\r\n            c = prev_cdf\r\n\r\n            alpha = ((p + 1e-5) / (c + 1e-5)).view(-1).clip(0.0, 1.0)\r\n            alpha = alpha.reshape(-1, 1)\r\n            if ret_sdf:\r\n                return alpha, sdf_start, sdf_start_shift_left\r\n            else:\r\n                return alpha\r\n\r\n        ray_indices, t_starts, t_ends = ray_marching(\r\n            rays_o, rays_d,\r\n            t_min=near.squeeze(),\r\n            t_max=far.squeeze(),\r\n            grid=self.occupancy_grid,\r\n            render_step_size=self.sampling_step_size,\r\n            stratified=True,\r\n            cone_angle=0.0,\r\n            alpha_thre=0.0,\r\n            early_stop_eps=1e-3,\r\n            alpha_fn=alpha_fn,\r\n        )\r\n\r\n        alpha = alpha_fn(t_starts, t_ends, ray_indices)\r\n\r\n        ray_indices = ray_indices.long()\r\n        t_origins = rays_o[ray_indices]\r\n        t_dirs = rays_d[ray_indices]\r\n        midpoints = (t_starts + t_ends) / 2.\r\n        positions = t_origins + t_dirs * midpoints\r\n        gradients = self.sdf_network.gradient(positions).reshape(-1, 3)\r\n\r\n        n_rays = rays_o.shape[0]\r\n        weights = render_weight_from_alpha(alpha, ray_indices=ray_indices, n_rays=n_rays)  # [n_samples, 1]\r\n    
    comp_normal = accumulate_along_rays(weights, ray_indices, values=gradients, n_rays=n_rays)\r\n        comp_depth = accumulate_along_rays(weights, ray_indices, values=midpoints, n_rays=n_rays)\r\n        return comp_normal, comp_depth\r\n\r\n    def extract_geometry(self, bound_min, bound_max, resolution, threshold=0.0):\r\n        return extract_geometry(bound_min,\r\n                                bound_max,\r\n                                resolution=resolution,\r\n                                threshold=threshold,\r\n                                query_func=lambda pts: -self.sdf_network.sdf(pts))\r\n"
  },
  {
    "path": "run_diligent.sh",
    "content": "for obj_name in buddha pot2 reading bear cow; do\n     python exp_runner.py --conf config/diligent.conf --obj_name $obj_name\ndone\n"
  },
  {
    "path": "run_own_object.sh",
    "content": "for obj_name in lion dog1 woman; do\n     python exp_runner.py --conf config/own_objects.conf --obj_name $obj_name\ndone"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/building.yml",
    "content": "name: Building Wheels\n\non: [workflow_dispatch]\n\njobs:\n\n  wheel:\n    runs-on: ${{ matrix.os }}\n    environment: production\n\n    strategy:\n      fail-fast: false\n      matrix:\n        os: [ubuntu-18.04, windows-2019]\n        python-version: ['3.7', '3.8', '3.9']\n        torch-version: [1.10.0, 1.11.0, 1.12.0, 1.13.0]\n        cuda-version: ['cu102', 'cu113', 'cu116', 'cu117']\n        # os: [ubuntu-18.04]\n        # python-version: ['3.9']\n        # torch-version: [1.10.0]\n        # cuda-version: ['cu102']\n        exclude:\n          - torch-version: 1.10.0\n            cuda-version: 'cu116'\n          - torch-version: 1.10.0\n            cuda-version: 'cu117'\n          - torch-version: 1.11.0\n            cuda-version: 'cu116'\n          - torch-version: 1.11.0\n            cuda-version: 'cu117'\n          - torch-version: 1.12.0\n            cuda-version: 'cu117'\n          - torch-version: 1.13.0\n            cuda-version: 'cu102'\n          - torch-version: 1.13.0\n            cuda-version: 'cu113'\n          - os: windows-2019\n            torch-version: 1.11.0\n            cuda-version: 'cu102'\n          - os: windows-2019\n            torch-version: 1.12.0\n            cuda-version: 'cu102'\n          # - os: macos-10.15\n          #   cuda-version: 'cu102'\n          # - os: macos-10.15\n          #   cuda-version: 'cu113'\n          # - os: macos-10.15\n          #   cuda-version: 'cu116'\n          # - os: macos-10.15\n          #   cuda-version: 'cu117'\n\n    steps:\n      - uses: actions/checkout@v2\n\n      - name: Set up Python ${{ matrix.python-version }}\n        uses: actions/setup-python@v2\n        with:\n          python-version: ${{ matrix.python-version }}\n\n      - name: Upgrade pip\n        run: |\n          pip install --upgrade setuptools\n          pip install ninja\n\n      - name: Free up disk space\n        if: ${{ runner.os == 'Linux' }}\n        run: |\n          sudo rm -rf /usr/share/dotnet\n\n   
   - name: Install CUDA ${{ matrix.cuda-version }}\n        if: ${{ matrix.cuda-version != 'cpu' }}\n        run: |\n          bash .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}.sh\n\n      - name: Install PyTorch ${{ matrix.torch-version }}+${{ matrix.cuda-version }}\n        run: |\n          pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/${{ matrix.cuda-version }}\n          python -c \"import torch; print('PyTorch:', torch.__version__)\"\n          python -c \"import torch; print('CUDA:', torch.version.cuda)\"\n          python -c \"import torch; print('CUDA Available:', torch.cuda.is_available())\"\n\n      - name: Patch PyTorch static constexpr on Windows\n        if: ${{ runner.os == 'Windows' }}\n        run: |\n          Torch_DIR=`python -c 'import os; import torch; print(os.path.dirname(torch.__file__))'`\n          sed -i '31,38c\\\n          TORCH_API void lazy_init_num_threads();' ${Torch_DIR}/include/ATen/Parallel.h\n        shell: bash\n\n      - name: Set version\n        if: ${{ runner.os != 'macOS' }}\n        run: |\n          VERSION=`sed -n 's/^__version__ = \"\\(.*\\)\"/\\1/p' nerfacc/version.py`\n          TORCH_VERSION=`echo \"pt${{ matrix.torch-version }}\" | sed \"s/..$//\" | sed \"s/\\.//g\"`\n          CUDA_VERSION=`echo ${{ matrix.cuda-version }}`\n          echo \"New version name: $VERSION+$TORCH_VERSION$CUDA_VERSION\"\n          sed -i \"s/$VERSION/$VERSION+$TORCH_VERSION$CUDA_VERSION/\" nerfacc/version.py\n        shell:\n          bash\n\n      - name: Install main package for CPU\n        if: ${{ matrix.cuda-version == 'cpu' }}\n        run: |\n          FORCE_ONLY_CPU=1 pip install -e .\n        shell:\n          bash\n\n      - name: Install main package for GPU\n        if: ${{ matrix.cuda-version != 'cpu' }}\n        run: |\n          source .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}-env.sh\n          pip install .\n        
shell:\n          bash\n\n      - name: Test installation\n        run: |\n          python -c \"import nerfacc; print('nerfacc:', nerfacc.__version__)\"\n\n      - name: Build wheel\n        run: |\n          pip install wheel\n          source .github/workflows/cuda/${{ matrix.cuda-version }}-${{ runner.os }}-env.sh\n          python setup.py bdist_wheel --dist-dir=dist\n        shell: bash\n\n      - name: Configure AWS\n        uses: aws-actions/configure-aws-credentials@v1\n        with:\n          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}\n          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}\n          aws-region: us-west-2\n\n      - name: Upload wheel\n        run: |\n          aws s3 sync dist s3://nerfacc-bucket/whl/torch-${{ matrix.torch-version }}_${{ matrix.cuda-version }} --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers\n\n  update_aws_listing:\n    needs: [wheel]\n    runs-on: ubuntu-latest\n    environment: production\n\n    steps:\n      - uses: actions/checkout@v2\n      \n      - name: Set up Python\n        uses: actions/setup-python@v2\n        with:\n          python-version: 3.9\n\n      - name: Upgrade pip\n        run: |\n          pip install --upgrade setuptools\n          pip install boto3\n      \n      - name: Configure AWS\n        uses: aws-actions/configure-aws-credentials@v1\n        with:\n          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}\n          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}\n          aws-region: us-west-2\n\n      - name: Update AWS listing\n        run: |\n          python scripts/run_aws_listing.py \\\n            --access_key_id=${{ secrets.AWS_ACCESS_KEY_ID }} \\\n            --secret_access_key=${{ secrets.AWS_SECRET_ACCESS_KEY }} \\\n            --bucket=\"nerfacc-bucket\" \\\n            --region=\"us-west-2\"\n\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/code_checks.yml",
    "content": "name: Core Tests.\n\non:\n  push:\n    branches: [master]\n  pull_request:\n    branches: [master]\n\npermissions:\n  contents: read\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n\n    steps:\n      - uses: actions/checkout@v3\n      - name: Set up Python 3.8.12\n        uses: actions/setup-python@v4\n        with:\n          python-version: \"3.8.12\"\n      - name: Install dependencies\n        run: |\n          pip install isort==5.10.1 black[jupyter]==22.3.0\n      - name: Run isort\n        run: isort docs/ nerfacc/ scripts/ examples/ tests/ --profile black --skip examples/pycolmap --line-length 80 --check\n      - name: Run Black\n        run: black docs/ nerfacc/ scripts/ examples/ tests/ --exclude examples/pycolmap --line-length 80 --check\n      # - name: Python Pylint\n      #   run: |\n      #     pylint nerfacc/ tests/ scripts/ examples/\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu101-Linux-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/usr/local/cuda-10.1\nLD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}\nPATH=${CUDA_HOME}/bin:${PATH}\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"3.5;5.0+PTX;6.0;7.0;7.5\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu101-Linux.sh",
    "content": "#!/bin/bash\n\nOS=ubuntu1804\n\nwget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin\nsudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600\nwget -nv https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb\nsudo dpkg -i cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb\nsudo apt-key add /var/cuda-repo-10-1-local-10.1.243-418.87.00/7fa2af80.pub\n\nsudo apt-get -qq update\nsudo apt install -y cuda-nvcc-10-1 cuda-libraries-dev-10-1\nsudo apt clean\n\nrm -f cuda-repo-${OS}-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu101-Windows-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/c/Program\\ Files/NVIDIA\\ GPU\\ Computing\\ Toolkit/CUDA/v10.1\nPATH=${CUDA_HOME}/bin:$PATH\nPATH=/c/Program\\ Files\\ \\(x86\\)/Microsoft\\ Visual\\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"3.5;5.0+PTX;6.0;7.0;7.5\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu101-Windows.sh",
    "content": "#!/bin/bash\n\n# Install NVIDIA drivers, see:\n# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102\ncurl -k -L \"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download\" --output \"/tmp/gpu_driver_dlls.zip\"\n7z x \"/tmp/gpu_driver_dlls.zip\" -o\"/c/Windows/System32\"\n\nexport CUDA_SHORT=10.1\nexport CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers/\nexport CUDA_FILE=cuda_${CUDA_SHORT}.243_426.00_win10.exe\n\n# Install CUDA:\ncurl -k -L \"${CUDA_URL}/${CUDA_FILE}\" --output \"${CUDA_FILE}\"\necho \"\"\necho \"Installing from ${CUDA_FILE}...\"\nPowerShell -Command \"Start-Process -FilePath \\\"${CUDA_FILE}\\\" -ArgumentList \\\"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\\\" -Wait -NoNewWindow\"\necho \"Done!\"\nrm -f \"${CUDA_FILE}\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu102-Linux-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/usr/local/cuda-10.2\nLD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}\nPATH=${CUDA_HOME}/bin:${PATH}\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"3.5;5.0+PTX;6.0;7.0;7.5\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu102-Linux.sh",
    "content": "#!/bin/bash\n\nOS=ubuntu1804\n\nwget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin\nsudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600\nwget -nv https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb\nsudo dpkg -i cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb\nsudo apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub\n\nsudo apt-get -qq update\nsudo apt install -y cuda-nvcc-10-2 cuda-libraries-dev-10-2\nsudo apt clean\n\nrm -f cuda-repo-${OS}-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu102-Windows-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/c/Program\\ Files/NVIDIA\\ GPU\\ Computing\\ Toolkit/CUDA/v10.2\nPATH=${CUDA_HOME}/bin:$PATH\nPATH=/c/Program\\ Files\\ \\(x86\\)/Microsoft\\ Visual\\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"3.5;5.0+PTX;6.0;7.0;7.5\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu102-Windows.sh",
    "content": "#!/bin/bash\n\n# Install NVIDIA drivers, see:\n# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102\ncurl -k -L \"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download\" --output \"/tmp/gpu_driver_dlls.zip\"\n7z x \"/tmp/gpu_driver_dlls.zip\" -o\"/c/Windows/System32\"\n\nexport CUDA_SHORT=10.2\nexport CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers\nexport CUDA_FILE=cuda_${CUDA_SHORT}.89_441.22_win10.exe\n\n# Install CUDA:\ncurl -k -L \"${CUDA_URL}/${CUDA_FILE}\" --output \"${CUDA_FILE}\"\necho \"\"\necho \"Installing from ${CUDA_FILE}...\"\nPowerShell -Command \"Start-Process -FilePath \\\"${CUDA_FILE}\\\" -ArgumentList \\\"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\\\" -Wait -NoNewWindow\"\necho \"Done!\"\nrm -f \"${CUDA_FILE}\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu111-Linux-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/usr/local/cuda-11.1\nLD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}\nPATH=${CUDA_HOME}/bin:${PATH}\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu111-Linux.sh",
    "content": "#!/bin/bash\n\nOS=ubuntu1804\n\nwget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin\nsudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600\nwget -nv https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb\nsudo dpkg -i cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb\nsudo apt-key add /var/cuda-repo-${OS}-11-1-local/7fa2af80.pub\n\nsudo apt-get -qq update\nsudo apt install -y cuda-nvcc-11-1 cuda-libraries-dev-11-1\nsudo apt clean\n\nrm -f https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda-repo-${OS}-11-1-local_11.1.1-455.32.00-1_amd64.deb\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu111-Windows-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/c/Program\\ Files/NVIDIA\\ GPU\\ Computing\\ Toolkit/CUDA/v11.1\nPATH=${CUDA_HOME}/bin:$PATH\nPATH=/c/Program\\ Files\\ \\(x86\\)/Microsoft\\ Visual\\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"6.0+PTX\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu111-Windows.sh",
    "content": "#!/bin/bash\n\n# Install NVIDIA drivers, see:\n# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102\ncurl -k -L \"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download\" --output \"/tmp/gpu_driver_dlls.zip\"\n7z x \"/tmp/gpu_driver_dlls.zip\" -o\"/c/Windows/System32\"\n\nexport CUDA_SHORT=11.1\nexport CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers\nexport CUDA_FILE=cuda_${CUDA_SHORT}.1_456.81_win10.exe\n\n# Install CUDA:\ncurl -k -L \"${CUDA_URL}/${CUDA_FILE}\" --output \"${CUDA_FILE}\"\necho \"\"\necho \"Installing from ${CUDA_FILE}...\"\nPowerShell -Command \"Start-Process -FilePath \\\"${CUDA_FILE}\\\" -ArgumentList \\\"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\\\" -Wait -NoNewWindow\"\necho \"Done!\"\nrm -f \"${CUDA_FILE}\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu113-Linux-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/usr/local/cuda-11.3\nLD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}\nPATH=${CUDA_HOME}/bin:${PATH}\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu113-Linux.sh",
    "content": "#!/bin/bash\n\nOS=ubuntu1804\n\nwget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin\nsudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600\nwget -nv https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb\nsudo dpkg -i cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb\nsudo apt-key add /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub\n\nsudo apt-get -qq update\nsudo apt install -y cuda-nvcc-11-3 cuda-libraries-dev-11-3\nsudo apt clean\n\nrm -f https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu113-Windows-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/c/Program\\ Files/NVIDIA\\ GPU\\ Computing\\ Toolkit/CUDA/v11.3\nPATH=${CUDA_HOME}/bin:$PATH\nPATH=/c/Program\\ Files\\ \\(x86\\)/Microsoft\\ Visual\\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"6.0+PTX\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu113-Windows.sh",
    "content": "#!/bin/bash\n\n# Install NVIDIA drivers, see:\n# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102\ncurl -k -L \"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download\" --output \"/tmp/gpu_driver_dlls.zip\"\n7z x \"/tmp/gpu_driver_dlls.zip\" -o\"/c/Windows/System32\"\n\nexport CUDA_SHORT=11.3\nexport CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers\nexport CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe\n\n# Install CUDA:\ncurl -k -L \"${CUDA_URL}/${CUDA_FILE}\" --output \"${CUDA_FILE}\"\necho \"\"\necho \"Installing from ${CUDA_FILE}...\"\nPowerShell -Command \"Start-Process -FilePath \\\"${CUDA_FILE}\\\" -ArgumentList \\\"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\\\" -Wait -NoNewWindow\"\necho \"Done!\"\nrm -f \"${CUDA_FILE}\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu115-Linux-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/usr/local/cuda-11.5\nLD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}\nPATH=${CUDA_HOME}/bin:${PATH}\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu115-Linux.sh",
    "content": "#!/bin/bash\n\nOS=ubuntu1804\n\nwget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin\nsudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600\nwget -nv https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb\nsudo dpkg -i cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb\nsudo apt-key add /var/cuda-repo-${OS}-11-5-local/7fa2af80.pub\n\nsudo apt-get -qq update\nsudo apt install -y cuda-nvcc-11-5 cuda-libraries-dev-11-5\nsudo apt clean\n\nrm -f https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-5-local_11.5.2-495.29.05-1_amd64.deb\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu115-Windows-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/c/Program\\ Files/NVIDIA\\ GPU\\ Computing\\ Toolkit/CUDA/v11.3\nPATH=${CUDA_HOME}/bin:$PATH\nPATH=/c/Program\\ Files\\ \\(x86\\)/Microsoft\\ Visual\\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"6.0+PTX\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu115-Windows.sh",
    "content": "#!/bin/bash\n\n# TODO We currently use CUDA 11.3 to build CUDA 11.5 Windows wheels\n\n# Install NVIDIA drivers, see:\n# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102\ncurl -k -L \"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download\" --output \"/tmp/gpu_driver_dlls.zip\"\n7z x \"/tmp/gpu_driver_dlls.zip\" -o\"/c/Windows/System32\"\n\nexport CUDA_SHORT=11.3\nexport CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers\nexport CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe\n\n# Install CUDA:\ncurl -k -L \"${CUDA_URL}/${CUDA_FILE}\" --output \"${CUDA_FILE}\"\necho \"\"\necho \"Installing from ${CUDA_FILE}...\"\nPowerShell -Command \"Start-Process -FilePath \\\"${CUDA_FILE}\\\" -ArgumentList \\\"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\\\" -Wait -NoNewWindow\"\necho \"Done!\"\nrm -f \"${CUDA_FILE}\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu116-Linux-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/usr/local/cuda-11.6\nLD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}\nPATH=${CUDA_HOME}/bin:${PATH}\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu116-Linux.sh",
    "content": "#!/bin/bash\n\nOS=ubuntu1804\n\nwget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin\nsudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600\nwget -nv https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb\nsudo dpkg -i cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb\nsudo apt-key add /var/cuda-repo-${OS}-11-6-local/7fa2af80.pub\n\nsudo apt-get -qq update\nsudo apt install -y cuda-nvcc-11-6 cuda-libraries-dev-11-6\nsudo apt clean\n\nrm -f https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu116-Windows-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/c/Program\\ Files/NVIDIA\\ GPU\\ Computing\\ Toolkit/CUDA/v11.3\nPATH=${CUDA_HOME}/bin:$PATH\nPATH=/c/Program\\ Files\\ \\(x86\\)/Microsoft\\ Visual\\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"6.0+PTX\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu116-Windows.sh",
    "content": "#!/bin/bash\n\n# TODO We currently use CUDA 11.3 to build CUDA 11.6 Windows wheels\n\n# Install NVIDIA drivers, see:\n# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102\ncurl -k -L \"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download\" --output \"/tmp/gpu_driver_dlls.zip\"\n7z x \"/tmp/gpu_driver_dlls.zip\" -o\"/c/Windows/System32\"\n\nexport CUDA_SHORT=11.3\nexport CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers\nexport CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe\n\n# Install CUDA:\ncurl -k -L \"${CUDA_URL}/${CUDA_FILE}\" --output \"${CUDA_FILE}\"\necho \"\"\necho \"Installing from ${CUDA_FILE}...\"\nPowerShell -Command \"Start-Process -FilePath \\\"${CUDA_FILE}\\\" -ArgumentList \\\"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\\\" -Wait -NoNewWindow\"\necho \"Done!\"\nrm -f \"${CUDA_FILE}\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu117-Linux-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/usr/local/cuda-11.7\nLD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}\nPATH=${CUDA_HOME}/bin:${PATH}\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu117-Linux.sh",
    "content": "#!/bin/bash\n\nOS=ubuntu1804\n\nwget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin\nsudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600\nwget -nv https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb\nsudo dpkg -i cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb\nsudo cp /var/cuda-repo-${OS}-11-7-local/cuda-*-keyring.gpg /usr/share/keyrings/\n\nsudo apt-get -qq update\nsudo apt install -y cuda-nvcc-11-7 cuda-libraries-dev-11-7\nsudo apt clean\n\nrm -f https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-${OS}-11-7-local_11.7.1-515.65.01-1_amd64.deb\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu117-Windows-env.sh",
    "content": "#!/bin/bash\n\nCUDA_HOME=/c/Program\\ Files/NVIDIA\\ GPU\\ Computing\\ Toolkit/CUDA/v11.3\nPATH=${CUDA_HOME}/bin:$PATH\nPATH=/c/Program\\ Files\\ \\(x86\\)/Microsoft\\ Visual\\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH\n\nexport FORCE_CUDA=1\nexport TORCH_CUDA_ARCH_LIST=\"6.0+PTX\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/cuda/cu117-Windows.sh",
    "content": "#!/bin/bash\n\n# TODO We currently use CUDA 11.3 to build CUDA 11.7 Windows wheels\n\n# Install NVIDIA drivers, see:\n# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102\ncurl -k -L \"https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download\" --output \"/tmp/gpu_driver_dlls.zip\"\n7z x \"/tmp/gpu_driver_dlls.zip\" -o\"/c/Windows/System32\"\n\nexport CUDA_SHORT=11.3\nexport CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers\nexport CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe\n\n# Install CUDA:\ncurl -k -L \"${CUDA_URL}/${CUDA_FILE}\" --output \"${CUDA_FILE}\"\necho \"\"\necho \"Installing from ${CUDA_FILE}...\"\nPowerShell -Command \"Start-Process -FilePath \\\"${CUDA_FILE}\\\" -ArgumentList \\\"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\\\" -Wait -NoNewWindow\"\necho \"Done!\"\nrm -f \"${CUDA_FILE}\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.github/workflows/publish.yml",
    "content": "# This workflows will upload a Python Package using twine when a release is created\n# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries\n\nname: Upload Python Package\n\non:\n  release:\n    types: [created]\n    branches: [master]\n\njobs:\n  deploy:\n    runs-on: ubuntu-latest\n    environment: production\n\n    steps:\n      - uses: actions/checkout@v2\n      - name: Set up Python\n        uses: actions/setup-python@v1\n        with:\n          python-version: '3.7'\n      - name: Install dependencies\n        run: |\n          python -m pip install build twine\n      - name: Strip unsupported tags in README\n        run: |\n          sed -i '/<!-- pypi-strip -->/,/<!-- \\/pypi-strip -->/d' README.md\n      - name: Build and publish\n        env:\n          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}\n        run: |\n          BUILD_NO_CUDA=1 python -m build\n          twine upload --username __token__ --password $PYPI_TOKEN dist/*"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.gitignore",
    "content": "# Visual Studio Code configs.\n.vscode/\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\n# lib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n\n.DS_Store\n\n# Direnv config.\n.envrc\n\n# line_profiler\n*.lprof\n\n# vscode\n.vsocde\n\nbenchmarks/\noutputs/"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.gitmodules",
    "content": "[submodule \"examples/pycolmap\"]\n\tpath = examples/pycolmap\n\turl = https://github.com/rmbrualla/pycolmap.git"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.pre-commit-config.yaml",
    "content": "repos:\n-   repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v2.3.0\n    hooks:\n    -   id: end-of-file-fixer\n    -   id: trailing-whitespace\n    -   id: check-yaml\n    -   id: check-merge-conflict\n    -   id: requirements-txt-fixer\n-   repo: https://github.com/psf/black\n    rev: 22.10.0\n    hooks:\n      - id: black\n        language_version: python3.8.12\n        args: # arguments to configure black\n          - --line-length=80\n\n-   repo: https://github.com/pycqa/isort\n    rev: 5.10.1\n    hooks:\n      - id: isort\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/.readthedocs.yaml",
    "content": "version: 2\n\nbuild:\n  os: ubuntu-20.04\n  tools:\n    python: \"3.9\"\n\nsphinx:\n  fail_on_warning: true\n  configuration: docs/source/conf.py\n\npython:\n  install:\n    # Equivalent to 'pip install .'\n    - method: pip\n      path: .\n    # Equivalent to 'pip install -r docs/requirements.txt'\n    - requirements: docs/requirements.txt"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/CMakeLists.txt",
    "content": "# cmake_minimum_required(VERSION 3.3)\n# project(nerfacc LANGUAGES CXX CUDA)\n\n# find_package(pybind11 REQUIRED)\n# find_package(Torch REQUIRED)\n# set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}\")\n\n# set(SOURCE_DIR nerfacc/cuda/csrc)\n# set(INCLUDE_DIR nerfacc/cuda/csrc/include)\n# file(GLOB SOURCES ${SOURCE_DIR}/*.cu)\n\n# pybind11_add_module(${PROJECT_NAME} SHARED ${SOURCES})\n# target_link_libraries(${PROJECT_NAME} PRIVATE \"${TORCH_LIBRARIES}\")\n# target_include_directories(${PROJECT_NAME} PRIVATE \"${INCLUDE_DIR}\")\n\n\n# # message(STATUS \"CUDA enabled\")\n\n# # set( CMAKE_CUDA_STANDARD 14 )\n# # set( CMAKE_CUDA_STANDARD_REQUIRED ON)\n\n# # find_package(pybind11 REQUIRED)\n\n# # # find_package(Python3 REQUIRED COMPONENTS Development)\n# # # target_link_libraries(${PROJECT_NAME} PRIVATE Python3::Python)\n\n# # find_package(Torch REQUIRED)\n# # set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}\")\n# # target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES})\n\n# # set(CSRC nerfacc/cuda/csrc)\n# # file(GLOB_RECURSE ALL_SOURCES ${ALL_SOURCES} ${CSRC}/*.cu)\n# # file(GLOB_RECURSE ALL_HEADERS ${CSRC}/include/*.h)\n# # add_library(${PROJECT_NAME} SHARED ${ALL_SOURCES})\n# # target_include_directories(${PROJECT_NAME} PUBLIC \"${CMAKE_CURRENT_SOURCE_DIR}\")\n\n# # set(CMAKE_CXX_FLAGS_DEBUG \"${CMAKE_CXX_FLAGS_DEBUG} -O0\")\n\n# # message(\"-- CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}\")\n# # message(\"-- CMAKE_CXX_FLAGS_DEBUG: ${CMAKE_CXX_FLAGS_DEBUG}\")\n# # message(\"-- CMAKE_CXX_FLAGS_RELEASE: ${CMAKE_CXX_FLAGS_RELEASE}\")\n\n# # set_target_properties(${PROJECT_NAME} PROPERTIES\n# #   EXPORT_NAME nerfacc\n# #   INSTALL_RPATH ${TORCH_INSTALL_PREFIX}/lib)\n\n# # Cmake creates *.dylib by default, but python expects *.so by default\n# # if (APPLE)\n# #   set_property(TARGET ${PROJECT_NAME} PROPERTY SUFFIX .so)\n# # endif()"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/LICENSE",
    "content": "MIT License\n\nCopyright (c) 2022 Ruilong Li\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/MANIFEST.in",
    "content": "include nerfacc/cuda/csrc/include/*\ninclude nerfacc/cuda/csrc/*\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/README.md",
    "content": "<p>\n  <!-- pypi-strip -->\n  <picture>\n  <source media=\"(prefers-color-scheme: dark)\" srcset=\"https://user-images.githubusercontent.com/3310961/199083722-881a2372-62c1-4255-8521-31a95a721851.png\" />\n  <source media=\"(prefers-color-scheme: light)\" srcset=\"https://user-images.githubusercontent.com/3310961/199084143-0d63eb40-3f35-48d2-a9d5-78d1d60b7d66.png\" />\n  <!-- /pypi-strip -->\n  <img alt=\"nerfacc logo\" src=\"https://user-images.githubusercontent.com/3310961/199084143-0d63eb40-3f35-48d2-a9d5-78d1d60b7d66.png\" width=\"350px\" />\n  <!-- pypi-strip -->\n  </picture>\n  <!-- /pypi-strip -->\n</p>\n\n[![Core Tests.](https://github.com/KAIR-BAIR/nerfacc/actions/workflows/code_checks.yml/badge.svg)](https://github.com/KAIR-BAIR/nerfacc/actions/workflows/code_checks.yml)\n[![Documentation Status](https://readthedocs.com/projects/plenoptix-nerfacc/badge/?version=latest)](https://www.nerfacc.com/en/latest/?badge=latest)\n[![Downloads](https://pepy.tech/badge/nerfacc)](https://pepy.tech/project/nerfacc)\n\nhttps://www.nerfacc.com/\n\nNerfAcc is a PyTorch Nerf acceleration toolbox for both training and inference. 
It focuses on efficient volumetric rendering of radiance fields, which is universal and plug-and-play for most of the NeRFs.\n\nUsing NerfAcc, \n\n- The `vanilla NeRF` model with 8-layer MLPs can be trained to *better quality* (+~0.5 PSNR)\n  in *1 hour* rather than *days* as in the paper.\n- The `Instant-NGP NeRF` model can be trained to *equal quality* in *4.5 minutes*,\n  compared to the official pure-CUDA implementation.\n- The `D-NeRF` model for *dynamic* objects can also be trained in *1 hour*\n  rather than *2 days* as in the paper, and with *better quality* (+~2.5 PSNR).\n- Both *bounded* and *unbounded* scenes are supported.\n\n**And it is a pure Python interface with flexible APIs!**\n\n## Installation\n\n**Dependence**: Please install [Pytorch](https://pytorch.org/get-started/locally/) first.\n\nThe easiest way is to install from PyPI. In this way it will build the CUDA code **on the first run** (JIT).\n```\npip install nerfacc\n```\n\nOr install from source. In this way it will build the CUDA code during installation.\n```\npip install git+https://github.com/KAIR-BAIR/nerfacc.git\n```\n\nWe also provide pre-built wheels covering major combinations of Pytorch + CUDA supported by [official Pytorch](https://pytorch.org/get-started/previous-versions/).\n\n```\n# e.g., torch 1.13.0 + cu117\npip install nerfacc -f https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl/torch-1.13.0_cu117.html\n```\n\n| Windows & Linux | `cu102` | `cu113` | `cu116` | `cu117` |\n|-----------------|---------|---------|---------|---------|\n| torch 1.10.0    | ✅      | ✅      |         |         |\n| torch 1.11.0    | ✅*     | ✅      |         |         |\n| torch 1.12.0    | ✅*     | ✅      | ✅      |         |\n| torch 1.13.0    |         |         | ✅      | ✅      |\n\n\\* Pytorch does not provide Windows pre-built wheels for those combinations, so we do not support them either.\n\n## Usage\n\nThe idea of NerfAcc is to perform efficient ray marching and volumetric rendering. 
So NerfAcc can work with any user-defined radiance field. To plug the NerfAcc rendering pipeline into your code and enjoy the acceleration, you only need to define two functions with your radiance field.\n- `sigma_fn`: Compute density at each sample. It will be used by `nerfacc.ray_marching()` to skip the empty and occluded space during ray marching, which is where the major speedup comes from. \n- `rgb_sigma_fn`: Compute color and density at each sample. It will be used by `nerfacc.rendering()` to conduct differentiable volumetric rendering. This function will receive gradients to update your network.\n\nA simple example is like this:\n\n``` python\nimport torch\nfrom torch import Tensor\nimport nerfacc \n\nradiance_field = ...  # network: a NeRF model\nrays_o: Tensor = ...  # ray origins. (n_rays, 3)\nrays_d: Tensor = ...  # ray normalized directions. (n_rays, 3)\noptimizer = ...  # optimizer\n\ndef sigma_fn(\n    t_starts: Tensor, t_ends:Tensor, ray_indices: Tensor\n) -> Tensor:\n    \"\"\" Query density values from a user-defined radiance field.\n    :params t_starts: Start of the sample interval along the ray. (n_samples, 1).\n    :params t_ends: End of the sample interval along the ray. (n_samples, 1).\n    :params ray_indices: Ray indices that each sample belongs to. (n_samples,).\n    :returns The post-activation density values. (n_samples, 1).\n    \"\"\"\n    t_origins = rays_o[ray_indices]  # (n_samples, 3)\n    t_dirs = rays_d[ray_indices]  # (n_samples, 3)\n    positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0\n    sigmas = radiance_field.query_density(positions) \n    return sigmas  # (n_samples, 1)\n\ndef rgb_sigma_fn(\n    t_starts: Tensor, t_ends: Tensor, ray_indices: Tensor\n) -> Tuple[Tensor, Tensor]:\n    \"\"\" Query rgb and density values from a user-defined radiance field.\n    :params t_starts: Start of the sample interval along the ray. (n_samples, 1).\n    :params t_ends: End of the sample interval along the ray. 
(n_samples, 1).\n    :params ray_indices: Ray indices that each sample belongs to. (n_samples,).\n    :returns The post-activation rgb and density values. \n        (n_samples, 3), (n_samples, 1).\n    \"\"\"\n    t_origins = rays_o[ray_indices]  # (n_samples, 3)\n    t_dirs = rays_d[ray_indices]  # (n_samples, 3)\n    positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0\n    rgbs, sigmas = radiance_field(positions, condition=t_dirs)  \n    return rgbs, sigmas  # (n_samples, 3), (n_samples, 1)\n\n# Efficient Raymarching: Skip empty and occluded space, pack samples from all rays.\n# ray_indices: (n_samples,). t_starts: (n_samples, 1). t_ends: (n_samples, 1).\nwith torch.no_grad():\n    ray_indices, t_starts, t_ends = nerfacc.ray_marching(\n        rays_o, rays_d, sigma_fn=sigma_fn, near_plane=0.2, far_plane=1.0, \n        early_stop_eps=1e-4, alpha_thre=1e-2, \n    )\n\n# Differentiable Volumetric Rendering.\n# colors: (n_rays, 3). opacity: (n_rays, 1). depth: (n_rays, 1).\ncolor, opacity, depth = nerfacc.rendering(\n    t_starts, t_ends, ray_indices, n_rays=rays_o.shape[0], rgb_sigma_fn=rgb_sigma_fn\n)\n\n# Optimize: Both the network and rays will receive gradients\noptimizer.zero_grad()\nloss = F.mse_loss(color, color_gt)\nloss.backward()\noptimizer.step()\n```\n\n## Examples: \n\nBefore running these example scripts, please check which dataset each script needs, and download that dataset first.\n\n```bash\n# clone the repo with submodules.\ngit clone --recursive https://github.com/KAIR-BAIR/nerfacc/\n```\n\n``` bash\n# Instant-NGP NeRF in 4.5 minutes with reproduced performance!\n# See results here: https://www.nerfacc.com/en/latest/examples/ngp.html\npython examples/train_ngp_nerf.py --train_split train --scene lego\n```\n\n``` bash\n# Vanilla MLP NeRF in 1 hour with better performance!\n# See results here: https://www.nerfacc.com/en/latest/examples/vanilla.html\npython examples/train_mlp_nerf.py --train_split train --scene 
lego\n```\n\n```bash\n# D-NeRF for Dynamic objects in 1 hour with better performance!\n# See results here: https://www.nerfacc.com/en/latest/examples/dnerf.html\npython examples/train_mlp_dnerf.py --train_split train --scene lego\n```\n\n```bash\n# Instant-NGP on unbounded scenes in 20 minutes!\n# See results here: https://www.nerfacc.com/en/latest/examples/unbounded.html\npython examples/train_ngp_nerf.py --train_split train --scene garden --auto_aabb --unbounded --cone_angle=0.004\n```\n\nUsed by:\n- [nerfstudio](https://github.com/nerfstudio-project/nerfstudio): A collaboration friendly studio for NeRFs.\n- [instant-nsr-pl](https://github.com/bennyguo/instant-nsr-pl): NeuS in 10 minutes.\n\n\n## Common Installation Issues\n\n\n<details>\n    <summary>ImportError: .../csrc.so: undefined symbol</summary>\n    If you are installing a pre-built wheel, make sure the Pytorch and CUDA versions match the nerfacc version (nerfacc.__version__).\n</details>\n\n## Citation\n\n```bibtex\n@article{li2022nerfacc,\n  title={NerfAcc: A General NeRF Acceleration Toolbox.},\n  author={Li, Ruilong and Tancik, Matthew and Kanazawa, Angjoo},\n  journal={arXiv preprint arXiv:2210.04847},\n  year={2022}\n}\n```\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/Makefile",
    "content": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the environment for the first two.\nSPHINXOPTS    ?=\nSPHINXBUILD   ?= sphinx-build\nSOURCEDIR     = source\nBUILDDIR      = build\n\n# Put it first so that \"make\" without argument is like \"make help\".\nhelp:\n\t@$(SPHINXBUILD) -M help \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n\n.PHONY: help Makefile\n\n# Catch-all target: route all unknown targets to Sphinx using the new\n# \"make mode\" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).\n%: Makefile\n\t@$(SPHINXBUILD) -M $@ \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/requirements.txt",
    "content": "pytorch_sphinx_theme @ git+https://github.com/liruilong940607/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme\nsphinx==5.2.1\nsphinx-copybutton==0.5.0\nsphinx-design==0.2.0"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/_static/css/readthedocs.css",
    "content": ".header-logo {\n    background-image: url(\"../images/logo4x.png\");\n    background-size: 156px 35px;\n    height: 35px;\n    width: 156px;\n}\ncode {\n    word-break: normal;\n}"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.accumulate_along_rays.rst",
    "content": "﻿nerfacc.accumulate\\_along\\_rays\n===============================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: accumulate_along_rays"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.pack_data.rst",
    "content": "﻿nerfacc.pack\\_data\n==================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: pack_data"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.ray_aabb_intersect.rst",
    "content": "﻿nerfacc.ray\\_aabb\\_intersect\n============================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: ray_aabb_intersect"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.ray_resampling.rst",
    "content": "﻿nerfacc.ray\\_resampling\n=======================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: ray_resampling"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_transmittance_from_alpha.rst",
    "content": "﻿nerfacc.render\\_transmittance\\_from\\_alpha\n==========================================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: render_transmittance_from_alpha"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_transmittance_from_density.rst",
    "content": "﻿nerfacc.render\\_transmittance\\_from\\_density\n============================================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: render_transmittance_from_density"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_visibility.rst",
    "content": "﻿nerfacc.render\\_visibility\n==========================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: render_visibility"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_weight_from_alpha.rst",
    "content": "﻿nerfacc.render\\_weight\\_from\\_alpha\n===================================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: render_weight_from_alpha"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.render_weight_from_density.rst",
    "content": "﻿nerfacc.render\\_weight\\_from\\_density\n=====================================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: render_weight_from_density"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.unpack_data.rst",
    "content": "﻿nerfacc.unpack\\_data\n====================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: unpack_data"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/generated/nerfacc.unpack_info.rst",
    "content": "﻿nerfacc.unpack\\_info\n====================\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: unpack_info"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/grid.rst",
    "content": ".. _`Occupancy Grid`:\n\nOccupancy Grid\n===================================\n\n.. currentmodule:: nerfacc\n\n.. autoclass:: ContractionType\n    :members:\n\n.. autoclass:: Grid\n    :members:\n\n.. autoclass:: OccupancyGrid\n    :members:\n\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/rendering.rst",
    "content": "Volumetric Rendering\n===================================\n\nIn `nerfacc`, the volumetric rendering pipeline is broken down into 2 steps:\n\n1. **Raymarching**: This is the process of shooting a ray through the scene and\n   generating samples along the way. To perform efficient volumetric rendering, here we aim\n   at skipping as many areas as possible. The empty space is skipped by using the cached\n   occupancy grid (see :class:`nerfacc.OccupancyGrid`), and the invisible space is skipped by\n   checking the transmittance of the ray while marching. In almost all cases, skipping these regions\n   won't result in a noticeable loss of quality as they would contribute very little to the\n   final rendered image. But it will bring a significant speedup.\n\n2. **Rendering**: This is the process of accumulating samples along the rays into the final image.\n   In this step we also need to query the attributes (a.k.a. color and density) of those samples\n   generated by raymarching. Early stopping is supported in this step.\n\n|\n\n.. currentmodule:: nerfacc\n\n.. autofunction:: ray_marching\n.. autofunction:: rendering\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/apis/utils.rst",
    "content": "Utils\n===================================\n\n.. currentmodule:: nerfacc\n\n.. autosummary::\n   :nosignatures:\n   :toctree: generated/\n\n   ray_aabb_intersect\n   unpack_info\n\n   accumulate_along_rays\n   render_transmittance_from_density\n   render_transmittance_from_alpha\n   render_weight_from_density\n   render_weight_from_alpha\n   render_visibility\n\n   ray_resampling\n   pack_data\n   unpack_data\n   "
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/conf.py",
    "content": "import pytorch_sphinx_theme\n\n__version__ = None\nexec(open(\"../../nerfacc/version.py\", \"r\").read())\n\n# -- Project information\n\nproject = \"nerfacc\"\ncopyright = \"2022, Ruilong\"\nauthor = \"Ruilong\"\n\nrelease = __version__\n\n# -- General configuration\n\nextensions = [\n    \"sphinx.ext.napoleon\",\n    \"sphinx.ext.duration\",\n    \"sphinx.ext.doctest\",\n    \"sphinx.ext.autodoc\",\n    \"sphinx.ext.autosummary\",\n    \"sphinx.ext.intersphinx\",\n]\n\nintersphinx_mapping = {\n    \"python\": (\"https://docs.python.org/3/\", None),\n    \"sphinx\": (\"https://www.sphinx-doc.org/en/master/\", None),\n}\nintersphinx_disabled_domains = [\"std\"]\n\ntemplates_path = [\"_templates\"]\n\n# -- Options for HTML output\n\n# html_theme = \"furo\"\n\nhtml_theme = \"pytorch_sphinx_theme\"\nhtml_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]\nhtml_static_path = [\"_static\"]\nhtml_css_files = [\"css/readthedocs.css\"]\n\n# Ignore >>> when copying code\ncopybutton_prompt_text = r\">>> |\\.\\.\\. \"\ncopybutton_prompt_is_regexp = True\n\n# Theme options are theme-specific and customize the look and feel of a theme\n# further.  For a list of options available for each theme, see the\n# documentation.\nhtml_theme_options = {\n    # The target url that the logo directs to. Unset to do nothing\n    \"logo_url\": \"https://www.nerfacc.com/en/latest/index.html\",\n    # \"menu\" is a list of dictionaries where you can specify the content and the\n    # behavior of each item in the menu. 
Each item can either be a link or a\n    # dropdown menu containing a list of links.\n    \"menu\": [\n        # A link\n        {\"name\": \"GitHub\", \"url\": \"https://github.com/KAIR-BAIR/nerfacc\"},\n        # A dropdown menu\n        # {\n        #     \"name\": \"Projects\",\n        #     \"children\": [\n        #         # A vanilla dropdown item\n        #         {\n        #             \"name\": \"nerfstudio\",\n        #             \"url\": \"https://docs.nerf.studio/\",\n        #             \"description\": \"The all-in-one repo for NeRFs\",\n        #         },\n        #     ],\n        #     # Optional, determining whether this dropdown menu will always be\n        #     # highlighted.\n        #     # \"active\": True,\n        # },\n    ],\n}\n# html_theme_options = {\n#     \"canonical_url\": \"\",\n#     \"analytics_id\": \"\",\n#     \"logo_only\": False,\n#     \"display_version\": True,\n#     \"prev_next_buttons_location\": \"bottom\",\n#     \"style_external_links\": False,\n#     # Toc options\n#     \"collapse_navigation\": True,\n#     \"sticky_navigation\": True,\n#     \"navigation_depth\": 4,\n#     \"includehidden\": True,\n#     \"titles_only\": False\n# }\n\n# -- Options for EPUB output\nepub_show_urls = \"footnote\"\n\n# typehints\nautodoc_typehints = \"description\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/dnerf.rst",
    "content": "Dynamic Scene\n====================\n\nSee code `examples/train_mlp_dnerf.py` at our `github repository`_ for details.\n\nBenchmarks\n------------\n*updated on 2022-10-08*\n\nHere we trained a 8-layer-MLP for the radiance field and a 4-layer-MLP for the warping field,\n(similar to the T-Nerf model in the `D-Nerf`_ paper) on the `D-Nerf dataset`_. We used train \nsplit for training and test split for evaluation. Our experiments are conducted on a \nsingle NVIDIA TITAN RTX GPU. The training memory footprint is about 11GB.\n\n.. note::\n\n    The :ref:`Occupancy Grid` used in this example is shared by all the frames. In other words, \n    instead of using it to indicate the opacity of an area at a single timestamp, \n    Here we use it to indicate the `maximum` opacity at this area `over all the timestamps`.\n    It is not optimal but still makes the rendering very efficient.\n\n+----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+\n| PSNR                 | bouncing | hell    | hook  | jumping | lego  | mutant | standup | trex  | MEAN  |\n|                      | balls    | warrior |       | jacks   |       |        |         |       |       |\n+======================+==========+=========+=======+=========+=======+========+=========+=======+=======+\n| D-Nerf (~ days)      | 32.80    | 25.02   | 29.25 | 32.80   | 21.64 | 31.29  | 32.79   | 31.75 | 29.67 |\n+----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+\n| Ours  (~ 1 hr)       | 39.49    | 25.58   | 31.86 | 32.73   | 24.32 | 35.55  | 35.90   | 32.33 | 32.22 |\n+----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+\n| Ours  (Training time)| 37min    | 52min   | 69min | 64min   | 44min | 79min  | 79min   | 39min | 58min |\n+----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+\n\n.. 
_`D-Nerf`: https://arxiv.org/abs/2011.13961\n.. _`D-Nerf dataset`: https://www.dropbox.com/s/0bf6fl0ye2vz3vr/data.zip?dl=0\n.. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75\n\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/ngp.rst",
    "content": ".. _`Instant-NGP Example`:\n\nInstant-NGP\n====================\n\nSee code `examples/train_ngp_nerf.py` at our `github repository`_ for details.\n\nBenchmarks\n------------\n*updated on 2022-10-12*\n\nHere we trained a `Instant-NGP Nerf`_ model on the `Nerf-Synthetic dataset`_. We follow the same\nsettings with the Instant-NGP paper, which uses train split for training and test split for\nevaluation. All experiments are conducted on a single NVIDIA TITAN RTX GPU. The training\nmemory footprint is about 3GB.\n\n.. note::\n    \n    The Instant-NGP paper makes use of the alpha channel in the images to apply random background\n    augmentation during training. For fair comparision, we rerun their code with a constant white\n    background during both training and testing. Also it is worth to mention that we didn't strictly\n    follow the training receipe in the Instant-NGP paper, such as the learning rate schedule etc, as\n    the purpose of this benchmark is to showcase instead of reproducing the paper.\n\n+-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+\n| PSNR                  | Lego  | Mic   |Materials| Chair |Hotdog | Ficus | Drums | Ship  | MEAN  |\n|                       |       |       |         |       |       |       |       |       |       |\n+=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+\n|Instant-NGP 35k steps  | 35.87 | 36.22 | 29.08   | 35.10 | 37.48 | 30.61 | 23.85 | 30.62 | 32.35 |\n+-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+\n|(training time)        | 309s  | 258s  | 256s    | 316s  | 292s  | 207s  | 218s  | 250s  | 263s  |\n+-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+\n|Ours 20k steps         | 35.50 | 36.16 | 29.14   | 35.23 | 37.15 | 31.71 | 24.88 | 29.91 | 32.46 
|\n+-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+\n|(training time)        | 287s  | 274s  | 269s    | 317s  | 269s  | 244s  | 249s  | 257s  | 271s  |\n+-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+\n\n.. _`Instant-NGP Nerf`: https://github.com/NVlabs/instant-ngp/tree/51e4107edf48338e9ab0316d56a222e0adf87143\n.. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75\n.. _`Nerf-Synthetic dataset`: https://drive.google.com/drive/folders/1JDdLGDruGNXWnM1eqY1FNL9PlStjaKWi\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/unbounded.rst",
    "content": "Unbounded Scene\n====================\n\nSee code `examples/train_ngp_nerf.py` at our `github repository`_ for details.\n\nBenchmarks\n------------\n*updated on 2022-11-07*\n\nHere we trained a `Instant-NGP Nerf`_  on the `MipNerf360`_ dataset. We used train \nsplit for training and test split for evaluation. Our experiments are conducted on a \nsingle NVIDIA TITAN RTX GPU. The training memory footprint is about 6-9GB.\n\nThe main difference between working with unbounded scenes and bounded scenes, is that\na contraction method is needed to map the infinite space to a finite :ref:`Occupancy Grid`.\nWe have difference options provided for this (see :ref:`Occupancy Grid`). The experiments\nhere is basically the Instant-NGP experiments (see :ref:`Instant-NGP Example`) with a contraction method\nthat takes from `MipNerf360`_.\n\n.. note:: \n    Even though we are comparing with `Nerf++`_ and `MipNerf360`_, the model and everything are\n    totally different with them. There are plenty of ideas from those papers that would be very\n    helpful for the performance, but we didn't adopt them. 
As this is just a simple example to \n    show how to use the library, we didn't want to make it too complicated.\n\n\n+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+\n| PSNR                 |Garden |Bicycle|Bonsai |Counter|Kitchen| Room  | Stump | MEAN  |\n|                      |       |       |       |       |       |       |       |       |\n+======================+=======+=======+=======+=======+=======+=======+=======+=======+\n| Nerf++ (~days)       | 24.32 | 22.64 | 29.15 | 26.38 | 27.80 | 28.87 | 24.34 | 26.21 |\n+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+\n| MipNerf360 (~days)   | 26.98 | 24.37 | 33.46 | 29.55 | 32.23 | 31.63 | 26.40 | 29.23 |\n+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+\n| Ours (~20 mins)      | 25.41 | 22.97 | 30.71 | 27.34 | 30.32 | 31.00 | 23.43 | 27.31 |\n+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+\n| Ours (Training time) | 25min | 17min | 19min | 23min | 28min | 20min | 17min | 21min |\n+----------------------+-------+-------+-------+-------+-------+-------+-------+-------+\n\n.. _`Instant-NGP Nerf`: https://arxiv.org/abs/2201.05989\n.. _`MipNerf360`: https://arxiv.org/abs/2111.12077\n.. _`Nerf++`: https://arxiv.org/abs/2010.07492\n.. _`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/examples/vanilla.rst",
    "content": "Vanilla Nerf \n====================\n\nSee code `examples/train_mlp_nerf.py` at our `github repository`_ for details.\n\nBenchmarks\n------------\n*updated on 2022-10-08*\n\nHere we trained a 8-layer-MLP for the radiance field as in the `vanilla Nerf`_. We used the \ntrain split for training and test split for evaluation as in the Nerf paper. Our experiments are \nconducted on a single NVIDIA TITAN RTX GPU. The training memory footprint is about 10GB.\n\n.. note:: \n    The vanilla Nerf paper uses two MLPs for course-to-fine sampling. Instead here we only use a \n    single MLP with more samples (1024). Both ways share the same spirit to do dense sampling \n    around the surface. Our fast rendering inheritly skip samples away from the surface \n    so we can simplly increase the number of samples with a single MLP, to achieve the same goal \n    with the coarse-to-fine sampling, without runtime or memory issue.\n\n+----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+\n| PSNR                 | Lego  | Mic   |Materials| Chair |Hotdog | Ficus | Drums | Ship  | MEAN  |\n|                      |       |       |         |       |       |       |       |       |       |\n+======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+\n| NeRF  (~ days)       | 32.54 | 32.91 | 29.62   | 33.00 | 36.18 | 30.13 | 25.01 | 28.65 | 31.00 |\n+----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+\n| Ours  (~ 50min)      | 33.69 | 33.76 | 29.73   | 33.32 | 35.80 | 32.52 | 25.39 | 28.18 | 31.55 |\n+----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+\n| Ours  (Training time)| 58min | 53min | 46min   | 62min | 56min | 42min | 52min | 49min | 52min |\n+----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+\n\n.. 
_`github repository`: https://github.com/KAIR-BAIR/nerfacc/tree/76c0f9817da4c9c8b5ccf827eb069ee2ce854b75\n.. _`vanilla Nerf`: https://arxiv.org/abs/2003.08934\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/docs/source/index.rst",
    "content": "NerfAcc Documentation\n===================================\n\nNerfAcc is a PyTorch Nerf acceleration toolbox for both training and inference. It focus on\nefficient volumetric rendering of radiance fields, which is universal and plug-and-play for most of the NeRFs.\n\nUsing NerfAcc, \n\n- The `vanilla Nerf`_ model with 8-layer MLPs can be trained to *better quality* (+~0.5 PNSR) \\\n  in *1 hour* rather than *1~2 days* as in the paper.\n- The `Instant-NGP Nerf`_ model can be trained to *equal quality* in *4.5 minutes*, \\\n  comparing to the official pure-CUDA implementation.\n- The `D-Nerf`_ model for *dynamic* objects can also be trained in *1 hour* \\\n  rather than *2 days* as in the paper, and with *better quality* (+~2.5 PSNR).\n- Both *bounded* and *unbounded* scenes are supported.\n\n**And it is pure Python interface with flexible APIs!**\n\n| Github: https://github.com/KAIR-BAIR/nerfacc\n| Paper: https://arxiv.org/pdf/2210.04847.pdf\n| Authors: `Ruilong Li`_, `Matthew Tancik`_, `Angjoo Kanazawa`_\n\n.. note::\n\n   This repo is focusing on the single scene situation. Generalizable Nerfs across\n   multiple scenes is currently out of the scope of this repo. But you may still find\n   some useful tricks in this repo. :)\n\n\nInstallation:\n-------------\n\n.. code-block:: console\n\n   $ pip install nerfacc\n\nUsage:\n-------------\n\nThe idea of NerfAcc is to perform efficient ray marching and volumetric rendering. \nSo NerfAcc can work with any user-defined radiance field. To plug the NerfAcc rendering\npipeline into your code and enjoy the acceleration, you only need to define two functions \nwith your radience field.\n\n- `sigma_fn`: Compute density at each sample. It will be used by :func:`nerfacc.ray_marching` to skip the empty and occluded space during ray marching, which is where the major speedup comes from. \n- `rgb_sigma_fn`: Compute color and density at each sample. 
It will be used by :func:`nerfacc.rendering` to conduct differentiable volumetric rendering. This function will receive gradients to update your network.\n\nAn simple example is like this:\n\n.. code-block:: python\n\n   import torch\n   from torch import Tensor\n   import nerfacc \n\n   radiance_field = ...  # network: a NeRF model\n   rays_o: Tensor = ...  # ray origins. (n_rays, 3)\n   rays_d: Tensor = ...  # ray normalized directions. (n_rays, 3)\n   optimizer = ...  # optimizer\n\n   def sigma_fn(\n      t_starts: Tensor, t_ends:Tensor, ray_indices: Tensor\n   ) -> Tensor:\n      \"\"\" Query density values from a user-defined radiance field.\n      :params t_starts: Start of the sample interval along the ray. (n_samples, 1).\n      :params t_ends: End of the sample interval along the ray. (n_samples, 1).\n      :params ray_indices: Ray indices that each sample belongs to. (n_samples,).\n      :returns The post-activation density values. (n_samples, 1).\n      \"\"\"\n      t_origins = rays_o[ray_indices]  # (n_samples, 3)\n      t_dirs = rays_d[ray_indices]  # (n_samples, 3)\n      positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0\n      sigmas = radiance_field.query_density(positions) \n      return sigmas  # (n_samples, 1)\n\n   def rgb_sigma_fn(\n      t_starts: Tensor, t_ends: Tensor, ray_indices: Tensor\n   ) -> Tuple[Tensor, Tensor]:\n      \"\"\" Query rgb and density values from a user-defined radiance field.\n      :params t_starts: Start of the sample interval along the ray. (n_samples, 1).\n      :params t_ends: End of the sample interval along the ray. (n_samples, 1).\n      :params ray_indices: Ray indices that each sample belongs to. (n_samples,).\n      :returns The post-activation rgb and density values. 
\n         (n_samples, 3), (n_samples, 1).\n      \"\"\"\n      t_origins = rays_o[ray_indices]  # (n_samples, 3)\n      t_dirs = rays_d[ray_indices]  # (n_samples, 3)\n      positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0\n      rgbs, sigmas = radiance_field(positions, condition=t_dirs)  \n      return rgbs, sigmas  # (n_samples, 3), (n_samples, 1)\n\n   # Efficient Raymarching: Skip empty and occluded space, pack samples from all rays.\n   # ray_indices: (n_samples,). t_starts: (n_samples, 1). t_ends: (n_samples, 1).\n   with torch.no_grad():\n      ray_indices, t_starts, t_ends = nerfacc.ray_marching(\n         rays_o, rays_d, sigma_fn=sigma_fn, near_plane=0.2, far_plane=1.0, \n         early_stop_eps=1e-4, alpha_thre=1e-2, \n      )\n\n   # Differentiable Volumetric Rendering.\n   # colors: (n_rays, 3). opacity: (n_rays, 1). depth: (n_rays, 1).\n   color, opacity, depth = nerfacc.rendering(\n      t_starts, t_ends, ray_indices, n_rays=rays_o.shape[0], rgb_sigma_fn=rgb_sigma_fn\n   )\n\n   # Optimize: Both the network and rays will receive gradients\n   optimizer.zero_grad()\n   loss = F.mse_loss(color, color_gt)\n   loss.backward()\n   optimizer.step()\n\n\nLinks:\n-------------\n\n.. toctree::\n   :glob:\n   :maxdepth: 1\n   :caption: Python API\n\n   apis/*\n\n.. toctree::\n   :glob:\n   :maxdepth: 1\n   :caption: Example Usages\n\n   examples/*\n\n.. toctree::\n   :maxdepth: 1\n   :caption: Projects\n\n   nerfstudio <https://docs.nerf.studio/>\n\n\n.. _`vanilla Nerf`: https://arxiv.org/abs/2003.08934\n.. _`Instant-NGP Nerf`: https://arxiv.org/abs/2201.05989\n.. _`D-Nerf`: https://arxiv.org/abs/2011.13961\n.. _`MipNerf360`: https://arxiv.org/abs/2111.12077\n.. _`pixel-Nerf`: https://arxiv.org/abs/2012.02190\n.. _`Nerf++`: https://arxiv.org/abs/2010.07492\n\n.. _`Ruilong Li`: https://www.liruilong.cn/\n.. _`Matthew Tancik`: https://www.matthewtancik.com/\n.. _`Angjoo Kanazawa`: https://people.eecs.berkeley.edu/~kanazawa/"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/__init__.py",
    "content": ""
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/dnerf_synthetic.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nimport json\nimport os\n\nimport imageio.v2 as imageio\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\n\nfrom .utils import Rays\n\n\ndef _load_renderings(root_fp: str, subject_id: str, split: str):\n    \"\"\"Load images from disk.\"\"\"\n    if not root_fp.startswith(\"/\"):\n        # allow relative path. e.g., \"./data/dnerf_synthetic/\"\n        root_fp = os.path.join(\n            os.path.dirname(os.path.abspath(__file__)),\n            \"..\",\n            \"..\",\n            root_fp,\n        )\n\n    data_dir = os.path.join(root_fp, subject_id)\n    with open(\n        os.path.join(data_dir, \"transforms_{}.json\".format(split)), \"r\"\n    ) as fp:\n        meta = json.load(fp)\n    images = []\n    camtoworlds = []\n    timestamps = []\n\n    for i in range(len(meta[\"frames\"])):\n        frame = meta[\"frames\"][i]\n        fname = os.path.join(data_dir, frame[\"file_path\"] + \".png\")\n        rgba = imageio.imread(fname)\n        timestamp = (\n            frame[\"time\"]\n            if \"time\" in frame\n            else float(i) / (len(meta[\"frames\"]) - 1)\n        )\n        timestamps.append(timestamp)\n        camtoworlds.append(frame[\"transform_matrix\"])\n        images.append(rgba)\n\n    images = np.stack(images, axis=0)\n    camtoworlds = np.stack(camtoworlds, axis=0)\n    timestamps = np.stack(timestamps, axis=0)\n\n    h, w = images.shape[1:3]\n    camera_angle_x = float(meta[\"camera_angle_x\"])\n    focal = 0.5 * w / np.tan(0.5 * camera_angle_x)\n\n    return images, camtoworlds, focal, timestamps\n\n\nclass SubjectLoader(torch.utils.data.Dataset):\n    \"\"\"Single subject data loader for training and evaluation.\"\"\"\n\n    SPLITS = [\"train\", \"val\", \"test\"]\n    SUBJECT_IDS = [\n        \"bouncingballs\",\n        \"hellwarrior\",\n        \"hook\",\n        \"jumpingjacks\",\n        \"lego\",\n        \"mutant\",\n        
\"standup\",\n        \"trex\",\n    ]\n\n    WIDTH, HEIGHT = 800, 800\n    NEAR, FAR = 2.0, 6.0\n    OPENGL_CAMERA = True\n\n    def __init__(\n        self,\n        subject_id: str,\n        root_fp: str,\n        split: str,\n        color_bkgd_aug: str = \"white\",\n        num_rays: int = None,\n        near: float = None,\n        far: float = None,\n        batch_over_images: bool = True,\n    ):\n        super().__init__()\n        assert split in self.SPLITS, \"%s\" % split\n        assert subject_id in self.SUBJECT_IDS, \"%s\" % subject_id\n        assert color_bkgd_aug in [\"white\", \"black\", \"random\"]\n        self.split = split\n        self.num_rays = num_rays\n        self.near = self.NEAR if near is None else near\n        self.far = self.FAR if far is None else far\n        self.training = (num_rays is not None) and (\n            split in [\"train\", \"trainval\"]\n        )\n        self.color_bkgd_aug = color_bkgd_aug\n        self.batch_over_images = batch_over_images\n        (\n            self.images,\n            self.camtoworlds,\n            self.focal,\n            self.timestamps,\n        ) = _load_renderings(root_fp, subject_id, split)\n        self.images = torch.from_numpy(self.images).to(torch.uint8)\n        self.camtoworlds = torch.from_numpy(self.camtoworlds).to(torch.float32)\n        self.timestamps = torch.from_numpy(self.timestamps).to(torch.float32)[\n            :, None\n        ]\n        self.K = torch.tensor(\n            [\n                [self.focal, 0, self.WIDTH / 2.0],\n                [0, self.focal, self.HEIGHT / 2.0],\n                [0, 0, 1],\n            ],\n            dtype=torch.float32,\n        )  # (3, 3)\n        assert self.images.shape[1:3] == (self.HEIGHT, self.WIDTH)\n\n    def __len__(self):\n        return len(self.images)\n\n    @torch.no_grad()\n    def __getitem__(self, index):\n        data = self.fetch_data(index)\n        data = self.preprocess(data)\n        return data\n\n    def 
preprocess(self, data):\n        \"\"\"Process the fetched / cached data with randomness.\"\"\"\n        rgba, rays = data[\"rgba\"], data[\"rays\"]\n        pixels, alpha = torch.split(rgba, [3, 1], dim=-1)\n\n        if self.training:\n            if self.color_bkgd_aug == \"random\":\n                color_bkgd = torch.rand(3, device=self.images.device)\n            elif self.color_bkgd_aug == \"white\":\n                color_bkgd = torch.ones(3, device=self.images.device)\n            elif self.color_bkgd_aug == \"black\":\n                color_bkgd = torch.zeros(3, device=self.images.device)\n        else:\n            # just use white during inference\n            color_bkgd = torch.ones(3, device=self.images.device)\n\n        pixels = pixels * alpha + color_bkgd * (1.0 - alpha)\n        return {\n            \"pixels\": pixels,  # [n_rays, 3] or [h, w, 3]\n            \"rays\": rays,  # [n_rays,] or [h, w]\n            \"color_bkgd\": color_bkgd,  # [3,]\n            **{k: v for k, v in data.items() if k not in [\"rgba\", \"rays\"]},\n        }\n\n    def update_num_rays(self, num_rays):\n        self.num_rays = num_rays\n\n    def fetch_data(self, index):\n        \"\"\"Fetch the data (it maybe cached for multiple batches).\"\"\"\n        num_rays = self.num_rays\n\n        if self.training:\n            if self.batch_over_images:\n                image_id = torch.randint(\n                    0,\n                    len(self.images),\n                    size=(num_rays,),\n                    device=self.images.device,\n                )\n            else:\n                image_id = [index]\n            x = torch.randint(\n                0, self.WIDTH, size=(num_rays,), device=self.images.device\n            )\n            y = torch.randint(\n                0, self.HEIGHT, size=(num_rays,), device=self.images.device\n            )\n        else:\n            image_id = [index]\n            x, y = torch.meshgrid(\n                
torch.arange(self.WIDTH, device=self.images.device),\n                torch.arange(self.HEIGHT, device=self.images.device),\n                indexing=\"xy\",\n            )\n            x = x.flatten()\n            y = y.flatten()\n\n        # generate rays\n        rgba = self.images[image_id, y, x] / 255.0  # (num_rays, 4)\n        c2w = self.camtoworlds[image_id]  # (num_rays, 3, 4)\n        camera_dirs = F.pad(\n            torch.stack(\n                [\n                    (x - self.K[0, 2] + 0.5) / self.K[0, 0],\n                    (y - self.K[1, 2] + 0.5)\n                    / self.K[1, 1]\n                    * (-1.0 if self.OPENGL_CAMERA else 1.0),\n                ],\n                dim=-1,\n            ),\n            (0, 1),\n            value=(-1.0 if self.OPENGL_CAMERA else 1.0),\n        )  # [num_rays, 3]\n\n        # [n_cams, height, width, 3]\n        directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1)\n        origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape)\n        viewdirs = directions / torch.linalg.norm(\n            directions, dim=-1, keepdims=True\n        )\n\n        if self.training:\n            origins = torch.reshape(origins, (num_rays, 3))\n            viewdirs = torch.reshape(viewdirs, (num_rays, 3))\n            rgba = torch.reshape(rgba, (num_rays, 4))\n        else:\n            origins = torch.reshape(origins, (self.HEIGHT, self.WIDTH, 3))\n            viewdirs = torch.reshape(viewdirs, (self.HEIGHT, self.WIDTH, 3))\n            rgba = torch.reshape(rgba, (self.HEIGHT, self.WIDTH, 4))\n\n        rays = Rays(origins=origins, viewdirs=viewdirs)\n        timestamps = self.timestamps[image_id]\n\n        return {\n            \"rgba\": rgba,  # [h, w, 4] or [num_rays, 4]\n            \"rays\": rays,  # [h, w, 3] or [num_rays, 3]\n            \"timestamps\": timestamps,  # [num_rays, 1]\n        }\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/nerf_360_v2.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nimport collections\nimport os\nimport sys\n\nimport imageio\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\nimport tqdm\n\nfrom .utils import Rays\n\n_PATH = os.path.abspath(__file__)\n\nsys.path.insert(\n    0, os.path.join(os.path.dirname(_PATH), \"..\", \"pycolmap\", \"pycolmap\")\n)\nfrom scene_manager import SceneManager\n\n\ndef _load_colmap(root_fp: str, subject_id: str, split: str, factor: int = 1):\n    assert factor in [1, 2, 4, 8]\n\n    data_dir = os.path.join(root_fp, subject_id)\n    colmap_dir = os.path.join(data_dir, \"sparse/0/\")\n\n    manager = SceneManager(colmap_dir)\n    manager.load_cameras()\n    manager.load_images()\n\n    # Assume shared intrinsics between all cameras.\n    cam = manager.cameras[1]\n    fx, fy, cx, cy = cam.fx, cam.fy, cam.cx, cam.cy\n    K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])\n    K[:2, :] /= factor\n\n    # Extract extrinsic matrices in world-to-camera format.\n    imdata = manager.images\n    w2c_mats = []\n    bottom = np.array([0, 0, 0, 1]).reshape(1, 4)\n    for k in imdata:\n        im = imdata[k]\n        rot = im.R()\n        trans = im.tvec.reshape(3, 1)\n        w2c = np.concatenate([np.concatenate([rot, trans], 1), bottom], axis=0)\n        w2c_mats.append(w2c)\n    w2c_mats = np.stack(w2c_mats, axis=0)\n\n    # Convert extrinsics to camera-to-world.\n    camtoworlds = np.linalg.inv(w2c_mats)\n\n    # Image names from COLMAP. 
No need for permuting the poses according to\n    # image names anymore.\n    image_names = [imdata[k].name for k in imdata]\n\n    # # Switch from COLMAP (right, down, fwd) to Nerf (right, up, back) frame.\n    # poses = poses @ np.diag([1, -1, -1, 1])\n\n    # Get distortion parameters.\n    type_ = cam.camera_type\n\n    if type_ == 0 or type_ == \"SIMPLE_PINHOLE\":\n        params = None\n        camtype = \"perspective\"\n\n    elif type_ == 1 or type_ == \"PINHOLE\":\n        params = None\n        camtype = \"perspective\"\n\n    if type_ == 2 or type_ == \"SIMPLE_RADIAL\":\n        params = {k: 0.0 for k in [\"k1\", \"k2\", \"k3\", \"p1\", \"p2\"]}\n        params[\"k1\"] = cam.k1\n        camtype = \"perspective\"\n\n    elif type_ == 3 or type_ == \"RADIAL\":\n        params = {k: 0.0 for k in [\"k1\", \"k2\", \"k3\", \"p1\", \"p2\"]}\n        params[\"k1\"] = cam.k1\n        params[\"k2\"] = cam.k2\n        camtype = \"perspective\"\n\n    elif type_ == 4 or type_ == \"OPENCV\":\n        params = {k: 0.0 for k in [\"k1\", \"k2\", \"k3\", \"p1\", \"p2\"]}\n        params[\"k1\"] = cam.k1\n        params[\"k2\"] = cam.k2\n        params[\"p1\"] = cam.p1\n        params[\"p2\"] = cam.p2\n        camtype = \"perspective\"\n\n    elif type_ == 5 or type_ == \"OPENCV_FISHEYE\":\n        params = {k: 0.0 for k in [\"k1\", \"k2\", \"k3\", \"k4\"]}\n        params[\"k1\"] = cam.k1\n        params[\"k2\"] = cam.k2\n        params[\"k3\"] = cam.k3\n        params[\"k4\"] = cam.k4\n        camtype = \"fisheye\"\n\n    assert params is None, \"Only support pinhole camera model.\"\n\n    # Previous Nerf results were generated with images sorted by filename,\n    # ensure metrics are reported on the same test set.\n    inds = np.argsort(image_names)\n    image_names = [image_names[i] for i in inds]\n    camtoworlds = camtoworlds[inds]\n\n    # Load images.\n    if factor > 1:\n        image_dir_suffix = f\"_{factor}\"\n    else:\n        image_dir_suffix = \"\"\n    
colmap_image_dir = os.path.join(data_dir, \"images\")\n    image_dir = os.path.join(data_dir, \"images\" + image_dir_suffix)\n    for d in [image_dir, colmap_image_dir]:\n        if not os.path.exists(d):\n            raise ValueError(f\"Image folder {d} does not exist.\")\n    # Downsampled images may have different names vs images used for COLMAP,\n    # so we need to map between the two sorted lists of files.\n    colmap_files = sorted(os.listdir(colmap_image_dir))\n    image_files = sorted(os.listdir(image_dir))\n    colmap_to_image = dict(zip(colmap_files, image_files))\n    image_paths = [\n        os.path.join(image_dir, colmap_to_image[f]) for f in image_names\n    ]\n    print(\"loading images\")\n    images = [imageio.imread(x) for x in tqdm.tqdm(image_paths)]\n    images = np.stack(images, axis=0)\n\n    # Select the split.\n    all_indices = np.arange(images.shape[0])\n    split_indices = {\n        \"test\": all_indices[all_indices % 8 == 0],\n        \"train\": all_indices[all_indices % 8 != 0],\n    }\n    indices = split_indices[split]\n    # All per-image quantities must be re-indexed using the split indices.\n    images = images[indices]\n    camtoworlds = camtoworlds[indices]\n\n    return images, camtoworlds, K\n\n\nclass SubjectLoader(torch.utils.data.Dataset):\n    \"\"\"Single subject data loader for training and evaluation.\"\"\"\n\n    SPLITS = [\"train\", \"test\"]\n    SUBJECT_IDS = [\n        \"garden\",\n        \"bicycle\",\n        \"bonsai\",\n        \"counter\",\n        \"kitchen\",\n        \"room\",\n        \"stump\",\n    ]\n\n    OPENGL_CAMERA = False\n\n    def __init__(\n        self,\n        subject_id: str,\n        root_fp: str,\n        split: str,\n        color_bkgd_aug: str = \"white\",\n        num_rays: int = None,\n        near: float = None,\n        far: float = None,\n        batch_over_images: bool = True,\n        factor: int = 1,\n    ):\n        super().__init__()\n        assert split in self.SPLITS, 
\"%s\" % split\n        assert subject_id in self.SUBJECT_IDS, \"%s\" % subject_id\n        assert color_bkgd_aug in [\"white\", \"black\", \"random\"]\n        self.split = split\n        self.num_rays = num_rays\n        self.near = near\n        self.far = far\n        self.training = (num_rays is not None) and (\n            split in [\"train\", \"trainval\"]\n        )\n        self.color_bkgd_aug = color_bkgd_aug\n        self.batch_over_images = batch_over_images\n        self.images, self.camtoworlds, self.K = _load_colmap(\n            root_fp, subject_id, split, factor\n        )\n        self.images = torch.from_numpy(self.images).to(torch.uint8)\n        self.camtoworlds = torch.from_numpy(self.camtoworlds).to(torch.float32)\n        self.K = torch.tensor(self.K).to(torch.float32)\n        self.height, self.width = self.images.shape[1:3]\n\n    def __len__(self):\n        return len(self.images)\n\n    @torch.no_grad()\n    def __getitem__(self, index):\n        data = self.fetch_data(index)\n        data = self.preprocess(data)\n        return data\n\n    def preprocess(self, data):\n        \"\"\"Process the fetched / cached data with randomness.\"\"\"\n        pixels, rays = data[\"rgb\"], data[\"rays\"]\n\n        if self.training:\n            if self.color_bkgd_aug == \"random\":\n                color_bkgd = torch.rand(3, device=self.images.device)\n            elif self.color_bkgd_aug == \"white\":\n                color_bkgd = torch.ones(3, device=self.images.device)\n            elif self.color_bkgd_aug == \"black\":\n                color_bkgd = torch.zeros(3, device=self.images.device)\n        else:\n            # just use white during inference\n            color_bkgd = torch.ones(3, device=self.images.device)\n\n        return {\n            \"pixels\": pixels,  # [n_rays, 3] or [h, w, 3]\n            \"rays\": rays,  # [n_rays,] or [h, w]\n            \"color_bkgd\": color_bkgd,  # [3,]\n            **{k: v for k, v in data.items() if k 
not in [\"rgb\", \"rays\"]},\n        }\n\n    def update_num_rays(self, num_rays):\n        self.num_rays = num_rays\n\n    def fetch_data(self, index):\n        \"\"\"Fetch the data (it maybe cached for multiple batches).\"\"\"\n        num_rays = self.num_rays\n\n        if self.training:\n            if self.batch_over_images:\n                image_id = torch.randint(\n                    0,\n                    len(self.images),\n                    size=(num_rays,),\n                    device=self.images.device,\n                )\n            else:\n                image_id = [index]\n            x = torch.randint(\n                0, self.width, size=(num_rays,), device=self.images.device\n            )\n            y = torch.randint(\n                0, self.height, size=(num_rays,), device=self.images.device\n            )\n        else:\n            image_id = [index]\n            x, y = torch.meshgrid(\n                torch.arange(self.width, device=self.images.device),\n                torch.arange(self.height, device=self.images.device),\n                indexing=\"xy\",\n            )\n            x = x.flatten()\n            y = y.flatten()\n\n        # generate rays\n        rgb = self.images[image_id, y, x] / 255.0  # (num_rays, 3)\n        c2w = self.camtoworlds[image_id]  # (num_rays, 3, 4)\n        camera_dirs = F.pad(\n            torch.stack(\n                [\n                    (x - self.K[0, 2] + 0.5) / self.K[0, 0],\n                    (y - self.K[1, 2] + 0.5)\n                    / self.K[1, 1]\n                    * (-1.0 if self.OPENGL_CAMERA else 1.0),\n                ],\n                dim=-1,\n            ),\n            (0, 1),\n            value=(-1.0 if self.OPENGL_CAMERA else 1.0),\n        )  # [num_rays, 3]\n\n        # [n_cams, height, width, 3]\n        directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1)\n        origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape)\n        viewdirs = 
directions / torch.linalg.norm(\n            directions, dim=-1, keepdims=True\n        )\n\n        if self.training:\n            origins = torch.reshape(origins, (num_rays, 3))\n            viewdirs = torch.reshape(viewdirs, (num_rays, 3))\n            rgb = torch.reshape(rgb, (num_rays, 3))\n        else:\n            origins = torch.reshape(origins, (self.height, self.width, 3))\n            viewdirs = torch.reshape(viewdirs, (self.height, self.width, 3))\n            rgb = torch.reshape(rgb, (self.height, self.width, 3))\n\n        rays = Rays(origins=origins, viewdirs=viewdirs)\n\n        return {\n            \"rgb\": rgb,  # [h, w, 3] or [num_rays, 3]\n            \"rays\": rays,  # [h, w, 3] or [num_rays, 3]\n        }\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/nerf_synthetic.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nimport collections\nimport json\nimport os\n\nimport imageio.v2 as imageio\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\n\nfrom .utils import Rays\n\n\ndef _load_renderings(root_fp: str, subject_id: str, split: str):\n    \"\"\"Load images from disk.\"\"\"\n    if not root_fp.startswith(\"/\"):\n        # allow relative path. e.g., \"./data/nerf_synthetic/\"\n        root_fp = os.path.join(\n            os.path.dirname(os.path.abspath(__file__)),\n            \"..\",\n            \"..\",\n            root_fp,\n        )\n\n    data_dir = os.path.join(root_fp, subject_id)\n    with open(\n        os.path.join(data_dir, \"transforms_{}.json\".format(split)), \"r\"\n    ) as fp:\n        meta = json.load(fp)\n    images = []\n    camtoworlds = []\n\n    for i in range(len(meta[\"frames\"])):\n        frame = meta[\"frames\"][i]\n        fname = os.path.join(data_dir, frame[\"file_path\"] + \".png\")\n        rgba = imageio.imread(fname)\n        camtoworlds.append(frame[\"transform_matrix\"])\n        images.append(rgba)\n\n    images = np.stack(images, axis=0)\n    camtoworlds = np.stack(camtoworlds, axis=0)\n\n    h, w = images.shape[1:3]\n    camera_angle_x = float(meta[\"camera_angle_x\"])\n    focal = 0.5 * w / np.tan(0.5 * camera_angle_x)\n\n    return images, camtoworlds, focal\n\n\nclass SubjectLoader(torch.utils.data.Dataset):\n    \"\"\"Single subject data loader for training and evaluation.\"\"\"\n\n    SPLITS = [\"train\", \"val\", \"trainval\", \"test\"]\n    SUBJECT_IDS = [\n        \"chair\",\n        \"drums\",\n        \"ficus\",\n        \"hotdog\",\n        \"lego\",\n        \"materials\",\n        \"mic\",\n        \"ship\",\n    ]\n\n    WIDTH, HEIGHT = 800, 800\n    NEAR, FAR = 2.0, 6.0\n    OPENGL_CAMERA = True\n\n    def __init__(\n        self,\n        subject_id: str,\n        root_fp: str,\n        split: str,\n        color_bkgd_aug: str = 
\"white\",\n        num_rays: int = None,\n        near: float = None,\n        far: float = None,\n        batch_over_images: bool = True,\n    ):\n        super().__init__()\n        assert split in self.SPLITS, \"%s\" % split\n        assert subject_id in self.SUBJECT_IDS, \"%s\" % subject_id\n        assert color_bkgd_aug in [\"white\", \"black\", \"random\"]\n        self.split = split\n        self.num_rays = num_rays\n        self.near = self.NEAR if near is None else near\n        self.far = self.FAR if far is None else far\n        self.training = (num_rays is not None) and (\n            split in [\"train\", \"trainval\"]\n        )\n        self.color_bkgd_aug = color_bkgd_aug\n        self.batch_over_images = batch_over_images\n        if split == \"trainval\":\n            _images_train, _camtoworlds_train, _focal_train = _load_renderings(\n                root_fp, subject_id, \"train\"\n            )\n            _images_val, _camtoworlds_val, _focal_val = _load_renderings(\n                root_fp, subject_id, \"val\"\n            )\n            self.images = np.concatenate([_images_train, _images_val])\n            self.camtoworlds = np.concatenate(\n                [_camtoworlds_train, _camtoworlds_val]\n            )\n            self.focal = _focal_train\n        else:\n            self.images, self.camtoworlds, self.focal = _load_renderings(\n                root_fp, subject_id, split\n            )\n        self.images = torch.from_numpy(self.images).to(torch.uint8)\n        self.camtoworlds = torch.from_numpy(self.camtoworlds).to(torch.float32)\n        self.K = torch.tensor(\n            [\n                [self.focal, 0, self.WIDTH / 2.0],\n                [0, self.focal, self.HEIGHT / 2.0],\n                [0, 0, 1],\n            ],\n            dtype=torch.float32,\n        )  # (3, 3)\n        assert self.images.shape[1:3] == (self.HEIGHT, self.WIDTH)\n\n    def __len__(self):\n        return len(self.images)\n\n    @torch.no_grad()\n    
def __getitem__(self, index):\n        data = self.fetch_data(index)\n        data = self.preprocess(data)\n        return data\n\n    def preprocess(self, data):\n        \"\"\"Process the fetched / cached data with randomness.\"\"\"\n        rgba, rays = data[\"rgba\"], data[\"rays\"]\n        pixels, alpha = torch.split(rgba, [3, 1], dim=-1)\n\n        if self.training:\n            if self.color_bkgd_aug == \"random\":\n                color_bkgd = torch.rand(3, device=self.images.device)\n            elif self.color_bkgd_aug == \"white\":\n                color_bkgd = torch.ones(3, device=self.images.device)\n            elif self.color_bkgd_aug == \"black\":\n                color_bkgd = torch.zeros(3, device=self.images.device)\n        else:\n            # just use white during inference\n            color_bkgd = torch.ones(3, device=self.images.device)\n\n        pixels = pixels * alpha + color_bkgd * (1.0 - alpha)\n        return {\n            \"pixels\": pixels,  # [n_rays, 3] or [h, w, 3]\n            \"rays\": rays,  # [n_rays,] or [h, w]\n            \"color_bkgd\": color_bkgd,  # [3,]\n            **{k: v for k, v in data.items() if k not in [\"rgba\", \"rays\"]},\n        }\n\n    def update_num_rays(self, num_rays):\n        self.num_rays = num_rays\n\n    def fetch_data(self, index):\n        \"\"\"Fetch the data (it maybe cached for multiple batches).\"\"\"\n        num_rays = self.num_rays\n\n        if self.training:\n            if self.batch_over_images:\n                image_id = torch.randint(\n                    0,\n                    len(self.images),\n                    size=(num_rays,),\n                    device=self.images.device,\n                )\n            else:\n                image_id = [index]\n            x = torch.randint(\n                0, self.WIDTH, size=(num_rays,), device=self.images.device\n            )\n            y = torch.randint(\n                0, self.HEIGHT, size=(num_rays,), 
device=self.images.device\n            )\n        else:\n            image_id = [index]\n            x, y = torch.meshgrid(\n                torch.arange(self.WIDTH, device=self.images.device),\n                torch.arange(self.HEIGHT, device=self.images.device),\n                indexing=\"xy\",\n            )\n            x = x.flatten()\n            y = y.flatten()\n\n        # generate rays\n        rgba = self.images[image_id, y, x] / 255.0  # (num_rays, 4)\n        c2w = self.camtoworlds[image_id]  # (num_rays, 3, 4)\n        camera_dirs = F.pad(\n            torch.stack(\n                [\n                    (x - self.K[0, 2] + 0.5) / self.K[0, 0],\n                    (y - self.K[1, 2] + 0.5)\n                    / self.K[1, 1]\n                    * (-1.0 if self.OPENGL_CAMERA else 1.0),\n                ],\n                dim=-1,\n            ),\n            (0, 1),\n            value=(-1.0 if self.OPENGL_CAMERA else 1.0),\n        )  # [num_rays, 3]\n\n        # [n_cams, height, width, 3]\n        directions = (camera_dirs[:, None, :] * c2w[:, :3, :3]).sum(dim=-1)\n        origins = torch.broadcast_to(c2w[:, :3, -1], directions.shape)\n        viewdirs = directions / torch.linalg.norm(\n            directions, dim=-1, keepdims=True\n        )\n\n        if self.training:\n            origins = torch.reshape(origins, (num_rays, 3))\n            viewdirs = torch.reshape(viewdirs, (num_rays, 3))\n            rgba = torch.reshape(rgba, (num_rays, 4))\n        else:\n            origins = torch.reshape(origins, (self.HEIGHT, self.WIDTH, 3))\n            viewdirs = torch.reshape(viewdirs, (self.HEIGHT, self.WIDTH, 3))\n            rgba = torch.reshape(rgba, (self.HEIGHT, self.WIDTH, 4))\n\n        rays = Rays(origins=origins, viewdirs=viewdirs)\n\n        return {\n            \"rgba\": rgba,  # [h, w, 4] or [num_rays, 4]\n            \"rays\": rays,  # [h, w, 3] or [num_rays, 3]\n        }\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/datasets/utils.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nimport collections\n\nRays = collections.namedtuple(\"Rays\", (\"origins\", \"viewdirs\"))\n\n\ndef namedtuple_map(fn, tup):\n    \"\"\"Apply `fn` to each element of `tup` and cast to `tup`'s namedtuple.\"\"\"\n    return type(tup)(*(None if x is None else fn(x) for x in tup))\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/radiance_fields/__init__.py",
    "content": ""
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/radiance_fields/mlp.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nimport functools\nimport math\nfrom typing import Callable, Optional\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass MLP(nn.Module):\n    def __init__(\n        self,\n        input_dim: int,  # The number of input tensor channels.\n        output_dim: int = None,  # The number of output tensor channels.\n        net_depth: int = 8,  # The depth of the MLP.\n        net_width: int = 256,  # The width of the MLP.\n        skip_layer: int = 4,  # The layer to add skip layers to.\n        hidden_init: Callable = nn.init.xavier_uniform_,\n        hidden_activation: Callable = nn.ReLU(),\n        output_enabled: bool = True,\n        output_init: Optional[Callable] = nn.init.xavier_uniform_,\n        output_activation: Optional[Callable] = nn.Identity(),\n        bias_enabled: bool = True,\n        bias_init: Callable = nn.init.zeros_,\n    ):\n        super().__init__()\n        self.input_dim = input_dim\n        self.output_dim = output_dim\n        self.net_depth = net_depth\n        self.net_width = net_width\n        self.skip_layer = skip_layer\n        self.hidden_init = hidden_init\n        self.hidden_activation = hidden_activation\n        self.output_enabled = output_enabled\n        self.output_init = output_init\n        self.output_activation = output_activation\n        self.bias_enabled = bias_enabled\n        self.bias_init = bias_init\n\n        self.hidden_layers = nn.ModuleList()\n        in_features = self.input_dim\n        for i in range(self.net_depth):\n            self.hidden_layers.append(\n                nn.Linear(in_features, self.net_width, bias=bias_enabled)\n            )\n            if (\n                (self.skip_layer is not None)\n                and (i % self.skip_layer == 0)\n                and (i > 0)\n            ):\n                in_features = self.net_width + self.input_dim\n            else:\n                
in_features = self.net_width\n        if self.output_enabled:\n            self.output_layer = nn.Linear(\n                in_features, self.output_dim, bias=bias_enabled\n            )\n        else:\n            self.output_dim = in_features\n\n        self.initialize()\n\n    def initialize(self):\n        def init_func_hidden(m):\n            if isinstance(m, nn.Linear):\n                if self.hidden_init is not None:\n                    self.hidden_init(m.weight)\n                if self.bias_enabled and self.bias_init is not None:\n                    self.bias_init(m.bias)\n\n        self.hidden_layers.apply(init_func_hidden)\n        if self.output_enabled:\n\n            def init_func_output(m):\n                if isinstance(m, nn.Linear):\n                    if self.output_init is not None:\n                        self.output_init(m.weight)\n                    if self.bias_enabled and self.bias_init is not None:\n                        self.bias_init(m.bias)\n\n            self.output_layer.apply(init_func_output)\n\n    def forward(self, x):\n        inputs = x\n        for i in range(self.net_depth):\n            x = self.hidden_layers[i](x)\n            x = self.hidden_activation(x)\n            if (\n                (self.skip_layer is not None)\n                and (i % self.skip_layer == 0)\n                and (i > 0)\n            ):\n                x = torch.cat([x, inputs], dim=-1)\n        if self.output_enabled:\n            x = self.output_layer(x)\n            x = self.output_activation(x)\n        return x\n\n\nclass DenseLayer(MLP):\n    def __init__(self, input_dim, output_dim, **kwargs):\n        super().__init__(\n            input_dim=input_dim,\n            output_dim=output_dim,\n            net_depth=0,  # no hidden layers\n            **kwargs,\n        )\n\n\nclass NerfMLP(nn.Module):\n    def __init__(\n        self,\n        input_dim: int,  # The number of input tensor channels.\n        condition_dim: int,  # The 
number of condition tensor channels.\n        net_depth: int = 8,  # The depth of the MLP.\n        net_width: int = 256,  # The width of the MLP.\n        skip_layer: int = 4,  # The layer to add skip layers to.\n        net_depth_condition: int = 1,  # The depth of the second part of MLP.\n        net_width_condition: int = 128,  # The width of the second part of MLP.\n    ):\n        super().__init__()\n        self.base = MLP(\n            input_dim=input_dim,\n            net_depth=net_depth,\n            net_width=net_width,\n            skip_layer=skip_layer,\n            output_enabled=False,\n        )\n        hidden_features = self.base.output_dim\n        self.sigma_layer = DenseLayer(hidden_features, 1)\n\n        if condition_dim > 0:\n            self.bottleneck_layer = DenseLayer(hidden_features, net_width)\n            self.rgb_layer = MLP(\n                input_dim=net_width + condition_dim,\n                output_dim=3,\n                net_depth=net_depth_condition,\n                net_width=net_width_condition,\n                skip_layer=None,\n            )\n        else:\n            self.rgb_layer = DenseLayer(hidden_features, 3)\n\n    def query_density(self, x):\n        x = self.base(x)\n        raw_sigma = self.sigma_layer(x)\n        return raw_sigma\n\n    def forward(self, x, condition=None):\n        x = self.base(x)\n        raw_sigma = self.sigma_layer(x)\n        if condition is not None:\n            if condition.shape[:-1] != x.shape[:-1]:\n                num_rays, n_dim = condition.shape\n                condition = condition.view(\n                    [num_rays] + [1] * (x.dim() - condition.dim()) + [n_dim]\n                ).expand(list(x.shape[:-1]) + [n_dim])\n            bottleneck = self.bottleneck_layer(x)\n            x = torch.cat([bottleneck, condition], dim=-1)\n        raw_rgb = self.rgb_layer(x)\n        return raw_rgb, raw_sigma\n\n\nclass SinusoidalEncoder(nn.Module):\n    \"\"\"Sinusoidal Positional Encoder 
used in Nerf.\"\"\"\n\n    def __init__(self, x_dim, min_deg, max_deg, use_identity: bool = True):\n        super().__init__()\n        self.x_dim = x_dim\n        self.min_deg = min_deg\n        self.max_deg = max_deg\n        self.use_identity = use_identity\n        self.register_buffer(\n            \"scales\", torch.tensor([2**i for i in range(min_deg, max_deg)])\n        )\n\n    @property\n    def latent_dim(self) -> int:\n        return (\n            int(self.use_identity) + (self.max_deg - self.min_deg) * 2\n        ) * self.x_dim\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Args:\n            x: [..., x_dim]\n        Returns:\n            latent: [..., latent_dim]\n        \"\"\"\n        if self.max_deg == self.min_deg:\n            return x\n        xb = torch.reshape(\n            (x[Ellipsis, None, :] * self.scales[:, None]),\n            list(x.shape[:-1]) + [(self.max_deg - self.min_deg) * self.x_dim],\n        )\n        latent = torch.sin(torch.cat([xb, xb + 0.5 * math.pi], dim=-1))\n        if self.use_identity:\n            latent = torch.cat([x] + [latent], dim=-1)\n        return latent\n\n\nclass VanillaNeRFRadianceField(nn.Module):\n    def __init__(\n        self,\n        net_depth: int = 8,  # The depth of the MLP.\n        net_width: int = 256,  # The width of the MLP.\n        skip_layer: int = 4,  # The layer to add skip layers to.\n        net_depth_condition: int = 1,  # The depth of the second part of MLP.\n        net_width_condition: int = 128,  # The width of the second part of MLP.\n    ) -> None:\n        super().__init__()\n        self.posi_encoder = SinusoidalEncoder(3, 0, 10, True)\n        self.view_encoder = SinusoidalEncoder(3, 0, 4, True)\n        self.mlp = NerfMLP(\n            input_dim=self.posi_encoder.latent_dim,\n            condition_dim=self.view_encoder.latent_dim,\n            net_depth=net_depth,\n            net_width=net_width,\n            skip_layer=skip_layer,\n   
         net_depth_condition=net_depth_condition,\n            net_width_condition=net_width_condition,\n        )\n\n    def query_opacity(self, x, step_size):\n        density = self.query_density(x)\n        # if the density is small enough those two are the same.\n        # opacity = 1.0 - torch.exp(-density * step_size)\n        opacity = density * step_size\n        return opacity\n\n    def query_density(self, x):\n        x = self.posi_encoder(x)\n        sigma = self.mlp.query_density(x)\n        return F.relu(sigma)\n\n    def forward(self, x, condition=None):\n        x = self.posi_encoder(x)\n        if condition is not None:\n            condition = self.view_encoder(condition)\n        rgb, sigma = self.mlp(x, condition=condition)\n        return torch.sigmoid(rgb), F.relu(sigma)\n\n\nclass DNeRFRadianceField(nn.Module):\n    def __init__(self) -> None:\n        super().__init__()\n        self.posi_encoder = SinusoidalEncoder(3, 0, 4, True)\n        self.time_encoder = SinusoidalEncoder(1, 0, 4, True)\n        self.warp = MLP(\n            input_dim=self.posi_encoder.latent_dim\n            + self.time_encoder.latent_dim,\n            output_dim=3,\n            net_depth=4,\n            net_width=64,\n            skip_layer=2,\n            output_init=functools.partial(torch.nn.init.uniform_, b=1e-4),\n        )\n        self.nerf = VanillaNeRFRadianceField()\n\n    def query_opacity(self, x, timestamps, step_size):\n        idxs = torch.randint(0, len(timestamps), (x.shape[0],), device=x.device)\n        t = timestamps[idxs]\n        density = self.query_density(x, t)\n        # if the density is small enough those two are the same.\n        # opacity = 1.0 - torch.exp(-density * step_size)\n        opacity = density * step_size\n        return opacity\n\n    def query_density(self, x, t):\n        x = x + self.warp(\n            torch.cat([self.posi_encoder(x), self.time_encoder(t)], dim=-1)\n        )\n        return self.nerf.query_density(x)\n\n 
   def forward(self, x, t, condition=None):\n        x = x + self.warp(\n            torch.cat([self.posi_encoder(x), self.time_encoder(t)], dim=-1)\n        )\n        return self.nerf(x, condition=condition)\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/radiance_fields/ngp.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nfrom typing import Callable, List, Union\n\nimport torch\nfrom torch.autograd import Function\nfrom torch.cuda.amp import custom_bwd, custom_fwd\n\ntry:\n    import tinycudann as tcnn\nexcept ImportError as e:\n    print(\n        f\"Error: {e}! \"\n        \"Please install tinycudann by: \"\n        \"pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch\"\n    )\n    exit()\n\n\nclass _TruncExp(Function):  # pylint: disable=abstract-method\n    # Implementation from torch-ngp:\n    # https://github.com/ashawkey/torch-ngp/blob/93b08a0d4ec1cc6e69d85df7f0acdfb99603b628/activation.py\n    @staticmethod\n    @custom_fwd(cast_inputs=torch.float32)\n    def forward(ctx, x):  # pylint: disable=arguments-differ\n        ctx.save_for_backward(x)\n        return torch.exp(x)\n\n    @staticmethod\n    @custom_bwd\n    def backward(ctx, g):  # pylint: disable=arguments-differ\n        x = ctx.saved_tensors[0]\n        return g * torch.exp(torch.clamp(x, max=15))\n\n\ntrunc_exp = _TruncExp.apply\n\n\ndef contract_to_unisphere(\n    x: torch.Tensor,\n    aabb: torch.Tensor,\n    eps: float = 1e-6,\n    derivative: bool = False,\n):\n    aabb_min, aabb_max = torch.split(aabb, 3, dim=-1)\n    x = (x - aabb_min) / (aabb_max - aabb_min)\n    x = x * 2 - 1  # aabb is at [-1, 1]\n    mag = x.norm(dim=-1, keepdim=True)\n    mask = mag.squeeze(-1) > 1\n\n    if derivative:\n        dev = (2 * mag - 1) / mag**2 + 2 * x**2 * (\n            1 / mag**3 - (2 * mag - 1) / mag**4\n        )\n        dev[~mask] = 1.0\n        dev = torch.clamp(dev, min=eps)\n        return dev\n    else:\n        x[mask] = (2 - 1 / mag[mask]) * (x[mask] / mag[mask])\n        x = x / 4 + 0.5  # [-inf, inf] is at [0, 1]\n        return x\n\n\nclass NGPradianceField(torch.nn.Module):\n    \"\"\"Instance-NGP radiance Field\"\"\"\n\n    def __init__(\n        self,\n        aabb: Union[torch.Tensor, 
List[float]],\n        num_dim: int = 3,\n        use_viewdirs: bool = True,\n        density_activation: Callable = lambda x: trunc_exp(x - 1),\n        unbounded: bool = False,\n        geo_feat_dim: int = 15,\n        n_levels: int = 16,\n        log2_hashmap_size: int = 19,\n    ) -> None:\n        super().__init__()\n        if not isinstance(aabb, torch.Tensor):\n            aabb = torch.tensor(aabb, dtype=torch.float32)\n        self.register_buffer(\"aabb\", aabb)\n        self.num_dim = num_dim\n        self.use_viewdirs = use_viewdirs\n        self.density_activation = density_activation\n        self.unbounded = unbounded\n\n        self.geo_feat_dim = geo_feat_dim\n        per_level_scale = 1.4472692012786865\n\n        if self.use_viewdirs:\n            self.direction_encoding = tcnn.Encoding(\n                n_input_dims=num_dim,\n                encoding_config={\n                    \"otype\": \"Composite\",\n                    \"nested\": [\n                        {\n                            \"n_dims_to_encode\": 3,\n                            \"otype\": \"SphericalHarmonics\",\n                            \"degree\": 4,\n                        },\n                        # {\"otype\": \"Identity\", \"n_bins\": 4, \"degree\": 4},\n                    ],\n                },\n            )\n\n        self.mlp_base = tcnn.NetworkWithInputEncoding(\n            n_input_dims=num_dim,\n            n_output_dims=1 + self.geo_feat_dim,\n            encoding_config={\n                \"otype\": \"HashGrid\",\n                \"n_levels\": n_levels,\n                \"n_features_per_level\": 2,\n                \"log2_hashmap_size\": log2_hashmap_size,\n                \"base_resolution\": 16,\n                \"per_level_scale\": per_level_scale,\n            },\n            network_config={\n                \"otype\": \"FullyFusedMLP\",\n                \"activation\": \"ReLU\",\n                \"output_activation\": \"None\",\n                
\"n_neurons\": 64,\n                \"n_hidden_layers\": 1,\n            },\n        )\n        if self.geo_feat_dim > 0:\n            self.mlp_head = tcnn.Network(\n                n_input_dims=(\n                    (\n                        self.direction_encoding.n_output_dims\n                        if self.use_viewdirs\n                        else 0\n                    )\n                    + self.geo_feat_dim\n                ),\n                n_output_dims=3,\n                network_config={\n                    \"otype\": \"FullyFusedMLP\",\n                    \"activation\": \"ReLU\",\n                    \"output_activation\": \"Sigmoid\",\n                    \"n_neurons\": 64,\n                    \"n_hidden_layers\": 2,\n                },\n            )\n\n    def query_density(self, x, return_feat: bool = False):\n        if self.unbounded:\n            x = contract_to_unisphere(x, self.aabb)\n        else:\n            aabb_min, aabb_max = torch.split(self.aabb, self.num_dim, dim=-1)\n            x = (x - aabb_min) / (aabb_max - aabb_min)\n        selector = ((x > 0.0) & (x < 1.0)).all(dim=-1)\n        x = (\n            self.mlp_base(x.view(-1, self.num_dim))\n            .view(list(x.shape[:-1]) + [1 + self.geo_feat_dim])\n            .to(x)\n        )\n        density_before_activation, base_mlp_out = torch.split(\n            x, [1, self.geo_feat_dim], dim=-1\n        )\n        density = (\n            self.density_activation(density_before_activation)\n            * selector[..., None]\n        )\n        if return_feat:\n            return density, base_mlp_out\n        else:\n            return density\n\n    def _query_rgb(self, dir, embedding):\n        # tcnn requires directions in the range [0, 1]\n        if self.use_viewdirs:\n            dir = (dir + 1.0) / 2.0\n            d = self.direction_encoding(dir.view(-1, dir.shape[-1]))\n            h = torch.cat([d, embedding.view(-1, self.geo_feat_dim)], dim=-1)\n        else:\n  
          h = embedding.view(-1, self.geo_feat_dim)\n        rgb = (\n            self.mlp_head(h)\n            .view(list(embedding.shape[:-1]) + [3])\n            .to(embedding)\n        )\n        return rgb\n\n    def forward(\n        self,\n        positions: torch.Tensor,\n        directions: torch.Tensor = None,\n    ):\n        if self.use_viewdirs and (directions is not None):\n            assert (\n                positions.shape == directions.shape\n            ), f\"{positions.shape} v.s. {directions.shape}\"\n            density, embedding = self.query_density(positions, return_feat=True)\n            rgb = self._query_rgb(directions, embedding=embedding)\n        return rgb, density\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/requirements.txt",
    "content": "git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch\nopencv-python\nimageio\nnumpy\ntqdm\nscipy"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/train_mlp_dnerf.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nimport argparse\nimport math\nimport os\nimport time\n\nimport imageio\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\nimport tqdm\nfrom datasets.dnerf_synthetic import SubjectLoader\nfrom radiance_fields.mlp import DNeRFRadianceField\nfrom utils import render_image, set_random_seed\n\nfrom nerfacc import ContractionType, OccupancyGrid\n\nif __name__ == \"__main__\":\n\n    device = \"cuda:0\"\n    set_random_seed(42)\n\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--train_split\",\n        type=str,\n        default=\"train\",\n        choices=[\"train\"],\n        help=\"which train split to use\",\n    )\n    parser.add_argument(\n        \"--scene\",\n        type=str,\n        default=\"lego\",\n        choices=[\n            # dnerf\n            \"bouncingballs\",\n            \"hellwarrior\",\n            \"hook\",\n            \"jumpingjacks\",\n            \"lego\",\n            \"mutant\",\n            \"standup\",\n            \"trex\",\n        ],\n        help=\"which scene to use\",\n    )\n    parser.add_argument(\n        \"--aabb\",\n        type=lambda s: [float(item) for item in s.split(\",\")],\n        default=\"-1.5,-1.5,-1.5,1.5,1.5,1.5\",\n        help=\"delimited list input\",\n    )\n    parser.add_argument(\n        \"--test_chunk_size\",\n        type=int,\n        default=8192,\n    )\n    parser.add_argument(\"--cone_angle\", type=float, default=0.0)\n    args = parser.parse_args()\n\n    render_n_samples = 1024\n\n    # setup the scene bounding box.\n    contraction_type = ContractionType.AABB\n    scene_aabb = torch.tensor(args.aabb, dtype=torch.float32, device=device)\n    near_plane = None\n    far_plane = None\n    render_step_size = (\n        (scene_aabb[3:] - scene_aabb[:3]).max()\n        * math.sqrt(3)\n        / render_n_samples\n    ).item()\n\n    # setup the radiance field we want to train.\n   
 max_steps = 30000\n    grad_scaler = torch.cuda.amp.GradScaler(1)\n    radiance_field = DNeRFRadianceField().to(device)\n    optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4)\n    scheduler = torch.optim.lr_scheduler.MultiStepLR(\n        optimizer,\n        milestones=[\n            max_steps // 2,\n            max_steps * 3 // 4,\n            max_steps * 5 // 6,\n            max_steps * 9 // 10,\n        ],\n        gamma=0.33,\n    )\n    # setup the dataset\n    data_root_fp = \"/home/ruilongli/data/dnerf/\"\n    target_sample_batch_size = 1 << 16\n    grid_resolution = 128\n\n    train_dataset = SubjectLoader(\n        subject_id=args.scene,\n        root_fp=data_root_fp,\n        split=args.train_split,\n        num_rays=target_sample_batch_size // render_n_samples,\n    )\n    train_dataset.images = train_dataset.images.to(device)\n    train_dataset.camtoworlds = train_dataset.camtoworlds.to(device)\n    train_dataset.K = train_dataset.K.to(device)\n    train_dataset.timestamps = train_dataset.timestamps.to(device)\n\n    test_dataset = SubjectLoader(\n        subject_id=args.scene,\n        root_fp=data_root_fp,\n        split=\"test\",\n        num_rays=None,\n    )\n    test_dataset.images = test_dataset.images.to(device)\n    test_dataset.camtoworlds = test_dataset.camtoworlds.to(device)\n    test_dataset.K = test_dataset.K.to(device)\n    test_dataset.timestamps = test_dataset.timestamps.to(device)\n\n    occupancy_grid = OccupancyGrid(\n        roi_aabb=args.aabb,\n        resolution=grid_resolution,\n        contraction_type=contraction_type,\n    ).to(device)\n\n    # training\n    step = 0\n    tic = time.time()\n    for epoch in range(10000000):\n        for i in range(len(train_dataset)):\n            radiance_field.train()\n            data = train_dataset[i]\n\n            render_bkgd = data[\"color_bkgd\"]\n            rays = data[\"rays\"]\n            pixels = data[\"pixels\"]\n            timestamps = 
data[\"timestamps\"]\n\n            # update occupancy grid\n            occupancy_grid.every_n_step(\n                step=step,\n                occ_eval_fn=lambda x: radiance_field.query_opacity(\n                    x, timestamps, render_step_size\n                ),\n            )\n\n            # render\n            rgb, acc, depth, n_rendering_samples = render_image(\n                radiance_field,\n                occupancy_grid,\n                rays,\n                scene_aabb,\n                # rendering options\n                near_plane=near_plane,\n                far_plane=far_plane,\n                render_step_size=render_step_size,\n                render_bkgd=render_bkgd,\n                cone_angle=args.cone_angle,\n                alpha_thre=0.01 if step > 1000 else 0.00,\n                # dnerf options\n                timestamps=timestamps,\n            )\n            if n_rendering_samples == 0:\n                continue\n\n            # dynamic batch size for rays to keep sample batch size constant.\n            num_rays = len(pixels)\n            num_rays = int(\n                num_rays\n                * (target_sample_batch_size / float(n_rendering_samples))\n            )\n            train_dataset.update_num_rays(num_rays)\n            alive_ray_mask = acc.squeeze(-1) > 0\n\n            # compute loss\n            loss = F.smooth_l1_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])\n\n            optimizer.zero_grad()\n            # do not unscale it because we are using Adam.\n            grad_scaler.scale(loss).backward()\n            optimizer.step()\n            scheduler.step()\n\n            if step % 5000 == 0:\n                elapsed_time = time.time() - tic\n                loss = F.mse_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])\n                print(\n                    f\"elapsed_time={elapsed_time:.2f}s | step={step} | \"\n                    f\"loss={loss:.5f} | \"\n                    
f\"alive_ray_mask={alive_ray_mask.long().sum():d} | \"\n                    f\"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} |\"\n                )\n\n            if step >= 0 and step % max_steps == 0 and step > 0:\n                # evaluation\n                radiance_field.eval()\n\n                psnrs = []\n                with torch.no_grad():\n                    for i in tqdm.tqdm(range(len(test_dataset))):\n                        data = test_dataset[i]\n                        render_bkgd = data[\"color_bkgd\"]\n                        rays = data[\"rays\"]\n                        pixels = data[\"pixels\"]\n                        timestamps = data[\"timestamps\"]\n\n                        # rendering\n                        rgb, acc, depth, _ = render_image(\n                            radiance_field,\n                            occupancy_grid,\n                            rays,\n                            scene_aabb,\n                            # rendering options\n                            near_plane=None,\n                            far_plane=None,\n                            render_step_size=render_step_size,\n                            render_bkgd=render_bkgd,\n                            cone_angle=args.cone_angle,\n                            alpha_thre=0.01,\n                            # test options\n                            test_chunk_size=args.test_chunk_size,\n                            # dnerf options\n                            timestamps=timestamps,\n                        )\n                        mse = F.mse_loss(rgb, pixels)\n                        psnr = -10.0 * torch.log(mse) / np.log(10.0)\n                        psnrs.append(psnr.item())\n                        # imageio.imwrite(\n                        #     \"acc_binary_test.png\",\n                        #     ((acc > 0).float().cpu().numpy() * 255).astype(np.uint8),\n                        # )\n                        # 
imageio.imwrite(\n                        #     \"rgb_test.png\",\n                        #     (rgb.cpu().numpy() * 255).astype(np.uint8),\n                        # )\n                        # break\n                psnr_avg = sum(psnrs) / len(psnrs)\n                print(f\"evaluation: psnr_avg={psnr_avg}\")\n                train_dataset.training = True\n\n            if step == max_steps:\n                print(\"training stops\")\n                exit()\n\n            step += 1\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/train_mlp_nerf.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nimport argparse\nimport math\nimport os\nimport time\n\nimport imageio\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\nimport tqdm\nfrom radiance_fields.mlp import VanillaNeRFRadianceField\nfrom utils import render_image, set_random_seed\n\nfrom nerfacc import ContractionType, OccupancyGrid\n\nif __name__ == \"__main__\":\n\n    device = \"cuda:0\"\n    set_random_seed(42)\n\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--train_split\",\n        type=str,\n        default=\"trainval\",\n        choices=[\"train\", \"trainval\"],\n        help=\"which train split to use\",\n    )\n    parser.add_argument(\n        \"--scene\",\n        type=str,\n        default=\"lego\",\n        choices=[\n            # nerf synthetic\n            \"chair\",\n            \"drums\",\n            \"ficus\",\n            \"hotdog\",\n            \"lego\",\n            \"materials\",\n            \"mic\",\n            \"ship\",\n            # mipnerf360 unbounded\n            \"garden\",\n        ],\n        help=\"which scene to use\",\n    )\n    parser.add_argument(\n        \"--aabb\",\n        type=lambda s: [float(item) for item in s.split(\",\")],\n        default=\"-1.5,-1.5,-1.5,1.5,1.5,1.5\",\n        help=\"delimited list input\",\n    )\n    parser.add_argument(\n        \"--test_chunk_size\",\n        type=int,\n        default=8192,\n    )\n    parser.add_argument(\n        \"--unbounded\",\n        action=\"store_true\",\n        help=\"whether to use unbounded rendering\",\n    )\n    parser.add_argument(\"--cone_angle\", type=float, default=0.0)\n    args = parser.parse_args()\n\n    render_n_samples = 1024\n\n    # setup the scene bounding box.\n    if args.unbounded:\n        print(\"Using unbounded rendering\")\n        contraction_type = ContractionType.UN_BOUNDED_SPHERE\n        # contraction_type = ContractionType.UN_BOUNDED_TANH\n    
    scene_aabb = None\n        near_plane = 0.2\n        far_plane = 1e4\n        render_step_size = 1e-2\n    else:\n        contraction_type = ContractionType.AABB\n        scene_aabb = torch.tensor(args.aabb, dtype=torch.float32, device=device)\n        near_plane = None\n        far_plane = None\n        render_step_size = (\n            (scene_aabb[3:] - scene_aabb[:3]).max()\n            * math.sqrt(3)\n            / render_n_samples\n        ).item()\n\n    # setup the radiance field we want to train.\n    max_steps = 50000\n    grad_scaler = torch.cuda.amp.GradScaler(1)\n    radiance_field = VanillaNeRFRadianceField().to(device)\n    optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4)\n    scheduler = torch.optim.lr_scheduler.MultiStepLR(\n        optimizer,\n        milestones=[\n            max_steps // 2,\n            max_steps * 3 // 4,\n            max_steps * 5 // 6,\n            max_steps * 9 // 10,\n        ],\n        gamma=0.33,\n    )\n\n    # setup the dataset\n    train_dataset_kwargs = {}\n    test_dataset_kwargs = {}\n    if args.scene == \"garden\":\n        from datasets.nerf_360_v2 import SubjectLoader\n\n        data_root_fp = \"/home/ruilongli/data/360_v2/\"\n        target_sample_batch_size = 1 << 16\n        train_dataset_kwargs = {\"color_bkgd_aug\": \"random\", \"factor\": 4}\n        test_dataset_kwargs = {\"factor\": 4}\n        grid_resolution = 128\n    else:\n        from datasets.nerf_synthetic import SubjectLoader\n\n        data_root_fp = \"/home/ruilongli/data/nerf_synthetic/\"\n        target_sample_batch_size = 1 << 16\n        grid_resolution = 128\n\n    train_dataset = SubjectLoader(\n        subject_id=args.scene,\n        root_fp=data_root_fp,\n        split=args.train_split,\n        num_rays=target_sample_batch_size // render_n_samples,\n        **train_dataset_kwargs,\n    )\n\n    train_dataset.images = train_dataset.images.to(device)\n    train_dataset.camtoworlds = 
train_dataset.camtoworlds.to(device)\n    train_dataset.K = train_dataset.K.to(device)\n\n    test_dataset = SubjectLoader(\n        subject_id=args.scene,\n        root_fp=data_root_fp,\n        split=\"test\",\n        num_rays=None,\n        **test_dataset_kwargs,\n    )\n    test_dataset.images = test_dataset.images.to(device)\n    test_dataset.camtoworlds = test_dataset.camtoworlds.to(device)\n    test_dataset.K = test_dataset.K.to(device)\n\n    occupancy_grid = OccupancyGrid(\n        roi_aabb=args.aabb,\n        resolution=grid_resolution,\n        contraction_type=contraction_type,\n    ).to(device)\n\n    # training\n    step = 0\n    tic = time.time()\n    for epoch in range(10000000):\n        for i in range(len(train_dataset)):\n            radiance_field.train()\n            data = train_dataset[i]\n\n            render_bkgd = data[\"color_bkgd\"]\n            rays = data[\"rays\"]\n            pixels = data[\"pixels\"]\n\n            # update occupancy grid\n            occupancy_grid.every_n_step(\n                step=step,\n                occ_eval_fn=lambda x: radiance_field.query_opacity(\n                    x, render_step_size\n                ),\n            )\n\n            # render\n            rgb, acc, depth, n_rendering_samples = render_image(\n                radiance_field,\n                occupancy_grid,\n                rays,\n                scene_aabb,\n                # rendering options\n                near_plane=near_plane,\n                far_plane=far_plane,\n                render_step_size=render_step_size,\n                render_bkgd=render_bkgd,\n                cone_angle=args.cone_angle,\n            )\n            if n_rendering_samples == 0:\n                continue\n\n            # dynamic batch size for rays to keep sample batch size constant.\n            num_rays = len(pixels)\n            num_rays = int(\n                num_rays\n                * (target_sample_batch_size / float(n_rendering_samples))\n     
       )\n            train_dataset.update_num_rays(num_rays)\n            alive_ray_mask = acc.squeeze(-1) > 0\n\n            # compute loss\n            loss = F.smooth_l1_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])\n\n            optimizer.zero_grad()\n            # do not unscale it because we are using Adam.\n            grad_scaler.scale(loss).backward()\n            optimizer.step()\n            scheduler.step()\n\n            if step % 5000 == 0:\n                elapsed_time = time.time() - tic\n                loss = F.mse_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])\n                print(\n                    f\"elapsed_time={elapsed_time:.2f}s | step={step} | \"\n                    f\"loss={loss:.5f} | \"\n                    f\"alive_ray_mask={alive_ray_mask.long().sum():d} | \"\n                    f\"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} |\"\n                )\n\n            if step >= 0 and step % max_steps == 0 and step > 0:\n                # evaluation\n                radiance_field.eval()\n\n                psnrs = []\n                with torch.no_grad():\n                    for i in tqdm.tqdm(range(len(test_dataset))):\n                        data = test_dataset[i]\n                        render_bkgd = data[\"color_bkgd\"]\n                        rays = data[\"rays\"]\n                        pixels = data[\"pixels\"]\n\n                        # rendering\n                        rgb, acc, depth, _ = render_image(\n                            radiance_field,\n                            occupancy_grid,\n                            rays,\n                            scene_aabb,\n                            # rendering options\n                            near_plane=None,\n                            far_plane=None,\n                            render_step_size=render_step_size,\n                            render_bkgd=render_bkgd,\n                            cone_angle=args.cone_angle,\n      
                      # test options\n                            test_chunk_size=args.test_chunk_size,\n                        )\n                        mse = F.mse_loss(rgb, pixels)\n                        psnr = -10.0 * torch.log(mse) / np.log(10.0)\n                        psnrs.append(psnr.item())\n                        # imageio.imwrite(\n                        #     \"acc_binary_test.png\",\n                        #     ((acc > 0).float().cpu().numpy() * 255).astype(np.uint8),\n                        # )\n                        # imageio.imwrite(\n                        #     \"rgb_test.png\",\n                        #     (rgb.cpu().numpy() * 255).astype(np.uint8),\n                        # )\n                        # break\n                psnr_avg = sum(psnrs) / len(psnrs)\n                print(f\"evaluation: psnr_avg={psnr_avg}\")\n                train_dataset.training = True\n\n            if step == max_steps:\n                print(\"training stops\")\n                exit()\n\n            step += 1\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/train_ngp_nerf.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nimport argparse\nimport math\nimport os\nimport time\n\nimport imageio\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\nimport tqdm\nfrom radiance_fields.ngp import NGPradianceField\nfrom utils import render_image, set_random_seed\n\nfrom nerfacc import ContractionType, OccupancyGrid\n\nif __name__ == \"__main__\":\n\n    device = \"cuda:0\"\n    set_random_seed(42)\n\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--train_split\",\n        type=str,\n        default=\"trainval\",\n        choices=[\"train\", \"trainval\"],\n        help=\"which train split to use\",\n    )\n    parser.add_argument(\n        \"--scene\",\n        type=str,\n        default=\"lego\",\n        choices=[\n            # nerf synthetic\n            \"chair\",\n            \"drums\",\n            \"ficus\",\n            \"hotdog\",\n            \"lego\",\n            \"materials\",\n            \"mic\",\n            \"ship\",\n            # mipnerf360 unbounded\n            \"garden\",\n            \"bicycle\",\n            \"bonsai\",\n            \"counter\",\n            \"kitchen\",\n            \"room\",\n            \"stump\",\n        ],\n        help=\"which scene to use\",\n    )\n    parser.add_argument(\n        \"--aabb\",\n        type=lambda s: [float(item) for item in s.split(\",\")],\n        default=\"-1.5,-1.5,-1.5,1.5,1.5,1.5\",\n        help=\"delimited list input\",\n    )\n    parser.add_argument(\n        \"--test_chunk_size\",\n        type=int,\n        default=8192,\n    )\n    parser.add_argument(\n        \"--unbounded\",\n        action=\"store_true\",\n        help=\"whether to use unbounded rendering\",\n    )\n    parser.add_argument(\n        \"--auto_aabb\",\n        action=\"store_true\",\n        help=\"whether to automatically compute the aabb\",\n    )\n    parser.add_argument(\"--cone_angle\", type=float, default=0.0)\n    
args = parser.parse_args()\n\n    render_n_samples = 1024\n\n    # setup the dataset\n    train_dataset_kwargs = {}\n    test_dataset_kwargs = {}\n    if args.unbounded:\n        from datasets.nerf_360_v2 import SubjectLoader\n\n        data_root_fp = \"/home/ruilongli/data/360_v2/\"\n        target_sample_batch_size = 1 << 20\n        train_dataset_kwargs = {\"color_bkgd_aug\": \"random\", \"factor\": 4}\n        test_dataset_kwargs = {\"factor\": 4}\n        grid_resolution = 256\n    else:\n        from datasets.nerf_synthetic import SubjectLoader\n\n        data_root_fp = \"/home/ruilongli/data/nerf_synthetic/\"\n        target_sample_batch_size = 1 << 18\n        grid_resolution = 128\n\n    train_dataset = SubjectLoader(\n        subject_id=args.scene,\n        root_fp=data_root_fp,\n        split=args.train_split,\n        num_rays=target_sample_batch_size // render_n_samples,\n        **train_dataset_kwargs,\n    )\n\n    train_dataset.images = train_dataset.images.to(device)\n    train_dataset.camtoworlds = train_dataset.camtoworlds.to(device)\n    train_dataset.K = train_dataset.K.to(device)\n\n    test_dataset = SubjectLoader(\n        subject_id=args.scene,\n        root_fp=data_root_fp,\n        split=\"test\",\n        num_rays=None,\n        **test_dataset_kwargs,\n    )\n    test_dataset.images = test_dataset.images.to(device)\n    test_dataset.camtoworlds = test_dataset.camtoworlds.to(device)\n    test_dataset.K = test_dataset.K.to(device)\n\n    if args.auto_aabb:\n        camera_locs = torch.cat(\n            [train_dataset.camtoworlds, test_dataset.camtoworlds]\n        )[:, :3, -1]\n        args.aabb = torch.cat(\n            [camera_locs.min(dim=0).values, camera_locs.max(dim=0).values]\n        ).tolist()\n        print(\"Using auto aabb\", args.aabb)\n\n    # setup the scene bounding box.\n    if args.unbounded:\n        print(\"Using unbounded rendering\")\n        contraction_type = ContractionType.UN_BOUNDED_SPHERE\n        # 
contraction_type = ContractionType.UN_BOUNDED_TANH\n        scene_aabb = None\n        near_plane = 0.2\n        far_plane = 1e4\n        render_step_size = 1e-2\n        alpha_thre = 1e-2\n    else:\n        contraction_type = ContractionType.AABB\n        scene_aabb = torch.tensor(args.aabb, dtype=torch.float32, device=device)\n        near_plane = None\n        far_plane = None\n        render_step_size = (\n            (scene_aabb[3:] - scene_aabb[:3]).max()\n            * math.sqrt(3)\n            / render_n_samples\n        ).item()\n        alpha_thre = 0.0\n\n    # setup the radiance field we want to train.\n    max_steps = 20000\n    grad_scaler = torch.cuda.amp.GradScaler(2**10)\n    radiance_field = NGPradianceField(\n        aabb=args.aabb,\n        unbounded=args.unbounded,\n    ).to(device)\n    optimizer = torch.optim.Adam(\n        radiance_field.parameters(), lr=1e-2, eps=1e-15\n    )\n    scheduler = torch.optim.lr_scheduler.MultiStepLR(\n        optimizer,\n        milestones=[max_steps // 2, max_steps * 3 // 4, max_steps * 9 // 10],\n        gamma=0.33,\n    )\n\n    occupancy_grid = OccupancyGrid(\n        roi_aabb=args.aabb,\n        resolution=grid_resolution,\n        contraction_type=contraction_type,\n    ).to(device)\n\n    # training\n    step = 0\n    tic = time.time()\n    for epoch in range(10000000):\n        for i in range(len(train_dataset)):\n            radiance_field.train()\n            data = train_dataset[i]\n\n            render_bkgd = data[\"color_bkgd\"]\n            rays = data[\"rays\"]\n            pixels = data[\"pixels\"]\n\n            def occ_eval_fn(x):\n                if args.cone_angle > 0.0:\n                    # randomly sample a camera for computing step size.\n                    camera_ids = torch.randint(\n                        0, len(train_dataset), (x.shape[0],), device=device\n                    )\n                    origins = train_dataset.camtoworlds[camera_ids, :3, -1]\n                    t = 
(origins - x).norm(dim=-1, keepdim=True)\n                    # compute actual step size used in marching, based on the distance to the camera.\n                    step_size = torch.clamp(\n                        t * args.cone_angle, min=render_step_size\n                    )\n                    # filter out the points that are not in the near far plane.\n                    if (near_plane is not None) and (far_plane is not None):\n                        step_size = torch.where(\n                            (t > near_plane) & (t < far_plane),\n                            step_size,\n                            torch.zeros_like(step_size),\n                        )\n                else:\n                    step_size = render_step_size\n                # compute occupancy\n                density = radiance_field.query_density(x)\n                return density * step_size\n\n            # update occupancy grid\n            occupancy_grid.every_n_step(step=step, occ_eval_fn=occ_eval_fn)\n\n            # render\n            rgb, acc, depth, n_rendering_samples = render_image(\n                radiance_field,\n                occupancy_grid,\n                rays,\n                scene_aabb,\n                # rendering options\n                near_plane=near_plane,\n                far_plane=far_plane,\n                render_step_size=render_step_size,\n                render_bkgd=render_bkgd,\n                cone_angle=args.cone_angle,\n                alpha_thre=alpha_thre,\n            )\n            if n_rendering_samples == 0:\n                continue\n\n            # dynamic batch size for rays to keep sample batch size constant.\n            num_rays = len(pixels)\n            num_rays = int(\n                num_rays\n                * (target_sample_batch_size / float(n_rendering_samples))\n            )\n            train_dataset.update_num_rays(num_rays)\n            alive_ray_mask = acc.squeeze(-1) > 0\n\n            # compute loss\n           
 loss = F.smooth_l1_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])\n\n            optimizer.zero_grad()\n            # do not unscale it because we are using Adam.\n            grad_scaler.scale(loss).backward()\n            optimizer.step()\n            scheduler.step()\n\n            if step % 10000 == 0:\n                elapsed_time = time.time() - tic\n                loss = F.mse_loss(rgb[alive_ray_mask], pixels[alive_ray_mask])\n                print(\n                    f\"elapsed_time={elapsed_time:.2f}s | step={step} | \"\n                    f\"loss={loss:.5f} | \"\n                    f\"alive_ray_mask={alive_ray_mask.long().sum():d} | \"\n                    f\"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} |\"\n                )\n\n            if step >= 0 and step % max_steps == 0 and step > 0:\n                # evaluation\n                radiance_field.eval()\n\n                psnrs = []\n                with torch.no_grad():\n                    for i in tqdm.tqdm(range(len(test_dataset))):\n                        data = test_dataset[i]\n                        render_bkgd = data[\"color_bkgd\"]\n                        rays = data[\"rays\"]\n                        pixels = data[\"pixels\"]\n\n                        # rendering\n                        rgb, acc, depth, _ = render_image(\n                            radiance_field,\n                            occupancy_grid,\n                            rays,\n                            scene_aabb,\n                            # rendering options\n                            near_plane=near_plane,\n                            far_plane=far_plane,\n                            render_step_size=render_step_size,\n                            render_bkgd=render_bkgd,\n                            cone_angle=args.cone_angle,\n                            alpha_thre=alpha_thre,\n                            # test options\n                            
test_chunk_size=args.test_chunk_size,\n                        )\n                        mse = F.mse_loss(rgb, pixels)\n                        psnr = -10.0 * torch.log(mse) / np.log(10.0)\n                        psnrs.append(psnr.item())\n                        # imageio.imwrite(\n                        #     \"acc_binary_test.png\",\n                        #     ((acc > 0).float().cpu().numpy() * 255).astype(np.uint8),\n                        # )\n                        # imageio.imwrite(\n                        #     \"rgb_test.png\",\n                        #     (rgb.cpu().numpy() * 255).astype(np.uint8),\n                        # )\n                        # break\n                psnr_avg = sum(psnrs) / len(psnrs)\n                print(f\"evaluation: psnr_avg={psnr_avg}\")\n                train_dataset.training = True\n\n            if step == max_steps:\n                print(\"training stops\")\n                exit()\n\n            step += 1\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/examples/utils.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nimport random\nfrom typing import Optional\n\nimport numpy as np\nimport torch\nfrom datasets.utils import Rays, namedtuple_map\n\nfrom nerfacc import OccupancyGrid, ray_marching, rendering\n\n\ndef set_random_seed(seed):\n    random.seed(seed)\n    np.random.seed(seed)\n    torch.manual_seed(seed)\n\n\ndef render_image(\n    # scene\n    radiance_field: torch.nn.Module,\n    occupancy_grid: OccupancyGrid,\n    rays: Rays,\n    scene_aabb: torch.Tensor,\n    # rendering options\n    near_plane: Optional[float] = None,\n    far_plane: Optional[float] = None,\n    render_step_size: float = 1e-3,\n    render_bkgd: Optional[torch.Tensor] = None,\n    cone_angle: float = 0.0,\n    alpha_thre: float = 0.0,\n    # test options\n    test_chunk_size: int = 8192,\n    # only useful for dnerf\n    timestamps: Optional[torch.Tensor] = None,\n):\n    \"\"\"Render the pixels of an image.\"\"\"\n    rays_shape = rays.origins.shape\n    if len(rays_shape) == 3:\n        height, width, _ = rays_shape\n        num_rays = height * width\n        rays = namedtuple_map(\n            lambda r: r.reshape([num_rays] + list(r.shape[2:])), rays\n        )\n    else:\n        num_rays, _ = rays_shape\n\n    def sigma_fn(t_starts, t_ends, ray_indices):\n        t_origins = chunk_rays.origins[ray_indices]\n        t_dirs = chunk_rays.viewdirs[ray_indices]\n        positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0\n        if timestamps is not None:\n            # dnerf\n            t = (\n                timestamps[ray_indices]\n                if radiance_field.training\n                else timestamps.expand_as(positions[:, :1])\n            )\n            return radiance_field.query_density(positions, t)\n        return radiance_field.query_density(positions)\n\n    def rgb_sigma_fn(t_starts, t_ends, ray_indices):\n        t_origins = chunk_rays.origins[ray_indices]\n        t_dirs = 
chunk_rays.viewdirs[ray_indices]\n        positions = t_origins + t_dirs * (t_starts + t_ends) / 2.0\n        if timestamps is not None:\n            # dnerf\n            t = (\n                timestamps[ray_indices]\n                if radiance_field.training\n                else timestamps.expand_as(positions[:, :1])\n            )\n            return radiance_field(positions, t, t_dirs)\n        return radiance_field(positions, t_dirs)\n\n    results = []\n    chunk = (\n        torch.iinfo(torch.int32).max\n        if radiance_field.training\n        else test_chunk_size\n    )\n    for i in range(0, num_rays, chunk):\n        chunk_rays = namedtuple_map(lambda r: r[i : i + chunk], rays)\n        ray_indices, t_starts, t_ends = ray_marching(\n            chunk_rays.origins,\n            chunk_rays.viewdirs,\n            scene_aabb=scene_aabb,\n            grid=occupancy_grid,\n            sigma_fn=sigma_fn,\n            near_plane=near_plane,\n            far_plane=far_plane,\n            render_step_size=render_step_size,\n            stratified=radiance_field.training,\n            cone_angle=cone_angle,\n            alpha_thre=alpha_thre,\n        )\n        rgb, opacity, depth = rendering(\n            t_starts,\n            t_ends,\n            ray_indices,\n            n_rays=chunk_rays.origins.shape[0],\n            rgb_sigma_fn=rgb_sigma_fn,\n            render_bkgd=render_bkgd,\n        )\n        chunk_results = [rgb, opacity, depth, len(t_starts)]\n        results.append(chunk_results)\n    colors, opacities, depths, n_rendering_samples = [\n        torch.cat(r, dim=0) if isinstance(r[0], torch.Tensor) else r\n        for r in zip(*results)\n    ]\n    return (\n        colors.view((*rays_shape[:-1], -1)),\n        opacities.view((*rays_shape[:-1], -1)),\n        depths.view((*rays_shape[:-1], -1)),\n        sum(n_rendering_samples),\n    )\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/__init__.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\nimport warnings\n\nfrom .cdf import ray_resampling\nfrom .contraction import ContractionType, contract, contract_inv\nfrom .grid import Grid, OccupancyGrid, query_grid\nfrom .intersection import ray_aabb_intersect\nfrom .losses import distortion as loss_distortion\nfrom .pack import pack_data, pack_info, unpack_data, unpack_info\nfrom .ray_marching import ray_marching\nfrom .version import __version__\nfrom .vol_rendering import (\n    accumulate_along_rays,\n    accumulate_along_rays_patch_based,\n    render_transmittance_from_alpha,\n    render_transmittance_from_density,\n    render_visibility,\n    render_visibility_patch_based,\n    render_weight_from_alpha,\n    render_weight_from_density,\n    render_weight_from_alpha_patch_based,\n    render_weight_and_transmittance_from_alpha_patch_based,\n    rendering,\n)\n\n\n# About to be deprecated\ndef unpack_to_ray_indices(*args, **kwargs):\n    warnings.warn(\n        \"`unpack_to_ray_indices` will be deprecated. 
Please use `unpack_info` instead.\",\n        DeprecationWarning,\n        stacklevel=2,\n    )\n    return unpack_info(*args, **kwargs)\n\n\n__all__ = [\n    \"__version__\",\n    \"Grid\",\n    \"OccupancyGrid\",\n    \"query_grid\",\n    \"ContractionType\",\n    \"contract\",\n    \"contract_inv\",\n    \"ray_aabb_intersect\",\n    \"ray_marching\",\n    \"accumulate_along_rays\",\n    \"accumulate_along_rays_patch_based\",\n    \"render_visibility\",\n    \"render_visibility_patch_based\",\n    \"render_weight_from_alpha\",\n    \"render_weight_from_alpha_patch_based\",\n    \"render_weight_from_density\",\n    \"rendering\",\n    \"pack_data\",\n    \"unpack_data\",\n    \"unpack_info\",\n    \"pack_info\",\n    \"ray_resampling\",\n    \"loss_distortion\",\n    \"unpack_to_ray_indices\",\n    \"render_transmittance_from_density\",\n    \"render_transmittance_from_alpha\",\n    \"render_weight_and_transmittance_from_alpha_patch_based\"\n]\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cdf.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nfrom typing import Tuple\n\nfrom torch import Tensor\n\nimport nerfacc.cuda as _C\n\n\ndef ray_resampling(\n    packed_info: Tensor,\n    t_starts: Tensor,\n    t_ends: Tensor,\n    weights: Tensor,\n    n_samples: int,\n) -> Tuple[Tensor, Tensor, Tensor]:\n    \"\"\"Resample a set of rays based on the CDF of the weights.\n\n    Args:\n        packed_info (Tensor): Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. Tensor with shape (n_rays, 2).\n        t_starts: Where the frustum-shape sample starts along a ray. Tensor with \\\n            shape (n_samples, 1).\n        t_ends: Where the frustum-shape sample ends along a ray. Tensor with \\\n            shape (n_samples, 1).\n        weights: Volumetric rendering weights for those samples. Tensor with shape \\\n            (n_samples,).\n        n_samples (int): Number of samples per ray to resample.\n\n    Returns:\n        Resampled packed info (n_rays, 2), t_starts (n_samples, 1), and t_ends (n_samples, 1).\n    \"\"\"\n    (\n        resampled_packed_info,\n        resampled_t_starts,\n        resampled_t_ends,\n    ) = _C.ray_resampling(\n        packed_info.contiguous(),\n        t_starts.contiguous(),\n        t_ends.contiguous(),\n        weights.contiguous(),\n        n_samples,\n    )\n    return resampled_packed_info, resampled_t_starts, resampled_t_ends\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/contraction.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nfrom enum import Enum\n\nimport torch\n\nimport nerfacc.cuda as _C\n\n\nclass ContractionType(Enum):\n    \"\"\"Space contraction options.\n\n    This is an enum class that describes how a :class:`nerfacc.Grid` covers the 3D space.\n    It is also used by :func:`nerfacc.ray_marching` to determine how to perform ray marching\n    within the grid.\n\n    The options in this enum class are:\n\n    Attributes:\n        AABB: Linearly map the region of interest :math:`[x_0, x_1]` to a\n            unit cube in :math:`[0, 1]`.\n\n            .. math:: f(x) = \\\\frac{x - x_0}{x_1 - x_0}\n\n        UN_BOUNDED_TANH: Contract an unbounded space into a unit cube in :math:`[0, 1]`\n            using tanh. The region of interest :math:`[x_0, x_1]` is first\n            mapped into :math:`[-0.5, +0.5]` before applying tanh.\n\n            .. math:: f(x) = \\\\frac{1}{2}(tanh(\\\\frac{x - x_0}{x_1 - x_0} - \\\\frac{1}{2}) + 1)\n\n        UN_BOUNDED_SPHERE: Contract an unbounded space into a unit sphere. Used in\n            `Mip-Nerf 360: Unbounded Anti-Aliased Neural Radiance Fields`_.\n\n            .. math:: \n                f(x) = \n                \\\\begin{cases}\n                z(x) & ||z(x)|| \\\\leq 1 \\\\\\\\\n                (2 - \\\\frac{1}{||z(x)||})(\\\\frac{z(x)}{||z(x)||}) & ||z(x)|| > 1\n                \\\\end{cases}\n            \n            .. math::\n                z(x) = \\\\frac{x - x_0}{x_1 - x_0} * 2 - 1\n\n            .. 
_Mip-Nerf 360\\: Unbounded Anti-Aliased Neural Radiance Fields:\n                https://arxiv.org/abs/2111.12077\n\n    \"\"\"\n\n    AABB = 0\n    UN_BOUNDED_TANH = 1\n    UN_BOUNDED_SPHERE = 2\n\n    def to_cpp_version(self):\n        \"\"\"Convert to the C++ version of the enum class.\n\n        Returns:\n            The C++ version of the enum class.\n\n        \"\"\"\n        return _C.ContractionTypeGetter(self.value)\n\n\n@torch.no_grad()\ndef contract(\n    x: torch.Tensor,\n    roi: torch.Tensor,\n    type: ContractionType = ContractionType.AABB,\n) -> torch.Tensor:\n    \"\"\"Contract the space into [0, 1]^3.\n\n    Args:\n        x (torch.Tensor): Un-contracted points.\n        roi (torch.Tensor): Region of interest.\n        type (ContractionType): Contraction type.\n\n    Returns:\n        torch.Tensor: Contracted points ([0, 1]^3).\n    \"\"\"\n    ctype = type.to_cpp_version()\n    return _C.contract(x.contiguous(), roi.contiguous(), ctype)\n\n\n@torch.no_grad()\ndef contract_inv(\n    x: torch.Tensor,\n    roi: torch.Tensor,\n    type: ContractionType = ContractionType.AABB,\n) -> torch.Tensor:\n    \"\"\"Recover the space from [0, 1]^3 by inverse contraction.\n\n    Args:\n        x (torch.Tensor): Contracted points ([0, 1]^3).\n        roi (torch.Tensor): Region of interest.\n        type (ContractionType): Contraction type.\n\n    Returns:\n        torch.Tensor: Un-contracted points.\n    \"\"\"\n    ctype = type.to_cpp_version()\n    return _C.contract_inv(x.contiguous(), roi.contiguous(), ctype)\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/__init__.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nfrom typing import Any, Callable\n\n\ndef _make_lazy_cuda_func(name: str) -> Callable:\n    def call_cuda(*args, **kwargs):\n        # pylint: disable=import-outside-toplevel\n        from ._backend import _C\n\n        return getattr(_C, name)(*args, **kwargs)\n\n    return call_cuda\n\n\nContractionTypeGetter = _make_lazy_cuda_func(\"ContractionType\")\ncontract = _make_lazy_cuda_func(\"contract\")\ncontract_inv = _make_lazy_cuda_func(\"contract_inv\")\n\ngrid_query = _make_lazy_cuda_func(\"grid_query\")\n\nray_aabb_intersect = _make_lazy_cuda_func(\"ray_aabb_intersect\")\nray_marching = _make_lazy_cuda_func(\"ray_marching\")\nray_resampling = _make_lazy_cuda_func(\"ray_resampling\")\n\nis_cub_available = _make_lazy_cuda_func(\"is_cub_available\")\ntransmittance_from_sigma_forward_cub = _make_lazy_cuda_func(\n    \"transmittance_from_sigma_forward_cub\"\n)\ntransmittance_from_sigma_backward_cub = _make_lazy_cuda_func(\n    \"transmittance_from_sigma_backward_cub\"\n)\ntransmittance_from_alpha_forward_cub = _make_lazy_cuda_func(\n    \"transmittance_from_alpha_forward_cub\"\n)\ntransmittance_from_alpha_backward_cub = _make_lazy_cuda_func(\n    \"transmittance_from_alpha_backward_cub\"\n)\n\ntransmittance_from_sigma_forward_naive = _make_lazy_cuda_func(\n    \"transmittance_from_sigma_forward_naive\"\n)\ntransmittance_from_sigma_backward_naive = _make_lazy_cuda_func(\n    \"transmittance_from_sigma_backward_naive\"\n)\ntransmittance_from_alpha_forward_naive = _make_lazy_cuda_func(\n    \"transmittance_from_alpha_forward_naive\"\n)\ntransmittance_from_alpha_backward_naive = _make_lazy_cuda_func(\n    \"transmittance_from_alpha_backward_naive\"\n)\n\ntransmittance_from_alpha_patch_based_forward_naive = _make_lazy_cuda_func(\n    \"transmittance_from_alpha_patch_based_forward_naive\"\n)\ntransmittance_from_alpha_patch_based_backward_naive = _make_lazy_cuda_func(\n    
\"transmittance_from_alpha_patch_based_backward_naive\"\n)\n\nweight_from_sigma_forward_naive = _make_lazy_cuda_func(\n    \"weight_from_sigma_forward_naive\"\n)\nweight_from_sigma_backward_naive = _make_lazy_cuda_func(\n    \"weight_from_sigma_backward_naive\"\n)\nweight_from_alpha_forward_naive = _make_lazy_cuda_func(\n    \"weight_from_alpha_forward_naive\"\n)\nweight_from_alpha_backward_naive = _make_lazy_cuda_func(\n    \"weight_from_alpha_backward_naive\"\n)\n\n# weight_from_alpha_importance_sampling_forward_naive = _make_lazy_cuda_func(\n#     \"weight_from_alpha_importance_sampling_forward_naive\"\n# )\n#\n# weight_from_alpha_importance_sampling_backward_naive = _make_lazy_cuda_func(\n#     \"weight_from_alpha_importance_sampling_backward_naive\"\n# )\n\nweight_from_alpha_patch_based_forward_naive = _make_lazy_cuda_func(\n    \"weight_from_alpha_patch_based_forward_naive\"\n)\nweight_from_alpha_patch_based_backward_naive = _make_lazy_cuda_func(\n    \"weight_from_alpha_patch_based_backward_naive\"\n)\nweight_and_transmittance_from_alpha_patch_based_forward_naive = _make_lazy_cuda_func(\n    \"weight_and_transmittance_from_alpha_patch_based_forward_naive\"\n)\nweight_and_transmittance_from_alpha_patch_based_backward_naive = _make_lazy_cuda_func(\n    \"weight_and_transmittance_from_alpha_patch_based_backward_naive\"\n)\nunpack_data = _make_lazy_cuda_func(\"unpack_data\")\nunpack_info = _make_lazy_cuda_func(\"unpack_info\")\nunpack_info_to_mask = _make_lazy_cuda_func(\"unpack_info_to_mask\")\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/_backend.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nimport glob\nimport json\nimport os\nimport shutil\nfrom subprocess import DEVNULL, call\n\nfrom rich.console import Console\nfrom torch.utils.cpp_extension import _get_build_directory, load\n\nPATH = os.path.dirname(os.path.abspath(__file__))\n\n\ndef cuda_toolkit_available():\n    \"\"\"Check if the nvcc is avaiable on the machine.\"\"\"\n    try:\n        call([\"nvcc\"], stdout=DEVNULL, stderr=DEVNULL)\n        return True\n    except FileNotFoundError:\n        return False\n\n\ndef cuda_toolkit_version():\n    \"\"\"Get the cuda toolkit version.\"\"\"\n    cuda_home = os.path.join(os.path.dirname(shutil.which(\"nvcc\")), \"..\")\n    if os.path.exists(os.path.join(cuda_home, \"version.txt\")):\n        with open(os.path.join(cuda_home, \"version.txt\")) as f:\n            cuda_version = f.read().strip().split()[-1]\n    elif os.path.exists(os.path.join(cuda_home, \"version.json\")):\n        with open(os.path.join(cuda_home, \"version.json\")) as f:\n            cuda_version = json.load(f)[\"cuda\"][\"version\"]\n    else:\n        raise RuntimeError(\"Cannot find the cuda version.\")\n    return cuda_version\n\n\nname = \"nerfacc_cuda\"\nbuild_dir = _get_build_directory(name, verbose=False)\nextra_include_paths = []\nextra_cflags = [\"-O3\"]\nextra_cuda_cflags = [\"-O3\"]\n\n_C = None\n\ntry:\n    # try to import the compiled module (via setup.py)\n    from nerfacc import csrc as _C\nexcept ImportError:\n    # if failed, try with JIT compilation\n    if cuda_toolkit_available():\n        if os.listdir(build_dir) != []:\n            # If the build exists, we assume the extension has been built\n            # and we can load it.\n\n            _C = load(\n                name=name,\n                sources=glob.glob(os.path.join(PATH, \"csrc/*.cu\")),\n                extra_cflags=extra_cflags,\n                extra_cuda_cflags=extra_cuda_cflags,\n                
extra_include_paths=extra_include_paths,\n            )\n        else:\n            # Build from scratch. Remove the build directory just to be safe: pytorch jit might get stuck\n            # if the build directory exists.\n            shutil.rmtree(build_dir)\n            with Console().status(\n                \"[bold yellow]NerfAcc: Setting up CUDA (This may take a few minutes the first time)\",\n                spinner=\"bouncingBall\",\n            ):\n                _C = load(\n                    name=name,\n                    sources=glob.glob(os.path.join(PATH, \"csrc/*.cu\")),\n                    extra_cflags=extra_cflags,\n                    extra_cuda_cflags=extra_cuda_cflags,\n                    extra_include_paths=extra_include_paths,\n                )\n    else:\n        Console().print(\n            \"[yellow]NerfAcc: No CUDA toolkit found. NerfAcc will be disabled.[/yellow]\"\n        )\n\n\n__all__ = [\"_C\"]\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/cdf.cu",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n\n#include \"include/helpers_cuda.h\"\n\ntemplate <typename scalar_t>\n__global__ void cdf_resampling_kernel(\n    const uint32_t n_rays,\n    const int *packed_info,  // input ray & point indices.\n    const scalar_t *starts,  // input start t\n    const scalar_t *ends,    // input end t\n    const scalar_t *weights, // transmittance weights\n    const int *resample_packed_info,\n    scalar_t *resample_starts,\n    scalar_t *resample_ends)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0];  // point idx start.\n    const int steps = packed_info[i * 2 + 1]; // point idx shift.\n    const int resample_base = resample_packed_info[i * 2 + 0];\n    const int resample_steps = resample_packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    starts += base;\n    ends += base;\n    weights += base;\n    resample_starts += resample_base;\n    resample_ends += resample_base;\n\n    // normalize weights **per ray**\n    scalar_t weights_sum = 0.0f;\n    for (int j = 0; j < steps; j++)\n        weights_sum += weights[j];\n    scalar_t padding = fmaxf(1e-5f - weights_sum, 0.0f);\n    scalar_t padding_step = padding / steps;\n    weights_sum += padding;\n\n    int num_bins = resample_steps + 1;\n    scalar_t cdf_step_size = (1.0f - 1.0 / num_bins) / resample_steps;\n\n    int idx = 0, j = 0;\n    scalar_t cdf_prev = 0.0f, cdf_next = (weights[idx] + padding_step) / weights_sum;\n    scalar_t cdf_u = 1.0 / (2 * num_bins);\n    while (j < num_bins)\n    {\n        if (cdf_u < cdf_next)\n        {\n            // printf(\"cdf_u: %f, cdf_next: %f\\n\", cdf_u, cdf_next);\n            // resample in this interval\n            scalar_t scaling = (ends[idx] - starts[idx]) / (cdf_next - cdf_prev);\n            scalar_t t = (cdf_u - cdf_prev) * scaling + starts[idx];\n            if (j < num_bins - 1)\n                resample_starts[j] = t;\n            
if (j > 0)\n                resample_ends[j - 1] = t;\n            // going further to next resample\n            cdf_u += cdf_step_size;\n            j += 1;\n        }\n        else\n        {\n            // going to next interval\n            idx += 1;\n            cdf_prev = cdf_next;\n            cdf_next += (weights[idx] + padding_step) / weights_sum;\n        }\n    }\n    if (j != num_bins)\n    {\n        printf(\"Error: %d %d %f\\n\", j, num_bins, weights_sum);\n    }\n    return;\n}\n\n// template <typename scalar_t>\n// __global__ void cdf_resampling_kernel(\n//     const uint32_t n_rays,\n//     const int *packed_info,   // input ray & point indices.\n//     const scalar_t *starts,   // input start t\n//     const scalar_t *ends,     // input end t\n//     const scalar_t *weights,  // transmittance weights\n//     const int *resample_packed_info,\n//     scalar_t *resample_starts,\n//     scalar_t *resample_ends)\n// {\n//     CUDA_GET_THREAD_ID(i, n_rays);\n\n//     // locate\n//     const int base = packed_info[i * 2 + 0];  // point idx start.\n//     const int steps = packed_info[i * 2 + 1]; // point idx shift.\n//     const int resample_base = resample_packed_info[i * 2 + 0];\n//     const int resample_steps = resample_packed_info[i * 2 + 1];\n//     if (steps == 0)\n//         return;\n\n//     starts += base;\n//     ends += base;\n//     weights += base;\n//     resample_starts += resample_base;\n//     resample_ends += resample_base;\n\n//     scalar_t cdf_step_size = 1.0f / resample_steps;\n\n//     // normalize weights **per ray**\n//     scalar_t weights_sum = 0.0f;\n//     for (int j = 0; j < steps; j++)\n//         weights_sum += weights[j];\n\n//     scalar_t padding = fmaxf(1e-5f - weights_sum, 0.0f);\n//     scalar_t padding_step = padding / steps;\n//     weights_sum += padding;\n\n//     int idx = 0, j = 0;\n//     scalar_t cdf_prev = 0.0f, cdf_next = (weights[idx] + padding_step) / weights_sum;\n//     scalar_t cdf_u = 0.5f * 
cdf_step_size;\n//     while (cdf_u < 1.0f)\n//     {\n//         if (cdf_u < cdf_next)\n//         {\n//             // resample in this interval\n//             scalar_t scaling = (ends[idx] - starts[idx]) / (cdf_next - cdf_prev);\n//             scalar_t resample_mid = (cdf_u - cdf_prev) * scaling + starts[idx];\n//             scalar_t resample_half_size = cdf_step_size * scaling * 0.5;\n//             resample_starts[j] = fmaxf(resample_mid - resample_half_size, starts[idx]);\n//             resample_ends[j] = fminf(resample_mid + resample_half_size, ends[idx]);\n//             // going further to next resample\n//             cdf_u += cdf_step_size;\n//             j += 1;\n//         }\n//         else\n//         {\n//             // go to next interval\n//             idx += 1;\n//             if (idx == steps)\n//                 break;\n//             cdf_prev = cdf_next;\n//             cdf_next += (weights[idx] + padding_step) / weights_sum;\n//         }\n//     }\n//     if (j != resample_steps)\n//     {\n//         printf(\"Error: %d %d %f\\n\", j, resample_steps, weights_sum);\n//     }\n//     return;\n// }\n\nstd::vector<torch::Tensor> ray_resampling(\n    torch::Tensor packed_info,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor weights,\n    const int steps)\n{\n    DEVICE_GUARD(packed_info);\n\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(starts);\n    CHECK_INPUT(ends);\n    CHECK_INPUT(weights);\n\n    TORCH_CHECK(packed_info.ndimension() == 2 & packed_info.size(1) == 2);\n    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);\n    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);\n    TORCH_CHECK(weights.ndimension() == 1);\n\n    const uint32_t n_rays = packed_info.size(0);\n    const uint32_t n_samples = weights.size(0);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    torch::Tensor num_steps = torch::split(packed_info, 1, 1)[1];\n    
torch::Tensor resample_num_steps = (num_steps > 0).to(num_steps.options()) * steps;\n    torch::Tensor resample_cum_steps = resample_num_steps.cumsum(0, torch::kInt32);\n    torch::Tensor resample_packed_info = torch::cat(\n        {resample_cum_steps - resample_num_steps, resample_num_steps}, 1);\n\n    int total_steps = resample_cum_steps[resample_cum_steps.size(0) - 1].item<int>();\n    torch::Tensor resample_starts = torch::zeros({total_steps, 1}, starts.options());\n    torch::Tensor resample_ends = torch::zeros({total_steps, 1}, ends.options());\n\n    AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n        weights.scalar_type(),\n        \"ray_resampling\",\n        ([&]\n         { cdf_resampling_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n               n_rays,\n               // inputs\n               packed_info.data_ptr<int>(),\n               starts.data_ptr<scalar_t>(),\n               ends.data_ptr<scalar_t>(),\n               weights.data_ptr<scalar_t>(),\n               resample_packed_info.data_ptr<int>(),\n               // outputs\n               resample_starts.data_ptr<scalar_t>(),\n               resample_ends.data_ptr<scalar_t>()); }));\n\n    return {resample_packed_info, resample_starts, resample_ends};\n}\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/contraction.cu",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n\n#include \"include/helpers_cuda.h\"\n#include \"include/helpers_math.h\"\n#include \"include/helpers_contraction.h\"\n\n__global__ void contract_kernel(\n    // samples info\n    const uint32_t n_samples,\n    const float *samples, // (n_samples, 3)\n    // contraction\n    const float *roi,\n    const ContractionType type,\n    // outputs\n    float *out_samples)\n{\n    CUDA_GET_THREAD_ID(i, n_samples);\n\n    // locate\n    samples += i * 3;\n    out_samples += i * 3;\n\n    const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);\n    const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);\n    const float3 xyz = make_float3(samples[0], samples[1], samples[2]);\n    float3 xyz_unit = apply_contraction(xyz, roi_min, roi_max, type);\n\n    out_samples[0] = xyz_unit.x;\n    out_samples[1] = xyz_unit.y;\n    out_samples[2] = xyz_unit.z;\n    return;\n}\n\n__global__ void contract_inv_kernel(\n    // samples info\n    const uint32_t n_samples,\n    const float *samples, // (n_samples, 3)\n    // contraction\n    const float *roi,\n    const ContractionType type,\n    // outputs\n    float *out_samples)\n{\n    CUDA_GET_THREAD_ID(i, n_samples);\n\n    // locate\n    samples += i * 3;\n    out_samples += i * 3;\n\n    const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);\n    const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);\n    const float3 xyz_unit = make_float3(samples[0], samples[1], samples[2]);\n    float3 xyz = apply_contraction_inv(xyz_unit, roi_min, roi_max, type);\n\n    out_samples[0] = xyz.x;\n    out_samples[1] = xyz.y;\n    out_samples[2] = xyz.z;\n    return;\n}\n\ntorch::Tensor contract(\n    const torch::Tensor samples,\n    // contraction\n    const torch::Tensor roi,\n    const ContractionType type)\n{\n    DEVICE_GUARD(samples);\n    CHECK_INPUT(samples);\n\n    const int n_samples = samples.size(0);\n    const int threads = 256;\n    const int blocks 
= CUDA_N_BLOCKS_NEEDED(n_samples, threads);\n\n    torch::Tensor out_samples = torch::empty({n_samples, 3}, samples.options());\n\n    contract_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_samples,\n        samples.data_ptr<float>(),\n        // contraction\n        roi.data_ptr<float>(),\n        type,\n        // outputs\n        out_samples.data_ptr<float>());\n    return out_samples;\n}\n\ntorch::Tensor contract_inv(\n    const torch::Tensor samples,\n    // contraction\n    const torch::Tensor roi,\n    const ContractionType type)\n{\n    DEVICE_GUARD(samples);\n    CHECK_INPUT(samples);\n\n    const int n_samples = samples.size(0);\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads);\n\n    torch::Tensor out_samples = torch::empty({n_samples, 3}, samples.options());\n\n    contract_inv_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_samples,\n        samples.data_ptr<float>(),\n        // contraction\n        roi.data_ptr<float>(),\n        type,\n        // outputs\n        out_samples.data_ptr<float>());\n    return out_samples;\n}\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/include/helpers_contraction.h",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n\n#pragma once\n\n#include \"helpers_math.h\"\n\nenum ContractionType\n{\n    AABB = 0,\n    UN_BOUNDED_TANH = 1,\n    UN_BOUNDED_SPHERE = 2,\n};\n\ninline __device__ __host__ float3 roi_to_unit(\n    const float3 xyz, const float3 roi_min, const float3 roi_max)\n{\n    // roi -> [0, 1]^3\n    return (xyz - roi_min) / (roi_max - roi_min);\n}\n\ninline __device__ __host__ float3 unit_to_roi(\n    const float3 xyz, const float3 roi_min, const float3 roi_max)\n{\n    // [0, 1]^3 -> roi\n    return xyz * (roi_max - roi_min) + roi_min;\n}\n\ninline __device__ __host__ float3 inf_to_unit_tanh(\n    const float3 xyz, float3 roi_min, const float3 roi_max)\n{\n    /**\n      [-inf, inf]^3 -> [0, 1]^3\n      roi -> cube of [0.25, 0.75]^3\n    **/\n    float3 xyz_unit = roi_to_unit(xyz, roi_min, roi_max); // roi -> [0, 1]^3\n    xyz_unit = xyz_unit - 0.5f;                           // roi -> [-0.5, 0.5]^3\n    return make_float3(tanhf(xyz_unit.x), tanhf(xyz_unit.y), tanhf(xyz_unit.z)) * 0.5f + 0.5f;\n}\n\ninline __device__ __host__ float3 unit_to_inf_tanh(\n    const float3 xyz, float3 roi_min, const float3 roi_max)\n{\n    /**\n      [0, 1]^3 -> [-inf, inf]^3\n      cube of [0.25, 0.75]^3 -> roi\n    **/\n    float3 xyz_unit = clamp(\n        make_float3(\n            atanhf(xyz.x * 2.0f - 1.0f),\n            atanhf(xyz.y * 2.0f - 1.0f),\n            atanhf(xyz.z * 2.0f - 1.0f)),\n        -1e10f,\n        1e10f);\n    xyz_unit = xyz_unit + 0.5f;\n    xyz_unit = unit_to_roi(xyz_unit, roi_min, roi_max);\n    return xyz_unit;\n}\n\ninline __device__ __host__ float3 inf_to_unit_sphere(\n    const float3 xyz, const float3 roi_min, const float3 roi_max)\n{\n    /** From MipNeRF360\n        [-inf, inf]^3 -> sphere of [0, 1]^3;\n        roi -> sphere of [0.25, 0.75]^3\n    **/\n    float3 xyz_unit = roi_to_unit(xyz, roi_min, roi_max); // roi -> [0, 1]^3\n    xyz_unit = xyz_unit * 2.0f - 1.0f;                    
// roi -> [-1, 1]^3\n\n    float norm_sq = dot(xyz_unit, xyz_unit);\n    float norm = sqrt(norm_sq);\n    if (norm > 1.0f)\n    {\n        xyz_unit = (2.0f - 1.0f / norm) * (xyz_unit / norm);\n    }\n    xyz_unit = xyz_unit * 0.25f + 0.5f; // [-1, 1]^3 -> [0.25, 0.75]^3\n    return xyz_unit;\n}\n\ninline __device__ __host__ float3 unit_sphere_to_inf(\n    const float3 xyz, const float3 roi_min, const float3 roi_max)\n{\n    /** From MipNeRF360\n        sphere of [0, 1]^3 -> [-inf, inf]^3;\n        sphere of [0.25, 0.75]^3 -> roi\n    **/\n    float3 xyz_unit = (xyz - 0.5f) * 4.0f; // [0.25, 0.75]^3 -> [-1, 1]^3\n\n    float norm_sq = dot(xyz_unit, xyz_unit);\n    float norm = sqrt(norm_sq);\n    if (norm > 1.0f)\n    {\n        xyz_unit = xyz_unit / fmaxf((2.0f * norm - 1.0f * norm_sq), 1e-10f);\n    }\n    xyz_unit = xyz_unit * 0.5f + 0.5f;                  // [-1, 1]^3 -> [0, 1]^3\n    xyz_unit = unit_to_roi(xyz_unit, roi_min, roi_max); // [0, 1]^3 -> roi\n    return xyz_unit;\n}\n\ninline __device__ __host__ float3 apply_contraction(\n    const float3 xyz, const float3 roi_min, const float3 roi_max,\n    const ContractionType type)\n{\n    switch (type)\n    {\n    case AABB:\n        return roi_to_unit(xyz, roi_min, roi_max);\n    case UN_BOUNDED_TANH:\n        return inf_to_unit_tanh(xyz, roi_min, roi_max);\n    case UN_BOUNDED_SPHERE:\n        return inf_to_unit_sphere(xyz, roi_min, roi_max);\n    }\n}\n\ninline __device__ __host__ float3 apply_contraction_inv(\n    const float3 xyz, const float3 roi_min, const float3 roi_max,\n    const ContractionType type)\n{\n    switch (type)\n    {\n    case AABB:\n        return unit_to_roi(xyz, roi_min, roi_max);\n    case UN_BOUNDED_TANH:\n        return unit_to_inf_tanh(xyz, roi_min, roi_max);\n    case UN_BOUNDED_SPHERE:\n        return unit_sphere_to_inf(xyz, roi_min, roi_max);\n    }\n}\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/include/helpers_cuda.h",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n\n#pragma once\n\n#include <torch/extension.h>\n#include <c10/cuda/CUDAGuard.h>\n#include <ATen/cuda/Exceptions.h>\n#include <cmath>\n// #include <ATen/cuda/cub_definitions.cuh>\n\n// cub support for scan by key is added to cub 1.15\n// in https://github.com/NVIDIA/cub/pull/376\n#if CUB_VERSION >= 101500\n#define CUB_SUPPORTS_SCAN_BY_KEY() 1\n#else\n#define CUB_SUPPORTS_SCAN_BY_KEY() 0\n#endif\n\n#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x \" must be a CUDA tensor\")\n#define CHECK_CONTIGUOUS(x) \\\n    TORCH_CHECK(x.is_contiguous(), #x \" must be contiguous\")\n#define CHECK_INPUT(x) \\\n    CHECK_CUDA(x);     \\\n    CHECK_CONTIGUOUS(x)\n#define CUDA_GET_THREAD_ID(tid, Q)                         \\\n    const int tid = blockIdx.x * blockDim.x + threadIdx.x; \\\n    if (tid >= Q)                                          \\\n    return\n#define CUDA_GET_THREAD_ID_2D(tidx, tidy, P, Q)                         \\\n    const int tidx = blockIdx.x * blockDim.x + threadIdx.x; \\\n    const int tidy = blockIdx.y * blockDim.y + threadIdx.y; \\\n    if (tidx >= P || tidy >= Q)                                          \\\n    return\n#define CUDA_N_BLOCKS_NEEDED(Q, CUDA_N_THREADS) ((Q - 1) / CUDA_N_THREADS + 1)\n#define DEVICE_GUARD(_ten) \\\n    const at::cuda::OptionalCUDAGuard device_guard(device_of(_ten));\n\n// https://github.com/pytorch/pytorch/blob/233305a852e1cd7f319b15b5137074c9eac455f6/aten/src/ATen/cuda/cub.cuh#L38-L46\n#define CUB_WRAPPER(func, ...) 
do {                                       \\\n  size_t temp_storage_bytes = 0;                                          \\\n  func(nullptr, temp_storage_bytes, __VA_ARGS__);                         \\\n  auto& caching_allocator = *::c10::cuda::CUDACachingAllocator::get();    \\\n  auto temp_storage = caching_allocator.allocate(temp_storage_bytes);     \\\n  func(temp_storage.get(), temp_storage_bytes, __VA_ARGS__);              \\\n  AT_CUDA_CHECK(cudaGetLastError());                                      \\\n} while (false)\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/include/helpers_math.h",
    "content": "/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.\n * Modified by Ruilong Li, 2022\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions\n * are met:\n *  * Redistributions of source code must retain the above copyright\n *    notice, this list of conditions and the following disclaimer.\n *  * Redistributions in binary form must reproduce the above copyright\n *    notice, this list of conditions and the following disclaimer in the\n *    documentation and/or other materials provided with the distribution.\n *  * Neither the name of NVIDIA CORPORATION nor the names of its\n *    contributors may be used to endorse or promote products derived\n *    from this software without specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY\n * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\n * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\n * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\n * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY\n * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n */\n\n/*\n *  This file implements common mathematical operations on vector types\n *  (float3, float4 etc.) 
since these are not provided as standard by CUDA.\n *\n *  The syntax is modeled on the Cg standard library.\n *\n *  This is part of the Helper library includes\n *\n *    Thanks to Linh Hah for additions and fixes.\n */\n\n#ifndef HELPER_MATH_H\n#define HELPER_MATH_H\n\n#include \"cuda_runtime.h\"\n\ntypedef unsigned int uint;\ntypedef unsigned short ushort;\n\n#ifndef EXIT_WAIVED\n#define EXIT_WAIVED 2\n#endif\n\n#ifndef __CUDACC__\n#include <math.h>\n\n////////////////////////////////////////////////////////////////////////////////\n// host implementations of CUDA functions\n////////////////////////////////////////////////////////////////////////////////\n\ninline float fminf(float a, float b)\n{\n    return a < b ? a : b;\n}\n\ninline float fmaxf(float a, float b)\n{\n    return a > b ? a : b;\n}\n\ninline int max(int a, int b)\n{\n    return a > b ? a : b;\n}\n\ninline int min(int a, int b)\n{\n    return a < b ? a : b;\n}\n\ninline float rsqrtf(float x)\n{\n    return 1.0f / sqrtf(x);\n}\n#endif\n\n////////////////////////////////////////////////////////////////////////////////\n// constructors\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 make_float2(float s)\n{\n    return make_float2(s, s);\n}\ninline __host__ __device__ float2 make_float2(float3 a)\n{\n    return make_float2(a.x, a.y);\n}\ninline __host__ __device__ float2 make_float2(int2 a)\n{\n    return make_float2(float(a.x), float(a.y));\n}\ninline __host__ __device__ float2 make_float2(uint2 a)\n{\n    return make_float2(float(a.x), float(a.y));\n}\n\ninline __host__ __device__ int2 make_int2(int s)\n{\n    return make_int2(s, s);\n}\ninline __host__ __device__ int2 make_int2(int3 a)\n{\n    return make_int2(a.x, a.y);\n}\ninline __host__ __device__ int2 make_int2(uint2 a)\n{\n    return make_int2(int(a.x), int(a.y));\n}\ninline __host__ __device__ int2 make_int2(float2 a)\n{\n    return make_int2(int(a.x), 
int(a.y));\n}\n\ninline __host__ __device__ uint2 make_uint2(uint s)\n{\n    return make_uint2(s, s);\n}\ninline __host__ __device__ uint2 make_uint2(uint3 a)\n{\n    return make_uint2(a.x, a.y);\n}\ninline __host__ __device__ uint2 make_uint2(int2 a)\n{\n    return make_uint2(uint(a.x), uint(a.y));\n}\n\ninline __host__ __device__ float3 make_float3(float s)\n{\n    return make_float3(s, s, s);\n}\ninline __host__ __device__ float3 make_float3(float2 a)\n{\n    return make_float3(a.x, a.y, 0.0f);\n}\ninline __host__ __device__ float3 make_float3(float2 a, float s)\n{\n    return make_float3(a.x, a.y, s);\n}\ninline __host__ __device__ float3 make_float3(float4 a)\n{\n    return make_float3(a.x, a.y, a.z);\n}\ninline __host__ __device__ float3 make_float3(int3 a)\n{\n    return make_float3(float(a.x), float(a.y), float(a.z));\n}\ninline __host__ __device__ float3 make_float3(uint3 a)\n{\n    return make_float3(float(a.x), float(a.y), float(a.z));\n}\n\ninline __host__ __device__ int3 make_int3(int s)\n{\n    return make_int3(s, s, s);\n}\ninline __host__ __device__ int3 make_int3(int2 a)\n{\n    return make_int3(a.x, a.y, 0);\n}\ninline __host__ __device__ int3 make_int3(int2 a, int s)\n{\n    return make_int3(a.x, a.y, s);\n}\ninline __host__ __device__ int3 make_int3(uint3 a)\n{\n    return make_int3(int(a.x), int(a.y), int(a.z));\n}\ninline __host__ __device__ int3 make_int3(float3 a)\n{\n    return make_int3(int(a.x), int(a.y), int(a.z));\n}\n\ninline __host__ __device__ uint3 make_uint3(uint s)\n{\n    return make_uint3(s, s, s);\n}\ninline __host__ __device__ uint3 make_uint3(uint2 a)\n{\n    return make_uint3(a.x, a.y, 0);\n}\ninline __host__ __device__ uint3 make_uint3(uint2 a, uint s)\n{\n    return make_uint3(a.x, a.y, s);\n}\ninline __host__ __device__ uint3 make_uint3(uint4 a)\n{\n    return make_uint3(a.x, a.y, a.z);\n}\ninline __host__ __device__ uint3 make_uint3(int3 a)\n{\n    return make_uint3(uint(a.x), uint(a.y), uint(a.z));\n}\n\ninline __host__ 
__device__ float4 make_float4(float s)\n{\n    return make_float4(s, s, s, s);\n}\ninline __host__ __device__ float4 make_float4(float3 a)\n{\n    return make_float4(a.x, a.y, a.z, 0.0f);\n}\ninline __host__ __device__ float4 make_float4(float3 a, float w)\n{\n    return make_float4(a.x, a.y, a.z, w);\n}\ninline __host__ __device__ float4 make_float4(int4 a)\n{\n    return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));\n}\ninline __host__ __device__ float4 make_float4(uint4 a)\n{\n    return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));\n}\n\ninline __host__ __device__ int4 make_int4(int s)\n{\n    return make_int4(s, s, s, s);\n}\ninline __host__ __device__ int4 make_int4(int3 a)\n{\n    return make_int4(a.x, a.y, a.z, 0);\n}\ninline __host__ __device__ int4 make_int4(int3 a, int w)\n{\n    return make_int4(a.x, a.y, a.z, w);\n}\ninline __host__ __device__ int4 make_int4(uint4 a)\n{\n    return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));\n}\ninline __host__ __device__ int4 make_int4(float4 a)\n{\n    return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));\n}\n\ninline __host__ __device__ uint4 make_uint4(uint s)\n{\n    return make_uint4(s, s, s, s);\n}\ninline __host__ __device__ uint4 make_uint4(uint3 a)\n{\n    return make_uint4(a.x, a.y, a.z, 0);\n}\ninline __host__ __device__ uint4 make_uint4(uint3 a, uint w)\n{\n    return make_uint4(a.x, a.y, a.z, w);\n}\ninline __host__ __device__ uint4 make_uint4(int4 a)\n{\n    return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w));\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// negate\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 operator-(float2 &a)\n{\n    return make_float2(-a.x, -a.y);\n}\ninline __host__ __device__ int2 operator-(int2 &a)\n{\n    return make_int2(-a.x, -a.y);\n}\ninline __host__ __device__ float3 operator-(float3 &a)\n{\n    return 
make_float3(-a.x, -a.y, -a.z);\n}\ninline __host__ __device__ int3 operator-(int3 &a)\n{\n    return make_int3(-a.x, -a.y, -a.z);\n}\ninline __host__ __device__ float4 operator-(float4 &a)\n{\n    return make_float4(-a.x, -a.y, -a.z, -a.w);\n}\ninline __host__ __device__ int4 operator-(int4 &a)\n{\n    return make_int4(-a.x, -a.y, -a.z, -a.w);\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// addition\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 operator+(float2 a, float2 b)\n{\n    return make_float2(a.x + b.x, a.y + b.y);\n}\ninline __host__ __device__ void operator+=(float2 &a, float2 b)\n{\n    a.x += b.x;\n    a.y += b.y;\n}\ninline __host__ __device__ float2 operator+(float2 a, float b)\n{\n    return make_float2(a.x + b, a.y + b);\n}\ninline __host__ __device__ float2 operator+(float b, float2 a)\n{\n    return make_float2(a.x + b, a.y + b);\n}\ninline __host__ __device__ void operator+=(float2 &a, float b)\n{\n    a.x += b;\n    a.y += b;\n}\n\ninline __host__ __device__ int2 operator+(int2 a, int2 b)\n{\n    return make_int2(a.x + b.x, a.y + b.y);\n}\ninline __host__ __device__ void operator+=(int2 &a, int2 b)\n{\n    a.x += b.x;\n    a.y += b.y;\n}\ninline __host__ __device__ int2 operator+(int2 a, int b)\n{\n    return make_int2(a.x + b, a.y + b);\n}\ninline __host__ __device__ int2 operator+(int b, int2 a)\n{\n    return make_int2(a.x + b, a.y + b);\n}\ninline __host__ __device__ void operator+=(int2 &a, int b)\n{\n    a.x += b;\n    a.y += b;\n}\n\ninline __host__ __device__ uint2 operator+(uint2 a, uint2 b)\n{\n    return make_uint2(a.x + b.x, a.y + b.y);\n}\ninline __host__ __device__ void operator+=(uint2 &a, uint2 b)\n{\n    a.x += b.x;\n    a.y += b.y;\n}\ninline __host__ __device__ uint2 operator+(uint2 a, uint b)\n{\n    return make_uint2(a.x + b, a.y + b);\n}\ninline __host__ __device__ uint2 operator+(uint b, uint2 a)\n{\n    
return make_uint2(a.x + b, a.y + b);\n}\ninline __host__ __device__ void operator+=(uint2 &a, uint b)\n{\n    a.x += b;\n    a.y += b;\n}\n\ninline __host__ __device__ float3 operator+(float3 a, float3 b)\n{\n    return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);\n}\ninline __host__ __device__ void operator+=(float3 &a, float3 b)\n{\n    a.x += b.x;\n    a.y += b.y;\n    a.z += b.z;\n}\ninline __host__ __device__ float3 operator+(float3 a, float b)\n{\n    return make_float3(a.x + b, a.y + b, a.z + b);\n}\ninline __host__ __device__ void operator+=(float3 &a, float b)\n{\n    a.x += b;\n    a.y += b;\n    a.z += b;\n}\n\ninline __host__ __device__ int3 operator+(int3 a, int3 b)\n{\n    return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);\n}\ninline __host__ __device__ void operator+=(int3 &a, int3 b)\n{\n    a.x += b.x;\n    a.y += b.y;\n    a.z += b.z;\n}\ninline __host__ __device__ int3 operator+(int3 a, int b)\n{\n    return make_int3(a.x + b, a.y + b, a.z + b);\n}\ninline __host__ __device__ void operator+=(int3 &a, int b)\n{\n    a.x += b;\n    a.y += b;\n    a.z += b;\n}\n\ninline __host__ __device__ uint3 operator+(uint3 a, uint3 b)\n{\n    return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);\n}\ninline __host__ __device__ void operator+=(uint3 &a, uint3 b)\n{\n    a.x += b.x;\n    a.y += b.y;\n    a.z += b.z;\n}\ninline __host__ __device__ uint3 operator+(uint3 a, uint b)\n{\n    return make_uint3(a.x + b, a.y + b, a.z + b);\n}\ninline __host__ __device__ void operator+=(uint3 &a, uint b)\n{\n    a.x += b;\n    a.y += b;\n    a.z += b;\n}\n\ninline __host__ __device__ int3 operator+(int b, int3 a)\n{\n    return make_int3(a.x + b, a.y + b, a.z + b);\n}\ninline __host__ __device__ uint3 operator+(uint b, uint3 a)\n{\n    return make_uint3(a.x + b, a.y + b, a.z + b);\n}\ninline __host__ __device__ float3 operator+(float b, float3 a)\n{\n    return make_float3(a.x + b, a.y + b, a.z + b);\n}\n\ninline __host__ __device__ float4 operator+(float4 a, float4 b)\n{\n 
   return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);\n}\ninline __host__ __device__ void operator+=(float4 &a, float4 b)\n{\n    a.x += b.x;\n    a.y += b.y;\n    a.z += b.z;\n    a.w += b.w;\n}\ninline __host__ __device__ float4 operator+(float4 a, float b)\n{\n    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);\n}\ninline __host__ __device__ float4 operator+(float b, float4 a)\n{\n    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);\n}\ninline __host__ __device__ void operator+=(float4 &a, float b)\n{\n    a.x += b;\n    a.y += b;\n    a.z += b;\n    a.w += b;\n}\n\ninline __host__ __device__ int4 operator+(int4 a, int4 b)\n{\n    return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);\n}\ninline __host__ __device__ void operator+=(int4 &a, int4 b)\n{\n    a.x += b.x;\n    a.y += b.y;\n    a.z += b.z;\n    a.w += b.w;\n}\ninline __host__ __device__ int4 operator+(int4 a, int b)\n{\n    return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);\n}\ninline __host__ __device__ int4 operator+(int b, int4 a)\n{\n    return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);\n}\ninline __host__ __device__ void operator+=(int4 &a, int b)\n{\n    a.x += b;\n    a.y += b;\n    a.z += b;\n    a.w += b;\n}\n\ninline __host__ __device__ uint4 operator+(uint4 a, uint4 b)\n{\n    return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);\n}\ninline __host__ __device__ void operator+=(uint4 &a, uint4 b)\n{\n    a.x += b.x;\n    a.y += b.y;\n    a.z += b.z;\n    a.w += b.w;\n}\ninline __host__ __device__ uint4 operator+(uint4 a, uint b)\n{\n    return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);\n}\ninline __host__ __device__ uint4 operator+(uint b, uint4 a)\n{\n    return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);\n}\ninline __host__ __device__ void operator+=(uint4 &a, uint b)\n{\n    a.x += b;\n    a.y += b;\n    a.z += b;\n    a.w += b;\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// 
subtract\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 operator-(float2 a, float2 b)\n{\n    return make_float2(a.x - b.x, a.y - b.y);\n}\ninline __host__ __device__ void operator-=(float2 &a, float2 b)\n{\n    a.x -= b.x;\n    a.y -= b.y;\n}\ninline __host__ __device__ float2 operator-(float2 a, float b)\n{\n    return make_float2(a.x - b, a.y - b);\n}\ninline __host__ __device__ float2 operator-(float b, float2 a)\n{\n    return make_float2(b - a.x, b - a.y);\n}\ninline __host__ __device__ void operator-=(float2 &a, float b)\n{\n    a.x -= b;\n    a.y -= b;\n}\n\ninline __host__ __device__ int2 operator-(int2 a, int2 b)\n{\n    return make_int2(a.x - b.x, a.y - b.y);\n}\ninline __host__ __device__ void operator-=(int2 &a, int2 b)\n{\n    a.x -= b.x;\n    a.y -= b.y;\n}\ninline __host__ __device__ int2 operator-(int2 a, int b)\n{\n    return make_int2(a.x - b, a.y - b);\n}\ninline __host__ __device__ int2 operator-(int b, int2 a)\n{\n    return make_int2(b - a.x, b - a.y);\n}\ninline __host__ __device__ void operator-=(int2 &a, int b)\n{\n    a.x -= b;\n    a.y -= b;\n}\n\ninline __host__ __device__ uint2 operator-(uint2 a, uint2 b)\n{\n    return make_uint2(a.x - b.x, a.y - b.y);\n}\ninline __host__ __device__ void operator-=(uint2 &a, uint2 b)\n{\n    a.x -= b.x;\n    a.y -= b.y;\n}\ninline __host__ __device__ uint2 operator-(uint2 a, uint b)\n{\n    return make_uint2(a.x - b, a.y - b);\n}\ninline __host__ __device__ uint2 operator-(uint b, uint2 a)\n{\n    return make_uint2(b - a.x, b - a.y);\n}\ninline __host__ __device__ void operator-=(uint2 &a, uint b)\n{\n    a.x -= b;\n    a.y -= b;\n}\n\ninline __host__ __device__ float3 operator-(float3 a, float3 b)\n{\n    return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);\n}\ninline __host__ __device__ void operator-=(float3 &a, float3 b)\n{\n    a.x -= b.x;\n    a.y -= b.y;\n    a.z -= b.z;\n}\ninline __host__ __device__ float3 
operator-(float3 a, float b)\n{\n    return make_float3(a.x - b, a.y - b, a.z - b);\n}\ninline __host__ __device__ float3 operator-(float b, float3 a)\n{\n    return make_float3(b - a.x, b - a.y, b - a.z);\n}\ninline __host__ __device__ void operator-=(float3 &a, float b)\n{\n    a.x -= b;\n    a.y -= b;\n    a.z -= b;\n}\n\ninline __host__ __device__ int3 operator-(int3 a, int3 b)\n{\n    return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);\n}\ninline __host__ __device__ void operator-=(int3 &a, int3 b)\n{\n    a.x -= b.x;\n    a.y -= b.y;\n    a.z -= b.z;\n}\ninline __host__ __device__ int3 operator-(int3 a, int b)\n{\n    return make_int3(a.x - b, a.y - b, a.z - b);\n}\ninline __host__ __device__ int3 operator-(int b, int3 a)\n{\n    return make_int3(b - a.x, b - a.y, b - a.z);\n}\ninline __host__ __device__ void operator-=(int3 &a, int b)\n{\n    a.x -= b;\n    a.y -= b;\n    a.z -= b;\n}\n\ninline __host__ __device__ uint3 operator-(uint3 a, uint3 b)\n{\n    return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);\n}\ninline __host__ __device__ void operator-=(uint3 &a, uint3 b)\n{\n    a.x -= b.x;\n    a.y -= b.y;\n    a.z -= b.z;\n}\ninline __host__ __device__ uint3 operator-(uint3 a, uint b)\n{\n    return make_uint3(a.x - b, a.y - b, a.z - b);\n}\ninline __host__ __device__ uint3 operator-(uint b, uint3 a)\n{\n    return make_uint3(b - a.x, b - a.y, b - a.z);\n}\ninline __host__ __device__ void operator-=(uint3 &a, uint b)\n{\n    a.x -= b;\n    a.y -= b;\n    a.z -= b;\n}\n\ninline __host__ __device__ float4 operator-(float4 a, float4 b)\n{\n    return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);\n}\ninline __host__ __device__ void operator-=(float4 &a, float4 b)\n{\n    a.x -= b.x;\n    a.y -= b.y;\n    a.z -= b.z;\n    a.w -= b.w;\n}\ninline __host__ __device__ float4 operator-(float4 a, float b)\n{\n    return make_float4(a.x - b, a.y - b, a.z - b, a.w - b);\n}\ninline __host__ __device__ void operator-=(float4 &a, float b)\n{\n    a.x -= b;\n    
a.y -= b;\n    a.z -= b;\n    a.w -= b;\n}\n\ninline __host__ __device__ int4 operator-(int4 a, int4 b)\n{\n    return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);\n}\ninline __host__ __device__ void operator-=(int4 &a, int4 b)\n{\n    a.x -= b.x;\n    a.y -= b.y;\n    a.z -= b.z;\n    a.w -= b.w;\n}\ninline __host__ __device__ int4 operator-(int4 a, int b)\n{\n    return make_int4(a.x - b, a.y - b, a.z - b, a.w - b);\n}\ninline __host__ __device__ int4 operator-(int b, int4 a)\n{\n    return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);\n}\ninline __host__ __device__ void operator-=(int4 &a, int b)\n{\n    a.x -= b;\n    a.y -= b;\n    a.z -= b;\n    a.w -= b;\n}\n\ninline __host__ __device__ uint4 operator-(uint4 a, uint4 b)\n{\n    return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);\n}\ninline __host__ __device__ void operator-=(uint4 &a, uint4 b)\n{\n    a.x -= b.x;\n    a.y -= b.y;\n    a.z -= b.z;\n    a.w -= b.w;\n}\ninline __host__ __device__ uint4 operator-(uint4 a, uint b)\n{\n    return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b);\n}\ninline __host__ __device__ uint4 operator-(uint b, uint4 a)\n{\n    return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);\n}\ninline __host__ __device__ void operator-=(uint4 &a, uint b)\n{\n    a.x -= b;\n    a.y -= b;\n    a.z -= b;\n    a.w -= b;\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// multiply\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 operator*(float2 a, float2 b)\n{\n    return make_float2(a.x * b.x, a.y * b.y);\n}\ninline __host__ __device__ void operator*=(float2 &a, float2 b)\n{\n    a.x *= b.x;\n    a.y *= b.y;\n}\ninline __host__ __device__ float2 operator*(float2 a, float b)\n{\n    return make_float2(a.x * b, a.y * b);\n}\ninline __host__ __device__ float2 operator*(float b, float2 a)\n{\n    return make_float2(b * a.x, b * a.y);\n}\ninline __host__ __device__ 
void operator*=(float2 &a, float b)\n{\n    a.x *= b;\n    a.y *= b;\n}\n\ninline __host__ __device__ int2 operator*(int2 a, int2 b)\n{\n    return make_int2(a.x * b.x, a.y * b.y);\n}\ninline __host__ __device__ void operator*=(int2 &a, int2 b)\n{\n    a.x *= b.x;\n    a.y *= b.y;\n}\ninline __host__ __device__ int2 operator*(int2 a, int b)\n{\n    return make_int2(a.x * b, a.y * b);\n}\ninline __host__ __device__ int2 operator*(int b, int2 a)\n{\n    return make_int2(b * a.x, b * a.y);\n}\ninline __host__ __device__ void operator*=(int2 &a, int b)\n{\n    a.x *= b;\n    a.y *= b;\n}\n\ninline __host__ __device__ uint2 operator*(uint2 a, uint2 b)\n{\n    return make_uint2(a.x * b.x, a.y * b.y);\n}\ninline __host__ __device__ void operator*=(uint2 &a, uint2 b)\n{\n    a.x *= b.x;\n    a.y *= b.y;\n}\ninline __host__ __device__ uint2 operator*(uint2 a, uint b)\n{\n    return make_uint2(a.x * b, a.y * b);\n}\ninline __host__ __device__ uint2 operator*(uint b, uint2 a)\n{\n    return make_uint2(b * a.x, b * a.y);\n}\ninline __host__ __device__ void operator*=(uint2 &a, uint b)\n{\n    a.x *= b;\n    a.y *= b;\n}\n\ninline __host__ __device__ float3 operator*(float3 a, float3 b)\n{\n    return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);\n}\ninline __host__ __device__ void operator*=(float3 &a, float3 b)\n{\n    a.x *= b.x;\n    a.y *= b.y;\n    a.z *= b.z;\n}\ninline __host__ __device__ float3 operator*(float3 a, float b)\n{\n    return make_float3(a.x * b, a.y * b, a.z * b);\n}\ninline __host__ __device__ float3 operator*(float b, float3 a)\n{\n    return make_float3(b * a.x, b * a.y, b * a.z);\n}\ninline __host__ __device__ void operator*=(float3 &a, float b)\n{\n    a.x *= b;\n    a.y *= b;\n    a.z *= b;\n}\n\ninline __host__ __device__ int3 operator*(int3 a, int3 b)\n{\n    return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);\n}\ninline __host__ __device__ void operator*=(int3 &a, int3 b)\n{\n    a.x *= b.x;\n    a.y *= b.y;\n    a.z *= b.z;\n}\ninline __host__ 
__device__ int3 operator*(int3 a, int b)\n{\n    return make_int3(a.x * b, a.y * b, a.z * b);\n}\ninline __host__ __device__ int3 operator*(int b, int3 a)\n{\n    return make_int3(b * a.x, b * a.y, b * a.z);\n}\ninline __host__ __device__ void operator*=(int3 &a, int b)\n{\n    a.x *= b;\n    a.y *= b;\n    a.z *= b;\n}\n\ninline __host__ __device__ uint3 operator*(uint3 a, uint3 b)\n{\n    return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);\n}\ninline __host__ __device__ void operator*=(uint3 &a, uint3 b)\n{\n    a.x *= b.x;\n    a.y *= b.y;\n    a.z *= b.z;\n}\ninline __host__ __device__ uint3 operator*(uint3 a, uint b)\n{\n    return make_uint3(a.x * b, a.y * b, a.z * b);\n}\ninline __host__ __device__ uint3 operator*(uint b, uint3 a)\n{\n    return make_uint3(b * a.x, b * a.y, b * a.z);\n}\ninline __host__ __device__ void operator*=(uint3 &a, uint b)\n{\n    a.x *= b;\n    a.y *= b;\n    a.z *= b;\n}\n\ninline __host__ __device__ float4 operator*(float4 a, float4 b)\n{\n    return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);\n}\ninline __host__ __device__ void operator*=(float4 &a, float4 b)\n{\n    a.x *= b.x;\n    a.y *= b.y;\n    a.z *= b.z;\n    a.w *= b.w;\n}\ninline __host__ __device__ float4 operator*(float4 a, float b)\n{\n    return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);\n}\ninline __host__ __device__ float4 operator*(float b, float4 a)\n{\n    return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);\n}\ninline __host__ __device__ void operator*=(float4 &a, float b)\n{\n    a.x *= b;\n    a.y *= b;\n    a.z *= b;\n    a.w *= b;\n}\n\ninline __host__ __device__ int4 operator*(int4 a, int4 b)\n{\n    return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);\n}\ninline __host__ __device__ void operator*=(int4 &a, int4 b)\n{\n    a.x *= b.x;\n    a.y *= b.y;\n    a.z *= b.z;\n    a.w *= b.w;\n}\ninline __host__ __device__ int4 operator*(int4 a, int b)\n{\n    return make_int4(a.x * b, a.y * b, a.z * b, a.w * b);\n}\ninline __host__ 
__device__ int4 operator*(int b, int4 a)\n{\n    return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);\n}\ninline __host__ __device__ void operator*=(int4 &a, int b)\n{\n    a.x *= b;\n    a.y *= b;\n    a.z *= b;\n    a.w *= b;\n}\n\ninline __host__ __device__ uint4 operator*(uint4 a, uint4 b)\n{\n    return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);\n}\ninline __host__ __device__ void operator*=(uint4 &a, uint4 b)\n{\n    a.x *= b.x;\n    a.y *= b.y;\n    a.z *= b.z;\n    a.w *= b.w;\n}\ninline __host__ __device__ uint4 operator*(uint4 a, uint b)\n{\n    return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);\n}\ninline __host__ __device__ uint4 operator*(uint b, uint4 a)\n{\n    return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);\n}\ninline __host__ __device__ void operator*=(uint4 &a, uint b)\n{\n    a.x *= b;\n    a.y *= b;\n    a.z *= b;\n    a.w *= b;\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// divide\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 operator/(float2 a, float2 b)\n{\n    return make_float2(a.x / b.x, a.y / b.y);\n}\ninline __host__ __device__ void operator/=(float2 &a, float2 b)\n{\n    a.x /= b.x;\n    a.y /= b.y;\n}\ninline __host__ __device__ float2 operator/(float2 a, float b)\n{\n    return make_float2(a.x / b, a.y / b);\n}\ninline __host__ __device__ void operator/=(float2 &a, float b)\n{\n    a.x /= b;\n    a.y /= b;\n}\ninline __host__ __device__ float2 operator/(float b, float2 a)\n{\n    return make_float2(b / a.x, b / a.y);\n}\n\ninline __host__ __device__ float3 operator/(float3 a, float3 b)\n{\n    return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);\n}\ninline __host__ __device__ void operator/=(float3 &a, float3 b)\n{\n    a.x /= b.x;\n    a.y /= b.y;\n    a.z /= b.z;\n}\ninline __host__ __device__ float3 operator/(float3 a, float b)\n{\n    return make_float3(a.x / b, a.y / b, a.z / b);\n}\ninline 
__host__ __device__ void operator/=(float3 &a, float b)\n{\n    a.x /= b;\n    a.y /= b;\n    a.z /= b;\n}\ninline __host__ __device__ float3 operator/(float b, float3 a)\n{\n    return make_float3(b / a.x, b / a.y, b / a.z);\n}\n\ninline __host__ __device__ float4 operator/(float4 a, float4 b)\n{\n    return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);\n}\ninline __host__ __device__ void operator/=(float4 &a, float4 b)\n{\n    a.x /= b.x;\n    a.y /= b.y;\n    a.z /= b.z;\n    a.w /= b.w;\n}\ninline __host__ __device__ float4 operator/(float4 a, float b)\n{\n    return make_float4(a.x / b, a.y / b, a.z / b, a.w / b);\n}\ninline __host__ __device__ void operator/=(float4 &a, float b)\n{\n    a.x /= b;\n    a.y /= b;\n    a.z /= b;\n    a.w /= b;\n}\ninline __host__ __device__ float4 operator/(float b, float4 a)\n{\n    return make_float4(b / a.x, b / a.y, b / a.z, b / a.w);\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// min\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 fminf(float2 a, float2 b)\n{\n    return make_float2(fminf(a.x, b.x), fminf(a.y, b.y));\n}\ninline __host__ __device__ float3 fminf(float3 a, float3 b)\n{\n    return make_float3(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z));\n}\ninline __host__ __device__ float4 fminf(float4 a, float4 b)\n{\n    return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));\n}\n\ninline __host__ __device__ int2 min(int2 a, int2 b)\n{\n    return make_int2(min(a.x, b.x), min(a.y, b.y));\n}\ninline __host__ __device__ int3 min(int3 a, int3 b)\n{\n    return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));\n}\ninline __host__ __device__ int4 min(int4 a, int4 b)\n{\n    return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));\n}\n\ninline __host__ __device__ uint2 min(uint2 a, uint2 b)\n{\n    return make_uint2(min(a.x, b.x), min(a.y, 
b.y));\n}\ninline __host__ __device__ uint3 min(uint3 a, uint3 b)\n{\n    return make_uint3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));\n}\ninline __host__ __device__ uint4 min(uint4 a, uint4 b)\n{\n    return make_uint4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// max\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 fmaxf(float2 a, float2 b)\n{\n    return make_float2(fmaxf(a.x, b.x), fmaxf(a.y, b.y));\n}\ninline __host__ __device__ float3 fmaxf(float3 a, float3 b)\n{\n    return make_float3(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z));\n}\ninline __host__ __device__ float4 fmaxf(float4 a, float4 b)\n{\n    return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));\n}\n\ninline __host__ __device__ int2 max(int2 a, int2 b)\n{\n    return make_int2(max(a.x, b.x), max(a.y, b.y));\n}\ninline __host__ __device__ int3 max(int3 a, int3 b)\n{\n    return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));\n}\ninline __host__ __device__ int4 max(int4 a, int4 b)\n{\n    return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));\n}\n\ninline __host__ __device__ uint2 max(uint2 a, uint2 b)\n{\n    return make_uint2(max(a.x, b.x), max(a.y, b.y));\n}\ninline __host__ __device__ uint3 max(uint3 a, uint3 b)\n{\n    return make_uint3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));\n}\ninline __host__ __device__ uint4 max(uint4 a, uint4 b)\n{\n    return make_uint4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// lerp\n// - linear interpolation between a and b, based on value t in [0, 1] range\n////////////////////////////////////////////////////////////////////////////////\n\ninline __device__ __host__ float lerp(float a, float b, float t)\n{\n    
return a + t * (b - a);\n}\ninline __device__ __host__ float2 lerp(float2 a, float2 b, float t)\n{\n    return a + t * (b - a);\n}\ninline __device__ __host__ float3 lerp(float3 a, float3 b, float t)\n{\n    return a + t * (b - a);\n}\ninline __device__ __host__ float4 lerp(float4 a, float4 b, float t)\n{\n    return a + t * (b - a);\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// clamp\n// - clamp the value v to be in the range [a, b]\n////////////////////////////////////////////////////////////////////////////////\n\ninline __device__ __host__ float clamp(float f, float a, float b)\n{\n    return fmaxf(a, fminf(f, b));\n}\ninline __device__ __host__ int clamp(int f, int a, int b)\n{\n    return max(a, min(f, b));\n}\ninline __device__ __host__ uint clamp(uint f, uint a, uint b)\n{\n    return max(a, min(f, b));\n}\n\ninline __device__ __host__ float2 clamp(float2 v, float a, float b)\n{\n    return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));\n}\ninline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)\n{\n    return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));\n}\ninline __device__ __host__ float3 clamp(float3 v, float a, float b)\n{\n    return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));\n}\ninline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)\n{\n    return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));\n}\ninline __device__ __host__ float4 clamp(float4 v, float a, float b)\n{\n    return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));\n}\ninline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)\n{\n    return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));\n}\n\ninline __device__ __host__ int2 clamp(int2 v, int a, int b)\n{\n    return make_int2(clamp(v.x, a, b), clamp(v.y, a, b));\n}\ninline __device__ __host__ int2 
clamp(int2 v, int2 a, int2 b)\n{\n    return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));\n}\ninline __device__ __host__ int3 clamp(int3 v, int a, int b)\n{\n    return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));\n}\ninline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)\n{\n    return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));\n}\ninline __device__ __host__ int4 clamp(int4 v, int a, int b)\n{\n    return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));\n}\ninline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)\n{\n    return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));\n}\n\ninline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)\n{\n    return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b));\n}\ninline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)\n{\n    return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));\n}\ninline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)\n{\n    return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));\n}\ninline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)\n{\n    return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));\n}\ninline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)\n{\n    return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));\n}\ninline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)\n{\n    return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// dot product\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float dot(float2 a, float2 b)\n{\n    return a.x * b.x + a.y * b.y;\n}\ninline __host__ __device__ 
float dot(float3 a, float3 b)\n{\n    return a.x * b.x + a.y * b.y + a.z * b.z;\n}\ninline __host__ __device__ float dot(float4 a, float4 b)\n{\n    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;\n}\n\ninline __host__ __device__ int dot(int2 a, int2 b)\n{\n    return a.x * b.x + a.y * b.y;\n}\ninline __host__ __device__ int dot(int3 a, int3 b)\n{\n    return a.x * b.x + a.y * b.y + a.z * b.z;\n}\ninline __host__ __device__ int dot(int4 a, int4 b)\n{\n    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;\n}\n\ninline __host__ __device__ uint dot(uint2 a, uint2 b)\n{\n    return a.x * b.x + a.y * b.y;\n}\ninline __host__ __device__ uint dot(uint3 a, uint3 b)\n{\n    return a.x * b.x + a.y * b.y + a.z * b.z;\n}\ninline __host__ __device__ uint dot(uint4 a, uint4 b)\n{\n    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// length\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float length(float2 v)\n{\n    return sqrtf(dot(v, v));\n}\ninline __host__ __device__ float length(float3 v)\n{\n    return sqrtf(dot(v, v));\n}\ninline __host__ __device__ float length(float4 v)\n{\n    return sqrtf(dot(v, v));\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// normalize\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 normalize(float2 v)\n{\n    float invLen = rsqrtf(dot(v, v));\n    return v * invLen;\n}\ninline __host__ __device__ float3 normalize(float3 v)\n{\n    float invLen = rsqrtf(dot(v, v));\n    return v * invLen;\n}\ninline __host__ __device__ float4 normalize(float4 v)\n{\n    float invLen = rsqrtf(dot(v, v));\n    return v * invLen;\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// 
floor\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 floorf(float2 v)\n{\n    return make_float2(floorf(v.x), floorf(v.y));\n}\ninline __host__ __device__ float3 floorf(float3 v)\n{\n    return make_float3(floorf(v.x), floorf(v.y), floorf(v.z));\n}\ninline __host__ __device__ float4 floorf(float4 v)\n{\n    return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w));\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// frac - returns the fractional portion of a scalar or each vector component\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float fracf(float v)\n{\n    return v - floorf(v);\n}\ninline __host__ __device__ float2 fracf(float2 v)\n{\n    return make_float2(fracf(v.x), fracf(v.y));\n}\ninline __host__ __device__ float3 fracf(float3 v)\n{\n    return make_float3(fracf(v.x), fracf(v.y), fracf(v.z));\n}\ninline __host__ __device__ float4 fracf(float4 v)\n{\n    return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w));\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// fmod\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 fmodf(float2 a, float2 b)\n{\n    return make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y));\n}\ninline __host__ __device__ float3 fmodf(float3 a, float3 b)\n{\n    return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z));\n}\ninline __host__ __device__ float4 fmodf(float4 a, float4 b)\n{\n    return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w));\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// absolute value\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float2 fabs(float2 v)\n{\n    return 
make_float2(fabs(v.x), fabs(v.y));\n}\ninline __host__ __device__ float3 fabs(float3 v)\n{\n    return make_float3(fabs(v.x), fabs(v.y), fabs(v.z));\n}\ninline __host__ __device__ float4 fabs(float4 v)\n{\n    return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w));\n}\n\ninline __host__ __device__ int2 abs(int2 v)\n{\n    return make_int2(abs(v.x), abs(v.y));\n}\ninline __host__ __device__ int3 abs(int3 v)\n{\n    return make_int3(abs(v.x), abs(v.y), abs(v.z));\n}\ninline __host__ __device__ int4 abs(int4 v)\n{\n    return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w));\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// reflect\n// - returns reflection of incident ray I around surface normal N\n// - N should be normalized, reflected vector's length is equal to length of I\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float3 reflect(float3 i, float3 n)\n{\n    return i - 2.0f * n * dot(n, i);\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// cross product\n////////////////////////////////////////////////////////////////////////////////\n\ninline __host__ __device__ float3 cross(float3 a, float3 b)\n{\n    return make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// smoothstep\n// - returns 0 if x < a\n// - returns 1 if x > b\n// - otherwise returns smooth interpolation between 0 and 1 based on x\n////////////////////////////////////////////////////////////////////////////////\n\ninline __device__ __host__ float smoothstep(float a, float b, float x)\n{\n    float y = clamp((x - a) / (b - a), 0.0f, 1.0f);\n    return (y * y * (3.0f - (2.0f * y)));\n}\ninline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)\n{\n    float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f);\n    return (y 
* y * (make_float2(3.0f) - (make_float2(2.0f) * y)));\n}\ninline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)\n{\n    float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f);\n    return (y * y * (make_float3(3.0f) - (make_float3(2.0f) * y)));\n}\ninline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)\n{\n    float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f);\n    return (y * y * (make_float4(3.0f) - (make_float4(2.0f) * y)));\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// sign\n////////////////////////////////////////////////////////////////////////////////\ninline __device__ __host__ float3 sign(float3 a)\n{\n    return make_float3(\n        copysignf(1.0f, a.x), copysignf(1.0f, a.y), copysignf(1.0f, a.z));\n}\n\n#endif"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/intersection.cu",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n\n#include \"include/helpers_cuda.h\"\n\ntemplate <typename scalar_t>\ninline __host__ __device__ void _swap(scalar_t &a, scalar_t &b)\n{\n    scalar_t c = a;\n    a = b;\n    b = c;\n}\n\ntemplate <typename scalar_t>\ninline __host__ __device__ void _ray_aabb_intersect(\n    const scalar_t *rays_o,\n    const scalar_t *rays_d,\n    const scalar_t *aabb,\n    scalar_t *near,\n    scalar_t *far)\n{\n    // aabb is [xmin, ymin, zmin, xmax, ymax, zmax]\n    scalar_t tmin = (aabb[0] - rays_o[0]) / rays_d[0];\n    scalar_t tmax = (aabb[3] - rays_o[0]) / rays_d[0];\n    if (tmin > tmax)\n        _swap(tmin, tmax);\n\n    scalar_t tymin = (aabb[1] - rays_o[1]) / rays_d[1];\n    scalar_t tymax = (aabb[4] - rays_o[1]) / rays_d[1];\n    if (tymin > tymax)\n        _swap(tymin, tymax);\n\n    if (tmin > tymax || tymin > tmax)\n    {\n        *near = 1e10;\n        *far = 1e10;\n        return;\n    }\n\n    if (tymin > tmin)\n        tmin = tymin;\n    if (tymax < tmax)\n        tmax = tymax;\n\n    scalar_t tzmin = (aabb[2] - rays_o[2]) / rays_d[2];\n    scalar_t tzmax = (aabb[5] - rays_o[2]) / rays_d[2];\n    if (tzmin > tzmax)\n        _swap(tzmin, tzmax);\n\n    if (tmin > tzmax || tzmin > tmax)\n    {\n        *near = 1e10;\n        *far = 1e10;\n        return;\n    }\n\n    if (tzmin > tmin)\n        tmin = tzmin;\n    if (tzmax < tmax)\n        tmax = tzmax;\n\n    *near = tmin;\n    *far = tmax;\n    return;\n}\n\ntemplate <typename scalar_t>\n__global__ void ray_aabb_intersect_kernel(\n    const int N,\n    const scalar_t *rays_o,\n    const scalar_t *rays_d,\n    const scalar_t *aabb,\n    scalar_t *t_min,\n    scalar_t *t_max)\n{\n    // aabb is [xmin, ymin, zmin, xmax, ymax, zmax]\n    CUDA_GET_THREAD_ID(thread_id, N);\n\n    // locate\n    rays_o += thread_id * 3;\n    rays_d += thread_id * 3;\n    t_min += thread_id;\n    t_max += thread_id;\n\n    _ray_aabb_intersect<scalar_t>(rays_o, 
rays_d, aabb, t_min, t_max);\n\n    scalar_t zero = static_cast<scalar_t>(0.f);\n    *t_min = *t_min > zero ? *t_min : zero;\n    return;\n}\n\n/**\n * @brief Ray AABB Test\n *\n * @param rays_o Ray origins. Tensor with shape [N, 3].\n * @param rays_d Normalized ray directions. Tensor with shape [N, 3].\n * @param aabb Scene AABB [xmin, ymin, zmin, xmax, ymax, zmax]. Tensor with shape [6].\n * @return std::vector<torch::Tensor>\n *  Ray AABB intersection {t_min, t_max} with shape [N] respectively. Note the t_min is\n *  clipped to minimum zero. 1e10 is returned if no intersection.\n */\nstd::vector<torch::Tensor> ray_aabb_intersect(\n    const torch::Tensor rays_o, const torch::Tensor rays_d, const torch::Tensor aabb)\n{\n    DEVICE_GUARD(rays_o);\n    CHECK_INPUT(rays_o);\n    CHECK_INPUT(rays_d);\n    CHECK_INPUT(aabb);\n    TORCH_CHECK(rays_o.ndimension() == 2 & rays_o.size(1) == 3)\n    TORCH_CHECK(rays_d.ndimension() == 2 & rays_d.size(1) == 3)\n    TORCH_CHECK(aabb.ndimension() == 1 & aabb.size(0) == 6)\n\n    const int N = rays_o.size(0);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(N, threads);\n\n    torch::Tensor t_min = torch::empty({N}, rays_o.options());\n    torch::Tensor t_max = torch::empty({N}, rays_o.options());\n\n    AT_DISPATCH_FLOATING_TYPES_AND_HALF(\n        rays_o.scalar_type(), \"ray_aabb_intersect\",\n        ([&]\n         { ray_aabb_intersect_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n               N,\n               rays_o.data_ptr<scalar_t>(),\n               rays_d.data_ptr<scalar_t>(),\n               aabb.data_ptr<scalar_t>(),\n               t_min.data_ptr<scalar_t>(),\n               t_max.data_ptr<scalar_t>()); }));\n\n    return {t_min, t_max};\n}"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/pack.cu",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n\n#include \"include/helpers_cuda.h\"\n\n__global__ void unpack_info_kernel(\n    // input\n    const int n_rays,\n    const int *packed_info,\n    // output\n    int64_t *ray_indices)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0];  // point idx start.\n    const int steps = packed_info[i * 2 + 1]; // point idx shift.\n    if (steps == 0)\n        return;\n\n    ray_indices += base;\n\n    for (int j = 0; j < steps; ++j)\n    {\n        ray_indices[j] = i;\n    }\n}\n\n__global__ void unpack_info_to_mask_kernel(\n    // input\n    const int n_rays,\n    const int *packed_info,\n    const int n_samples,\n    // output\n    bool *masks) // [n_rays, n_samples]\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0];  // point idx start.\n    const int steps = packed_info[i * 2 + 1]; // point idx shift.\n    if (steps == 0)\n        return;\n\n    masks += i * n_samples;\n\n    for (int j = 0; j < steps; ++j)\n    {\n        masks[j] = true;\n    }\n}\n\ntemplate <typename scalar_t>\n__global__ void unpack_data_kernel(\n    const uint32_t n_rays,\n    const int *packed_info, // input ray & point indices.\n    const int data_dim,\n    const scalar_t *data,\n    const int n_sampler_per_ray,\n    scalar_t *unpacked_data) // (n_rays, n_sampler_per_ray, data_dim)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0];  // point idx start.\n    const int steps = packed_info[i * 2 + 1]; // point idx shift.\n    if (steps == 0)\n        return;\n\n    data += base * data_dim;\n    unpacked_data += i * n_sampler_per_ray * data_dim;\n\n    for (int j = 0; j < steps; j++)\n    {\n        for (int k = 0; k < data_dim; k++)\n        {\n            unpacked_data[j * data_dim + k] = data[j * data_dim + k];\n        }\n    }\n    return;\n}\n\ntorch::Tensor 
unpack_info(const torch::Tensor packed_info, const int n_samples)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n\n    const int n_rays = packed_info.size(0);\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    // int n_samples = packed_info[n_rays - 1].sum(0).item<int>();\n    torch::Tensor ray_indices = torch::empty(\n        {n_samples}, packed_info.options().dtype(torch::kLong));\n\n    unpack_info_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_rays,\n        packed_info.data_ptr<int>(),\n        ray_indices.data_ptr<int64_t>());\n    return ray_indices;\n}\n\n\ntorch::Tensor unpack_info_to_mask(\n    const torch::Tensor packed_info, const int n_samples)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n\n    const int n_rays = packed_info.size(0);\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    torch::Tensor masks = torch::zeros(\n        {n_rays, n_samples}, packed_info.options().dtype(torch::kBool));\n\n    unpack_info_to_mask_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_rays,\n        packed_info.data_ptr<int>(),\n        n_samples,\n        masks.data_ptr<bool>());\n    return masks;\n}\n\ntorch::Tensor unpack_data(\n    torch::Tensor packed_info,\n    torch::Tensor data,\n    int n_samples_per_ray)\n{\n    DEVICE_GUARD(packed_info);\n\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(data);\n\n    TORCH_CHECK(packed_info.ndimension() == 2 & packed_info.size(1) == 2);\n    TORCH_CHECK(data.ndimension() == 2);\n\n    const int n_rays = packed_info.size(0);\n    const int n_samples = data.size(0);\n    const int data_dim = data.size(1);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    torch::Tensor unpacked_data = torch::zeros(\n        {n_rays, n_samples_per_ray, data_dim}, data.options());\n\n    
AT_DISPATCH_ALL_TYPES(\n        data.scalar_type(),\n        \"unpack_data\",\n        ([&]\n         { unpack_data_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n               n_rays,\n               // inputs\n               packed_info.data_ptr<int>(),\n               data_dim,\n               data.data_ptr<scalar_t>(),\n               n_samples_per_ray,\n               // outputs\n               unpacked_data.data_ptr<scalar_t>()); }));\n\n    return unpacked_data;\n}\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/pybind.cu",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n\n#include \"include/helpers_cuda.h\"\n#include \"include/helpers_math.h\"\n#include \"include/helpers_contraction.h\"\n\n\nstd::vector<torch::Tensor> ray_aabb_intersect(\n    const torch::Tensor rays_o,\n    const torch::Tensor rays_d,\n    const torch::Tensor aabb);\n\nstd::vector<torch::Tensor> ray_marching(\n    // rays\n    const torch::Tensor rays_o,\n    const torch::Tensor rays_d,\n    const torch::Tensor t_min,\n    const torch::Tensor t_max,\n    // occupancy grid & contraction\n    const torch::Tensor roi,\n    const torch::Tensor grid_binary,\n    const ContractionType type,\n    // sampling\n    const float step_size,\n    const float cone_angle);\n\ntorch::Tensor unpack_info(\n    const torch::Tensor packed_info, const int n_samples);\n\ntorch::Tensor unpack_info_to_mask(\n    const torch::Tensor packed_info, const int n_samples);\n\ntorch::Tensor grid_query(\n    const torch::Tensor samples,\n    // occupancy grid & contraction\n    const torch::Tensor roi,\n    const torch::Tensor grid_value,\n    const ContractionType type);\n\ntorch::Tensor contract(\n    const torch::Tensor samples,\n    // contraction\n    const torch::Tensor roi,\n    const ContractionType type);\n\ntorch::Tensor contract_inv(\n    const torch::Tensor samples,\n    // contraction\n    const torch::Tensor roi,\n    const ContractionType type);\n\nstd::vector<torch::Tensor> ray_resampling(\n    torch::Tensor packed_info,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor weights,\n    const int steps);\n\ntorch::Tensor unpack_data(\n    torch::Tensor packed_info,\n    torch::Tensor data,\n    int n_samples_per_ray);\n\n// cub implementations: parallel across samples\nbool is_cub_available() {\n    return (bool) CUB_SUPPORTS_SCAN_BY_KEY();\n}\ntorch::Tensor transmittance_from_sigma_forward_cub(\n    torch::Tensor ray_indices,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    
torch::Tensor sigmas);\ntorch::Tensor transmittance_from_sigma_backward_cub(\n    torch::Tensor ray_indices,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor transmittance,\n    torch::Tensor transmittance_grad);\ntorch::Tensor transmittance_from_alpha_forward_cub(\n    torch::Tensor ray_indices, torch::Tensor alphas);\ntorch::Tensor transmittance_from_alpha_backward_cub(\n    torch::Tensor ray_indices,\n    torch::Tensor alphas,\n    torch::Tensor transmittance,\n    torch::Tensor transmittance_grad);\n\n// naive implementations: parallel across rays\ntorch::Tensor transmittance_from_sigma_forward_naive(\n    torch::Tensor packed_info,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor sigmas);\ntorch::Tensor transmittance_from_sigma_backward_naive(\n    torch::Tensor packed_info,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor transmittance,\n    torch::Tensor transmittance_grad);\ntorch::Tensor transmittance_from_alpha_forward_naive(\n    torch::Tensor packed_info, \n    torch::Tensor alphas);\ntorch::Tensor transmittance_from_alpha_backward_naive(\n    torch::Tensor packed_info,\n    torch::Tensor alphas,\n    torch::Tensor transmittance,\n    torch::Tensor transmittance_grad);\n\ntorch::Tensor weight_from_sigma_forward_naive(\n    torch::Tensor packed_info,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor sigmas);\ntorch::Tensor weight_from_sigma_backward_naive(\n    torch::Tensor weights,\n    torch::Tensor grad_weights,\n    torch::Tensor packed_info,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor sigmas);\ntorch::Tensor weight_from_alpha_forward_naive(\n    torch::Tensor packed_info, \n    torch::Tensor alphas);\ntorch::Tensor weight_from_alpha_backward_naive(\n    torch::Tensor weights,\n    torch::Tensor grad_weights,\n    torch::Tensor packed_info,\n    torch::Tensor alphas);\n\ntorch::Tensor weight_from_alpha_patch_based_forward_naive(\n    
torch::Tensor packed_info,\n    torch::Tensor alphas);\n\ntorch::Tensor weight_from_alpha_patch_based_backward_naive(\n    torch::Tensor weights,\n    torch::Tensor grad_weights,\n    torch::Tensor packed_info,\n    torch::Tensor alphas);\n\nstd::vector<torch::Tensor> weight_and_transmittance_from_alpha_patch_based_forward_naive(\n    torch::Tensor packed_info, // (n_patches, 2)\n    torch::Tensor alphas // (n_samples, patches_size, 1)\n    );\n\ntorch::Tensor weight_and_transmittance_from_alpha_patch_based_backward_naive(\n    torch::Tensor weights,\n    torch::Tensor grad_weights,  // (n_samples, patches_size, 1)\n    torch::Tensor packed_info,\n    torch::Tensor alphas);\n\ntorch::Tensor transmittance_from_alpha_patch_based_forward_naive(\n    torch::Tensor packed_info, torch::Tensor alphas);\n\ntorch::Tensor transmittance_from_alpha_patch_based_backward_naive(\n    torch::Tensor packed_info,\n    torch::Tensor alphas,\n    torch::Tensor transmittance,\n    torch::Tensor transmittance_grad);\n\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m)\n{\n    // contraction\n    py::enum_<ContractionType>(m, \"ContractionType\")\n        .value(\"AABB\", ContractionType::AABB)\n        .value(\"UN_BOUNDED_TANH\", ContractionType::UN_BOUNDED_TANH)\n        .value(\"UN_BOUNDED_SPHERE\", ContractionType::UN_BOUNDED_SPHERE);\n    m.def(\"contract\", &contract);\n    m.def(\"contract_inv\", &contract_inv);\n\n    // grid\n    m.def(\"grid_query\", &grid_query);\n\n    // marching\n    m.def(\"ray_aabb_intersect\", &ray_aabb_intersect);\n    m.def(\"ray_marching\", &ray_marching);\n    m.def(\"ray_resampling\", &ray_resampling);\n\n    // rendering\n    m.def(\"is_cub_available\", is_cub_available);\n    m.def(\"transmittance_from_sigma_forward_cub\", transmittance_from_sigma_forward_cub);\n    m.def(\"transmittance_from_sigma_backward_cub\", transmittance_from_sigma_backward_cub);\n    m.def(\"transmittance_from_alpha_forward_cub\", transmittance_from_alpha_forward_cub);\n    
m.def(\"transmittance_from_alpha_backward_cub\", transmittance_from_alpha_backward_cub);\n    \n    m.def(\"transmittance_from_sigma_forward_naive\", transmittance_from_sigma_forward_naive);\n    m.def(\"transmittance_from_sigma_backward_naive\", transmittance_from_sigma_backward_naive);\n    m.def(\"transmittance_from_alpha_forward_naive\", transmittance_from_alpha_forward_naive);\n    m.def(\"transmittance_from_alpha_backward_naive\", transmittance_from_alpha_backward_naive);\n\n    m.def(\"weight_from_sigma_forward_naive\", weight_from_sigma_forward_naive);\n    m.def(\"weight_from_sigma_backward_naive\", weight_from_sigma_backward_naive);\n    m.def(\"weight_from_alpha_forward_naive\", weight_from_alpha_forward_naive);\n    m.def(\"weight_from_alpha_backward_naive\", weight_from_alpha_backward_naive);\n    m.def(\"weight_from_alpha_patch_based_forward_naive\", weight_from_alpha_patch_based_forward_naive);\n    m.def(\"weight_from_alpha_patch_based_backward_naive\", weight_from_alpha_patch_based_backward_naive);\n    m.def(\"weight_and_transmittance_from_alpha_patch_based_forward_naive\", weight_and_transmittance_from_alpha_patch_based_forward_naive);\n    m.def(\"weight_and_transmittance_from_alpha_patch_based_backward_naive\", weight_and_transmittance_from_alpha_patch_based_backward_naive);\n    m.def(\"transmittance_from_alpha_patch_based_forward_naive\", transmittance_from_alpha_patch_based_forward_naive);\n    m.def(\"transmittance_from_alpha_patch_based_backward_naive\", transmittance_from_alpha_patch_based_backward_naive);\n    // pack & unpack\n    m.def(\"unpack_data\", &unpack_data);\n    m.def(\"unpack_info\", &unpack_info);\n    m.def(\"unpack_info_to_mask\", &unpack_info_to_mask);\n}"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/ray_marching.cu",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n\n#include \"include/helpers_cuda.h\"\n#include \"include/helpers_math.h\"\n#include \"include/helpers_contraction.h\"\n\ninline __device__ __host__ float calc_dt(\n    const float t, const float cone_angle,\n    const float dt_min, const float dt_max)\n{\n    return clamp(t * cone_angle, dt_min, dt_max);\n}\n\ninline __device__ __host__ int grid_idx_at(\n    const float3 xyz_unit, const int3 grid_res)\n{\n    // xyz should be always in [0, 1]^3.\n    int3 ixyz = make_int3(xyz_unit * make_float3(grid_res));\n    ixyz = clamp(ixyz, make_int3(0, 0, 0), grid_res - 1);\n    int3 grid_offset = make_int3(grid_res.y * grid_res.z, grid_res.z, 1);\n    int idx = dot(ixyz, grid_offset);\n    return idx;\n}\n\ntemplate <typename scalar_t>\ninline __device__ __host__ scalar_t grid_occupied_at(\n    const float3 xyz,\n    const float3 roi_min, const float3 roi_max,\n    ContractionType type,\n    const int3 grid_res, const scalar_t *grid_value)\n{\n    if (type == ContractionType::AABB &&\n        (xyz.x < roi_min.x || xyz.x > roi_max.x ||\n         xyz.y < roi_min.y || xyz.y > roi_max.y ||\n         xyz.z < roi_min.z || xyz.z > roi_max.z))\n    {\n        return false;\n    }\n    float3 xyz_unit = apply_contraction(\n        xyz, roi_min, roi_max, type);\n    int idx = grid_idx_at(xyz_unit, grid_res);\n    return grid_value[idx];\n}\n\n// dda like step\ninline __device__ __host__ float distance_to_next_voxel(\n    const float3 xyz, const float3 dir, const float3 inv_dir,\n    const float3 roi_min, const float3 roi_max, const int3 grid_res)\n{\n    float3 _occ_res = make_float3(grid_res);\n    float3 _xyz = roi_to_unit(xyz, roi_min, roi_max) * _occ_res;\n    float3 txyz = ((floorf(_xyz + 0.5f + 0.5f * sign(dir)) - _xyz) * inv_dir) / _occ_res * (roi_max - roi_min);\n    float t = min(min(txyz.x, txyz.y), txyz.z);\n    return fmaxf(t, 0.0f);\n}\n\ninline __device__ __host__ float advance_to_next_voxel(\n    
const float t, const float dt_min,\n    const float3 xyz, const float3 dir, const float3 inv_dir,\n    const float3 roi_min, const float3 roi_max, const int3 grid_res, const float far)\n{\n    // Regular stepping (may be slower but matches non-empty space)\n    float t_target = t + distance_to_next_voxel(\n                             xyz, dir, inv_dir, roi_min, roi_max, grid_res);\n    \n    t_target = min(t_target, far);\n    float _t = t;\n    do\n    {\n        _t += dt_min;\n    } while (_t < t_target);\n    return _t;\n}\n\n// -------------------------------------------------------------------------------\n// Raymarching\n// -------------------------------------------------------------------------------\n\n__global__ void ray_marching_kernel(\n    // rays info\n    const uint32_t n_rays,\n    const float *rays_o, // shape (n_rays, 3)\n    const float *rays_d, // shape (n_rays, 3)\n    const float *t_min,  // shape (n_rays,)\n    const float *t_max,  // shape (n_rays,)\n    // occupancy grid & contraction\n    const float *roi,\n    const int3 grid_res,\n    const bool *grid_binary, // shape (reso_x, reso_y, reso_z)\n    const ContractionType type,\n    // sampling\n    const float step_size,\n    const float cone_angle,\n    const int *packed_info,\n    // first round outputs\n    int *num_steps,\n    // second round outputs\n    int64_t *ray_indices,\n    float *t_starts,\n    float *t_ends)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    bool is_first_round = (packed_info == nullptr);\n\n    // locate\n    rays_o += i * 3;\n    rays_d += i * 3;\n    t_min += i;\n    t_max += i;\n\n    if (is_first_round)\n    {\n        num_steps += i;\n    }\n    else\n    {\n        int base = packed_info[i * 2 + 0];\n        int steps = packed_info[i * 2 + 1];\n        t_starts += base;\n        t_ends += base;\n        ray_indices += base;\n    }\n\n    const float3 origin = make_float3(rays_o[0], rays_o[1], rays_o[2]);\n    const float3 dir = make_float3(rays_d[0], 
rays_d[1], rays_d[2]);\n    const float3 inv_dir = 1.0f / dir;\n    const float near = t_min[0], far = t_max[0];\n\n    const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);\n    const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);\n\n    // TODO: compute dt_max from occ resolution.\n    float dt_min = step_size;\n    float dt_max = 1e10f;\n\n    int j = 0;\n    float t0 = near;\n    float dt = calc_dt(t0, cone_angle, dt_min, dt_max);\n    float t1 = t0 + dt;\n    float t_mid = (t0 + t1) * 0.5f;\n\n    while (t_mid < far)\n    {\n        // current center\n        const float3 xyz = origin + t_mid * dir;\n        if (grid_occupied_at(xyz, roi_min, roi_max, type, grid_res, grid_binary))\n        {\n            if (!is_first_round)\n            {\n                t_starts[j] = t0;\n                t_ends[j] = t1;\n                ray_indices[j] = i;\n            }\n            ++j;\n            // march to next sample\n            t0 = t1;\n            t1 = t0 + calc_dt(t0, cone_angle, dt_min, dt_max);\n            t_mid = (t0 + t1) * 0.5f;\n        }\n        else\n        {\n            // march to next sample\n            switch (type)\n            {\n            case ContractionType::AABB:\n                // no contraction\n                t_mid = advance_to_next_voxel(\n                    t_mid, dt_min, xyz, dir, inv_dir, roi_min, roi_max, grid_res, far);\n                dt = calc_dt(t_mid, cone_angle, dt_min, dt_max);\n                t0 = t_mid - dt * 0.5f;\n                t1 = t_mid + dt * 0.5f;\n                break;\n\n            default:\n                // any type of scene contraction does not work with DDA.\n                t0 = t1;\n                t1 = t0 + calc_dt(t0, cone_angle, dt_min, dt_max);\n                t_mid = (t0 + t1) * 0.5f;\n                break;\n            }\n        }\n    }\n\n    if (is_first_round)\n    {\n        *num_steps = j;\n    }\n    return;\n}\n\nstd::vector<torch::Tensor> ray_marching(\n    // 
rays\n    const torch::Tensor rays_o,\n    const torch::Tensor rays_d,\n    const torch::Tensor t_min,\n    const torch::Tensor t_max,\n    // occupancy grid & contraction\n    const torch::Tensor roi,\n    const torch::Tensor grid_binary,\n    const ContractionType type,\n    // sampling\n    const float step_size,\n    const float cone_angle)\n{\n    DEVICE_GUARD(rays_o);\n\n    CHECK_INPUT(rays_o);\n    CHECK_INPUT(rays_d);\n    CHECK_INPUT(t_min);\n    CHECK_INPUT(t_max);\n    CHECK_INPUT(roi);\n    CHECK_INPUT(grid_binary);\n    TORCH_CHECK(rays_o.ndimension() == 2 & rays_o.size(1) == 3)\n    TORCH_CHECK(rays_d.ndimension() == 2 & rays_d.size(1) == 3)\n    TORCH_CHECK(t_min.ndimension() == 1)\n    TORCH_CHECK(t_max.ndimension() == 1)\n    TORCH_CHECK(roi.ndimension() == 1 & roi.size(0) == 6)\n    TORCH_CHECK(grid_binary.ndimension() == 3)\n\n    const int n_rays = rays_o.size(0);\n    const int3 grid_res = make_int3(\n        grid_binary.size(0), grid_binary.size(1), grid_binary.size(2));\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    // helper counter\n    torch::Tensor num_steps = torch::empty(\n        {n_rays}, rays_o.options().dtype(torch::kInt32));\n\n    // count number of samples per ray\n    ray_marching_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        // rays\n        n_rays,\n        rays_o.data_ptr<float>(),\n        rays_d.data_ptr<float>(),\n        t_min.data_ptr<float>(),\n        t_max.data_ptr<float>(),\n        // occupancy grid & contraction\n        roi.data_ptr<float>(),\n        grid_res,\n        grid_binary.data_ptr<bool>(),\n        type,\n        // sampling\n        step_size,\n        cone_angle,\n        nullptr, /* packed_info */\n        // outputs\n        num_steps.data_ptr<int>(),\n        nullptr, /* ray_indices */\n        nullptr, /* t_starts */\n        nullptr /* t_ends */);\n\n    torch::Tensor cum_steps = num_steps.cumsum(0, 
torch::kInt32);\n    torch::Tensor packed_info = torch::stack({cum_steps - num_steps, num_steps}, 1);\n\n    // output samples starts and ends\n    int total_steps = cum_steps[cum_steps.size(0) - 1].item<int>();\n    torch::Tensor t_starts = torch::empty({total_steps, 1}, rays_o.options());\n    torch::Tensor t_ends = torch::empty({total_steps, 1}, rays_o.options());\n    torch::Tensor ray_indices = torch::empty({total_steps}, cum_steps.options().dtype(torch::kLong));\n\n    ray_marching_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        // rays\n        n_rays,\n        rays_o.data_ptr<float>(),\n        rays_d.data_ptr<float>(),\n        t_min.data_ptr<float>(),\n        t_max.data_ptr<float>(),\n        // occupancy grid & contraction\n        roi.data_ptr<float>(),\n        grid_res,\n        grid_binary.data_ptr<bool>(),\n        type,\n        // sampling\n        step_size,\n        cone_angle,\n        packed_info.data_ptr<int>(),\n        // outputs\n        nullptr, /* num_steps */\n        ray_indices.data_ptr<int64_t>(),\n        t_starts.data_ptr<float>(),\n        t_ends.data_ptr<float>());\n\n    return {packed_info, ray_indices, t_starts, t_ends};\n}\n\n// ----------------------------------------------------------------------------\n// Query the occupancy grid\n// ----------------------------------------------------------------------------\n\ntemplate <typename scalar_t>\n__global__ void query_occ_kernel(\n    // rays info\n    const uint32_t n_samples,\n    const float *samples, // shape (n_samples, 3)\n    // occupancy grid & contraction\n    const float *roi,\n    const int3 grid_res,\n    const scalar_t *grid_value, // shape (reso_x, reso_y, reso_z)\n    const ContractionType type,\n    // outputs\n    scalar_t *occs)\n{\n    CUDA_GET_THREAD_ID(i, n_samples);\n\n    // locate\n    samples += i * 3;\n    occs += i;\n\n    const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);\n    const float3 roi_max = 
make_float3(roi[3], roi[4], roi[5]);\n    const float3 xyz = make_float3(samples[0], samples[1], samples[2]);\n\n    *occs = grid_occupied_at(xyz, roi_min, roi_max, type, grid_res, grid_value);\n    return;\n}\n\ntorch::Tensor grid_query(\n    const torch::Tensor samples,\n    // occupancy grid & contraction\n    const torch::Tensor roi,\n    const torch::Tensor grid_value,\n    const ContractionType type)\n{\n    DEVICE_GUARD(samples);\n    CHECK_INPUT(samples);\n\n    const int n_samples = samples.size(0);\n    const int3 grid_res = make_int3(\n        grid_value.size(0), grid_value.size(1), grid_value.size(2));\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads);\n\n    torch::Tensor occs = torch::empty({n_samples}, grid_value.options());\n\n    AT_DISPATCH_FLOATING_TYPES_AND(\n        at::ScalarType::Bool,\n        occs.scalar_type(),\n        \"grid_query\",\n        ([&]\n         { query_occ_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n               n_samples,\n               samples.data_ptr<float>(),\n               // grid\n               roi.data_ptr<float>(),\n               grid_res,\n               grid_value.data_ptr<scalar_t>(),\n               type,\n               // outputs\n               occs.data_ptr<scalar_t>()); }));\n\n    return occs;\n}\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_transmittance.cu",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n\n#include \"include/helpers_cuda.h\"\n\n__global__ void transmittance_from_sigma_forward_kernel(\n    const uint32_t n_rays,\n    // inputs\n    const int *packed_info,\n    const float *starts,\n    const float *ends,\n    const float *sigmas,\n    // outputs\n    float *transmittance)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0];\n    const int steps = packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    starts += base;\n    ends += base;\n    sigmas += base;\n    transmittance += base;\n\n    // accumulation\n    float cumsum = 0.0f;\n    for (int j = 0; j < steps; ++j)\n    {\n        transmittance[j] = __expf(-cumsum);\n        cumsum += sigmas[j] * (ends[j] - starts[j]);\n    }\n\n    // // another way to impl:\n    // float T = 1.f;\n    // for (int j = 0; j < steps; ++j)\n    // {\n    //     const float delta = ends[j] - starts[j];\n    //     const float alpha = 1.f - __expf(-sigmas[j] * delta);\n    //     transmittance[j] = T;\n    //     T *= (1.f - alpha);\n    // }\n    return;\n}\n\n__global__ void transmittance_from_sigma_backward_kernel(\n    const uint32_t n_rays,\n    // inputs\n    const int *packed_info,\n    const float *starts,\n    const float *ends,\n    const float *transmittance,\n    const float *transmittance_grad,\n    // outputs\n    float *sigmas_grad)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0];\n    const int steps = packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    transmittance += base;\n    transmittance_grad += base;\n    starts += base;\n    ends += base;\n    sigmas_grad += base;\n\n    // accumulation\n    float cumsum = 0.0f;\n    for (int j = steps - 1; j >= 0; --j)\n    {\n        sigmas_grad[j] = cumsum * (ends[j] - starts[j]);\n        cumsum += -transmittance_grad[j] * transmittance[j];\n    }\n    
return;\n}\n\n__global__ void transmittance_from_alpha_forward_kernel(\n    const uint32_t n_rays,\n    // inputs\n    const int *packed_info,\n    const float *alphas,\n    // outputs\n    float *transmittance)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0];\n    const int steps = packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    alphas += base;\n    transmittance += base;\n\n    // accumulation\n    float T = 1.0f;\n    for (int j = 0; j < steps; ++j)\n    {\n        transmittance[j] = T;\n        T *= (1.0f - alphas[j]);\n    }\n    return;\n}\n\n__global__ void transmittance_from_alpha_backward_kernel(\n    const uint32_t n_rays,\n    // inputs\n    const int *packed_info,\n    const float *alphas,\n    const float *transmittance,\n    const float *transmittance_grad,\n    // outputs\n    float *alphas_grad)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0];\n    const int steps = packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    alphas += base;\n    transmittance += base;\n    transmittance_grad += base;\n    alphas_grad += base;\n\n    // accumulation\n    float cumsum = 0.0f;\n    for (int j = steps - 1; j >= 0; --j)\n    {\n        alphas_grad[j] = cumsum / fmax(1.0f - alphas[j], 1e-10f);\n        cumsum += -transmittance_grad[j] * transmittance[j];\n    }\n    return;\n}\n\n__global__ void transmittance_from_alpha_patch_based_forward_kernel(\n    const uint32_t n_patches,\n    const uint32_t patch_size,\n    // inputs\n    const int *packed_info,\n    const float *alphas,\n    // outputs\n    float *transmittance)\n{\n    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);  // i is the patch id, k is the ray id within the patch\n\n    // locate\n    const int base = packed_info[i * 2 + 0];  // get the base of the patch\n    const int steps = packed_info[i * 2 + 1]; // get the steps of the patch\n    if (steps == 
0)\n        return;\n\n    alphas += base * patch_size;  // move the pointer to the base\n    transmittance += base * patch_size;  // move the pointer to the base\n\n    // accumulation\n    float T = 1.0f;\n    for (int j = 0; j < steps; ++j)\n    {\n        const uint32_t ray_id = j * patch_size + k;\n        transmittance[ray_id] = T;\n        T *= (1.0f - alphas[j]);\n    }\n    return;\n}\n\n__global__ void transmittance_from_alpha_patch_based_backward_kernel(\n    const uint32_t n_patches,\n    const uint32_t patch_size,\n    // inputs\n    const int *packed_info,\n    const float *alphas,\n    const float *transmittance,\n    const float *transmittance_grad,\n    // outputs\n    float *alphas_grad)\n{\n    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);  // i is the patch id, k is the ray id within the patch\n\n    // locate\n    const int base = packed_info[i * 2 + 0];\n    const int steps = packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    alphas += base * patch_size;\n    transmittance += base * patch_size;\n    transmittance_grad += base * patch_size;\n    alphas_grad += base * patch_size;\n\n    // accumulation\n    float cumsum = 0.0f;\n    for (int j = steps - 1; j >= 0; --j)\n    {\n        const uint32_t sample_idx = j * patch_size + k;\n        alphas_grad[sample_idx] = cumsum / fmax(1.0f - alphas[sample_idx], 1e-10f);\n        cumsum += -transmittance_grad[sample_idx] * transmittance[sample_idx];\n    }\n    return;\n}\n\ntorch::Tensor transmittance_from_sigma_forward_naive(\n    torch::Tensor packed_info,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor sigmas)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(starts);\n    CHECK_INPUT(ends);\n    CHECK_INPUT(sigmas);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);\n    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);\n    
TORCH_CHECK(sigmas.ndimension() == 2 & sigmas.size(1) == 1);\n\n    const uint32_t n_samples = sigmas.size(0);\n    const uint32_t n_rays = packed_info.size(0);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    // outputs\n    torch::Tensor transmittance = torch::empty_like(sigmas);\n\n    // parallel across rays\n    transmittance_from_sigma_forward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_rays,\n        // inputs\n        packed_info.data_ptr<int>(),\n        starts.data_ptr<float>(),\n        ends.data_ptr<float>(),\n        sigmas.data_ptr<float>(),\n        // outputs\n        transmittance.data_ptr<float>());\n    return transmittance;\n}\n\ntorch::Tensor transmittance_from_sigma_backward_naive(\n    torch::Tensor packed_info,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor transmittance,\n    torch::Tensor transmittance_grad)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(starts);\n    CHECK_INPUT(ends);\n    CHECK_INPUT(transmittance);\n    CHECK_INPUT(transmittance_grad);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);\n    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);\n    TORCH_CHECK(transmittance.ndimension() == 2 & transmittance.size(1) == 1);\n    TORCH_CHECK(transmittance_grad.ndimension() == 2 & transmittance_grad.size(1) == 1);\n\n    const uint32_t n_samples = transmittance.size(0);\n    const uint32_t n_rays = packed_info.size(0);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    // outputs\n    torch::Tensor sigmas_grad = torch::empty_like(transmittance);\n\n    // parallel across rays\n    transmittance_from_sigma_backward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_rays,\n        // inputs\n        
packed_info.data_ptr<int>(),\n        starts.data_ptr<float>(),\n        ends.data_ptr<float>(),\n        transmittance.data_ptr<float>(),\n        transmittance_grad.data_ptr<float>(),\n        // outputs\n        sigmas_grad.data_ptr<float>());\n    return sigmas_grad;\n}\n\ntorch::Tensor transmittance_from_alpha_forward_naive(\n    torch::Tensor packed_info, torch::Tensor alphas)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(alphas);\n    TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n\n    const uint32_t n_samples = alphas.size(0);\n    const uint32_t n_rays = packed_info.size(0);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    // outputs\n    torch::Tensor transmittance = torch::empty_like(alphas);\n\n    // parallel across rays\n    transmittance_from_alpha_forward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_rays,\n        // inputs\n        packed_info.data_ptr<int>(),\n        alphas.data_ptr<float>(),\n        // outputs\n        transmittance.data_ptr<float>());\n    return transmittance;\n}\n\ntorch::Tensor transmittance_from_alpha_backward_naive(\n    torch::Tensor packed_info,\n    torch::Tensor alphas,\n    torch::Tensor transmittance,\n    torch::Tensor transmittance_grad)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(transmittance);\n    CHECK_INPUT(transmittance_grad);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(transmittance.ndimension() == 2 & transmittance.size(1) == 1);\n    TORCH_CHECK(transmittance_grad.ndimension() == 2 & transmittance_grad.size(1) == 1);\n\n    const uint32_t n_samples = transmittance.size(0);\n    const uint32_t n_rays = packed_info.size(0);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    // outputs\n    torch::Tensor 
alphas_grad = torch::empty_like(alphas);\n\n    // parallel across rays\n    transmittance_from_alpha_backward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_rays,\n        // inputs\n        packed_info.data_ptr<int>(),\n        alphas.data_ptr<float>(),\n        transmittance.data_ptr<float>(),\n        transmittance_grad.data_ptr<float>(),\n        // outputs\n        alphas_grad.data_ptr<float>());\n    return alphas_grad;\n}\n\ntorch::Tensor transmittance_from_alpha_patch_based_forward_naive(\n    torch::Tensor packed_info, torch::Tensor alphas)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(alphas);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(alphas.ndimension() == 3 & alphas.size(2) == 1);\n\n    const uint32_t n_samples = alphas.size(0);\n    const uint32_t n_patches = packed_info.size(0);\n    const uint32_t patch_size  = alphas.size(1);\n\n    // compute the required number of thread.y from patch size\n    // take the log2 of patch size and round up to the next power of 2\n    const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size)));\n    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;\n\n    const dim3 threads(thread_for_n_samples, thread_for_a_patch);\n    const dim3 blocks((n_patches+threads.x-1)/threads.x, (patch_size+threads.y-1)/threads.y);\n\n    // outputs\n    torch::Tensor transmittance = torch::empty_like(alphas);\n\n    // parallel across rays\n    transmittance_from_alpha_patch_based_forward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_patches,\n        patch_size,\n        // inputs\n        packed_info.data_ptr<int>(),\n        alphas.data_ptr<float>(),\n        // outputs\n        transmittance.data_ptr<float>());\n    return transmittance;\n}\n\ntorch::Tensor transmittance_from_alpha_patch_based_backward_naive(\n    torch::Tensor packed_info,\n    torch::Tensor alphas,\n    
torch::Tensor transmittance,\n    torch::Tensor transmittance_grad)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(transmittance);\n    CHECK_INPUT(transmittance_grad);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(transmittance.ndimension() == 3 & transmittance.size(2) == 1);\n    TORCH_CHECK(transmittance_grad.ndimension() == 3 & transmittance_grad.size(2) == 1);\n\n    const uint32_t n_samples = alphas.size(0);\n    const uint32_t n_patches = packed_info.size(0);\n    const uint32_t patch_size = alphas.size(1);\n\n    // compute the required number of thread.y from patch size\n    // take the log2 of patch size and round up to the next power of 2\n    const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size)));\n    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;\n\n    const dim3 threads(thread_for_n_samples, thread_for_a_patch);\n    const dim3 blocks((n_patches+threads.x-1)/threads.x, (patch_size+threads.y-1)/threads.y);\n\n\n    // outputs\n    torch::Tensor alphas_grad = torch::empty_like(alphas);\n\n    // parallel across rays\n    transmittance_from_alpha_patch_based_backward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_patches,\n        patch_size,\n        // inputs\n        packed_info.data_ptr<int>(),\n        alphas.data_ptr<float>(),\n        transmittance.data_ptr<float>(),\n        transmittance_grad.data_ptr<float>(),\n        // outputs\n        alphas_grad.data_ptr<float>());\n    return alphas_grad;\n}"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_transmittance_cub.cu",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n// CUB is supported in CUDA >= 11.0\n// ExclusiveScanByKey is supported in CUB >= 1.15.0 (CUDA >= 11.6)\n// See: https://github.com/NVIDIA/cub/tree/main#releases\n#include \"include/helpers_cuda.h\"\n#if CUB_SUPPORTS_SCAN_BY_KEY()\n#include <cub/cub.cuh>\n#endif\n\nstruct Product\n{\n    template <typename T>\n    __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const { return a * b; }\n};\n\n#if CUB_SUPPORTS_SCAN_BY_KEY()\ntemplate <typename KeysInputIteratorT, typename ValuesInputIteratorT, typename ValuesOutputIteratorT>\ninline void exclusive_sum_by_key(\n    KeysInputIteratorT keys, ValuesInputIteratorT input, ValuesOutputIteratorT output, int64_t num_items)\n{\n    TORCH_CHECK(num_items <= std::numeric_limits<int64_t>::max(),\n                \"cub ExclusiveSumByKey does not support more than LONG_MAX elements\");\n    CUB_WRAPPER(cub::DeviceScan::ExclusiveSumByKey, keys, input, output,\n                num_items, cub::Equality(), at::cuda::getCurrentCUDAStream());\n}\n\ntemplate <typename KeysInputIteratorT, typename ValuesInputIteratorT, typename ValuesOutputIteratorT>\ninline void exclusive_prod_by_key(\n    KeysInputIteratorT keys, ValuesInputIteratorT input, ValuesOutputIteratorT output, int64_t num_items)\n{\n    TORCH_CHECK(num_items <= std::numeric_limits<int64_t>::max(),\n                \"cub ExclusiveScanByKey does not support more than LONG_MAX elements\");\n    CUB_WRAPPER(cub::DeviceScan::ExclusiveScanByKey, keys, input, output, Product(), 1.0f,\n                num_items, cub::Equality(), at::cuda::getCurrentCUDAStream());\n}\n#endif\n\ntorch::Tensor transmittance_from_sigma_forward_cub(\n    torch::Tensor ray_indices,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor sigmas)\n{\n    DEVICE_GUARD(ray_indices);\n    CHECK_INPUT(ray_indices);\n    CHECK_INPUT(starts);\n    CHECK_INPUT(ends);\n    CHECK_INPUT(sigmas);\n    
TORCH_CHECK(ray_indices.ndimension() == 1);\n    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);\n    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);\n    TORCH_CHECK(sigmas.ndimension() == 2 & sigmas.size(1) == 1);\n\n    const uint32_t n_samples = sigmas.size(0);\n\n    // parallel across samples\n    torch::Tensor sigmas_dt = sigmas * (ends - starts);\n    torch::Tensor sigmas_dt_cumsum = torch::empty_like(sigmas);\n#if CUB_SUPPORTS_SCAN_BY_KEY()\n    exclusive_sum_by_key(\n        ray_indices.data_ptr<int64_t>(),\n        sigmas_dt.data_ptr<float>(),\n        sigmas_dt_cumsum.data_ptr<float>(),\n        n_samples);\n#else\n    std::runtime_error(\"CUB functions are only supported in CUDA >= 11.6.\");\n#endif\n    torch::Tensor transmittance = (-sigmas_dt_cumsum).exp();\n    return transmittance;\n}\n\ntorch::Tensor transmittance_from_sigma_backward_cub(\n    torch::Tensor ray_indices,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor transmittance,\n    torch::Tensor transmittance_grad)\n{\n    DEVICE_GUARD(ray_indices);\n    CHECK_INPUT(ray_indices);\n    CHECK_INPUT(starts);\n    CHECK_INPUT(ends);\n    CHECK_INPUT(transmittance);\n    CHECK_INPUT(transmittance_grad);\n    TORCH_CHECK(ray_indices.ndimension() == 1);\n    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);\n    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);\n    TORCH_CHECK(transmittance.ndimension() == 2 & transmittance.size(1) == 1);\n    TORCH_CHECK(transmittance_grad.ndimension() == 2 & transmittance_grad.size(1) == 1);\n\n    const uint32_t n_samples = transmittance.size(0);\n\n    // parallel across samples\n    torch::Tensor sigmas_dt_cumsum_grad = -transmittance_grad * transmittance;\n    torch::Tensor sigmas_dt_grad = torch::empty_like(transmittance_grad);\n#if CUB_SUPPORTS_SCAN_BY_KEY()\n    exclusive_sum_by_key(\n        thrust::make_reverse_iterator(ray_indices.data_ptr<int64_t>() + n_samples),\n        
thrust::make_reverse_iterator(sigmas_dt_cumsum_grad.data_ptr<float>() + n_samples),\n        thrust::make_reverse_iterator(sigmas_dt_grad.data_ptr<float>() + n_samples),\n        n_samples);\n#else\n    std::runtime_error(\"CUB functions are only supported in CUDA >= 11.6.\");\n#endif\n    torch::Tensor sigmas_grad = sigmas_dt_grad * (ends - starts);\n    return sigmas_grad;\n}\n\ntorch::Tensor transmittance_from_alpha_forward_cub(\n    torch::Tensor ray_indices, torch::Tensor alphas)\n{\n    DEVICE_GUARD(ray_indices);\n    CHECK_INPUT(ray_indices);\n    CHECK_INPUT(alphas);\n    TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1);\n    TORCH_CHECK(ray_indices.ndimension() == 1);\n\n    const uint32_t n_samples = alphas.size(0);\n\n    // parallel across samples\n    torch::Tensor transmittance = torch::empty_like(alphas);\n#if CUB_SUPPORTS_SCAN_BY_KEY()\n    exclusive_prod_by_key(\n        ray_indices.data_ptr<int64_t>(),\n        (1.0f - alphas).data_ptr<float>(),\n        transmittance.data_ptr<float>(),\n        n_samples);\n#else\n    std::runtime_error(\"CUB functions are only supported in CUDA >= 11.6.\");\n#endif\n    return transmittance;\n}\n\ntorch::Tensor transmittance_from_alpha_backward_cub(\n    torch::Tensor ray_indices,\n    torch::Tensor alphas,\n    torch::Tensor transmittance,\n    torch::Tensor transmittance_grad)\n{\n    DEVICE_GUARD(ray_indices);\n    CHECK_INPUT(ray_indices);\n    CHECK_INPUT(transmittance);\n    CHECK_INPUT(transmittance_grad);\n    TORCH_CHECK(ray_indices.ndimension() == 1);\n    TORCH_CHECK(transmittance.ndimension() == 2 & transmittance.size(1) == 1);\n    TORCH_CHECK(transmittance_grad.ndimension() == 2 & transmittance_grad.size(1) == 1);\n\n    const uint32_t n_samples = transmittance.size(0);\n\n    // parallel across samples\n    torch::Tensor sigmas_dt_cumsum_grad = -transmittance_grad * transmittance;\n    torch::Tensor sigmas_dt_grad = torch::empty_like(transmittance_grad);\n#if 
CUB_SUPPORTS_SCAN_BY_KEY()\n    exclusive_sum_by_key(\n        thrust::make_reverse_iterator(ray_indices.data_ptr<int64_t>() + n_samples),\n        thrust::make_reverse_iterator(sigmas_dt_cumsum_grad.data_ptr<float>() + n_samples),\n        thrust::make_reverse_iterator(sigmas_dt_grad.data_ptr<float>() + n_samples),\n        n_samples);\n#else\n    std::runtime_error(\"CUB functions are only supported in CUDA >= 11.6.\");\n#endif\n    torch::Tensor alphas_grad = sigmas_dt_grad / (1.0f - alphas).clamp_min(1e-10f);\n    return alphas_grad;\n}\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/cuda/csrc/render_weight.cu",
    "content": "/*\n * Copyright (c) 2022 Ruilong Li, UC Berkeley.\n */\n\n#include \"include/helpers_cuda.h\"\n\n__global__ void weight_from_sigma_forward_kernel(\n    const uint32_t n_rays,\n    const int *packed_info,\n    const float *starts,\n    const float *ends,\n    const float *sigmas,\n    // outputs\n    float *weights)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0]; \n    const int steps = packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    starts += base;\n    ends += base;\n    sigmas += base;\n    weights += base;\n\n    // accumulation\n    float T = 1.f;\n    for (int j = 0; j < steps; ++j)\n    {\n        const float delta = ends[j] - starts[j];\n        const float alpha = 1.f - __expf(-sigmas[j] * delta);\n        weights[j] = alpha * T;\n        T *= (1.f - alpha);\n    }\n    return;\n}\n\n__global__ void weight_from_sigma_backward_kernel(\n    const uint32_t n_rays,\n    const int *packed_info, \n    const float *starts, \n    const float *ends,   \n    const float *sigmas, \n    const float *weights, \n    const float *grad_weights, \n    // outputs\n    float *grad_sigmas)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0]; \n    const int steps = packed_info[i * 2 + 1]; \n    if (steps == 0)\n        return;\n\n    starts += base;\n    ends += base;\n    sigmas += base;\n    weights += base;\n    grad_weights += base;\n    grad_sigmas += base;\n\n    float accum = 0;\n    for (int j = 0; j < steps; ++j)\n    {\n        accum += grad_weights[j] * weights[j];\n    }\n\n    // accumulation\n    float T = 1.f;\n    for (int j = 0; j < steps; ++j)\n    {\n        const float delta = ends[j] - starts[j];\n        const float alpha = 1.f - __expf(-sigmas[j] * delta);\n        grad_sigmas[j] = (grad_weights[j] * T - accum) * delta;\n        accum -= grad_weights[j] * weights[j];\n        T *= (1.f - alpha);\n    }\n    
return;\n}\n\n// template <typename scalar_t>\n__global__ void weight_from_alpha_patch_based_forward_kernel(\n    const uint32_t n_patches,\n    const uint32_t patch_size,\n    const int *packed_info, // (n_patches, 2)\n    const float *alphas,  // (n_samples, patch_size, 1)\n    // outputs\n    float *weights// ()\n    ){\n    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);  // i is the patch id, k is the ray id within the patch\n\n    // locate\n    const int base = packed_info[i * 2 + 0];  // get the base of the patch\n    const int steps = packed_info[i * 2 + 1]; // get the steps of the patch\n    if (steps == 0)\n        return;\n\n    alphas += base * patch_size;  // move the pointer to the base\n    weights += base * patch_size;  // move the pointer to the base\n//     transmittance += base * patch_size;  // move the pointer to the base\n\n    // accumulation\n    float T = 1.f;\n    for (int j = 0; j < steps; ++j)\n    {\n        const uint32_t ray_id = j * patch_size + k;\n        const float alpha = alphas[ray_id];  // get the alpha value\n//         transmittance[ray_id] = T;\n        weights[ray_id] = alpha * T;  // calculate the weight\n        T *= (1.f - alpha);  // update the T value\n    }\n    return;\n}\n\n__global__ void weight_and_transmittance_from_alpha_patch_based_forward_kernel(\n    const uint32_t n_patches,\n    const uint32_t patch_size,\n    const int *packed_info, // (n_patches, 2)\n    const float *alphas,  // (n_samples, patch_size, 1)\n    // outputs\n    float *weights,\n    float *transmittance// ()\n    ){\n    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);  // i is the patch id, k is the ray id within the patch\n\n    // locate\n    const int base = packed_info[i * 2 + 0];  // get the base of the patch\n    const int steps = packed_info[i * 2 + 1]; // get the steps of the patch\n    if (steps == 0)\n        return;\n\n    alphas += base * patch_size;  // move the pointer to the base\n    weights += base * patch_size;  
// move the pointer to the base\n    transmittance += base * patch_size;  // move the pointer to the base\n\n    // accumulation\n    float T = 1.f;\n    for (int j = 0; j < steps; ++j)\n    {\n        const uint32_t ray_id = j * patch_size + k;\n        const float alpha = alphas[ray_id];  // get the alpha value\n        transmittance[ray_id] = T;\n        weights[ray_id] = alpha * T;  // calculate the weight\n        T *= (1.f - alpha);  // update the T value\n    }\n    return;\n}\n\n__global__ void weight_from_alpha_forward_kernel(\n    const uint32_t n_rays,\n    const int *packed_info,\n    const float *alphas,   \n    // outputs\n    float *weights)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);  // i is the thread id\n\n    // locate\n    const int base = packed_info[i * 2 + 0];  // get the base\n    const int steps = packed_info[i * 2 + 1]; // get the steps\n    if (steps == 0)\n        return;\n\n    alphas += base;  // move the pointer to the base\n    weights += base;  // move the pointer to the base\n\n    // accumulation\n    float T = 1.f;\n    for (int j = 0; j < steps; ++j)\n    {\n        const float alpha = alphas[j];  // get the alpha value\n        weights[j] = alpha * T;  // calculate the weight\n        T *= (1.f - alpha);  // update the T value\n    }\n    return;\n}\n\n__global__ void weight_from_alpha_backward_kernel(\n    const uint32_t n_rays,\n    const int *packed_info,  \n    const float *alphas,     \n    const float *weights,    \n    const float *grad_weights,\n    // outputs\n    float *grad_alphas)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0]; \n    const int steps = packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    alphas += base;\n    weights += base;\n    grad_weights += base;\n    grad_alphas += base;\n\n    float accum = 0;\n    for (int j = 0; j < steps; ++j)\n    {\n        accum += grad_weights[j] * weights[j];\n    }\n\n    // accumulation\n    float T = 
1.f;\n    for (int j = 0; j < steps; ++j)\n    {\n        const float alpha = alphas[j];\n        grad_alphas[j] = (grad_weights[j] * T - accum) / fmaxf(1.f - alpha, 1e-10f);\n        accum -= grad_weights[j] * weights[j];\n        T *= (1.f - alpha);\n    }\n    return;\n}\n\n\n__global__ void weight_from_alpha_importance_sampling_forward_kernel(\n    const uint32_t n_rays,\n    const int *packed_info,\n    const float *alphas,\n    const float *importance,\n    // outputs\n    float *weights)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);  // i is the thread id\n\n    // locate\n    const int base = packed_info[i * 2 + 0];  // get the base\n    const int steps = packed_info[i * 2 + 1]; // get the steps\n    if (steps == 0)\n        return;\n\n    alphas += base;  // move the pointer to the base\n    weights += base;  // move the pointer to the base\n    importance += base;  // move the pointer to the base\n\n    // accumulation\n    float T = 1.f;\n    for (int j = 0; j < steps; ++j)\n    {\n        const float alpha = alphas[j];  // get the alpha value\n        weights[j] = alpha * T / importance[j];  // calculate the weight\n        T *= (1.f - alpha);  // update the T value\n    }\n    return;\n}\n\n__global__ void weight_from_alpha_importance_sampling_backward_kernel(\n    const uint32_t n_rays,\n    const int *packed_info,\n    const float *alphas,\n    const float *weights,\n    const float *grad_weights,\n    const float *importance,\n    // outputs\n    float *grad_alphas)\n{\n    CUDA_GET_THREAD_ID(i, n_rays);\n\n    // locate\n    const int base = packed_info[i * 2 + 0];\n    const int steps = packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    alphas += base;\n    weights += base;\n    grad_weights += base;\n    grad_alphas += base;\n    importance += base;\n\n    float accum = 0;\n    for (int j = 0; j < steps; ++j)\n    {\n        accum += grad_weights[j] * weights[j];\n    }\n\n    // accumulation\n    float T = 1.f;\n    for (int j = 0; j 
< steps; ++j)\n    {\n        const float alpha = alphas[j];\n        grad_alphas[j] = (grad_weights[j] * T - importance[j] * accum) / (importance[j] * fmaxf(1.f - alpha, 1e-10f));\n        accum -= grad_weights[j] * weights[j];\n        T *= (1.f - alpha);\n    }\n    return;\n}\n\n\n__global__ void weight_from_alpha_patch_based_backward_kernel(\n    const uint32_t n_patches,\n    const uint32_t patch_size,\n    const int *packed_info,\n    const float *alphas,\n    const float *weights,\n    const float *grad_weights,\n    // outputs\n    float *grad_alphas)\n{\n    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);  // i is the patch id, k is the ray id within the patch\n\n\n    // locate\n    const int base = packed_info[i * 2 + 0];\n    const int steps = packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    alphas += base * patch_size;  // move the pointer to the base\n    weights += base * patch_size;  // move the pointer to the base\n    grad_weights += base * patch_size;  // move the pointer to the base\n    grad_alphas += base * patch_size;  // move the pointer to the base\n\n    float accum = 0;\n    for (int j = 0; j < steps; ++j)\n    {\n        const uint32_t sample_idx = j * patch_size + k;\n        accum += grad_weights[sample_idx] * weights[sample_idx];\n    }\n\n    // accumulation\n    float T = 1.f;\n    for (int j = 0; j < steps; ++j)\n    {\n        const uint32_t sample_idx = j * patch_size + k;\n        const float alpha = alphas[sample_idx];\n        grad_alphas[sample_idx] = (grad_weights[sample_idx] * T - accum) / fmaxf(1.f - alpha, 1e-10f);\n        accum -= grad_weights[sample_idx] * weights[sample_idx];\n        T *= (1.f - alpha);\n    }\n    return;\n}\n\n__global__ void weight_and_transmittance_from_alpha_patch_based_backward_kernel(\n    const uint32_t n_patches,\n    const uint32_t patch_size,\n    const int *packed_info,\n    const float *alphas,\n    const float *weights,\n    const float *grad_weights,\n    // 
outputs\n    float *grad_alphas)\n{\n    CUDA_GET_THREAD_ID_2D(i, k, n_patches, patch_size);  // i is the patch id, k is the ray id within the patch\n\n\n    // locate\n    const int base = packed_info[i * 2 + 0];\n    const int steps = packed_info[i * 2 + 1];\n    if (steps == 0)\n        return;\n\n    alphas += base * patch_size;  // move the pointer to the base\n    weights += base * patch_size;  // move the pointer to the base\n    grad_weights += base * patch_size;  // move the pointer to the base\n    grad_alphas += base * patch_size;  // move the pointer to the base\n\n    float accum = 0;\n    for (int j = 0; j < steps; ++j)\n    {\n        const uint32_t sample_idx = j * patch_size + k;\n        accum += grad_weights[sample_idx] * weights[sample_idx];\n    }\n\n    // accumulation\n    float T = 1.f;\n    for (int j = 0; j < steps; ++j)\n    {\n        const uint32_t sample_idx = j * patch_size + k;\n        const float alpha = alphas[sample_idx];\n        grad_alphas[sample_idx] = (grad_weights[sample_idx] * T - accum) / fmaxf(1.f - alpha, 1e-10f);\n        accum -= grad_weights[sample_idx] * weights[sample_idx];\n        T *= (1.f - alpha);\n    }\n    return;\n}\n\ntorch::Tensor weight_from_sigma_forward_naive(\n    torch::Tensor packed_info,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor sigmas)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(starts);\n    CHECK_INPUT(ends);\n    CHECK_INPUT(sigmas);\n\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);\n    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);\n    TORCH_CHECK(sigmas.ndimension() == 2 & sigmas.size(1) == 1);\n\n    const uint32_t n_samples = sigmas.size(0);\n    const uint32_t n_rays = packed_info.size(0);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    // outputs\n    torch::Tensor weights = 
torch::empty_like(sigmas);\n\n    weight_from_sigma_forward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_rays,\n        // inputs\n        packed_info.data_ptr<int>(),\n        starts.data_ptr<float>(),\n        ends.data_ptr<float>(),\n        sigmas.data_ptr<float>(),\n        // outputs\n        weights.data_ptr<float>());\n    return weights;\n}\n\ntorch::Tensor weight_from_sigma_backward_naive(\n    torch::Tensor weights,\n    torch::Tensor grad_weights,\n    torch::Tensor packed_info,\n    torch::Tensor starts,\n    torch::Tensor ends,\n    torch::Tensor sigmas)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(weights);\n    CHECK_INPUT(grad_weights);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(starts);\n    CHECK_INPUT(ends);\n    CHECK_INPUT(sigmas);\n\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);\n    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);\n    TORCH_CHECK(sigmas.ndimension() == 2 & sigmas.size(1) == 1);\n    TORCH_CHECK(weights.ndimension() == 2 & weights.size(1) == 1);\n    TORCH_CHECK(grad_weights.ndimension() == 2 & grad_weights.size(1) == 1);\n\n    const uint32_t n_samples = sigmas.size(0);\n    const uint32_t n_rays = packed_info.size(0);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    // outputs\n    torch::Tensor grad_sigmas = torch::empty_like(sigmas);\n\n    weight_from_sigma_backward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_rays,\n        // inputs\n        packed_info.data_ptr<int>(),\n        starts.data_ptr<float>(),\n        ends.data_ptr<float>(),\n        sigmas.data_ptr<float>(),\n        weights.data_ptr<float>(),\n        grad_weights.data_ptr<float>(),\n        // outputs\n        grad_sigmas.data_ptr<float>());\n\n    return grad_sigmas;\n}\n\ntorch::Tensor weight_from_alpha_forward_naive(\n    
torch::Tensor packed_info, torch::Tensor alphas)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(alphas);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1);\n\n    const uint32_t n_samples = alphas.size(0);\n    const uint32_t n_rays = packed_info.size(0);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    // outputs\n    torch::Tensor weights = torch::empty_like(alphas);\n\n    weight_from_alpha_forward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_rays,\n        // inputs\n        packed_info.data_ptr<int>(),\n        alphas.data_ptr<float>(),\n        // outputs\n        weights.data_ptr<float>());\n    return weights;\n}\n\ntorch::Tensor weight_from_alpha_patch_based_forward_naive(\n    torch::Tensor packed_info, // (n_patches, 2)\n    torch::Tensor alphas // (n_samples, patches_size, 1)\n    )\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(alphas);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(alphas.ndimension() == 3 & alphas.size(2) == 1);\n\n    const uint32_t n_samples = alphas.size(0);\n    const uint32_t n_patches = packed_info.size(0);\n    const uint32_t patch_size  = alphas.size(1);\n\n    // compute the required number of thread.y from patch size\n    // take the log2 of patch size and round up to the next power of 2\n    const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size)));\n    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;\n    // convert to uint\n//     thread_for_a_patch = static_cast<uint32_t>(thread_for_a_patch);\n//     thread_for_n_samples = static_cast<uint32_t>(thread_for_n_samples);\n\n    const dim3 threads(thread_for_n_samples, thread_for_a_patch);\n//     const dim3 blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads);\n    const dim3 
blocks((n_patches+threads.x-1)/threads.x, (patch_size+threads.y-1)/threads.y);\n\n    // outputs\n    torch::Tensor weights = torch::empty_like(alphas);\n    torch::Tensor transmittance = torch::empty_like(alphas);\n\n    weight_from_alpha_patch_based_forward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_patches,\n        patch_size,\n        // inputs\n        packed_info.data_ptr<int>(),\n        alphas.data_ptr<float>(),\n        // outputs\n        weights.data_ptr<float>());\n    return weights;\n}\n\ntorch::Tensor weight_from_alpha_backward_naive(\n    torch::Tensor weights,\n    torch::Tensor grad_weights,\n    torch::Tensor packed_info,\n    torch::Tensor alphas)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(alphas);\n    CHECK_INPUT(weights);\n    CHECK_INPUT(grad_weights);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1);\n    TORCH_CHECK(weights.ndimension() == 2 & weights.size(1) == 1);\n    TORCH_CHECK(grad_weights.ndimension() == 2 & grad_weights.size(1) == 1);\n\n    const uint32_t n_samples = alphas.size(0);\n    const uint32_t n_rays = packed_info.size(0);\n\n    const int threads = 256;\n    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n\n    // outputs\n    torch::Tensor grad_alphas = torch::empty_like(alphas);\n\n    weight_from_alpha_backward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_rays,\n        // inputs\n        packed_info.data_ptr<int>(),\n        alphas.data_ptr<float>(),\n        weights.data_ptr<float>(),\n        grad_weights.data_ptr<float>(),\n        // outputs\n        grad_alphas.data_ptr<float>());\n    return grad_alphas;\n}\n\ntorch::Tensor weight_from_alpha_patch_based_backward_naive(\n    torch::Tensor weights,\n    torch::Tensor grad_weights,  // (n_samples, patches_size, 1)\n    torch::Tensor packed_info,\n    torch::Tensor 
alphas)  // (n_samples, patches_size, 1)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(alphas);\n    CHECK_INPUT(weights);\n    CHECK_INPUT(grad_weights);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(alphas.ndimension() == 3 & alphas.size(2) == 1);\n    TORCH_CHECK(weights.ndimension() == 3 & weights.size(2) == 1);\n    TORCH_CHECK(grad_weights.ndimension() == 3 & grad_weights.size(2) == 1);\n\n    const uint32_t n_samples = alphas.size(0);\n    const uint32_t n_patches = packed_info.size(0);\n    const uint32_t patch_size = alphas.size(1);\n\n    // compute the required number of thread.y from patch size\n    // take the log2 of patch size and round up to the next power of 2\n    const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size)));\n    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;\n\n    const dim3 threads(thread_for_n_samples, thread_for_a_patch);\n    const dim3 blocks((n_patches+threads.x-1)/threads.x, (patch_size+threads.y-1)/threads.y);\n\n    // outputs\n    torch::Tensor grad_alphas = torch::empty_like(alphas);\n\n    weight_from_alpha_patch_based_backward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_patches,\n        patch_size,\n        // inputs\n        packed_info.data_ptr<int>(),\n        alphas.data_ptr<float>(),\n        weights.data_ptr<float>(),\n        grad_weights.data_ptr<float>(),\n        // outputs\n        grad_alphas.data_ptr<float>());\n    return grad_alphas;\n}\n\n\nstd::vector<torch::Tensor> weight_and_transmittance_from_alpha_patch_based_forward_naive(\n    torch::Tensor packed_info, // (n_patches, 2)\n    torch::Tensor alphas // (n_samples, patches_size, 1)\n    )\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(alphas);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(alphas.ndimension() == 3 & alphas.size(2) == 1);\n\n    const uint32_t n_samples = 
alphas.size(0);\n    const uint32_t n_patches = packed_info.size(0);\n    const uint32_t patch_size  = alphas.size(1);\n\n    // compute the required number of thread.y from patch size\n    // take the log2 of patch size and round up to the next power of 2\n    const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size)));\n    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;\n\n    const dim3 threads(thread_for_n_samples, thread_for_a_patch);\n    const dim3 blocks((n_patches+threads.x-1)/threads.x, (patch_size+threads.y-1)/threads.y);\n\n    // outputs\n    torch::Tensor weights = torch::empty_like(alphas);\n    torch::Tensor transmittance = torch::empty_like(alphas);\n\n    weight_and_transmittance_from_alpha_patch_based_forward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_patches,\n        patch_size,\n        // inputs\n        packed_info.data_ptr<int>(),\n        alphas.data_ptr<float>(),\n        // outputs\n        weights.data_ptr<float>(),\n        transmittance.data_ptr<float>());\n    return {weights, transmittance};\n}\n\ntorch::Tensor weight_and_transmittance_from_alpha_patch_based_backward_naive(\n    torch::Tensor weights,\n    torch::Tensor grad_weights,  // (n_samples, patches_size, 1)\n    torch::Tensor packed_info,\n    torch::Tensor alphas)  // (n_samples, patches_size, 1)\n{\n    DEVICE_GUARD(packed_info);\n    CHECK_INPUT(packed_info);\n    CHECK_INPUT(alphas);\n    CHECK_INPUT(weights);\n    CHECK_INPUT(grad_weights);\n    TORCH_CHECK(packed_info.ndimension() == 2);\n    TORCH_CHECK(alphas.ndimension() == 3 & alphas.size(2) == 1);\n    TORCH_CHECK(weights.ndimension() == 3 & weights.size(2) == 1);\n    TORCH_CHECK(grad_weights.ndimension() == 3 & grad_weights.size(2) == 1);\n\n    const uint32_t n_samples = alphas.size(0);\n    const uint32_t n_patches = packed_info.size(0);\n    const uint32_t patch_size = alphas.size(1);\n\n    // compute the required number of thread.y from 
patch size\n    // take the log2 of patch size and round up to the next power of 2\n    const uint32_t thread_for_a_patch = pow(2, ceil(log2(patch_size)));\n    const uint32_t thread_for_n_samples = 256 / thread_for_a_patch;\n\n    const dim3 threads(thread_for_n_samples, thread_for_a_patch);\n    const dim3 blocks((n_patches+threads.x-1)/threads.x, (patch_size+threads.y-1)/threads.y);\n\n    // outputs\n    torch::Tensor grad_alphas = torch::empty_like(alphas);\n\n    weight_and_transmittance_from_alpha_patch_based_backward_kernel<<<\n        blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n        n_patches,\n        patch_size,\n        // inputs\n        packed_info.data_ptr<int>(),\n        alphas.data_ptr<float>(),\n        weights.data_ptr<float>(),\n        grad_weights.data_ptr<float>(),\n        // outputs\n        grad_alphas.data_ptr<float>());\n    return grad_alphas;\n}\n\n// torch::Tensor weight_from_alpha_importance_sampling_forward_naive(\n//     torch::Tensor packed_info, torch::Tensor alphas, torch::Tensor importance_pdfs)\n// {\n//     DEVICE_GUARD(packed_info);\n//     CHECK_INPUT(packed_info);\n//     CHECK_INPUT(alphas);\n//     CHECK_INPUT(importance_pdfs);\n//     TORCH_CHECK(packed_info.ndimension() == 2);\n//     TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1);\n//     TORCH_CHECK(importance_pdfs.ndimension() == 2 & importance_pdfs.size(1) == 1);\n//\n//     const uint32_t n_samples = alphas.size(0);\n//     const uint32_t n_rays = packed_info.size(0);\n//\n//     const int threads = 256;\n//     const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n//\n//     // outputs\n//     torch::Tensor weights = torch::empty_like(alphas);\n//\n//     weight_from_alpha_forward_kernel<<<\n//         blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n//         n_rays,\n//         // inputs\n//         packed_info.data_ptr<int>(),\n//         alphas.data_ptr<float>(),\n//         importance_pdfs.data_ptr<float>(),\n//  
       // outputs\n//         weights.data_ptr<float>());\n//     return weights;\n// }\n//\n// torch::Tensor weight_from_alpha_importance_sampling_backward_naive(\n//     torch::Tensor weights,\n//     torch::Tensor grad_weights,\n//     torch::Tensor packed_info,\n//     torch::Tensor alphas,\n//     torch::Tensor importance_pdfs)\n// {\n//     DEVICE_GUARD(packed_info);\n//     CHECK_INPUT(packed_info);\n//     CHECK_INPUT(alphas);\n//     CHECK_INPUT(weights);\n//     CHECK_INPUT(grad_weights);\n//     CHECK_INPUT(importance_pdfs);\n//     TORCH_CHECK(packed_info.ndimension() == 2);\n//     TORCH_CHECK(alphas.ndimension() == 2 & alphas.size(1) == 1);\n//     TORCH_CHECK(weights.ndimension() == 2 & weights.size(1) == 1);\n//     TORCH_CHECK(importance_pdfs.ndimension() == 2 & importance_pdfs.size(1) == 1);\n//     TORCH_CHECK(grad_weights.ndimension() == 2 & grad_weights.size(1) == 1);\n//\n//\n//     const uint32_t n_samples = alphas.size(0);\n//     const uint32_t n_rays = packed_info.size(0);\n//\n//     const int threads = 256;\n//     const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);\n//\n//     // outputs\n//     torch::Tensor grad_alphas = torch::empty_like(alphas);\n//\n//     weight_from_alpha_backward_kernel<<<\n//         blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(\n//         n_rays,\n//         // inputs\n//         packed_info.data_ptr<int>(),\n//         alphas.data_ptr<float>(),\n//         weights.data_ptr<float>(),\n//         grad_weights.data_ptr<float>(),\n//         importance_pdfs.data_ptr<float>(),\n//         // outputs\n//         grad_alphas.data_ptr<float>());\n//     return grad_alphas;\n// }"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/grid.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li @ UC Berkeley\n\"\"\"\n\nfrom typing import Callable, List, Union\n\nimport torch\nimport torch.nn as nn\n\nimport nerfacc.cuda as _C\n\nfrom .contraction import ContractionType, contract_inv\n\n# TODO: check torch.scatter_reduce_\n# from torch_scatter import scatter_max\n\n\n@torch.no_grad()\ndef query_grid(\n    samples: torch.Tensor,\n    grid_roi: torch.Tensor,\n    grid_values: torch.Tensor,\n    grid_type: ContractionType,\n):\n    \"\"\"Query grid values given coordinates.\n\n    Args:\n        samples: (n_samples, 3) tensor of coordinates.\n        grid_roi: (6,) region of interest of the grid. Usually it should be\n            accquired from the grid itself using `grid.roi_aabb`.\n        grid_values: A 3D tensor of grid values in the shape of (resx, resy, resz).\n        grid_type: Contraction type of the grid. Usually it should be\n            accquired from the grid itself using `grid.contraction_type`.\n\n    Returns:\n        (n_samples) values for those samples queried from the grid.\n    \"\"\"\n    assert samples.dim() == 2 and samples.size(-1) == 3\n    assert grid_roi.dim() == 1 and grid_roi.size(0) == 6\n    assert grid_values.dim() == 3\n    assert isinstance(grid_type, ContractionType)\n    return _C.grid_query(\n        samples.contiguous(),\n        grid_roi.contiguous(),\n        grid_values.contiguous(),\n        grid_type.to_cpp_version(),\n    )\n\n\nclass Grid(nn.Module):\n    \"\"\"An abstract Grid class.\n\n    The grid is used as a cache of the 3D space to indicate whether each voxel\n    area is important or not for the differentiable rendering process. 
The\n    ray marching function (see :func:`nerfacc.ray_marching`) would use the\n    grid to skip the unimportant voxel areas.\n\n    To work with :func:`nerfacc.ray_marching`, three attributes must exist:\n\n        - :attr:`roi_aabb`: The axis-aligned bounding box of the region of interest.\n        - :attr:`binary`: A 3D binarized tensor of shape {resx, resy, resz}, \\\n            with torch.bool data type.\n        - :attr:`contraction_type`: The contraction type of the grid, indicating how \\\n            the 3D space is mapped to the grid.\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__()\n        self.register_buffer(\"_dummy\", torch.empty(0), persistent=False)\n\n    @property\n    def device(self) -> torch.device:\n        return self._dummy.device\n\n    @property\n    def roi_aabb(self) -> torch.Tensor:\n        \"\"\"The axis-aligned bounding box of the region of interest.\n\n        Its is a shape (6,) tensor in the format of {minx, miny, minz, maxx, maxy, maxz}.\n        \"\"\"\n        if hasattr(self, \"_roi_aabb\"):\n            return getattr(self, \"_roi_aabb\")\n        else:\n            raise NotImplementedError(\"please set an attribute named _roi_aabb\")\n\n    @property\n    def binary(self) -> torch.Tensor:\n        \"\"\"A 3D binarized tensor with torch.bool data type.\n\n        The tensor is of shape (resx, resy, resz), in which each boolen value\n        represents whether the corresponding voxel should be kept or not.\n        \"\"\"\n        if hasattr(self, \"_binary\"):\n            return getattr(self, \"_binary\")\n        else:\n            raise NotImplementedError(\"please set an attribute named _binary\")\n\n    @property\n    def contraction_type(self) -> ContractionType:\n        \"\"\"The contraction type of the grid.\n\n        The contraction type is an indicator of how the 3D space is contracted\n        to this voxel grid. 
See :class:`nerfacc.ContractionType` for more details.\n        \"\"\"\n        if hasattr(self, \"_contraction_type\"):\n            return getattr(self, \"_contraction_type\")\n        else:\n            raise NotImplementedError(\n                \"please set an attribute named _contraction_type\"\n            )\n\n\nclass OccupancyGrid(Grid):\n    \"\"\"Occupancy grid: whether each voxel area is occupied or not.\n\n    Args:\n        roi_aabb: The axis-aligned bounding box of the region of interest. Useful for mapping\n            the 3D space to the grid.\n        resolution: The resolution of the grid. If an integer is given, the grid is assumed to\n            be a cube. Otherwise, a list or a tensor of shape (3,) is expected. Default: 128.\n        contraction_type: The contraction type of the grid. See :class:`nerfacc.ContractionType`\n            for more details. Default: :attr:`nerfacc.ContractionType.AABB`.\n    \"\"\"\n\n    NUM_DIM: int = 3\n\n    def __init__(\n        self,\n        roi_aabb: Union[List[int], torch.Tensor],\n        resolution: Union[int, List[int], torch.Tensor] = 128,\n        contraction_type: ContractionType = ContractionType.AABB,\n    ) -> None:\n        super().__init__()\n        if isinstance(resolution, int):\n            resolution = [resolution] * self.NUM_DIM\n        if isinstance(resolution, (list, tuple)):\n            resolution = torch.tensor(resolution, dtype=torch.int32)\n        assert isinstance(\n            resolution, torch.Tensor\n        ), f\"Invalid type: {type(resolution)}\"\n        assert resolution.shape == (\n            self.NUM_DIM,\n        ), f\"Invalid shape: {resolution.shape}\"\n\n        if isinstance(roi_aabb, (list, tuple)):\n            roi_aabb = torch.tensor(roi_aabb, dtype=torch.float32)\n        assert isinstance(\n            roi_aabb, torch.Tensor\n        ), f\"Invalid type: {type(roi_aabb)}\"\n        assert roi_aabb.shape == torch.Size(\n            [self.NUM_DIM * 2]\n        
), f\"Invalid shape: {roi_aabb.shape}\"\n\n        # total number of voxels\n        self.num_cells = int(resolution.prod().item())\n\n        # required attributes\n        self.register_buffer(\"_roi_aabb\", roi_aabb)\n        self.register_buffer(\n            \"_binary\", torch.zeros(resolution.tolist(), dtype=torch.bool)\n        )\n        self._contraction_type = contraction_type\n\n        # helper attributes\n        self.register_buffer(\"resolution\", resolution)\n        self.register_buffer(\"occs\", torch.zeros(self.num_cells))\n\n        # Grid coords & indices\n        grid_coords = _meshgrid3d(resolution).reshape(\n            self.num_cells, self.NUM_DIM\n        )\n        self.register_buffer(\"grid_coords\", grid_coords, persistent=False)\n        grid_indices = torch.arange(self.num_cells)\n        self.register_buffer(\"grid_indices\", grid_indices, persistent=False)\n\n    @torch.no_grad()\n    def _get_all_cells(self) -> torch.Tensor:\n        \"\"\"Returns all cells of the grid.\"\"\"\n        return self.grid_indices\n\n    @torch.no_grad()\n    def _sample_uniform_and_occupied_cells(self, n: int) -> torch.Tensor:\n        \"\"\"Samples both n uniform and occupied cells.\"\"\"\n        uniform_indices = torch.randint(\n            self.num_cells, (n,), device=self.device\n        )\n        occupied_indices = torch.nonzero(self._binary.flatten())[:, 0]\n        if n < len(occupied_indices):\n            selector = torch.randint(\n                len(occupied_indices), (n,), device=self.device\n            )\n            occupied_indices = occupied_indices[selector]\n        indices = torch.cat([uniform_indices, occupied_indices], dim=0)\n        return indices\n\n    @torch.no_grad()\n    def _update(\n        self,\n        step: int,\n        occ_eval_fn: Callable,\n        occ_thre: float = 0.01,\n        ema_decay: float = 0.95,\n        warmup_steps: int = 256,\n    ) -> None:\n        \"\"\"Update the occ field in the EMA 
way.\"\"\"\n        # sample cells\n        if step < warmup_steps:\n            indices = self._get_all_cells()\n        else:\n            N = self.num_cells // 4\n            indices = self._sample_uniform_and_occupied_cells(N)\n\n        # infer occupancy: density * step_size\n        grid_coords = self.grid_coords[indices]\n        x = (\n            grid_coords + torch.rand_like(grid_coords, dtype=torch.float32)\n        ) / self.resolution\n        if self._contraction_type == ContractionType.UN_BOUNDED_SPHERE:\n            # only the points inside the sphere are valid\n            mask = (x - 0.5).norm(dim=1) < 0.5\n            x = x[mask]\n            indices = indices[mask]\n        # voxel coordinates [0, 1]^3 -> world\n        x = contract_inv(\n            x,\n            roi=self._roi_aabb,\n            type=self._contraction_type,\n        )\n        occ = occ_eval_fn(x).squeeze(-1)\n\n        # ema update\n        self.occs[indices] = torch.maximum(self.occs[indices] * ema_decay, occ)\n        # suppose to use scatter max but emperically it is almost the same.\n        # self.occs, _ = scatter_max(\n        #     occ, indices, dim=0, out=self.occs * ema_decay\n        # )\n        self._binary = (\n            self.occs > torch.clamp(self.occs.mean(), max=occ_thre)\n        ).view(self._binary.shape)\n\n    @torch.no_grad()\n    def every_n_step(\n        self,\n        step: int,\n        occ_eval_fn: Callable,\n        occ_thre: float = 1e-2,\n        ema_decay: float = 0.95,\n        warmup_steps: int = 256,\n        n: int = 16,\n    ) -> None:\n        \"\"\"Update the grid every n steps during training.\n\n        Args:\n            step: Current training step.\n            occ_eval_fn: A function that takes in sample locations :math:`(N, 3)` and\n                returns the occupancy values :math:`(N, 1)` at those locations.\n            occ_thre: Threshold used to binarize the occupancy grid. 
Default: 1e-2.\n            ema_decay: The decay rate for EMA updates. Default: 0.95.\n            warmup_steps: Sample all cells during the warmup stage. After the warmup\n                stage we change the sampling strategy to 1/4 uniformly sampled cells\n                together with 1/4 occupied cells. Default: 256.\n            n: Update the grid every n steps. Default: 16.\n        \"\"\"\n        if not self.training:\n            raise RuntimeError(\n                \"You should only call this function only during training. \"\n                \"Please call _update() directly if you want to update the \"\n                \"field during inference.\"\n            )\n        if step % n == 0 and self.training:\n            self._update(\n                step=step,\n                occ_eval_fn=occ_eval_fn,\n                occ_thre=occ_thre,\n                ema_decay=ema_decay,\n                warmup_steps=warmup_steps,\n            )\n\n    @torch.no_grad()\n    def query_occ(self, samples: torch.Tensor) -> torch.Tensor:\n        \"\"\"Query the occupancy field at the given samples.\n\n        Args:\n            samples: Samples in the world coordinates. (n_samples, 3)\n\n        Returns:\n            Occupancy values at the given samples. (n_samples,)\n        \"\"\"\n        return query_grid(\n            samples,\n            self._roi_aabb,\n            self.binary,\n            self.contraction_type,\n        )\n\n\ndef _meshgrid3d(\n    res: torch.Tensor, device: Union[torch.device, str] = \"cpu\"\n) -> torch.Tensor:\n    \"\"\"Create 3D grid coordinates.\"\"\"\n    assert len(res) == 3\n    res = res.tolist()\n    return torch.stack(\n        torch.meshgrid(\n            [\n                torch.arange(res[0], dtype=torch.long),\n                torch.arange(res[1], dtype=torch.long),\n                torch.arange(res[2], dtype=torch.long),\n            ],\n            indexing=\"ij\",\n        ),\n        dim=-1,\n    ).to(device)\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/intersection.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nfrom typing import Tuple\n\nimport torch\nfrom torch import Tensor\n\nimport nerfacc.cuda as _C\n\n\n@torch.no_grad()\ndef ray_aabb_intersect(\n    rays_o: Tensor, rays_d: Tensor, aabb: Tensor\n) -> Tuple[Tensor, Tensor]:\n    \"\"\"Ray AABB Test.\n\n    Note:\n        this function is not differentiable to any inputs.\n\n    Args:\n        rays_o: Ray origins of shape (n_rays, 3).\n        rays_d: Normalized ray directions of shape (n_rays, 3).\n        aabb: Scene bounding box {xmin, ymin, zmin, xmax, ymax, zmax}. \\\n            Tensor with shape (6)\n\n    Returns:\n        Ray AABB intersection {t_min, t_max} with shape (n_rays) respectively. \\\n        Note the t_min is clipped to minimum zero. 1e10 means no intersection.\n\n    Examples:\n\n    .. code-block:: python\n\n        aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=\"cuda:0\")\n        rays_o = torch.rand((128, 3), device=\"cuda:0\")\n        rays_d = torch.randn((128, 3), device=\"cuda:0\")\n        rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)\n        t_min, t_max = ray_aabb_intersect(rays_o, rays_d, aabb)\n\n    \"\"\"\n    if rays_o.is_cuda and rays_d.is_cuda and aabb.is_cuda:\n        rays_o = rays_o.contiguous()\n        rays_d = rays_d.contiguous()\n        aabb = aabb.contiguous()\n        t_min, t_max = _C.ray_aabb_intersect(rays_o, rays_d, aabb)\n    else:\n        raise NotImplementedError(\"Only support cuda inputs.\")\n    return t_min, t_max\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/losses.py",
    "content": "from torch import Tensor\n\nfrom .pack import unpack_data\n\n\ndef distortion(\n    packed_info: Tensor, weights: Tensor, t_starts: Tensor, t_ends: Tensor\n) -> Tensor:\n    \"\"\"Distortion loss from Mip-NeRF 360 paper, Equ. 15.\n\n    Args:\n        packed_info: Packed info for the samples. (n_rays, 2)\n        weights: Weights for the samples. (all_samples,)\n        t_starts: Per-sample start distance. Tensor with shape (all_samples, 1).\n        t_ends: Per-sample end distance. Tensor with shape (all_samples, 1).\n\n    Returns:\n        Distortion loss. (n_rays,)\n    \"\"\"\n    # （all_samples, 1) -> (n_rays, n_samples)\n    w = unpack_data(packed_info, weights[..., None]).squeeze(-1)\n    t1 = unpack_data(packed_info, t_starts).squeeze(-1)\n    t2 = unpack_data(packed_info, t_ends).squeeze(-1)\n\n    interval = t2 - t1\n    tmid = (t1 + t2) / 2\n\n    loss_uni = (1 / 3) * (interval * w.pow(2)).sum(-1)\n    ww = w.unsqueeze(-1) * w.unsqueeze(-2)\n    mm = (tmid.unsqueeze(-1) - tmid.unsqueeze(-2)).abs()\n    loss_bi = (ww * mm).sum((-1, -2))\n    return loss_uni + loss_bi\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/pack.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\nfrom typing import Optional, Tuple\n\nimport torch\nfrom torch import Tensor\n\nimport nerfacc.cuda as _C\n\n\ndef pack_data(data: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:\n    \"\"\"Pack per-ray data (n_rays, n_samples, D) to (all_samples, D) based on mask.\n\n    Args:\n        data: Tensor with shape (n_rays, n_samples, D).\n        mask: Boolen tensor with shape (n_rays, n_samples).\n\n    Returns:\n        Tuple of Tensors including packed data (all_samples, D), \\\n        and packed_info (n_rays, 2) which stores the start index of the sample,\n        and the number of samples kept for each ray. \\\n\n    Examples:\n\n    .. code-block:: python\n\n        data = torch.rand((10, 3, 4), device=\"cuda:0\")\n        mask = data.rand((10, 3), dtype=torch.bool, device=\"cuda:0\")\n        packed_data, packed_info = pack(data, mask)\n        print(packed_data.shape, packed_info.shape)\n\n    \"\"\"\n    assert data.dim() == 3, \"data must be with shape of (n_rays, n_samples, D).\"\n    assert (\n        mask.shape == data.shape[:2]\n    ), \"mask must be with shape of (n_rays, n_samples).\"\n    assert mask.dtype == torch.bool, \"mask must be a boolean tensor.\"\n    packed_data = data[mask]\n    num_steps = mask.sum(dim=-1, dtype=torch.int32)\n    cum_steps = num_steps.cumsum(dim=0, dtype=torch.int32)\n    packed_info = torch.stack([cum_steps - num_steps, num_steps], dim=-1)\n    return packed_data, packed_info\n\n\n@torch.no_grad()\ndef pack_info(ray_indices: Tensor, n_rays: int = None) -> Tensor:\n    \"\"\"Pack `ray_indices` to `packed_info`. Useful for converting per sample data to per ray data.\n\n    Note: \n        this function is not differentiable to any inputs.\n\n    Args:\n        ray_indices: Ray index of each sample. LongTensor with shape (n_sample).\n\n    Returns:\n        packed_info: Stores information on which samples belong to the same ray. 
\\\n            See :func:`nerfacc.ray_marching` for details. IntTensor with shape (n_rays, 2).\n    \"\"\"\n    assert (\n        ray_indices.dim() == 1\n    ), \"ray_indices must be a 1D tensor with shape (n_samples).\"\n    if ray_indices.is_cuda:\n        ray_indices = ray_indices\n        device = ray_indices.device\n        if n_rays is None:\n            n_rays = int(ray_indices.max()) + 1\n        # else:\n        #     assert n_rays > ray_indices.max()\n        src = torch.ones_like(ray_indices, dtype=torch.int)\n        num_steps = torch.zeros((n_rays,), device=device, dtype=torch.int)\n        num_steps.scatter_add_(0, ray_indices, src)\n        cum_steps = num_steps.cumsum(dim=0, dtype=torch.int)\n        packed_info = torch.stack([cum_steps - num_steps, num_steps], dim=-1)\n    else:\n        raise NotImplementedError(\"Only support cuda inputs.\")\n    return packed_info\n\n\n@torch.no_grad()\ndef unpack_info(packed_info: Tensor, n_samples: int) -> Tensor:\n    \"\"\"Unpack `packed_info` to `ray_indices`. Useful for converting per ray data to per sample data.\n\n    Note: \n        this function is not differentiable to any inputs.\n\n    Args:\n        packed_info: Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. IntTensor with shape (n_rays, 2).\n        n_samples: Total number of samples.\n\n    Returns:\n        Ray index of each sample. LongTensor with shape (n_sample).\n\n    Examples:\n\n    .. 
code-block:: python\n\n        rays_o = torch.rand((128, 3), device=\"cuda:0\")\n        rays_d = torch.randn((128, 3), device=\"cuda:0\")\n        rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)\n        # Ray marching with near far plane.\n        packed_info, t_starts, t_ends = ray_marching(\n            rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3\n        )\n        # torch.Size([128, 2]) torch.Size([115200, 1]) torch.Size([115200, 1])\n        print(packed_info.shape, t_starts.shape, t_ends.shape)\n        # Unpack per-ray info to per-sample info.\n        ray_indices = unpack_info(packed_info, t_starts.shape[0])\n        # torch.Size([115200]) torch.int64\n        print(ray_indices.shape, ray_indices.dtype)\n\n    \"\"\"\n    assert (\n        packed_info.dim() == 2 and packed_info.shape[-1] == 2\n    ), \"packed_info must be a 2D tensor with shape (n_rays, 2).\"\n    if packed_info.is_cuda:\n        ray_indices = _C.unpack_info(packed_info.contiguous(), n_samples)\n    else:\n        raise NotImplementedError(\"Only support cuda inputs.\")\n    return ray_indices\n\n\ndef unpack_data(\n    packed_info: Tensor,\n    data: Tensor,\n    n_samples: Optional[int] = None,\n) -> Tensor:\n    \"\"\"Unpack packed data (all_samples, D) to per-ray data (n_rays, n_samples, D).\n\n    Args:\n        packed_info (Tensor): Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. Tensor with shape (n_rays, 2).\n        data: Packed data to unpack. Tensor with shape (n_samples, D).\n        n_samples (int): Optional Number of samples per ray. If not provided, it \\\n            will be inferred from the packed_info.\n\n    Returns:\n        Unpacked data (n_rays, n_samples, D).\n\n    Examples:\n\n    .. 
code-block:: python\n\n        rays_o = torch.rand((128, 3), device=\"cuda:0\")\n        rays_d = torch.randn((128, 3), device=\"cuda:0\")\n        rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)\n\n        # Ray marching with aabb.\n        scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=\"cuda:0\")\n        packed_info, t_starts, t_ends = ray_marching(\n            rays_o, rays_d, scene_aabb=scene_aabb, render_step_size=1e-2\n        )\n        print(t_starts.shape)  # torch.Size([all_samples, 1])\n\n        t_starts = unpack_data(packed_info, t_starts, n_samples=1024)\n        print(t_starts.shape)  # torch.Size([128, 1024, 1])\n    \"\"\"\n    assert (\n        packed_info.dim() == 2 and packed_info.shape[-1] == 2\n    ), \"packed_info must be a 2D tensor with shape (n_rays, 2).\"\n    assert (\n        data.dim() == 2\n    ), \"data must be a 2D tensor with shape (n_samples, D).\"\n    if n_samples is None:\n        n_samples = packed_info[:, 1].max().item()\n    return _UnpackData.apply(packed_info, data, n_samples)\n\n\nclass _UnpackData(torch.autograd.Function):\n    \"\"\"Unpack packed data (all_samples, D) to per-ray data (n_rays, n_samples, D).\"\"\"\n\n    @staticmethod\n    def forward(ctx, packed_info: Tensor, data: Tensor, n_samples: int):\n        # shape of the data should be (all_samples, D)\n        packed_info = packed_info.contiguous()\n        data = data.contiguous()\n        if ctx.needs_input_grad[1]:\n            ctx.save_for_backward(packed_info)\n            ctx.n_samples = n_samples\n        return _C.unpack_data(packed_info, data, n_samples)\n\n    @staticmethod\n    def backward(ctx, grad: Tensor):\n        # shape of the grad should be (n_rays, n_samples, D)\n        packed_info = ctx.saved_tensors[0]\n        n_samples = ctx.n_samples\n        mask = _C.unpack_info_to_mask(packed_info, n_samples)\n        packed_grad = grad[mask].contiguous()\n        return None, packed_grad, None\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/ray_marching.py",
    "content": "from typing import Callable, Optional, Tuple\n\nimport torch\n\nimport nerfacc.cuda as _C\n\nfrom .contraction import ContractionType\nfrom .grid import Grid\nfrom .intersection import ray_aabb_intersect\nfrom .vol_rendering import render_visibility\n\n\n@torch.no_grad()\ndef ray_marching(\n    # rays\n    rays_o: torch.Tensor,\n    rays_d: torch.Tensor,\n    t_min: Optional[torch.Tensor] = None,\n    t_max: Optional[torch.Tensor] = None,\n    # bounding box of the scene\n    scene_aabb: Optional[torch.Tensor] = None,\n    # binarized grid for skipping empty space\n    grid: Optional[Grid] = None,\n    # sigma/alpha function for skipping invisible space\n    sigma_fn: Optional[Callable] = None,\n    alpha_fn: Optional[Callable] = None,\n    early_stop_eps: float = 1e-4,\n    alpha_thre: float = 0.0,\n    # rendering options\n    near_plane: Optional[float] = None,\n    far_plane: Optional[float] = None,\n    render_step_size: float = 1e-3,\n    stratified: bool = False,\n    cone_angle: float = 0.0,\n) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n    \"\"\"Ray marching with space skipping.\n\n    Note:\n        The logic for computing `t_min` and `t_max`:\n        1. If `t_min` and `t_max` are given, use them with highest priority.\n        2. If `t_min` and `t_max` are not given, but `scene_aabb` is given, use \\\n            :func:`ray_aabb_intersect` to compute `t_min` and `t_max`.\n        3. If `t_min` and `t_max` are not given, and `scene_aabb` is not given, \\\n            set `t_min` to 0.0, and `t_max` to 1e10. (the case of unbounded scene)\n        4. Always clip `t_min` with `near_plane` and `t_max` with `far_plane` if given.\n\n    Warning:\n        This function is not differentiable to any inputs.\n\n    Args:\n        rays_o: Ray origins of shape (n_rays, 3).\n        rays_d: Normalized ray directions of shape (n_rays, 3).\n        t_min: Optional. Per-ray minimum distance. Tensor with shape (n_rays).\n        t_max: Optional. 
Per-ray maximum distance. Tensor with shape (n_rays).\n        scene_aabb: Optional. Scene bounding box for computing t_min and t_max.\n            A tensor with shape (6,) {xmin, ymin, zmin, xmax, ymax, zmax}.\n            `scene_aabb` will be ignored if both `t_min` and `t_max` are provided.\n        grid: Optional. Grid that idicates where to skip during marching.\n            See :class:`nerfacc.Grid` for details.\n        sigma_fn: Optional. If provided, the marching will skip the invisible space\n            by evaluating the density along the ray with `sigma_fn`. It should be a \n            function that takes in samples {t_starts (N, 1), t_ends (N, 1),\n            ray indices (N,)} and returns the post-activation density values (N, 1).\n            You should only provide either `sigma_fn` or `alpha_fn`.\n        alpha_fn: Optional. If provided, the marching will skip the invisible space\n            by evaluating the density along the ray with `alpha_fn`. It should be a\n            function that takes in samples {t_starts (N, 1), t_ends (N, 1),\n            ray indices (N,)} and returns the post-activation opacity values (N, 1).\n            You should only provide either `sigma_fn` or `alpha_fn`.\n        early_stop_eps: Early stop threshold for skipping invisible space. Default: 1e-4.\n        alpha_thre: Alpha threshold for skipping empty space. Default: 0.0.\n        near_plane: Optional. Near plane distance. If provided, it will be used\n            to clip t_min.\n        far_plane: Optional. Far plane distance. If provided, it will be used\n            to clip t_max.\n        render_step_size: Step size for marching. Default: 1e-3.\n        stratified: Whether to use stratified sampling. Default: False.\n        cone_angle: Cone angle for linearly-increased step size. 0. means\n            constant step size. Default: 0.0.\n\n    Returns:\n        A tuple of tensors.\n\n            - **ray_indices**: Ray index of each sample. 
IntTensor with shape (n_samples).\n            - **t_starts**: Per-sample start distance. Tensor with shape (n_samples, 1).\n            - **t_ends**: Per-sample end distance. Tensor with shape (n_samples, 1).\n\n    Examples:\n\n    .. code-block:: python\n\n        import torch\n        from nerfacc import OccupancyGrid, ray_marching, unpack_info\n\n        device = \"cuda:0\"\n        batch_size = 128\n        rays_o = torch.rand((batch_size, 3), device=device)\n        rays_d = torch.randn((batch_size, 3), device=device)\n        rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)\n\n        # Ray marching with near far plane.\n        ray_indices, t_starts, t_ends = ray_marching(\n            rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3\n        )\n\n        # Ray marching with aabb.\n        scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=device)\n        ray_indices, t_starts, t_ends = ray_marching(\n            rays_o, rays_d, scene_aabb=scene_aabb, render_step_size=1e-3\n        )\n\n        # Ray marching with per-ray t_min and t_max.\n        t_min = torch.zeros((batch_size,), device=device)\n        t_max = torch.ones((batch_size,), device=device)\n        ray_indices, t_starts, t_ends = ray_marching(\n            rays_o, rays_d, t_min=t_min, t_max=t_max, render_step_size=1e-3\n        )\n\n        # Ray marching with aabb and skip areas based on occupancy grid.\n        scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=device)\n        grid = OccupancyGrid(roi_aabb=[0.0, 0.0, 0.0, 0.5, 0.5, 0.5]).to(device)\n        ray_indices, t_starts, t_ends = ray_marching(\n            rays_o, rays_d, scene_aabb=scene_aabb, grid=grid, render_step_size=1e-3\n        )\n\n        # Convert t_starts and t_ends to sample locations.\n        t_mid = (t_starts + t_ends) / 2.0\n        sample_locs = rays_o[ray_indices] + t_mid * rays_d[ray_indices]\n\n    \"\"\"\n    if not rays_o.is_cuda:\n        raise 
NotImplementedError(\"Only support cuda inputs.\")\n    if alpha_fn is not None and sigma_fn is not None:\n        raise ValueError(\n            \"Only one of `alpha_fn` and `sigma_fn` should be provided.\"\n        )\n\n    # logic for t_min and t_max:\n    # 1. if t_min and t_max are given, use them with highest priority.\n    # 2. if t_min and t_max are not given, but scene_aabb is given, use\n    # ray_aabb_intersect to compute t_min and t_max.\n    # 3. if t_min and t_max are not given, and scene_aabb is not given,\n    # set t_min to 0.0, and t_max to 1e10. (the case of unbounded scene)\n    # 4. always clip t_min with near_plane and t_max with far_plane if given.\n    if t_min is None or t_max is None:\n        if scene_aabb is not None:\n            t_min, t_max = ray_aabb_intersect(rays_o, rays_d, scene_aabb)\n        else:\n            t_min = torch.zeros_like(rays_o[..., 0])\n            t_max = torch.ones_like(rays_o[..., 0]) * 1e10\n    if near_plane is not None:\n        t_min = torch.clamp(t_min, min=near_plane)\n    if far_plane is not None:\n        t_max = torch.clamp(t_max, max=far_plane)\n\n    # stratified sampling: prevent overfitting during training\n    if stratified:\n        t_min = t_min + torch.rand_like(t_min) * render_step_size\n\n    # use grid for skipping if given\n    if grid is not None:\n        grid_roi_aabb = grid.roi_aabb\n        grid_binary = grid.binary\n        contraction_type = grid.contraction_type.to_cpp_version()\n    else:\n        grid_roi_aabb = torch.tensor(\n            [-1e10, -1e10, -1e10, 1e10, 1e10, 1e10],\n            dtype=torch.float32,\n            device=rays_o.device,\n        )\n        grid_binary = torch.ones(\n            [1, 1, 1], dtype=torch.bool, device=rays_o.device\n        )\n        contraction_type = ContractionType.AABB.to_cpp_version()\n\n    # marching with grid-based skipping\n    packed_info, ray_indices, t_starts, t_ends = _C.ray_marching(\n        # rays\n        
rays_o.contiguous(),\n        rays_d.contiguous(),\n        t_min.contiguous(),\n        t_max.contiguous(),\n        # contraction and grid\n        grid_roi_aabb.contiguous(),\n        grid_binary.contiguous(),\n        contraction_type,\n        # sampling\n        render_step_size,\n        cone_angle,\n    )\n\n    # skip invisible space\n    if sigma_fn is not None or alpha_fn is not None:\n        # Query sigma without gradients\n        if sigma_fn is not None:\n            sigmas = sigma_fn(t_starts, t_ends, ray_indices)\n            assert (\n                sigmas.shape == t_starts.shape\n            ), \"sigmas must have shape of (N, 1)! Got {}\".format(sigmas.shape)\n            alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))\n        elif alpha_fn is not None:\n            alphas = alpha_fn(t_starts, t_ends, ray_indices)\n            assert (\n                alphas.shape == t_starts.shape\n            ), \"alphas must have shape of (N, 1)! Got {}\".format(alphas.shape)\n\n        # Compute visibility of the samples, and filter out invisible samples\n        masks = render_visibility(\n            alphas,\n            ray_indices=ray_indices,\n            packed_info=packed_info,\n            early_stop_eps=early_stop_eps,\n            alpha_thre=alpha_thre,\n            n_rays=rays_o.shape[0],\n        )\n        ray_indices, t_starts, t_ends = (\n            ray_indices[masks],\n            t_starts[masks],\n            t_ends[masks],\n        )\n\n    return ray_indices, t_starts, t_ends\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/sampling.py",
    "content": "import math\nfrom typing import Callable, Optional, Tuple, Union, overload\n\nimport torch\n\nimport nerfacc.cuda as _C\n\nfrom .cdf import ray_resampling\nfrom .grid import Grid\nfrom .pack import pack_info, unpack_info\nfrom .vol_rendering import (\n    render_transmittance_from_alpha,\n    render_weight_from_density,\n)\n\n\n@overload\ndef sample_along_rays(\n    rays_o: torch.Tensor,  # [n_rays, 3]\n    rays_d: torch.Tensor,  # [n_rays, 3]\n    t_min: torch.Tensor,  # [n_rays,]\n    t_max: torch.Tensor,  # [n_rays,]\n    step_size: float,\n    cone_angle: float = 0.0,\n    grid: Optional[Grid] = None,\n) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n    \"\"\"Sample along rays with per-ray min max.\"\"\"\n    ...\n\n\n@overload\ndef sample_along_rays(\n    rays_o: torch.Tensor,  # [n_rays, 3]\n    rays_d: torch.Tensor,  # [n_rays, 3]\n    t_min: float,\n    t_max: float,\n    step_size: float,\n    cone_angle: float = 0.0,\n) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n    \"\"\"Sample along rays with near far plane.\"\"\"\n    ...\n\n\n@torch.no_grad()\ndef sample_along_rays(\n    rays_o: torch.Tensor,  # [n_rays, 3]\n    rays_d: torch.Tensor,  # [n_rays, 3]\n    t_min: Union[float, torch.Tensor],  # [n_rays,]\n    t_max: Union[float, torch.Tensor],  # [n_rays,]\n    step_size: float,\n    cone_angle: float = 0.0,\n    grid: Optional[Grid] = None,\n) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n    \"\"\"Sample intervals along rays.\"\"\"\n    if isinstance(t_min, float) and isinstance(t_max, float):\n        n_rays = rays_o.shape[0]\n        device = rays_o.device\n        num_steps = math.floor((t_max - t_min) / step_size)\n        t_starts = (\n            (t_min + torch.arange(0, num_steps, device=device) * step_size)\n            .expand(n_rays, -1)\n            .reshape(-1, 1)\n        )\n        t_ends = t_starts + step_size\n        ray_indices = torch.arange(0, n_rays, device=device).repeat_interleave(\n         
   num_steps, dim=0\n        )\n    else:\n        if grid is None:\n            packed_info, ray_indices, t_starts, t_ends = _C.ray_marching(\n                # rays\n                t_min.contiguous(),\n                t_max.contiguous(),\n                # sampling\n                step_size,\n                cone_angle,\n            )\n        else:\n            (\n                packed_info,\n                ray_indices,\n                t_starts,\n                t_ends,\n            ) = _C.ray_marching_with_grid(\n                # rays\n                rays_o.contiguous(),\n                rays_d.contiguous(),\n                t_min.contiguous(),\n                t_max.contiguous(),\n                # coontraction and grid\n                grid.roi_aabb.contiguous(),\n                grid.binary.contiguous(),\n                grid.contraction_type.to_cpp_version(),\n                # sampling\n                step_size,\n                cone_angle,\n            )\n    return ray_indices, t_starts, t_ends\n\n\n@torch.no_grad()\ndef proposal_sampling_with_filter(\n    t_starts: torch.Tensor,  # [n_samples, 1]\n    t_ends: torch.Tensor,  # [n_samples, 1]\n    ray_indices: torch.Tensor,  # [n_samples,]\n    n_rays: Optional[int] = None,\n    # compute density of samples: {t_starts, t_ends, ray_indices} -> density\n    sigma_fn: Optional[Callable] = None,\n    # proposal density fns: {t_starts, t_ends, ray_indices} -> density\n    proposal_sigma_fns: Tuple[Callable, ...] = [],\n    proposal_n_samples: Tuple[int, ...] 
= [],\n    proposal_require_grads: bool = False,\n    # acceleration options\n    early_stop_eps: float = 1e-4,\n    alpha_thre: float = 0.0,\n) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:\n    \"\"\"Hueristic marching with proposal fns.\"\"\"\n    assert len(proposal_sigma_fns) == len(proposal_n_samples), (\n        \"proposal_sigma_fns and proposal_n_samples must have the same length, \"\n        f\"but got {len(proposal_sigma_fns)} and {len(proposal_n_samples)}.\"\n    )\n    if n_rays is None:\n        n_rays = ray_indices.max() + 1\n\n    # compute density from proposal fns\n    proposal_samples = []\n    for proposal_fn, n_samples in zip(proposal_sigma_fns, proposal_n_samples):\n\n        # compute weights for resampling\n        sigmas = proposal_fn(t_starts, t_ends, ray_indices)\n        assert (\n            sigmas.shape == t_starts.shape\n        ), \"sigmas must have shape of (N, 1)! Got {}\".format(sigmas.shape)\n        alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))\n        transmittance = render_transmittance_from_alpha(\n            alphas, ray_indices=ray_indices, n_rays=n_rays\n        )\n        weights = alphas * transmittance\n\n        # Compute visibility for filtering\n        if alpha_thre > 0 or early_stop_eps > 0:\n            vis = (alphas >= alpha_thre) & (transmittance >= early_stop_eps)\n            vis = vis.squeeze(-1)\n            ray_indices, t_starts, t_ends, weights = (\n                ray_indices[vis],\n                t_starts[vis],\n                t_ends[vis],\n                weights[vis],\n            )\n        packed_info = pack_info(ray_indices, n_rays=n_rays)\n\n        # Rerun the proposal function **with** gradients on filtered samples.\n        if proposal_require_grads:\n            with torch.enable_grad():\n                sigmas = proposal_fn(t_starts, t_ends, ray_indices)\n                weights = render_weight_from_density(\n                    t_starts, t_ends, sigmas, 
ray_indices=ray_indices\n                )\n                proposal_samples.append(\n                    (packed_info, t_starts, t_ends, weights)\n                )\n\n        # resampling on filtered samples\n        packed_info, t_starts, t_ends = ray_resampling(\n            packed_info, t_starts, t_ends, weights, n_samples=n_samples\n        )\n        ray_indices = unpack_info(packed_info, t_starts.shape[0])\n\n    # last round filtering with sigma_fn\n    if (alpha_thre > 0 or early_stop_eps > 0) and (sigma_fn is not None):\n        sigmas = sigma_fn(t_starts, t_ends, ray_indices)\n        assert (\n            sigmas.shape == t_starts.shape\n        ), \"sigmas must have shape of (N, 1)! Got {}\".format(sigmas.shape)\n        alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))\n        transmittance = render_transmittance_from_alpha(\n            alphas, ray_indices=ray_indices, n_rays=n_rays\n        )\n        vis = (alphas >= alpha_thre) & (transmittance >= early_stop_eps)\n        vis = vis.squeeze(-1)\n        ray_indices, t_starts, t_ends = (\n            ray_indices[vis],\n            t_starts[vis],\n            t_ends[vis],\n        )\n\n    return ray_indices, t_starts, t_ends, proposal_samples\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/version.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\n__version__ = \"0.3.5\"\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/nerfacc/vol_rendering.py",
    "content": "\"\"\"\nCopyright (c) 2022 Ruilong Li, UC Berkeley.\n\"\"\"\n\nfrom typing import Callable, Optional, Tuple\n\nimport torch\nfrom torch import Tensor\n\nimport nerfacc.cuda as _C\n\nfrom .pack import pack_info\n\n\ndef rendering(\n    # ray marching results\n    t_starts: torch.Tensor,\n    t_ends: torch.Tensor,\n    ray_indices: torch.Tensor,\n    n_rays: int,\n    # radiance field\n    rgb_sigma_fn: Optional[Callable] = None,\n    rgb_alpha_fn: Optional[Callable] = None,\n    # rendering options\n    render_bkgd: Optional[torch.Tensor] = None,\n) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:\n    \"\"\"Render the rays through the radience field defined by `rgb_sigma_fn`.\n\n    This function is differentiable to the outputs of `rgb_sigma_fn` so it can \n    be used for gradient-based optimization.\n\n    Note:\n        Either `rgb_sigma_fn` or `rgb_alpha_fn` should be provided. \n\n    Warning:\n        This function is not differentiable to `t_starts`, `t_ends` and `ray_indices`.\n\n    Args:\n        t_starts: Per-sample start distance. Tensor with shape (n_samples, 1).\n        t_ends: Per-sample end distance. Tensor with shape (n_samples, 1).\n        ray_indices: Ray index of each sample. IntTensor with shape (n_samples).\n        n_rays: Total number of rays. This will decide the shape of the ouputs.\n        rgb_sigma_fn: A function that takes in samples {t_starts (N, 1), t_ends (N, 1), \\\n            ray indices (N,)} and returns the post-activation rgb (N, 3) and density \\\n            values (N, 1). \n        rgb_alpha_fn: A function that takes in samples {t_starts (N, 1), t_ends (N, 1), \\\n            ray indices (N,)} and returns the post-activation rgb (N, 3) and opacity \\\n            values (N, 1).\n        render_bkgd: Optional. Background color. Tensor with shape (3,).\n\n    Returns:\n        Ray colors (n_rays, 3), opacities (n_rays, 1) and depths (n_rays, 1).\n\n    Examples:\n\n    .. 
code-block:: python\n\n        >>> rays_o = torch.rand((128, 3), device=\"cuda:0\")\n        >>> rays_d = torch.randn((128, 3), device=\"cuda:0\")\n        >>> rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)\n        >>> ray_indices, t_starts, t_ends = ray_marching(\n        >>>     rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3)\n        >>> def rgb_sigma_fn(t_starts, t_ends, ray_indices):\n        >>>     # This is a dummy function that returns random values.\n        >>>     rgbs = torch.rand((t_starts.shape[0], 3), device=\"cuda:0\")\n        >>>     sigmas = torch.rand((t_starts.shape[0], 1), device=\"cuda:0\")\n        >>>     return rgbs, sigmas\n        >>> colors, opacities, depths = rendering(\n        >>>     t_starts, t_ends, ray_indices, n_rays=128, rgb_sigma_fn=rgb_sigma_fn)\n        >>> print(colors.shape, opacities.shape, depths.shape)\n        torch.Size([128, 3]) torch.Size([128, 1]) torch.Size([128, 1])\n\n    \"\"\"\n    if rgb_sigma_fn is None and rgb_alpha_fn is None:\n        raise ValueError(\n            \"At least one of `rgb_sigma_fn` and `rgb_alpha_fn` should be specified.\"\n        )\n\n    # Query sigma/alpha and color with gradients\n    if rgb_sigma_fn is not None:\n        rgbs, sigmas = rgb_sigma_fn(t_starts, t_ends, ray_indices)\n        assert rgbs.shape[-1] == 3, \"rgbs must have 3 channels, got {}\".format(\n            rgbs.shape\n        )\n        assert (\n            sigmas.shape == t_starts.shape\n        ), \"sigmas must have shape of (N, 1)! 
Got {}\".format(sigmas.shape)\n        # Rendering: compute weights.\n        weights = render_weight_from_density(\n            t_starts,\n            t_ends,\n            sigmas,\n            ray_indices=ray_indices,\n            n_rays=n_rays,\n        )\n    elif rgb_alpha_fn is not None:\n        rgbs, alphas = rgb_alpha_fn(t_starts, t_ends, ray_indices)\n        assert rgbs.shape[-1] == 3, \"rgbs must have 3 channels, got {}\".format(\n            rgbs.shape\n        )\n        assert (\n            alphas.shape == t_starts.shape\n        ), \"alphas must have shape of (N, 1)! Got {}\".format(alphas.shape)\n        # Rendering: compute weights.\n        weights = render_weight_from_alpha(\n            alphas,\n            ray_indices=ray_indices,\n            n_rays=n_rays,\n        )\n\n    # Rendering: accumulate rgbs, opacities, and depths along the rays.\n    colors = accumulate_along_rays(\n        weights, ray_indices, values=rgbs, n_rays=n_rays\n    )\n    opacities = accumulate_along_rays(\n        weights, ray_indices, values=None, n_rays=n_rays\n    )\n    depths = accumulate_along_rays(\n        weights,\n        ray_indices,\n        values=(t_starts + t_ends) / 2.0,\n        n_rays=n_rays,\n    )\n\n    # Background composition.\n    if render_bkgd is not None:\n        colors = colors + render_bkgd * (1.0 - opacities)\n\n    return colors, opacities, depths\n\n\ndef accumulate_along_rays(\n    weights: Tensor,\n    ray_indices: Tensor,\n    values: Optional[Tensor] = None,\n    n_rays: Optional[int] = None,\n) -> Tensor:\n    \"\"\"Accumulate volumetric values along the ray.\n\n    Note:\n        This function is only differentiable to `weights` and `values`.\n\n    Args:\n        weights: Volumetric rendering weights for those samples. Tensor with shape \\\n            (n_samples, 1).\n        ray_indices: Ray index of each sample. LongTensor with shape (n_samples).\n        values: The values to be accumulated. Tensor with shape (n_samples, D). 
If \\\n            None, the accumulated values are just weights. Default is None.\n        n_rays: Total number of rays. This will decide the shape of the outputs. If \\\n            None, it will be inferred from `ray_indices.max() + 1`.  If specified \\\n            it should be at least larger than `ray_indices.max()`. Default is None.\n\n    Returns:\n        Accumulated values with shape (n_rays, D). If `values` is not given then we return \\\n            the accumulated weights, in which case D == 1.\n\n    Examples:\n\n    .. code-block:: python\n\n        # Rendering: accumulate rgbs, opacities, and depths along the rays.\n        colors = accumulate_along_rays(weights, ray_indices, values=rgbs, n_rays=n_rays)\n        opacities = accumulate_along_rays(weights, ray_indices, values=None, n_rays=n_rays)\n        depths = accumulate_along_rays(\n            weights,\n            ray_indices,\n            values=(t_starts + t_ends) / 2.0,\n            n_rays=n_rays,\n        )\n        # (n_rays, 3), (n_rays, 1), (n_rays, 1)\n        print(colors.shape, opacities.shape, depths.shape)\n\n    \"\"\"\n    assert ray_indices.dim() == 1 and weights.dim() == 2\n    if not weights.is_cuda:\n        raise NotImplementedError(\"Only support cuda inputs.\")\n    if values is not None:\n        assert (\n            values.dim() == 2 and values.shape[0] == weights.shape[0]\n        ), \"Invalid shapes: {} vs {}\".format(values.shape, weights.shape)\n        src = weights * values\n    else:\n        src = weights\n\n    if ray_indices.numel() == 0:\n        assert n_rays is not None\n        return torch.zeros((n_rays, src.shape[-1]), device=weights.device)\n\n    if n_rays is None:\n        n_rays = int(ray_indices.max()) + 1\n    # assert n_rays > ray_indices.max()\n\n    index = ray_indices[:, None].expand(-1, src.shape[-1])\n    outputs = torch.zeros(\n        (n_rays, src.shape[-1]), device=src.device, dtype=src.dtype\n    )\n    outputs.scatter_add_(0, index, src)\n 
   return outputs\n\ndef accumulate_along_rays_importance(\n    weights: Tensor,\n    ray_indices: Tensor,\n    values: Optional[Tensor] = None,\n    n_rays: Optional[int] = None,\n) -> Tensor:\n    \"\"\"Accumulate volumetric values along the ray.\n\n    Note:\n        This function is only differentiable to `weights` and `values`.\n\n    Args:\n        weights: Volumetric rendering weights for those samples. Tensor with shape \\\n            (n_samples,).\n        ray_indices: Ray index of each sample. LongTensor with shape (n_samples).\n        values: The values to be accumulated. Tensor with shape (n_samples, D). If \\\n            None, the accumulated values are just weights. Default is None.\n        n_rays: Total number of rays. This will decide the shape of the outputs. If \\\n            None, it will be inferred from `ray_indices.max() + 1`.  If specified \\\n            it should be at least larger than `ray_indices.max()`. Default is None.\n\n    Returns:\n        Accumulated values with shape (n_rays, D). If `values` is not given then we return \\\n            the accumulated weights, in which case D == 1.\n\n    Examples:\n\n    .. 
code-block:: python\n\n        # Rendering: accumulate rgbs, opacities, and depths along the rays.\n        colors = accumulate_along_rays(weights, ray_indices, values=rgbs, n_rays=n_rays)\n        opacities = accumulate_along_rays(weights, ray_indices, values=None, n_rays=n_rays)\n        depths = accumulate_along_rays(\n            weights,\n            ray_indices,\n            values=(t_starts + t_ends) / 2.0,\n            n_rays=n_rays,\n        )\n        # (n_rays, 3), (n_rays, 1), (n_rays, 1)\n        print(colors.shape, opacities.shape, depths.shape)\n\n    \"\"\"\n    assert ray_indices.dim() == 1 and weights.dim() == 2\n    if not weights.is_cuda:\n        raise NotImplementedError(\"Only support cuda inputs.\")\n    if values is not None:\n        assert (\n            values.dim() == 2 and values.shape[0] == weights.shape[0]\n        ), \"Invalid shapes: {} vs {}\".format(values.shape, weights.shape)\n        src = weights * values\n    else:\n        src = weights\n\n    if ray_indices.numel() == 0:\n        assert n_rays is not None\n        return torch.zeros((n_rays, src.shape[-1]), device=weights.device)\n\n    if n_rays is None:\n        n_rays = int(ray_indices.max()) + 1\n    # assert n_rays > ray_indices.max()\n\n    index = ray_indices[:, None].expand(-1, src.shape[-1])\n    outputs = torch.zeros(\n        (n_rays, src.shape[-1]), device=src.device, dtype=src.dtype\n    )\n    outputs.scatter_add_(0, index, src)\n    return outputs\n\n\ndef accumulate_along_rays_patch_based(\n    weights: Tensor,\n    ray_indices: Tensor,\n    values: Optional[Tensor] = None,\n    n_patches: Optional[int] = None,\n) -> Tensor:\n    \"\"\"Accumulate volumetric values along the ray.\n\n    Note:\n        This function is only differentiable to `weights` and `values`.\n\n    Args:\n        weights: Volumetric rendering weights for those samples. Tensor with shape \\\n            (n_samples,).\n        ray_indices: Ray index of each sample. 
LongTensor with shape (n_samples).\n        values: The values to be accumulated. Tensor with shape (n_samples, D). If \\\n            None, the accumulated values are just weights. Default is None.\n        n_patches: Total number of patches. This will decide the shape of the outputs. If \\\n            None, it will be inferred from `ray_indices.max() + 1`.  If specified \\\n            it should be at least larger than `ray_indices.max()`. Default is None.\n\n    Returns:\n        Accumulated values with shape (n_rays, D). If `values` is not given then we return \\\n            the accumulated weights, in which case D == 1.\n\n    Examples:\n\n    .. code-block:: python\n\n        # Rendering: accumulate rgbs, opacities, and depths along the rays.\n        colors = accumulate_along_rays(weights, ray_indices, values=rgbs, n_rays=n_rays)\n        opacities = accumulate_along_rays(weights, ray_indices, values=None, n_rays=n_rays)\n        depths = accumulate_along_rays(\n            weights,\n            ray_indices,\n            values=(t_starts + t_ends) / 2.0,\n            n_rays=n_rays,\n        )\n        # (n_rays, 3), (n_rays, 1), (n_rays, 1)\n        print(colors.shape, opacities.shape, depths.shape)\n\n    \"\"\"\n    assert ray_indices.dim() == 1 and weights.dim() == 3  # (num_samples, patch_size, 1)\n    if not weights.is_cuda:\n        raise NotImplementedError(\"Only support cuda inputs.\")\n    if values is not None:\n        assert (\n            values.dim() == 3 and values.shape[0] == weights.shape[0]\n        ), \"Invalid shapes: {} vs {}\".format(values.shape, weights.shape)\n        src = weights * values\n    else:\n        src = weights\n\n    if ray_indices.numel() == 0:\n        assert n_patches is not None\n        return torch.zeros((n_patches, src.shape[1], src.shape[-1]), device=weights.device)\n\n    if n_patches is None:\n        n_patches = int(ray_indices.max()) + 1\n    # assert n_rays > ray_indices.max()\n\n    index = ray_indices[:, None, 
None].expand(-1, src.shape[1], src.shape[-1])\n    outputs = torch.zeros(\n        (n_patches, src.shape[1], src.shape[-1]), device=src.device, dtype=src.dtype\n    )\n    outputs.scatter_add_(0, index, src)\n    return outputs\n\n\ndef render_transmittance_from_density(\n    t_starts: Tensor,\n    t_ends: Tensor,\n    sigmas: Tensor,\n    *,\n    packed_info: Optional[torch.Tensor] = None,\n    ray_indices: Optional[torch.Tensor] = None,\n    n_rays: Optional[int] = None,\n) -> Tensor:\n    \"\"\"Compute transmittance :math:`T_i` from density :math:`\\\\sigma_i`.\n    \n    .. math::\n        T_i = exp(-\\\\sum_{j=1}^{i-1}\\\\sigma_j\\\\delta_j)\n\n    Note:\n        Either `ray_indices` or `packed_info` should be provided. If `ray_indices` is \n        provided, CUB acceleration will be used if available (CUDA >= 11.6). Otherwise,\n        we will use the naive implementation with `packed_info`.\n\n    Args:\n        t_starts: Where the frustum-shape sample starts along a ray. Tensor with \\\n            shape (n_samples, 1).\n        t_ends: Where the frustum-shape sample ends along a ray. Tensor with \\\n            shape (n_samples, 1).\n        sigmas: The density values of the samples. Tensor with shape (n_samples, 1).\n        packed_info: Optional. Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. LongTensor with shape (n_rays, 2).\n        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).\n        n_rays: Optional. Number of rays. Only useful when `ray_indices` is provided yet \\\n            CUB acceleration is not available. We will implicitly convert `ray_indices` to \\\n            `packed_info` and use the naive implementation. If not provided, we will infer \\\n            it from `ray_indices` but it will be slower.\n\n    Returns:\n        The rendering transmittance. Tensor with shape (n_sample, 1).\n\n    Examples:\n\n    .. 
code-block:: python\n\n        >>> t_starts = torch.tensor(\n        >>>     [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0], [6.0]], device=\"cuda\")\n        >>> t_ends = torch.tensor(\n        >>>     [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0]], device=\"cuda\")\n        >>> sigmas = torch.tensor(\n        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device=\"cuda\")\n        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device=\"cuda\")\n        >>> transmittance = render_transmittance_from_density(\n        >>>     t_starts, t_ends, sigmas, ray_indices=ray_indices)\n        [[1.00], [0.67], [0.30], [1.00], [0.45], [1.00], [1.00]]\n    \n    \"\"\"\n    assert (\n        ray_indices is not None or packed_info is not None\n    ), \"Either ray_indices or packed_info should be provided.\"\n    if ray_indices is not None and _C.is_cub_available():\n        transmittance = _RenderingTransmittanceFromDensityCUB.apply(\n            ray_indices, t_starts, t_ends, sigmas\n        )\n    else:\n        if packed_info is None:\n            packed_info = pack_info(ray_indices, n_rays=n_rays)\n        transmittance = _RenderingTransmittanceFromDensityNaive.apply(\n            packed_info, t_starts, t_ends, sigmas\n        )\n    return transmittance\n\n\ndef render_transmittance_from_alpha(\n    alphas: Tensor,\n    *,\n    packed_info: Optional[torch.Tensor] = None,\n    ray_indices: Optional[torch.Tensor] = None,\n    n_rays: Optional[int] = None,\n) -> Tensor:\n    \"\"\"Compute transmittance :math:`T_i` from alpha :math:`\\\\alpha_i`.\n    \n    .. math::\n        T_i = \\\\prod_{j=1}^{i-1}(1-\\\\alpha_j)\n\n    Note:\n        Either `ray_indices` or `packed_info` should be provided. If `ray_indices` is \n        provided, CUB acceleration will be used if available (CUDA >= 11.6). Otherwise,\n        we will use the naive implementation with `packed_info`.\n\n    Args:\n        alphas: The opacity values of the samples. 
Tensor with shape (n_samples, 1).\n        packed_info: Optional. Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. LongTensor with shape (n_rays, 2).\n        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).\n        n_rays: Optional. Number of rays. Only useful when `ray_indices` is provided yet \\\n            CUB acceleration is not available. We will implicitly convert `ray_indices` to \\\n            `packed_info` and use the naive implementation. If not provided, we will infer \\\n            it from `ray_indices` but it will be slower.\n\n    Returns:\n        The rendering transmittance. Tensor with shape (n_sample, 1).\n\n    Examples:\n\n    .. code-block:: python\n\n        >>> alphas = torch.tensor( \n        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device=\"cuda\")\n        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device=\"cuda\")\n        >>> transmittance = render_transmittance_from_alpha(alphas, ray_indices=ray_indices)\n        tensor([[1.0], [0.6], [0.12], [1.0], [0.2], [1.0], [1.0]])\n\n    \"\"\"\n    assert (\n        ray_indices is not None or packed_info is not None\n    ), \"Either ray_indices or packed_info should be provided.\"\n    if ray_indices is not None and _C.is_cub_available():\n        transmittance = _RenderingTransmittanceFromAlphaCUB.apply(\n            ray_indices, alphas\n        )\n    else:\n        if packed_info is None:\n            packed_info = pack_info(ray_indices, n_rays=n_rays)\n        transmittance = _RenderingTransmittanceFromAlphaNaive.apply(\n            packed_info, alphas\n        )\n    return transmittance\n\n\ndef render_weight_from_density(\n    t_starts: Tensor,\n    t_ends: Tensor,\n    sigmas: Tensor,\n    *,\n    packed_info: Optional[torch.Tensor] = None,\n    ray_indices: Optional[torch.Tensor] = None,\n    n_rays: Optional[int] = None,\n) -> torch.Tensor:\n    
\"\"\"Compute rendering weights :math:`w_i` from density :math:`\\\\sigma_i` and interval :math:`\\\\delta_i`.\n    \n    .. math::\n        w_i = T_i(1 - exp(-\\\\sigma_i\\\\delta_i)), \\\\quad\\\\textrm{where}\\\\quad T_i = exp(-\\\\sum_{j=1}^{i-1}\\\\sigma_j\\\\delta_j)\n\n    Note:\n        Either `ray_indices` or `packed_info` should be provided. If `ray_indices` is \n        provided, CUB acceleration will be used if available (CUDA >= 11.6). Otherwise,\n        we will use the naive implementation with `packed_info`.\n\n    Args:\n        t_starts: Where the frustum-shape sample starts along a ray. Tensor with \\\n            shape (n_samples, 1).\n        t_ends: Where the frustum-shape sample ends along a ray. Tensor with \\\n            shape (n_samples, 1).\n        sigmas: The density values of the samples. Tensor with shape (n_samples, 1).\n        packed_info: Optional. Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. LongTensor with shape (n_rays, 2).\n        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).\n        n_rays: Optional. Number of rays. Only useful when `ray_indices` is provided yet \\\n            CUB acceleration is not available. We will implicitly convert `ray_indices` to \\\n            `packed_info` and use the naive implementation. If not provided, we will infer \\\n            it from `ray_indices` but it will be slower.\n\n    Returns:\n        The rendering weights. Tensor with shape (n_sample, 1).\n\n    Examples:\n\n    .. 
code-block:: python\n\n        >>> t_starts = torch.tensor(\n        >>>     [[0.0], [1.0], [2.0], [3.0], [4.0], [5.0], [6.0]], device=\"cuda\")\n        >>> t_ends = torch.tensor(\n        >>>     [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0]], device=\"cuda\")\n        >>> sigmas = torch.tensor(\n        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device=\"cuda\")\n        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device=\"cuda\")\n        >>> weights = render_weight_from_density(\n        >>>     t_starts, t_ends, sigmas, ray_indices=ray_indices)\n        [[0.33], [0.37], [0.03], [0.55], [0.04], [0.00], [0.59]]\n    \n    \"\"\"\n    assert (\n        ray_indices is not None or packed_info is not None\n    ), \"Either ray_indices or packed_info should be provided.\"\n    if ray_indices is not None and _C.is_cub_available():\n        transmittance = _RenderingTransmittanceFromDensityCUB.apply(\n            ray_indices, t_starts, t_ends, sigmas\n        )\n        alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))\n        weights = transmittance * alphas\n    else:\n        if packed_info is None:\n            packed_info = pack_info(ray_indices, n_rays=n_rays)\n        weights = _RenderingWeightFromDensityNaive.apply(\n            packed_info, t_starts, t_ends, sigmas\n        )\n    return weights\n\n\ndef render_weight_from_alpha_patch_based(\n        alphas: Tensor,\n        ray_indices: Tensor,\n        *,\n        # packed_info: Optional[torch.Tensor] = None\n        n_rays: Optional[int] = None,\n) -> torch.Tensor:\n    \"\"\"Compute rendering weights :math:`w_i` from opacity :math:`\\\\alpha_i`.\n\n    .. math::\n        w_i = T_i\\\\alpha_i, \\\\quad\\\\textrm{where}\\\\quad T_i = \\\\prod_{j=1}^{i-1}(1-\\\\alpha_j)\n\n    Note:\n        Either `ray_indices` or `packed_info` should be provided. If `ray_indices` is\n        provided, CUB acceleration will be used if available (CUDA >= 11.6). 
Otherwise,\n        we will use the naive implementation with `packed_info`.\n\n    Args:\n        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).\n        packed_info: Optional. Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. LongTensor with shape (n_rays, 2).\n        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).\n        n_rays: Optional. Number of rays. Only useful when `ray_indices` is provided yet \\\n            CUB acceleration is not available. We will implicitly convert `ray_indices` to \\\n            `packed_info` and use the naive implementation. If not provided, we will infer \\\n            it from `ray_indices` but it will be slower.\n\n    Returns:\n        The rendering weights. Tensor with shape (n_sample, 1).\n\n    Examples:\n\n    .. code-block:: python\n\n        >>> alphas = torch.tensor(\n        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device=\"cuda\")\n        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device=\"cuda\")\n        >>> weights = render_weight_from_alpha(alphas, ray_indices=ray_indices)\n        tensor([[0.4], [0.48], [0.012], [0.8], [0.02], [0.0], [0.9]])\n\n    \"\"\"\n    packed_info = pack_info(ray_indices, n_rays=n_rays)\n    weights = _RenderingWeightFromAlphaPatchBasedNaive.apply(packed_info, alphas)\n    return weights\n\n\ndef render_weight_and_transmittance_from_alpha_patch_based(\n        alphas: Tensor,\n        ray_indices: Tensor,\n        *,\n        # packed_info: Optional[torch.Tensor] = None\n        n_rays: Optional[int] = None,\n) -> torch.Tensor:\n    \"\"\"Compute rendering weights :math:`w_i` from opacity :math:`\\\\alpha_i`.\n\n    .. math::\n        w_i = T_i\\\\alpha_i, \\\\quad\\\\textrm{where}\\\\quad T_i = \\\\prod_{j=1}^{i-1}(1-\\\\alpha_j)\n\n    Note:\n        Either `ray_indices` or `packed_info` should be provided. 
If `ray_indices` is\n        provided, CUB acceleration will be used if available (CUDA >= 11.6). Otherwise,\n        we will use the naive implementation with `packed_info`.\n\n    Args:\n        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).\n        packed_info: Optional. Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. LongTensor with shape (n_rays, 2).\n        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).\n        n_rays: Optional. Number of rays. Only useful when `ray_indices` is provided yet \\\n            CUB acceleration is not available. We will implicitly convert `ray_indices` to \\\n            `packed_info` and use the naive implementation. If not provided, we will infer \\\n            it from `ray_indices` but it will be slower.\n\n    Returns:\n        The rendering weights. Tensor with shape (n_sample, 1).\n\n    Examples:\n\n    .. code-block:: python\n\n        >>> alphas = torch.tensor(\n        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device=\"cuda\")\n        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device=\"cuda\")\n        >>> weights = render_weight_from_alpha(alphas, ray_indices=ray_indices)\n        tensor([[0.4], [0.48], [0.012], [0.8], [0.02], [0.0], [0.9]])\n\n    \"\"\"\n    packed_info = pack_info(ray_indices, n_rays=n_rays)\n    weights, transmittance = _RenderingWeightAndTransmittanceFromAlphaPatchBasedNaive.apply(packed_info, alphas)\n    return weights, transmittance\n\ndef render_weight_from_alpha(\n    alphas: Tensor,\n    *,\n    packed_info: Optional[torch.Tensor] = None,\n    ray_indices: Optional[torch.Tensor] = None,\n    n_rays: Optional[int] = None,\n) -> torch.Tensor:\n    \"\"\"Compute rendering weights :math:`w_i` from opacity :math:`\\\\alpha_i`.\n    \n    .. 
math::\n        w_i = T_i\\\\alpha_i, \\\\quad\\\\textrm{where}\\\\quad T_i = \\\\prod_{j=1}^{i-1}(1-\\\\alpha_j)\n\n    Note:\n        Either `ray_indices` or `packed_info` should be provided. If `ray_indices` is \n        provided, CUB acceleration will be used if available (CUDA >= 11.6). Otherwise,\n        we will use the naive implementation with `packed_info`.\n\n    Args:\n        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).\n        packed_info: Optional. Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. LongTensor with shape (n_rays, 2).\n        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).\n        n_rays: Optional. Number of rays. Only useful when `ray_indices` is provided yet \\\n            CUB acceleration is not available. We will implicitly convert `ray_indices` to \\\n            `packed_info` and use the naive implementation. If not provided, we will infer \\\n            it from `ray_indices` but it will be slower.\n\n    Returns:\n        The rendering weights. Tensor with shape (n_sample, 1).\n\n    Examples:\n\n    .. 
code-block:: python\n\n        >>> alphas = torch.tensor( \n        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device=\"cuda\")\n        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device=\"cuda\")\n        >>> weights = render_weight_from_alpha(alphas, ray_indices=ray_indices)\n        tensor([[0.4], [0.48], [0.012], [0.8], [0.02], [0.0], [0.9]])\n\n    \"\"\"\n    assert (\n        ray_indices is not None or packed_info is not None\n    ), \"Either ray_indices or packed_info should be provided.\"\n    if ray_indices is not None and _C.is_cub_available():\n        transmittance = _RenderingTransmittanceFromAlphaCUB.apply(\n            ray_indices, alphas\n        )\n        weights = transmittance * alphas\n    else:\n        if packed_info is None:\n            packed_info = pack_info(ray_indices, n_rays=n_rays)\n        weights = _RenderingWeightFromAlphaNaive.apply(packed_info, alphas)\n    return weights\n\n\n@torch.no_grad()\ndef render_visibility(\n    alphas: torch.Tensor,\n    *,\n    ray_indices: Optional[torch.Tensor] = None,\n    packed_info: Optional[torch.Tensor] = None,\n    n_rays: Optional[int] = None,\n    early_stop_eps: float = 1e-4,\n    alpha_thre: float = 0.0,\n) -> torch.Tensor:\n    \"\"\"Filter out transparent and occluded samples.\n\n    In this function, we first compute the transmittance from the sample opacity. The\n    transmittance is then used to filter out occluded samples. And opacity is used to\n    filter out transparent samples. The function returns a boolean tensor indicating\n    which samples are visible (`transmittance >= early_stop_eps` and `opacity >= alpha_thre`).\n\n    Note:\n        Either `ray_indices` or `packed_info` should be provided. If `ray_indices` is \n        provided, CUB acceleration will be used if available (CUDA >= 11.6). Otherwise,\n        we will use the naive implementation with `packed_info`.\n\n    Args:\n        alphas: The opacity values of the samples. 
Tensor with shape (n_samples, 1).\n        packed_info: Optional. Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. LongTensor with shape (n_rays, 2).\n        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).\n        n_rays: Optional. Number of rays. Only useful when `ray_indices` is provided yet \\\n            CUB acceleration is not available. We will implicitly convert `ray_indices` to \\\n            `packed_info` and use the naive implementation. If not provided, we will infer \\\n            it from `ray_indices` but it will be slower.\n        early_stop_eps: The early stopping threshold on transmittance.\n        alpha_thre: The threshold on opacity.\n    \n    Returns:\n        The visibility of each sample. Tensor with shape (n_samples, 1).\n\n    Examples:\n\n    .. code-block:: python\n\n        >>> alphas = torch.tensor( \n        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device=\"cuda\")\n        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device=\"cuda\")\n        >>> transmittance = render_transmittance_from_alpha(alphas, ray_indices=ray_indices)\n        tensor([[1.0], [0.6], [0.12], [1.0], [0.2], [1.0], [1.0]])\n        >>> visibility = render_visibility(\n        >>>     alphas, ray_indices=ray_indices, early_stop_eps=0.3, alpha_thre=0.2)\n        tensor([True,  True, False,  True, False, False,  True])\n\n    \"\"\"\n    assert (\n        ray_indices is not None or packed_info is not None\n    ), \"Either ray_indices or packed_info should be provided.\"\n    if ray_indices is not None and _C.is_cub_available():\n        transmittance = _RenderingTransmittanceFromAlphaCUB.apply(\n            ray_indices, alphas\n        )\n    else:\n        if packed_info is None:\n            packed_info = pack_info(ray_indices, n_rays=n_rays)\n        transmittance = _RenderingTransmittanceFromAlphaNaive.apply(\n            
packed_info, alphas\n        )\n    visibility = transmittance >= early_stop_eps\n    if alpha_thre > 0:\n        visibility = visibility & (alphas >= alpha_thre)\n    visibility = visibility.squeeze(-1)\n    return visibility\n\n\n@torch.no_grad()\ndef render_visibility_patch_based(\n        alphas: torch.Tensor,\n        *,\n        ray_indices: Optional[torch.Tensor] = None,\n        packed_info: Optional[torch.Tensor] = None,\n        n_patches: Optional[int] = None,\n        early_stop_eps: float = 1e-4,\n        alpha_thre: float = 0.0,\n) -> torch.Tensor:\n    \"\"\"Filter out transparent and occluded samples.\n\n    In this function, we first compute the transmittance from the sample opacity. The\n    transmittance is then used to filter out occluded samples. And opacity is used to\n    filter out transparent samples. The function returns a boolean tensor indicating\n    which samples are visible (`transmittance > early_stop_eps` and `opacity > alpha_thre`).\n\n    Note:\n        Either `ray_indices` or `packed_info` should be provided. If `ray_indices` is\n        provided, CUB acceleration will be used if available (CUDA >= 11.6). Otherwise,\n        we will use the naive implementation with `packed_info`.\n\n    Args:\n        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).\n        packed_info: Optional. Stores information on which samples belong to the same ray. \\\n            See :func:`nerfacc.ray_marching` for details. LongTensor with shape (n_rays, 2).\n        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).\n        n_rays: Optional. Number of rays. Only useful when `ray_indices` is provided yet \\\n            CUB acceleration is not available. We will implicitly convert `ray_indices` to \\\n            `packed_info` and use the naive implementation. 
If not provided, we will infer \\\n            it from `ray_indices` but it will be slower.\n        early_stop_eps: The early stopping threshold on transmittance.\n        alpha_thre: The threshold on opacity.\n\n    Returns:\n        The visibility of each sample. Tensor with shape (n_samples, 1).\n\n    Examples:\n\n    .. code-block:: python\n\n        >>> alphas = torch.tensor(\n        >>>     [[0.4], [0.8], [0.1], [0.8], [0.1], [0.0], [0.9]], device=\"cuda\")\n        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device=\"cuda\")\n        >>> transmittance = render_transmittance_from_alpha(alphas, ray_indices=ray_indices)\n        tensor([[1.0], [0.6], [0.12], [1.0], [0.2], [1.0], [1.0]])\n        >>> visibility = render_visibility(\n        >>>     alphas, ray_indices=ray_indices, early_stop_eps=0.3, alpha_thre=0.2)\n        tensor([True,  True, False,  True, False, False,  True])\n\n    \"\"\"\n    assert (\n            ray_indices is not None or packed_info is not None\n    ), \"Either ray_indices or packed_info should be provided.\"\n    if ray_indices is not None and _C.is_cub_available():\n        transmittance = _RenderingTransmittanceFromAlphaCUB.apply(\n            ray_indices, alphas\n        )\n    else:\n        if packed_info is None:\n            packed_info = pack_info(ray_indices, n_rays=n_patches)\n        transmittance = _RenderingTransmittanceFromAlphaPatchBasedNaive.apply(\n            packed_info, alphas\n        )\n    visibility = torch.any(transmittance >= early_stop_eps, dim=1, keepdim=True)\n    if alpha_thre > 0:\n        visibility = visibility & (alphas >= alpha_thre)\n    visibility = visibility.squeeze()\n    return visibility\n\nclass _RenderingTransmittanceFromDensityCUB(torch.autograd.Function):\n    \"\"\"Rendering transmittance from density with CUB implementation.\"\"\"\n\n    @staticmethod\n    def forward(ctx, ray_indices, t_starts, t_ends, sigmas):\n        ray_indices = ray_indices.contiguous()\n        
t_starts = t_starts.contiguous()\n        t_ends = t_ends.contiguous()\n        sigmas = sigmas.contiguous()\n        transmittance = _C.transmittance_from_sigma_forward_cub(\n            ray_indices, t_starts, t_ends, sigmas\n        )\n        if ctx.needs_input_grad[3]:\n            ctx.save_for_backward(ray_indices, t_starts, t_ends, transmittance)\n        return transmittance\n\n    @staticmethod\n    def backward(ctx, transmittance_grads):\n        transmittance_grads = transmittance_grads.contiguous()\n        ray_indices, t_starts, t_ends, transmittance = ctx.saved_tensors\n        grad_sigmas = _C.transmittance_from_sigma_backward_cub(\n            ray_indices, t_starts, t_ends, transmittance, transmittance_grads\n        )\n        return None, None, None, grad_sigmas\n\n\nclass _RenderingTransmittanceFromDensityNaive(torch.autograd.Function):\n    \"\"\"Rendering transmittance from density with naive forloop.\"\"\"\n\n    @staticmethod\n    def forward(ctx, packed_info, t_starts, t_ends, sigmas):\n        packed_info = packed_info.contiguous()\n        t_starts = t_starts.contiguous()\n        t_ends = t_ends.contiguous()\n        sigmas = sigmas.contiguous()\n        transmittance = _C.transmittance_from_sigma_forward_naive(\n            packed_info, t_starts, t_ends, sigmas\n        )\n        if ctx.needs_input_grad[3]:\n            ctx.save_for_backward(packed_info, t_starts, t_ends, transmittance)\n        return transmittance\n\n    @staticmethod\n    def backward(ctx, transmittance_grads):\n        transmittance_grads = transmittance_grads.contiguous()\n        packed_info, t_starts, t_ends, transmittance = ctx.saved_tensors\n        grad_sigmas = _C.transmittance_from_sigma_backward_naive(\n            packed_info, t_starts, t_ends, transmittance, transmittance_grads\n        )\n        return None, None, None, grad_sigmas\n\n\nclass _RenderingTransmittanceFromAlphaCUB(torch.autograd.Function):\n    \"\"\"Rendering transmittance from opacity 
with CUB implementation.\"\"\"\n\n    @staticmethod\n    def forward(ctx, ray_indices, alphas):\n        ray_indices = ray_indices.contiguous()\n        alphas = alphas.contiguous()\n        transmittance = _C.transmittance_from_alpha_forward_cub(\n            ray_indices, alphas\n        )\n        if ctx.needs_input_grad[1]:\n            ctx.save_for_backward(ray_indices, transmittance, alphas)\n        return transmittance\n\n    @staticmethod\n    def backward(ctx, transmittance_grads):\n        transmittance_grads = transmittance_grads.contiguous()\n        ray_indices, transmittance, alphas = ctx.saved_tensors\n        grad_alphas = _C.transmittance_from_alpha_backward_cub(\n            ray_indices, alphas, transmittance, transmittance_grads\n        )\n        return None, grad_alphas\n\n\nclass _RenderingTransmittanceFromAlphaNaive(torch.autograd.Function):\n    \"\"\"Rendering transmittance from opacity with naive forloop.\"\"\"\n\n    @staticmethod\n    def forward(ctx, packed_info, alphas):\n        packed_info = packed_info.contiguous()\n        alphas = alphas.contiguous()\n        transmittance = _C.transmittance_from_alpha_forward_naive(\n            packed_info, alphas\n        )\n        if ctx.needs_input_grad[1]:\n            ctx.save_for_backward(packed_info, transmittance, alphas)\n        return transmittance\n\n    @staticmethod\n    def backward(ctx, transmittance_grads):\n        transmittance_grads = transmittance_grads.contiguous()\n        packed_info, transmittance, alphas = ctx.saved_tensors\n        grad_alphas = _C.transmittance_from_alpha_backward_naive(\n            packed_info, alphas, transmittance, transmittance_grads\n        )\n        return None, grad_alphas\n\nclass _RenderingTransmittanceFromAlphaPatchBasedNaive(torch.autograd.Function):\n    \"\"\"Rendering weight from opacity with naive forloop.\"\"\"\n\n    @staticmethod\n    def forward(ctx, packed_info, alphas):\n        packed_info = packed_info.contiguous()\n        
alphas = alphas.contiguous()\n        transmittance = _C.transmittance_from_alpha_patch_based_forward_naive(packed_info, alphas)\n        if ctx.needs_input_grad[1]:\n            ctx.save_for_backward(packed_info, transmittance, alphas)\n        return transmittance\n\n    @staticmethod\n    def backward(ctx, grad_transmittance):\n        grad_transmittance = grad_transmittance.contiguous()\n        packed_info, transmittance, alphas = ctx.saved_tensors\n        grad_alphas = _C.weight_and_transmittance_from_alpha_patch_based_backward_naive(\n            packed_info, alphas, transmittance, grad_transmittance\n        )\n        return None, grad_alphas\n\nclass _RenderingWeightFromDensityNaive(torch.autograd.Function):\n    \"\"\"Rendering weight from density with naive forloop.\"\"\"\n\n    @staticmethod\n    def forward(ctx, packed_info, t_starts, t_ends, sigmas):\n        packed_info = packed_info.contiguous()\n        t_starts = t_starts.contiguous()\n        t_ends = t_ends.contiguous()\n        sigmas = sigmas.contiguous()\n        weights = _C.weight_from_sigma_forward_naive(\n            packed_info, t_starts, t_ends, sigmas\n        )\n        if ctx.needs_input_grad[3]:\n            ctx.save_for_backward(\n                packed_info, t_starts, t_ends, sigmas, weights\n            )\n        return weights\n\n    @staticmethod\n    def backward(ctx, grad_weights):\n        grad_weights = grad_weights.contiguous()\n        packed_info, t_starts, t_ends, sigmas, weights = ctx.saved_tensors\n        grad_sigmas = _C.weight_from_sigma_backward_naive(\n            weights, grad_weights, packed_info, t_starts, t_ends, sigmas\n        )\n        return None, None, None, grad_sigmas\n\n\nclass _RenderingWeightFromAlphaNaive(torch.autograd.Function):\n    \"\"\"Rendering weight from opacity with naive forloop.\"\"\"\n\n    @staticmethod\n    def forward(ctx, packed_info, alphas):\n        packed_info = packed_info.contiguous()\n        alphas = 
alphas.contiguous()\n        weights = _C.weight_from_alpha_forward_naive(packed_info, alphas)\n        if ctx.needs_input_grad[1]:\n            ctx.save_for_backward(packed_info, alphas, weights)\n        return weights\n\n    @staticmethod\n    def backward(ctx, grad_weights):\n        grad_weights = grad_weights.contiguous()\n        packed_info, alphas, weights = ctx.saved_tensors\n        grad_alphas = _C.weight_from_alpha_backward_naive(\n            weights, grad_weights, packed_info, alphas\n        )\n        return None, grad_alphas\n\nclass _RenderingWeightFromAlphaPatchBasedNaive(torch.autograd.Function):\n    \"\"\"Rendering weight from opacity with naive forloop.\"\"\"\n\n    @staticmethod\n    def forward(ctx, packed_info, alphas):\n        packed_info = packed_info.contiguous()\n        alphas = alphas.contiguous()\n        weights = _C.weight_from_alpha_patch_based_forward_naive(packed_info, alphas)\n        # print(weights.shape, transmittance.shape)\n        if ctx.needs_input_grad[1]:\n            ctx.save_for_backward(packed_info, alphas, weights)\n        return weights\n\n    @staticmethod\n    def backward(ctx, grad_weights):\n        grad_weights = grad_weights.contiguous()\n        packed_info, alphas, weights = ctx.saved_tensors\n        grad_alphas = _C.weight_from_alpha_patch_based_backward_naive(\n            weights, grad_weights, packed_info, alphas\n        )\n        return None, grad_alphas\n\n\nclass _RenderingWeightAndTransmittanceFromAlphaPatchBasedNaive(torch.autograd.Function):\n    \"\"\"Rendering weight from opacity with naive forloop.\"\"\"\n\n    @staticmethod\n    def forward(ctx, packed_info, alphas):\n        packed_info = packed_info.contiguous()\n        alphas = alphas.contiguous()\n        weights, transmittance = _C.weight_and_transmittance_from_alpha_patch_based_forward_naive(packed_info, alphas)\n        # print(weights.shape, transmittance.shape)\n        if ctx.needs_input_grad[1]:\n            
ctx.save_for_backward(packed_info, alphas, weights)\n        return weights, transmittance\n\n    @staticmethod\n    def backward(ctx, grad_weights, grad_transmittance):\n        grad_weights = grad_weights.contiguous()\n        packed_info, alphas, weights = ctx.saved_tensors\n        grad_alphas = _C.weight_and_transmittance_from_alpha_patch_based_backward_naive(\n            weights, grad_weights, packed_info, alphas\n        )\n        return None, grad_alphas\n\n\nclass _RenderingWeightFromAlphaImportanceSamplingNaive(torch.autograd.Function):\n    \"\"\"Rendering weight from opacity with naive forloop.\"\"\"\n\n    @staticmethod\n    def forward(ctx, packed_info, alphas, importance_pdfs):\n        packed_info = packed_info.contiguous()\n        alphas = alphas.contiguous()\n        importance_pdfs = importance_pdfs.contiguous()\n        weights = _C.weight_from_alpha_importance_sampling_forward_naive(packed_info, alphas, importance_pdfs)\n        if ctx.needs_input_grad[1]:\n            ctx.save_for_backward(packed_info, alphas, importance_pdfs, weights)\n        return weights\n\n    @staticmethod\n    def backward(ctx, grad_weights):\n        grad_weights = grad_weights.contiguous()\n        packed_info, alphas, importance_pdfs, weights = ctx.saved_tensors\n        grad_alphas = _C.weight_from_alpha_backward_naive(\n            weights, grad_weights, packed_info, alphas, importance_pdfs\n        )\n        return None, grad_alphas"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/scripts/run_aws_listing.py",
    "content": "import argparse\nimport os\n\nfrom boto3 import client\n\nparser = argparse.ArgumentParser()\nparser.add_argument(\"--access_key_id\", type=str, required=True)\nparser.add_argument(\"--secret_access_key\", type=str, required=True)\nparser.add_argument(\"--bucket\", type=str, required=True)\nparser.add_argument(\"--region\", type=str, required=True)\nargs = parser.parse_args()\n\nURL = f\"https://{args.bucket}.s3.{args.region}.amazonaws.com/\"\n\ns3 = client(\n    \"s3\",\n    aws_access_key_id=args.access_key_id,\n    aws_secret_access_key=args.secret_access_key,\n)\n\nresponses = s3.list_objects_v2(Bucket=args.bucket, Prefix=\"whl/\")[\"Contents\"]\n\nsubdirectories = {}\nfor data in responses:\n    splits = data[\"Key\"].split(\"/\")\n    if len(splits) == 3:\n        subdirectories[splits[1]] = []\n\nfor dir in subdirectories.keys():\n    responses = s3.list_objects_v2(Bucket=args.bucket, Prefix=f\"whl/{dir}\")[\n        \"Contents\"\n    ]\n    for data in responses:\n        splits = data[\"Key\"].split(\"/\")\n        if len(splits) == 3:\n            subdirectories[dir].append(splits[2])\n\nfor dir, files in subdirectories.items():\n    lines = \"\"\n    for file in files:\n        href = os.path.join(URL, \"whl\", dir, file)\n        lines += f\"<a href='{href}'>{file}</a>\\n<br>\\n\"\n\n    html = f\"<html>\\n<head></head>\\n<body>\\n{lines}\\n</body>\\n</html>\\n\"\n    html_file = f\"/tmp/{dir}.html\"\n    with open(html_file, \"w\") as f:\n        f.write(html)\n\n    s3.upload_file(\n        html_file,\n        args.bucket,\n        f\"whl/{dir}.html\",\n        ExtraArgs={\"ContentType\": \"text/html\"},\n    )\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/scripts/run_dev_checks.py",
    "content": "#!/usr/bin/env python\n\"\"\"Simple yaml debugger\"\"\"\nimport subprocess\n\nimport yaml\nfrom rich.console import Console\nfrom rich.style import Style\n\nconsole = Console(width=120)\n\nLOCAL_TESTS = [\n    \"Run license checks\",\n    \"Run isort\",\n    \"Run Black\",\n    \"Python Pylint\",\n    \"Test with pytest\",\n]\n\n\ndef run_command(command: str) -> bool:\n    \"\"\"Run a shell command and report whether it succeeded.\n\n    A failure is printed to the console; no exception is raised.\n\n    Args:\n        command: command to run\n\n    Returns:\n        True if the command exited with status 0, False otherwise.\n    \"\"\"\n    ret_code = subprocess.call(command, shell=True)\n    if ret_code != 0:\n        console.print(f\"[bold red]Error: `{command}` failed.\")\n    return ret_code == 0\n\n\ndef run_github_actions_file(filename: str):\n    \"\"\"Run a github actions file locally.\n\n    Args:\n        filename: Which yml github actions file to run.\n    \"\"\"\n    with open(filename, \"rb\") as f:\n        my_dict = yaml.safe_load(f)\n    steps = my_dict[\"jobs\"][\"build\"][\"steps\"]\n\n    success = True\n\n    for step in steps:\n        if \"name\" in step and step[\"name\"] in LOCAL_TESTS:\n            compressed = step[\"run\"].replace(\"\\n\", \";\").replace(\"\\\\\", \"\")\n            compressed = compressed.replace(\"--check\", \"\")\n            curr_command = f\"{compressed}\"\n\n            console.line()\n            console.rule(f\"[bold green]Running: {curr_command}\")\n            success = success and run_command(curr_command)\n        else:\n            skip_name = step[\"name\"] if \"name\" in step else step[\"uses\"]\n            console.print(f\"Skipping {skip_name}\")\n\n    # Code Testing\n    console.line()\n    console.rule(\"[bold green]Running pytest\")\n    success = success and run_command(\"pytest\")\n\n    # Add checks for building documentation\n    console.line()\n    console.rule(\"[bold green]Building Documentation\")\n    success = success and run_command(\n        \"cd docs/; make clean; make html SPHINXOPTS='-W;'\"\n    )\n\n    if success:\n        console.line()\n        console.rule(characters=\"=\")\n        console.print(\n            \"[bold green]:TADA: :TADA: :TADA: ALL CHECKS PASSED :TADA: :TADA: :TADA:\",\n            justify=\"center\",\n        )\n        console.rule(characters=\"=\")\n    else:\n        console.line()\n        console.rule(characters=\"=\", style=Style(color=\"red\"))\n        console.print(\n            \"[bold red]:skull: :skull: :skull: ERRORS FOUND :skull: :skull: :skull:\",\n            justify=\"center\",\n        )\n        console.rule(characters=\"=\", style=Style(color=\"red\"))\n\n\nif __name__ == \"__main__\":\n    run_github_actions_file(filename=\".github/workflows/code_checks.yml\")\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/scripts/run_profiler.py",
    "content": "from typing import Callable\n\nimport torch\nimport tqdm\n\nimport nerfacc\n\n# timing\n# https://github.com/pytorch/pytorch/commit/d2784c233bfc57a1d836d961694bcc8ec4ed45e4\n\n\nclass Profiler:\n    def __init__(self, warmup=10, repeat=1000):\n        self.warmup = warmup\n        self.repeat = repeat\n\n    def __call__(self, func: Callable):\n        # warmup\n        for _ in range(self.warmup):\n            func()\n        torch.cuda.synchronize()\n\n        # profile\n        with torch.profiler.profile(\n            activities=[\n                torch.profiler.ProfilerActivity.CPU,\n                torch.profiler.ProfilerActivity.CUDA,\n            ],\n            profile_memory=True,\n        ) as prof:\n            for _ in range(self.repeat):\n                func()\n            torch.cuda.synchronize()\n\n        # return\n        events = prof.key_averages()\n        # print(events.table(sort_by=\"self_cpu_time_total\", row_limit=10))\n        self_cpu_time_total = (\n            sum([event.self_cpu_time_total for event in events]) / self.repeat\n        )\n        self_cuda_time_total = (\n            sum([event.self_cuda_time_total for event in events]) / self.repeat\n        )\n        self_cuda_memory_usage = max(\n            [event.self_cuda_memory_usage for event in events]\n        )\n        return (\n            self_cpu_time_total,  # in us\n            self_cuda_time_total,  # in us\n            self_cuda_memory_usage,  # in bytes\n        )\n\n\ndef main():\n    device = \"cuda:0\"\n    torch.manual_seed(42)\n    profiler = Profiler(warmup=10, repeat=100)\n\n    # # contract\n    # print(\"* contract\")\n    # x = torch.rand([1024, 3], device=device)\n    # roi = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)\n    # fn = lambda: nerfacc.contract(\n    #     x, roi=roi, type=nerfacc.ContractionType.UN_BOUNDED_TANH\n    # )\n    # cpu_t, cuda_t, cuda_bytes = profiler(fn)\n    # print(f\"{cpu_t:.2f} us, 
{cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB\")\n\n    # rendering\n    print(\"* rendering\")\n    batch_size = 81920\n    rays_o = torch.rand((batch_size, 3), device=device)\n    rays_d = torch.randn((batch_size, 3), device=device)\n    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)\n\n    ray_indices, t_starts, t_ends = nerfacc.ray_marching(\n        rays_o,\n        rays_d,\n        near_plane=0.1,\n        far_plane=1.0,\n        render_step_size=1e-1,\n    )\n    sigmas = torch.randn_like(t_starts, requires_grad=True)\n    fn = (\n        lambda: nerfacc.render_weight_from_density(\n            ray_indices, t_starts, t_ends, sigmas\n        )\n        .sum()\n        .backward()\n    )\n    fn()\n    torch.cuda.synchronize()\n    for _ in tqdm.tqdm(range(100)):\n        fn()\n        torch.cuda.synchronize()\n\n    cpu_t, cuda_t, cuda_bytes = profiler(fn)\n    print(f\"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB\")\n\n    packed_info = nerfacc.pack_info(ray_indices, n_rays=batch_size)\n    fn = (\n        lambda: nerfacc.vol_rendering._RenderingDensity.apply(\n            packed_info, t_starts, t_ends, sigmas, 0\n        )\n        .sum()\n        .backward()\n    )\n    fn()\n    torch.cuda.synchronize()\n    for _ in tqdm.tqdm(range(100)):\n        fn()\n        torch.cuda.synchronize()\n    cpu_t, cuda_t, cuda_bytes = profiler(fn)\n    print(f\"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/setup.cfg",
    "content": "[isort]\nmulti_line_output = 3\nline_length = 80\ninclude_trailing_comma = true\nskip=./examples/pycolmap"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/setup.py",
    "content": "import glob\nimport os\nimport os.path as osp\nimport platform\nimport sys\n\nfrom setuptools import find_packages, setup\n\n__version__ = None\nexec(open(\"nerfacc/version.py\", \"r\").read())\n\nURL = \"https://github.com/KAIR-BAIR/nerfacc\"\n\nBUILD_NO_CUDA = os.getenv(\"BUILD_NO_CUDA\", \"0\") == \"1\"\nWITH_SYMBOLS = os.getenv(\"WITH_SYMBOLS\", \"0\") == \"1\"\n\n\ndef get_ext():\n    from torch.utils.cpp_extension import BuildExtension\n\n    return BuildExtension.with_options(\n        no_python_abi_suffix=True, use_ninja=False\n    )\n\n\ndef get_extensions():\n    import torch\n    from torch.__config__ import parallel_info\n    from torch.utils.cpp_extension import CUDAExtension\n\n    extensions_dir = osp.join(\"nerfacc\", \"cuda\", \"csrc\")\n    sources = glob.glob(osp.join(extensions_dir, \"*.cu\"))\n    # remove generated 'hip' files, in case of rebuilds\n    sources = [path for path in sources if \"hip\" not in path]\n\n    undef_macros = []\n    define_macros = []\n\n    if sys.platform == \"win32\":\n        define_macros += [(\"nerfacc_EXPORTS\", None)]\n\n    extra_compile_args = {\"cxx\": [\"-O3\"]}\n    if not os.name == \"nt\":  # Not on Windows:\n        extra_compile_args[\"cxx\"] += [\"-Wno-sign-compare\"]\n    extra_link_args = [] if WITH_SYMBOLS else [\"-s\"]\n\n    info = parallel_info()\n    if (\n        \"backend: OpenMP\" in info\n        and \"OpenMP not found\" not in info\n        and sys.platform != \"darwin\"\n    ):\n        extra_compile_args[\"cxx\"] += [\"-DAT_PARALLEL_OPENMP\"]\n        if sys.platform == \"win32\":\n            extra_compile_args[\"cxx\"] += [\"/openmp\"]\n        else:\n            extra_compile_args[\"cxx\"] += [\"-fopenmp\"]\n    else:\n        print(\"Compiling without OpenMP...\")\n\n    # Compile for mac arm64\n    if sys.platform == \"darwin\" and platform.machine() == \"arm64\":\n        extra_compile_args[\"cxx\"] += [\"-arch\", \"arm64\"]\n        extra_link_args += [\"-arch\", 
\"arm64\"]\n\n    nvcc_flags = os.getenv(\"NVCC_FLAGS\", \"\")\n    nvcc_flags = [] if nvcc_flags == \"\" else nvcc_flags.split(\" \")\n    nvcc_flags += [\"-O3\"]\n    if torch.version.hip:\n        # USE_ROCM was added to later versions of PyTorch.\n        # Define here to support older PyTorch versions as well:\n        define_macros += [(\"USE_ROCM\", None)]\n        undef_macros += [\"__HIP_NO_HALF_CONVERSIONS__\"]\n    else:\n        nvcc_flags += [\"--expt-relaxed-constexpr\"]\n    extra_compile_args[\"nvcc\"] = nvcc_flags\n\n    extension = CUDAExtension(\n        f\"nerfacc.csrc\",\n        sources,\n        include_dirs=[osp.join(extensions_dir, \"include\")],\n        define_macros=define_macros,\n        undef_macros=undef_macros,\n        extra_compile_args=extra_compile_args,\n        extra_link_args=extra_link_args,\n    )\n\n    return [extension]\n\n\n# work-around hipify abs paths\ninclude_package_data = True\n# if torch.cuda.is_available() and torch.version.hip:\n#     include_package_data = False\n\nsetup(\n    name=\"nerfacc\",\n    version=__version__,\n    description=\"A General NeRF Acceleration Toolbox\",\n    author=\"Ruilong\",\n    author_email=\"ruilongli94@gmail.com\",\n    url=URL,\n    download_url=f\"{URL}/archive/{__version__}.tar.gz\",\n    keywords=[],\n    python_requires=\">=3.7\",\n    install_requires=[\"rich>=12\", \"torch\"],\n    extras_require={\n        # dev dependencies. 
Install them by `pip install nerfacc[dev]`\n        \"dev\": [\n            \"black[jupyter]==22.3.0\",\n            \"isort==5.10.1\",\n            \"pylint==2.13.4\",\n            \"pytest==7.1.2\",\n            \"pytest-xdist==2.5.0\",\n            \"typeguard>=2.13.3\",\n            \"pyyaml==6.0\",\n            \"build\",\n            \"twine\",\n        ],\n    },\n    ext_modules=get_extensions() if not BUILD_NO_CUDA else [],\n    cmdclass={\"build_ext\": get_ext()} if not BUILD_NO_CUDA else {},\n    packages=find_packages(),\n    include_package_data=include_package_data,\n)\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_contraction.py",
    "content": "import pytest\nimport torch\n\nimport nerfacc.cuda as _C\nfrom nerfacc import ContractionType, contract, contract_inv\n\ndevice = \"cuda:0\"\nbatch_size = 32\neps = 1e-6\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_ContractionType():\n    ctype = ContractionType.AABB.to_cpp_version()\n    assert ctype == _C.ContractionTypeGetter(0)\n    ctype = ContractionType.UN_BOUNDED_TANH.to_cpp_version()\n    assert ctype == _C.ContractionTypeGetter(1)\n    ctype = ContractionType.UN_BOUNDED_SPHERE.to_cpp_version()\n    assert ctype == _C.ContractionTypeGetter(2)\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_identity():\n    x = torch.rand([batch_size, 3], device=device)\n    roi = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)\n    x_out = contract(x, roi=roi, type=ContractionType.AABB)\n    assert torch.allclose(x_out, x, atol=eps)\n    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.AABB)\n    assert torch.allclose(x_inv, x, atol=eps)\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_aabb():\n    x = torch.rand([batch_size, 3], device=device)\n    roi = torch.tensor(\n        [-1, -1, -1, 1, 1, 1], dtype=torch.float32, device=device\n    )\n    x_out = contract(x, roi=roi, type=ContractionType.AABB)\n    x_out_tgt = x * 0.5 + 0.5\n    assert torch.allclose(x_out, x_out_tgt, atol=eps)\n    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.AABB)\n    assert torch.allclose(x_inv, x, atol=eps)\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_tanh():\n    x = torch.randn([batch_size, 3], device=device)\n    roi = torch.tensor(\n        [-0.2, -0.3, -0.4, 0.7, 0.8, 0.6], dtype=torch.float32, device=device\n    )\n    x_out = contract(x, roi=roi, type=ContractionType.UN_BOUNDED_TANH)\n    x_out_tgt = (\n        torch.tanh((x - roi[:3]) / (roi[3:] - roi[:3]) - 0.5) * 0.5 + 0.5\n    )\n    assert torch.allclose(x_out, x_out_tgt, atol=eps)\n    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.UN_BOUNDED_TANH)\n    assert torch.allclose(x_inv, x, atol=eps)\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_sphere():\n    x = torch.randn([batch_size, 3], device=device)\n    roi = torch.tensor(\n        [-0.2, -0.3, -0.4, 0.7, 0.8, 0.6], dtype=torch.float32, device=device\n    )\n    x_out = contract(x, roi=roi, type=ContractionType.UN_BOUNDED_SPHERE)\n    assert ((x_out - 0.5).norm(dim=-1) < 0.5).all()\n    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.UN_BOUNDED_SPHERE)\n    assert torch.allclose(x_inv, x, atol=eps)\n\n\nif __name__ == \"__main__\":\n    test_ContractionType()\n    test_identity()\n    test_aabb()\n    test_tanh()\n    test_sphere()\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_grid.py",
    "content": "import pytest\nimport torch\n\nfrom nerfacc import OccupancyGrid\n\ndevice = \"cuda:0\"\n\n\ndef occ_eval_fn(x: torch.Tensor) -> torch.Tensor:\n    \"\"\"Pseudo occupancy function: (N, 3) -> (N, 1).\"\"\"\n    return ((x - 0.5).norm(dim=-1, keepdim=True) < 0.5).float()\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_occ_grid():\n    roi_aabb = [0, 0, 0, 1, 1, 1]\n    occ_grid = OccupancyGrid(roi_aabb=roi_aabb, resolution=128).to(device)\n    occ_grid.every_n_step(0, occ_eval_fn, occ_thre=0.1)\n    assert occ_grid.roi_aabb.shape == (6,)\n    assert occ_grid.binary.shape == (128, 128, 128)\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_query_grid():\n    roi_aabb = [0, 0, 0, 1, 1, 1]\n    occ_grid = OccupancyGrid(roi_aabb=roi_aabb, resolution=128).to(device)\n    occ_grid.every_n_step(0, occ_eval_fn, occ_thre=0.1)\n    samples = torch.rand((100, 3), device=device)\n    occs = occ_grid.query_occ(samples)\n    assert occs.shape == (100,)\n\n\nif __name__ == \"__main__\":\n    test_occ_grid()\n    test_query_grid()\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_intersection.py",
    "content": "import pytest\nimport torch\n\nfrom nerfacc import ray_aabb_intersect\n\ndevice = \"cuda:0\"\nbatch_size = 32\neps = 1e-6\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_intersection():\n    rays_o = torch.rand([batch_size, 3], device=device)\n    rays_d = torch.randn([batch_size, 3], device=device)\n    aabb = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)\n    t_min, t_max = ray_aabb_intersect(rays_o, rays_d, aabb)\n    assert (t_min == 0).all()\n    t = torch.rand_like(t_min) * (t_max - t_min) + t_min\n    x = rays_o + t.unsqueeze(-1) * rays_d\n    assert (x >= 0).all() and (x <= 1).all()\n\n\nif __name__ == \"__main__\":\n    test_intersection()\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_loss.py",
    "content": "import pytest\nimport torch\n\nfrom nerfacc import pack_info, ray_marching\nfrom nerfacc.losses import distortion\n\ndevice = \"cuda:0\"\nbatch_size = 32\neps = 1e-6\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_distortion():\n    rays_o = torch.rand((batch_size, 3), device=device)\n    rays_d = torch.randn((batch_size, 3), device=device)\n    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)\n\n    ray_indices, t_starts, t_ends = ray_marching(\n        rays_o,\n        rays_d,\n        near_plane=0.1,\n        far_plane=1.0,\n        render_step_size=1e-3,\n    )\n    packed_info = pack_info(ray_indices, n_rays=batch_size)\n    weights = torch.rand((t_starts.shape[0],), device=device)\n    loss = distortion(packed_info, weights, t_starts, t_ends)\n    assert loss.shape == (batch_size,)\n\n\nif __name__ == \"__main__\":\n    test_distortion()\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_pack.py",
    "content": "import pytest\nimport torch\n\nfrom nerfacc import pack_data, pack_info, unpack_data, unpack_info\n\ndevice = \"cuda:0\"\nbatch_size = 32\neps = 1e-6\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_pack_data():\n    n_rays = 2\n    n_samples = 3\n    data = torch.rand((n_rays, n_samples, 2), device=device, requires_grad=True)\n    mask = torch.rand((n_rays, n_samples), device=device) > 0.5\n    packed_data, packed_info = pack_data(data, mask)\n    unpacked_data = unpack_data(packed_info, packed_data, n_samples)\n    unpacked_data.sum().backward()\n    assert (data.grad[mask] == 1).all()\n    assert torch.allclose(\n        unpacked_data.sum(dim=1), (data * mask[..., None]).sum(dim=1)\n    )\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_unpack_info():\n    packed_info = torch.tensor(\n        [[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device\n    )\n    ray_indices_tgt = torch.tensor(\n        [0, 2, 2, 2, 2], dtype=torch.int64, device=device\n    )\n    ray_indices = unpack_info(packed_info, n_samples=5)\n    packed_info_2 = pack_info(ray_indices, n_rays=packed_info.shape[0])\n    assert torch.allclose(packed_info.int(), packed_info_2.int())\n    assert torch.allclose(ray_indices, ray_indices_tgt)\n\n\nif __name__ == \"__main__\":\n    test_pack_data()\n    test_unpack_info()\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_ray_marching.py",
    "content": "import pytest\nimport torch\n\nfrom nerfacc import OccupancyGrid, ray_marching, unpack_info\n\ndevice = \"cuda:0\"\nbatch_size = 128\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_marching_with_near_far():\n    rays_o = torch.rand((batch_size, 3), device=device)\n    rays_d = torch.randn((batch_size, 3), device=device)\n    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)\n\n    ray_indices, t_starts, t_ends = ray_marching(\n        rays_o,\n        rays_d,\n        near_plane=0.1,\n        far_plane=1.0,\n        render_step_size=1e-3,\n    )\n    return\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_marching_with_grid():\n    rays_o = torch.rand((batch_size, 3), device=device)\n    rays_d = torch.randn((batch_size, 3), device=device)\n    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)\n    grid = OccupancyGrid(roi_aabb=[0, 0, 0, 1, 1, 1]).to(device)\n    grid._binary[:] = True\n\n    ray_indices, t_starts, t_ends = ray_marching(\n        rays_o,\n        rays_d,\n        grid=grid,\n        near_plane=0.0,\n        far_plane=1.0,\n        render_step_size=1e-2,\n    )\n    samples = (\n        rays_o[ray_indices] + rays_d[ray_indices] * (t_starts + t_ends) / 2.0\n    )\n    assert (samples <= grid.roi_aabb[3:].unsqueeze(0)).all()\n    assert (samples >= grid.roi_aabb[:3].unsqueeze(0)).all()\n    return\n\n\nif __name__ == \"__main__\":\n    test_marching_with_near_far()\n    test_marching_with_grid()\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_rendering.py",
    "content": "import pytest\nimport torch\n\nfrom nerfacc import (\n    accumulate_along_rays,\n    render_transmittance_from_density,\n    render_visibility,\n    render_weight_from_alpha,\n    render_weight_from_density,\n    rendering,\n)\n\ndevice = \"cuda:0\"\nbatch_size = 32\neps = 1e-6\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_render_visibility():\n    ray_indices = torch.tensor(\n        [0, 2, 2, 2, 2], dtype=torch.int64, device=device\n    )  # (samples,)\n    alphas = torch.tensor(\n        [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device\n    ).unsqueeze(\n        -1\n    )  # (n_samples, 1)\n\n    # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028]\n    vis = render_visibility(\n        alphas, ray_indices=ray_indices, early_stop_eps=0.03, alpha_thre=0.0\n    )\n    vis_tgt = torch.tensor(\n        [True, True, True, True, False], dtype=torch.bool, device=device\n    )\n    assert torch.allclose(vis, vis_tgt)\n\n    # transmittance: [1.0, 1.0, 1.0, 0.2, 0.04]\n    vis = render_visibility(\n        alphas, ray_indices=ray_indices, early_stop_eps=0.05, alpha_thre=0.35\n    )\n    vis_tgt = torch.tensor(\n        [True, False, True, True, False], dtype=torch.bool, device=device\n    )\n    assert torch.allclose(vis, vis_tgt)\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_render_weight_from_alpha():\n    ray_indices = torch.tensor(\n        [0, 2, 2, 2, 2], dtype=torch.int64, device=device\n    )  # (samples,)\n    alphas = torch.tensor(\n        [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device\n    ).unsqueeze(\n        -1\n    )  # (n_samples, 1)\n\n    # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028]\n    weights = render_weight_from_alpha(\n        alphas, ray_indices=ray_indices, n_rays=3\n    )\n    weights_tgt = torch.tensor(\n        [1.0 * 0.4, 1.0 * 0.3, 0.7 * 0.8, 0.14 * 0.8, 0.028 * 0.5],\n        dtype=torch.float32,\n        device=device,\n    ).unsqueeze(-1)\n    assert torch.allclose(weights, weights_tgt)\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_render_weight_from_density():\n    ray_indices = torch.tensor(\n        [0, 2, 2, 2, 2], dtype=torch.int64, device=device\n    )  # (samples,)\n    sigmas = torch.rand(\n        (ray_indices.shape[0], 1), device=device\n    )  # (n_samples, 1)\n    t_starts = torch.rand_like(sigmas)\n    t_ends = torch.rand_like(sigmas) + 1.0\n    alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))\n\n    weights = render_weight_from_density(\n        t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3\n    )\n    weights_tgt = render_weight_from_alpha(\n        alphas, ray_indices=ray_indices, n_rays=3\n    )\n    assert torch.allclose(weights, weights_tgt)\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_accumulate_along_rays():\n    ray_indices = torch.tensor(\n        [0, 2, 2, 2, 2], dtype=torch.int64, device=device\n    )  # (n_rays,)\n    weights = torch.tensor(\n        [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device\n    ).unsqueeze(-1)\n    values = torch.rand((5, 2), device=device)  # (n_samples, 1)\n\n    ray_values = accumulate_along_rays(\n        weights, ray_indices, values=values, n_rays=3\n    )\n    assert ray_values.shape == (3, 2)\n    assert torch.allclose(ray_values[0, :], weights[0, :] * values[0, :])\n    assert (ray_values[1, :] == 0).all()\n    assert torch.allclose(\n        ray_values[2, :], (weights[1:, :] * values[1:]).sum(dim=0)\n    )\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_rendering():\n    def rgb_sigma_fn(t_starts, t_ends, ray_indices):\n        return torch.hstack([t_starts] * 3), t_starts\n\n    ray_indices = torch.tensor(\n        [0, 2, 2, 2, 2], dtype=torch.int64, device=device\n    )  # (samples,)\n    sigmas = torch.rand(\n        (ray_indices.shape[0], 1), device=device\n    )  # (n_samples, 1)\n    t_starts = torch.rand_like(sigmas)\n    t_ends = torch.rand_like(sigmas) + 1.0\n\n    _, _, _ = rendering(\n        t_starts,\n        t_ends,\n        ray_indices=ray_indices,\n        n_rays=3,\n        rgb_sigma_fn=rgb_sigma_fn,\n    )\n\n\n@pytest.mark.skipif(not torch.cuda.is_available(), reason=\"No CUDA device\")\ndef test_grads():\n    ray_indices = torch.tensor(\n        [0, 2, 2, 2, 2], dtype=torch.int64, device=device\n    )  # (samples,)\n    packed_info = torch.tensor(\n        [[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device\n    )\n    sigmas = torch.tensor([[0.4], [0.8], [0.1], [0.8], [0.1]], device=\"cuda\")\n    sigmas.requires_grad = True\n    t_starts = torch.rand_like(sigmas)\n    t_ends = t_starts + 1.0\n\n    weights_ref = torch.tensor(\n        [[0.3297], [0.5507], [0.0428], [0.2239], [0.0174]], device=\"cuda\"\n    )\n    sigmas_grad_ref = torch.tensor(\n        [[0.6703], [0.1653], [0.1653], [0.1653], [0.1653]], device=\"cuda\"\n    )\n\n    # naive impl. trans from sigma\n    trans = render_transmittance_from_density(\n        t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3\n    )\n    weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts)))\n    weights.sum().backward()\n    sigmas_grad = sigmas.grad.clone()\n    sigmas.grad.zero_()\n    assert torch.allclose(weights_ref, weights, atol=1e-4)\n    assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)\n\n    # naive impl. trans from alpha\n    trans = render_transmittance_from_density(\n        t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3\n    )\n    weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts)))\n    weights.sum().backward()\n    sigmas_grad = sigmas.grad.clone()\n    sigmas.grad.zero_()\n    assert torch.allclose(weights_ref, weights, atol=1e-4)\n    assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)\n\n    weights = render_weight_from_density(\n        t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3\n    )\n    weights.sum().backward()\n    sigmas_grad = sigmas.grad.clone()\n    sigmas.grad.zero_()\n    assert torch.allclose(weights_ref, weights, atol=1e-4)\n    assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)\n\n    weights = render_weight_from_density(\n        t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3\n    )\n    weights.sum().backward()\n    sigmas_grad = sigmas.grad.clone()\n    sigmas.grad.zero_()\n    assert torch.allclose(weights_ref, weights, atol=1e-4)\n    assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)\n\n    alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))\n    weights = render_weight_from_alpha(\n        alphas, ray_indices=ray_indices, n_rays=3\n    )\n    weights.sum().backward()\n    sigmas_grad = sigmas.grad.clone()\n    sigmas.grad.zero_()\n    assert torch.allclose(weights_ref, weights, atol=1e-4)\n    assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)\n\n    alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))\n    weights = render_weight_from_alpha(\n        alphas, packed_info=packed_info, n_rays=3\n    )\n    weights.sum().backward()\n    sigmas_grad = sigmas.grad.clone()\n    sigmas.grad.zero_()\n    assert torch.allclose(weights_ref, weights, atol=1e-4)\n    assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)\n\n\nif __name__ == \"__main__\":\n    test_render_visibility()\n    test_render_weight_from_alpha()\n    test_render_weight_from_density()\n    test_accumulate_along_rays()\n    test_rendering()\n    test_grads()\n"
  },
  {
    "path": "third_parties/nerfacc-0.3.5/nerfacc-0.3.5/tests/test_resampling.py",
    "content": "import pytest\nimport torch\n\nfrom nerfacc import pack_info, ray_marching, ray_resampling\n\ndevice = \"cuda:0\"\nbatch_size = 128\n\n\n@pytest.mark.skipif(not torch.cuda.is_available, reason=\"No CUDA device\")\ndef test_resampling():\n    rays_o = torch.rand((batch_size, 3), device=device)\n    rays_d = torch.randn((batch_size, 3), device=device)\n    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)\n\n    ray_indices, t_starts, t_ends = ray_marching(\n        rays_o,\n        rays_d,\n        near_plane=0.1,\n        far_plane=1.0,\n        render_step_size=1e-3,\n    )\n    packed_info = pack_info(ray_indices, n_rays=batch_size)\n    weights = torch.rand((t_starts.shape[0],), device=device)\n    packed_info, t_starts, t_ends = ray_resampling(\n        packed_info, t_starts, t_ends, weights, n_samples=32\n    )\n    assert t_starts.shape == t_ends.shape == (batch_size * 32, 1)\n\n\nif __name__ == \"__main__\":\n    test_resampling()\n"
  },
  {
    "path": "utilities/utils.py",
    "content": "import numpy as np\nimport cv2\nfrom PIL import Image, ImageChops\nimport os\nimport time\nimport torch\nfrom PIL import Image, ImageDraw, ImageFont\n\nexp_time = str(time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(time.time())))\ndevice = \"cuda\" if torch.cuda.is_available() else \"cpu\"\nprint(f\"Using {device} device\")\n\ndef crop_a_set_of_images(*image_path):\n    from PIL import ImageChops, Image\n    imgs = []\n    bboxes = []\n    for im_path in image_path:\n        im = Image.open(im_path)\n        bg = Image.new(im.mode, im.size, im.getpixel((0, 0)))\n        diff = ImageChops.difference(im, bg)\n        diff = ImageChops.add(diff, diff, 2.0, -5)\n        bbox = diff.getbbox()\n\n        imgs.append(im)\n        bboxes.append(bbox)\n    bbox_aggre = np.asarray(bboxes)\n    bbox_min = np.min(bbox_aggre, 0)\n    bbox_max = np.max(bbox_aggre, 0)\n    bbox_common = (bbox_min[0], bbox_min[1], bbox_max[2], bbox_max[3])\n    for idx, img in enumerate(imgs):\n        img = img.crop(bbox_common)\n        img.save(image_path[idx])\n    pass\n\n\ndef crop_image_based_on_ref_image(ref_img_path, *img_path):\n    from PIL import ImageChops, Image\n    ref_im = Image.open(ref_img_path)\n    bg = Image.new(ref_im.mode, ref_im.size, ref_im.getpixel((0, 0)))\n    diff = ImageChops.difference(ref_im, bg)\n    diff = ImageChops.add(diff, diff, 2.0, -5)\n    bbox = diff.getbbox()\n\n    for idx, im_path in enumerate(img_path):\n        img = Image.open(im_path)\n        img = img.crop(bbox)\n        img.save(im_path)\n\n\ndef angular_error_map(N1, N2):\n    dot = np.sum(np.multiply(N1, N2), axis=-1)\n    dot = np.clip(dot, -1., 1.)\n    return np.rad2deg(np.arccos(dot))\n\n\ndef crop_mask(mask):\n    if mask.dtype is not np.uint8:\n        mask = mask.astype(np.uint8) * 255\n    im = Image.fromarray(mask)\n    bg = Image.new(im.mode, im.size, im.getpixel((0, 0)))\n    diff = ImageChops.difference(im, bg)\n    diff = ImageChops.add(diff, diff, 2.0, 0)\n    
bbox = diff.getbbox()\n    return bbox\n\n\ndef crop_image_by_mask(img, mask):\n    bbox = crop_mask(mask)\n    try:\n        crop_img = img.copy()[bbox[1]:bbox[3], bbox[0]:bbox[2]]\n    except:\n        crop_img = img.copy()\n    return crop_img\n\n\ndef save_video(vpath, images, fps):\n    height, width, _ = images[0].shape\n    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')\n    video = cv2.VideoWriter(vpath, fourcc, fps, (width, height))\n    for image in images:\n        video.write(image)\n    cv2.destroyAllWindows()\n    video.release()\n\n\ndef toRGBA(img, mask):\n    img = cv2.cvtColor(img, cv2.COLOR_RGB2RGBA)\n    img[:, :, 3] = (mask.astype(bool)*255).astype(np.uint8)\n    return img\n"
  }
]