Showing preview only (432K chars total). Download the full file or copy to clipboard to get everything.
Repository: dgrzech/sobfu
Branch: master
Commit: c83646582a14
Files: 73
Total size: 409.5 KB
Directory structure:
gitextract_kh2b_hh_/
├── .clang-format
├── .gitignore
├── .gitlab-ci.yml
├── .lintflags
├── .lintignore
├── CMakeLists.txt
├── LICENSE.md
├── README.md
├── cmake/
│ ├── Targets.cmake
│ └── Utils.cmake
├── include/
│ ├── kfusion/
│ │ ├── cuda/
│ │ │ ├── device.hpp
│ │ │ ├── device_array.hpp
│ │ │ ├── device_memory.hpp
│ │ │ ├── imgproc.hpp
│ │ │ ├── kernel_containers.hpp
│ │ │ ├── marching_cubes.hpp
│ │ │ ├── projective_icp.hpp
│ │ │ ├── temp_utils.hpp
│ │ │ ├── texture_binder.hpp
│ │ │ └── tsdf_volume.hpp
│ │ ├── exports.hpp
│ │ ├── internal.hpp
│ │ ├── kinfu.hpp
│ │ ├── precomp.hpp
│ │ ├── safe_call.hpp
│ │ └── types.hpp
│ └── sobfu/
│ ├── cuda/
│ │ └── utils.hpp
│ ├── params.hpp
│ ├── precomp.hpp
│ ├── reductor.hpp
│ ├── scalar_fields.hpp
│ ├── sob_fusion.hpp
│ ├── solver.hpp
│ └── vector_fields.hpp
├── params/
│ ├── params_advent.ini
│ ├── params_boxing.ini
│ ├── params_hat.ini
│ ├── params_ours.ini
│ ├── params_snoopy.ini
│ └── params_umbrella.ini
├── setup.sh
├── src/
│ ├── CMakeLists.txt
│ ├── apps/
│ │ ├── CMakeLists.txt
│ │ └── demo.cpp
│ ├── kfusion/
│ │ ├── CMakeLists.txt
│ │ ├── core.cpp
│ │ ├── cuda/
│ │ │ ├── imgproc.cu
│ │ │ ├── marching_cubes.cu
│ │ │ ├── proj_icp.cu
│ │ │ └── tsdf_volume.cu
│ │ ├── device_memory.cpp
│ │ ├── imgproc.cpp
│ │ ├── kinfu.cpp
│ │ ├── marching_cubes.cpp
│ │ ├── precomp.cpp
│ │ ├── projective_icp.cpp
│ │ └── tsdf_volume.cpp
│ └── sobfu/
│ ├── CMakeLists.txt
│ ├── cuda/
│ │ ├── reductor.cu
│ │ ├── scalar_fields.cu
│ │ ├── solver.cu
│ │ └── vector_fields.cu
│ ├── precomp.cpp
│ ├── reductor.cpp
│ ├── scalar_fields.cpp
│ ├── sob_fusion.cpp
│ ├── solver.cpp
│ └── vector_fields.cpp
└── test/
├── CMakeLists.txt
├── deformation_field_test.cpp
├── main.cpp
├── reductions_test.cpp
└── solver_test.cpp
================================================
FILE CONTENTS
================================================
================================================
FILE: .clang-format
================================================
---
# ---------------------------------------------------------------------------- #
# CLANG-FORMAT #
# v6.0.x #
# ---------------------------------------------------------------------------- #
# For documentation: https://clang.llvm.org/docs/ClangFormatStyleOptions.html
#
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: true
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IncludeCategories:
- Regex: '^<.*\.h>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IndentCaseLabels: true
IndentWidth: 4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
...
================================================
FILE: .gitignore
================================================
# Created by https://www.gitignore.io/api/vim,osx,c++
/data
### C++ ###
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
### OSX ###
*.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### Vim ###
# swap
[._]*.s[a-v][a-z]
[._]*.sw[a-p]
[._]s[a-v][a-z]
[._]sw[a-p]
# session
Session.vim
# temporary
.netrwhist
*~
# auto-generated tag files
tags
# End of https://www.gitignore.io/api/vim,osx,c++
# Ignore build directory
build
files
# ignore remote sync config file
.remote-sync.json
# Ignore frames
frames
# ignore helper MATLAB script to display point clouds
show_pcl.m
# ignore output folder
out
================================================
FILE: .gitlab-ci.yml
================================================
# ---------------------------------------------------------------------------- #
# GITLAB-RUNNER-CI #
# ---------------------------------------------------------------------------- #
# Version: v9.5.4
# Running on graphic01.doc.ic.ac.uk
# ---------------------------------------------------------------------------- #
# PIPELINE #
# ---------------------------------------------------------------------------- #
# The CI pipeline is divided into the following stages:
# Lint: Performs linter checks on the repo's code
# Build: Builds the project
# Test: Runs the project's tests
# Deploy: Deploys the project to a given directory
# Cleanup: Removes all artifacts required for the CI stages
stages:
- Setup
- Lint
- Build
- Test
- Deploy
- Cleanup
# env_setup defines a number of environment variables which can be called by
# various before_scripts of the project.
# 1) Exporting the PATH is necessary to enable linking of the custom installed
# binaries in the group's shared directory.
# 2) setup.sh makes sure all Cuda related binaries are setup correctly.
# This includes, but isn't limited to, CUDA itself and OpenGL.
.env_setup: &env_setup |
source setup.sh --set-paths
# Variables for use in script
variables:
CONTAINER_IMAGE: g1736211/dynfu
DEV_IMAGE: g1736211/dynfu:dev
TAG_IMAGE: g1736211/dynfu:${CI_COMMIT_REF_SLUG}
# ---------------------------------------------------------------------------- #
# SETUP #
# ---------------------------------------------------------------------------- #
# The Setup stage sets all the environment variables and paths using cmake.
# > cmake <
# Cmake will generate a Makefile for the project linking all needed dependencies
# Cmake stage is required to not fail, in which case it would fail the pipeline.
cmake:
stage: Setup
# Before attempting to cmake env variables must be set to link the binaries
# Call .setup.sh which creates build and setup the dependencies
# Cmake will also build inside the build/ directory
before_script:
- rm -rf build || true
- source setup.sh --set-paths
# Build/ directory is cached for a given commit SHA to make sure all the tasks
# within a same pipeline will have access to the correctly built project
cache:
key: "$CI_COMMIT_SHA"
paths:
- build/
# Cmake requires two additional flags:
# 1) CMAKE_PREFIX enables it to find the custom built binaries
# 2) CMAKE_CXX_FLAGS are necessary to be set as otherwise BASH doesn't seem to
# set the c++ standard correctly (i.e. will throw a number of errors)
# 3) [Others]
script:
- source setup.sh --cmake -i
# ---------------------------------------------------------------------------- #
# LINT #
# ---------------------------------------------------------------------------- #
# The Lint stage runs the linter on the project's files.
# > clang_format <
# Clang formatting will ensure that the latest files have been formatted with
# clang-styling. This step does not fix linter errors, it will just point them
# out to the pipeline, failing it.
clang_format:
stage: Lint
# Set env variables before testing should we need to access CUDA or OpenGL
# headers.
before_script:
- *env_setup
# Files are considered to be source only if *.cpp or *.hpp.
# Excluded files or directories must be set manually in .lintignore
script:
# Print out the configuration file variables
- /vol/project/2017/362/g1736211/bin/clang-format -dump-config
# Attempt to check if clang-format had been run before saving
- ./scripts/.format_bash
# > lint <
# The linter used for the project is clang-tidy.
# 1) Linting stage must run after build due to the numerous dependencies created
# by Cmake. All these dependencies are logged into compile_commands.json and
# are referenced by clang-tidy when linting (i.e. to scan headers).
# 2) The run-clang-tidy python script must be linked to the relevant clang-tidy
# executable as it cannot be found in it's canonical place (i.e /usr/bin).
# 3) run-clang-tidy.py will accept all clang-tidy flags, it has extra value as
# it can use the compile_commands.json information.
#
# USED FLAGS:
# 1) -list-checks : Lists all the enabled checks.
# 2) -checks= : Sets different checkstyles for the source files
# 3) -p <DIR> : Sets the path for the build directory containing the
# compile_commands.json information.
# 4) -warnings-as-errors=* : Treats all warnings as errors.
#
# Used checks can be found in .lintflags
clang_tidy:
stage: Lint
# Set env variables before testing should we need to access CUDA or OpenGL
# headers.
before_script:
- *env_setup
# Linting must occur on the newly built build/ binaries.
cache:
key: "$CI_COMMIT_SHA"
paths:
- build/
policy: pull
# Files are considered to be source only if *.cpp or *.hpp.
# Excluded files or directories must be set manually in .lintignore
script:
# Print out the list of enabled checks
- /vol/project/2017/362/g1736211/bin/clang-tidy `./scripts/.flags_bash` -list-checks
# Print out the list of files which will be checked for linting
- ./scripts/.find_bash
# Perform the linting with the above configurations
- /vol/project/2017/362/g1736211/bin/clang-tidy -p build `./scripts/.diff_tidy_bash` `./scripts/.flags_bash` `./scripts/.find_bash`
# ---------------------------------------------------------------------------- #
# BUILD #
# ---------------------------------------------------------------------------- #
# The build project actually generates the binaries for the project.
# According to the flags set during the CMake stage, tests are built as well in
# their respective directories.
# > make <
# The make stage will run the Makefile generated by cmake in the build/ dir.
# The make stage is required not to fail.
make:
stage: Build
# Before attempting to make env variables must be set to make sure all
# binaries referenced by the Makefile can be accessed.
# Make will also build the executables inside the build/ directory
before_script:
- *env_setup
# Build/ directory is cached for a given commit SHA to make sure all the tasks
# within a same pipeline will have access to the correctly built project
cache:
key: "$CI_COMMIT_SHA"
paths:
- build/
# Run simple make from the Makefile on four processes.
script:
- source setup.sh --make
# > docker_image_build <
# The docker_image_build stage creates the Docker image for an EC2 instance
# on a docker-compatible runner
docker_image_build:
stage: Build
tags:
- docker
script:
- docker build --build-arg CUDA_GENERATION=Maxwell -t $TAG_IMAGE .
except:
- master
# > docker_image_build_master <
# The docker_image_build_master stage creates the Docker image for all architectures
# on a docker-compatible runner
docker_image_build_master:
stage: Build
tags:
- docker
script:
- docker build --build-arg CUDA_GENERATION=Auto -t $TAG_IMAGE .
only:
- master
# ---------------------------------------------------------------------------- #
# TEST #
# ---------------------------------------------------------------------------- #
# The Test stage will perform a number of tests on the Project.
# > gtest <
# The gtest stage runs the GTest suite present within the library.
# GTest binaries can be found in the group shared directory.
gtest:
stage: Test
# Set env variables before testing should we need to access CUDA or OpenGL
# headers.
before_script:
- *env_setup
# Testing must occur on the newly built build/ binaries.
cache:
key: "$CI_COMMIT_SHA"
paths:
- build/
policy: pull
# Run the gtests
script:
- ./build/bin/dynfu_test
# > coverage <
# The coverage stage runs gcov (and on top of that lcov) to generate a coverage
# report of the test for the source files.
coverage:
stage: Test
# Set env variables before testing should we need to access CUDA or OpenGL
# headers.
before_script:
- *env_setup
# Coverage report is constructed for the newly built binaries
cache:
key: "$CI_COMMIT_SHA"
paths:
- build/
policy: pull
# Generate the coverage report
script:
- source setup.sh --coverage
# ---------------------------------------------------------------------------- #
# DEPLOY #
# ---------------------------------------------------------------------------- #
# The Deploy stage deploys newly built and tested binaries to a given location
# on disk.
# > deploy <
# The deploy stage will deploy the newly built project binaries
# to /data/dynfu.
deploy:
stage: Deploy
# The latest built build/ directory must be retrieved from cache
cache:
key: "$CI_COMMIT_SHA"
paths:
- build/
policy: pull
# Clean the output distributed library directory to clone new data into it
before_script:
- rm -rf /data/dynfu
- mkdir /data/dynfu
# Copy the latest build BINARIES to /data/
script:
- cp -R build/bin/* /data/dynfu
# > docker_image__deploy_dev <
# Deploys to Docker Hub
docker_image_deploy_dev:
stage: Deploy
tags:
- docker
script:
- docker login -u $DOCKER_HUB_LOGIN -p $DOCKER_HUB_PASSWORD
- docker tag $TAG_IMAGE $DEV_IMAGE
- docker push $DEV_IMAGE
only:
- dev
# > docker_image_deploy <
# Deploys to Docker Hub
docker_image_deploy:
stage: Deploy
tags:
- docker
script:
- docker login -u $DOCKER_HUB_LOGIN -p $DOCKER_HUB_PASSWORD
- docker tag $TAG_IMAGE $CONTAINER_IMAGE
- docker push $CONTAINER_IMAGE
only:
- master
# > test_dynfu_on_ec2 <
# Trigger testing on ec2 instance
test_dynfu_on_ec2:
stage: Deploy
tags:
- docker
before_script:
- pip install awscli
- export PATH="${PATH}:~/.local/bin"
script:
- AWS_ACCESS_KEY_ID=${UPDATE_LAMBDA_KEY} AWS_SECRET_ACCESS_KEY=${UPDATE_LAMBDA_PASS} AWS_DEFAULT_REGION=eu-west-1 aws lambda invoke --function-name dynamic-fusion-trigger-testing /tmp/lambda-out
only:
- dev
# ---------------------------------------------------------------------------- #
# CLEANUP #
# ---------------------------------------------------------------------------- #
# The Cleanup stage makes sure the cache storage is cleaned for a given
# pipeline in the runners directories.
# > cache_cleanup <
# It runs regardless of failure or success, and thus has the aim of cleaning
# up the testing environment. In particular this script must remove unused
# cache artifacts that were stored within one pipeline.
cache_cleanup:
stage: Cleanup
script:
- echo "Present Cache:" && ls /home/gitlab-runner/cache/g1736211/dynfu/
- rm -rf "/home/gitlab-runner/cache/g1736211/dynfu/$CI_COMMIT_SHA"
when: always
================================================
FILE: .lintflags
================================================
# ---------------------------------------------------------------------------- #
# FILE FORMAT #
# ---------------------------------------------------------------------------- #
# Available checks can be found at http://clang.llvm.org/extra/clang-tidy/ #
# To add a new check add it normally #
# To PREVENT the linter from using a check, prefix with '-' #
# Extra links: https://clang.llvm.org/extra/clang-tidy/checks/list.html #
# Clang-tidy docs: https://clang.llvm.org/docs/ClangFormat.html #
# ---------------------------------------------------------------------------- #
# Enforce google standard
google-*
# Use High Integrity C++ Coding Standard
hicpp-*
# Use LLVM Coding Standard
llvm-*
# Use all miscellaneous flags
misc-*
# Apply readability constraints
readability-*
# Apply bugprone checks
bugprone-*
# ---------------------------------------------------------------------------- #
# The following are check DISABLES for certain behaviours
# Allow un-use of use auto
-hicpp-use-auto
# Allow string comparison .compare and not only via == or !=
-misc-string-compare
# Allow warnings for signed bitwise operators
-hicpp-signed-bitwise
# Allow printfs calls in C style
-hicpp-vararg
# Allow implicit bool casts and conversions
-readability-implicit-bool-cast
-readability-implicit-bool-conversion
# Prevent conflicting standards
-llvm-include-order
# Deprecated checks
-hicpp-special-member-functions
-hicpp-use-equals-delete
-hicpp-no-array-decay
-readability-delete-null-pointer
-hicpp-member-init
================================================
FILE: .lintignore
================================================
# Ignore the test directory when linting
./test/*
# Ignore all git files when linting
./.git/*
# Ignore all scripts
./scripts/*
# Ignore the cmake directory
./cmake/*
# Ignore all contents of build directory
./build/*
================================================
FILE: CMakeLists.txt
================================================
# ---------------------------------------------------------------------------- #
# CMAKE INIT CONFIGURATIONS
# ---------------------------------------------------------------------------- #
cmake_minimum_required(VERSION 2.8.8)
# ---------------------------------------------------------------------------- #
# SET THE PROJECT
# ---------------------------------------------------------------------------- #
project(kfusion C CXX)
# ---------------------------------------------------------------------------- #
# UTILITY FUNCTIONS (MACROS) AND PATHS
# ---------------------------------------------------------------------------- #
# Make the helper modules in cmake/ (Utils.cmake, Targets.cmake) discoverable
LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/")
include(Utils)
include(Targets)
# ---------------------------------------------------------------------------- #
# CMAKE SETTINGS
# ---------------------------------------------------------------------------- #
# Use both Debug and Release versions
SET(CMAKE_CONFIGURATION_TYPES "Debug;Release")
# Set CXX to compile with C++11 standard
# NOTE(review): -fpermissive downgrades some conformance errors to warnings;
# presumably needed by legacy kfusion code — confirm before removing.
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -MP -MD -pthread -fpermissive")
# ---------------------------------------------------------------------------- #
# DEPENDENCIES
# ---------------------------------------------------------------------------- #
find_package(Boost 1.58.0 REQUIRED COMPONENTS filesystem program_options)
find_package(CUDA REQUIRED)
find_package(OpenCV REQUIRED COMPONENTS core viz highgui calib3d)
find_package(PCL 1.8 REQUIRED)
find_package(VTK REQUIRED)
# ---------------------------------------------------------------------------- #
# CUDA SETTINGS
# ---------------------------------------------------------------------------- #
SET(HAVE_CUDA 1)
# NOTE(review): the gencode below hard-codes compute capability 6.1 (Pascal);
# adjust arch/code for other GPU generations.  The --ftz/--prec-div/--prec-sqrt
# flags trade floating-point precision for speed.
LIST(APPEND CUDA_NVCC_FLAGS
"-gencode;arch=compute_61,code=sm_61;
--ftz=true;
--prec-div=false;
--prec-sqrt=false;
--expt-relaxed-constexpr"
)
# Position-independent code is required to link the CUDA objects into the libs
if(UNIX OR APPLE)
LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC")
endif()
# ---------------------------------------------------------------------------- #
# SET INCLUDE PATHS
# ---------------------------------------------------------------------------- #
include_directories(include
${Boost_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
${OpenCV_INCLUDE_DIRS}
${PCL_INCLUDE_DIRS}
${VTK_INCLUDE_DIRS}
)
# ---------------------------------------------------------------------------- #
# BUILD SRC DIRECTORY
# ---------------------------------------------------------------------------- #
add_subdirectory(src)
# ---------------------------------------------------------------------------- #
# BUILD TESTS
# ---------------------------------------------------------------------------- #
if(BUILD_TESTS)
enable_testing()
add_subdirectory(test)
endif()
================================================
FILE: LICENSE.md
================================================
Copyright (c) 2012, Anatoly Baksheev
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the {organization} nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: README.md
================================================
sobfu
============
software for 3d reconstruction of non-rigidly deforming scenes using depth data, based on sobolevfusion
dependencies
-----------
* boost
* nvidia gpu with cuda >8.0
* opencv
* pcl 1.8.1
* vtk
installation
------------
to install, run `source setup.sh -a`
usage
------------
`./build/bin/app /path/to/data /path/to/params <--enable-viz> <--enable-log> <--verbose>`
* the data directory must include folders `color` and `depth`
* the folder `params` has files with recommended parameters for some of the scenes from volumedeform and killingfusion datasets
### options
* with `--enable-viz`, screenshots from the pcl viewer will be logged in .png format to `/path/to/data/screenshots`
* with `--enable-log`, meshes computed from the model tsdf's via marching cubes will be logged in .vtk format to `/path/to/data/meshes`
* `--verbose` and `--vverbose` control verbosity of the solver
example reconstructions
------------
reconstructions of some of the scenes from the volumedeform and killingfusion projects can be viewed on our [youtube playlist](http://bit.ly/sobfu-yt)
there is a large trade-off between frame rate and reconstruction quality--sample reconstructions ran at approx. 2fps
issues
------------
* drift in longer scenes due to less than perfect registration
* topological changes
references
------------
```
@InProceedings{Slavcheva_2018_CVPR,
author = {Slavcheva, Miroslava and Baust, Maximilian and Ilic, Slobodan},
title = {SobolevFusion: 3D Reconstruction of Scenes Undergoing Free Non-Rigid Motion},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2018}
}
```
================================================
FILE: cmake/Targets.cmake
================================================
################################################################################################
# short command to setup source group
# Place the files matched by the GLOB pattern(s) into IDE source group <group>.
# Usage: kf_source_group(<group> GLOB <glob-expressions...>)
function(kf_source_group group)
cmake_parse_arguments(VW_SOURCE_GROUP "" "" "GLOB" ${ARGN})
# Expand the glob expressions into a concrete file list
file(GLOB srcs ${VW_SOURCE_GROUP_GLOB})
#list(LENGTH ${srcs} ___size)
#if (___size GREATER 0)
source_group(${group} FILES ${srcs})
#endif()
endfunction()
################################################################################################
# short command for declaring includes from other modules
# Add modules/<name>/include to the include path for every <name> in ARGN,
# silently skipping modules whose include directory does not exist.
macro(declare_deps_includes)
foreach(__arg ${ARGN})
get_filename_component(___path "${CMAKE_SOURCE_DIR}/modules/${__arg}/include" ABSOLUTE)
if (EXISTS ${___path})
include_directories(${___path})
endif()
endforeach()
# clean up helper variables (macros share the caller's variable scope)
unset(___path)
unset(__arg)
endmacro()
################################################################################################
# short command for setting default target properties
# Apply the project-wide default properties to <target>: a "d" postfix for
# debug builds and lib/ / bin/ output directories under the build tree.
# Non-test targets (names not starting with "test_") also get an install rule.
function(default_properties target)
set_target_properties(${target} PROPERTIES
DEBUG_POSTFIX "d"
ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
if (NOT ${target} MATCHES "^test_")
# BUG FIX: previously referenced ${the_target}, a variable that is never set
# in this file, so install() was handed an empty target name; use the actual
# function parameter instead.
install(TARGETS ${target} RUNTIME DESTINATION ".")
endif()
endfunction()
# Place a test target into the "Tests" IDE folder when project folders are on.
function(test_props target)
#os_project_label(${target} "[test]")
if(USE_PROJECT_FOLDERS)
set_target_properties(${target} PROPERTIES FOLDER "Tests")
endif()
endfunction()
# Place an application target into the "Apps" IDE folder when project folders
# are on.
function(app_props target)
#os_project_label(${target} "[app]")
if(USE_PROJECT_FOLDERS)
set_target_properties(${target} PROPERTIES FOLDER "Apps")
endif()
endfunction()
################################################################################################
# short command for setting default target properties
# Apply the project-wide default properties to <target>.
# NOTE(review): this is a byte-for-byte duplicate of the default_properties
# definition earlier in this file (the later definition wins in CMake);
# consider deleting one copy.
function(default_properties target)
set_target_properties(${target} PROPERTIES
DEBUG_POSTFIX "d"
ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
if (NOT ${target} MATCHES "^test_")
# BUG FIX: previously referenced ${the_target}, a variable that is never set
# in this file, so install() was handed an empty target name; use the actual
# function parameter instead.
install(TARGETS ${target} RUNTIME DESTINATION ".")
endif()
endfunction()
################################################################################################
# short command for adding library module
# Build a static CUDA library named <name> from every *.cpp / *.cu file found
# recursively below the current directory, and arrange for the KFUSION_API
# export macro to be defined while compiling it.
macro(add_module_library name)
set(module_name ${name})
FILE(GLOB_RECURSE sources *.cpp *.cu)
cuda_add_library(${module_name} STATIC ${sources})
if(MSVC)
set_target_properties(${module_name} PROPERTIES DEFINE_SYMBOL KFUSION_API_EXPORTS)
else()
# non-MSVC compilers get the export macro as a plain compile definition
add_definitions(-DKFUSION_API_EXPORTS)
endif()
default_properties(${module_name})
endmacro()
################################################################################################
# short command for adding application module
# Build an executable <target> from every *.cpp file found recursively below
# the current directory, with the project's default target properties.
macro(add_application target)
FILE(GLOB_RECURSE sources *.cpp)
add_executable(${target} ${sources})
default_properties(${target})
endmacro()
################################################################################################
# short command for adding test target
# Build a gtest/gmock test executable <target> from the *.cpp files in the
# current directory and register it with CTest under the same name.
macro(CREATE_TEST target)
FILE(GLOB SRCS *.cpp)
ADD_EXECUTABLE(${target} ${SRCS})
TARGET_LINK_LIBRARIES(${target} libgtest libgmock)
add_test(NAME ${target} COMMAND ${target})
default_properties(${target})
endmacro()
================================================
FILE: cmake/Utils.cmake
================================================
################################################################################################
# short alias for CMake logging
# Short logging alias: prints its arguments as a STATUS message prefixed "<<".
function(dmsg)
message(STATUS "<<${ARGN}")
endfunction()
################################################################################################
# Command checking if current module has cuda sources to compile
# Set <var> to ON when CUDA support is enabled (HAVE_CUDA) and the current
# module has .cu sources under src/cuda/, and to OFF otherwise.
macro(check_cuda var)
file(GLOB cuda src/cuda/*.cu)
list(LENGTH cuda ___size)
if (HAVE_CUDA AND ___size GREATER 0)
set(${var} ON)
else()
set(${var} OFF)
endif()
# clean up helper variables (macros share the caller's variable scope)
unset(___size)
unset(cuda)
endmacro()
################################################################################################
# short command for adding library module
# Disable the given compiler warnings in the given flag variables, unless
# ENABLE_NOISY_WARNINGS is set.  Arguments are classified by prefix:
#   CMAKE_*  -> flag variables to edit (e.g. CMAKE_CXX_FLAGS)
#   /wd*     -> MSVC warning-disable flags, appended verbatim
#   -W*      -> GCC-style warnings, rewritten to their -Wno- form
# NOTE(review): relies on ocv_check_flag_support and CV_COMPILER_IS_GNU, which
# come from OpenCV's build system — confirm they are available where this
# macro is invoked.
macro(warnings_disable)
if(NOT ENABLE_NOISY_WARNINGS)
set(_flag_vars "")
set(_msvc_warnings "")
set(_gxx_warnings "")
# sort the arguments into the three buckets described above
foreach(arg ${ARGN})
if(arg MATCHES "^CMAKE_")
list(APPEND _flag_vars ${arg})
elseif(arg MATCHES "^/wd")
list(APPEND _msvc_warnings ${arg})
elseif(arg MATCHES "^-W")
list(APPEND _gxx_warnings ${arg})
endif()
endforeach()
if(MSVC AND _msvc_warnings AND _flag_vars)
foreach(var ${_flag_vars})
foreach(warning ${_msvc_warnings})
set(${var} "${${var}} ${warning}")
endforeach()
endforeach()
elseif(CV_COMPILER_IS_GNU AND _gxx_warnings AND _flag_vars)
foreach(var ${_flag_vars})
foreach(warning ${_gxx_warnings})
# strip the enabling -W flag, then append its -Wno- counterpart
# (flags already in -Wno- form are appended unchanged)
if(NOT warning MATCHES "^-Wno-")
string(REPLACE "${warning}" "" ${var} "${${var}}")
string(REPLACE "-W" "-Wno-" warning "${warning}")
endif()
ocv_check_flag_support(${var} "${warning}" _varname)
if(${_varname})
set(${var} "${${var}} ${warning}")
endif()
endforeach()
endforeach()
endif()
unset(_flag_vars)
unset(_msvc_warnings)
unset(_gxx_warnings)
endif(NOT ENABLE_NOISY_WARNINGS)
endmacro()
################################################################################################
# Command for setting options with some preconditions
# Declare a cached option <variable> (with <description>, default <value>),
# but only when the optional condition following an "IF" keyword holds;
# when the condition fails, the option is removed from the cache instead.
# Mirrors OpenCV's ocv_option helper.
macro(kf_option variable description value)
set(__value ${value})
set(__condition "")
set(__varname "__value")
# everything after the IF keyword is collected into __condition;
# everything before it is appended to __value
foreach(arg ${ARGN})
if(arg STREQUAL "IF" OR arg STREQUAL "if")
set(__varname "__condition")
else()
list(APPEND ${__varname} ${arg})
endif()
endforeach()
unset(__varname)
# no condition supplied -> use a tautology ("2 GREATER 1" is always true)
if("${__condition}" STREQUAL "")
set(__condition 2 GREATER 1)
endif()
if(${__condition})
# a ";" means __value is itself a list expression: evaluate it as a boolean
if("${__value}" MATCHES ";")
if(${__value})
option(${variable} "${description}" ON)
else()
option(${variable} "${description}" OFF)
endif()
# __value names another variable: use that variable's truth value
elseif(DEFINED ${__value})
if(${__value})
option(${variable} "${description}" ON)
else()
option(${variable} "${description}" OFF)
endif()
else()
# plain literal default
option(${variable} "${description}" ${__value})
endif()
else()
# condition failed: make sure no stale cached value survives
unset(${variable} CACHE)
endif()
unset(__condition)
unset(__value)
endmacro()
================================================
FILE: include/kfusion/cuda/device.hpp
================================================
#pragma once
#include <cuda_fp16.h>
#include <kfusion/cuda/temp_utils.hpp>
#include <kfusion/precomp.hpp>
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// TsdfVolume
// __kf_device__ kfusion::device::TsdfVolume::TsdfVolume(float2 *_data, int3 _dims, float3 _voxel_size,
// float _trunc_dist, float _max_weight)
// : data(_data), dims(_dims), voxel_size(_voxel_size), trunc_dist(_trunc_dist), max_weight(_max_weight) {}
//
// __kf_device__ kfusion::device::TsdfVolume::TsdfVolume(const TsdfVolume &other)
// : data(other.data),
// dims(other.dims),
// voxel_size(other.voxel_size),
// trunc_dist(other.trunc_dist),
// max_weight(other.max_weight) {}
__kf_device__ float2 *kfusion::device::TsdfVolume::operator()(int x, int y, int z) {
    /* pointer to voxel (x, y, z); storage is linear with x fastest, then y, then z */
    const int idx = x + dims.x * (y + dims.y * z);
    return data + idx;
}
__kf_device__ const float2 *kfusion::device::TsdfVolume::operator()(int x, int y, int z) const {
    /* const overload: pointer to voxel (x, y, z) in the x-fastest linear layout */
    const int idx = x + dims.x * (y + dims.y * z);
    return data + idx;
}
/* pointer to the first voxel (z = 0) of the column at (x, y) */
__kf_device__ float2 *kfusion::device::TsdfVolume::beg(int x, int y) const { return data + dims.x * y + x; }
/* advance one slice along z: each z-step spans a full x-y plane of voxels */
__kf_device__ float2 *kfusion::device::TsdfVolume::zstep(float2 *const ptr) const { return ptr + dims.y * dims.x; }
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Projector
// Pinhole projection of a camera-space point onto the image plane:
// coo = f * (p.xy / p.z) + c, using fused multiply-add and fast-division intrinsics.
__kf_device__ float2 kfusion::device::Projector::operator()(const float3 &p) const {
    float2 coo;
    coo.x = __fmaf_rn(f.x, __fdividef(p.x, p.z), c.x);
    coo.y = __fmaf_rn(f.y, __fdividef(p.y, p.z), c.y);
    return coo;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Reprojector
/* Back-projects pixel (u, v) at depth z into a camera-space point:
 * p = z * ((u, v) - c) * (1 / f), with z passed through unchanged. */
__kf_device__ float3 kfusion::device::Reprojector::operator()(int u, int v, float z) const {
    return make_float3(z * (u - c.x) * finv.x, z * (v - c.y) * finv.y, z);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Utility
namespace kfusion {
namespace device {
// Matrix-vector product: Mat3f stores its three ROWS in data[0..2],
// so each result component is a row dotted with v.
__kf_device__ Vec3f operator*(const Mat3f &m, const Vec3f &v) {
    return make_float3(dot(m.data[0], v), dot(m.data[1], v), dot(m.data[2], v));
}
// Apply an affine transform to a point: rotate, then translate.
__kf_device__ Vec3f operator*(const Aff3f &a, const Vec3f &v) { return a.R * v + a.t; }
/* operator for multiplication of an affine transform by a float
 * NOTE(review): only the translation is scaled; the rotation is copied
 * unchanged. Looks deliberate (e.g. scaling a displacement update) —
 * confirm against callers. */
__kf_device__ Aff3f operator*(const float &s, const Aff3f &a) {
    Aff3f b;
    b.R = a.R;
    b.t = s * a.t;
    return b;
}
/* 3-d matrix multiplication: r = m * n, where each Mat3f stores its rows in
 * data[0..2] (consistent with the Mat3f * Vec3f operator above).
 *
 * Fix: the previous implementation multiplied rows by columns ELEMENT-WISE
 * without summing, which is not a matrix product; each entry of the result
 * must be the dot product of a row of m with a column of n. */
__kf_device__ Mat3f operator*(const Mat3f &m, const Mat3f &n) {
    /* columns of n */
    float3 c0 = make_float3(n.data[0].x, n.data[1].x, n.data[2].x);
    float3 c1 = make_float3(n.data[0].y, n.data[1].y, n.data[2].y);
    float3 c2 = make_float3(n.data[0].z, n.data[1].z, n.data[2].z);

    Mat3f r;
    r.data[0] = make_float3(dot(m.data[0], c0), dot(m.data[0], c1), dot(m.data[0], c2));
    r.data[1] = make_float3(dot(m.data[1], c0), dot(m.data[1], c1), dot(m.data[1], c2));
    r.data[2] = make_float3(dot(m.data[2], c0), dot(m.data[2], c1), dot(m.data[2], c2));
    return r;
}
/* affine transform composition: (a * b)(x) = a.R * (b.R * x + b.t) + a.t,
 * i.e. c.R = a.R * b.R and c.t = a.R * b.t + a.t.
 *
 * Fix: the previous implementation summed the translations directly
 * (c.t = a.t + b.t), which is only correct when a.R is the identity; the
 * composed translation must rotate b.t by a.R first. */
__kf_device__ Aff3f operator*(const Aff3f &a, const Aff3f &b) {
    Aff3f c;
    c.R = a.R * b.R;
    c.t = a.R * b.t + a.t;
    return c;
}
// Truncate a float4 to its xyz components.
__kf_device__ Vec3f tr(const float4 &v) { return make_float3(v.x, v.y, v.z); }
// Addition functor (float/double overloads) for use as a binary reduction op.
struct plus {
    __kf_device__ float operator()(float l, float r) const { return l + r; }
    __kf_device__ double operator()(double l, double r) const { return l + r; }
};
} // namespace device
} // namespace kfusion
================================================
FILE: include/kfusion/cuda/device_array.hpp
================================================
#pragma once
#include <kfusion/cuda/device_memory.hpp>
#include <kfusion/exports.hpp>
#include <vector>
namespace kfusion {
namespace cuda {
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** \brief @b DeviceArray class
*
* \note Typed container for GPU memory with reference counting.
*
* \author Anatoly Baksheev
*/
template <class T>
class KF_EXPORTS DeviceArray : public DeviceMemory {
public:
    /** \brief Element type. */
    typedef T type;
    /** \brief Element size in bytes. */
    enum { elem_size = sizeof(T) };
    /** \brief Empty constructor. */
    DeviceArray();
    /** \brief Allocates internal buffer in GPU memory
     * \param size: number of elements to allocate
     * */
    DeviceArray(size_t size);
    /** \brief Initializes with user allocated buffer. Reference counting is
     * disabled in this case.
     * \param ptr: pointer to buffer
     * \param size: number of elements
     * */
    DeviceArray(T *ptr, size_t size);
    /** \brief Copy constructor. Just increments reference counter. */
    DeviceArray(const DeviceArray &other);
    /** \brief Assignment operator. Just increments reference counter. */
    DeviceArray &operator=(const DeviceArray &other);
    /** \brief Allocates internal buffer in GPU memory. If internal buffer was
     * created before the function recreates it with new size. If new and old
     * sizes are equal it does nothing.
     * \param size: number of elements
     * */
    void create(size_t size);
    /** \brief Decrements reference counter and releases internal buffer if
     * needed. */
    void release();
    /** \brief Performs data copying. If destination size differs it will be
     * reallocated.
     * \param other: destination container
     * */
    void copyTo(DeviceArray &other) const;
    /** \brief Uploads data to internal buffer in GPU memory. It calls create()
     * inside to ensure that internal buffer size is enough.
     * \param host_ptr: pointer to host buffer to upload
     * \param size: number of elements
     * */
    void upload(const T *host_ptr, size_t size);
    /** \brief Downloads data from internal buffer to CPU memory
     * \param host_ptr: pointer to host buffer to download to
     * */
    void download(T *host_ptr) const;
    /** \brief Uploads data to internal buffer in GPU memory. It calls create()
     * inside to ensure that internal buffer size is enough.
     * \param data: host vector to upload from
     * */
    template <class A>
    void upload(const std::vector<T, A> &data);
    /** \brief Downloads data from internal buffer to CPU memory
     * \param data: host vector to download to
     * */
    template <typename A>
    void download(std::vector<T, A> &data) const;
    /** \brief Performs swap of data pointed with another device array.
     * \param other_arg: device array to swap with
     * */
    void swap(DeviceArray &other_arg);
    /** \brief Returns pointer to internal buffer in GPU memory. */
    T *ptr();
    /** \brief Returns const pointer to internal buffer in GPU memory. */
    const T *ptr() const;
    // using DeviceMemory::ptr;
    /** \brief Returns pointer to internal buffer in GPU memory. */
    operator T *();
    /** \brief Returns const pointer to internal buffer in GPU memory. */
    operator const T *() const;
    /** \brief Returns size in elements. */
    size_t size() const;
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** \brief @b DeviceArray2D class
*
* \note Typed container for pitched GPU memory with reference counting.
*
* \author Anatoly Baksheev
*/
template <class T>
class KF_EXPORTS DeviceArray2D : public DeviceMemory2D {
public:
    /** \brief Element type. */
    typedef T type;
    /** \brief Element size in bytes. */
    enum { elem_size = sizeof(T) };
    /** \brief Empty constructor. */
    DeviceArray2D();
    /** \brief Allocates internal buffer in GPU memory
     * \param rows: number of rows to allocate
     * \param cols: number of elements in each row
     * */
    DeviceArray2D(int rows, int cols);
    /** \brief Initializes with user allocated buffer. Reference counting is
     * disabled in this case.
     * \param rows: number of rows
     * \param cols: number of elements in each row
     * \param data: pointer to buffer
     * \param stepBytes: stride between two consecutive rows in bytes
     * */
    DeviceArray2D(int rows, int cols, void *data, size_t stepBytes);
    /** \brief Copy constructor. Just increments reference counter. */
    DeviceArray2D(const DeviceArray2D &other);
    /** \brief Assignment operator. Just increments reference counter. */
    DeviceArray2D &operator=(const DeviceArray2D &other);
    /** \brief Allocates internal buffer in GPU memory. If internal buffer was
     * created before the function recreates it with new size. If new and old
     * sizes are equal it does nothing.
     * \param rows: number of rows to allocate
     * \param cols: number of elements in each row
     * */
    void create(int rows, int cols);
    /** \brief Decrements reference counter and releases internal buffer if
     * needed. */
    void release();
    /** \brief Performs data copying. If destination size differs it will be
     * reallocated.
     * \param other: destination container
     * */
    void copyTo(DeviceArray2D &other) const;
    /** \brief Uploads data to internal buffer in GPU memory. It calls create()
     * inside to ensure that internal buffer size is enough.
     * \param host_ptr: pointer to host buffer to upload
     * \param host_step: stride between two consecutive rows in bytes for host buffer
     * \param rows: number of rows to upload
     * \param cols: number of elements in each row
     * */
    void upload(const void *host_ptr, size_t host_step, int rows, int cols);
    /** \brief Downloads data from internal buffer to CPU memory. User is
     * responsible for correct host buffer size.
     * \param host_ptr: pointer to host buffer to download to
     * \param host_step: stride between two consecutive rows in bytes for host buffer
     * */
    void download(void *host_ptr, size_t host_step) const;
    /** \brief Performs swap of data pointed with another device array.
     * \param other_arg: device array to swap with
     * */
    void swap(DeviceArray2D &other_arg);
    /** \brief Uploads data to internal buffer in GPU memory. It calls create()
     * inside to ensure that internal buffer size is enough.
     * \param data: host vector to upload from
     * \param cols: stride in elements between two consecutive rows for host buffer
     * */
    template <class A>
    void upload(const std::vector<T, A> &data, int cols);
    /** \brief Downloads data from internal buffer to CPU memory
     * \param data: host vector to download to
     * \param cols: output stride in elements between two consecutive rows for
     * host vector.
     * */
    template <class A>
    void download(std::vector<T, A> &data, int &cols) const;
    /** \brief Returns pointer to given row in internal buffer.
     * \param y: row index
     * */
    T *ptr(int y = 0);
    /** \brief Returns const pointer to given row in internal buffer.
     * \param y: row index
     * */
    const T *ptr(int y = 0) const;
    // using DeviceMemory2D::ptr;
    /** \brief Returns pointer to internal buffer in GPU memory. */
    operator T *();
    /** \brief Returns const pointer to internal buffer in GPU memory. */
    operator const T *() const;
    /** \brief Returns number of elements in each row. */
    int cols() const;
    /** \brief Returns number of rows. */
    int rows() const;
    /** \brief Returns step in elements. */
    size_t elem_step() const;
};
} // namespace cuda
namespace device {
using kfusion::cuda::DeviceArray;
using kfusion::cuda::DeviceArray2D;
} // namespace device
} // namespace kfusion
///////////////////// Inline implementations of DeviceArray
///////////////////////////////////////////////
// Default-constructs an empty (unallocated) array.
template <class T>
inline kfusion::cuda::DeviceArray<T>::DeviceArray() {}
// Allocates 'size' elements on the device (element count converted to bytes).
template <class T>
inline kfusion::cuda::DeviceArray<T>::DeviceArray(size_t size) : DeviceMemory(size * elem_size) {}
// Wraps a user-allocated device buffer (no reference counting).
template <class T>
inline kfusion::cuda::DeviceArray<T>::DeviceArray(T *ptr, size_t size) : DeviceMemory(ptr, size * elem_size) {}
// Shares ownership with 'other' (increments the reference count).
template <class T>
inline kfusion::cuda::DeviceArray<T>::DeviceArray(const DeviceArray &other) : DeviceMemory(other) {}
template <class T>
inline kfusion::cuda::DeviceArray<T> &kfusion::cuda::DeviceArray<T>::operator=(const DeviceArray &other) {
    DeviceMemory::operator=(other);
    return *this;
}
// (Re)allocates to 'size' elements.
template <class T>
inline void kfusion::cuda::DeviceArray<T>::create(size_t size) {
    DeviceMemory::create(size * elem_size);
}
// Drops this reference; the buffer is released when no references remain.
template <class T>
inline void kfusion::cuda::DeviceArray<T>::release() {
    DeviceMemory::release();
}
// Device-to-device copy; destination is reallocated if its size differs.
template <class T>
inline void kfusion::cuda::DeviceArray<T>::copyTo(DeviceArray &other) const {
    DeviceMemory::copyTo(other);
}
// Host-to-device copy of 'size' elements.
template <class T>
inline void kfusion::cuda::DeviceArray<T>::upload(const T *host_ptr, size_t size) {
    DeviceMemory::upload(host_ptr, size * elem_size);
}
// Device-to-host copy; the caller provides a large-enough buffer.
template <class T>
inline void kfusion::cuda::DeviceArray<T>::download(T *host_ptr) const {
    DeviceMemory::download(host_ptr);
}
// Swaps buffers with another array.
template <class T>
void kfusion::cuda::DeviceArray<T>::swap(DeviceArray &other_arg) {
    DeviceMemory::swap(other_arg);
}
// Implicit conversions to raw device pointers.
template <class T>
inline kfusion::cuda::DeviceArray<T>::operator T *() {
    return ptr();
}
template <class T>
inline kfusion::cuda::DeviceArray<T>::operator const T *() const {
    return ptr();
}
// Size in elements (allocated bytes / element size).
template <class T>
inline size_t kfusion::cuda::DeviceArray<T>::size() const {
    return sizeBytes() / elem_size;
}
template <class T>
inline T *kfusion::cuda::DeviceArray<T>::ptr() {
    return DeviceMemory::ptr<T>();
}
template <class T>
inline const T *kfusion::cuda::DeviceArray<T>::ptr() const {
    return DeviceMemory::ptr<T>();
}
/* Uploads the contents of a host vector to the device buffer via the
 * pointer/size overload (which calls create() to size the buffer).
 *
 * Fix: guard against an empty vector — taking &data[0] on an empty
 * std::vector is undefined behavior. This mirrors the empty() guard already
 * present in download(std::vector&). */
template <class T>
template <class A>
inline void kfusion::cuda::DeviceArray<T>::upload(const std::vector<T, A> &data) {
    if (!data.empty())
        upload(&data[0], data.size());
}
// Downloads the whole device buffer into 'data', resizing it to fit.
// The empty() guard avoids taking &data[0] on an empty vector (UB).
template <class T>
template <class A>
inline void kfusion::cuda::DeviceArray<T>::download(std::vector<T, A> &data) const {
    data.resize(size());
    if (!data.empty())
        download(&data[0]);
}
///////////////////// Inline implementations of DeviceArray2D
///////////////////////////////////////////////
// Default-constructs an empty (unallocated) 2-d array.
template <class T>
inline kfusion::cuda::DeviceArray2D<T>::DeviceArray2D() {}
// Allocates a pitched rows x cols buffer on the device (cols converted to bytes).
template <class T>
inline kfusion::cuda::DeviceArray2D<T>::DeviceArray2D(int rows, int cols) : DeviceMemory2D(rows, cols * elem_size) {}
// Wraps a user-allocated pitched device buffer (no reference counting).
template <class T>
inline kfusion::cuda::DeviceArray2D<T>::DeviceArray2D(int rows, int cols, void *data, size_t stepBytes)
    : DeviceMemory2D(rows, cols * elem_size, data, stepBytes) {}
// Shares ownership with 'other' (increments the reference count).
template <class T>
inline kfusion::cuda::DeviceArray2D<T>::DeviceArray2D(const DeviceArray2D &other) : DeviceMemory2D(other) {}
template <class T>
inline kfusion::cuda::DeviceArray2D<T> &kfusion::cuda::DeviceArray2D<T>::operator=(const DeviceArray2D &other) {
    DeviceMemory2D::operator=(other);
    return *this;
}
// (Re)allocates to rows x cols elements.
template <class T>
inline void kfusion::cuda::DeviceArray2D<T>::create(int rows, int cols) {
    DeviceMemory2D::create(rows, cols * elem_size);
}
// Drops this reference; the buffer is released when no references remain.
template <class T>
inline void kfusion::cuda::DeviceArray2D<T>::release() {
    DeviceMemory2D::release();
}
// Device-to-device copy; destination is reallocated if its size differs.
template <class T>
inline void kfusion::cuda::DeviceArray2D<T>::copyTo(DeviceArray2D &other) const {
    DeviceMemory2D::copyTo(other);
}
// Host-to-device copy of a rows x cols region; host_step is in bytes.
template <class T>
inline void kfusion::cuda::DeviceArray2D<T>::upload(const void *host_ptr, size_t host_step, int rows, int cols) {
    DeviceMemory2D::upload(host_ptr, host_step, rows, cols * elem_size);
}
// Device-to-host copy; the caller provides the buffer and its row stride in bytes.
template <class T>
inline void kfusion::cuda::DeviceArray2D<T>::download(void *host_ptr, size_t host_step) const {
    DeviceMemory2D::download(host_ptr, host_step);
}
/* Uploads a host vector interpreted as a (data.size() / cols) x cols matrix,
 * tightly packed (host row stride = cols * elem_size bytes).
 *
 * Fix: guard against an empty vector — taking &data[0] on an empty
 * std::vector is undefined behavior. This mirrors the empty() guard used by
 * the download(std::vector&, int&) counterpart. */
template <class T>
template <class A>
inline void kfusion::cuda::DeviceArray2D<T>::upload(const std::vector<T, A> &data, int cols) {
    if (!data.empty())
        upload(&data[0], cols * elem_size, data.size() / cols, cols);
}
// Downloads the whole buffer into 'data' (resized to cols*rows) and reports
// the host row stride in elements via 'elem_step'.
// The empty() guard avoids taking &data[0] on an empty vector (UB).
template <class T>
template <class A>
inline void kfusion::cuda::DeviceArray2D<T>::download(std::vector<T, A> &data, int &elem_step) const {
    elem_step = cols();
    data.resize(cols() * rows());
    if (!data.empty())
        download(&data[0], colsBytes());
}
// Swaps buffers with another array.
template <class T>
void kfusion::cuda::DeviceArray2D<T>::swap(DeviceArray2D &other_arg) {
    DeviceMemory2D::swap(other_arg);
}
// Typed pointer to row y.
template <class T>
inline T *kfusion::cuda::DeviceArray2D<T>::ptr(int y) {
    return DeviceMemory2D::ptr<T>(y);
}
template <class T>
inline const T *kfusion::cuda::DeviceArray2D<T>::ptr(int y) const {
    return DeviceMemory2D::ptr<T>(y);
}
// Implicit conversions to raw row-0 device pointers.
template <class T>
inline kfusion::cuda::DeviceArray2D<T>::operator T *() {
    return ptr();
}
template <class T>
inline kfusion::cuda::DeviceArray2D<T>::operator const T *() const {
    return ptr();
}
// Row width in elements (row bytes / element size).
template <class T>
inline int kfusion::cuda::DeviceArray2D<T>::cols() const {
    return DeviceMemory2D::colsBytes() / elem_size;
}
template <class T>
inline int kfusion::cuda::DeviceArray2D<T>::rows() const {
    return DeviceMemory2D::rows();
}
// Row pitch in elements (pitch bytes / element size).
template <class T>
inline size_t kfusion::cuda::DeviceArray2D<T>::elem_step() const {
    return DeviceMemory2D::step() / elem_size;
}
================================================
FILE: include/kfusion/cuda/device_memory.hpp
================================================
#pragma once
#include <kfusion/cuda/kernel_containers.hpp>
#include <kfusion/exports.hpp>
namespace kfusion {
namespace cuda {
/** \brief Error handler. All GPU functions from this subsystem call the
* function to report an error. For internal use only */
KF_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func = "");
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** \brief @b DeviceMemory class
*
* \note This is a BLOB container class with reference counting for GPU memory.
*
* \author Anatoly Baksheev
*/
class KF_EXPORTS DeviceMemory {
public:
    /** \brief Empty constructor. */
    DeviceMemory();
    /** \brief Destructor. */
    ~DeviceMemory();
    /** \brief Allocates internal buffer in GPU memory
     * \param sizeBytes_arg: amount of memory to allocate
     * */
    DeviceMemory(size_t sizeBytes_arg);
    /** \brief Initializes with user allocated buffer. Reference counting is
     * disabled in this case.
     * \param ptr_arg: pointer to buffer
     * \param sizeBytes_arg: buffer size
     * */
    DeviceMemory(void *ptr_arg, size_t sizeBytes_arg);
    /** \brief Copy constructor. Just increments reference counter. */
    DeviceMemory(const DeviceMemory &other_arg);
    /** \brief Assignment operator. Just increments reference counter. */
    DeviceMemory &operator=(const DeviceMemory &other_arg);
    /** \brief Allocates internal buffer in GPU memory. If internal buffer was
     * created before the function recreates it with new size. If new and old
     * sizes are equal it does nothing.
     * \param sizeBytes_arg: buffer size
     * */
    void create(size_t sizeBytes_arg);
    /** \brief Decrements reference counter and releases internal buffer if
     * needed. */
    void release();
    /** \brief Performs data copying. If destination size differs it will be
     * reallocated.
     * \param other: destination container
     * */
    void copyTo(DeviceMemory &other) const;
    /** \brief Uploads data to internal buffer in GPU memory. It calls create()
     * inside to ensure that internal buffer size is enough.
     * \param host_ptr_arg: pointer to host buffer to upload
     * \param sizeBytes_arg: buffer size
     * */
    void upload(const void *host_ptr_arg, size_t sizeBytes_arg);
    /** \brief Downloads data from internal buffer to CPU memory
     * \param host_ptr_arg: pointer to host buffer to download to
     * */
    void download(void *host_ptr_arg) const;
    /** \brief Performs swap of data pointed with another device memory.
     * \param other_arg: device memory to swap with
     * */
    void swap(DeviceMemory &other_arg);
    /** \brief Returns typed pointer to internal buffer in GPU memory. */
    template <class T>
    T *ptr();
    /** \brief Returns constant typed pointer to internal buffer in GPU memory. */
    template <class T>
    const T *ptr() const;
    /** \brief Conversion to PtrSz for passing to kernel functions. */
    template <class U>
    operator PtrSz<U>() const;
    /** \brief Returns true if unallocated otherwise false. */
    bool empty() const;
    /** \brief Returns allocated size in bytes. */
    size_t sizeBytes() const;

private:
    /** \brief Device pointer. */
    void *data_;
    /** \brief Allocated size in bytes. */
    size_t sizeBytes_;
    /** \brief Pointer to reference counter in CPU memory. */
    int *refcount_;
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** \brief @b DeviceMemory2D class
*
* \note This is a BLOB container class with reference counting for pitched GPU
* memory.
*
* \author Anatoly Baksheev
*/
class KF_EXPORTS DeviceMemory2D {
public:
    /** \brief Empty constructor. */
    DeviceMemory2D();
    /** \brief Destructor. */
    ~DeviceMemory2D();
    /** \brief Allocates internal buffer in GPU memory
     * \param rows_arg: number of rows to allocate
     * \param colsBytes_arg: width of the buffer in bytes
     * */
    DeviceMemory2D(int rows_arg, int colsBytes_arg);
    /** \brief Initializes with user allocated buffer. Reference counting is
     * disabled in this case.
     * \param rows_arg: number of rows
     * \param colsBytes_arg: width of the buffer in bytes
     * \param data_arg: pointer to buffer
     * \param step_arg: stride between two consecutive rows in bytes
     * */
    DeviceMemory2D(int rows_arg, int colsBytes_arg, void *data_arg, size_t step_arg);
    /** \brief Copy constructor. Just increments reference counter. */
    DeviceMemory2D(const DeviceMemory2D &other_arg);
    /** \brief Assignment operator. Just increments reference counter. */
    DeviceMemory2D &operator=(const DeviceMemory2D &other_arg);
    /** \brief Allocates internal buffer in GPU memory. If internal buffer was
     * created before the function recreates it with new size. If new and old
     * sizes are equal it does nothing.
     * \param rows_arg: number of rows to allocate
     * \param colsBytes_arg: width of the buffer in bytes
     * */
    void create(int rows_arg, int colsBytes_arg);
    /** \brief Decrements reference counter and releases internal buffer if
     * needed. */
    void release();
    /** \brief Performs data copying. If destination size differs it will be
     * reallocated.
     * \param other: destination container
     * */
    void copyTo(DeviceMemory2D &other) const;
    /** \brief Uploads data to internal buffer in GPU memory. It calls create()
     * inside to ensure that internal buffer size is enough.
     * \param host_ptr_arg: pointer to host buffer to upload
     * \param host_step_arg: stride between two consecutive rows in bytes for host buffer
     * \param rows_arg: number of rows to upload
     * \param colsBytes_arg: width of host buffer in bytes
     * */
    void upload(const void *host_ptr_arg, size_t host_step_arg, int rows_arg, int colsBytes_arg);
    /** \brief Downloads data from internal buffer to CPU memory. User is
     * responsible for correct host buffer size.
     * \param host_ptr_arg: pointer to host buffer to download to
     * \param host_step_arg: stride between two consecutive rows in bytes for host buffer
     * */
    void download(void *host_ptr_arg, size_t host_step_arg) const;
    /** \brief Performs swap of data pointed with another device memory.
     * \param other_arg: device memory to swap with
     * */
    void swap(DeviceMemory2D &other_arg);
    /** \brief Returns pointer to given row in internal buffer.
     * \param y_arg: row index
     * */
    template <class T>
    T *ptr(int y_arg = 0);
    /** \brief Returns constant pointer to given row in internal buffer.
     * \param y_arg: row index
     * */
    template <class T>
    const T *ptr(int y_arg = 0) const;
    /** \brief Conversion to PtrStep for passing to kernel functions. */
    template <class U>
    operator PtrStep<U>() const;
    /** \brief Conversion to PtrStepSz for passing to kernel functions. */
    template <class U>
    operator PtrStepSz<U>() const;
    /** \brief Returns true if unallocated otherwise false. */
    bool empty() const;
    /** \brief Returns number of bytes in each row. */
    int colsBytes() const;
    /** \brief Returns number of rows. */
    int rows() const;
    /** \brief Returns stride between two consecutive rows in bytes for internal
     * buffer. Step is stored always and everywhere in bytes!!! */
    size_t step() const;

private:
    /** \brief Device pointer. */
    void *data_;
    /** \brief Stride between two consecutive rows in bytes for internal buffer.
     * Step is stored always and everywhere in bytes!!! */
    size_t step_;
    /** \brief Width of the buffer in bytes. */
    int colsBytes_;
    /** \brief Number of rows. */
    int rows_;
    /** \brief Pointer to reference counter in CPU memory. */
    int *refcount_;
};
} // namespace cuda
namespace device {
using kfusion::cuda::DeviceMemory;
using kfusion::cuda::DeviceMemory2D;
} // namespace device
} // namespace kfusion
///////////////////// Inline implementations of DeviceMemory
///////////////////////////////////////////////
/* Typed access to the raw device allocation. */
template <class T>
inline T *kfusion::cuda::DeviceMemory::ptr() {
    return static_cast<T *>(data_);
}
template <class T>
inline const T *kfusion::cuda::DeviceMemory::ptr() const {
    return static_cast<const T *>(data_);
}
/* Wraps the allocation as a kernel-friendly pointer + element count; the size
 * is rounded down to whole elements of U.
 *
 * Cleanup: replaced the C-style `(U *)` cast with explicit named casts. In
 * this const member, ptr<U>() returns const U*, so the old cast was silently
 * stripping const — the const_cast below makes that visible and deliberate
 * (PtrSz stores a mutable pointer for kernels). */
template <class U>
inline kfusion::cuda::DeviceMemory::operator kfusion::cuda::PtrSz<U>() const {
    PtrSz<U> result;
    result.data = const_cast<U *>(ptr<U>());
    result.size = sizeBytes_ / sizeof(U);
    return result;
}
///////////////////// Inline implementations of DeviceMemory2D
///////////////////////////////////////////////
/* Typed pointer to row y_arg of the pitched allocation (step_ is in bytes).
 * Cleanup: C-style casts replaced with named casts; byte offsetting is done
 * through char* as before. */
template <class T>
T *kfusion::cuda::DeviceMemory2D::ptr(int y_arg) {
    return reinterpret_cast<T *>(static_cast<char *>(data_) + y_arg * step_);
}
template <class T>
const T *kfusion::cuda::DeviceMemory2D::ptr(int y_arg) const {
    return reinterpret_cast<const T *>(static_cast<const char *>(data_) + y_arg * step_);
}
/* Conversions to kernel-friendly pitched pointers. In these const members,
 * ptr<U>() returns const U*; the previous C-style `(U *)` cast silently
 * stripped const, which the const_cast now states explicitly (the Ptr*
 * wrappers store mutable pointers for kernels). */
template <class U>
kfusion::cuda::DeviceMemory2D::operator kfusion::cuda::PtrStep<U>() const {
    PtrStep<U> result;
    result.data = const_cast<U *>(ptr<U>());
    result.step = step_;
    return result;
}
template <class U>
kfusion::cuda::DeviceMemory2D::operator kfusion::cuda::PtrStepSz<U>() const {
    PtrStepSz<U> result;
    result.data = const_cast<U *>(ptr<U>());
    result.step = step_;
    result.cols = colsBytes_ / sizeof(U);
    result.rows = rows_;
    return result;
}
================================================
FILE: include/kfusion/cuda/imgproc.hpp
================================================
#pragma once
/* kinfu includes */
#include <kfusion/types.hpp>
/* pcl includes */
#include <pcl/PolygonMesh.h>
namespace kfusion {
namespace cuda {
/* depth-map preprocessing */
KF_EXPORTS void depthBilateralFilter(const Depth &in, Depth &out, int ksz, float sigma_spatial, float sigma_depth);
KF_EXPORTS void depthTruncation(Depth &depth, float threshold);
KF_EXPORTS void depthBuildPyramid(const Depth &depth, Depth &pyramid, float sigma_depth);
/* normal / point computation from depth */
KF_EXPORTS void computeNormalsAndMaskDepth(const Intr &intr, Depth &depth, Normals &normals);
KF_EXPORTS void computePointNormals(const Intr &intr, const Depth &depth, Cloud &points, Normals &normals);
KF_EXPORTS void computeDists(const Depth &depth, Dists &dists, const Intr &intr);
/* down-sampling helpers for pyramid levels */
KF_EXPORTS void resizeDepthNormals(const Depth &depth, const Normals &normals, Depth &depth_out, Normals &normals_out);
KF_EXPORTS void resizePointsNormals(const Cloud &points, const Normals &normals, Cloud &points_out,
                                    Normals &normals_out);
/* synchronisation */
KF_EXPORTS void waitAllDefaultStream();
/* rendering */
KF_EXPORTS void renderTangentColors(const Normals &normals, Image &image);
/** \brief rasterises the canonical surface
 * \param[in] intr camera intrinsics
 * \param[in] vol2cam volume -> camera coordinate transform
 * \param[in] s surface to rasterise
 * \param[out] vertices_out frame with the currently visible geometry
 * \param[out] normals_out frame with the normals of the currently visible geometry
 */
KF_EXPORTS void rasteriseSurface(const Intr &intr, const Affine3f &vol2cam, const Surface &s, Cloud &vertices_out,
                                 Normals &normals_out);
KF_EXPORTS void renderImage(const Depth &depth, const Normals &normals, const Intr &intr, const Vec3f &light_pose,
                            Image &image);
KF_EXPORTS void renderImage(const Cloud &points, const Normals &normals, const Intr &intr, const Vec3f &light_pose,
                            Image &image);
} // namespace cuda
} // namespace kfusion
================================================
FILE: include/kfusion/cuda/kernel_containers.hpp
================================================
#pragma once
#if defined(__CUDACC__)
#define __kf_hdevice__ __host__ __device__ __forceinline__
#define __kf_device__ __device__ __forceinline__
#else
#define __kf_hdevice__
#define __kf_device__
#endif
#include <cstddef>
namespace kfusion {
namespace cuda {
/* Minimal non-owning device-pointer wrapper usable from host and device code. */
template <typename T>
struct DevPtr {
    typedef T elem_type;
    const static size_t elem_size = sizeof(elem_type);
    /* raw device pointer (not owned) */
    T *data;
    __kf_hdevice__ DevPtr() : data(0) {}
    __kf_hdevice__ DevPtr(T *data_arg) : data(data_arg) {}
    __kf_hdevice__ size_t elemSize() const { return elem_size; }
    __kf_hdevice__ operator T *() { return data; }
    __kf_hdevice__ operator const T *() const { return data; }
};
/* Device pointer plus element count. */
template <typename T>
struct PtrSz : public DevPtr<T> {
    __kf_hdevice__ PtrSz() : size(0) {}
    __kf_hdevice__ PtrSz(T *data_arg, size_t size_arg) : DevPtr<T>(data_arg), size(size_arg) {}
    size_t size;
};
/* Pitched device pointer: consecutive rows are 'step' bytes apart. */
template <typename T>
struct PtrStep : public DevPtr<T> {
    __kf_hdevice__ PtrStep() : step(0) {}
    __kf_hdevice__ PtrStep(T *data_arg, size_t step_arg) : DevPtr<T>(data_arg), step(step_arg) {}
    /** \brief stride between two consecutive rows in bytes. Step is stored always
     * and everywhere in bytes!!! */
    size_t step;
    __kf_hdevice__ T *ptr(int y = 0) { return (T *) ((char *) DevPtr<T>::data + y * step); }
    __kf_hdevice__ const T *ptr(int y = 0) const { return (const T *) ((const char *) DevPtr<T>::data + y * step); }
    __kf_hdevice__ T &operator()(int y, int x) { return ptr(y)[x]; }
    __kf_hdevice__ const T &operator()(int y, int x) const { return ptr(y)[x]; }
};
/* Pitched device pointer plus 2-d dimensions (rows x cols elements). */
template <typename T>
struct PtrStepSz : public PtrStep<T> {
    __kf_hdevice__ PtrStepSz() : cols(0), rows(0) {}
    __kf_hdevice__ PtrStepSz(int rows_arg, int cols_arg, T *data_arg, size_t step_arg)
        : PtrStep<T>(data_arg, step_arg), cols(cols_arg), rows(rows_arg) {}
    int cols;
    int rows;
};
} // namespace cuda
namespace device {
using kfusion::cuda::PtrStep;
using kfusion::cuda::PtrStepSz;
using kfusion::cuda::PtrSz;
} // namespace device
} // namespace kfusion
namespace kf = kfusion;
================================================
FILE: include/kfusion/cuda/marching_cubes.hpp
================================================
#pragma once
/* eigen includes */
#include <Eigen/Core>
/* kinfu includes */
#include <kfusion/cuda/device_array.hpp>
#include <kfusion/cuda/tsdf_volume.hpp>
/* pcl includes */
#include <pcl/point_types.h>
namespace kfusion {
namespace cuda {
/** \brief MarchingCubes implements MarchingCubes functionality for TSDF volume on GPU
* \author Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)
*/
class KF_EXPORTS MarchingCubes {
public:
    /** \brief Default size for the triangles buffer, in points (three points per triangle). */
    enum { POINTS_PER_TRIANGLE = 3, DEFAULT_TRIANGLES_BUFFER_SIZE = 2 * 1000 * 1000 * POINTS_PER_TRIANGLE };
    /** \brief Smart pointer. */
    typedef boost::shared_ptr<MarchingCubes> Ptr;
    /** \brief Default constructor */
    MarchingCubes();
    /** \brief Destructor */
    ~MarchingCubes();
    /* set volume pose */
    void setPose(const cv::Affine3f& pose);
    /* run marching cubes to extract the zero level set of the tsdf volume;
     * extracted vertices/normals are written into the supplied device buffers */
    Surface run(const TsdfVolume& volume, DeviceArray<pcl::PointXYZ>& vertices_buffer,
                DeviceArray<pcl::Normal>& normals_buffer);

private:
    /** \brief Edge table for marching cubes */
    DeviceArray<int> edgeTable_;
    /** \brief Number-of-vertices table for marching cubes */
    DeviceArray<int> numVertsTable_;
    /** \brief Triangles table for marching cubes */
    DeviceArray<int> triTable_;
    /** \brief Temporary buffer used by marching cubes (the first row stores occupied
     * voxel ids, the second the number of vertices, the third point offsets) */
    DeviceArray2D<int> occupied_voxels_buffer_;
    /* volume pose */
    cv::Affine3f pose;
};
} // namespace cuda
} // namespace kfusion
================================================
FILE: include/kfusion/cuda/projective_icp.hpp
================================================
#pragma once
#include <kfusion/types.hpp>
namespace kfusion {
namespace cuda {
class ProjectiveICP {
public:
    /* maximum number of pyramid levels supported */
    enum { MAX_PYRAMID_LEVELS = 4 };
    /* per-level image/point/normal pyramids */
    typedef std::vector<Depth> DepthPyr;
    typedef std::vector<Cloud> PointsPyr;
    typedef std::vector<Normals> NormalsPyr;
    ProjectiveICP();
    virtual ~ProjectiveICP();
    /* distance / angle thresholds used by the ICP (semantics in the .cpp) */
    float getDistThreshold() const;
    void setDistThreshold(float distance);
    float getAngleThreshold() const;
    void setAngleThreshold(float angle);
    /* number of iterations to run per pyramid level */
    void setIterationsNum(const std::vector<int> &iters);
    int getUsedLevelsNum() const;
    virtual bool estimateTransform(Affine3f &affine, const Intr &intr, const Frame &curr, const Frame &prev);
    /** The function takes masked depth, i.e. it assumes for performance reasons
     * that "if depth(y,x) is not zero, then normals(y,x) surely is not qnan" */
    /* NOTE(review): several pyramid arguments below are passed by value; switching
     * to const& would require matching changes in the out-of-view definitions and
     * any overrides, so it is only flagged here. */
    virtual bool estimateTransform(Affine3f &affine, const Intr &intr, const DepthPyr &dcurr, const NormalsPyr ncurr,
                                   const DepthPyr dprev, const NormalsPyr nprev);
    virtual bool estimateTransform(Affine3f &affine, const Intr &intr, const PointsPyr &vcurr, const NormalsPyr ncurr,
                                   const PointsPyr vprev, const NormalsPyr nprev);
    // static Vec3f rodrigues2(const Mat3f& matrix);

private:
    /* per-level iteration counts */
    std::vector<int> iters_;
    float angle_thres_;
    float dist_thres_;
    /* scratch buffer used by the implementation */
    DeviceArray2D<float> buffer_;
    /* opaque helper defined in the .cpp (presumably wraps a CUDA stream — verify there) */
    struct StreamHelper;
    cv::Ptr<StreamHelper> shelp_;
};
} // namespace cuda
} // namespace kfusion
================================================
FILE: include/kfusion/cuda/temp_utils.hpp
================================================
#pragma once
#include <cuda.h>
#include <kfusion/cuda/kernel_containers.hpp>
#define FULL_MASK 0xffffffff
namespace kfusion {
namespace device {
template <class T>
__kf_hdevice__ void swap(T &a, T &b) {
T c(a);
a = b;
b = c;
}
/* Minimal device-side stand-in for std::numeric_limits, which cannot be used in
 * __device__ code. Only the specializations actually needed by the kernels exist. */
template <typename T>
struct numeric_limits;
/* float limits; constants mirror the <cfloat> values FLT_EPSILON / FLT_MIN / FLT_MAX. */
template <>
struct numeric_limits<float> {
// quiet NaN bit pattern (same value as CUDART_NAN_F)
__kf_device__ static float quiet_NaN() { return __int_as_float(0x7fffffff); /*CUDART_NAN_F*/ };
__kf_device__ static float epsilon() { return 1.192092896e-07f /*FLT_EPSILON*/; };
__kf_device__ static float min() { return 1.175494351e-38f /*FLT_MIN*/; };
__kf_device__ static float max() { return 3.402823466e+38f /*FLT_MAX*/; };
};
template <>
struct numeric_limits<unsigned short> {
__kf_device__ static unsigned short max() { return USHRT_MAX; };
};
/** Dot product of two float3 vectors.
 * Uses fused multiply-add (round-to-nearest) deliberately; do not rewrite as
 * plain a*b + c, which would change rounding behavior. */
__kf_device__ float dot(const float3 &v1, const float3 &v2) {
return __fmaf_rn(v1.x, v2.x, __fmaf_rn(v1.y, v2.y, v1.z * v2.z));
}
/*
 * Component-wise arithmetic operators for float3 (CUDA has no built-in vector
 * math). Functions marked __kf_hdevice__ are also usable from host code.
 */
__kf_device__ float3 &operator+=(float3 &vec, const float &v) {
vec.x += v;
vec.y += v;
vec.z += v;
return vec;
}
__kf_device__ float3 &operator+=(float3 &v1, const float3 &v2) {
v1.x += v2.x;
v1.y += v2.y;
v1.z += v2.z;
return v1;
}
__kf_device__ float3 operator+(const float3 &v1, const float3 &v2) {
return make_float3(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z);
}
// Hadamard (element-wise) product, not a dot or cross product.
__kf_device__ float3 operator*(const float3 &v1, const float3 &v2) {
return make_float3(v1.x * v2.x, v1.y * v2.y, v1.z * v2.z);
}
__kf_hdevice__ float3 operator*(const float3 &v1, const int3 &v2) {
return make_float3(v1.x * v2.x, v1.y * v2.y, v1.z * v2.z);
}
// Element-wise division; no zero checks are performed.
__kf_device__ float3 operator/(const float3 &v1, const float3 &v2) {
return make_float3(v1.x / v2.x, v1.y / v2.y, v1.z / v2.z);
}
// Scalar divided element-wise by a vector: (v/x, v/y, v/z).
__kf_hdevice__ float3 operator/(const float &v, const float3 &vec) {
return make_float3(v / vec.x, v / vec.y, v / vec.z);
}
__kf_device__ float3 &operator*=(float3 &vec, const float &v) {
vec.x *= v;
vec.y *= v;
vec.z *= v;
return vec;
}
__kf_hdevice__ float3 operator-(const float3 &v1, const float3 &v2) {
return make_float3(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z);
}
__kf_hdevice__ float3 operator*(const float3 &v1, const float &v) { return make_float3(v1.x * v, v1.y * v, v1.z * v); }
__kf_hdevice__ float3 operator*(const float &v, const float3 &v1) { return make_float3(v1.x * v, v1.y * v, v1.z * v); }
/** Euclidean length of v. */
__kf_device__ float norm(const float3 &v) { return sqrt(dot(v, v)); }
/** Squared Euclidean length of v (avoids the sqrt). */
__kf_device__ float norm_sqr(const float3 &v) { return dot(v, v); }
/** v scaled to unit length via the fast reciprocal square root; undefined for the zero vector. */
__kf_device__ float3 normalized(const float3 &v) { return v * rsqrt(dot(v, v)); }
/** Cross product v1 x v2. */
__kf_hdevice__ float3 cross(const float3 &v1, const float3 &v2) {
return make_float3(v1.y * v2.z - v1.z * v2.y, v1.z * v2.x - v1.x * v2.z, v1.x * v2.y - v1.y * v2.x);
}
/** Solves the quadratic x^2 - b*x + c = 0, writing the two roots to
 *  roots.y (smaller) and roots.z (larger). roots.x is set to 0, which the
 *  cubic solver treats as the known zero eigenvalue. */
__kf_device__ void computeRoots2(const float &b, const float &c, float3 &roots) {
    roots.x = 0.f;
    float discriminant = b * b - 4.f * c;
    // A negative discriminant cannot occur for a symmetric PSD matrix;
    // clamp to zero to guard against round-off.
    if (discriminant < 0.f)
        discriminant = 0.f;
    const float sqrt_disc = sqrtf(discriminant);
    roots.z = 0.5f * (b + sqrt_disc);
    roots.y = 0.5f * (b - sqrt_disc);
}
/** Closed-form roots of the cubic x^3 - c2*x^2 + c1*x - c0 = 0, written to
 * \p roots in ascending order. The coefficients come from the characteristic
 * polynomial of a symmetric 3x3 matrix, so all roots are real. */
__kf_device__ void computeRoots3(float c0, float c1, float c2, float3 &roots) {
if (fabsf(c0) < numeric_limits<float>::epsilon()) // one root is 0 -> quadratic equation
{
computeRoots2(c2, c1, roots);
} else {
const float s_inv3 = 1.f / 3.f;
const float s_sqrt3 = sqrtf(3.f);
// Construct the parameters used in classifying the roots of the equation
// and in solving the equation for the roots in closed form.
float c2_over_3 = c2 * s_inv3;
float a_over_3 = (c1 - c2 * c2_over_3) * s_inv3;
// a_over_3 must be <= 0 for three real roots; clamp round-off.
if (a_over_3 > 0.f)
a_over_3 = 0.f;
float half_b = 0.5f * (c0 + c2_over_3 * (2.f * c2_over_3 * c2_over_3 - c1));
float q = half_b * half_b + a_over_3 * a_over_3 * a_over_3;
// q must be <= 0 for three real roots; clamp round-off.
if (q > 0.f)
q = 0.f;
// Compute the eigenvalues by solving for the roots of the polynomial
// (trigonometric method; __cosf/__sinf are fast low-precision intrinsics).
float rho = sqrtf(-a_over_3);
float theta = atan2f(sqrtf(-q), half_b) * s_inv3;
float cos_theta = __cosf(theta);
float sin_theta = __sinf(theta);
roots.x = c2_over_3 + 2.f * rho * cos_theta;
roots.y = c2_over_3 - rho * (cos_theta + s_sqrt3 * sin_theta);
roots.z = c2_over_3 - rho * (cos_theta - s_sqrt3 * sin_theta);
// Sort in increasing order.
if (roots.x >= roots.y)
swap(roots.x, roots.y);
if (roots.y >= roots.z) {
swap(roots.y, roots.z);
if (roots.x >= roots.y)
swap(roots.x, roots.y);
}
if (roots.x <= 0) // eigenval for symetric positive semi-definite matrix can
// not be negative! Set it to 0 and fall back to the quadratic.
computeRoots2(c2, c1, roots);
}
}
/** \brief Eigendecomposition of a symmetric 3x3 matrix on the GPU.
 *
 * The matrix is supplied to the constructor as the packed upper triangle
 * [m00 m01 m02 m11 m12 m22] in \p mat_pkg (see the m??() accessors).
 * compute() writes the eigenvalues in ascending order to \p evals and the
 * corresponding eigenvectors to the rows of \p evecs.
 *
 * NOTE: compute() scales mat_pkg in place, so the packed input is clobbered.
 */
struct Eigen33 {
   public:
    /** Tiny fixed-size matrix made of float3 rows. */
    template <int Rows>
    struct MiniMat {
        float3 data[Rows];
        __kf_hdevice__ float3 &operator[](int i) { return data[i]; }
        __kf_hdevice__ const float3 &operator[](int i) const { return data[i]; }
    };
    typedef MiniMat<3> Mat33;
    typedef MiniMat<4> Mat43;

    /** Returns a unit vector orthogonal to \p src (assumed non-zero). */
    static __kf_device__ float3 unitOrthogonal(const float3 &src) {
        float3 perp;
        /* Unless the x and y coords are both close to zero, we can simply take
         * (-y, x, 0) and normalize it. */
        if (!isMuchSmallerThan(src.x, src.z) || !isMuchSmallerThan(src.y, src.z)) {
            float invnm = rsqrtf(src.x * src.x + src.y * src.y);
            perp.x      = -src.y * invnm;
            perp.y      = src.x * invnm;
            perp.z      = 0.0f;
        }
        /* If both x and y are close to zero, the vector is close to the z-axis,
         * so it is far from colinear to the x-axis; take the cross product with
         * (1,0,0) and normalize it. */
        else {
            float invnm = rsqrtf(src.z * src.z + src.y * src.y);
            perp.x      = 0.0f;
            perp.y      = -src.z * invnm;
            perp.z      = src.y * invnm;
        }
        return perp;
    }

    __kf_device__ Eigen33(volatile float *mat_pkg_arg) : mat_pkg(mat_pkg_arg) {}

    /** Computes eigenvalues (\p evals, ascending) and eigenvectors (\p evecs).
     * \p tmp and \p vec_tmp are caller-provided scratch (typically shared memory). */
    __kf_device__ void compute(Mat33 &tmp, Mat33 &vec_tmp, Mat33 &evecs, float3 &evals) {
        // Scale the matrix so its entries are in [-1,1]. The scaling is applied
        // only when at least one matrix entry has magnitude larger than 1.
        float max01 = fmaxf(fabsf(mat_pkg[0]), fabsf(mat_pkg[1]));
        float max23 = fmaxf(fabsf(mat_pkg[2]), fabsf(mat_pkg[3]));
        float max45 = fmaxf(fabsf(mat_pkg[4]), fabsf(mat_pkg[5]));
        float m0123 = fmaxf(max01, max23);
        float scale = fmaxf(max45, m0123);
        if (scale <= numeric_limits<float>::min())
            scale = 1.f;
        mat_pkg[0] /= scale;
        mat_pkg[1] /= scale;
        mat_pkg[2] /= scale;
        mat_pkg[3] /= scale;
        mat_pkg[4] /= scale;
        mat_pkg[5] /= scale;
        // The characteristic equation is x^3 - c2*x^2 + c1*x - c0 = 0. The
        // eigenvalues are the roots to this equation, all guaranteed to be
        // real-valued, because the matrix is symmetric.
        float c0 = m00() * m11() * m22() + 2.f * m01() * m02() * m12() - m00() * m12() * m12() - m11() * m02() * m02() -
                   m22() * m01() * m01();
        float c1 = m00() * m11() - m01() * m01() + m00() * m22() - m02() * m02() + m11() * m22() - m12() * m12();
        float c2 = m00() + m11() + m22();
        computeRoots3(c0, c1, c2, evals);
        if (evals.z - evals.x <= numeric_limits<float>::epsilon()) {
            // all three eigenvalues (numerically) equal -> any basis works
            evecs[0] = make_float3(1.f, 0.f, 0.f);
            evecs[1] = make_float3(0.f, 1.f, 0.f);
            evecs[2] = make_float3(0.f, 0.f, 1.f);
        } else if (evals.y - evals.x <= numeric_limits<float>::epsilon()) {
            // first and second equal: only the largest eigenvector is unique
            tmp[0] = row0();
            tmp[1] = row1();
            tmp[2] = row2();
            tmp[0].x -= evals.z;
            tmp[1].y -= evals.z;
            tmp[2].z -= evals.z;
            // null space of (A - lambda*I) via the most reliable cross product
            vec_tmp[0] = cross(tmp[0], tmp[1]);
            vec_tmp[1] = cross(tmp[0], tmp[2]);
            vec_tmp[2] = cross(tmp[1], tmp[2]);
            float len1 = dot(vec_tmp[0], vec_tmp[0]);
            float len2 = dot(vec_tmp[1], vec_tmp[1]);
            float len3 = dot(vec_tmp[2], vec_tmp[2]);
            if (len1 >= len2 && len1 >= len3) {
                evecs[2] = vec_tmp[0] * rsqrtf(len1);
            } else if (len2 >= len1 && len2 >= len3) {
                evecs[2] = vec_tmp[1] * rsqrtf(len2);
            } else {
                evecs[2] = vec_tmp[2] * rsqrtf(len3);
            }
            evecs[1] = unitOrthogonal(evecs[2]);
            evecs[0] = cross(evecs[1], evecs[2]);
        } else if (evals.z - evals.y <= numeric_limits<float>::epsilon()) {
            // second and third equal: only the smallest eigenvector is unique
            tmp[0] = row0();
            tmp[1] = row1();
            tmp[2] = row2();
            tmp[0].x -= evals.x;
            tmp[1].y -= evals.x;
            tmp[2].z -= evals.x;
            vec_tmp[0] = cross(tmp[0], tmp[1]);
            vec_tmp[1] = cross(tmp[0], tmp[2]);
            vec_tmp[2] = cross(tmp[1], tmp[2]);
            float len1 = dot(vec_tmp[0], vec_tmp[0]);
            float len2 = dot(vec_tmp[1], vec_tmp[1]);
            float len3 = dot(vec_tmp[2], vec_tmp[2]);
            if (len1 >= len2 && len1 >= len3) {
                evecs[0] = vec_tmp[0] * rsqrtf(len1);
            } else if (len2 >= len1 && len2 >= len3) {
                evecs[0] = vec_tmp[1] * rsqrtf(len2);
            } else {
                evecs[0] = vec_tmp[2] * rsqrtf(len3);
            }
            evecs[1] = unitOrthogonal(evecs[0]);
            evecs[2] = cross(evecs[0], evecs[1]);
        } else {
            // three distinct eigenvalues: extract each eigenvector from the null
            // space of (A - lambda*I), tracking which extraction was least/most
            // reliable (min_el/max_el index the smallest/largest squared length).
            tmp[0] = row0();
            tmp[1] = row1();
            tmp[2] = row2();
            tmp[0].x -= evals.z;
            tmp[1].y -= evals.z;
            tmp[2].z -= evals.z;
            vec_tmp[0] = cross(tmp[0], tmp[1]);
            vec_tmp[1] = cross(tmp[0], tmp[2]);
            vec_tmp[2] = cross(tmp[1], tmp[2]);
            float len1 = dot(vec_tmp[0], vec_tmp[0]);
            float len2 = dot(vec_tmp[1], vec_tmp[1]);
            float len3 = dot(vec_tmp[2], vec_tmp[2]);
            float mmax[3];
            unsigned int min_el = 2;
            unsigned int max_el = 2;
            if (len1 >= len2 && len1 >= len3) {
                mmax[2] = len1;
                evecs[2] = vec_tmp[0] * rsqrtf(len1);
            } else if (len2 >= len1 && len2 >= len3) {
                mmax[2] = len2;
                evecs[2] = vec_tmp[1] * rsqrtf(len2);
            } else {
                mmax[2] = len3;
                evecs[2] = vec_tmp[2] * rsqrtf(len3);
            }
            tmp[0] = row0();
            tmp[1] = row1();
            tmp[2] = row2();
            tmp[0].x -= evals.y;
            tmp[1].y -= evals.y;
            tmp[2].z -= evals.y;
            vec_tmp[0] = cross(tmp[0], tmp[1]);
            vec_tmp[1] = cross(tmp[0], tmp[2]);
            vec_tmp[2] = cross(tmp[1], tmp[2]);
            len1 = dot(vec_tmp[0], vec_tmp[0]);
            len2 = dot(vec_tmp[1], vec_tmp[1]);
            len3 = dot(vec_tmp[2], vec_tmp[2]);
            if (len1 >= len2 && len1 >= len3) {
                mmax[1] = len1;
                evecs[1] = vec_tmp[0] * rsqrtf(len1);
                min_el = len1 <= mmax[min_el] ? 1 : min_el;
                max_el = len1 > mmax[max_el] ? 1 : max_el;
            } else if (len2 >= len1 && len2 >= len3) {
                mmax[1] = len2;
                evecs[1] = vec_tmp[1] * rsqrtf(len2);
                min_el = len2 <= mmax[min_el] ? 1 : min_el;
                max_el = len2 > mmax[max_el] ? 1 : max_el;
            } else {
                mmax[1] = len3;
                evecs[1] = vec_tmp[2] * rsqrtf(len3);
                min_el = len3 <= mmax[min_el] ? 1 : min_el;
                max_el = len3 > mmax[max_el] ? 1 : max_el;
            }
            tmp[0] = row0();
            tmp[1] = row1();
            tmp[2] = row2();
            tmp[0].x -= evals.x;
            tmp[1].y -= evals.x;
            tmp[2].z -= evals.x;
            vec_tmp[0] = cross(tmp[0], tmp[1]);
            vec_tmp[1] = cross(tmp[0], tmp[2]);
            vec_tmp[2] = cross(tmp[1], tmp[2]);
            len1 = dot(vec_tmp[0], vec_tmp[0]);
            len2 = dot(vec_tmp[1], vec_tmp[1]);
            len3 = dot(vec_tmp[2], vec_tmp[2]);
            if (len1 >= len2 && len1 >= len3) {
                mmax[0] = len1;
                evecs[0] = vec_tmp[0] * rsqrtf(len1);
                // BUGFIX: the original compared len3 in all three branches here
                // (copy/paste from the else-branch below); each branch must
                // compare the length it actually selected, as the second
                // selection block above does.
                min_el = len1 <= mmax[min_el] ? 0 : min_el;
                max_el = len1 > mmax[max_el] ? 0 : max_el;
            } else if (len2 >= len1 && len2 >= len3) {
                mmax[0] = len2;
                evecs[0] = vec_tmp[1] * rsqrtf(len2);
                min_el = len2 <= mmax[min_el] ? 0 : min_el;
                max_el = len2 > mmax[max_el] ? 0 : max_el;
            } else {
                mmax[0] = len3;
                evecs[0] = vec_tmp[2] * rsqrtf(len3);
                min_el = len3 <= mmax[min_el] ? 0 : min_el;
                max_el = len3 > mmax[max_el] ? 0 : max_el;
            }
            // Rebuild the two least reliable eigenvectors from cross products of
            // the others so the basis stays orthonormal.
            unsigned mid_el = 3 - min_el - max_el;
            evecs[min_el] = normalized(cross(evecs[(min_el + 1) % 3], evecs[(min_el + 2) % 3]));
            evecs[mid_el] = normalized(cross(evecs[(mid_el + 1) % 3], evecs[(mid_el + 2) % 3]));
        }
        // Rescale back to the original size (scaling affects eigenvalues only).
        evals *= scale;
    }

   private:
    // packed symmetric matrix: [m00 m01 m02 m11 m12 m22]
    volatile float *mat_pkg;
    __kf_device__ float m00() const { return mat_pkg[0]; }
    __kf_device__ float m01() const { return mat_pkg[1]; }
    __kf_device__ float m02() const { return mat_pkg[2]; }
    __kf_device__ float m10() const { return mat_pkg[1]; }
    __kf_device__ float m11() const { return mat_pkg[3]; }
    __kf_device__ float m12() const { return mat_pkg[4]; }
    __kf_device__ float m20() const { return mat_pkg[2]; }
    __kf_device__ float m21() const { return mat_pkg[4]; }
    __kf_device__ float m22() const { return mat_pkg[5]; }
    __kf_device__ float3 row0() const { return make_float3(m00(), m01(), m02()); }
    __kf_device__ float3 row1() const { return make_float3(m10(), m11(), m12()); }
    __kf_device__ float3 row2() const { return make_float3(m20(), m21(), m22()); }
    /** True when |x| is negligible compared to |y| (Eigen's isMuchSmallerThan). */
    __kf_device__ static bool isMuchSmallerThan(float x, float y) {
        // copied from <eigen>/include/Eigen/src/Core/NumTraits.h
        const float prec_sqr = numeric_limits<float>::epsilon() * numeric_limits<float>::epsilon();
        return x * x <= prec_sqr * y * y;
    }
};
/* Warp-level helpers built on inline PTX; WARP_SIZE is fixed at 32. */
struct Warp {
enum { LOG_WARP_SIZE = 5, WARP_SIZE = 1 << LOG_WARP_SIZE, STRIDE = WARP_SIZE };
/** \brief Returns the warp lane ID of the calling thread. */
static __kf_device__ unsigned int laneId() {
unsigned int ret;
asm("mov.u32 %0, %laneid;" : "=r"(ret));
return ret;
}
/** \brief Returns the index of the calling thread's warp within the block
 * (flattened thread id divided by the warp size). */
static __kf_device__ unsigned int id() {
int tid = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
return tid >> LOG_WARP_SIZE;
}
/** \brief Bit mask with a 1 for every lane lower than the calling lane.
 * Reads the %lanemask_lt register where available, else derives it from laneId(). */
static __kf_device__ int laneMaskLt() {
#if (__CUDA_ARCH__ >= 200)
unsigned int ret;
asm("mov.u32 %0, %lanemask_lt;" : "=r"(ret));
return ret;
#else
return 0xFFFFFFFF >> (32 - laneId());
#endif
}
/** \brief Exclusive prefix count of set ballot bits for lanes below this one. */
static __kf_device__ int binaryExclScan(int ballot_mask) { return __popc(Warp::laneMaskLt() & ballot_mask); }
};
/* Block-level helpers: thread indexing and shared-memory tree reductions. */
struct Block {
/** \brief Number of threads in the block. */
static __kf_device__ unsigned int stride() { return blockDim.x * blockDim.y * blockDim.z; }
/** \brief Flattened 1-D index of the calling thread within its 3-D block. */
static __kf_device__ int flattenedThreadId() {
return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
}
/** \brief In-place tree reduction over a shared-memory buffer of CTA_SIZE
 * elements; the result ends up in buffer[0]. \p op must be associative.
 * Assumes buffer was populated and __syncthreads() was issued by the caller.
 * NOTE(review): the tid < 32 tail omits synchronization and relies on
 * implicit warp-synchronous execution of the volatile buffer; on Volta+
 * independent thread scheduling this needs __syncwarp() — confirm target
 * architectures before reuse. */
template <int CTA_SIZE, typename T, class BinOp>
static __kf_device__ void reduce(volatile T *buffer, BinOp op) {
int tid = flattenedThreadId();
T val = buffer[tid];
if (CTA_SIZE >= 1024) {
if (tid < 512)
buffer[tid] = val = op(val, buffer[tid + 512]);
__syncthreads();
}
if (CTA_SIZE >= 512) {
if (tid < 256)
buffer[tid] = val = op(val, buffer[tid + 256]);
__syncthreads();
}
if (CTA_SIZE >= 256) {
if (tid < 128)
buffer[tid] = val = op(val, buffer[tid + 128]);
__syncthreads();
}
if (CTA_SIZE >= 128) {
if (tid < 64)
buffer[tid] = val = op(val, buffer[tid + 64]);
__syncthreads();
}
// final warp: no __syncthreads() between steps (warp-synchronous assumption)
if (tid < 32) {
if (CTA_SIZE >= 64) {
buffer[tid] = val = op(val, buffer[tid + 32]);
}
if (CTA_SIZE >= 32) {
buffer[tid] = val = op(val, buffer[tid + 16]);
}
if (CTA_SIZE >= 16) {
buffer[tid] = val = op(val, buffer[tid + 8]);
}
if (CTA_SIZE >= 8) {
buffer[tid] = val = op(val, buffer[tid + 4]);
}
if (CTA_SIZE >= 4) {
buffer[tid] = val = op(val, buffer[tid + 2]);
}
if (CTA_SIZE >= 2) {
buffer[tid] = val = op(val, buffer[tid + 1]);
}
}
}
/** \brief Same reduction, but each thread first stores its own \p init into
 * the buffer and every thread returns the reduced value (buffer[0]).
 * The shuffle-based fast path is compiled out by the trailing `&& 0`. */
template <int CTA_SIZE, typename T, class BinOp>
static __kf_device__ T reduce(volatile T *buffer, T init, BinOp op) {
int tid = flattenedThreadId();
T val = buffer[tid] = init;
__syncthreads();
if (CTA_SIZE >= 1024) {
if (tid < 512)
buffer[tid] = val = op(val, buffer[tid + 512]);
__syncthreads();
}
if (CTA_SIZE >= 512) {
if (tid < 256)
buffer[tid] = val = op(val, buffer[tid + 256]);
__syncthreads();
}
if (CTA_SIZE >= 256) {
if (tid < 128)
buffer[tid] = val = op(val, buffer[tid + 128]);
__syncthreads();
}
if (CTA_SIZE >= 128) {
if (tid < 64)
buffer[tid] = val = op(val, buffer[tid + 64]);
__syncthreads();
}
if (tid < 32) {
// disabled __shfl_xor variant (note the `&& 0`); kept for reference
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 && 0
if (CTA_SIZE >= 64)
val = op(val, buffer[tid + 32]);
if (CTA_SIZE >= 32)
val = op(val, __shfl_xor(val, 16));
if (CTA_SIZE >= 16)
val = op(val, __shfl_xor(val, 8));
if (CTA_SIZE >= 8)
val = op(val, __shfl_xor(val, 4));
if (CTA_SIZE >= 4)
val = op(val, __shfl_xor(val, 2));
if (CTA_SIZE >= 2)
buffer[tid] = op(val, __shfl_xor(val, 1));
#else
if (CTA_SIZE >= 64) {
buffer[tid] = val = op(val, buffer[tid + 32]);
}
if (CTA_SIZE >= 32) {
buffer[tid] = val = op(val, buffer[tid + 16]);
}
if (CTA_SIZE >= 16) {
buffer[tid] = val = op(val, buffer[tid + 8]);
}
if (CTA_SIZE >= 8) {
buffer[tid] = val = op(val, buffer[tid + 4]);
}
if (CTA_SIZE >= 4) {
buffer[tid] = val = op(val, buffer[tid + 2]);
}
if (CTA_SIZE >= 2) {
buffer[tid] = val = op(val, buffer[tid + 1]);
}
#endif
}
__syncthreads();
return buffer[0];
}
};
/* Shared-memory emulations of warp vote intrinsics for pre-Fermi devices;
 * on >= sm_20 the hardware intrinsics are used instead. */
struct Emulation {
/** \brief Intra-warp sum over \p ptr; returns the warp's total, which ends
 * up at the warp's base element ptr[tid - lane]. Relies on warp-synchronous
 * execution of the volatile buffer (no explicit sync). */
static __kf_device__ int warp_reduce(volatile int *ptr, const unsigned int tid) {
const unsigned int lane = tid & 31; // index of thread in warp (0..31)
if (lane < 16) {
int partial = ptr[tid];
ptr[tid] = partial = partial + ptr[tid + 16];
ptr[tid] = partial = partial + ptr[tid + 8];
ptr[tid] = partial = partial + ptr[tid + 4];
ptr[tid] = partial = partial + ptr[tid + 2];
ptr[tid] = partial = partial + ptr[tid + 1];
}
return ptr[tid - lane];
}
/** \brief Warp ballot: bit mask of lanes whose \p predicate is non-zero.
 * \p cta_buffer is scratch shared memory, only used on the emulation path. */
static __kf_device__ int Ballot(int predicate, volatile int *cta_buffer) {
#if __CUDA_ARCH__ >= 200
(void) cta_buffer;
return __ballot_sync(FULL_MASK, predicate);
#else
int tid = Block::flattenedThreadId();
cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
return warp_reduce(cta_buffer, tid);
#endif
}
/** \brief Warp all-vote: true iff \p predicate is non-zero on all 32 lanes
 * (the emulation path counts votes, so it assumes a full warp). */
static __kf_device__ bool All(int predicate, volatile int *cta_buffer) {
#if __CUDA_ARCH__ >= 200
(void) cta_buffer;
return __all_sync(FULL_MASK, predicate);
#else
int tid = Block::flattenedThreadId();
cta_buffer[tid] = predicate ? 1 : 0;
return warp_reduce(cta_buffer, tid) == 32;
#endif
}
};
} // namespace device
} // namespace kfusion
================================================
FILE: include/kfusion/cuda/texture_binder.hpp
================================================
#pragma once
#include <kfusion/cuda/device_array.hpp>
#include <kfusion/safe_call.hpp>
namespace kfusion {
namespace cuda {
/* RAII wrapper around the CUDA texture-reference API: each constructor binds a
 * device buffer to a texture reference and the destructor unbinds it.
 * NOTE(review): this is the legacy texture-reference API (deprecated in newer
 * CUDA toolkits in favor of texture objects) — confirm the toolkit versions
 * this project must support before modernizing. */
class TextureBinder {
   public:
// bind a pitched 2D device array
template <class T, enum cudaTextureReadMode readMode>
TextureBinder(const DeviceArray2D<T> &arr, const struct texture<T, 2, readMode> &tex) : texref(&tex) {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall(cudaBindTexture2D(0, tex, arr.ptr(), desc, arr.cols(), arr.rows(), arr.step()));
}
// bind a linear 1D device array
template <class T, enum cudaTextureReadMode readMode>
TextureBinder(const DeviceArray<T> &arr, const struct texture<T, 1, readMode> &tex) : texref(&tex) {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall(cudaBindTexture(0, tex, arr.ptr(), desc, arr.sizeBytes()));
}
// bind a raw pitched pointer/size descriptor
template <class T, enum cudaTextureReadMode readMode>
TextureBinder(const PtrStepSz<T> &arr, const struct texture<T, 2, readMode> &tex) : texref(&tex) {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall(cudaBindTexture2D(0, tex, arr.data, desc, arr.cols, arr.rows, arr.step));
}
// bind with a caller-supplied channel format description
template <class A, class T, enum cudaTextureReadMode readMode>
TextureBinder(const A &arr, const struct texture<T, 2, readMode> &tex, const cudaChannelFormatDesc &desc)
: texref(&tex) {
cudaSafeCall(cudaBindTexture2D(0, tex, arr.data, desc, arr.cols, arr.rows, arr.step));
}
// bind a raw linear pointer/size descriptor
template <class T, enum cudaTextureReadMode readMode>
TextureBinder(const PtrSz<T> &arr, const struct texture<T, 1, readMode> &tex) : texref(&tex) {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall(cudaBindTexture(0, tex, arr.data, desc, arr.size * arr.elemSize()));
}
// unbind on scope exit
~TextureBinder() { cudaSafeCall(cudaUnbindTexture(texref)); }
   private:
const struct textureReference *texref;
};
} // namespace cuda
namespace device {
using kfusion::cuda::TextureBinder;
}
} // namespace kfusion
================================================
FILE: include/kfusion/cuda/tsdf_volume.hpp
================================================
#pragma once
/* kinfu includes */
#include <kfusion/types.hpp>
/* pcl includes */
#include <pcl/common/geometry.h>
/* sobfu includes */
#include <sobfu/params.hpp>
/* sys headers */
#include <memory>
namespace kfusion {
namespace cuda {
/* Host-side handle for a TSDF (truncated signed distance function) volume
 * stored in GPU memory; declaration only, implementations live in the .cpp/.cu
 * sources. */
class KF_EXPORTS TsdfVolume {
   public:
TsdfVolume(const Params &params);
virtual ~TsdfVolume();
/** \brief (Re)allocates device storage for a volume of \p dims voxels. */
void create(const Vec3i &dims);
// volume geometry accessors
Vec3i getDims() const;
Vec3f getVoxelSize() const;
// raw device buffer holding the voxel data
const CudaData data() const;
CudaData data();
Vec3f getSize() const;
void setSize(const Vec3f &size);
// truncation distance of the signed distance function
float getTruncDist() const;
void setTruncDist(float &distance);
float getEta() const;
void setEta(float &eta);
// per-voxel integration weight cap
float getMaxWeight() const;
void setMaxWeight(float &weight);
// pose of the volume in world coordinates
Affine3f getPose() const;
void setPose(const Affine3f &pose);
float getRaycastStepFactor() const;
void setRaycastStepFactor(float &factor);
float getGradientDeltaFactor() const;
void setGradientDeltaFactor(float &factor);
Vec3i getGridOrigin() const;
void setGridOrigin(const Vec3i &origin);
/** \brief Resets the voxel data. */
virtual void clear();
void swap(CudaData &data);
virtual void applyAffine(const Affine3f &affine);
// fuse another volume / a depth frame into this volume
virtual void integrate(const TsdfVolume &phi_n_psi);
virtual void integrate(const Dists &dists, const Affine3f &camera_pose, const Intr &intr);
// analytic initializers for synthetic test shapes
virtual void initBox(const float3 &b);
virtual void initEllipsoid(const float3 &r);
virtual void initPlane(const float &z);
virtual void initSphere(const float3 &centre, const float &radius);
virtual void initTorus(const float2 &t);
void print_sdf_values();
/** Packed voxel entry: half-precision tsdf value plus an integer weight. */
struct Entry {
typedef unsigned short half;
half tsdf;
int weight;
static float half2float(half value);
static half float2half(float value);
};
   private:
CudaData data_;
float trunc_dist_;
float eta_;
float max_weight_;
Vec3i dims_;
Vec3f size_;
Affine3f pose_;
float gradient_delta_factor_;
float raycast_step_factor_;
};
} // namespace cuda
} // namespace kfusion
================================================
FILE: include/kfusion/exports.hpp
================================================
#pragma once
/* KF_EXPORTS marks the library's public symbols: expands to dllexport when
 * building the library on Windows, and to nothing everywhere else. */
#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined KFUSION_API_EXPORTS
#define KF_EXPORTS __declspec(dllexport)
#else
#define KF_EXPORTS
#endif
================================================
FILE: include/kfusion/internal.hpp
================================================
#pragma once
/* kinfu includes */
#include <kfusion/cuda/device_array.hpp>
#include <kfusion/safe_call.hpp>
/* pcl includes */
#include <pcl/point_types.h>
//#define USE_DEPTH
/* 4x4 float matrix stored as four float4 rows (device-friendly layout). */
struct Mat4f {
float4 data[4];
};
namespace kfusion {
namespace device {
// device-side aliases for image/point buffers used by the kernels
typedef float4 Normal;
typedef float4 Point;
typedef unsigned short ushort;
typedef unsigned char uchar;
typedef PtrStepSz<float> Dists;
typedef DeviceArray2D<ushort> Depth;
typedef DeviceArray2D<Normal> Normals;
typedef DeviceArray2D<Point> Points;
typedef DeviceArray2D<uchar4> Image;
typedef float3 *Displacements;
typedef DeviceArray<float4> Vertices;
typedef Vertices Norms;
/*
 * struct which holds a surface as triangles
 */
struct Surface {
Vertices vertices;
Norms normals;
};
/* eight-component float vector (no CUDA built-in beyond float4) */
struct float8 {
float x, y, z, w, c1, c2, c3, c4;
};
typedef float4 PointType;
typedef int3 Vec3i;
typedef float3 Vec3f;
/* 3x3 float matrix as three float3 rows */
struct Mat3f {
float3 data[3];
};
/* rigid transform: rotation R plus translation t */
struct Aff3f {
Mat3f R;
Vec3f t;
};
/* Lightweight device-side view of a TSDF volume: a raw float2 buffer
 * (tsdf value, weight per voxel — see Entry in the host class) plus the
 * parameters kernels need. Does not own the data. */
struct TsdfVolume {
   public:
float2 *const data;
const int3 dims;
const float3 voxel_size;
const float trunc_dist;
const float eta;
const float max_weight;
TsdfVolume(float2 *const data, int3 dims, float3 voxel_size, float trunc_dist, float eta, float max_weight);
// TsdfVolume(const TsdfVolume &);
TsdfVolume &operator=(const TsdfVolume &);
// voxel accessors; beg()/zstep() support iterating along z
__kf_device__ float2 *operator()(int x, int y, int z);
__kf_device__ const float2 *operator()(int x, int y, int z) const;
__kf_device__ float2 *beg(int x, int y) const;
__kf_device__ float2 *zstep(float2 *const ptr) const;
};
/* Pinhole projection: 3D camera-space point -> 2D pixel, using focal lengths
 * f and principal point c. */
struct Projector {
float2 f, c;
Projector() {}
Projector(float fx, float fy, float cx, float cy);
__kf_device__ float2 operator()(const float3 &p) const;
};
/* Inverse pinhole projection: pixel (x, y) at depth z -> 3D camera-space
 * point; finv holds the reciprocal focal lengths. */
struct Reprojector {
Reprojector() {}
Reprojector(float fx, float fy, float cx, float cy);
float2 finv, c;
__kf_device__ float3 operator()(int x, int y, float z) const;
};
/* Marching-cubes helper: reads tsdf values and computes the 8-bit cube
 * configuration index for a voxel against isoValue (fixed at 0). */
struct CubeIndexEstimator {
CubeIndexEstimator(const TsdfVolume &vol) : volume(vol) { isoValue = 0.f; }
const TsdfVolume volume;
float isoValue;
__kf_device__ int computeCubeIndex(int x, int y, int z, float f[8]) const;
__kf_device__ void readTsdf(int x, int y, int z, float &f, float &weight) const;
};
/* Marching-cubes pass 1 functor: scans the volume and records, for each voxel
 * the isosurface crosses, its flat index and vertex count into the output
 * buffers (capped at max_size). */
struct OccupiedVoxels : public CubeIndexEstimator {
OccupiedVoxels(const TsdfVolume &vol) : CubeIndexEstimator({vol}) {}
enum {
CTA_SIZE_X = 32,
CTA_SIZE_Y = 8,
CTA_SIZE = CTA_SIZE_X * CTA_SIZE_Y,
LOG_WARP_SIZE = 5,
WARP_SIZE = 1 << LOG_WARP_SIZE,
WARPS_COUNT = CTA_SIZE / WARP_SIZE,
};
// output buffers filled by the kernel
mutable int *voxels_indices;
mutable int *vertices_number;
int max_size;
__kf_device__ void operator()() const;
};
/* Marching-cubes pass 2 functor: emits the triangle vertices and normals for
 * every occupied voxel found in pass 1. */
struct TrianglesGenerator : public CubeIndexEstimator {
TrianglesGenerator(const TsdfVolume &vol) : CubeIndexEstimator({vol}) {}
enum { CTA_SIZE = 256, MAX_GRID_SIZE_X = 65536 };
const int *occupied_voxels;
const int *vertex_ofssets; // [sic] vertex offsets; identifier typo kept for ABI/caller compatibility
int voxels_count;
float3 cell_size;
Aff3f pose;
mutable device::PointType *outputVertices;
mutable device::PointType *outputNormals;
__kf_device__ void operator()() const;
__kf_device__ void store_point(float4 *ptr, int index, const float3 &vertex) const;
__kf_device__ float3 get_node_coo(int x, int y, int z) const;
// linear interpolation of the isosurface crossing between p0 and p1
__kf_device__ float3 vertex_interp(float3 p0, float3 p1, float f0, float f1) const;
}; // struct TrianglesGenerator (not the end of namespace device)
/* Device-side helper for projective ICP: finds point correspondences between
 * the current and previous frame and partially reduces the normal-equation
 * terms into a buffer that the host finishes summing. */
struct ComputeIcpHelper {
struct Policy;
/* pinned host memory for asynchronous read-back of the reduced terms */
struct PageLockHelper {
float *data;
PageLockHelper();
~PageLockHelper();
};
// correspondence rejection thresholds
float min_cosine;
float dist2_thres;
// current camera pose estimate and per-level intrinsics
Aff3f aff;
float rows, cols;
float2 f, c, finv;
// current-frame inputs
PtrStep<ushort> dcurr;
PtrStep<Normal> ncurr;
PtrStep<Point> vcurr;
ComputeIcpHelper(float dist_thres, float angle_thres);
void setLevelIntr(int level_index, float fx, float fy, float cx, float cy);
// depth-based and point-based variants of the per-level ICP step
void operator()(const Depth &dprev, const Normals &nprev, DeviceArray2D<float> &buffer, float *data,
cudaStream_t stream);
void operator()(const Points &vprev, const Normals &nprev, DeviceArray2D<float> &buffer, float *data,
cudaStream_t stream);
static void allocate_buffer(DeviceArray2D<float> &buffer, int partials_count = -1);
// private:
__kf_device__ int find_coresp(int x, int y, float3 &n, float3 &d, float3 &s) const;
__kf_device__ void partial_reduce(const float row[7], PtrStep<float> &partial_buffer) const;
__kf_device__ float2 proj(const float3 &p) const;
__kf_device__ float3 reproj(float x, float y, float z) const;
};
/*
 * TSDF FUNCTIONS
 */
void clear_volume(TsdfVolume &volume);
/** \brief fuses phi_n_psi into the global volume */
void integrate(TsdfVolume &phi_global, TsdfVolume &phi_n_psi);
/** \brief integrates a depth frame given the camera pose and projection */
void integrate(const Dists &depth, TsdfVolume &volume, const Aff3f &aff, const Projector &proj);
/* analytic initializers for synthetic test shapes */
void init_box(TsdfVolume &volume, const float3 &b);
void init_ellipsoid(TsdfVolume &volume, const float3 &r);
void init_plane(TsdfVolume &volume, const float &z);
void init_sphere(TsdfVolume &volume, const float3 &centre, const float &radius);
void init_torus(TsdfVolume &volume, const float2 &t);
/*
 * MARCHING CUBES FUNCTIONS
 */
/** \brief binds marching cubes tables to texture references */
void bindTextures(const int *edgeBuf, const int *triBuf, const int *numVertsBuf);
/** \brief unbinds */
void unbindTextures();
/** \brief scans TSDF volume and retrieves occupied voxels
 * \param[in] volume TSDF volume
 * \param[out] occupied_voxels buffer for occupied voxels; the function fills the first row with voxel id's and second
 * row with the no. of vertices
 * \return number of voxels in the buffer
 */
int getOccupiedVoxels(const TsdfVolume &volume, DeviceArray2D<int> &occupied_voxels);
/** \brief computes total number of vertices for all voxels and offsets of vertices in final triangle array
 * \param[out] occupied_voxels buffer with occupied voxels; the function fills 3rd only with offsets
 * \return total number of vertices
 */
int computeOffsetsAndTotalVertices(DeviceArray2D<int> &occupied_voxels);
/** \brief generates final triangle array
 * \param[in] volume TSDF volume
 * \param[in] occupied_voxels occupied voxel ids (1st row), number of vertices (2nd row), offsets (3rd row).
 * \param[in] volume_size volume size in meters
 * \param[out] output triangle array
 */
void generateTriangles(const TsdfVolume &volume, const DeviceArray2D<int> &occupied_voxels, const float3 &volume_size,
const Aff3f &pose, DeviceArray<PointType> &outputVertices,
DeviceArray<PointType> &outputNormals);
/*
 * IMAGE PROCESSING FUNCTIONS
 */
void compute_dists(const Depth &depth, Dists dists, float2 f, float2 c);
void truncateDepth(Depth &depth, float max_dist /*meters*/);
void bilateralFilter(const Depth &src, Depth &dst, int kernel_size, float sigma_spatial, float sigma_depth);
void depthPyr(const Depth &source, Depth &pyramid, float sigma_depth);
void resizeDepthNormals(const Depth &depth, const Normals &normals, Depth &depth_out, Normals &normals_out);
void resizePointsNormals(const Points &points, const Normals &normals, Points &points_out, Normals &normals_out);
void computeNormalsAndMaskDepth(const Reprojector &reproj, Depth &depth, Normals &normals);
void computePointNormals(const Reprojector &reproj, const Depth &depth, Points &points, Normals &normals);
/** \brief rasterises the surface extracted via marching cubes
 * \param[in] proj
 * \param[in] surface
 * \param[out] vertices_out
 * \param[out] normals_out
 */
void rasteriseSurface(const Projector &proj, const Aff3f &vol2cam, const Surface &surface, Points &vertices_out,
Normals &normals_out);
} // namespace device
} // namespace kfusion
================================================
FILE: include/kfusion/kinfu.hpp
================================================
#pragma once
/* boost includes */
#include <boost/filesystem.hpp>
/* kinfu includes */
#include <kfusion/cuda/marching_cubes.hpp>
#include <kfusion/cuda/projective_icp.hpp>
#include <kfusion/cuda/tsdf_volume.hpp>
#include <kfusion/types.hpp>
/* pcl includes */
#include <pcl/PCLPointCloud2.h>
#include <pcl/PolygonMesh.h>
#include <pcl/conversions.h>
#include <pcl/point_cloud.h>
#include <pcl/point_types.h>
/* sys headers */
#include <string>
#include <vector>
namespace kfusion {
namespace cuda {
KF_EXPORTS int getCudaEnabledDeviceCount();
KF_EXPORTS void setDevice(int device);
KF_EXPORTS std::string getDeviceName(int device);
KF_EXPORTS bool checkIfPreFermiGPU(int device);
KF_EXPORTS void printCudaDeviceInfo(int device);
KF_EXPORTS void printShortCudaDeviceInfo(int device);
} // namespace cuda
/* All tunable parameters of the KinectFusion pipeline; default_params()
 * returns a sensible preset. */
struct KF_EXPORTS KinFuParams {
static KinFuParams default_params();
int cols; // pixels
int rows; // pixels
Intr intr; // Camera parameters
Vec3i volume_dims; // number of voxels
Vec3f volume_size; // meters
Affine3f volume_pose; // meters, initial pose
float bilateral_sigma_depth; // meters
float bilateral_sigma_spatial; // pixels
int bilateral_kernel_size; // pixels
float icp_truncate_depth_dist; // meters
float icp_dist_thres; // meters
float icp_angle_thres; // radians
std::vector<int> icp_iter_num; // iterations for level index 0,1,..,3
float tsdf_min_camera_movement; // meters, integrate only if exceeded
float tsdf_trunc_dist; // meters;
float tsdf_max_weight; // frames
float raycast_step_factor; // in voxel sizes
float gradient_delta_factor; // in voxel sizes
Vec3f light_pose; // meters
};
/* Top-level KinectFusion pipeline: owns the TSDF volume, the ICP tracker and
 * the marching-cubes extractor, and processes depth frames one at a time. */
class KF_EXPORTS KinFu {
   public:
typedef cv::Ptr<KinFu> Ptr;
KinFu(const KinFuParams &params);
const KinFuParams &params() const;
KinFuParams &params();
// pipeline component accessors
const cuda::TsdfVolume &tsdf() const;
cuda::TsdfVolume &tsdf();
const cuda::ProjectiveICP &icp() const;
cuda::ProjectiveICP &icp();
const cuda::MarchingCubes &mc() const;
cuda::MarchingCubes &mc();
/** \brief Resets the volume and the pose history. */
void reset();
/** \brief Processes one frame; returns whether tracking succeeded. */
bool operator()(const cuda::Depth &depth, const cuda::Image &image = cuda::Image());
void renderImage(cuda::Image &image, int flag = 0);
void renderImage(cuda::Image &image, const Affine3f &pose, int flag = 0);
/** \brief Camera pose at frame \p time; -1 selects the latest pose. */
Affine3f getCameraPose(int time = -1) const;
   protected:
void allocate_buffers();
int frame_counter_;
KinFuParams params_;
// per-frame pose history
std::vector<Affine3f> poses_;
// working buffers
cuda::Dists dists_;
cuda::Frame curr_, prev_;
cuda::Cloud points_;
cuda::Normals normals_;
cuda::Depth depths_;
cv::Ptr<cuda::TsdfVolume> volume_;
cv::Ptr<cuda::ProjectiveICP> icp_;
cv::Ptr<cuda::MarchingCubes> mc_;
};
} // namespace kfusion
================================================
FILE: include/kfusion/precomp.hpp
================================================
#pragma once
/* cuda includes */
#include <vector_functions.h>
/* kinfu includes */
#include <kfusion/cuda/imgproc.hpp>
#include <kfusion/cuda/marching_cubes.hpp>
#include <kfusion/cuda/projective_icp.hpp>
#include <kfusion/cuda/tsdf_volume.hpp>
#include <kfusion/internal.hpp>
#include <kfusion/kinfu.hpp>
#include <kfusion/types.hpp>
/* sys headers */
#include <iostream>
namespace kfusion {
/** Reinterprets an OpenCV small-matrix/vector type (whose storage is the
 * member array `val`) as the corresponding CUDA device type.
 * Precondition: D and S must have identical memory layout — this is a raw
 * aliasing cast, no conversion is performed. */
template <typename D, typename S>
inline D device_cast(const S &source) {
return *reinterpret_cast<const D *>(source.val);
}
/** Converts an OpenCV affine transform into the device-side Aff3f by casting
 * its rotation matrix and translation vector separately (Affine3f has no
 * `val` member, so the generic device_cast cannot be used directly). */
template <>
inline device::Aff3f device_cast<device::Aff3f, Affine3f>(const Affine3f &source) {
    Mat3f rotation    = source.rotation();
    Vec3f translation = source.translation();
    device::Aff3f result;
    result.R = device_cast<device::Mat3f>(rotation);
    result.t = device_cast<device::Vec3f>(translation);
    return result;
}
} // namespace kfusion
================================================
FILE: include/kfusion/safe_call.hpp
================================================
#pragma once
#include <cuda_runtime_api.h>
namespace kfusion {
namespace cuda {
void error(const char *error_string, const char *file, const int line, const char *func);
}
} // namespace kfusion
/* Wraps a CUDA runtime call and reports failures with file/line (and, where
 * the compiler provides __func__, the calling function's name). */
#if defined(__GNUC__)
#define cudaSafeCall(expr) kfusion::cuda::___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) kfusion::cuda::___cudaSafeCall(expr, __FILE__, __LINE__)
#endif
namespace kfusion {
namespace cuda {
/** Forwards a failed CUDA status to the project's error handler; no-op on
 * cudaSuccess. Used via the cudaSafeCall() macro, which supplies the call site. */
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") {
if (cudaSuccess != err)
error(cudaGetErrorString(err), file, line, func);
}
static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }
} // namespace cuda
namespace device {
using kfusion::cuda::divUp;
}
} // namespace kfusion
================================================
FILE: include/kfusion/types.hpp
================================================
#pragma once
/* cuda includes */
#include <vector_functions.h>
/* kinfu includes */
#include <kfusion/cuda/device_array.hpp>
/* pcl includes */
#include <pcl/point_types.h>
/* opencv includes */
#include <opencv2/core/affine.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/viz/vizcore.hpp>
/* sys headers */
#include <iosfwd>
struct CUevent_st;
namespace kfusion {
typedef cv::Matx33f Mat3f;
typedef cv::Vec3f Vec3f;
typedef cv::Vec3i Vec3i;
typedef cv::Affine3f Affine3f;
/* pinhole camera intrinsics; operator()(level_index) returns the intrinsics for the
 * given pyramid level (definitions live outside this header) */
struct KF_EXPORTS Intr {
    float fx, fy, cx, cy; /* focal lengths and principal point, in pixels */
    Intr();
    Intr(float fx, float fy, float cx, float cy);
    Intr operator()(int level_index) const;
};
KF_EXPORTS std::ostream &operator<<(std::ostream &os, const Intr &intr);
/* 3d point; the union gives both named access (x, y, z) and raw float[4]-style access
 * via data for interop (data[3] is padding and is left uninitialised by the ctor) */
struct Point {
    union {
        float data[4];
        struct {
            float x, y, z;
        };
    };
    Point() = default;
    Point(float x, float y, float z) {
        this->x = x;
        this->y = y;
        this->z = z;
    }
};
/* normals share the point layout */
typedef Point Normal;
/* bgr colour; `bgra` aliases the three bytes (plus one unused byte) as a single int */
struct RGB {
    union {
        struct {
            unsigned char b, g, r;
        };
        int bgra;
    };
};
/* plain rgb triple, one byte per channel */
struct PixelRGB {
    unsigned char r, g, b;
};
namespace cuda {
typedef cuda::DeviceMemory CudaData;
typedef cuda::CudaData Displacements;
typedef cuda::DeviceArray2D<unsigned short> Depth;
typedef cuda::DeviceArray2D<float> Dists;
typedef cuda::DeviceArray2D<RGB> Image;
typedef cuda::DeviceArray2D<Normal> Normals;
typedef cuda::DeviceArray2D<Point> Cloud;
typedef cuda::DeviceArray<pcl::PointXYZ> Vertices;
typedef cuda::DeviceArray<pcl::Normal> Norms;
/*
 * struct used to hold the surface as triangles
 */
struct Surface {
    Vertices vertices; /* triangle vertices on the gpu */
    Norms normals;     /* per-vertex normals on the gpu */
};
/* one input frame stored as per-level pyramids; use_points presumably selects whether
 * the point pyramid is used instead of the depth pyramid — confirm with users of Frame */
struct Frame {
    bool use_points;
    std::vector<Depth> depth_pyr;
    std::vector<Cloud> points_pyr;
    std::vector<Normals> normals_pyr;
};
} // namespace cuda
inline float deg2rad(float alpha) { return alpha * 0.017453293f; }
/* raii timer: records a start time at construction; the destructor presumably reports
 * the elapsed time together with `name` — implementation is outside this header */
struct KF_EXPORTS ScopeTime {
    const char *name;
    double start;
    ScopeTime(const char *name);
    ~ScopeTime();
};
/* raii timer that accumulates elapsed time into the referenced time_ms_ and samples
 * it every EACH iterations; copying is disabled (private copy ctor/assignment) */
struct KF_EXPORTS SampledScopeTime {
public:
    enum { EACH = 34 };
    SampledScopeTime(double &time_ms);
    ~SampledScopeTime();

private:
    double getTime();
    SampledScopeTime(const SampledScopeTime &);
    SampledScopeTime &operator=(const SampledScopeTime &);
    double &time_ms_;
    double start;
};
} // namespace kfusion
================================================
FILE: include/sobfu/cuda/utils.hpp
================================================
#pragma once
/* cuda includes */
#include <cuda.h>
#include <math_constants.h>
/* kinfu includes */
#include <kfusion/internal.hpp>
/* utility class used to avoid linker errors with extern unsized shared memory arrays with templated type */
template <class T>
struct SharedMemory {
    /* returns the kernel's dynamic shared memory buffer re-typed as T* */
    __device__ inline operator T *() {
        extern __shared__ int __smem[];
        return (T *) __smem;
    }
    __device__ inline operator const T *() const {
        extern __shared__ int __smem[];
        return (T *) __smem;
    }
};
/* get 1d index from 3d coordinates; linearises (x, y, z) with x varying fastest,
 * i.e. idx = (z * dim_y + y) * dim_x + x */
__device__ __forceinline__ int get_global_idx(int x, int y, int z, int dim_x, int dim_y) {
    /* fix: the z stride is one xy-slice, dim_x * dim_y; the previous `z * dim_y * dim_y`
     * was only correct for volumes with dim_x == dim_y */
    return z * dim_x * dim_y + y * dim_x + x;
}
/*
 * LERP
 */
/* linear interpolation via single-rounding fma; note the convention:
 * lerp(v0, v1, t) = t * v0 + (1 - t) * v1, i.e. t == 0 yields v1 and t == 1 yields v0,
 * which is reversed w.r.t. the usual lerp(a, b, t) = a + t * (b - a); the interpolation
 * routines in this header pass the upper-corner sample first to match this convention */
template <typename T>
__host__ __device__ __forceinline__ T lerp(T v0, T v1, T t) {
    return fma(t, v0, fma(-t, v1, v1));
}
/* component-wise lerp for float3; follows lerp's convention (t == 1 selects v0) */
__host__ __device__ __forceinline__ float3 lerp3f(float3 v0, float3 v1, float t) {
    return make_float3(lerp<float>(v0.x, v1.x, t), lerp<float>(v0.y, v1.y, t), lerp<float>(v0.z, v1.z, t));
}
/* component-wise lerp for float4 (t == 1 selects v0); the w component is zeroed */
__host__ __device__ __forceinline__ float4 lerp4f(float4 v0, float4 v1, float t) {
    return make_float4(lerp<float>(v0.x, v1.x, t), lerp<float>(v0.y, v1.y, t), lerp<float>(v0.z, v1.z, t), 0.f);
}
/*
 * INTERPOLATION
 */
/* trilinear interpolation of the tsdf at a (possibly fractional) voxel coordinate;
 * the coordinate is clamped to the volume bounds first; returns (interpolated tsdf,
 * weight), where the weight is taken from the lower-corner voxel, not interpolated */
template <typename Vol>
__device__ __forceinline__ float2 interpolate_tsdf(const Vol &volume, const float3 &p_voxels) {
    float3 cf = p_voxels;
    cf.x = fminf(fmaxf(0.f, cf.x), (float) volume.dims.x - 1);
    cf.y = fminf(fmaxf(0.f, cf.y), (float) volume.dims.y - 1);
    cf.z = fminf(fmaxf(0.f, cf.z), (float) volume.dims.z - 1);
    /* rounding to negative infinity */
    int3 g = make_int3(__float2int_rd(cf.x), __float2int_rd(cf.y), __float2int_rd(cf.z));
    /* upper-corner index per axis, collapsed onto the lower corner when the clamped
     * coordinate lies exactly on a volume face so no out-of-bounds voxel is read */
    int idx_x_1 = g.x + 1;
    if (cf.x == 0.f || cf.x == (float) volume.dims.x - 1) {
        idx_x_1--;
    }
    int idx_y_1 = g.y + 1;
    if (cf.y == 0.f || cf.y == (float) volume.dims.y - 1) {
        idx_y_1--;
    }
    int idx_z_1 = g.z + 1;
    if (cf.z == 0.f || cf.z == (float) volume.dims.z - 1) {
        idx_z_1--;
    }
    /* fractional offsets within the cell */
    float a = cf.x - g.x;
    float b = cf.y - g.y;
    float c = cf.z - g.z;
    /* lerp(v0, v1, t) = t * v0 + (1 - t) * v1, hence the upper corner goes first */
    float tsdf = lerp(lerp(lerp((*volume(idx_x_1, idx_y_1, idx_z_1)).x, (*volume(idx_x_1, idx_y_1, g.z + 0)).x, c),
                           lerp((*volume(idx_x_1, g.y + 0, idx_z_1)).x, (*volume(idx_x_1, g.y + 0, g.z + 0)).x, c), b),
                      lerp(lerp((*volume(g.x + 0, idx_y_1, idx_z_1)).x, (*volume(g.x + 0, idx_y_1, g.z + 0)).x, c),
                           lerp((*volume(g.x + 0, g.y + 0, idx_z_1)).x, (*volume(g.x + 0, g.y + 0, g.z + 0)).x, c), b),
                      a);
    float weight = (*volume(g.x, g.y, g.z)).y;
    return make_float2(tsdf, weight);
}
/* trilinear interpolation of a vector field at a fractional voxel coordinate; the
 * coordinate is clamped to the field bounds and the upper corner is collapsed onto
 * the lower one when the clamped coordinate lies exactly on a volume face */
template <typename Field>
__device__ __forceinline__ float4 interpolate_field(const Field &field, const float3 &p_voxels) {
    float3 cf = p_voxels;
    cf.x = fminf(fmaxf(0.f, cf.x), (float) field.dims.x - 1);
    cf.y = fminf(fmaxf(0.f, cf.y), (float) field.dims.y - 1);
    cf.z = fminf(fmaxf(0.f, cf.z), (float) field.dims.z - 1);
    /* rounding to negative infinity */
    int3 g = make_int3(__float2int_rd(cf.x), __float2int_rd(cf.y), __float2int_rd(cf.z));
    int idx_x_1 = g.x + 1;
    if (cf.x == 0.f || cf.x == (float) field.dims.x - 1) {
        idx_x_1--;
    }
    int idx_y_1 = g.y + 1;
    if (cf.y == 0.f || cf.y == (float) field.dims.y - 1) {
        idx_y_1--;
    }
    int idx_z_1 = g.z + 1;
    if (cf.z == 0.f || cf.z == (float) field.dims.z - 1) {
        idx_z_1--;
    }
    /* fractional offsets within the cell */
    float a = cf.x - g.x;
    float b = cf.y - g.y;
    float c = cf.z - g.z;
    /* lerp4f(v0, v1, t) = t * v0 + (1 - t) * v1, hence the upper corner goes first */
    float4 result = lerp4f(lerp4f(lerp4f(*field(idx_x_1, idx_y_1, idx_z_1), *field(idx_x_1, idx_y_1, g.z + 0), c),
                                  lerp4f(*field(idx_x_1, g.y + 0, idx_z_1), *field(idx_x_1, g.y + 0, g.z + 0), c), b),
                           lerp4f(lerp4f(*field(g.x + 0, idx_y_1, idx_z_1), *field(g.x + 0, idx_y_1, g.z + 0), c),
                                  lerp4f(*field(g.x + 0, g.y + 0, idx_z_1), *field(g.x + 0, g.y + 0, g.z + 0), c), b),
                           a);
    return result;
}
/* trilinear interpolation like interpolate_field, but samples the field through
 * field.get_displacement instead of dereferencing the raw values */
template <typename Field>
__device__ __forceinline__ float4 interpolate_field_inv(const Field &field, const float3 &p_voxels) {
    float3 cf = p_voxels;
    cf.x = fminf(fmaxf(0.f, cf.x), (float) field.dims.x - 1);
    cf.y = fminf(fmaxf(0.f, cf.y), (float) field.dims.y - 1);
    cf.z = fminf(fmaxf(0.f, cf.z), (float) field.dims.z - 1);
    /* rounding to negative infinity */
    int3 g = make_int3(__float2int_rd(cf.x), __float2int_rd(cf.y), __float2int_rd(cf.z));
    /* collapse the upper corner onto the lower one on the volume faces */
    int idx_x_1 = g.x + 1;
    if (cf.x == 0.f || cf.x == (float) field.dims.x - 1) {
        idx_x_1--;
    }
    int idx_y_1 = g.y + 1;
    if (cf.y == 0.f || cf.y == (float) field.dims.y - 1) {
        idx_y_1--;
    }
    int idx_z_1 = g.z + 1;
    if (cf.z == 0.f || cf.z == (float) field.dims.z - 1) {
        idx_z_1--;
    }
    /* fractional offsets within the cell */
    float a = cf.x - g.x;
    float b = cf.y - g.y;
    float c = cf.z - g.z;
    /* lerp4f(v0, v1, t) = t * v0 + (1 - t) * v1, hence the upper corner goes first */
    float4 result = lerp4f(lerp4f(lerp4f(field.get_displacement(idx_x_1, idx_y_1, idx_z_1),
                                         field.get_displacement(idx_x_1, idx_y_1, g.z + 0), c),
                                  lerp4f(field.get_displacement(idx_x_1, g.y + 0, idx_z_1),
                                         field.get_displacement(idx_x_1, g.y + 0, g.z + 0), c),
                                  b),
                           lerp4f(lerp4f(field.get_displacement(g.x + 0, idx_y_1, idx_z_1),
                                         field.get_displacement(g.x + 0, idx_y_1, g.z + 0), c),
                                  lerp4f(field.get_displacement(g.x + 0, g.y + 0, idx_z_1),
                                         field.get_displacement(g.x + 0, g.y + 0, g.z + 0), c),
                                  b),
                           a);
    return result;
}
/* raw-pointer overload of interpolate_field for a field stored as a flat float4
 * buffer; linear indices are computed with get_global_idx */
__device__ __forceinline__ float4 interpolate_field(const float4 *field, const float3 &p_voxels, int dim_x, int dim_y,
                                                    int dim_z) {
    float3 cf = p_voxels;
    cf.x = fminf(fmaxf(0.f, cf.x), (float) dim_x - 1);
    cf.y = fminf(fmaxf(0.f, cf.y), (float) dim_y - 1);
    cf.z = fminf(fmaxf(0.f, cf.z), (float) dim_z - 1);
    /* rounding to negative infinity */
    int3 g = make_int3(__float2int_rd(cf.x), __float2int_rd(cf.y), __float2int_rd(cf.z));
    /* collapse the upper corner onto the lower one on the volume faces */
    int idx_x_1 = g.x + 1;
    if (cf.x == 0.f || cf.x == (float) dim_x - 1) {
        idx_x_1--;
    }
    int idx_y_1 = g.y + 1;
    if (cf.y == 0.f || cf.y == (float) dim_y - 1) {
        idx_y_1--;
    }
    int idx_z_1 = g.z + 1;
    if (cf.z == 0.f || cf.z == (float) dim_z - 1) {
        idx_z_1--;
    }
    /* fractional offsets within the cell */
    float a = cf.x - g.x;
    float b = cf.y - g.y;
    float c = cf.z - g.z;
    /* lerp4f(v0, v1, t) = t * v0 + (1 - t) * v1, hence the upper corner goes first */
    float4 result = lerp4f(lerp4f(lerp4f(field[get_global_idx(idx_x_1, idx_y_1, idx_z_1, dim_x, dim_y)],
                                         field[get_global_idx(idx_x_1, idx_y_1, g.z + 0, dim_x, dim_y)], c),
                                  lerp4f(field[get_global_idx(idx_x_1, g.y + 0, idx_z_1, dim_x, dim_y)],
                                         field[get_global_idx(idx_x_1, g.y + 0, g.z + 0, dim_x, dim_y)], c),
                                  b),
                           lerp4f(lerp4f(field[get_global_idx(g.x + 0, idx_y_1, idx_z_1, dim_x, dim_y)],
                                         field[get_global_idx(g.x + 0, idx_y_1, g.z + 0, dim_x, dim_y)], c),
                                  lerp4f(field[get_global_idx(g.x + 0, g.y + 0, idx_z_1, dim_x, dim_y)],
                                         field[get_global_idx(g.x + 0, g.y + 0, g.z + 0, dim_x, dim_y)], c),
                                  b),
                           a);
    return result;
}
/*
 * FLOAT2
 */
/* euclidean norm of a float2 (round-to-nearest intrinsics) */
__device__ __forceinline__ float norm(float2 v) {
    return __fsqrt_rn(__fadd_rn(__fmul_rn(v.x, v.x), __fmul_rn(v.y, v.y)));
}
/*
 * FLOAT3
 */
/* in-place component-wise subtraction */
__host__ __device__ __forceinline__ float3 &operator-=(float3 &v1, const float3 &v2) {
    v1.x -= v2.x;
    v1.y -= v2.y;
    v1.z -= v2.z;
    return v1;
}
/* component-wise division by a scalar (fast, approximate __fdividef) */
__device__ __forceinline__ float3 operator/(const float3 &v, const float &d) {
    return make_float3(__fdividef(v.x, d), __fdividef(v.y, d), __fdividef(v.z, d));
}
/* euclidean norm; uses the round-down sqrt intrinsic (__fsqrt_rd) */
__device__ __forceinline__ float n(const float3 &v) {
    return __fsqrt_rd(__fmul_rn(v.x, v.x) + __fmul_rn(v.y, v.y) + __fmul_rn(v.z, v.z));
}
/* component-wise square */
__device__ __forceinline__ float3 sq(const float3 &v) {
    return make_float3(__fmul_rn(v.x, v.x), __fmul_rn(v.y, v.y), __fmul_rn(v.z, v.z));
}
/* sqrt of the component sum plus a small epsilon; presumably fed with
 * already-squared components (see sq) — confirm at call sites */
__device__ __forceinline__ float rms_epsilon(const float3 &v) { return __fsqrt_rn(v.x + v.y + v.z + 1e-5f); }
/*
 * FLOAT4
 */
/* component-wise addition of the xyz parts; w is zeroed */
__device__ __forceinline__ float4 operator+(const float4 &v1, const float4 &v2) {
    return make_float4(__fadd_rn(v1.x, v2.x), __fadd_rn(v1.y, v2.y), __fadd_rn(v1.z, v2.z), 0.f);
}
/* component-wise subtraction of the xyz parts; w is zeroed */
__device__ __forceinline__ float4 operator-(const float4 &v1, const float4 &v2) {
    return make_float4(__fadd_rn(v1.x, -v2.x), __fadd_rn(v1.y, -v2.y), __fadd_rn(v1.z, -v2.z), 0.f);
}
/* in-place addition; w is left untouched */
__host__ __device__ __forceinline__ float4 &operator+=(float4 &v1, const float4 &v2) {
    v1.x += v2.x;
    v1.y += v2.y;
    v1.z += v2.z;
    return v1;
}
/* in-place subtraction; w is left untouched */
__host__ __device__ __forceinline__ float4 &operator-=(float4 &v1, const float4 &v2) {
    v1.x -= v2.x;
    v1.y -= v2.y;
    v1.z -= v2.z;
    return v1;
}
/* scaling of the xyz part by a scalar; w is zeroed */
__device__ __forceinline__ float4 operator*(const float4 &v, const float &m) {
    return make_float4(__fmul_rn(v.x, m), __fmul_rn(v.y, m), __fmul_rn(v.z, m), 0.f);
}
__device__ __forceinline__ float4 operator*(const float &m, const float4 &v) { return v * m; }
/* component-wise division of the xyz part by a scalar (approximate __fdividef); w is zeroed */
__device__ __forceinline__ float4 operator/(const float4 &v, const float &d) {
    return make_float4(__fdividef(v.x, d), __fdividef(v.y, d), __fdividef(v.z, d), 0.f);
}
__device__ __forceinline__ float4 operator/(const float &d, const float4 &v) { return v / d; }
/* euclidean norm of the xyz part; NOTE(review): uses the round-down sqrt
 * (__fsqrt_rd) whereas norm(float2) uses round-to-nearest — confirm intentional */
__device__ __forceinline__ float norm(const float4 &v) {
    return __fsqrt_rd(__fmul_rn(v.x, v.x) + __fmul_rn(v.y, v.y) + __fmul_rn(v.z, v.z));
}
/* squared euclidean norm of the xyz part */
__device__ __forceinline__ float norm_sq(const float4 &v) {
    return __fmul_rn(v.x, v.x) + __fmul_rn(v.y, v.y) + __fmul_rn(v.z, v.z);
}
/* returns v scaled to unit length, or v unchanged when its norm is <= 1e-5 */
__device__ __forceinline__ float4 normalised(const float4 &v) {
    float n = norm(v);
    return (n > 1e-5f) ? v / n : v;
}
/* drops the w component */
__host__ __device__ __forceinline__ float3 trunc(const float4 &f) { return make_float3(f.x, f.y, f.z); }
/* component-wise square of the xyz part; w is zeroed */
__device__ __forceinline__ float4 sq(const float4 &v) {
    return make_float4(__fmul_rn(v.x, v.x), __fmul_rn(v.y, v.y), __fmul_rn(v.z, v.z), 0.f);
}
/* sqrt of the component sum plus a small epsilon (round-down sqrt) */
__device__ __forceinline__ float rms_epsilon(const float4 &v) {
    return __fsqrt_rd(__fadd_rn(__fadd_rn(v.x, __fadd_rn(v.y, v.z)), 1e-5f));
}
/*
 * MAT3F
 */
/* element-wise sum of two 3x3 matrices (data[r] holds row r) */
__device__ __forceinline__ kfusion::device::Mat3f operator+(kfusion::device::Mat3f M1, kfusion::device::Mat3f M2) {
    kfusion::device::Mat3f N;
    N.data[0].x = __fadd_rn(M1.data[0].x, M2.data[0].x);
    N.data[0].y = __fadd_rn(M1.data[0].y, M2.data[0].y);
    N.data[0].z = __fadd_rn(M1.data[0].z, M2.data[0].z);
    N.data[1].x = __fadd_rn(M1.data[1].x, M2.data[1].x);
    N.data[1].y = __fadd_rn(M1.data[1].y, M2.data[1].y);
    N.data[1].z = __fadd_rn(M1.data[1].z, M2.data[1].z);
    N.data[2].x = __fadd_rn(M1.data[2].x, M2.data[2].x);
    N.data[2].y = __fadd_rn(M1.data[2].y, M2.data[2].y);
    N.data[2].z = __fadd_rn(M1.data[2].z, M2.data[2].z);
    return N;
}
/* element-wise difference of two 3x3 matrices */
__device__ __forceinline__ kfusion::device::Mat3f operator-(kfusion::device::Mat3f M1, kfusion::device::Mat3f M2) {
    kfusion::device::Mat3f N;
    N.data[0].x = __fsub_rn(M1.data[0].x, M2.data[0].x);
    N.data[0].y = __fsub_rn(M1.data[0].y, M2.data[0].y);
    N.data[0].z = __fsub_rn(M1.data[0].z, M2.data[0].z);
    N.data[1].x = __fsub_rn(M1.data[1].x, M2.data[1].x);
    N.data[1].y = __fsub_rn(M1.data[1].y, M2.data[1].y);
    N.data[1].z = __fsub_rn(M1.data[1].z, M2.data[1].z);
    N.data[2].x = __fsub_rn(M1.data[2].x, M2.data[2].x);
    N.data[2].y = __fsub_rn(M1.data[2].y, M2.data[2].y);
    N.data[2].z = __fsub_rn(M1.data[2].z, M2.data[2].z);
    return N;
}
/* element-wise scaling of a 3x3 matrix by a scalar */
__device__ __forceinline__ kfusion::device::Mat3f operator*(kfusion::device::Mat3f M, float m) {
    kfusion::device::Mat3f N;
    N.data[0].x = __fmul_rn(M.data[0].x, m);
    N.data[0].y = __fmul_rn(M.data[0].y, m);
    N.data[0].z = __fmul_rn(M.data[0].z, m);
    N.data[1].x = __fmul_rn(M.data[1].x, m);
    N.data[1].y = __fmul_rn(M.data[1].y, m);
    N.data[1].z = __fmul_rn(M.data[1].z, m);
    N.data[2].x = __fmul_rn(M.data[2].x, m);
    N.data[2].y = __fmul_rn(M.data[2].y, m);
    N.data[2].z = __fmul_rn(M.data[2].z, m);
    return N;
}
__device__ __forceinline__ kfusion::device::Mat3f operator/(kfusion::device::Mat3f M, float d) { return M * 1.f / d; }
/*
 * MAT4F
 */
/* applies the upper-left 3x3 part of M to the xyz components of v; w is zeroed and
 * no translation column is applied */
__device__ __forceinline__ float4 operator*(Mat4f M, float4 v) {
    return make_float4(
        __fadd_rn(__fadd_rn(__fmul_rn(M.data[0].x, v.x), __fmul_rn(M.data[0].y, v.y)), __fmul_rn(M.data[0].z, v.z)),
        __fadd_rn(__fadd_rn(__fmul_rn(M.data[1].x, v.x), __fmul_rn(M.data[1].y, v.y)), __fmul_rn(M.data[1].z, v.z)),
        __fadd_rn(__fadd_rn(__fmul_rn(M.data[2].x, v.x), __fmul_rn(M.data[2].y, v.y)), __fmul_rn(M.data[2].z, v.z)),
        0.f);
}
/* determinant of the upper-left 3x3 part of M via the rule of sarrus:
 * det = aei + cdh + bfg - (ceg + afh + bdi) for rows [a b c; d e f; g h i];
 * fix: the first two negative terms were previously mangled into
 * (M02 * M11 + M20) * (M12 * M21 * M00) by a swapped __fadd_rn/__fmul_rn pair */
__device__ __forceinline__ float det(Mat4f M) {
    return __fsub_rn(__fadd_rn(__fadd_rn(__fmul_rn(__fmul_rn(M.data[0].x, M.data[1].y), M.data[2].z),
                                         __fmul_rn(__fmul_rn(M.data[1].x, M.data[2].y), M.data[0].z)),
                               __fmul_rn(__fmul_rn(M.data[2].x, M.data[0].y), M.data[1].z)),
                     __fadd_rn(__fadd_rn(__fmul_rn(__fmul_rn(M.data[0].z, M.data[1].y), M.data[2].x),
                                         __fmul_rn(__fmul_rn(M.data[1].z, M.data[2].y), M.data[0].x)),
                               __fmul_rn(__fmul_rn(M.data[2].z, M.data[0].y), M.data[1].x)));
}
/*
 * OTHER
 */
/* sign function: 1 for positive, -1 for negative, 0 for zero (and nan) */
__host__ __device__ __forceinline__ float sign(float a) {
    if (a > 0.f) {
        return 1.f;
    } else if (a < 0.f) {
        return -1.f;
    } else {
        return 0.f;
    }
}
/* true iff the tsdf value lies in the truncation region, i.e. |tsdf| >= 1 */
__host__ __device__ __forceinline__ bool is_truncated(float tsdf) {
    if (fabs(tsdf) >= 1.f) {
        return true;
    }
    return false;
}
/* mollified dirac delta, 1/pi * eps / (eps^2 + phi^2); despite the name this is the
 * derivative of the smoothed heaviside step, not the step itself */
__host__ __device__ __forceinline__ float heaviside_smooth(float phi, float epsilon) {
    return 1.f / CUDART_PI_F * (epsilon / (epsilon * epsilon + phi * phi));
}
/* column-major vectorisation of a 3x3 matrix into v[9] (the vec(.) operator):
 * v = (J00, J10, J20, J01, J11, J21, J02, J12, J22) */
__host__ __device__ __forceinline__ void vec(kfusion::device::Mat3f J, float *v) {
    v[0] = J.data[0].x;
    v[1] = J.data[1].x;
    v[2] = J.data[2].x;
    v[3] = J.data[0].y;
    v[4] = J.data[1].y;
    v[5] = J.data[2].y;
    v[6] = J.data[0].z;
    v[7] = J.data[1].z;
    v[8] = J.data[2].z;
}
/* returns the transpose of a 3x3 matrix (rows become columns) */
__host__ __device__ __forceinline__ kfusion::device::Mat3f transpose(kfusion::device::Mat3f J) {
    kfusion::device::Mat3f J_T;
    float3 row_1 = J.data[0];
    J_T.data[0].x = row_1.x;
    J_T.data[1].x = row_1.y;
    J_T.data[2].x = row_1.z;
    float3 row_2 = J.data[1];
    J_T.data[0].y = row_2.x;
    J_T.data[1].y = row_2.y;
    J_T.data[2].y = row_2.z;
    float3 row_3 = J.data[2];
    J_T.data[0].z = row_3.x;
    J_T.data[1].z = row_3.y;
    J_T.data[2].z = row_3.z;
    return J_T;
}
================================================
FILE: include/sobfu/params.hpp
================================================
#pragma once
/* kinfu includes */
#include <kfusion/types.hpp>
/* sobfu parameters: frame geometry, tsdf volume configuration, preprocessing and
 * solver settings, read from the .ini files in params/ */
struct Params {
    int cols = 640;
    int rows = 480;                /* no. of rows and columns in the frames */
    cv::Vec3i volume_dims;         /* volume dimensions in voxels */
    cv::Vec3f volume_size;         /* volume size in metres */
    cv::Affine3f volume_pose;      /* pose of the tsdf volume */
    kfusion::Intr intr;            /* camera intrinsics */
    float icp_truncate_depth_dist; /* depth truncation distance */
    float bilateral_sigma_depth, bilateral_sigma_spatial; /* bilateral filter sigmas */
    int bilateral_kernel_size;                            /* bilateral filter kernel size */
    float tsdf_trunc_dist, eta; /* tsdf truncation distance and expected object thickness */
    float tsdf_max_weight;      /* tsdf max. weight */
    float gradient_delta_factor;
    int start_frame = 0; /* frame when to start registration */
    int verbosity = 0;   /* solver verbosity */
    int s /* filter size */, max_iter /* max. no of iterations of the solver */;
    float max_update_norm /* max. update norm */, lambda /* filter parameter */, alpha /* gradient descent step size */,
        w_reg /* weight of the regularisation term */;

    /* edge lengths of a single voxel in metres along each axis; marked const since it
     * does not modify the parameter set */
    cv::Vec3f voxel_sizes() const {
        return cv::Vec3f(volume_size[0] / volume_dims[0], volume_size[1] / volume_dims[1],
                         volume_size[2] / volume_dims[2]);
    }
};
================================================
FILE: include/sobfu/precomp.hpp
================================================
#pragma once
/* cuda includes */
#include <cuda.h>
/* kinfu includes */
#include <kfusion/internal.hpp>
/* checks if x is a power of 2 */
bool isPow2(unsigned int x);
/* computes the nearest power of 2 larger than x */
int nextPow2(int x);
/* compute the number of threads and blocks to use for the given reduction kernel; we set threads
* block to the minimum of maxThreads and n/2; we observe the maximum specified number of blocks, because each thread
* in the kernel can process a variable number of elements */
void get_num_blocks_and_threads(int n, int maxBlocks, int maxThreads, int &blocks, int &threads);
================================================
FILE: include/sobfu/reductor.hpp
================================================
#pragma once
/* kinfu includes */
#include <kfusion/cuda/tsdf_volume.hpp>
/* sobfu includes */
#include <sobfu/precomp.hpp>
#include <sobfu/vector_fields.hpp>
/* thrust includes */
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/extrema.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/transform.h>
namespace sobfu {
namespace device {
/*
 * REDUCTOR
 */
/* runs gpu reductions over the voxel grid to evaluate the terms of the energy
 * functional and the max. update norm; owns host/device scratch buffers */
struct Reductor {
    /* constructor */
    Reductor(int3 dims_, float vsz_, float trunc_dist_);
    /* destructor */
    ~Reductor();
    /* energy functional */
    float data_energy(float2 *phi_global_data, float2 *phi_n_data);
    float reg_energy_sobolev(Mat4f *J_data);
    float2 max_update_norm();
    float2 voxel_max_energy(float2 *phi_global_data, float2 *phi_n_data, Mat4f *J_data, float w_reg);

    int3 dims;             /* volume dimensions in voxels */
    float vsz, trunc_dist; /* voxel size and tsdf truncation distance */
    int no_voxels;
    int blocks,
        threads; /* no. of blocks and threads for the reductions used to calculate value of the energy functional */
    float4 *updates;
    float *h_data_out, *d_data_out; /* host / device scratch for the data term */
    float *h_reg_out, *d_reg_out;   /* host / device scratch for the regularisation term */
    float2 *h_max_out, *d_max_out;  /* host / device scratch for the max reductions */
};
/* kernels */
template <unsigned int blockSize, bool nIsPow2>
__global__ void reduce_data_kernel(float2 *g_idata_global, float2 *g_idata_n, float *g_odata, unsigned int n);
template <unsigned int blockSize, bool nIsPow2>
__global__ void reduce_reg_sobolev_kernel(Mat4f *g_idata, float *g_odata, unsigned int n);
template <unsigned int blockSize, bool nIsPow2>
__global__ void reduce_voxel_max_energy_kernel(float2 *d_idata_global, float2 *d_idata_n, Mat4f *d_idata_reg,
float2 *d_o_data, float w_reg, unsigned int n);
template <unsigned int blockSize, bool nIsPow2>
__global__ void reduce_max_kernel(float4 *updates, float2 *g_o_max_data, unsigned int n);
/* host methods */
void reduce_data(int size, int threads, int blocks, float2 *d_idata_global, float2 *d_idata_n, float *d_odata);
void reduce_reg_sobolev(int size, int threads, int blocks, Mat4f *d_idata, float *d_odata);
void reduce_voxel_max_energy(int size, int threads, int blocks, float2 *d_idata_global, float2 *d_idata_n,
Mat4f *d_idata_reg, float w_reg, float2 *d_odata);
void reduce_max(int size, int threads, int blocks, float4 *updates, float2 *d_o_max_data);
/* final reduce on the cpu */
float final_reduce(float *h_odata, float *d_odata, int numBlocks);
float2 final_reduce_max(float2 *h_o_max_data, float2 *d_o_max_data, int numBlocks, int3 dims);
} // namespace device
} // namespace sobfu
================================================
FILE: include/sobfu/scalar_fields.hpp
================================================
#pragma once
/* sobfu includes */
#include <sobfu/params.hpp>
#include <sobfu/precomp.hpp>
/* kinfu includes */
#include <kfusion/internal.hpp>
#include <kfusion/precomp.hpp>
#include <kfusion/types.hpp>
namespace sobfu {
namespace cuda {
/*
 * SCALAR FIELD
 */
/* host-side handle to a scalar field stored in gpu memory */
class ScalarField {
public:
    /* constructor */
    ScalarField(cv::Vec3i dims_);
    /* destructor */
    ~ScalarField();
    /* get field data */
    kfusion::cuda::CudaData get_data();
    const kfusion::cuda::CudaData get_data() const;
    /* get dims */
    int3 get_dims();
    /* clear field */
    void clear();
    /* sum values in the field */
    float sum();
    /* print field */
    void print();

private:
    kfusion::cuda::CudaData data; /* field data */
    int3 dims;                    /* field dimensions */
};
} // namespace cuda
namespace device {
/*
 * SCALAR FIELD
 */
/* device-side view of a scalar field: raw float buffer plus dimensions;
 * accessor definitions live in the .cu files */
struct ScalarField {
    /* constructor */
    ScalarField(float *const data_, int3 dims_) : data(data_), dims(dims_) {}
    __device__ __forceinline__ float *beg(int x, int y) const;
    __device__ __forceinline__ float *zstep(float *const ptr) const;
    __device__ __forceinline__ float *operator()(int idx) const;
    __device__ __forceinline__ float *operator()(int x, int y, int z) const;

    float *const data; /* field data */
    const int3 dims;   /* field dimensions */
};
/* clear */
__global__ void clear_kernel(sobfu::device::ScalarField field);
void clear(ScalarField &field);
/* sum */
float reduce_sum(sobfu::device::ScalarField &field);
template <unsigned int blockSize, bool nIsPow2>
__global__ void reduce_sum_kernel(float *g_idata, float *g_odata, unsigned int n);
float final_reduce_sum(float *d_odata, int numBlocks);
} // namespace device
} // namespace sobfu
================================================
FILE: include/sobfu/sob_fusion.hpp
================================================
#pragma once
/* sobfu includes */
#include <sobfu/params.hpp>
#include <sobfu/solver.hpp>
/* kinfu includes */
#include <kfusion/internal.hpp>
#include <kfusion/kinfu.hpp>
#include <kfusion/precomp.hpp>
/* pcl includes */
#include <pcl/PolygonMesh.h>
#include <pcl/point_types.h>
/* sys headers */
#include <math.h>
#include <thread>
/* */
/* top-level sobfu pipeline: consumes depth frames and maintains the canonical tsdf
 * (phi_global), the live tsdf (phi_n) and the deformation field between them */
class SobFusion {
public:
    /* default constructor */
    SobFusion(const Params &params);
    /* default destructor */
    ~SobFusion();
    /* get sobfu params */
    Params &getParams();
    /* get the canonical model vertices stored as pcl triangle mesh */
    pcl::PolygonMesh::Ptr get_phi_global_mesh();
    /* get the canonical model mesh warped to live */
    pcl::PolygonMesh::Ptr get_phi_global_psi_inv_mesh();
    /* get the live model mesh */
    pcl::PolygonMesh::Ptr get_phi_n_mesh();
    /* get the live model mesh warped with psi */
    pcl::PolygonMesh::Ptr get_phi_n_psi_mesh();
    /* get the deformation field */
    std::shared_ptr<sobfu::cuda::DeformationField> getDeformationField();
    /* run algorithm on all frames; returns whether the frame was processed */
    bool operator()(const kfusion::cuda::Depth &depth, const kfusion::cuda::Image &image = kfusion::cuda::Image());

private:
    std::vector<cv::Affine3f> poses_;
    kfusion::cuda::Dists dists_;
    kfusion::cuda::Frame curr_, prev_;
    kfusion::cuda::Cloud points_;
    kfusion::cuda::Normals normals_;
    kfusion::cuda::Depth depths_;
    int frame_counter_; /* no. of frames processed so far */
    /* system parameters */
    Params params;
    /* tsdf's */
    cv::Ptr<kfusion::cuda::TsdfVolume> phi_global, phi_global_psi_inv, phi_n, phi_n_psi;
    /* deformation field warps phi_n to phi_global */
    std::shared_ptr<sobfu::cuda::DeformationField> psi, psi_inv;
    /* solver */
    std::shared_ptr<sobfu::cuda::Solver> solver;
    /* marching cubes */
    cv::Ptr<kfusion::cuda::MarchingCubes> mc;
    /* run marching cubes on vol */
    pcl::PolygonMesh::Ptr get_mesh(cv::Ptr<kfusion::cuda::TsdfVolume> vol);
    /* convert the canonical model to pcl polygon mesh */
    static pcl::PolygonMesh::Ptr convert_to_mesh(const kfusion::cuda::DeviceArray<pcl::PointXYZ> &triangles);
};
================================================
FILE: include/sobfu/solver.hpp
================================================
#pragma once
/* kinfu includes */
#include <kfusion/cuda/tsdf_volume.hpp>
#include <kfusion/safe_call.hpp>
/* sobfu includes */
#include <sobfu/reductor.hpp>
#include <sobfu/scalar_fields.hpp>
#include <sobfu/vector_fields.hpp>
/*
 * SOLVER PARAMETERS
 */
/* subset of Params consumed by the solver (see params.hpp for the meanings) */
struct SolverParams {
    int verbosity, max_iter, s;                  /* verbosity level, max. no. of iterations, filter size */
    float max_update_norm, lambda, alpha, w_reg; /* convergence threshold, filter parameter, step size, reg. weight */
};
/*
 * SDF'S USED IN THE SOLVER
 */
/* bundles the four tsdf volumes the solver operates on: the canonical model, the
 * canonical model warped with the inverse field, the live frame and the live frame
 * warped with psi */
struct SDFs {
    SDFs(kfusion::device::TsdfVolume &phi_global_, kfusion::device::TsdfVolume &phi_global_psi_inv_,
         kfusion::device::TsdfVolume &phi_n_, kfusion::device::TsdfVolume &phi_n_psi_)
        : phi_global(phi_global_), phi_global_psi_inv(phi_global_psi_inv_), phi_n(phi_n_), phi_n_psi(phi_n_psi_) {}

    kfusion::device::TsdfVolume phi_global, phi_global_psi_inv, phi_n, phi_n_psi;
};
/*
 * SPATIAL DIFFERENTIATORS
 */
/* bundles the differential operators used by the solver: the tsdf gradient operator,
 * first-order differentiators for psi and its inverse, and the second-order
 * (laplacian) operator */
struct Differentiators {
    Differentiators(sobfu::device::TsdfDifferentiator &tsdf_diff_, sobfu::device::Differentiator &diff_,
                    sobfu::device::Differentiator &diff_inv_,
                    sobfu::device::SecondOrderDifferentiator &second_order_diff_)
        : tsdf_diff(tsdf_diff_), diff(diff_), diff_inv(diff_inv_), second_order_diff(second_order_diff_) {}

    sobfu::device::TsdfDifferentiator tsdf_diff;
    sobfu::device::Differentiator diff, diff_inv;
    sobfu::device::SecondOrderDifferentiator second_order_diff;
};
namespace sobfu {
namespace cuda {
/*
 * SOLVER
 */
/* gradient-descent solver that estimates the deformation field psi warping the live
 * tsdf phi_n onto the canonical tsdf phi_global, together with its approximate inverse */
class Solver {
public:
    /* constructor */
    Solver(Params &params);
    /* destructor */
    ~Solver();
    /* estimates psi and psi_inv and updates the warped volumes phi_global_psi_inv
     * and phi_n_psi */
    void estimate_psi(const cv::Ptr<kfusion::cuda::TsdfVolume> phi_global,
                      cv::Ptr<kfusion::cuda::TsdfVolume> phi_global_psi_inv,
                      const cv::Ptr<kfusion::cuda::TsdfVolume> phi_n, cv::Ptr<kfusion::cuda::TsdfVolume> phi_n_psi,
                      std::shared_ptr<sobfu::cuda::DeformationField> psi,
                      std::shared_ptr<sobfu::cuda::DeformationField> psi_inv);

private:
    /* volume params */
    int3 dims;
    float3 voxel_sizes;
    int no_voxels;
    float trunc_dist, eta, max_weight;
    /* solver params */
    SolverParams solver_params;
    /* gradients */
    sobfu::cuda::SpatialGradients *spatial_grads;
    sobfu::device::SpatialGradients *spatial_grads_device;
    sobfu::device::TsdfGradient *nabla_phi_n, *nabla_phi_n_o_psi;
    sobfu::device::Jacobian *J, *J_inv;
    sobfu::device::Laplacian *L, *L_o_psi_inv;
    sobfu::device::PotentialGradient *nabla_U, *nabla_U_S;
    /* used to calculate value of the energy functional */
    sobfu::device::Reductor *r;
    /* sobolev filter (host / device copies) */
    float *h_S_i, *d_S_i;
};
/* get 3d sobolev filter */
static void get_3d_sobolev_filter(SolverParams ¶ms, float *h_S_i);
/* calculate 1d filters from a separable 3d filter */
static void decompose_sobolev_filter(SolverParams ¶ms, float *h_S_i);
} // namespace cuda
namespace device {
/* potential gradient */
__global__ void calculate_potential_gradient_kernel(float2 *phi_n_psi, float2 *phi_global, float4 *nabla_phi_n_o_psi,
float4 *L, float4 *nabla_U, float w_reg, int dim_x, int dim_y,
int dim_z);
void calculate_potential_gradient(kfusion::device::TsdfVolume &phi_n_psi, kfusion::device::TsdfVolume &phi_global,
sobfu::device::TsdfGradient &nabla_phi_n_o_psi, sobfu::device::Laplacian &L,
sobfu::device::PotentialGradient &nabla_U, float w_reg);
/* estimate psi */
void estimate_psi(SDFs &sdfs, sobfu::device::DeformationField &psi, sobfu::device::DeformationField &psi_inv,
sobfu::device::SpatialGradients *spatial_grads, Differentiators &diffs, float *d_S_i,
sobfu::device::Reductor *r, SolverParams ¶ms);
/* DEFORMATION FIELD UPDATES */
__global__ void update_psi_kernel(float4 *psi, float4 *nabla_U_S, float4 *updates, float alpha, int dim_x, int dim_y,
int dim_z);
void update_psi(sobfu::device::DeformationField &psi, sobfu::device::PotentialGradient &nabla_U_S, float4 *updates,
float alpha);
/*
* CONVOLUTIONS
*/
void set_convolution_kernel(float *d_kernel);
__global__ void convolution_rows_kernel(float4 *d_dst, float4 *d_src, int image_w, int image_h, int image_d);
__global__ void convolution_columns_kernel(float4 *d_dst, float4 *d_src, int image_w, int image_h, int image_d);
__global__ void convolution_depth_kernel(float4 *d_dst, float4 *d_src, int image_w, int image_h, int image_d);
void convolution_rows(float4 *d_dst, float4 *updates, int image_w, int image_h, int image_d);
void convolution_columns(float4 *d_dst, float4 *updates, int image_w, int image_h, int image_d);
void convolution_depth(float4 *d_dst, float4 *updates, int image_w, int image_h, int image_d);
} // namespace device
} // namespace sobfu
================================================
FILE: include/sobfu/vector_fields.hpp
================================================
#pragma once
/* sobfu includes */
#include <sobfu/params.hpp>
#include <sobfu/precomp.hpp>
/* kinfu includes */
#include <kfusion/cuda/tsdf_volume.hpp>
#include <kfusion/internal.hpp>
#include <kfusion/precomp.hpp>
#include <kfusion/types.hpp>
namespace sobfu {
namespace cuda {
/*
 * VECTOR FIELD
 */
/* host-side handle to a vector field stored in gpu memory */
class VectorField {
public:
    /* constructor */
    VectorField(cv::Vec3i dims_);
    /* destructor */
    ~VectorField();
    /* get dimensions of the field */
    cv::Vec3i get_dims() const;
    /* get the data in the field */
    kfusion::cuda::CudaData get_data();
    const kfusion::cuda::CudaData get_data() const;
    /* set the data in the field */
    void set_data(kfusion::cuda::CudaData &data);
    /* clear field */
    void clear();
    /* print field */
    void print();
    /* get no. of nan's in the field */
    int get_no_nans();

protected:
    kfusion::cuda::CudaData data; /* field data */
    cv::Vec3i dims;               /* field dimensions */
};
/*
 * DEFORMATION FIELD
 */
/* vector field of per-voxel warps; unlike the base class it is cleared to the
 * identity transformation rather than to zero */
class DeformationField : public VectorField {
public:
    /* constructor */
    DeformationField(cv::Vec3i dims_);
    /* destructor */
    ~DeformationField();
    /* init field to identity */
    void clear();
    /* apply the field to an sdf */
    void apply(const cv::Ptr<kfusion::cuda::TsdfVolume> phi, cv::Ptr<kfusion::cuda::TsdfVolume> phi_psi);
    /* approximate the inverse of the field */
    void get_inverse(sobfu::cuda::DeformationField &psi_inv);
};
/*
* TSDF GRADIENT, LAPLACIAN, POTENTIAL GRADIENT
*/
typedef VectorField TsdfGradient;
typedef VectorField Laplacian;
typedef VectorField PotentialGradient;
/*
 * JACOBIAN
 */
/* host-side handle to the per-voxel jacobian of the deformation field, stored in
 * gpu memory */
class Jacobian {
public:
    /* constructor */
    Jacobian(cv::Vec3i dims_);
    /* destructor */
    ~Jacobian();
    /* get jacobian data */
    kfusion::cuda::CudaData get_data();
    const kfusion::cuda::CudaData get_data() const;
    /* clear jacobian */
    void clear();

private:
    kfusion::cuda::CudaData data; /* jacobian data */
    cv::Vec3i dims;               /* field dimensions */
};
/*
 * SPATIAL GRADIENTS
 */
/* owns the spatial gradient fields used by the solver (tsdf gradients, jacobians,
 * laplacians and potential gradients) */
struct SpatialGradients {
    /* constructor */
    SpatialGradients(cv::Vec3i dims_);
    /* destructor */
    ~SpatialGradients();

    TsdfGradient *nabla_phi_n, *nabla_phi_n_o_psi; /* tsdf gradient and its warp with psi */
    Jacobian *J, *J_inv;                           /* jacobians of psi and psi_inv */
    Laplacian *L, *L_o_psi_inv;                    /* laplacian and its warp with psi_inv */
    PotentialGradient *nabla_U, *nabla_U_S;        /* potential gradient and its sobolev-filtered version */
};
} // namespace cuda
} // namespace sobfu
namespace sobfu {
namespace device {
/*
 * VECTOR FIELD
 */
/* device-side view of a vector field: raw float4 buffer plus dimensions;
 * accessor definitions live in the .cu files */
struct VectorField {
    /* constructor */
    VectorField(float4 *const data_, const int3 dims_) : data(data_), dims(dims_) {}
    __device__ __forceinline__ float4 *beg(int x, int y) const;
    __device__ __forceinline__ float4 *zstep(float4 *const ptr) const;
    __device__ __forceinline__ float4 *operator()(int x, int y, int z) const;
    __device__ __forceinline__ float4 get_displacement(int x, int y, int z) const;

    float4 *const data; /* vector field data */
    const int3 dims;    /* vector field dimensions */
};
/* clear field */
__global__ void clear_kernel(VectorField field);
void clear(VectorField &field);
/*
* DEFORMATION FIELD
*/
typedef VectorField DeformationField;
/* set deformation field to identity */
__global__ void init_identity_kernel(DeformationField field);
void init_identity(DeformationField &field);
/* apply psi to an sdf */
__global__ void apply_kernel(const kfusion::device::TsdfVolume phi, kfusion::device::TsdfVolume phi_warped,
const DeformationField psi);
void apply(const kfusion::device::TsdfVolume &phi, kfusion::device::TsdfVolume &phi_warped,
const sobfu::device::DeformationField &psi);
/* approximate the inverse of a deformation field */
__global__ void estimate_inverse_kernel(sobfu::device::DeformationField psi, sobfu::device::DeformationField psi_inv);
void estimate_inverse(sobfu::device::DeformationField &psi, sobfu::device::DeformationField &psi_inv);
/*
* TSDF GRADIENT
*/
typedef VectorField TsdfGradient;
struct TsdfDifferentiator {
/* constructor */
TsdfDifferentiator(kfusion::device::TsdfVolume &vol_) : vol(vol_) {}
/* calculate the gradient */
__device__ void operator()(sobfu::device::TsdfGradient &grad) const;
void calculate(sobfu::device::TsdfGradient &grad);
kfusion::device::TsdfVolume vol;
};
/* estimate tsdf gradient */
__global__ void estimate_gradient_kernel(const sobfu::device::TsdfDifferentiator diff,
sobfu::device::TsdfGradient grad);
/* interpolate tsdf gradient */
__global__ void interpolate_gradient_kernel(sobfu::device::TsdfGradient nabla_phi_n_psi,
sobfu::device::TsdfGradient nabla_phi_n_psi_t,
sobfu::device::DeformationField psi);
void interpolate_gradient(sobfu::device::TsdfGradient &nabla_phi_n_psi, sobfu::device::TsdfGradient &nabla_phi_n_psi_t,
sobfu::device::DeformationField &psi);
/*
* LAPLACIAN
*/
typedef VectorField Laplacian;
struct SecondOrderDifferentiator {
/* constructor */
SecondOrderDifferentiator(sobfu::device::DeformationField &psi_) : psi(psi_) {}
__device__ void laplacian(sobfu::device::Laplacian &L) const;
void calculate(sobfu::device::Laplacian &L);
sobfu::device::DeformationField psi;
};
/* estimate laplacian */
__global__ void estimate_laplacian_kernel(const sobfu::device::SecondOrderDifferentiator diff,
sobfu::device::Laplacian L);
/* interpolate laplacian */
__global__ void interpolate_laplacian_kernel(sobfu::device::Laplacian L, sobfu::device::Laplacian L_o_psi,
sobfu::device::DeformationField psi);
void interpolate_laplacian(sobfu::device::Laplacian &L, sobfu::device::Laplacian &L_o_psi,
sobfu::device::DeformationField &psi);
/*
* POTENTIAL GRADIENT
*/
typedef VectorField PotentialGradient;
/*
* JACOBIAN
*/
struct Jacobian {
/* constructor */
Jacobian(Mat4f *const data_, int3 dims_) : data(data_), dims(dims_) {}
__device__ __forceinline__ Mat4f *beg(int x, int y) const;
__device__ __forceinline__ Mat4f *zstep(Mat4f *const ptr) const;
__device__ __forceinline__ Mat4f *operator()(int x, int y, int z) const;
Mat4f *const data; /* jacobian data */
const int3 dims;
};
struct Differentiator {
/* constructor */
Differentiator(sobfu::device::DeformationField &psi_) : psi(psi_) {}
/* calculate jacobian */
__device__ void operator()(sobfu::device::Jacobian &J, int mode) const;
void calculate(sobfu::device::Jacobian &J);
void calculate_deformation_jacobian(sobfu::device::Jacobian &J);
sobfu::device::DeformationField psi;
};
/* clear jacobian */
__global__ void clear_jacobian_kernel(Jacobian J);
void clear(Jacobian &J);
/* estimate jacobian */
__global__ void estimate_jacobian_kernel(const sobfu::device::Differentiator diff, sobfu::device::Jacobian J);
__global__ void estimate_deformation_jacobian_kernel(const sobfu::device::Differentiator diff,
sobfu::device::Jacobian J);
/*
* SPATIAL GRADIENTS
*/
struct SpatialGradients {
SpatialGradients(sobfu::device::TsdfGradient *nabla_phi_n_, sobfu::device::TsdfGradient *nabla_phi_n_o_psi_,
sobfu::device::Jacobian *J_, sobfu::device::Jacobian *J_inv_, sobfu::device::Laplacian *L_,
sobfu::device::Laplacian *L_o_psi_inv_, sobfu::device::PotentialGradient *nabla_U_,
sobfu::device::PotentialGradient *nabla_U_S_)
: nabla_phi_n(nabla_phi_n_),
nabla_phi_n_o_psi(nabla_phi_n_o_psi_),
J(J_),
J_inv(J_inv_),
L(L_),
L_o_psi_inv(L_o_psi_inv_),
nabla_U(nabla_U_),
nabla_U_S(nabla_U_S_) {}
~SpatialGradients();
sobfu::device::TsdfGradient *nabla_phi_n, *nabla_phi_n_o_psi;
sobfu::device::Jacobian *J, *J_inv;
sobfu::device::Laplacian *L, *L_o_psi_inv;
sobfu::device::PotentialGradient *nabla_U, *nabla_U_S;
};
} // namespace device
} // namespace sobfu
================================================
FILE: params/params_advent.ini
================================================
# TSDF
VOL_DIMS_X=64
VOL_DIMS_Y=64
VOL_DIMS_Z=64
VOL_SIZE_X=0.5
VOL_SIZE_Y=0.5
VOL_SIZE_Z=0.5
TSDF_TRUNC_DIST=5
ETA=2
TSDF_MAX_WEIGHT=128
GRADIENT_DELTA_FACTOR=0.5
# CAMERA
INTR_FX=570.342
INTR_FY=570.342
INTR_CX=320.0
INTR_CY=240.0
TRUNC_DEPTH=1.5
VOL_POSE_T_Z=0.5
# BILATERAL FILTER
BILATERAL_SIGMA_DEPTH=0.005
BILATERAL_SIGMA_SPATIAL=4.5
BILATERAL_KERNEL_SIZE=7
# SOLVER
MAX_ITER=8192
MAX_UPDATE_NORM=5e-4
S=7
LAMBDA=0.1
ALPHA=0.1
W_REG=0.2
================================================
FILE: params/params_boxing.ini
================================================
# TSDF
VOL_DIMS_X=128
VOL_DIMS_Y=128
VOL_DIMS_Z=128
VOL_SIZE_X=0.75
VOL_SIZE_Y=0.75
VOL_SIZE_Z=0.75
TSDF_TRUNC_DIST=48
ETA=3
TSDF_MAX_WEIGHT=128
GRADIENT_DELTA_FACTOR=0.5
# CAMERA
INTR_FX=570.342
INTR_FY=570.342
INTR_CX=320.0
INTR_CY=240.0
TRUNC_DEPTH=1.0
VOL_POSE_T_Z=0.1
# BILATERAL FILTER
BILATERAL_SIGMA_DEPTH=0.005
BILATERAL_SIGMA_SPATIAL=4.5
BILATERAL_KERNEL_SIZE=7
# SOLVER
START_FRAME=1
MAX_ITER=4096
MAX_UPDATE_NORM=1e-10
S=7
LAMBDA=0.1
RHO_0=1.0
ALPHA=0.001
W_REG=0.6
================================================
FILE: params/params_hat.ini
================================================
# TSDF
VOL_DIMS_X=128
VOL_DIMS_Y=128
VOL_DIMS_Z=128
VOL_SIZE_X=1.20
VOL_SIZE_Y=1.20
VOL_SIZE_Z=1.20
TSDF_TRUNC_DIST=10
ETA=2
TSDF_MAX_WEIGHT=128
GRADIENT_DELTA_FACTOR=0.5
# CAMERA
INTR_FX=517.0
INTR_FY=517.0
INTR_CX=320.0
INTR_CY=240.0
TRUNC_DEPTH=2.0
VOL_POSE_T_Z=0.75
# BILATERAL FILTER
BILATERAL_SIGMA_DEPTH=0.02
BILATERAL_SIGMA_SPATIAL=4.5
BILATERAL_KERNEL_SIZE=7
# SOLVER
MAX_ITER=2048
MAX_UPDATE_NORM=1e-3
S=7
LAMBDA=0.1
ALPHA=0.05
W_REG=0.1
================================================
FILE: params/params_ours.ini
================================================
# intrinsic parameters for our intel realsense sr300
# ---rgb---
# kinfuParams.intr = kfusion::Intr(622.011f, 622.012f, 320.331f, 235.664f);
# ---depth---
# kinfuParams.intr = kfusion::Intr(474.567f, 474.567f, 310.63f, 246.018f);
#
# ----- DOU -----
# --- depth ---
# kinfuParams.intr = kfusion::Intr(520.f, 520.f, 320.f, 240.f);
================================================
FILE: params/params_snoopy.ini
================================================
# TSDF
VOL_DIMS_X=128
VOL_DIMS_Y=128
VOL_DIMS_Z=128
VOL_SIZE_X=0.9
VOL_SIZE_Y=0.9
VOL_SIZE_Z=0.9
TSDF_TRUNC_DIST=10
ETA=5
TSDF_MAX_WEIGHT=128
GRADIENT_DELTA_FACTOR=0.5
# CAMERA
INTR_FX=517.0
INTR_FY=517.0
INTR_CX=320.0
INTR_CY=240.0
TRUNC_DEPTH=3.0
VOL_POSE_T_Z=0.05
# BILATERAL FILTER
BILATERAL_SIGMA_DEPTH=0.01
BILATERAL_SIGMA_SPATIAL=4.5
BILATERAL_KERNEL_SIZE=7
# SOLVER
START_FRAME=4
MAX_ITER=2048
MAX_UPDATE_NORM=1e-3
S=7
LAMBDA=0.1
ALPHA=0.1
W_REG=0.2
================================================
FILE: params/params_umbrella.ini
================================================
# TSDF
VOL_DIMS_X=128
VOL_DIMS_Y=128
VOL_DIMS_Z=128
VOL_SIZE_X=1.0
VOL_SIZE_Y=1.0
VOL_SIZE_Z=1.0
TSDF_TRUNC_DIST=8
ETA=3
TSDF_MAX_WEIGHT=128
GRADIENT_DELTA_FACTOR=0.5
# CAMERA
INTR_FX=570.342
INTR_FY=570.342
INTR_CX=320.0
INTR_CY=240.0
TRUNC_DEPTH=1.5
VOL_POSE_T_Z=0.3
# BILATERAL FILTER
BILATERAL_SIGMA_DEPTH=0.04
BILATERAL_SIGMA_SPATIAL=4.5
BILATERAL_KERNEL_SIZE=7
# SOLVER
START_FRAME=1
MAX_ITER=2048
MAX_UPDATE_NORM=1e-10
S=7
LAMBDA=0.1
ALPHA=0.001
W_REG=0.2
================================================
FILE: setup.sh
================================================
#!/bin/sh
# build helper; meant to be SOURCED (`source setup.sh [options]`), which is why
# failure paths use `return` rather than `exit` -- `exit` would close the caller's shell

RUN_CMAKE=false
RUN_MAKE=false
RUN_MAKE_TESTS=false

# print usage information
usage() {
    echo "USAGE: source setup.sh [options]"
    echo "OPTIONS:"
    echo "\t--help -h: Display help"
    echo "\t--cmake -c: Run CMake"
    echo "\t--make -m: Run make"
    echo "\t--make-tests -t: Make tests"
    echo "\t--all -a: Run all the settings"
}

# Parse through the arguments and check if any relevant flag exists
while [ "$1" != "" ]; do
    PARAM=`echo $1 | awk -F= '{print $1}'`
    case $PARAM in
        -h | --help)
            usage
            # fix: was `exit`, which kills the interactive shell when this script is sourced
            return 0
            ;;
        -c | --cmake)
            RUN_CMAKE=true
            ;;
        -m | --make)
            RUN_MAKE=true
            ;;
        -t | --make-tests)
            RUN_MAKE_TESTS=true
            ;;
        -a | --all)
            RUN_CMAKE=true
            RUN_MAKE=true
            ;;
        *)
            echo "ERROR: unknown parameter \"$PARAM\""
            usage
            return 1
            ;;
    esac
    shift
done

# If build does not exist create one
mkdir -p build
cd build

if $RUN_CMAKE
then
    if $RUN_MAKE_TESTS
    then
        echo "running cmake..."
        cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_TESTS=ON .. || (cd ../ && return 1)
    else
        echo "running cmake..."
        # fix: the flag was misspelled -DCMAKE_EXPORT_COMPILE_COMMNADS, so no
        # compile_commands.json was ever generated on this path
        cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_TESTS=OFF .. || (cd ../ && return 1)
    fi
fi

if $RUN_MAKE
then
    echo "running make..."
    make -j4 || (cd ../ && return 1)
    echo "make complete!"
fi

cd ..
================================================
FILE: src/CMakeLists.txt
================================================
# ---------------------------------------------------------------------------- #
# BUILD ALL SUBPROJECTS
# ---------------------------------------------------------------------------- #
# the libraries are added before the apps that link against them
add_subdirectory(kfusion)
add_subdirectory(sobfu)
add_subdirectory(apps)
================================================
FILE: src/apps/CMakeLists.txt
================================================
# Link source files to executables
add_application(app)

# Link to all libraries: the project's own kfusion/sobfu libraries plus their
# third-party dependencies (boost, cuda, opencv, pcl, vtk, libpng)
target_link_libraries(app
    sobfu
    kfusion
    png
    Boost::program_options
    ${Boost_LIBRARIES}
    ${CUDA_CUDA_LIBRARY}
    ${CUDA_CUDART_LIBRARY}
    ${OpenCV_LIBS}
    ${PCL_LIBRARIES}
    ${VTK_LIBRARIES}
)
================================================
FILE: src/apps/demo.cpp
================================================
/* sobfu includes */
#include <sobfu/sob_fusion.hpp>
/* boost includes */
#include <boost/program_options.hpp>
/* opencv includes */
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
/* pcl includes */
#include <pcl/io/pcd_io.h>
#include <pcl/io/vtk_io.h>
#include <pcl/visualization/pcl_visualizer.h>
/* sys headers */
#include <iostream>
/* vtk includes */
#include <vtkArrowSource.h>
#include <vtkCellArray.h>
#include <vtkGlyph2D.h>
#include <vtkImageData.h>
#include <vtkPointData.h>
#include <vtkPoints.h>
#include <vtkRenderWindow.h>
#include <vtkSmartPointer.h>
#include <vtkXMLImageDataWriter.h>
struct SobFuApp {
SobFuApp(std::string file_path, std::string params_path, bool logger, bool visualizer, bool visualizer_detailed,
bool verbose, bool vverbose)
: exit_(false),
file_path_(file_path),
params_path_(params_path),
logger_(logger),
visualizer_(visualizer),
visualizer_detailed_(visualizer_detailed),
verbose_(verbose),
vverbose_(vverbose) {
/*
* initialise parameters
*/
Params params;
if (verbose_) {
params.verbosity = 1;
} else if (vverbose_) {
params.verbosity = 2;
}
/*
* declare parameters to read from .ini
*/
boost::program_options::options_description desc("parameters");
declare_parameters(desc, params);
/*
* read parameters from .ini
*/
boost::program_options::variables_map vm;
read_parameters(desc, vm);
/*
* parse parameters stored in units of voxels
*/
params.tsdf_trunc_dist = vm["TSDF_TRUNC_DIST"].as<float>() * params.voxel_sizes()[0];
params.eta = vm["ETA"].as<float>() * params.voxel_sizes()[0];
params.volume_pose = cv::Affine3f().translate(
cv::Vec3f(-params.volume_size[0] / 2.f, -params.volume_size[1] / 2.f, vm["VOL_POSE_T_Z"].as<float>()));
/*
* initialise sobolevfusion
*/
sobfu = std::make_shared<SobFusion>(params);
}
/*
* declare sobfu parameters
*/
void declare_parameters(boost::program_options::options_description &desc, Params ¶ms) {
/*
* tsdf
*/
desc.add_options()("VOL_DIMS_X", boost::program_options::value<int>(¶ms.volume_dims[0]),
"no. of voxels along x axis");
desc.add_options()("VOL_DIMS_Y", boost::program_options::value<int>(¶ms.volume_dims[1]),
"no. of voxels along y axis");
desc.add_options()("VOL_DIMS_Z", boost::program_options::value<int>(¶ms.volume_dims[2]),
"no. of voxels along z axis");
desc.add_options()("VOL_SIZE_X", boost::program_options::value<float>(¶ms.volume_size[0]),
"vol. size along x axis (metres)");
desc.add_options()("VOL_SIZE_Y", boost::program_options::value<float>(¶ms.volume_size[1]),
"vol. size along y axis (metres)");
desc.add_options()("VOL_SIZE_Z", boost::program_options::value<float>(¶ms.volume_size[2]),
"vol. size along z axis (metres)");
desc.add_options()("TSDF_TRUNC_DIST", boost::program_options::value<float>(), "truncation distance (voxels)");
desc.add_options()("ETA", boost::program_options::value<float>(), "expected object thickness (voxels)");
desc.add_options()("TSDF_MAX_WEIGHT", boost::program_options::value<float>(¶ms.tsdf_max_weight),
"max. tsdf weight");
desc.add_options()("GRADIENT_DELTA_FACTOR", boost::program_options::value<float>(¶ms.gradient_delta_factor),
"delta factor when calculating tsdf gradient (voxels)");
/*
* camera
*/
desc.add_options()("INTR_FX", boost::program_options::value<float>(¶ms.intr.fx), "focal length x");
desc.add_options()("INTR_FY", boost::program_options::value<float>(¶ms.intr.fy), "focal length y");
desc.add_options()("INTR_CX", boost::program_options::value<float>(¶ms.intr.cx), "principal point x");
desc.add_options()("INTR_CY", boost::program_options::value<float>(¶ms.intr.cy), "principal point y");
desc.add_options()("TRUNC_DEPTH", boost::program_options::value<float>(¶ms.icp_truncate_depth_dist),
"depth map truncation distance (metres)");
desc.add_options()("VOL_POSE_T_Z", boost::program_options::value<float>(),
"camera to volume translation along z axis");
/*
* bilateral filter
*/
desc.add_options()("BILATERAL_SIGMA_DEPTH", boost::program_options::value<float>(¶ms.bilateral_sigma_depth),
"bilateral filter sigma z");
desc.add_options()("BILATERAL_SIGMA_SPATIAL",
boost::program_options::value<float>(¶ms.bilateral_sigma_spatial),
"bilateral filter sigma x-y");
desc.add_options()("BILATERAL_KERNEL_SIZE", boost::program_options::value<int>(¶ms.bilateral_kernel_size),
"bilateral filter kernel size");
/*
* solver
*/
desc.add_options()("START_FRAME", boost::program_options::value<int>(¶ms.start_frame),
"frame when to start registration");
desc.add_options()("MAX_ITER", boost::program_options::value<int>(¶ms.max_iter),
"max. no. of iterations of the solver");
desc.add_options()("MAX_UPDATE_NORM", boost::program_options::value<float>(¶ms.max_update_norm),
"max. update norm when running the solver");
/* SOBOLEV */
desc.add_options()("S", boost::program_options::value<int>(¶ms.s),
"Sobolev kernel size (currently only supports s=7");
desc.add_options()("LAMBDA", boost::program_options::value<float>(¶ms.lambda),
"Sobolev filter parameter (currently only supports 0.05, 0.1, 0.2, and 0.4");
/* FASTFUSION */
desc.add_options()("ALPHA", boost::program_options::value<float>(¶ms.alpha), "gradient descent step size");
desc.add_options()("W_REG", boost::program_options::value<float>(¶ms.w_reg), "regularisation weight");
}
/*
* read parameters from params.ini
*/
void read_parameters(boost::program_options::options_description &desc, boost::program_options::variables_map &vm) {
std::ifstream settings_file(params_path_);
boost::program_options::store(boost::program_options::parse_config_file(settings_file, desc), vm);
boost::program_options::notify(vm);
}
/*
* load colour and depth images
*/
void load_files(std::vector<cv::String> *depths, std::vector<cv::String> *images, std::vector<cv::String> *masks) {
if (!boost::filesystem::exists(file_path_)) {
std::cerr << "error: directory '" << file_path_ << "' does not exist. exiting" << std::endl;
exit(EXIT_FAILURE);
}
if (!boost::filesystem::exists(file_path_ + "/depth") || !boost::filesystem::exists(file_path_ + "/color")) {
std::cerr << "error: source directory should contain 'color' and 'depth' folders. exiting..." << std::endl;
exit(EXIT_FAILURE);
}
cv::glob(file_path_ + "/depth", *depths);
cv::glob(file_path_ + "/color", *images);
std::sort((*depths).begin(), (*depths).end());
std::sort((*images).begin(), (*images).end());
if (boost::filesystem::exists(file_path_ + "/omask")) {
cv::glob(file_path_ + "/omask", *masks);
std::sort((*masks).begin(), (*masks).end());
}
}
/*
* create a new directory out if not already present inside the input folder
*/
void create_output_directory() {
out_path_ = file_path_ + "/meshes";
boost::filesystem::path dir_meshes(out_path_);
if (boost::filesystem::create_directory(dir_meshes)) {
std::cout << "created output directory for meshes" << std::endl;
}
if (visualizer_ || visualizer_detailed_) {
out_screenshot_path_ = file_path_ + "/screenshots";
boost::filesystem::path dir_screenshots(out_screenshot_path_);
if (boost::filesystem::create_directory(dir_screenshots)) {
std::cout << "created output directory for screenshots" << std::endl;
}
}
}
/*
* get no. of vertices in a mesh
*/
int get_no_vertices(pcl::PolygonMesh::Ptr mesh) {
pcl::PointCloud<pcl::PointXYZ> pc;
pcl::fromPCLPointCloud2(mesh->cloud, pc);
return pc.size();
}
/*
* save output meshes in .vtk format
*/
void save_mesh(pcl::PolygonMesh::Ptr mesh, int i, std::string name) {
/* pad frame no. with 0's */
std::stringstream ss;
ss << std::setw(6) << std::setfill('0') << i;
std::string frameNum = ss.str();
/* save to vtk */
pcl::io::saveVTKFile(out_path_ + "/" + name + "_" + frameNum + ".vtk", *mesh);
std::cout << "saved " + name + "_" + frameNum + ".vtk" << std::endl;
}
/*
* save 3d deformation field in .vtk format
*/
void save_field(std::shared_ptr<SobFusion> sobfu, int i) {
/* get parameters */
Params params = sobfu->getParams();
/* create vtk image */
vtkSmartPointer<vtkImageData> image = vtkSmartPointer<vtkImageData>::New();
/* specify size of image */
image->SetDimensions(params.volume_dims[0], params.volume_dims[1], params.volume_dims[2]);
/* specify size of image data */
image->AllocateScalars(VTK_FLOAT, 4);
int *dims = image->GetDimensions();
/* copy vector field data */
std::shared_ptr<sobfu::cuda::DeformationField> psi = sobfu->getDeformationField();
kfusion::cuda::CudaData displacement_data = psi->get_data();
displacement_data.download(image->GetScalarPointer());
/* pad frame no. with 0's */
std::stringstream ss;
ss << std::setw(6) << std::setfill('0') << i;
std::string frameNum = ss.str();
std::string fileName = out_path_ + "/field_" + frameNum + ".vti";
/* save file */
vtkSmartPointer<vtkXMLImageDataWriter> writer = vtkSmartPointer<vtkXMLImageDataWriter>::New();
writer->SetFileName(fileName.c_str());
writer->SetInputData(image);
writer->Write();
std::cout << "saved the vector field to .vti" << std::endl;
}
bool execute() {
/* sobfu app */
sobfu = sobfu;
/* images */
cv::Mat depth, image, mask;
std::vector<cv::String> depths, images, masks;
load_files(&depths, &images, &masks);
/* output */
create_output_directory();
/* pipeline */
double time_ms = 0;
bool has_image = false;
bool has_masks = masks.size() > 0;
int v1(0);
int v2(0);
int v3(0);
int v4(0);
int v5(0);
for (size_t i = 0; i < depths.size(); ++i) {
depth = cv::imread(depths[i], CV_LOAD_IMAGE_ANYDEPTH);
image = cv::imread(images[i], CV_LOAD_IMAGE_COLOR);
cv::Mat depth_masked = cv::Mat::zeros(depth.size(), depth.type());
if (has_masks) {
mask = cv::imread(masks[i], CV_8U);
depth.copyTo(depth_masked, mask);
}
if (!image.data || !depth.data) {
std::cerr << "error: image could not be read; check for improper"
<< " permissions or invalid formats. exiting..." << std::endl;
exit(EXIT_FAILURE);
}
if (has_masks) {
depth_device_.upload(depth_masked.data, depth_masked.step, depth_masked.rows, depth_masked.cols);
} else {
depth_device_.upload(depth.data, depth.step, depth.rows, depth.cols);
}
{
kfusion::SampledScopeTime fps(time_ms);
has_image = (*(sobfu))(depth_device_);
}
/* get meshes */
pcl::PolygonMesh::Ptr mesh_global, mesh_global_psi_inv;
pcl::PolygonMesh::Ptr mesh_n, mesh_n_psi;
if (visualizer_ || visualizer_detailed_ || logger_) {
mesh_global = sobfu->get_phi_global_mesh();
int no_vertices_canonical = get_no_vertices(mesh_global);
std::cout << "no. of point-normal pairs in the canonical model: " << no_vertices_canonical << std::endl;
if (i >= 1) {
mesh_global_psi_inv = sobfu->get_phi_global_psi_inv_mesh();
int no_vertices_canonical_warped_to_live = get_no_vertices(mesh_global_psi_inv);
std::cout << "no. of point-normal pairs in the canonical model warped to live: "
<< no_vertices_canonical_warped_to_live << std::endl;
}
if (visualizer_detailed_) {
if (i == 1) {
mesh_n = sobfu->get_phi_n_mesh();
mesh_n_psi = sobfu->get_phi_n_psi_mesh();
}
if (i >= 2) {
mesh_n = sobfu->get_phi_n_mesh();
mesh_n_psi = sobfu->get_phi_n_psi_mesh();
}
}
}
if (logger_) {
save_mesh(mesh_global, i, "canonical_mesh");
if (i >= 1) {
save_mesh(mesh_global_psi_inv, i, "canonical_warped_to_live_mesh");
}
// save_field(sobfu, i);
}
if (visualizer_ || visualizer_detailed_) {
if (i == 0) {
viewer = std::make_shared<pcl::visualization::PCLVisualizer>("meshes");
}
/*
* BASIC VISUALISER
*
*/
if (visualizer_) {
if (i == 0) {
/* create view ports */
viewer->createViewPort(0.0, 0.0, 0.5, 1.0, v1);
viewer->createViewPort(0.5, 0.0, 1.0, 1.0, v2);
/* add labels */
viewer->addText("frame no. " + std::to_string(i), 20, 512, 15, 1.0, 1.0, 1.0, "frame_num", v1);
viewer->addText("canonical model", 15, 15, 15, 1.0, 1.0, 1.0, "v1 text", v1);
viewer->addText("canonical model warped to live", 15, 15, 15, 1.0, 1.0, 1.0, "v2 text", v2);
/* add bounding box */
cv::Vec3f vol_size = sobfu->getParams().volume_size;
cv::Affine3f vol_pose = sobfu->getParams().volume_pose;
cv::Vec3f min = vol_pose * cv::Vec3f(0.f, 0.f, 0.f);
cv::Vec3f max = vol_pose * vol_size;
viewer->setCameraPosition(0.0, 0.0, max[2] + 3.0, 0.0, 0.0, 0.0);
/* display meshes */
viewer->addPolygonMesh(*mesh_global, "mesh canonical", v1);
viewer->spinOnce(5000);
/* save screenshot */
std::stringstream ss;
ss << std::setw(6) << std::setfill('0') << i;
std::string frame_num = ss.str();
viewer->saveScreenshot(out_screenshot_path_ + "/" + frame_num + ".png");
} else if (i >= 1) {
/* update label */
viewer->updateText("frame no. " + std::to_string(i), 20, 512, 15, 1.0, 1.0, 1.0, "frame_num");
/* display updated meshes */
viewer->updatePolygonMesh(*mesh_global, "mesh canonical");
if (i == 1) {
viewer->addPolygonMesh(*mesh_global_psi_inv, "mesh canonical warped to live", v2);
} else {
viewer->updatePolygonMesh(*mesh_global_psi_inv, "mesh canonical warped to live");
}
viewer->spinOnce(50);
/* save screenshot */
std::stringstream ss;
ss << std::setw(6) << std::setfill('0') << i;
std::string frame_num = ss.str();
viewer->saveScreenshot(out_screenshot_path_ + "/" + frame_num + ".png");
}
}
/*
* DETAILED VISUALISER
*
*/
else if (visualizer_detailed_) {
if (i == 0) {
/* create view ports */
viewer->createViewPort(0.0, 0.5, 0.5, 1.0, v1);
viewer->createViewPort(0.5, 0.5, 1.0, 1.0, v2);
viewer->createViewPort(0.0, 0.0, 0.5, 0.5, v3);
viewer->createViewPort(0.5, 0.0, 1.0, 0.5, v4);
/* add labels */
viewer->addText("frame no. " + std::to_string(i), 20, 244, 15, 1.0, 1.0, 1.0, "frame_num", v1);
viewer->addText("phi_n", 15, 15, 15, 1.0, 1.0, 1.0, "v1 text", v1);
viewer->addText("phi_global(psi_inv)", 15, 15, 15, 1.0, 1.0, 1.0, "v2 text", v2);
viewer->addText("phi_global", 15, 15, 15, 1.0, 1.0, 1.0, "v3 text", v3);
viewer->addText("phi_n(psi)", 15, 15, 15, 1.0, 1.0, 1.0, "v4 text", v4);
/* add bounding box */
cv::Vec3f vol_size = sobfu->getParams().volume_size;
cv::Affine3f vol_pose = sobfu->getParams().volume_pose;
cv::Vec3f min = vol_pose * cv::Vec3f(0.f, 0.f, 0.f);
cv::Vec3f max = vol_pose * vol_size;
viewer->setCameraPosition(0.0, 0.0, max[2] + 3.0, 0.0, 0.0, 0.0);
/* display meshes */
viewer->addPolygonMesh(*mesh_global, "mesh canonical", v3);
viewer->spinOnce(10000);
/* save screenshot */
std::stringstream ss;
ss << std::setw(6) << std::setfill('0') << i;
std::string frame_num = ss.str();
viewer->saveScreenshot(out_screenshot_path_ + "/" + frame_num + ".png");
}
if (i >= 1) {
/* update labels */
viewer->updateText("frame no. " + std::to_string(i), 20, 244, 15, 1.0, 1.0, 1.0, "frame_num");
/* display meshes */
viewer->updatePolygonMesh(*mesh_global, "mesh canonical");
if (i == 1) {
viewer->addPolygonMesh(*mesh_n, "mesh n", v1);
viewer->addPolygonMesh(*mesh_global_psi_inv, "mesh canonical warped to live", v2);
viewer->addPolygonMesh(*mesh_n_psi, "mesh n psi", v4);
}
if (i >= 2) {
viewer->updatePolygonMesh(*mesh_n, "mesh n");
viewer->updatePolygonMesh(*mesh_global_psi_inv, "mesh canonical warped to live");
viewer->updatePolygonMesh(*mesh_n_psi, "mesh n psi");
}
viewer->spinOnce(50);
/* save screenshot */
std::stringstream ss;
ss << std::setw(6) << std::setfill('0') << i;
std::string frame_num = ss.str();
viewer->saveScreenshot(out_screenshot_path_ + "/" + frame_num + ".png");
}
}
}
}
return true;
}
kfusion::cuda::Depth depth_device_;
std::shared_ptr<SobFusion> sobfu;
std::shared_ptr<pcl::visualization::PCLVisualizer> viewer;
bool exit_, logger_, visualizer_, visualizer_detailed_, off_screen_, verbose_, vverbose_;
std::string file_path_, params_path_, out_path_, out_screenshot_path_;
};
/*
 * parse the command-line arguments: each recognised flag toggles the corresponding option;
 * the first non-flag argument is taken as the data file path and any subsequent non-flag
 * argument as the .ini parameter file path
 */
void parse_flags(std::vector<std::string> args, std::string *file_path, std::string *params_path, bool *logger,
                 bool *visualizer, bool *visualizer_detailed, bool *verbose, bool *vverbose) {
    /* recognised option flags; anything else is treated as a positional argument */
    const std::vector<std::string> known_flags = {"-h", "--help", "--enable-viz", "--enable-viz-detailed",
                                                  "--enable-log", "--verbose", "--vverbose"};

    int positional = 0;
    for (const auto &token : args) {
        const bool is_flag = std::find(known_flags.begin(), known_flags.end(), token) != known_flags.end();

        if (!is_flag) {
            /* first positional argument is the data path; any later one (re)sets the .ini path */
            if (positional == 0) {
                *file_path = token;
                ++positional;
            } else {
                *params_path = token;
            }
            continue;
        }

        if (token == "-h" || token == "--help") {
            /* print usage and terminate immediately */
            std::cout << "USAGE: sobfu [OPTIONS] <file path> <ini path>" << std::endl;
            std::cout << "\t--help -h: display help" << std::endl;
            std::cout << "\t--enable-viz: enable visualizer" << std::endl;
            std::cout << "\t--enable-viz-detailed: enable visualizer with additional meshes for debugging"
                      << std::endl;
            std::cout << "\t--enable-log: log output meshes" << std::endl;
            std::cout << "\t--verbose: low verbosity" << std::endl;
            std::cout << "\t--vverbose: high verbosity" << std::endl;
            std::exit(EXIT_SUCCESS);
        }

        /* option flags only ever switch their target on, never off */
        if (token == "--enable-log") {
            *logger = true;
        } else if (token == "--enable-viz") {
            *visualizer = true;
        } else if (token == "--enable-viz-detailed") {
            *visualizer_detailed = true;
        } else if (token == "--verbose") {
            *verbose = true;
        } else if (token == "--vverbose") {
            *vverbose = true;
        }
    }
}
/* entry point: validates the environment and arguments, then runs the demo app */
int main(int argc, char *argv[]) {
    /* select and report the cuda device; bail out on pre-fermi hardware */
    const int device = 0;
    kfusion::cuda::setDevice(device);
    kfusion::cuda::printShortCudaDeviceInfo(device);
    if (kfusion::cuda::checkIfPreFermiGPU(device)) {
        std::cout << std::endl
                  << "kinfu does not support pre-fermi gpu architectures, and is not built for them by "
                     "default; exiting..."
                  << std::endl;
        return 1;
    }

    /* the program needs at least two arguments: the data directory and the .ini file */
    if (argc < 3) {
        std::cerr << "error: incorrect number of arguments; please supply path to source data and .ini file; "
                     "exiting..."
                  << std::endl;
        return -1;
    }

    std::vector<std::string> args(argv + 1, argv + argc);

    std::string file_path, params_path;
    bool logger = false, visualizer = false, visualizer_detailed = false, off_screen = false, verbose = false,
         vverbose = false;
    parse_flags(args, &file_path, &params_path, &logger, &visualizer, &visualizer_detailed, &verbose, &vverbose);

    /* disable the visualiser when running over SSH */
    if (((visualizer || visualizer_detailed) && !off_screen) && getenv("SSH_CLIENT")) {
        std::cerr << "error: cannot run visualiser while running over ssh; please run locally or disable the"
                     "visualiser. exiting..."
                  << std::endl;
        return -1;
    }

    SobFuApp app(file_path, params_path, logger, visualizer, visualizer_detailed, verbose, vverbose);

    /* execute */
    app.execute();
    return 0;
}
================================================
FILE: src/kfusion/CMakeLists.txt
================================================
# Build all kfusion source files into library kfusion
add_module_library(kfusion)

# Link the library against the cuda runtime and pcl
target_link_libraries(kfusion
    ${CUDA_CUDART_LIBRARY}
    ${PCL_LIBRARIES}
)
================================================
FILE: src/kfusion/core.cpp
================================================
#include <kfusion/kinfu.hpp>
#include <kfusion/safe_call.hpp>
#include <cuda.h>
#include <cstdio>
#include <iostream>
int kf::cuda::getCudaEnabledDeviceCount() {
int count;
cudaError_t error = cudaGetDeviceCount(&count);
if (error == cudaErrorInsufficientDriver)
return -1;
if (error == cudaErrorNoDevice)
return 0;
cudaSafeCall(error);
return count;
}
/* binds the given cuda device to the calling host thread */
void kf::cuda::setDevice(int device) {
    cudaSafeCall(cudaSetDevice(device));
}
std::string kf::cuda::getDeviceName(int device) {
cudaDeviceProp prop;
cudaSafeCall(cudaGetDeviceProperties(&prop, device));
return prop.name;
}
bool kf::cuda::checkIfPreFermiGPU(int device) {
if (device < 0)
cudaSafeCall(cudaGetDevice(&device));
cudaDeviceProp prop;
cudaSafeCall(cudaGetDeviceProperties(&prop, device));
return prop.major < 2; // CC == 1.x
}
namespace {
template <class T>
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device) {
*attribute = T();
CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device);
if (CUDA_SUCCESS == error)
return;
printf("Driver API error = %04d\n", error);
kfusion::cuda::error("driver API error", __FILE__, __LINE__);
}
inline int convertSMVer2Cores(int major, int minor) {
// Defines for GPU Architecture types (using the SM version to determine the #
// of cores per SM
typedef struct {
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM
// minor version
int Cores;
} SMtoCores;
SMtoCores gpuArchCoresPerSM[] = {{0x10, 8}, {0x11, 8}, {0x12, 8}, {0x13, 8}, {0x20, 32}, {0x21, 48},
{0x30, 192}, {0x35, 192}, {0x37, 192}, {0x50, 128}, {0x52, 128}, {0x60, 64},
{0x61, 128}, {0x70, 64}, {0x75, 64}, {-1, -1}};
int index = 0;
while (gpuArchCoresPerSM[index].SM != -1) {
if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor))
return gpuArchCoresPerSM[index].Cores;
index++;
}
printf("\nCan't determine number of cores. Unknown SM version %d.%d!\n", major, minor);
return 0;
}
} // namespace
/* Prints a detailed report for one CUDA device — or for every device when
 * `device` is out of range — in the style of NVIDIA's deviceQuery sample.
 * Reports compute capability, memory sizes, launch limits, and feature flags.
 * Output goes to stdout and is flushed before returning. */
void kf::cuda::printCudaDeviceInfo(int device) {
    int count = getCudaEnabledDeviceCount();
    /* an invalid device index selects the whole range [0, count) */
    bool valid = (device >= 0) && (device < count);
    int beg = valid ? device : 0;
    int end = valid ? device + 1 : count;
    printf(
        "*** CUDA Device Query (Runtime API) version (CUDART static linking) "
        "*** \n\n");
    printf("Device count: %d\n", count);
    int driverVersion = 0, runtimeVersion = 0;
    cudaSafeCall(cudaDriverGetVersion(&driverVersion));
    cudaSafeCall(cudaRuntimeGetVersion(&runtimeVersion));
    /* human-readable labels indexed by cudaDeviceProp::computeMode */
    const char *computeMode[] = {
        "Default (multiple host threads can use ::cudaSetDevice() with device "
        "simultaneously)",
        "Exclusive (only one host thread in one process is able to use "
        "::cudaSetDevice() with this device)",
        "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
        "Exclusive Process (many threads in one process is able to use "
        "::cudaSetDevice() with this device)",
        "Unknown",
        NULL};
    for (int dev = beg; dev < end; ++dev) {
        cudaDeviceProp prop;
        cudaSafeCall(cudaGetDeviceProperties(&prop, dev));
        /* CUDA cores per multiprocessor for this architecture */
        int sm_cores = convertSMVer2Cores(prop.major, prop.minor);
        printf("\nDevice %d: \"%s\"\n", dev, prop.name);
        printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion / 1000,
               driverVersion % 100, runtimeVersion / 1000, runtimeVersion % 100);
        printf("  CUDA Capability Major/Minor version number:    %d.%d\n", prop.major, prop.minor);
        printf(
            "  Total amount of global memory:                 %.0f MBytes (%llu "
            "bytes)\n",
            (float) prop.totalGlobalMem / 1048576.0f, (unsigned long long) prop.totalGlobalMem);
        printf("  (%2d) Multiprocessors x (%2d) CUDA Cores/MP:     %d CUDA Cores\n", prop.multiProcessorCount, sm_cores,
               sm_cores * prop.multiProcessorCount);
        printf("  GPU Clock Speed:                               %.2f GHz\n", prop.clockRate * 1e-6f);
#if (CUDART_VERSION >= 4000)
        // This is not available in the CUDA Runtime API, so we make the necessary
        // calls the driver API to support this for output
        int memoryClock, memBusWidth, L2CacheSize;
        getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
        getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
        getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
        printf("  Memory Clock rate:                             %.2f Mhz\n", memoryClock * 1e-3f);
        printf("  Memory Bus Width:                              %d-bit\n", memBusWidth);
        if (L2CacheSize)
            printf("  L2 Cache Size:                                 %d bytes\n", L2CacheSize);
        printf(
            "  Max Texture Dimension Size (x,y,z)             1D=(%d), "
            "2D=(%d,%d), 3D=(%d,%d,%d)\n",
            prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1], prop.maxTexture3D[0], prop.maxTexture3D[1],
            prop.maxTexture3D[2]);
        printf(
            "  Max Layered Texture Size (dim) x layers        1D=(%d) x %d, "
            "2D=(%d,%d) x %d\n",
            prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1], prop.maxTexture2DLayered[0],
            prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
#endif
        printf("  Total amount of constant memory:               %u bytes\n", (int) prop.totalConstMem);
        printf("  Total amount of shared memory per block:       %u bytes\n", (int) prop.sharedMemPerBlock);
        printf("  Total number of registers available per block: %d\n", prop.regsPerBlock);
        printf("  Warp size:                                     %d\n", prop.warpSize);
        printf("  Maximum number of threads per block:           %d\n", prop.maxThreadsPerBlock);
        printf("  Maximum sizes of each dimension of a block:    %d x %d x %d\n", prop.maxThreadsDim[0],
               prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
        printf("  Maximum sizes of each dimension of a grid:     %d x %d x %d\n", prop.maxGridSize[0],
               prop.maxGridSize[1], prop.maxGridSize[2]);
        printf("  Maximum memory pitch:                          %u bytes\n", (int) prop.memPitch);
        printf("  Texture alignment:                             %u bytes\n", (int) prop.textureAlignment);
#if CUDART_VERSION >= 4000
        printf(
            "  Concurrent copy and execution:                 %s with %d copy "
            "engine(s)\n",
            (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
#else
        printf("  Concurrent copy and execution:                 %s\n", prop.deviceOverlap ? "Yes" : "No");
#endif
        printf("  Run time limit on kernels:                     %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
        printf("  Integrated GPU sharing Host Memory:            %s\n", prop.integrated ? "Yes" : "No");
        printf("  Support host page-locked memory mapping:       %s\n", prop.canMapHostMemory ? "Yes" : "No");
        printf("  Concurrent kernel execution:                   %s\n", prop.concurrentKernels ? "Yes" : "No");
        printf("  Alignment requirement for Surfaces:            %s\n", prop.surfaceAlignment ? "Yes" : "No");
        printf("  Device has ECC support enabled:                %s\n", prop.ECCEnabled ? "Yes" : "No");
        printf("  Device is using TCC driver mode:               %s\n", prop.tccDriver ? "Yes" : "No");
#if CUDART_VERSION >= 4000
        printf("  Device supports Unified Addressing (UVA):      %s\n", prop.unifiedAddressing ? "Yes" : "No");
        printf("  Device PCI Bus ID / PCI location ID:           %d / %d\n", prop.pciBusID, prop.pciDeviceID);
#endif
        printf("  Compute Mode:\n");
        printf("      %s \n", computeMode[prop.computeMode]);
    }
    printf("\n");
    printf("deviceQuery, CUDA Driver = CUDART");
    printf(", CUDA Driver Version  = %d.%d", driverVersion / 1000, driverVersion % 100);
    printf(", CUDA Runtime Version = %d.%d", runtimeVersion / 1000, runtimeVersion % 100);
    printf(", NumDevs = %d\n\n", count);
    fflush(stdout);
}
void kf::cuda::printShortCudaDeviceInfo(int device) {
int count = getCudaEnabledDeviceCount();
bool valid = (device >= 0) && (device < count);
int beg = valid ? device : 0;
int end = valid ? device + 1 : count;
int driverVersion = 0, runtimeVersion = 0;
cudaSafeCall(cudaDriverGetVersion(&driverVersion));
cudaSafeCall(cudaRuntimeGetVersion(&runtimeVersion));
for (int dev = beg; dev < end; ++dev) {
cudaDeviceProp prop;
cudaSafeCall(cudaGetDeviceProperties(&prop, dev));
const char *arch_str = prop.major < 2 ? " (pre-Fermi)" : "";
printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float) prop.totalGlobalMem / 1048576.0f);
printf(", sm_%d%d%s, %d cores", prop.major, prop.minor, arch_str,
convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion / 1000, driverVersion % 100, runtimeVersion / 1000,
runtimeVersion % 100);
}
fflush(stdout);
}
/* Starts timing; `time_ms` is an external accumulator (kept by reference) that the destructor adds this sample's elapsed milliseconds to. */
kf::SampledScopeTime::SampledScopeTime(double &time_ms) : time_ms_(time_ms) { start = (double) cv::getTickCount(); }
/* Adds this sample's elapsed time to the shared accumulator and, once every
 * EACH samples, prints the average frame time / fps and resets it. */
kf::SampledScopeTime::~SampledScopeTime() {
    static int i_ = 0;  // global sample counter across all instances (NOTE: not thread-safe)
    time_ms_ += getTime();
    /* report every EACH samples; `&& i_` skips the very first destructor call */
    if (i_ % EACH == 0 && i_) {
        std::cout << "avg. frame time = " << time_ms_ / EACH << "ms (" << 1000.f * EACH / time_ms_ << "fps)"
                  << std::endl;
        time_ms_ = 0.0;
    }
    ++i_;
}
/* Returns the milliseconds elapsed since construction, measured with
 * OpenCV's tick counter. */
double kf::SampledScopeTime::getTime() {
    const double elapsed_ticks = (double) cv::getTickCount() - start;
    return elapsed_ticks * 1000.0 / cv::getTickFrequency();
}
/* Starts timing a named scope; the elapsed time is printed by the destructor. `name_` must outlive this object (only the pointer is stored). */
kf::ScopeTime::ScopeTime(const char *name_) : name(name_) { start = (double) cv::getTickCount(); }
/* Prints the milliseconds the scope was alive, labelled with its name. */
kf::ScopeTime::~ScopeTime() {
    const double elapsed_ticks = (double) cv::getTickCount() - start;
    const double elapsed_ms    = elapsed_ticks * 1000.0 / cv::getTickFrequency();
    std::cout << "Time(" << name << ") = " << elapsed_ms << "ms" << std::endl;
}
================================================
FILE: src/kfusion/cuda/imgproc.cu
================================================
#include <kfusion/cuda/device.hpp>
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Depth bilateral filter
namespace kfusion {
namespace device {
/* Bilateral smoothing of a 16-bit depth map: each output pixel is a weighted
 * average over a ksz x ksz window, with weights decaying with both spatial
 * distance and depth difference (edge-preserving).
 * sigma_spatial2_inv_half / sigma_depth2_inv_half are 0.5 / sigma^2 for the
 * spatial (pixels) and depth (mm) terms respectively. */
__global__ void bilateral_kernel(const PtrStepSz<ushort> src, PtrStep<ushort> dst, const int ksz,
                                 const float sigma_spatial2_inv_half, const float sigma_depth2_inv_half) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= src.cols || y >= src.rows)
        return;
    int value = src(y, x);
    /* window extent clamped to the image border (upper bounds, exclusive) */
    int tx = min(x - ksz / 2 + ksz, src.cols - 1);
    int ty = min(y - ksz / 2 + ksz, src.rows - 1);
    float sum1 = 0;  // weighted depth sum
    float sum2 = 0;  // weight sum
    for (int cy = max(y - ksz / 2, 0); cy < ty; ++cy) {
        for (int cx = max(x - ksz / 2, 0); cx < tx; ++cx) {
            int depth = src(cy, cx);
            float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
            float color2 = (value - depth) * (value - depth);
            float weight = __expf(-(space2 * sigma_spatial2_inv_half + color2 * sigma_depth2_inv_half));
            sum1 += depth * weight;
            sum2 += weight;
        }
    }
    /* __expf is strictly positive, so sum2 > 0 whenever the window is non-empty */
    dst(y, x) = __float2int_rn(sum1 / sum2);
}
} // namespace device
} // namespace kfusion
/* Host launcher for the edge-preserving bilateral filter on a 16-bit depth map.
 * @param src          input depth (mm)
 * @param dst          filtered output depth (mm), same size as src
 * @param kernel_size  filter window side length in pixels
 * @param sigma_spatial spatial standard deviation, in pixels
 * @param sigma_depth   depth standard deviation, in meters (converted to mm
 *                      below to match the depth units)
 * Fix: dropped the stray semicolon that followed the function body (an empty
 * declaration, rejected under -pedantic). */
void kfusion::device::bilateralFilter(const Depth& src, Depth& dst, int kernel_size, float sigma_spatial,
                                      float sigma_depth) {
    sigma_depth *= 1000; // meters -> mm
    dim3 block(64, 16);
    dim3 grid(divUp(src.cols(), block.x), divUp(src.rows(), block.y));
    /* favour L1 cache over shared memory for this kernel */
    cudaSafeCall(cudaFuncSetCacheConfig(bilateral_kernel, cudaFuncCachePreferL1));
    bilateral_kernel<<<grid, block>>>(src, dst, kernel_size, 0.5f / (sigma_spatial * sigma_spatial),
                                      0.5f / (sigma_depth * sigma_depth));
    cudaSafeCall(cudaGetLastError());
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Depth truncation
namespace kfusion {
namespace device {
/* Zeroes (marks invalid) every depth sample further than max_dist (mm). */
__global__ void truncate_depth_kernel(PtrStepSz<ushort> depth, ushort max_dist /*mm*/) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < depth.cols && y < depth.rows)
        if (depth(y, x) > max_dist)
            depth(y, x) = 0;
}
} // namespace device
} // namespace kfusion
/* Host launcher: invalidates (zeroes) depth samples beyond max_dist meters. */
void kfusion::device::truncateDepth(Depth& depth, float max_dist /*meters*/) {
    const ushort max_dist_mm = static_cast<ushort>(max_dist * 1000.f);  // meters -> mm
    dim3 threads(64, 16);
    dim3 blocks(divUp(depth.cols(), threads.x), divUp(depth.rows(), threads.y));
    truncate_depth_kernel<<<blocks, threads>>>(depth, max_dist_mm);
    cudaSafeCall(cudaGetLastError());
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Build depth pyramid
namespace kfusion {
namespace device {
/* Builds one pyramid level by 2x downsampling: each output pixel averages a
 * DxD window around its source pixel (2x, 2y), but only over samples whose
 * depth differs from the window center by less than sigma_depth_mult3
 * (3 sigma, mm) — this avoids averaging across depth discontinuities. */
__global__ void pyramid_kernel(const PtrStepSz<ushort> src, PtrStepSz<ushort> dst, float sigma_depth_mult3) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= dst.cols || y >= dst.rows)
        return;
    const int D = 5;  // averaging window side length, in source pixels
    int center = src(2 * y, 2 * x);
    /* window bounds clamped to the source image (upper bounds, exclusive) */
    int tx = min(2 * x - D / 2 + D, src.cols - 1);
    int ty = min(2 * y - D / 2 + D, src.rows - 1);
    int cy = max(0, 2 * y - D / 2);
    int sum = 0;
    int count = 0;
    for (; cy < ty; ++cy)
        for (int cx = max(0, 2 * x - D / 2); cx < tx; ++cx) {
            int val = src(cy, cx);
            if (abs(val - center) < sigma_depth_mult3) {
                sum += val;
                ++count;
            }
        }
    /* zero (invalid) when no sample passed the similarity test */
    dst(y, x) = (count == 0) ? 0 : sum / count;
}
} // namespace device
} // namespace kfusion
/* Host launcher: 2x downsamples `source` into `pyramid`, averaging only
 * depths within 3*sigma_depth of each window center. */
void kfusion::device::depthPyr(const Depth& source, Depth& pyramid, float sigma_depth) {
    const float sigma_depth_mm = sigma_depth * 1000;  // meters -> mm
    dim3 threads(64, 16);
    dim3 blocks(divUp(pyramid.cols(), threads.x), divUp(pyramid.rows(), threads.y));
    pyramid_kernel<<<blocks, threads>>>(source, pyramid, sigma_depth_mm * 3);
    cudaSafeCall(cudaGetLastError());
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Compute normals
namespace kfusion {
namespace device {
/* Estimates a per-pixel normal from the depth map via the cross product of
 * two forward differences; writes a quiet-NaN normal wherever the forward
 * neighbourhood is incomplete or contains an invalid (zero) depth sample. */
__global__ void compute_normals_kernel(const PtrStepSz<ushort> depth, const Reprojector reproj,
                                       PtrStep<Normal> normals) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= depth.cols || y >= depth.rows)
        return;
    const float qnan = numeric_limits<float>::quiet_NaN();
    Normal n_out = make_float4(qnan, qnan, qnan, 0.f);
    if (x < depth.cols - 1 && y < depth.rows - 1) {
        // mm -> meters
        float z00 = depth(y, x) * 0.001f;
        float z01 = depth(y, x + 1) * 0.001f;
        float z10 = depth(y + 1, x) * 0.001f;
        /* all three samples must be valid, i.e. non-zero */
        if (z00 * z01 * z10 != 0) {
            float3 v00 = reproj(x, y, z00);
            float3 v01 = reproj(x + 1, y, z01);
            float3 v10 = reproj(x, y + 1, z10);
            float3 n = normalized(cross(v01 - v00, v10 - v00));
            /* negated to match the orientation convention used downstream */
            n_out = make_float4(-n.x, -n.y, -n.z, 0.f);
        }
    }
    normals(y, x) = n_out;
}
/* Invalidates (zeroes) depth wherever no normal could be estimated. */
__global__ void mask_depth_kernel(const PtrStep<Normal> normals, PtrStepSz<ushort> depth) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    /* fix: the bounds check used `||`, which let threads with only one
     * coordinate in range read/write past the image */
    if (x < depth.cols && y < depth.rows) {
        float4 n = normals(y, x);
        if (isnan(n.x))
            depth(y, x) = 0;
    }
}
} // namespace device
} // namespace kfusion
/* Host launcher: estimates per-pixel normals from `depth`, then zeroes the
 * depth samples for which no normal could be computed. */
void kfusion::device::computeNormalsAndMaskDepth(const Reprojector& reproj, Depth& depth, Normals& normals) {
    dim3 threads(64, 16);
    dim3 blocks(divUp(depth.cols(), threads.x), divUp(depth.rows(), threads.y));
    /* pass 1: normals from depth */
    compute_normals_kernel<<<blocks, threads>>>(depth, reproj, normals);
    cudaSafeCall(cudaGetLastError());
    /* pass 2: invalidate depth where the normal is NaN */
    mask_depth_kernel<<<blocks, threads>>>(normals, depth);
    cudaSafeCall(cudaGetLastError());
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Compute computePointNormals
namespace kfusion {
namespace device {
/* Reprojects each depth pixel into a 3D point and estimates its normal from
 * forward differences. Pixels on the last row/column, or whose forward
 * neighbourhood contains an invalid (zero) depth, keep quiet-NaN outputs. */
__global__ void points_normals_kernel(const Reprojector reproj, const PtrStepSz<ushort> depth, PtrStep<Point> points,
                                      PtrStep<Normal> normals) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= depth.cols || y >= depth.rows)
        return;
    /* default: invalid point/normal */
    const float qnan = numeric_limits<float>::quiet_NaN();
    points(y, x) = normals(y, x) = make_float4(qnan, qnan, qnan, qnan);
    if (x >= depth.cols - 1 || y >= depth.rows - 1)
        return;
    // mm -> meters
    float z00 = depth(y, x) * 0.001f;
    float z01 = depth(y, x + 1) * 0.001f;
    float z10 = depth(y + 1, x) * 0.001f;
    /* all three samples must be valid, i.e. non-zero */
    if (z00 * z01 * z10 != 0) {
        float3 v00 = reproj(x, y, z00);
        float3 v01 = reproj(x + 1, y, z01);
        float3 v10 = reproj(x, y + 1, z10);
        float3 n = normalized(cross(v01 - v00, v10 - v00));
        normals(y, x) = make_float4(-n.x, -n.y, -n.z, 0.f);
        points(y, x) = make_float4(v00.x, v00.y, v00.z, 0.f);
    }
}
} // namespace device
} // namespace kfusion
/* Host launcher: computes 3D points and normals for every depth pixel. */
void kfusion::device::computePointNormals(const Reprojector& reproj, const Depth& depth, Points& points,
                                          Normals& normals) {
    dim3 threads(64, 16);
    dim3 blocks(divUp(depth.cols(), threads.x), divUp(depth.rows(), threads.y));
    points_normals_kernel<<<blocks, threads>>>(reproj, depth, points, normals);
    cudaSafeCall(cudaGetLastError());
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Compute dists
namespace kfusion {
namespace device {
/* Converts raw depth (mm along the optical axis) into metric distance along
 * the per-pixel viewing ray: dist = depth * sqrt(xl^2 + yl^2 + 1), where
 * (xl, yl) are the pixel's normalized image-plane coordinates.
 * finv = (1/fx, 1/fy), c = principal point. */
__global__ void compute_dists_kernel(const PtrStepSz<ushort> depth, Dists dists, float2 finv, float2 c) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    /* fix: the bounds check used `||`, which let threads with only one
     * coordinate in range read/write past the image */
    if (x < depth.cols && y < depth.rows) {
        float xl = (x - c.x) * finv.x;
        float yl = (y - c.y) * finv.y;
        float lambda = sqrtf(xl * xl + yl * yl + 1);
        dists(y, x) = depth(y, x) * lambda * 0.001f; // meters
    }
}
} // namespace device
} // namespace kfusion
/* Host launcher: converts axial depth (mm) to distance along each viewing
 * ray (meters), given focal lengths f and principal point c. */
void kfusion::device::compute_dists(const Depth& depth, Dists dists, float2 f, float2 c) {
    const float2 finv = make_float2(1.f / f.x, 1.f / f.y);  // reciprocal focal lengths
    dim3 threads(64, 16);
    dim3 blocks(divUp(depth.cols(), threads.x), divUp(depth.rows(), threads.y));
    compute_dists_kernel<<<blocks, threads>>>(depth, dists, finv, c);
    cudaSafeCall(cudaGetLastError());
}
namespace kfusion {
namespace device {
/* Halves the resolution of a depth map and its normal map: each output pixel
 * averages a 2x2 source block. If any of the four depth samples is invalid
 * (zero), the output depth is 0 and the normal is quiet-NaN. */
__global__ void resize_depth_normals_kernel(const PtrStep<ushort> dsrc, const PtrStep<float4> nsrc,
                                            PtrStepSz<ushort> ddst, PtrStep<float4> ndst) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= ddst.cols || y >= ddst.rows)
        return;
    /* defaults written when the 2x2 block is invalid */
    const float qnan = numeric_limits<float>::quiet_NaN();
    ushort d = 0;
    float4 n = make_float4(qnan, qnan, qnan, qnan);
    /* top-left corner of the 2x2 source block */
    int xs = x * 2;
    int ys = y * 2;
    int d00 = dsrc(ys + 0, xs + 0);
    int d01 = dsrc(ys + 0, xs + 1);
    int d10 = dsrc(ys + 1, xs + 0);
    int d11 = dsrc(ys + 1, xs + 1);
    /* all four depth samples must be non-zero */
    if (d00 * d01 != 0 && d10 * d11 != 0) {
        d = (d00 + d01 + d10 + d11) / 4;
        float4 n00 = nsrc(ys + 0, xs + 0);
        float4 n01 = nsrc(ys + 0, xs + 1);
        float4 n10 = nsrc(ys + 1, xs + 0);
        float4 n11 = nsrc(ys + 1, xs + 1);
        /* note: the averaged normal is not re-normalized here */
        n.x = (n00.x + n01.x + n10.x + n11.x) * 0.25;
        n.y = (n00.y + n01.y + n10.y + n11.y) * 0.25;
        n.z = (n00.z + n01.z + n10.z + n11.z) * 0.25;
    }
    ddst(y, x) = d;
    ndst(y, x) = n;
}
} // namespace device
} // namespace kfusion
/* Host launcher: 2x downsamples a depth map together with its normal map. */
void kfusion::device::resizeDepthNormals(const Depth& depth, const Normals& normals, Depth& depth_out,
                                         Normals& normals_out) {
    const int half_cols = depth.cols() / 2;
    const int half_rows = depth.rows() / 2;
    dim3 threads(64, 16);
    dim3 blocks(divUp(half_cols, threads.x), divUp(half_rows, threads.y));
    resize_depth_normals_kernel<<<blocks, threads>>>(depth, normals, depth_out, normals_out);
    cudaSafeCall(cudaGetLastError());
}
namespace kfusion {
namespace device {
/* Halves the resolution of a point map and its normal map: each output pixel
 * averages a 2x2 source block. If any of the four points is invalid (NaN),
 * the quiet-NaN defaults written at the top are kept. */
__global__ void resize_points_normals_kernel(const PtrStep<Point> vsrc, const PtrStep<Normal> nsrc,
                                             PtrStepSz<Point> vdst, PtrStep<Normal> ndst) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= vdst.cols || y >= vdst.rows)
        return;
    /* default: invalid point/normal */
    const float qnan = numeric_limits<float>::quiet_NaN();
    vdst(y, x) = ndst(y, x) = make_float4(qnan, qnan, qnan, 0.f);
    /* top-left corner of the 2x2 source block */
    int xs = x * 2;
    int ys = y * 2;
    float3 d00 = tr(vsrc(ys + 0, xs + 0));
    float3 d01 = tr(vsrc(ys + 0, xs + 1));
    float3 d10 = tr(vsrc(ys + 1, xs + 0));
    float3 d11 = tr(vsrc(ys + 1, xs + 1));
    /* the product is NaN iff any of the four x components is NaN */
    if (!isnan(d00.x * d01.x * d10.x * d11.x)) {
        float3 d = (d00 + d01 + d10 + d11) * 0.25f;
        vdst(y, x) = make_float4(d.x, d.y, d.z, 0.f);
        float3 n00 = tr(nsrc(ys + 0, xs + 0));
        float3 n01 = tr(nsrc(ys + 0, xs + 1));
        float3 n10 = tr(nsrc(ys + 1, xs + 0));
        float3 n11 = tr(nsrc(ys + 1, xs + 1));
        /* note: the averaged normal is not re-normalized here */
        float3 n = (n00 + n01 + n10 + n11) * 0.25f;
        ndst(y, x) = make_float4(n.x, n.y, n.z, 0.f);
    }
}
} // namespace device
} // namespace kfusion
/* Host launcher: 2x downsamples a point map together with its normal map. */
void kfusion::device::resizePointsNormals(const Points& points, const Normals& normals, Points& points_out,
                                          Normals& normals_out) {
    const int half_cols = points.cols() / 2;
    const int half_rows = points.rows() / 2;
    dim3 threads(64, 16);
    dim3 blocks(divUp(half_cols, threads.x), divUp(half_rows, threads.y));
    resize_points_normals_kernel<<<blocks, threads>>>(points, normals, points_out, normals_out);
    cudaSafeCall(cudaGetLastError());
}
namespace kfusion {
namespace device {
/* Computes the image-domain bounding box of a projected triangle with
 * vertices v1, v2, v3; the float extrema are truncated when stored as
 * integer pixel coordinates in `min` / `max`. */
__host__ __device__ void get_bounding_box(float2 v1, float2 v2, float2 v3, int2& min, int2& max) {
    const float lo_x = fmin(fmin(v1.x, v2.x), v3.x);
    const float lo_y = fmin(fmin(v1.y, v2.y), v3.y);
    const float hi_x = fmax(fmax(v1.x, v2.x), v3.x);
    const float hi_y = fmax(fmax(v1.y, v2.y), v3.y);
    min.x = static_cast<int>(lo_x);
    min.y = static_cast<int>(lo_y);
    max.x = static_cast<int>(hi_x);
    max.y = static_cast<int>(hi_y);
}
/* Edge function for rasterisation: the 2D cross product of (b - a) and
 * (c - a) up to sign; its sign tells on which side of edge (a, b) the point
 * c lies, and it is zero when c is collinear with the edge. */
__host__ __device__ __forceinline__ float edge_function(const float2& a, const float2& b, const float2& c) {
    const float ab_x = b.x - a.x;
    const float ab_y = b.y - a.y;
    const float ac_x = c.x - a.x;
    const float ac_y = c.y - a.y;
    return ac_x * ab_y - ac_y * ab_x;
}
/* rasterise surface triangles */
__global__ void rasterise_surface_kernel(const Projector proj, const Aff3f vol2cam, const PtrSz<Point> vsrc,
const PtrSz<Normal> nsrc, PtrStepSz<Point> points_out,
PtrStep<Normal> normals_out) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 3;
if ((x + 2) >= vsrc.size)
return;
/* get vertices and normals */
float3 v1 = vol2cam * tr(vsrc[x]);
float3 v2 = vol2cam * tr(vsrc[x + 1]);
float3 v3 = vol2cam * tr(vsrc[x + 2]);
/* project vertices onto the image plane */
float2 coos1 = proj(v1);
gitextract_kh2b_hh_/
├── .clang-format
├── .gitignore
├── .gitlab-ci.yml
├── .lintflags
├── .lintignore
├── CMakeLists.txt
├── LICENSE.md
├── README.md
├── cmake/
│ ├── Targets.cmake
│ └── Utils.cmake
├── include/
│ ├── kfusion/
│ │ ├── cuda/
│ │ │ ├── device.hpp
│ │ │ ├── device_array.hpp
│ │ │ ├── device_memory.hpp
│ │ │ ├── imgproc.hpp
│ │ │ ├── kernel_containers.hpp
│ │ │ ├── marching_cubes.hpp
│ │ │ ├── projective_icp.hpp
│ │ │ ├── temp_utils.hpp
│ │ │ ├── texture_binder.hpp
│ │ │ └── tsdf_volume.hpp
│ │ ├── exports.hpp
│ │ ├── internal.hpp
│ │ ├── kinfu.hpp
│ │ ├── precomp.hpp
│ │ ├── safe_call.hpp
│ │ └── types.hpp
│ └── sobfu/
│ ├── cuda/
│ │ └── utils.hpp
│ ├── params.hpp
│ ├── precomp.hpp
│ ├── reductor.hpp
│ ├── scalar_fields.hpp
│ ├── sob_fusion.hpp
│ ├── solver.hpp
│ └── vector_fields.hpp
├── params/
│ ├── params_advent.ini
│ ├── params_boxing.ini
│ ├── params_hat.ini
│ ├── params_ours.ini
│ ├── params_snoopy.ini
│ └── params_umbrella.ini
├── setup.sh
├── src/
│ ├── CMakeLists.txt
│ ├── apps/
│ │ ├── CMakeLists.txt
│ │ └── demo.cpp
│ ├── kfusion/
│ │ ├── CMakeLists.txt
│ │ ├── core.cpp
│ │ ├── cuda/
│ │ │ ├── imgproc.cu
│ │ │ ├── marching_cubes.cu
│ │ │ ├── proj_icp.cu
│ │ │ └── tsdf_volume.cu
│ │ ├── device_memory.cpp
│ │ ├── imgproc.cpp
│ │ ├── kinfu.cpp
│ │ ├── marching_cubes.cpp
│ │ ├── precomp.cpp
│ │ ├── projective_icp.cpp
│ │ └── tsdf_volume.cpp
│ └── sobfu/
│ ├── CMakeLists.txt
│ ├── cuda/
│ │ ├── reductor.cu
│ │ ├── scalar_fields.cu
│ │ ├── solver.cu
│ │ └── vector_fields.cu
│ ├── precomp.cpp
│ ├── reductor.cpp
│ ├── scalar_fields.cpp
│ ├── sob_fusion.cpp
│ ├── solver.cpp
│ └── vector_fields.cpp
└── test/
├── CMakeLists.txt
├── deformation_field_test.cpp
├── main.cpp
├── reductions_test.cpp
└── solver_test.cpp
SYMBOL INDEX (329 symbols across 36 files)
FILE: include/kfusion/cuda/device.hpp
function __kf_device__ (line 21) | __kf_device__ float2 *kfusion::device::TsdfVolume::operator()(int x, int...
function __kf_device__ (line 25) | __kf_device__ const float2 *kfusion::device::TsdfVolume::operator()(int ...
function __kf_device__ (line 29) | __kf_device__ float2 *kfusion::device::TsdfVolume::beg(int x, int y) con...
function __kf_device__ (line 31) | __kf_device__ float2 *kfusion::device::TsdfVolume::zstep(float2 *const p...
function __kf_device__ (line 36) | __kf_device__ float2 kfusion::device::Projector::operator()(const float3...
function __kf_device__ (line 46) | __kf_device__ float3 kfusion::device::Reprojector::operator()(int u, int...
type kfusion (line 55) | namespace kfusion {
type device (line 56) | namespace device {
function __kf_device__ (line 57) | __kf_device__ Vec3f operator*(const Mat3f &m, const Vec3f &v) {
function __kf_device__ (line 61) | __kf_device__ Vec3f operator*(const Aff3f &a, const Vec3f &v) { retu...
function __kf_device__ (line 64) | __kf_device__ Aff3f operator*(const float &s, const Aff3f &a) {
function __kf_device__ (line 72) | __kf_device__ Mat3f operator*(const Mat3f &m, const Mat3f &n) {
function __kf_device__ (line 81) | __kf_device__ Aff3f operator*(const Aff3f &a, const Aff3f &b) {
function __kf_device__ (line 88) | __kf_device__ Vec3f tr(const float4 &v) { return make_float3(v.x, v....
type plus (line 90) | struct plus {
method __kf_device__ (line 91) | __kf_device__ float operator()(float l, float r) const { return l ...
method __kf_device__ (line 92) | __kf_device__ double operator()(double l, double r) const { return...
FILE: include/kfusion/cuda/device_array.hpp
type kfusion (line 8) | namespace kfusion {
type cuda (line 9) | namespace cuda {
function DeviceMemory (line 18) | class KF_EXPORTS DeviceArray : public DeviceMemory {
function DeviceMemory2D (line 116) | class KF_EXPORTS DeviceArray2D : public DeviceMemory2D {
type device (line 227) | namespace device {
function T (line 291) | inline T *kfusion::cuda::DeviceArray<T>::ptr() {
function T (line 295) | inline const T *kfusion::cuda::DeviceArray<T>::ptr() const {
function T (line 373) | inline T *kfusion::cuda::DeviceArray2D<T>::ptr(int y) {
function T (line 377) | inline const T *kfusion::cuda::DeviceArray2D<T>::ptr(int y) const {
FILE: include/kfusion/cuda/device_memory.hpp
type kfusion (line 6) | namespace kfusion {
type cuda (line 7) | namespace cuda {
function DeviceMemory (line 20) | class KF_EXPORTS DeviceMemory {
function DeviceMemory2D (line 113) | class KF_EXPORTS DeviceMemory2D {
type device (line 228) | namespace device {
function T (line 238) | inline T *kfusion::cuda::DeviceMemory::ptr() {
function T (line 242) | inline const T *kfusion::cuda::DeviceMemory::ptr() const {
function T (line 258) | T *kfusion::cuda::DeviceMemory2D::ptr(int y_arg) {
function T (line 262) | const T *kfusion::cuda::DeviceMemory2D::ptr(int y_arg) const {
FILE: include/kfusion/cuda/imgproc.hpp
type kfusion (line 9) | namespace kfusion {
type cuda (line 10) | namespace cuda {
FILE: include/kfusion/cuda/kernel_containers.hpp
type kfusion (line 13) | namespace kfusion {
type cuda (line 14) | namespace cuda {
type DevPtr (line 16) | struct DevPtr {
method __kf_hdevice__ (line 22) | __kf_hdevice__ DevPtr() : data(0) {}
method __kf_hdevice__ (line 23) | __kf_hdevice__ DevPtr(T *data_arg) : data(data_arg) {}
method __kf_hdevice__ (line 25) | __kf_hdevice__ size_t elemSize() const { return elem_size; }
method __kf_hdevice__ (line 26) | __kf_hdevice__ operator T *() { return data; }
method __kf_hdevice__ (line 27) | __kf_hdevice__ operator const T *() const { return data; }
type PtrSz (line 31) | struct PtrSz : public DevPtr<T> {
method __kf_hdevice__ (line 32) | __kf_hdevice__ PtrSz() : size(0) {}
type PtrStep (line 39) | struct PtrStep : public DevPtr<T> {
method __kf_hdevice__ (line 40) | __kf_hdevice__ PtrStep() : step(0) {}
method __kf_hdevice__ (line 47) | __kf_hdevice__ T *ptr(int y = 0) { return (T *) ((char *) DevPtr<T...
method __kf_hdevice__ (line 48) | __kf_hdevice__ const T *ptr(int y = 0) const { return (const T *) ...
method __kf_hdevice__ (line 50) | __kf_hdevice__ T &operator()(int y, int x) { return ptr(y)[x]; }
method __kf_hdevice__ (line 51) | __kf_hdevice__ const T &operator()(int y, int x) const { return pt...
type PtrStepSz (line 55) | struct PtrStepSz : public PtrStep<T> {
type device (line 65) | namespace device {
FILE: include/kfusion/cuda/marching_cubes.hpp
type kfusion (line 13) | namespace kfusion {
type cuda (line 14) | namespace cuda {
function MarchingCubes (line 19) | class KF_EXPORTS MarchingCubes {
FILE: include/kfusion/cuda/projective_icp.hpp
type kfusion (line 5) | namespace kfusion {
type cuda (line 6) | namespace cuda {
class ProjectiveICP (line 7) | class ProjectiveICP {
type StreamHelper (line 43) | struct StreamHelper
FILE: include/kfusion/cuda/temp_utils.hpp
type kfusion (line 8) | namespace kfusion {
type device (line 9) | namespace device {
function __kf_hdevice__ (line 11) | __kf_hdevice__ void swap(T &a, T &b) {
type numeric_limits (line 18) | struct numeric_limits
type numeric_limits<float> (line 21) | struct numeric_limits<float> {
method __kf_device__ (line 22) | __kf_device__ static float quiet_NaN() { return __int_as_float(0x7...
method __kf_device__ (line 23) | __kf_device__ static float epsilon() { return 1.192092896e-07f /*F...
method __kf_device__ (line 24) | __kf_device__ static float min() { return 1.175494351e-38f /*FLT_M...
method __kf_device__ (line 25) | __kf_device__ static float max() { return 3.402823466e+38f /*FLT_M...
type numeric_limits<unsigned short> (line 29) | struct numeric_limits<unsigned short> {
method max (line 30) | short max() { return USHRT_MAX; }
function __kf_device__ (line 33) | __kf_device__ float dot(const float3 &v1, const float3 &v2) {
function __kf_device__ (line 37) | __kf_device__ float3 &operator+=(float3 &vec, const float &v) {
function __kf_device__ (line 44) | __kf_device__ float3 &operator+=(float3 &v1, const float3 &v2) {
function __kf_device__ (line 51) | __kf_device__ float3 operator+(const float3 &v1, const float3 &v2) {
function __kf_device__ (line 55) | __kf_device__ float3 operator*(const float3 &v1, const float3 &v2) {
function __kf_hdevice__ (line 59) | __kf_hdevice__ float3 operator*(const float3 &v1, const int3 &v2) {
function __kf_device__ (line 63) | __kf_device__ float3 operator/(const float3 &v1, const float3 &v2) {
function __kf_hdevice__ (line 67) | __kf_hdevice__ float3 operator/(const float &v, const float3 &vec) {
function __kf_device__ (line 71) | __kf_device__ float3 &operator*=(float3 &vec, const float &v) {
function __kf_hdevice__ (line 78) | __kf_hdevice__ float3 operator-(const float3 &v1, const float3 &v2) {
function __kf_hdevice__ (line 82) | __kf_hdevice__ float3 operator*(const float3 &v1, const float &v) { ...
function __kf_hdevice__ (line 84) | __kf_hdevice__ float3 operator*(const float &v, const float3 &v1) { ...
function __kf_device__ (line 86) | __kf_device__ float norm(const float3 &v) { return sqrt(dot(v, v)); }
function __kf_device__ (line 88) | __kf_device__ float norm_sqr(const float3 &v) { return dot(v, v); }
function __kf_device__ (line 90) | __kf_device__ float3 normalized(const float3 &v) { return v * rsqrt(...
function __kf_hdevice__ (line 92) | __kf_hdevice__ float3 cross(const float3 &v1, const float3 &v2) {
function __kf_device__ (line 96) | __kf_device__ void computeRoots2(const float &b, const float &c, flo...
function __kf_device__ (line 108) | __kf_device__ void computeRoots3(float c0, float c1, float c2, float...
type Eigen33 (line 153) | struct Eigen33 {
type MiniMat (line 156) | struct MiniMat {
method __kf_hdevice__ (line 158) | __kf_hdevice__ float3 &operator[](int i) { return data[i]; }
method __kf_hdevice__ (line 159) | __kf_hdevice__ const float3 &operator[](int i) const { return da...
method __kf_device__ (line 164) | static __kf_device__ float3 unitOrthogonal(const float3 &src) {
method __kf_device__ (line 193) | __kf_device__ Eigen33(volatile float *mat_pkg_arg) : mat_pkg(mat_p...
method __kf_device__ (line 194) | __kf_device__ void compute(Mat33 &tmp, Mat33 &vec_tmp, Mat33 &evec...
method __kf_device__ (line 388) | __kf_device__ float m00() const { return mat_pkg[0]; }
method __kf_device__ (line 389) | __kf_device__ float m01() const { return mat_pkg[1]; }
method __kf_device__ (line 390) | __kf_device__ float m02() const { return mat_pkg[2]; }
method __kf_device__ (line 391) | __kf_device__ float m10() const { return mat_pkg[1]; }
method __kf_device__ (line 392) | __kf_device__ float m11() const { return mat_pkg[3]; }
method __kf_device__ (line 393) | __kf_device__ float m12() const { return mat_pkg[4]; }
method __kf_device__ (line 394) | __kf_device__ float m20() const { return mat_pkg[2]; }
method __kf_device__ (line 395) | __kf_device__ float m21() const { return mat_pkg[4]; }
method __kf_device__ (line 396) | __kf_device__ float m22() const { return mat_pkg[5]; }
method __kf_device__ (line 398) | __kf_device__ float3 row0() const { return make_float3(m00(), m01(...
method __kf_device__ (line 399) | __kf_device__ float3 row1() const { return make_float3(m10(), m11(...
method __kf_device__ (line 400) | __kf_device__ float3 row2() const { return make_float3(m20(), m21(...
method __kf_device__ (line 402) | __kf_device__ static bool isMuchSmallerThan(float x, float y) {
type Warp (line 409) | struct Warp {
method laneId (line 413) | static __kf_device__ unsigned int laneId() {
method id (line 419) | static __kf_device__ unsigned int id() {
method __kf_device__ (line 424) | static __kf_device__ int laneMaskLt() {
method __kf_device__ (line 433) | static __kf_device__ int binaryExclScan(int ballot_mask) { return ...
type Block (line 436) | struct Block {
method stride (line 437) | static __kf_device__ unsigned int stride() { return blockDim.x * b...
method __kf_device__ (line 439) | static __kf_device__ int flattenedThreadId() {
method __kf_device__ (line 444) | static __kf_device__ void reduce(volatile T *buffer, BinOp op) {
method __kf_device__ (line 492) | static __kf_device__ T reduce(volatile T *buffer, T init, BinOp op) {
type Emulation (line 558) | struct Emulation {
method __kf_device__ (line 559) | static __kf_device__ int warp_reduce(volatile int *ptr, const unsi...
method __kf_device__ (line 574) | static __kf_device__ int Ballot(int predicate, volatile int *cta_b...
method __kf_device__ (line 585) | static __kf_device__ bool All(int predicate, volatile int *cta_buf...
FILE: include/kfusion/cuda/texture_binder.hpp
type kfusion (line 6) | namespace kfusion {
type cuda (line 7) | namespace cuda {
class TextureBinder (line 8) | class TextureBinder {
type cudaTextureReadMode (line 10) | enum cudaTextureReadMode
method TextureBinder (line 11) | TextureBinder(const DeviceArray2D<T> &arr, const struct texture<T,...
type cudaTextureReadMode (line 16) | enum cudaTextureReadMode
method TextureBinder (line 17) | TextureBinder(const DeviceArray<T> &arr, const struct texture<T, 1...
type cudaTextureReadMode (line 22) | enum cudaTextureReadMode
method TextureBinder (line 23) | TextureBinder(const PtrStepSz<T> &arr, const struct texture<T, 2, ...
type cudaTextureReadMode (line 28) | enum cudaTextureReadMode
method TextureBinder (line 29) | TextureBinder(const A &arr, const struct texture<T, 2, readMode> &...
type cudaTextureReadMode (line 34) | enum cudaTextureReadMode
method TextureBinder (line 35) | TextureBinder(const PtrSz<T> &arr, const struct texture<T, 1, read...
type textureReference (line 43) | struct textureReference
type device (line 47) | namespace device {
FILE: include/kfusion/cuda/tsdf_volume.hpp
type kfusion (line 15) | namespace kfusion {
type cuda (line 16) | namespace cuda {
function TsdfVolume (line 17) | class KF_EXPORTS TsdfVolume {
FILE: include/kfusion/internal.hpp
type Mat4f (line 12) | struct Mat4f {
type kfusion (line 16) | namespace kfusion {
type device (line 17) | namespace device {
type Surface (line 38) | struct Surface {
type float8 (line 43) | struct float8 {
type Mat3f (line 51) | struct Mat3f {
type Aff3f (line 54) | struct Aff3f {
type TsdfVolume (line 59) | struct TsdfVolume {
type Projector (line 80) | struct Projector {
method Projector (line 82) | Projector() {}
type Reprojector (line 87) | struct Reprojector {
method Reprojector (line 88) | Reprojector() {}
type CubeIndexEstimator (line 94) | struct CubeIndexEstimator {
method CubeIndexEstimator (line 95) | CubeIndexEstimator(const TsdfVolume &vol) : volume(vol) { isoValue...
type OccupiedVoxels (line 104) | struct OccupiedVoxels : public CubeIndexEstimator {
method OccupiedVoxels (line 105) | OccupiedVoxels(const TsdfVolume &vol) : CubeIndexEstimator({vol}) {}
type TrianglesGenerator (line 124) | struct TrianglesGenerator : public CubeIndexEstimator {
method TrianglesGenerator (line 125) | TrianglesGenerator(const TsdfVolume &vol) : CubeIndexEstimator({vo...
type ComputeIcpHelper (line 148) | struct ComputeIcpHelper {
type Policy (line 149) | struct Policy
type PageLockHelper (line 150) | struct PageLockHelper {
FILE: include/kfusion/kinfu.hpp
type kfusion (line 23) | namespace kfusion {
type cuda (line 24) | namespace cuda {
function KinFuParams (line 33) | struct KF_EXPORTS KinFuParams {
function KinFu (line 64) | class KF_EXPORTS KinFu {
FILE: include/kfusion/precomp.hpp
type kfusion (line 18) | namespace kfusion {
function D (line 20) | inline D device_cast(const S &source) {
FILE: include/kfusion/safe_call.hpp
type kfusion (line 5) | namespace kfusion {
type cuda (line 6) | namespace cuda {
function ___cudaSafeCall (line 19) | static inline void ___cudaSafeCall(cudaError_t err, const char *file...
function divUp (line 24) | static inline int divUp(int total, int grain) { return (total + grai...
type cuda (line 18) | namespace cuda {
function ___cudaSafeCall (line 19) | static inline void ___cudaSafeCall(cudaError_t err, const char *file...
function divUp (line 24) | static inline int divUp(int total, int grain) { return (total + grai...
type device (line 27) | namespace device {
type kfusion (line 17) | namespace kfusion {
type cuda (line 6) | namespace cuda {
function ___cudaSafeCall (line 19) | static inline void ___cudaSafeCall(cudaError_t err, const char *file...
function divUp (line 24) | static inline int divUp(int total, int grain) { return (total + grai...
type cuda (line 18) | namespace cuda {
function ___cudaSafeCall (line 19) | static inline void ___cudaSafeCall(cudaError_t err, const char *file...
function divUp (line 24) | static inline int divUp(int total, int grain) { return (total + grai...
type device (line 27) | namespace device {
FILE: include/kfusion/types.hpp
type CUevent_st (line 20) | struct CUevent_st
type kfusion (line 22) | namespace kfusion {
function Intr (line 28) | struct KF_EXPORTS Intr {
type Point (line 38) | struct Point {
method Point (line 46) | Point() = default;
method Point (line 48) | Point(float x, float y, float z) {
type RGB (line 57) | struct RGB {
type PixelRGB (line 66) | struct PixelRGB {
type cuda (line 70) | namespace cuda {
type Surface (line 85) | struct Surface {
type Frame (line 90) | struct Frame {
function deg2rad (line 99) | inline float deg2rad(float alpha) { return alpha * 0.017453293f; }
function ScopeTime (line 101) | struct KF_EXPORTS ScopeTime {
function SampledScopeTime (line 108) | struct KF_EXPORTS SampledScopeTime {
FILE: include/sobfu/cuda/utils.hpp
type SharedMemory (line 12) | struct SharedMemory {
method __device__ (line 13) | __device__ inline operator T *() {
method __device__ (line 18) | __device__ inline operator const T *() const {
function __device__ (line 25) | __device__ __forceinline__ int get_global_idx(int x, int y, int z, int d...
function __host__ (line 34) | __host__ __device__ __forceinline__ T lerp(T v0, T v1, T t) {
function __host__ (line 38) | __host__ __device__ __forceinline__ float3 lerp3f(float3 v0, float3 v1, ...
function __host__ (line 42) | __host__ __device__ __forceinline__ float4 lerp4f(float4 v0, float4 v1, ...
function __device__ (line 51) | __device__ __forceinline__ float2 interpolate_tsdf(const Vol &volume, co...
function __device__ (line 89) | __device__ __forceinline__ float4 interpolate_field(const Field &field, ...
function __device__ (line 125) | __device__ __forceinline__ float4 interpolate_field_inv(const Field &fie...
function __device__ (line 166) | __device__ __forceinline__ float4 interpolate_field(const float4 *field,...
function __device__ (line 212) | __device__ __forceinline__ float norm(float2 v) {
function __host__ (line 220) | __host__ __device__ __forceinline__ float3 &operator-=(float3 &v1, const...
function __device__ (line 227) | __device__ __forceinline__ float3 operator/(const float3 &v, const float...
function __device__ (line 231) | __device__ __forceinline__ float n(const float3 &v) {
function __device__ (line 235) | __device__ __forceinline__ float3 sq(const float3 &v) {
function __device__ (line 239) | __device__ __forceinline__ float rms_epsilon(const float3 &v) { return _...
function __device__ (line 245) | __device__ __forceinline__ float4 operator+(const float4 &v1, const floa...
function __device__ (line 249) | __device__ __forceinline__ float4 operator-(const float4 &v1, const floa...
function __host__ (line 253) | __host__ __device__ __forceinline__ float4 &operator+=(float4 &v1, const...
function __host__ (line 260) | __host__ __device__ __forceinline__ float4 &operator-=(float4 &v1, const...
function __device__ (line 267) | __device__ __forceinline__ float4 operator*(const float4 &v, const float...
function __device__ (line 271) | __device__ __forceinline__ float4 operator*(const float &m, const float4...
function __device__ (line 273) | __device__ __forceinline__ float4 operator/(const float4 &v, const float...
function __device__ (line 277) | __device__ __forceinline__ float4 operator/(const float &d, const float4...
function __device__ (line 279) | __device__ __forceinline__ float norm(const float4 &v) {
function __device__ (line 283) | __device__ __forceinline__ float norm_sq(const float4 &v) {
function __device__ (line 287) | __device__ __forceinline__ float4 normalised(const float4 &v) {
function __host__ (line 292) | __host__ __device__ __forceinline__ float3 trunc(const float4 &f) { retu...
function __device__ (line 294) | __device__ __forceinline__ float4 sq(const float4 &v) {
function __device__ (line 298) | __device__ __forceinline__ float rms_epsilon(const float4 &v) {
function __device__ (line 306) | __device__ __forceinline__ kfusion::device::Mat3f operator+(kfusion::dev...
function __device__ (line 324) | __device__ __forceinline__ kfusion::device::Mat3f operator-(kfusion::dev...
function __device__ (line 342) | __device__ __forceinline__ kfusion::device::Mat3f operator*(kfusion::dev...
function __device__ (line 360) | __device__ __forceinline__ kfusion::device::Mat3f operator/(kfusion::dev...
function __device__ (line 366) | __device__ __forceinline__ float4 operator*(Mat4f M, float4 v) {
function __device__ (line 374) | __device__ __forceinline__ float det(Mat4f M) {
function __host__ (line 387) | __host__ __device__ __forceinline__ float sign(float a) {
function __host__ (line 397) | __host__ __device__ __forceinline__ bool is_truncated(float tsdf) {
function __host__ (line 405) | __host__ __device__ __forceinline__ float heaviside_smooth(float phi, fl...
function __host__ (line 409) | __host__ __device__ __forceinline__ void vec(kfusion::device::Mat3f J, f...
function __host__ (line 423) | __host__ __device__ __forceinline__ kfusion::device::Mat3f transpose(kfu...
FILE: include/sobfu/params.hpp
type Params (line 7) | struct Params {
method voxel_sizes (line 34) | cv::Vec3f voxel_sizes() {
FILE: include/sobfu/reductor.hpp
type sobfu (line 17) | namespace sobfu {
type device (line 18) | namespace device {
type Reductor (line 24) | struct Reductor {
FILE: include/sobfu/scalar_fields.hpp
type sobfu (line 12) | namespace sobfu {
type cuda (line 13) | namespace cuda {
class ScalarField (line 19) | class ScalarField {
type device (line 47) | namespace device {
type ScalarField (line 53) | struct ScalarField {
method ScalarField (line 55) | ScalarField(float *const data_, int3 dims_) : data(data_), dims(di...
FILE: include/sobfu/sob_fusion.hpp
class SobFusion (line 21) | class SobFusion {
FILE: include/sobfu/solver.hpp
type SolverParams (line 16) | struct SolverParams {
type SDFs (line 25) | struct SDFs {
method SDFs (line 26) | SDFs(kfusion::device::TsdfVolume &phi_global_, kfusion::device::TsdfVo...
type Differentiators (line 37) | struct Differentiators {
method Differentiators (line 38) | Differentiators(sobfu::device::TsdfDifferentiator &tsdf_diff_, sobfu::...
type sobfu (line 49) | namespace sobfu {
type cuda (line 50) | namespace cuda {
class Solver (line 56) | class Solver {
type device (line 103) | namespace device {
FILE: include/sobfu/vector_fields.hpp
type sobfu (line 13) | namespace sobfu {
type cuda (line 14) | namespace cuda {
class VectorField (line 20) | class VectorField {
class DeformationField (line 52) | class DeformationField : public VectorField {
class Jacobian (line 80) | class Jacobian {
type SpatialGradients (line 102) | struct SpatialGradients {
type device (line 118) | namespace device {
type VectorField (line 124) | struct VectorField {
method VectorField (line 126) | VectorField(float4 *const data_, const int3 dims_) : data(data_), ...
type TsdfDifferentiator (line 166) | struct TsdfDifferentiator {
method TsdfDifferentiator (line 168) | TsdfDifferentiator(kfusion::device::TsdfVolume &vol_) : vol(vol_) {}
type SecondOrderDifferentiator (line 193) | struct SecondOrderDifferentiator {
method SecondOrderDifferentiator (line 195) | SecondOrderDifferentiator(sobfu::device::DeformationField &psi_) :...
type Jacobian (line 222) | struct Jacobian {
method Jacobian (line 224) | Jacobian(Mat4f *const data_, int3 dims_) : data(data_), dims(dims_...
type Differentiator (line 234) | struct Differentiator {
method Differentiator (line 236) | Differentiator(sobfu::device::DeformationField &psi_) : psi(psi_) {}
type SpatialGradients (line 258) | struct SpatialGradients {
method SpatialGradients (line 259) | SpatialGradients(sobfu::device::TsdfGradient *nabla_phi_n_, sobfu:...
type sobfu (line 117) | namespace sobfu {
type cuda (line 14) | namespace cuda {
class VectorField (line 20) | class VectorField {
class DeformationField (line 52) | class DeformationField : public VectorField {
class Jacobian (line 80) | class Jacobian {
type SpatialGradients (line 102) | struct SpatialGradients {
type device (line 118) | namespace device {
type VectorField (line 124) | struct VectorField {
method VectorField (line 126) | VectorField(float4 *const data_, const int3 dims_) : data(data_), ...
type TsdfDifferentiator (line 166) | struct TsdfDifferentiator {
method TsdfDifferentiator (line 168) | TsdfDifferentiator(kfusion::device::TsdfVolume &vol_) : vol(vol_) {}
type SecondOrderDifferentiator (line 193) | struct SecondOrderDifferentiator {
method SecondOrderDifferentiator (line 195) | SecondOrderDifferentiator(sobfu::device::DeformationField &psi_) :...
type Jacobian (line 222) | struct Jacobian {
method Jacobian (line 224) | Jacobian(Mat4f *const data_, int3 dims_) : data(data_), dims(dims_...
type Differentiator (line 234) | struct Differentiator {
method Differentiator (line 236) | Differentiator(sobfu::device::DeformationField &psi_) : psi(psi_) {}
type SpatialGradients (line 258) | struct SpatialGradients {
method SpatialGradients (line 259) | SpatialGradients(sobfu::device::TsdfGradient *nabla_phi_n_, sobfu:...
FILE: src/apps/demo.cpp
type SobFuApp (line 30) | struct SobFuApp {
method SobFuApp (line 31) | SobFuApp(std::string file_path, std::string params_path, bool logger, ...
method declare_parameters (line 87) | void declare_parameters(boost::program_options::options_description &d...
method read_parameters (line 166) | void read_parameters(boost::program_options::options_description &desc...
method load_files (line 177) | void load_files(std::vector<cv::String> *depths, std::vector<cv::Strin...
method create_output_directory (line 204) | void create_output_directory() {
method get_no_vertices (line 226) | int get_no_vertices(pcl::PolygonMesh::Ptr mesh) {
method save_mesh (line 237) | void save_mesh(pcl::PolygonMesh::Ptr mesh, int i, std::string name) {
method save_field (line 252) | void save_field(std::shared_ptr<SobFusion> sobfu, int i) {
method execute (line 285) | bool execute() {
function parse_flags (line 526) | void parse_flags(std::vector<std::string> args, std::string *file_path, ...
function main (line 570) | int main(int argc, char *argv[]) {
FILE: src/kfusion/core.cpp
function getCudaAttribute (line 42) | inline void getCudaAttribute(T *attribute, CUdevice_attribute device_att...
function convertSMVer2Cores (line 52) | inline int convertSMVer2Cores(int major, int minor) {
FILE: src/kfusion/device_memory.cpp
function _Tp (line 40) | static inline _Tp CV_XADD(_Tp *addr, _Tp delta) {
FILE: src/kfusion/kinfu.cpp
function deg2rad (line 8) | static inline float deg2rad(float alpha) { return alpha * 0.017453293f; }
FILE: src/kfusion/projective_icp.cpp
type kfusion::cuda::ProjectiveICP::StreamHelper (line 25) | struct kfusion::cuda::ProjectiveICP::StreamHelper {
method StreamHelper (line 33) | StreamHelper() { cudaSafeCall(cudaStreamCreate(&stream)); }
method Mat6f (line 39) | Mat6f get(Vec6f &b) {
FILE: src/kfusion/tsdf_volume.cpp
function Vec3i (line 43) | Vec3i kfusion::cuda::TsdfVolume::getDims() const { return dims_; }
function Vec3f (line 45) | Vec3f kfusion::cuda::TsdfVolume::getVoxelSize() const {
function CudaData (line 49) | const CudaData kfusion::cuda::TsdfVolume::data() const { return data_; }
function CudaData (line 50) | CudaData kfusion::cuda::TsdfVolume::data() { return data_; }
function Vec3f (line 52) | Vec3f kfusion::cuda::TsdfVolume::getSize() const { return size_; }
function Affine3f (line 64) | Affine3f kfusion::cuda::TsdfVolume::getPose() const { return pose_; }
FILE: src/sobfu/precomp.cpp
function isPow2 (line 5) | bool isPow2(unsigned int x) { return ((x & (x - 1)) == 0); }
function nextPow2 (line 8) | int nextPow2(int x) {
function get_num_blocks_and_threads (line 20) | void get_num_blocks_and_threads(int n, int maxBlocks, int maxThreads, in...
FILE: src/sobfu/reductor.cpp
function float2 (line 52) | float2 sobfu::device::Reductor::max_update_norm() {
function float2 (line 59) | float2 sobfu::device::Reductor::voxel_max_energy(float2 *phi_global_data...
function float2 (line 81) | float2 sobfu::device::final_reduce_max(float2 *h_o_max_data, float2 *d_o...
FILE: src/sobfu/scalar_fields.cpp
function int3 (line 21) | int3 sobfu::cuda::ScalarField::get_dims() { return dims; }
FILE: src/sobfu/sob_fusion.cpp
function Params (line 41) | Params &SobFusion::getParams() { return params; }
FILE: test/deformation_field_test.cpp
class DeformationFieldTest (line 19) | class DeformationFieldTest : public ::testing::Test {
method DeformationFieldTest (line 25) | DeformationFieldTest() = default;
method SetUp (line 35) | void SetUp() override {
method TearDown (line 66) | void TearDown() override {}
function TEST_F (line 92) | TEST_F(DeformationFieldTest, ClearTest) {
function TEST_F (line 111) | TEST_F(DeformationFieldTest, TsdfGradientTest) {
function TEST_F (line 152) | TEST_F(DeformationFieldTest, UniformFieldJacobianTest) {
function TEST_F (line 199) | TEST_F(DeformationFieldTest, JacobianTestSimple) {
function TEST_F (line 252) | TEST_F(DeformationFieldTest, JacobianLaplacianTestComplicated) {
FILE: test/main.cpp
function main (line 3) | int main(int argc, char **argv) {
FILE: test/reductions_test.cpp
class ReductionsTest (line 19) | class ReductionsTest : public ::testing::Test {
method ReductionsTest (line 25) | ReductionsTest() = default;
method SetUp (line 35) | void SetUp() override {
method TearDown (line 61) | void TearDown() override {}
function TEST_F (line 86) | TEST_F(ReductionsTest, DataTermTest) {
FILE: test/solver_test.cpp
class SolverTest (line 19) | class SolverTest : public ::testing::Test {
method SolverTest (line 25) | SolverTest() = default;
method SetUp (line 35) | void SetUp() override {
method TearDown (line 81) | void TearDown() override {}
function TEST_F (line 109) | TEST_F(SolverTest, AlignmentTestSphereTranslation) {
function TEST_F (line 135) | TEST_F(SolverTest, AlignmentTestSphereExpanding) {
function TEST_F (line 162) | TEST_F(SolverTest, SerialAlignmentTest) {
Condensed preview — 73 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (436K chars).
[
{
"path": ".clang-format",
"chars": 2995,
"preview": "---\n# ---------------------------------------------------------------------------- #\n# CL"
},
{
"path": ".gitignore",
"chars": 1164,
"preview": "# Created by https://www.gitignore.io/api/vim,osx,c++\n/data\n### C++ ###\n# Prerequisites\n*.d\n\n# Compiled Object files\n*.s"
},
{
"path": ".gitlab-ci.yml",
"chars": 11271,
"preview": "# ---------------------------------------------------------------------------- #\n# GITLAB-"
},
{
"path": ".lintflags",
"chars": 1675,
"preview": "# ---------------------------------------------------------------------------- #\n# FIL"
},
{
"path": ".lintignore",
"chars": 224,
"preview": "# Ignore the test directory when linting\n./tests/*\n\n# Ignore all git files when linting\n./.git/*\n\n# Ignore all scripts\n."
},
{
"path": "CMakeLists.txt",
"chars": 2736,
"preview": "# ---------------------------------------------------------------------------- #\n# CMAKE INIT CONFIGURATIONS\n# ---------"
},
{
"path": "LICENSE.md",
"chars": 1492,
"preview": "Copyright (c) 2012, Anatoly Baksheev\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or wi"
},
{
"path": "README.md",
"chars": 1667,
"preview": "sobfu\n============\nsoftware for 3d reconstruction of non-rigidly deforming scenes using depth data, based on sobolevfusi"
},
{
"path": "cmake/Targets.cmake",
"chars": 3348,
"preview": "################################################################################################\n# short command to setu"
},
{
"path": "cmake/Utils.cmake",
"chars": 3083,
"preview": "################################################################################################\n# short alias for CMake"
},
{
"path": "include/kfusion/cuda/device.hpp",
"chars": 3463,
"preview": "#pragma once\n\n#include <cuda_fp16.h>\n#include <kfusion/cuda/temp_utils.hpp>\n#include <kfusion/precomp.hpp>\n\n////////////"
},
{
"path": "include/kfusion/cuda/device_array.hpp",
"chars": 13400,
"preview": "#pragma once\n\n#include <kfusion/cuda/device_memory.hpp>\n#include <kfusion/exports.hpp>\n\n#include <vector>\n\nnamespace kfu"
},
{
"path": "include/kfusion/cuda/device_memory.hpp",
"chars": 9398,
"preview": "#pragma once\n\n#include <kfusion/cuda/kernel_containers.hpp>\n#include <kfusion/exports.hpp>\n\nnamespace kfusion {\nnamespac"
},
{
"path": "include/kfusion/cuda/imgproc.hpp",
"chars": 1984,
"preview": "#pragma once\n\n/* kinfu includes */\n#include <kfusion/types.hpp>\n\n/* pcl includes */\n#include <pcl/PolygonMesh.h>\n\nnamesp"
},
{
"path": "include/kfusion/cuda/kernel_containers.hpp",
"chars": 2107,
"preview": "#pragma once\n\n#if defined(__CUDACC__)\n#define __kf_hdevice__ __host__ __device__ __forceinline__\n#define __kf_device__ _"
},
{
"path": "include/kfusion/cuda/marching_cubes.hpp",
"chars": 1647,
"preview": "#pragma once\n\n/* eigen includes */\n#include <Eigen/Core>\n\n/* kinfu includes */\n#include <kfusion/cuda/device_array.hpp>\n"
},
{
"path": "include/kfusion/cuda/projective_icp.hpp",
"chars": 1538,
"preview": "#pragma once\n\n#include <kfusion/types.hpp>\n\nnamespace kfusion {\nnamespace cuda {\nclass ProjectiveICP {\npublic:\n enum "
},
{
"path": "include/kfusion/cuda/temp_utils.hpp",
"chars": 21104,
"preview": "#pragma once\n\n#include <cuda.h>\n#include <kfusion/cuda/kernel_containers.hpp>\n\n#define FULL_MASK 0xffffffff\n\nnamespace k"
},
{
"path": "include/kfusion/cuda/texture_binder.hpp",
"chars": 1987,
"preview": "#pragma once\n\n#include <kfusion/cuda/device_array.hpp>\n#include <kfusion/safe_call.hpp>\n\nnamespace kfusion {\nnamespace c"
},
{
"path": "include/kfusion/cuda/tsdf_volume.hpp",
"chars": 2097,
"preview": "#pragma once\n\n/* kinfu includes */\n#include <kfusion/types.hpp>\n\n/* pcl includes */\n#include <pcl/common/geometry.h>\n\n/*"
},
{
"path": "include/kfusion/exports.hpp",
"chars": 173,
"preview": "#pragma once\n\n#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined KFUSION_API_EXPORTS\n#define KF_EXPORTS _"
},
{
"path": "include/kfusion/internal.hpp",
"chars": 7831,
"preview": "#pragma once\n\n/* kinfu includes */\n#include <kfusion/cuda/device_array.hpp>\n#include <kfusion/safe_call.hpp>\n\n/* pcl inc"
},
{
"path": "include/kfusion/kinfu.hpp",
"chars": 2869,
"preview": "#pragma once\n\n/* boost includes */\n#include <boost/filesystem.hpp>\n\n/* kinfu includes */\n#include <kfusion/cuda/marching"
},
{
"path": "include/kfusion/precomp.hpp",
"chars": 848,
"preview": "#pragma once\n\n/* cuda includes */\n#include <vector_functions.h>\n\n/* kinfu includes */\n#include <kfusion/cuda/imgproc.hpp"
},
{
"path": "include/kfusion/safe_call.hpp",
"chars": 878,
"preview": "#pragma once\n\n#include <cuda_runtime_api.h>\n\nnamespace kfusion {\nnamespace cuda {\nvoid error(const char *error_string, c"
},
{
"path": "include/kfusion/types.hpp",
"chars": 2395,
"preview": "#pragma once\n\n/* cuda includes */\n#include <vector_functions.h>\n\n/* kinfu includes */\n#include <kfusion/cuda/device_arra"
},
{
"path": "include/sobfu/cuda/utils.hpp",
"chars": 15237,
"preview": "#pragma once\n\n/* cuda includes */\n#include <cuda.h>\n#include <math_constants.h>\n\n/* kinfu includes */\n#include <kfusion/"
},
{
"path": "include/sobfu/params.hpp",
"chars": 1286,
"preview": "#pragma once\n\n/* kinfu includes */\n#include <kfusion/types.hpp>\n\n/* sobfu parameters */\nstruct Params {\n int cols = 6"
},
{
"path": "include/sobfu/precomp.hpp",
"chars": 621,
"preview": "#pragma once\n\n/* cuda includes */\n#include <cuda.h>\n\n/* kinfu includes */\n#include <kfusion/internal.hpp>\n\n/* checks if "
},
{
"path": "include/sobfu/reductor.hpp",
"chars": 2605,
"preview": "#pragma once\n\n/* kinfu includes */\n#include <kfusion/cuda/tsdf_volume.hpp>\n\n/* sobfu incldues */\n#include <sobfu/precomp"
},
{
"path": "include/sobfu/scalar_fields.hpp",
"chars": 1738,
"preview": "#pragma once\n\n/* sobfu includes */\n#include <sobfu/params.hpp>\n#include <sobfu/precomp.hpp>\n\n/* kinfu includes */\n#inclu"
},
{
"path": "include/sobfu/sob_fusion.hpp",
"chars": 2159,
"preview": "#pragma once\n\n/* sobfu includes */\n#include <sobfu/params.hpp>\n#include <sobfu/solver.hpp>\n\n/* kinfu includes */\n#includ"
},
{
"path": "include/sobfu/solver.hpp",
"chars": 4953,
"preview": "#pragma once\n\n/* kinfu incldues */\n#include <kfusion/cuda/tsdf_volume.hpp>\n#include <kfusion/safe_call.hpp>\n\n/* sobfu in"
},
{
"path": "include/sobfu/vector_fields.hpp",
"chars": 8124,
"preview": "#pragma once\n\n/* sobfu includes */\n#include <sobfu/params.hpp>\n#include <sobfu/precomp.hpp>\n\n/* kinfu includes */\n#inclu"
},
{
"path": "params/params_advent.ini",
"chars": 454,
"preview": "# TSDF\nVOL_DIMS_X=64\nVOL_DIMS_Y=64\nVOL_DIMS_Z=64\n\nVOL_SIZE_X=0.5\nVOL_SIZE_Y=0.5\nVOL_SIZE_Z=0.5\n\nTSDF_TRUNC_DIST=5\nETA=2\n"
},
{
"path": "params/params_boxing.ini",
"chars": 489,
"preview": "# TSDF\nVOL_DIMS_X=128\nVOL_DIMS_Y=128\nVOL_DIMS_Z=128\n\nVOL_SIZE_X=0.75\nVOL_SIZE_Y=0.75\nVOL_SIZE_Z=0.75\n\nTSDF_TRUNC_DIST=48"
},
{
"path": "params/params_hat.ini",
"chars": 458,
"preview": "# TSDF\nVOL_DIMS_X=128\nVOL_DIMS_Y=128\nVOL_DIMS_Z=128\n\nVOL_SIZE_X=1.20\nVOL_SIZE_Y=1.20\nVOL_SIZE_Z=1.20\n\nTSDF_TRUNC_DIST=10"
},
{
"path": "params/params_ours.ini",
"chars": 332,
"preview": "# intrinsic parameters for our intel realsense sr300\n# ---rgb---\n# kinfuParams.intr = kfusion::Intr(622.011f, 622.012f, "
},
{
"path": "params/params_snoopy.ini",
"chars": 469,
"preview": "# TSDF\nVOL_DIMS_X=128\nVOL_DIMS_Y=128\nVOL_DIMS_Z=128\n\nVOL_SIZE_X=0.9\nVOL_SIZE_Y=0.9\nVOL_SIZE_Z=0.9\n\nTSDF_TRUNC_DIST=10\nET"
},
{
"path": "params/params_umbrella.ini",
"chars": 474,
"preview": "# TSDF\nVOL_DIMS_X=128\nVOL_DIMS_Y=128\nVOL_DIMS_Z=128\n\nVOL_SIZE_X=1.0\nVOL_SIZE_Y=1.0\nVOL_SIZE_Z=1.0\n\nTSDF_TRUNC_DIST=8\nETA"
},
{
"path": "setup.sh",
"chars": 1546,
"preview": "#!/bin/sh\n\nRUN_CMAKE=false\nRUN_MAKE=false\nRUN_MAKE_TESTS=false\n\nusage() {\n echo \"USAGE: source setup.sh [options]\"\n "
},
{
"path": "src/CMakeLists.txt",
"chars": 260,
"preview": "# ---------------------------------------------------------------------------- #\n# BUILD ALL SUBPROJECTS\n# -------------"
},
{
"path": "src/apps/CMakeLists.txt",
"chars": 302,
"preview": "# Link source files to executables\nadd_application(app)\n\n# Link to all libraries\ntarget_link_libraries(app\n sobfu\n "
},
{
"path": "src/apps/demo.cpp",
"chars": 24750,
"preview": "/* sobfu includes a*/\n#include <sobfu/sob_fusion.hpp>\n\n/* boost includes */\n#include <boost/program_options.hpp>\n\n/* ope"
},
{
"path": "src/kfusion/CMakeLists.txt",
"chars": 185,
"preview": "# Build all kfusion source files into library kfusion\nadd_module_library(kfusion)\n\n# Link to kfusion library\ntarget_link"
},
{
"path": "src/kfusion/core.cpp",
"chars": 10449,
"preview": "#include <kfusion/kinfu.hpp>\n#include <kfusion/safe_call.hpp>\n\n#include <cuda.h>\n#include <cstdio>\n#include <iostream>\n\n"
},
{
"path": "src/kfusion/cuda/imgproc.cu",
"chars": 15618,
"preview": "#include <kfusion/cuda/device.hpp>\n\n////////////////////////////////////////////////////////////////////////////////////"
},
{
"path": "src/kfusion/cuda/marching_cubes.cu",
"chars": 11215,
"preview": "#include <kfusion/cuda/device.hpp>\n\n#include <thrust/device_ptr.h>\n#include <thrust/scan.h>\n\n#define FULL_MASK 0xfffffff"
},
{
"path": "src/kfusion/cuda/proj_icp.cu",
"chars": 11575,
"preview": "#include <kfusion/cuda/device.hpp>\n#include <kfusion/cuda/texture_binder.hpp>\n\nnamespace kfusion {\nnamespace device {\nte"
},
{
"path": "src/kfusion/cuda/tsdf_volume.cu",
"chars": 13993,
"preview": "/* kfusion includes */\n#include <kfusion/cuda/device.hpp>\n#include <kfusion/cuda/texture_binder.hpp>\n\n/* sobfu includes "
},
{
"path": "src/kfusion/device_memory.cpp",
"chars": 7673,
"preview": "#include <cassert>\n#include <cstdlib>\n#include <iostream>\n#include <kfusion/cuda/device_memory.hpp>\n#include <kfusion/sa"
},
{
"path": "src/kfusion/imgproc.cpp",
"chars": 4777,
"preview": "#include <kfusion/precomp.hpp>\n\nvoid kfusion::cuda::depthBilateralFilter(const Depth &in, Depth &out, int kernel_size, f"
},
{
"path": "src/kfusion/kinfu.cpp",
"chars": 3527,
"preview": "#include <kfusion/internal.hpp>\n#include <kfusion/precomp.hpp>\n\nusing namespace std;\nusing namespace kfusion;\nusing name"
},
{
"path": "src/kfusion/marching_cubes.cpp",
"chars": 29157,
"preview": "/* kinfu includes */\n#include <kfusion/cuda/marching_cubes.hpp>\n#include <kfusion/internal.hpp>\n#include <kfusion/kinfu."
},
{
"path": "src/kfusion/precomp.cpp",
"chars": 3185,
"preview": "#include <kfusion/internal.hpp>\n#include <kfusion/precomp.hpp>\n\n////////////////////////////////////////////////////////"
},
{
"path": "src/kfusion/projective_icp.cpp",
"chars": 7431,
"preview": "#include <kfusion/precomp.hpp>\n\nusing namespace kfusion;\nusing namespace kfusion::cuda;\n\nusing namespace std;\n//////////"
},
{
"path": "src/kfusion/tsdf_volume.cpp",
"chars": 6422,
"preview": "#include <kfusion/precomp.hpp>\n\nusing namespace kfusion;\nusing namespace kfusion::cuda;\n\n///////////////////////////////"
},
{
"path": "src/sobfu/CMakeLists.txt",
"chars": 223,
"preview": "# Build all sobfu source files into library dynfy\nadd_module_library(sobfu)\n\n# Link to sobfu library\ntarget_link_librari"
},
{
"path": "src/sobfu/cuda/reductor.cu",
"chars": 29796,
"preview": "/* sobfu includes */\n#include <sobfu/cuda/utils.hpp>\n#include <sobfu/reductor.hpp>\n\n#define FULL_MASK 0xffffffff\n\n/*\n * "
},
{
"path": "src/sobfu/cuda/scalar_fields.cu",
"chars": 8527,
"preview": "/* sobfu includes */\n#include <sobfu/cuda/utils.hpp>\n#include <sobfu/scalar_fields.hpp>\n\n#define FULL_MASK 0xffffffff\n\n/"
},
{
"path": "src/sobfu/cuda/solver.cu",
"chars": 19766,
"preview": "/* sobfu includes */\n#include <sobfu/cuda/utils.hpp>\n#include <sobfu/solver.hpp>\n\n/* kinfu includes */\n#include <kfusion"
},
{
"path": "src/sobfu/cuda/vector_fields.cu",
"chars": 15800,
"preview": "/* sobfu includes */\n#include <sobfu/cuda/utils.hpp>\n#include <sobfu/vector_fields.hpp>\n\n/* kinfu includes */\n#include <"
},
{
"path": "src/sobfu/precomp.cpp",
"chars": 1297,
"preview": "/* sobfu includes */\n#include <sobfu/precomp.hpp>\n\n/* checks if x is a power of 2 */\nbool isPow2(unsigned int x) { retur"
},
{
"path": "src/sobfu/reductor.cpp",
"chars": 3038,
"preview": "/* sobfu incldues */\n#include <sobfu/reductor.hpp>\n\n/*\n * REDUCTOR\n */\n\nsobfu::device::Reductor::Reductor(int3 dims_, fl"
},
{
"path": "src/sobfu/scalar_fields.cpp",
"chars": 1442,
"preview": "#include <sobfu/scalar_fields.hpp>\n\n/*\n * SCALAR FIELD\n */\n\nsobfu::cuda::ScalarField::ScalarField(cv::Vec3i dims_) {\n "
},
{
"path": "src/sobfu/sob_fusion.cpp",
"chars": 5491,
"preview": "#include <sobfu/sob_fusion.hpp>\n\nSobFusion::SobFusion(const Params ¶ms) : frame_counter_(0), params(params) {\n in"
},
{
"path": "src/sobfu/solver.cpp",
"chars": 9460,
"preview": "#include <sobfu/solver.hpp>\n\n/*\n * SOLVER\n */\n\nsobfu::cuda::Solver::Solver(Params& params) {\n /*\n * PARAMETERS\n "
},
{
"path": "src/sobfu/vector_fields.cpp",
"chars": 5584,
"preview": "/* sobfu includes */\n#include <sobfu/vector_fields.hpp>\n\n/*\n * VECTOR FIELD\n */\n\nsobfu::cuda::VectorField::VectorField(c"
},
{
"path": "test/CMakeLists.txt",
"chars": 2115,
"preview": "# ---------------------------------------------------------------------------- #\n# GOOGLE TEST DEPENDECIES\n# -----------"
},
{
"path": "test/deformation_field_test.cpp",
"chars": 12213,
"preview": "/* gtest includes */\n#include <gtest/gtest.h>\n\n/* sobfu includes */\n#include <sobfu/params.hpp>\n#include <sobfu/solver.h"
},
{
"path": "test/main.cpp",
"chars": 134,
"preview": "#include \"gtest/gtest.h\"\n\nint main(int argc, char **argv) {\n ::testing::InitGoogleTest(&argc, argv);\n return RUN_A"
},
{
"path": "test/reductions_test.cpp",
"chars": 3477,
"preview": "/* gtest includes */\n#include <gtest/gtest.h>\n\n/* sobfu includes */\n#include <sobfu/params.hpp>\n#include <sobfu/solver.h"
},
{
"path": "test/solver_test.cpp",
"chars": 6109,
"preview": "/* gtest includes */\n#include <gtest/gtest.h>\n\n/* sobfu includes */\n#include <sobfu/params.hpp>\n#include <sobfu/solver.h"
}
]
About this extraction
This page contains the full source code of the dgrzech/sobfu GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 73 files (409.5 KB), approximately 126.9k tokens, and a symbol index with 329 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.