Repository: dgrzech/sobfu Branch: master Commit: c83646582a14 Files: 73 Total size: 409.5 KB Directory structure: gitextract_kh2b_hh_/ ├── .clang-format ├── .gitignore ├── .gitlab-ci.yml ├── .lintflags ├── .lintignore ├── CMakeLists.txt ├── LICENSE.md ├── README.md ├── cmake/ │ ├── Targets.cmake │ └── Utils.cmake ├── include/ │ ├── kfusion/ │ │ ├── cuda/ │ │ │ ├── device.hpp │ │ │ ├── device_array.hpp │ │ │ ├── device_memory.hpp │ │ │ ├── imgproc.hpp │ │ │ ├── kernel_containers.hpp │ │ │ ├── marching_cubes.hpp │ │ │ ├── projective_icp.hpp │ │ │ ├── temp_utils.hpp │ │ │ ├── texture_binder.hpp │ │ │ └── tsdf_volume.hpp │ │ ├── exports.hpp │ │ ├── internal.hpp │ │ ├── kinfu.hpp │ │ ├── precomp.hpp │ │ ├── safe_call.hpp │ │ └── types.hpp │ └── sobfu/ │ ├── cuda/ │ │ └── utils.hpp │ ├── params.hpp │ ├── precomp.hpp │ ├── reductor.hpp │ ├── scalar_fields.hpp │ ├── sob_fusion.hpp │ ├── solver.hpp │ └── vector_fields.hpp ├── params/ │ ├── params_advent.ini │ ├── params_boxing.ini │ ├── params_hat.ini │ ├── params_ours.ini │ ├── params_snoopy.ini │ └── params_umbrella.ini ├── setup.sh ├── src/ │ ├── CMakeLists.txt │ ├── apps/ │ │ ├── CMakeLists.txt │ │ └── demo.cpp │ ├── kfusion/ │ │ ├── CMakeLists.txt │ │ ├── core.cpp │ │ ├── cuda/ │ │ │ ├── imgproc.cu │ │ │ ├── marching_cubes.cu │ │ │ ├── proj_icp.cu │ │ │ └── tsdf_volume.cu │ │ ├── device_memory.cpp │ │ ├── imgproc.cpp │ │ ├── kinfu.cpp │ │ ├── marching_cubes.cpp │ │ ├── precomp.cpp │ │ ├── projective_icp.cpp │ │ └── tsdf_volume.cpp │ └── sobfu/ │ ├── CMakeLists.txt │ ├── cuda/ │ │ ├── reductor.cu │ │ ├── scalar_fields.cu │ │ ├── solver.cu │ │ └── vector_fields.cu │ ├── precomp.cpp │ ├── reductor.cpp │ ├── scalar_fields.cpp │ ├── sob_fusion.cpp │ ├── solver.cpp │ └── vector_fields.cpp └── test/ ├── CMakeLists.txt ├── deformation_field_test.cpp ├── main.cpp ├── reductions_test.cpp └── solver_test.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ --- # ---------------------------------------------------------------------------- # # CLANG-FORMAT # # v6.0.x # # ---------------------------------------------------------------------------- # # For documentation: https://clang.llvm.org/docs/ClangFormatStyleOptions.html # Language: Cpp # BasedOnStyle: Google AccessModifierOffset: -4 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: false AlignEscapedNewlinesLeft: true AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: true BinPackArguments: true BinPackParameters: true BraceWrapping: AfterClass: false AfterControlStatement: false AfterEnum: false AfterFunction: false AfterNamespace: false AfterObjCDeclaration: false AfterStruct: false AfterUnion: false BeforeCatch: false BeforeElse: false IndentBraces: false BreakBeforeBinaryOperators: None BreakBeforeBraces: Attach BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false ColumnLimit: 120 CommentPragmas: '^ IWYU pragma:' ConstructorInitializerAllOnOneLineOrOnePerLine: true 
ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: true DisableFormat: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IncludeCategories: - Regex: '^<.*\.h>' Priority: 1 - Regex: '^<.*' Priority: 2 - Regex: '.*' Priority: 3 IndentCaseLabels: true IndentWidth: 4 IndentWrappedFunctionNames: false KeepEmptyLinesAtTheStartOfBlocks: false MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: false PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 PointerAlignment: Left ReflowComments: true SortIncludes: true SpaceAfterCStyleCast: true SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 2 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false Standard: Cpp11 TabWidth: 8 UseTab: Never ... ================================================ FILE: .gitignore ================================================ # Created by https://www.gitignore.io/api/vim,osx,c++ /data ### C++ ### # Prerequisites *.d # Compiled Object files *.slo *.lo *.o *.obj # Precompiled Headers *.gch *.pch # Compiled Dynamic libraries *.so *.dylib *.dll # Fortran module files *.mod *.smod # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app ### OSX ### *.DS_Store .AppleDouble .LSOverride # Icon must end with two \r Icon # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns .com.apple.timemachine.donotpresent # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk ### Vim ### # swap [._]*.s[a-v][a-z] [._]*.sw[a-p] [._]s[a-v][a-z] [._]sw[a-p] # session Session.vim # temporary .netrwhist *~ # auto-generated tag files tags # End of https://www.gitignore.io/api/vim,osx,c++ # Ignore build directory build files # ignore remote sync config file .remote-sync.json # Ignore frames frames # ignore helper MATLAB script to display point clouds show_pcl.m # ignore output folder out ================================================ FILE: .gitlab-ci.yml ================================================ # ---------------------------------------------------------------------------- # # GITLAB-RUNNER-CI # # ---------------------------------------------------------------------------- # # Version: v9.5.4 # Running on graphic01.doc.ic.ac.uk # ---------------------------------------------------------------------------- # # PIPELINE # # ---------------------------------------------------------------------------- # # The CI pipeline is divided into the following stages: # Lint: Performs linter checks on the repo's code # Build: Builds the project # Test: Runs the project's tests # Deploy: Deploys the project to a given directory # Cleanup: Removes all artifacts required for the CI stages stages: - Setup - Lint - Build - Test - Deploy - Cleanup # env_setup defines a number of environment variables which can be called by # various before_scripts of the project. 
# 1) Exporting the PATH is necessary to enable linking of the custom installed
#    binaries in the group's shared directory.
# 2) setup.sh makes sure all CUDA-related binaries are set up correctly.
#    This includes, but isn't limited to, CUDA itself and OpenGL.
.env_setup: &env_setup |
  source setup.sh --set-paths

# Variables for use in script
variables:
  CONTAINER_IMAGE: g1736211/dynfu
  DEV_IMAGE: g1736211/dynfu:dev
  TAG_IMAGE: g1736211/dynfu:${CI_COMMIT_REF_SLUG}

# ---------------------------------------------------------------------------- #
#                                     SETUP                                     #
# ---------------------------------------------------------------------------- #
# The Setup stage sets all the environment variables and paths using cmake.

# > cmake <
# Cmake will generate a Makefile for the project linking all needed dependencies.
# The cmake stage is required not to fail; if it does, the whole pipeline fails.
cmake:
  stage: Setup

  # Before attempting to cmake, env variables must be set to link the binaries.
  # Call setup.sh, which creates build/ and sets up the dependencies.
  # Cmake will also build inside the build/ directory.
  before_script:
    - rm -rf build || true
    - source setup.sh --set-paths

  # The build/ directory is cached for a given commit SHA to make sure all the tasks
  # within the same pipeline will have access to the correctly built project.
  cache:
    key: "$CI_COMMIT_SHA"
    paths:
      - build/

  # Cmake requires two additional flags:
  # 1) CMAKE_PREFIX enables it to find the custom built binaries
  # 2) CMAKE_CXX_FLAGS are necessary to be set as otherwise BASH doesn't seem to
  #    set the c++ standard correctly (i.e. will throw a number of errors)
  # 3) [Others]
  script:
    - source setup.sh --cmake -i

# ---------------------------------------------------------------------------- #
#                                     LINT                                      #
# ---------------------------------------------------------------------------- #
# The Lint stage runs the linter on the project's files.

# > clang_format <
# Clang formatting will ensure that the latest files have been formatted with
# clang-styling. This step does not fix formatting errors; it just points them
# out to the pipeline, failing it.
clang_format:
  stage: Lint

  # Set env variables before testing should we need to access CUDA or OpenGL
  # headers.
  before_script:
    - *env_setup

  # Files are considered to be source only if *.cpp or *.hpp.
  # Excluded files or directories must be set manually in .lintignore
  script:
    # Print out the configuration file variables
    - /vol/project/2017/362/g1736211/bin/clang-format -dump-config
    # Attempt to check if clang-format had been run before saving
    - ./scripts/.format_bash

# > lint <
# The linter used for the project is clang-tidy.
# 1) The linting stage must run after build due to the numerous dependencies created
#    by Cmake. All these dependencies are logged into compile_commands.json and
#    are referenced by clang-tidy when linting (i.e. to scan headers).
# 2) The run-clang-tidy python script must be linked to the relevant clang-tidy
#    executable as it cannot be found in its canonical place (i.e. /usr/bin).
# 3) run-clang-tidy.py will accept all clang-tidy flags; it has extra value as
#    it can use the compile_commands.json information.
#
# USED FLAGS:
# 1) -list-checks          : Lists all the enabled checks.
# 2) -checks=              : Sets different checkstyles for the source files
# 3) -p                    : Sets the path for the build directory containing the
#                            compile_commands.json information.
# 4) -warnings-as-errors=* : Treats all warnings as errors.
# # Used checks can be found in .lintflags clang_tidy: stage: Lint # Set env variables before testing should we need to access CUDA of OpenGL # headers. before_script: - *env_setup # Linting must occur on the newly built build/ binaries. cache: key: "$CI_COMMIT_SHA" paths: - build/ policy: pull # Files are considered to be source only if *.cpp or *.hpp. # Excluded files or directories must be set manually in .lintignore script: # Print out the list of enabled checks - /vol/project/2017/362/g1736211/bin/clang-tidy `./scripts/.flags_bash` -list-checks # Print out the list of files which will be checked for linting - ./scripts/.find_bash # Perform the linting with the above configuratiions - /vol/project/2017/362/g1736211/bin/clang-tidy -p build `./scripts/.diff_tidy_bash` `./scripts/.flags_bash` `./scripts/.find_bash` # ---------------------------------------------------------------------------- # # BUILD # # ---------------------------------------------------------------------------- # # The build project actually generates the binaries for the project. # According to the flags set during the CMake stage, tests are built as well in # their respective directories. # > make < # The make stage will run the Makefile generated by cmake in the build/ dir. # The make stage is required not to fail. make: stage: Build # Before attempting to make env variables must be set to make sure all # binaries referenced by the Makefile can be accessed. # Make will also build the executables inside the build/ directory before_script: - *env_setup # Build/ directory is cached for a given commit SHA to make sure all the tasks # within a same pipeline will have access to the correctly built project cache: key: "$CI_COMMIT_SHA" paths: - build/ # Run simple make from the Makefile on four processes. script: - source setup.sh --make # > docker_image_build < # The docker_image_build stage creates the Docker image for an EC2 instance # on a docker-compatible runner docker_image_build: stage: Build tags: - docker script: - docker build --build-arg CUDA_GENERATION=Maxwell -t $TAG_IMAGE . except: - master # > docker_image_build_master < # The docker_image_build_master stage creates the Docker image for all architectures # on a docker-compatible runner docker_image_build_master: stage: Build tags: - docker script: - docker build --build-arg CUDA_GENERATION=Auto -t $TAG_IMAGE . only: - master # ---------------------------------------------------------------------------- # # TEST # # ---------------------------------------------------------------------------- # # The Test stage will perform a number of tests on the Project. # > gtest < # The gtest stage runs the GTest suite present within the library. # GTest binaries can be found in the group shared directory. gtest: stage: Test # Set env variables before testing should we need to access CUDA of OpenGL # headers. before_script: - *env_setup # Testing must occur on the newly built build/ binaries. cache: key: "$CI_COMMIT_SHA" paths: - build/ policy: pull # Run the gtests script: - ./build/bin/dynfu_test # > coverage < # The coverage stage runs gcov (and on top of that lcov) to generate a coverage # report of the test for the source files. coverage: stage: Test # Set env variables before testing should we need to access CUDA of OpenGL # headers. 
before_script: - *env_setup # Coverage report is constructed for the newly build binaries cache: key: "$CI_COMMIT_SHA" paths: - build/ policy: pull # Generate the coverage report script: - source setup.sh --coverage # ---------------------------------------------------------------------------- # # DEPLOY # # ---------------------------------------------------------------------------- # # The Deploy stage deploys newly built and tested binaries to a given location # on disk. # > deploy < # The deploy stage will deploy the newly built project binaries # to /data/dynfu. deploy: stage: Deploy # The latest build build/ must be retrieved from cache cache: key: "$CI_COMMIT_SHA" paths: - build/ policy: pull # Clean the output distributed library directory to clone new data into it before_script: - rm -rf /data/dynfu - mkdir /data/dynfu # Copy the latest build BINARIES to /data/ script: - cp -R build/bin/* /data/dynfu # > docker_image__deploy_dev < # Deploys to Docker Hub docker_image_deploy_dev: stage: Deploy tags: - docker script: - docker login -u $DOCKER_HUB_LOGIN -p $DOCKER_HUB_PASSWORD - docker tag $TAG_IMAGE $DEV_IMAGE - docker push $DEV_IMAGE only: - dev # > docker_image_deploy < # Deploys to Docker Hub docker_image_deploy: stage: Deploy tags: - docker script: - docker login -u $DOCKER_HUB_LOGIN -p $DOCKER_HUB_PASSWORD - docker tag $TAG_IMAGE $CONTAINER_IMAGE - docker push $CONTAINER_IMAGE only: - master # > test_dynfu_on_ec2 < # Trigger testing on ec2 instance test_dynfu_on_ec2: stage: Deploy tags: - docker before_script: - pip install awscli - export PATH="${PATH}:~/.local/bin" script: - AWS_ACCESS_KEY_ID=${UPDATE_LAMBDA_KEY} AWS_SECRET_ACCESS_KEY=${UPDATE_LAMBDA_PASS} AWS_DEFAULT_REGION=eu-west-1 aws lambda invoke --function-name dynamic-fusion-trigger-testing /tmp/lambda-out only: - dev # ---------------------------------------------------------------------------- # # CLEANUP # # ---------------------------------------------------------------------------- # # The Cleanup stage makes sure the cache storage is cleaned for a given # pipeline in the runners directories. # > cache_cleanup < # It runs regardless of failure or success, and thus has the aim of cleaning # up the testing environment. In particular this script must remove unused # cache artifacts that were stored within one pipeline. 
cache_cleanup:
  stage: Cleanup

  script:
    - echo "Present Cache:" && ls /home/gitlab-runner/cache/g1736211/dynfu/
    - rm -rf "/home/gitlab-runner/cache/g1736211/dynfu/$CI_COMMIT_SHA"
  when: always

================================================
FILE: .lintflags
================================================
# ---------------------------------------------------------------------------- #
#                                  FILE FORMAT                                  #
# ---------------------------------------------------------------------------- #
# Available checks can be found at http://clang.llvm.org/extra/clang-tidy/      #
# To add a new check add it normally                                            #
# To PREVENT the linter from using a check, prefix it with '-'                  #
# Extra links: https://clang.llvm.org/extra/clang-tidy/checks/list.html         #
# Clang-tidy docs: https://clang.llvm.org/docs/ClangFormat.html                 #
# ---------------------------------------------------------------------------- #

# Enforce google standard
google-*

# Use High Integrity C++ Coding Standard
hicpp-*

# Use LLVM Coding Standard
llvm-*

# Use all miscellaneous flags
misc-*

# Apply readability constraints
readability-*

# Apply bugprone checks
bugprone-*

# ---------------------------------------------------------------------------- #
# The following are check DISABLES for certain behaviours

# Allow not using auto
-hicpp-use-auto

# Allow string comparison via .compare and not only via == or !=
-misc-string-compare

# Allow warnings for signed bitwise operators
-hicpp-signed-bitwise

# Allow printf calls in C style
-hicpp-vararg

# Allow implicit bool casts and conversions
-readability-implicit-bool-cast
-readability-implicit-bool-conversion

# Prevent conflicting standards
-llvm-include-order

# Deprecated checks
-hicpp-special-member-functions
-hicpp-use-equals-delete
-hicpp-no-array-decay
-readability-delete-null-pointer
-hicpp-member-init

================================================
FILE: .lintignore
================================================
# Ignore the test directory when linting
./tests/*

# Ignore all git files when linting
./.git/*

# Ignore all scripts
./scripts/*

# Ignore the cmake directory
./cmake/*

# Ignore all contents of build directory
./build/*

================================================
FILE: CMakeLists.txt
================================================
# ---------------------------------------------------------------------------- #
#                           CMAKE INIT CONFIGURATIONS                           #
# ---------------------------------------------------------------------------- #
cmake_minimum_required(VERSION 2.8.8)

# ---------------------------------------------------------------------------- #
#                                SET THE PROJECT                                #
# ---------------------------------------------------------------------------- #
project(kfusion C CXX)

# ---------------------------------------------------------------------------- #
#                      UTILITY FUNCTIONS (MACROS) AND PATHS                     #
# ---------------------------------------------------------------------------- #
LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/")

include(Utils)
include(Targets)

# ---------------------------------------------------------------------------- #
#                                 CMAKE SETTINGS                                #
# ---------------------------------------------------------------------------- #
# Use both Debug and Release versions
SET(CMAKE_CONFIGURATION_TYPES "Debug;Release")

# Set CXX to compile with C++11 standard
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -MP -MD -pthread -fpermissive")

# ---------------------------------------------------------------------------- #
#                                  DEPENDENCIES                                 #
---------------------------------------------------------------------------- # find_package(Boost 1.58.0 REQUIRED COMPONENTS filesystem program_options) find_package(CUDA REQUIRED) find_package(OpenCV REQUIRED COMPONENTS core viz highgui calib3d) find_package(PCL 1.8 REQUIRED) find_package(VTK REQUIRED) # ---------------------------------------------------------------------------- # # CUDA SETTINGS # ---------------------------------------------------------------------------- # SET(HAVE_CUDA 1) LIST(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_61,code=sm_61; --ftz=true; --prec-div=false; --prec-sqrt=false; --expt-relaxed-constexpr" ) if(UNIX OR APPLE) LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC") endif() # ---------------------------------------------------------------------------- # # SET INCLUDE PATHS # ---------------------------------------------------------------------------- # include_directories(include ${Boost_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS} ${PCL_INCLUDE_DIRS} ${VTK_INCLUDE_DIRS} ) # ---------------------------------------------------------------------------- # # BUILD SRC DIRECTORY # ---------------------------------------------------------------------------- # add_subdirectory(src) # ---------------------------------------------------------------------------- # # BUILD TESTS # ---------------------------------------------------------------------------- # if(BUILD_TESTS) enable_testing() add_subdirectory(test) endif() ================================================ FILE: LICENSE.md ================================================ Copyright (c) 2012, Anatoly Baksheev All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the {organization} nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
================================================ FILE: README.md ================================================ sobfu ============ software for 3d reconstruction of non-rigidly deforming scenes using depth data, based on sobolevfusion dependencies ----------- * boost * nvidia gpu with cuda >8.0 * opencv * pcl 1.8.1 * vtk installation ------------ to install, run `source setup.sh -a` usage ------------ `./build/bin/app /path/to/data /path/to/params <--enable-viz> <--enable-log> <--verbose>` * the data directory must include folders `color` and `depth` * the folder `params` has files with recommended parameters for some of the scenes from volumedeform and killingfusion datasets ### options * with `--enable-viz`, screenshots from the pcl viewer will be logged in .png format to `/path/to/data/screenshots` * with `--enable-log`, meshes computed from the model tsdf's via marching cubes will be logged in .vtk format to `/path/to/data/meshes` * `--verbose` and `--vverbose` control verbosity of the solver example reconstructions ------------ reconstructions of some of the scenes from the volumedeform and killingfusion projects can be viewed on our [youtube playlist](http://bit.ly/sobfu-yt) there is a large trade-off between frame rate and reconstruction quality--sample reconstructions ran at approx. 2fps issues ------------ * drift in longer scenes due to less than perfect registration * topological changes references ------------ ``` @InProceedings{Slavcheva_2018_CVPR, author = {Slavcheva, Miroslava and Baust, Maximilian and Ilic, Slobodan}, title = {SobolevFusion: 3D Reconstruction of Scenes Undergoing Free Non-Rigid Motion}, booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2018} } ``` ================================================ FILE: cmake/Targets.cmake ================================================ ################################################################################################ # short command to setup source group function(kf_source_group group) cmake_parse_arguments(VW_SOURCE_GROUP "" "" "GLOB" ${ARGN}) file(GLOB srcs ${VW_SOURCE_GROUP_GLOB}) #list(LENGTH ${srcs} ___size) #if (___size GREATER 0) source_group(${group} FILES ${srcs}) #endif() endfunction() ################################################################################################ # short command for declaring includes from other modules macro(declare_deps_includes) foreach(__arg ${ARGN}) get_filename_component(___path "${CMAKE_SOURCE_DIR}/modules/${__arg}/include" ABSOLUTE) if (EXISTS ${___path}) include_directories(${___path}) endif() endforeach() unset(___path) unset(__arg) endmacro() ################################################################################################ # short command for setting defeault target properties function(default_properties target) set_target_properties(${target} PROPERTIES DEBUG_POSTFIX "d" ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib" RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") if (NOT ${target} MATCHES "^test_") install(TARGETS ${the_target} RUNTIME DESTINATION ".") endif() endfunction() function(test_props target) #os_project_label(${target} "[test]") if(USE_PROJECT_FOLDERS) set_target_properties(${target} PROPERTIES FOLDER "Tests") endif() endfunction() function(app_props target) #os_project_label(${target} "[app]") if(USE_PROJECT_FOLDERS) set_target_properties(${target} PROPERTIES FOLDER "Apps") endif() endfunction() 
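################################################################################################
# NOTE (illustrative addition, not part of the original Targets.cmake): the helpers above are
# intended to be called from per-directory CMakeLists.txt files. A minimal, hypothetical usage
# sketch follows -- the target name "demo" and the source file list are placeholders, not taken
# from this repository:
#
#   add_executable(demo demo.cpp)   # create an application target
#   default_properties(demo)        # debug postfix + bin/lib output directories
#   app_props(demo)                 # group the target under the "Apps" IDE folder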
################################################################################################
# short command for setting default target properties
function(default_properties target)
    set_target_properties(${target} PROPERTIES
        DEBUG_POSTFIX "d"
        ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")

    if (NOT ${target} MATCHES "^test_")
        install(TARGETS ${the_target} RUNTIME DESTINATION ".")
    endif()
endfunction()

################################################################################################
# short command for adding library module
macro(add_module_library name)
    set(module_name ${name})
    FILE(GLOB_RECURSE sources *.cpp *.cu)
    cuda_add_library(${module_name} STATIC ${sources})

    if(MSVC)
        set_target_properties(${module_name} PROPERTIES DEFINE_SYMBOL KFUSION_API_EXPORTS)
    else()
        add_definitions(-DKFUSION_API_EXPORTS)
    endif()

    default_properties(${module_name})
endmacro()

################################################################################################
# short command for adding application module
macro(add_application target)
    FILE(GLOB_RECURSE sources *.cpp)
    add_executable(${target} ${sources})
    default_properties(${target})
endmacro()

################################################################################################
# short command for adding test target
macro(CREATE_TEST target)
    FILE(GLOB SRCS *.cpp)
    ADD_EXECUTABLE(${target} ${SRCS})
    TARGET_LINK_LIBRARIES(${target} libgtest libgmock)
    add_test(NAME ${target} COMMAND ${target})
    default_properties(${target})
endmacro()

================================================
FILE: cmake/Utils.cmake
================================================
################################################################################################
# short alias for CMake logging
function(dmsg)
    message(STATUS "<<${ARGN}")
endfunction()

################################################################################################
# Command checking if the current module has CUDA sources to compile
macro(check_cuda var)
    file(GLOB cuda src/cuda/*.cu)
    list(LENGTH cuda ___size)

    if (HAVE_CUDA AND ___size GREATER 0)
        set(${var} ON)
    else()
        set(${var} OFF)
    endif()

    unset(___size)
    unset(cuda)
endmacro()

################################################################################################
# short command for disabling noisy compiler warnings
macro(warnings_disable)
    if(NOT ENABLE_NOISY_WARNINGS)
        set(_flag_vars "")
        set(_msvc_warnings "")
        set(_gxx_warnings "")

        foreach(arg ${ARGN})
            if(arg MATCHES "^CMAKE_")
                list(APPEND _flag_vars ${arg})
            elseif(arg MATCHES "^/wd")
                list(APPEND _msvc_warnings ${arg})
            elseif(arg MATCHES "^-W")
                list(APPEND _gxx_warnings ${arg})
            endif()
        endforeach()

        if(MSVC AND _msvc_warnings AND _flag_vars)
            foreach(var ${_flag_vars})
                foreach(warning ${_msvc_warnings})
                    set(${var} "${${var}} ${warning}")
                endforeach()
            endforeach()
        elseif(CV_COMPILER_IS_GNU AND _gxx_warnings AND _flag_vars)
            foreach(var ${_flag_vars})
                foreach(warning ${_gxx_warnings})
                    if(NOT warning MATCHES "^-Wno-")
                        string(REPLACE "${warning}" "" ${var} "${${var}}")
                        string(REPLACE "-W" "-Wno-" warning "${warning}")
                    endif()

                    ocv_check_flag_support(${var} "${warning}" _varname)
                    if(${_varname})
                        set(${var} "${${var}} ${warning}")
                    endif()
                endforeach()
            endforeach()
        endif()

        unset(_flag_vars)
        unset(_msvc_warnings)
        unset(_gxx_warnings)
    endif(NOT ENABLE_NOISY_WARNINGS)
endmacro()

################################################################################################
# Command for adding options with some preconditions
macro(kf_option variable description value) set(__value ${value}) set(__condition "") set(__varname "__value") foreach(arg ${ARGN}) if(arg STREQUAL "IF" OR arg STREQUAL "if") set(__varname "__condition") else() list(APPEND ${__varname} ${arg}) endif() endforeach() unset(__varname) if("${__condition}" STREQUAL "") set(__condition 2 GREATER 1) endif() if(${__condition}) if("${__value}" MATCHES ";") if(${__value}) option(${variable} "${description}" ON) else() option(${variable} "${description}" OFF) endif() elseif(DEFINED ${__value}) if(${__value}) option(${variable} "${description}" ON) else() option(${variable} "${description}" OFF) endif() else() option(${variable} "${description}" ${__value}) endif() else() unset(${variable} CACHE) endif() unset(__condition) unset(__value) endmacro() ================================================ FILE: include/kfusion/cuda/device.hpp ================================================ #pragma once #include #include #include //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// TsdfVolume // __kf_device__ kfusion::device::TsdfVolume::TsdfVolume(float2 *_data, int3 _dims, float3 _voxel_size, // float _trunc_dist, float _max_weight) // : data(_data), dims(_dims), voxel_size(_voxel_size), trunc_dist(_trunc_dist), max_weight(_max_weight) {} // // __kf_device__ kfusion::device::TsdfVolume::TsdfVolume(const TsdfVolume &other) // : data(other.data), // dims(other.dims), // voxel_size(other.voxel_size), // trunc_dist(other.trunc_dist), // max_weight(other.max_weight) {} __kf_device__ float2 *kfusion::device::TsdfVolume::operator()(int x, int y, int z) { return data + x + y * dims.x + z * dims.y * dims.x; } __kf_device__ const float2 *kfusion::device::TsdfVolume::operator()(int x, int y, int z) const { return data + x + y * dims.x + z * dims.y * dims.x; } __kf_device__ float2 *kfusion::device::TsdfVolume::beg(int x, int y) const { return data + x + dims.x * y; } __kf_device__ float2 *kfusion::device::TsdfVolume::zstep(float2 *const ptr) const { return ptr + dims.x * dims.y; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Projector __kf_device__ float2 kfusion::device::Projector::operator()(const float3 &p) const { float2 coo; coo.x = __fmaf_rn(f.x, __fdividef(p.x, p.z), c.x); coo.y = __fmaf_rn(f.y, __fdividef(p.y, p.z), c.y); return coo; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Reprojector __kf_device__ float3 kfusion::device::Reprojector::operator()(int u, int v, float z) const { float x = z * (u - c.x) * finv.x; float y = z * (v - c.y) * finv.y; return make_float3(x, y, z); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Utility namespace kfusion { namespace device { __kf_device__ Vec3f operator*(const Mat3f &m, const Vec3f &v) { return make_float3(dot(m.data[0], v), dot(m.data[1], v), dot(m.data[2], v)); } __kf_device__ Vec3f operator*(const Aff3f &a, const Vec3f &v) { return a.R * v + a.t; } /* operator for multiplication of an affine transform by a float */ __kf_device__ Aff3f operator*(const float &s, const Aff3f &a) { Aff3f b; b.R = a.R; b.t = s * a.t; return b; } /* 3-d matrix multiplication */ __kf_device__ Mat3f operator*(const Mat3f &m, const Mat3f &n) { Mat3f r; r.data[0] = m.data[0] * make_float3(n.data[0].x, n.data[1].x, n.data[2].x); r.data[1] = m.data[1] * 
make_float3(n.data[0].y, n.data[1].y, n.data[2].y); r.data[2] = m.data[2] * make_float3(n.data[0].z, n.data[1].z, n.data[2].z); return r; } /* affine transform composition */ __kf_device__ Aff3f operator*(const Aff3f &a, const Aff3f &b) { Aff3f c; c.R = a.R * b.R; c.t = a.t + b.t; return c; } __kf_device__ Vec3f tr(const float4 &v) { return make_float3(v.x, v.y, v.z); } struct plus { __kf_device__ float operator()(float l, float r) const { return l + r; } __kf_device__ double operator()(double l, double r) const { return l + r; } }; } // namespace device } // namespace kfusion ================================================ FILE: include/kfusion/cuda/device_array.hpp ================================================ #pragma once #include #include #include namespace kfusion { namespace cuda { ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** \brief @b DeviceArray class * * \note Typed container for GPU memory with reference counting. * * \author Anatoly Baksheev */ template class KF_EXPORTS DeviceArray : public DeviceMemory { public: /** \brief Element type. */ typedef T type; /** \brief Element size. */ enum { elem_size = sizeof(T) }; /** \brief Empty constructor. */ DeviceArray(); /** \brief Allocates internal buffer in GPU memory * \param size_t: number of elements to allocate * */ DeviceArray(size_t size); /** \brief Initializes with user allocated buffer. Reference counting is * disabled in this case. \param ptr: pointer to buffer \param size: elemens * number * */ DeviceArray(T *ptr, size_t size); /** \brief Copy constructor. Just increments reference counter. */ DeviceArray(const DeviceArray &other); /** \brief Assigment operator. Just increments reference counter. */ DeviceArray &operator=(const DeviceArray &other); /** \brief Allocates internal buffer in GPU memory. If internal buffer was * created before the function recreates it with new size. If new and old * sizes are equal it does nothing. \param size: elemens number * */ void create(size_t size); /** \brief Decrements reference counter and releases internal buffer if * needed. */ void release(); /** \brief Performs data copying. If destination size differs it will be * reallocated. \param other_arg: destination container * */ void copyTo(DeviceArray &other) const; /** \brief Uploads data to internal buffer in GPU memory. It calls create() * inside to ensure that intenal buffer size is enough. \param host_ptr_arg: * pointer to buffer to upload \param size: elemens number * */ void upload(const T *host_ptr, size_t size); /** \brief Downloads data from internal buffer to CPU memory * \param host_ptr_arg: pointer to buffer to download * */ void download(T *host_ptr) const; /** \brief Uploads data to internal buffer in GPU memory. It calls create() * inside to ensure that intenal buffer size is enough. \param data: host * vector to upload from * */ template void upload(const std::vector &data); /** \brief Downloads data from internal buffer to CPU memory * \param data: host vector to download to * */ template void download(std::vector &data) const; /** \brief Performs swap of data pointed with another device array. * \param other: device array to swap with * */ void swap(DeviceArray &other_arg); /** \brief Returns pointer for internal buffer in GPU memory. */ T *ptr(); /** \brief Returns const pointer for internal buffer in GPU memory. */ const T *ptr() const; // using DeviceMemory::ptr; /** \brief Returns pointer for internal buffer in GPU memory. 
*/ operator T *(); /** \brief Returns const pointer for internal buffer in GPU memory. */ operator const T *() const; /** \brief Returns size in elements. */ size_t size() const; }; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** \brief @b DeviceArray2D class * * \note Typed container for pitched GPU memory with reference counting. * * \author Anatoly Baksheev */ template class KF_EXPORTS DeviceArray2D : public DeviceMemory2D { public: /** \brief Element type. */ typedef T type; /** \brief Element size. */ enum { elem_size = sizeof(T) }; /** \brief Empty constructor. */ DeviceArray2D(); /** \brief Allocates internal buffer in GPU memory * \param rows: number of rows to allocate * \param cols: number of elements in each row * */ DeviceArray2D(int rows, int cols); /** \brief Initializes with user allocated buffer. Reference counting is * disabled in this case. \param rows: number of rows \param cols: number of * elements in each row \param data: pointer to buffer \param stepBytes: * stride between two consecutive rows in bytes * */ DeviceArray2D(int rows, int cols, void *data, size_t stepBytes); /** \brief Copy constructor. Just increments reference counter. */ DeviceArray2D(const DeviceArray2D &other); /** \brief Assigment operator. Just increments reference counter. */ DeviceArray2D &operator=(const DeviceArray2D &other); /** \brief Allocates internal buffer in GPU memory. If internal buffer was * created before the function recreates it with new size. If new and old * sizes are equal it does nothing. \param rows: number of rows to allocate * \param cols: number of elements in each row * */ void create(int rows, int cols); /** \brief Decrements reference counter and releases internal buffer if * needed. */ void release(); /** \brief Performs data copying. If destination size differs it will be * reallocated. \param other: destination container * */ void copyTo(DeviceArray2D &other) const; /** \brief Uploads data to internal buffer in GPU memory. It calls create() * inside to ensure that intenal buffer size is enough. \param host_ptr: * pointer to host buffer to upload \param host_step: stride between two * consecutive rows in bytes for host buffer \param rows: number of rows to * upload \param cols: number of elements in each row * */ void upload(const void *host_ptr, size_t host_step, int rows, int cols); /** \brief Downloads data from internal buffer to CPU memory. User is * resposible for correct host buffer size. \param host_ptr: pointer to host * buffer to download \param host_step: stride between two consecutive rows in * bytes for host buffer * */ void download(void *host_ptr, size_t host_step) const; /** \brief Performs swap of data pointed with another device array. * \param other: device array to swap with * */ void swap(DeviceArray2D &other_arg); /** \brief Uploads data to internal buffer in GPU memory. It calls create() * inside to ensure that intenal buffer size is enough. \param data: host * vector to upload from \param cols: stride in elements between two * consecutive rows for host buffer * */ template void upload(const std::vector &data, int cols); /** \brief Downloads data from internal buffer to CPU memory * \param data: host vector to download to * \param cols: Output stride in elements between two consecutive rows for * host vector. * */ template void download(std::vector &data, int &cols) const; /** \brief Returns pointer to given row in internal buffer. 
* \param y_arg: row index * */ T *ptr(int y = 0); /** \brief Returns const pointer to given row in internal buffer. * \param y_arg: row index * */ const T *ptr(int y = 0) const; // using DeviceMemory2D::ptr; /** \brief Returns pointer for internal buffer in GPU memory. */ operator T *(); /** \brief Returns const pointer for internal buffer in GPU memory. */ operator const T *() const; /** \brief Returns number of elements in each row. */ int cols() const; /** \brief Returns number of rows. */ int rows() const; /** \brief Returns step in elements. */ size_t elem_step() const; }; } // namespace cuda namespace device { using kfusion::cuda::DeviceArray; using kfusion::cuda::DeviceArray2D; } // namespace device } // namespace kfusion ///////////////////// Inline implementations of DeviceArray /////////////////////////////////////////////// template inline kfusion::cuda::DeviceArray::DeviceArray() {} template inline kfusion::cuda::DeviceArray::DeviceArray(size_t size) : DeviceMemory(size * elem_size) {} template inline kfusion::cuda::DeviceArray::DeviceArray(T *ptr, size_t size) : DeviceMemory(ptr, size * elem_size) {} template inline kfusion::cuda::DeviceArray::DeviceArray(const DeviceArray &other) : DeviceMemory(other) {} template inline kfusion::cuda::DeviceArray &kfusion::cuda::DeviceArray::operator=(const DeviceArray &other) { DeviceMemory::operator=(other); return *this; } template inline void kfusion::cuda::DeviceArray::create(size_t size) { DeviceMemory::create(size * elem_size); } template inline void kfusion::cuda::DeviceArray::release() { DeviceMemory::release(); } template inline void kfusion::cuda::DeviceArray::copyTo(DeviceArray &other) const { DeviceMemory::copyTo(other); } template inline void kfusion::cuda::DeviceArray::upload(const T *host_ptr, size_t size) { DeviceMemory::upload(host_ptr, size * elem_size); } template inline void kfusion::cuda::DeviceArray::download(T *host_ptr) const { DeviceMemory::download(host_ptr); } template void kfusion::cuda::DeviceArray::swap(DeviceArray &other_arg) { DeviceMemory::swap(other_arg); } template inline kfusion::cuda::DeviceArray::operator T *() { return ptr(); } template inline kfusion::cuda::DeviceArray::operator const T *() const { return ptr(); } template inline size_t kfusion::cuda::DeviceArray::size() const { return sizeBytes() / elem_size; } template inline T *kfusion::cuda::DeviceArray::ptr() { return DeviceMemory::ptr(); } template inline const T *kfusion::cuda::DeviceArray::ptr() const { return DeviceMemory::ptr(); } template template inline void kfusion::cuda::DeviceArray::upload(const std::vector &data) { upload(&data[0], data.size()); } template template inline void kfusion::cuda::DeviceArray::download(std::vector &data) const { data.resize(size()); if (!data.empty()) download(&data[0]); } ///////////////////// Inline implementations of DeviceArray2D /////////////////////////////////////////////// template inline kfusion::cuda::DeviceArray2D::DeviceArray2D() {} template inline kfusion::cuda::DeviceArray2D::DeviceArray2D(int rows, int cols) : DeviceMemory2D(rows, cols * elem_size) {} template inline kfusion::cuda::DeviceArray2D::DeviceArray2D(int rows, int cols, void *data, size_t stepBytes) : DeviceMemory2D(rows, cols * elem_size, data, stepBytes) {} template inline kfusion::cuda::DeviceArray2D::DeviceArray2D(const DeviceArray2D &other) : DeviceMemory2D(other) {} template inline kfusion::cuda::DeviceArray2D &kfusion::cuda::DeviceArray2D::operator=(const DeviceArray2D &other) { DeviceMemory2D::operator=(other); return *this; } 
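// ----------------------------------------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original header): DeviceArray and DeviceArray2D own
// reference-counted GPU buffers; upload() calls create() internally and download() resizes the host vector,
// as documented above. The variable names below are hypothetical examples only.
//
//   std::vector<float> host(640 * 480, 0.f);
//   kfusion::cuda::DeviceArray<float> dev;
//   dev.upload(host);                   // allocates (or reuses) device memory and copies host -> device
//   std::vector<float> back;
//   dev.download(back);                 // resizes 'back' to dev.size() and copies device -> host
//
//   kfusion::cuda::DeviceArray2D<float> img(480, 640);   // pitched allocation: rows, cols
//   int step_elems = 0;
//   img.download(back, step_elems);     // 'step_elems' receives the row stride in elements
//   dev.release();                      // drop this reference; the buffer is freed once the count reaches 0
// ----------------------------------------------------------------------------------------------------------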
template inline void kfusion::cuda::DeviceArray2D::create(int rows, int cols) { DeviceMemory2D::create(rows, cols * elem_size); } template inline void kfusion::cuda::DeviceArray2D::release() { DeviceMemory2D::release(); } template inline void kfusion::cuda::DeviceArray2D::copyTo(DeviceArray2D &other) const { DeviceMemory2D::copyTo(other); } template inline void kfusion::cuda::DeviceArray2D::upload(const void *host_ptr, size_t host_step, int rows, int cols) { DeviceMemory2D::upload(host_ptr, host_step, rows, cols * elem_size); } template inline void kfusion::cuda::DeviceArray2D::download(void *host_ptr, size_t host_step) const { DeviceMemory2D::download(host_ptr, host_step); } template template inline void kfusion::cuda::DeviceArray2D::upload(const std::vector &data, int cols) { upload(&data[0], cols * elem_size, data.size() / cols, cols); } template template inline void kfusion::cuda::DeviceArray2D::download(std::vector &data, int &elem_step) const { elem_step = cols(); data.resize(cols() * rows()); if (!data.empty()) download(&data[0], colsBytes()); } template void kfusion::cuda::DeviceArray2D::swap(DeviceArray2D &other_arg) { DeviceMemory2D::swap(other_arg); } template inline T *kfusion::cuda::DeviceArray2D::ptr(int y) { return DeviceMemory2D::ptr(y); } template inline const T *kfusion::cuda::DeviceArray2D::ptr(int y) const { return DeviceMemory2D::ptr(y); } template inline kfusion::cuda::DeviceArray2D::operator T *() { return ptr(); } template inline kfusion::cuda::DeviceArray2D::operator const T *() const { return ptr(); } template inline int kfusion::cuda::DeviceArray2D::cols() const { return DeviceMemory2D::colsBytes() / elem_size; } template inline int kfusion::cuda::DeviceArray2D::rows() const { return DeviceMemory2D::rows(); } template inline size_t kfusion::cuda::DeviceArray2D::elem_step() const { return DeviceMemory2D::step() / elem_size; } ================================================ FILE: include/kfusion/cuda/device_memory.hpp ================================================ #pragma once #include #include namespace kfusion { namespace cuda { /** \brief Error handler. All GPU functions from this subsystem call the * function to report an error. For internal use only */ KF_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func = ""); ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** \brief @b DeviceMemory class * * \note This is a BLOB container class with reference counting for GPU memory. * * \author Anatoly Baksheev */ class KF_EXPORTS DeviceMemory { public: /** \brief Empty constructor. */ DeviceMemory(); /** \brief Destructor. */ ~DeviceMemory(); /** \brief Allocates internal buffer in GPU memory * \param sizeBytes_arg: amount of memory to allocate * */ DeviceMemory(size_t sizeBytes_arg); /** \brief Initializes with user allocated buffer. Reference counting is * disabled in this case. \param ptr_arg: pointer to buffer \param * sizeBytes_arg: buffer size * */ DeviceMemory(void *ptr_arg, size_t sizeBytes_arg); /** \brief Copy constructor. Just increments reference counter. */ DeviceMemory(const DeviceMemory &other_arg); /** \brief Assigment operator. Just increments reference counter. */ DeviceMemory &operator=(const DeviceMemory &other_arg); /** \brief Allocates internal buffer in GPU memory. If internal buffer was * created before the function recreates it with new size. If new and old * sizes are equal it does nothing. 
\param sizeBytes_arg: buffer size * */ void create(size_t sizeBytes_arg); /** \brief Decrements reference counter and releases internal buffer if * needed. */ void release(); /** \brief Performs data copying. If destination size differs it will be * reallocated. \param other_arg: destination container * */ void copyTo(DeviceMemory &other) const; /** \brief Uploads data to internal buffer in GPU memory. It calls create() * inside to ensure that intenal buffer size is enough. \param host_ptr_arg: * pointer to buffer to upload \param sizeBytes_arg: buffer size * */ void upload(const void *host_ptr_arg, size_t sizeBytes_arg); /** \brief Downloads data from internal buffer to CPU memory * \param host_ptr_arg: pointer to buffer to download * */ void download(void *host_ptr_arg) const; /** \brief Performs swap of data pointed with another device memory. * \param other: device memory to swap with * */ void swap(DeviceMemory &other_arg); /** \brief Returns pointer for internal buffer in GPU memory. */ template T *ptr(); /** \brief Returns constant pointer for internal buffer in GPU memory. */ template const T *ptr() const; /** \brief Conversion to PtrSz for passing to kernel functions. */ template operator PtrSz() const; /** \brief Returns true if unallocated otherwise false. */ bool empty() const; size_t sizeBytes() const; private: /** \brief Device pointer. */ void *data_; /** \brief Allocated size in bytes. */ size_t sizeBytes_; /** \brief Pointer to reference counter in CPU memory. */ int *refcount_; }; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** \brief @b DeviceMemory2D class * * \note This is a BLOB container class with reference counting for pitched GPU * memory. * * \author Anatoly Baksheev */ class KF_EXPORTS DeviceMemory2D { public: /** \brief Empty constructor. */ DeviceMemory2D(); /** \brief Destructor. */ ~DeviceMemory2D(); /** \brief Allocates internal buffer in GPU memory * \param rows_arg: number of rows to allocate * \param colsBytes_arg: width of the buffer in bytes * */ DeviceMemory2D(int rows_arg, int colsBytes_arg); /** \brief Initializes with user allocated buffer. Reference counting is * disabled in this case. \param rows_arg: number of rows \param * colsBytes_arg: width of the buffer in bytes \param data_arg: pointer to * buffer \param stepBytes_arg: stride between two consecutive rows in bytes * */ DeviceMemory2D(int rows_arg, int colsBytes_arg, void *data_arg, size_t step_arg); /** \brief Copy constructor. Just increments reference counter. */ DeviceMemory2D(const DeviceMemory2D &other_arg); /** \brief Assigment operator. Just increments reference counter. */ DeviceMemory2D &operator=(const DeviceMemory2D &other_arg); /** \brief Allocates internal buffer in GPU memory. If internal buffer was * created before the function recreates it with new size. If new and old * sizes are equal it does nothing. \param ptr_arg: number of rows to allocate * \param sizeBytes_arg: width of the buffer in bytes * */ void create(int rows_arg, int colsBytes_arg); /** \brief Decrements reference counter and releases internal buffer if * needed. */ void release(); /** \brief Performs data copying. If destination size differs it will be * reallocated. \param other_arg: destination container * */ void copyTo(DeviceMemory2D &other) const; /** \brief Uploads data to internal buffer in GPU memory. It calls create() * inside to ensure that intenal buffer size is enough. 
\param host_ptr_arg: * pointer to host buffer to upload \param host_step_arg: stride between two * consecutive rows in bytes for host buffer \param rows_arg: number of rows * to upload \param sizeBytes_arg: width of host buffer in bytes * */ void upload(const void *host_ptr_arg, size_t host_step_arg, int rows_arg, int colsBytes_arg); /** \brief Downloads data from internal buffer to CPU memory. User is * resposible for correct host buffer size. \param host_ptr_arg: pointer to * host buffer to download \param host_step_arg: stride between two * consecutive rows in bytes for host buffer * */ void download(void *host_ptr_arg, size_t host_step_arg) const; /** \brief Performs swap of data pointed with another device memory. * \param other: device memory to swap with * */ void swap(DeviceMemory2D &other_arg); /** \brief Returns pointer to given row in internal buffer. * \param y_arg: row index * */ template T *ptr(int y_arg = 0); /** \brief Returns constant pointer to given row in internal buffer. * \param y_arg: row index * */ template const T *ptr(int y_arg = 0) const; /** \brief Conversion to PtrStep for passing to kernel functions. */ template operator PtrStep() const; /** \brief Conversion to PtrStepSz for passing to kernel functions. */ template operator PtrStepSz() const; /** \brief Returns true if unallocated otherwise false. */ bool empty() const; /** \brief Returns number of bytes in each row. */ int colsBytes() const; /** \brief Returns number of rows. */ int rows() const; /** \brief Returns stride between two consecutive rows in bytes for internal * buffer. Step is stored always and everywhere in bytes!!! */ size_t step() const; private: /** \brief Device pointer. */ void *data_; /** \brief Stride between two consecutive rows in bytes for internal buffer. * Step is stored always and everywhere in bytes!!! */ size_t step_; /** \brief Width of the buffer in bytes. */ int colsBytes_; /** \brief Number of rows. */ int rows_; /** \brief Pointer to reference counter in CPU memory. 
*/ int *refcount_; }; } // namespace cuda namespace device { using kfusion::cuda::DeviceMemory; using kfusion::cuda::DeviceMemory2D; } // namespace device } // namespace kfusion ///////////////////// Inline implementations of DeviceMemory /////////////////////////////////////////////// template inline T *kfusion::cuda::DeviceMemory::ptr() { return (T *) data_; } template inline const T *kfusion::cuda::DeviceMemory::ptr() const { return (const T *) data_; } template inline kfusion::cuda::DeviceMemory::operator kfusion::cuda::PtrSz() const { PtrSz result; result.data = (U *) ptr(); result.size = sizeBytes_ / sizeof(U); return result; } ///////////////////// Inline implementations of DeviceMemory2D /////////////////////////////////////////////// template T *kfusion::cuda::DeviceMemory2D::ptr(int y_arg) { return (T *) ((char *) data_ + y_arg * step_); } template const T *kfusion::cuda::DeviceMemory2D::ptr(int y_arg) const { return (const T *) ((const char *) data_ + y_arg * step_); } template kfusion::cuda::DeviceMemory2D::operator kfusion::cuda::PtrStep() const { PtrStep result; result.data = (U *) ptr(); result.step = step_; return result; } template kfusion::cuda::DeviceMemory2D::operator kfusion::cuda::PtrStepSz() const { PtrStepSz result; result.data = (U *) ptr(); result.step = step_; result.cols = colsBytes_ / sizeof(U); result.rows = rows_; return result; } ================================================ FILE: include/kfusion/cuda/imgproc.hpp ================================================ #pragma once /* kinfu includes */ #include /* pcl includes */ #include namespace kfusion { namespace cuda { KF_EXPORTS void depthBilateralFilter(const Depth &in, Depth &out, int ksz, float sigma_spatial, float sigma_depth); KF_EXPORTS void depthTruncation(Depth &depth, float threshold); KF_EXPORTS void depthBuildPyramid(const Depth &depth, Depth &pyramid, float sigma_depth); KF_EXPORTS void computeNormalsAndMaskDepth(const Intr &intr, Depth &depth, Normals &normals); KF_EXPORTS void computePointNormals(const Intr &intr, const Depth &depth, Cloud &points, Normals &normals); KF_EXPORTS void computeDists(const Depth &depth, Dists &dists, const Intr &intr); KF_EXPORTS void resizeDepthNormals(const Depth &depth, const Normals &normals, Depth &depth_out, Normals &normals_out); KF_EXPORTS void resizePointsNormals(const Cloud &points, const Normals &normals, Cloud &points_out, Normals &normals_out); KF_EXPORTS void waitAllDefaultStream(); KF_EXPORTS void renderTangentColors(const Normals &normals, Image &image); /** \brief rasterises the canonical surface * \param[in] intr camera intrinsics * \param[in] cam2vol camera -> volume coordinate transform * \param[in] surface surface to rasterise * \param[out] points_out frame with the currently visible geometry * \param[out] normals_out frame with the normals of the currently visible geometry */ KF_EXPORTS void rasteriseSurface(const Intr &intr, const Affine3f &vol2cam, const Surface &s, Cloud &vertices_out, Normals &normals_out); KF_EXPORTS void renderImage(const Depth &depth, const Normals &normals, const Intr &intr, const Vec3f &light_pose, Image &image); KF_EXPORTS void renderImage(const Cloud &points, const Normals &normals, const Intr &intr, const Vec3f &light_pose, Image &image); } // namespace cuda } // namespace kfusion ================================================ FILE: include/kfusion/cuda/kernel_containers.hpp ================================================ #pragma once #if defined(__CUDACC__) #define __kf_hdevice__ __host__ __device__ 
__forceinline__ #define __kf_device__ __device__ __forceinline__ #else #define __kf_hdevice__ #define __kf_device__ #endif #include namespace kfusion { namespace cuda { template struct DevPtr { typedef T elem_type; const static size_t elem_size = sizeof(elem_type); T *data; __kf_hdevice__ DevPtr() : data(0) {} __kf_hdevice__ DevPtr(T *data_arg) : data(data_arg) {} __kf_hdevice__ size_t elemSize() const { return elem_size; } __kf_hdevice__ operator T *() { return data; } __kf_hdevice__ operator const T *() const { return data; } }; template struct PtrSz : public DevPtr { __kf_hdevice__ PtrSz() : size(0) {} __kf_hdevice__ PtrSz(T *data_arg, size_t size_arg) : DevPtr(data_arg), size(size_arg) {} size_t size; }; template struct PtrStep : public DevPtr { __kf_hdevice__ PtrStep() : step(0) {} __kf_hdevice__ PtrStep(T *data_arg, size_t step_arg) : DevPtr(data_arg), step(step_arg) {} /** \brief stride between two consecutive rows in bytes. Step is stored always * and everywhere in bytes!!! */ size_t step; __kf_hdevice__ T *ptr(int y = 0) { return (T *) ((char *) DevPtr::data + y * step); } __kf_hdevice__ const T *ptr(int y = 0) const { return (const T *) ((const char *) DevPtr::data + y * step); } __kf_hdevice__ T &operator()(int y, int x) { return ptr(y)[x]; } __kf_hdevice__ const T &operator()(int y, int x) const { return ptr(y)[x]; } }; template struct PtrStepSz : public PtrStep { __kf_hdevice__ PtrStepSz() : cols(0), rows(0) {} __kf_hdevice__ PtrStepSz(int rows_arg, int cols_arg, T *data_arg, size_t step_arg) : PtrStep(data_arg, step_arg), cols(cols_arg), rows(rows_arg) {} int cols; int rows; }; } // namespace cuda namespace device { using kfusion::cuda::PtrStep; using kfusion::cuda::PtrStepSz; using kfusion::cuda::PtrSz; } // namespace device } // namespace kfusion namespace kf = kfusion; ================================================ FILE: include/kfusion/cuda/marching_cubes.hpp ================================================ #pragma once /* eigen includes */ #include /* kinfu includes */ #include #include /* pcl includes */ #include namespace kfusion { namespace cuda { /** \brief MarchingCubes implements MarchingCubes functionality for TSDF volume on GPU * \author Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com) */ class KF_EXPORTS MarchingCubes { public: /** \brief Default size for triangles buffer */ enum { POINTS_PER_TRIANGLE = 3, DEFAULT_TRIANGLES_BUFFER_SIZE = 2 * 1000 * 1000 * POINTS_PER_TRIANGLE }; /** \brief Smart pointer. 
*/ typedef boost::shared_ptr Ptr; /** \brief Default constructor */ MarchingCubes(); /** \brief Destructor */ ~MarchingCubes(); /* set volume pose */ void setPose(const cv::Affine3f& pose); /* run mc to extract the zero level set */ Surface run(const TsdfVolume& volume, DeviceArray& vertices_buffer, DeviceArray& normals_buffer); private: /** \brief Edge table for marching cubes */ DeviceArray edgeTable_; /** \brief Number of vertextes table for marching cubes */ DeviceArray numVertsTable_; /** \brief Triangles table for marching cubes */ DeviceArray triTable_; /** \brief Temporary buffer used by marching cubes (first row stores occuped voxes id, second number of vetexes, * third poits offsets */ DeviceArray2D occupied_voxels_buffer_; /* volume pose */ cv::Affine3f pose; }; } // namespace cuda } // namespace kfusion ================================================ FILE: include/kfusion/cuda/projective_icp.hpp ================================================ #pragma once #include namespace kfusion { namespace cuda { class ProjectiveICP { public: enum { MAX_PYRAMID_LEVELS = 4 }; typedef std::vector DepthPyr; typedef std::vector PointsPyr; typedef std::vector NormalsPyr; ProjectiveICP(); virtual ~ProjectiveICP(); float getDistThreshold() const; void setDistThreshold(float distance); float getAngleThreshold() const; void setAngleThreshold(float angle); void setIterationsNum(const std::vector &iters); int getUsedLevelsNum() const; virtual bool estimateTransform(Affine3f &affine, const Intr &intr, const Frame &curr, const Frame &prev); /** The function takes masked depth, i.e. it assumes for performance reasons * that "if depth(y,x) is not zero, then normals(y,x) surely is not qnan" */ virtual bool estimateTransform(Affine3f &affine, const Intr &intr, const DepthPyr &dcurr, const NormalsPyr ncurr, const DepthPyr dprev, const NormalsPyr nprev); virtual bool estimateTransform(Affine3f &affine, const Intr &intr, const PointsPyr &vcurr, const NormalsPyr ncurr, const PointsPyr vprev, const NormalsPyr nprev); // static Vec3f rodrigues2(const Mat3f& matrix); private: std::vector iters_; float angle_thres_; float dist_thres_; DeviceArray2D buffer_; struct StreamHelper; cv::Ptr shelp_; }; } // namespace cuda } // namespace kfusion ================================================ FILE: include/kfusion/cuda/temp_utils.hpp ================================================ #pragma once #include #include #define FULL_MASK 0xffffffff namespace kfusion { namespace device { template __kf_hdevice__ void swap(T &a, T &b) { T c(a); a = b; b = c; } template struct numeric_limits; template <> struct numeric_limits { __kf_device__ static float quiet_NaN() { return __int_as_float(0x7fffffff); /*CUDART_NAN_F*/ }; __kf_device__ static float epsilon() { return 1.192092896e-07f /*FLT_EPSILON*/; }; __kf_device__ static float min() { return 1.175494351e-38f /*FLT_MIN*/; }; __kf_device__ static float max() { return 3.402823466e+38f /*FLT_MAX*/; }; }; template <> struct numeric_limits { __kf_device__ static unsigned short max() { return USHRT_MAX; }; }; __kf_device__ float dot(const float3 &v1, const float3 &v2) { return __fmaf_rn(v1.x, v2.x, __fmaf_rn(v1.y, v2.y, v1.z * v2.z)); } __kf_device__ float3 &operator+=(float3 &vec, const float &v) { vec.x += v; vec.y += v; vec.z += v; return vec; } __kf_device__ float3 &operator+=(float3 &v1, const float3 &v2) { v1.x += v2.x; v1.y += v2.y; v1.z += v2.z; return v1; } __kf_device__ float3 operator+(const float3 &v1, const float3 &v2) { return make_float3(v1.x + v2.x, v1.y + v2.y, v1.z + 
v2.z); } __kf_device__ float3 operator*(const float3 &v1, const float3 &v2) { return make_float3(v1.x * v2.x, v1.y * v2.y, v1.z * v2.z); } __kf_hdevice__ float3 operator*(const float3 &v1, const int3 &v2) { return make_float3(v1.x * v2.x, v1.y * v2.y, v1.z * v2.z); } __kf_device__ float3 operator/(const float3 &v1, const float3 &v2) { return make_float3(v1.x / v2.x, v1.y / v2.y, v1.z / v2.z); } __kf_hdevice__ float3 operator/(const float &v, const float3 &vec) { return make_float3(v / vec.x, v / vec.y, v / vec.z); } __kf_device__ float3 &operator*=(float3 &vec, const float &v) { vec.x *= v; vec.y *= v; vec.z *= v; return vec; } __kf_hdevice__ float3 operator-(const float3 &v1, const float3 &v2) { return make_float3(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z); } __kf_hdevice__ float3 operator*(const float3 &v1, const float &v) { return make_float3(v1.x * v, v1.y * v, v1.z * v); } __kf_hdevice__ float3 operator*(const float &v, const float3 &v1) { return make_float3(v1.x * v, v1.y * v, v1.z * v); } __kf_device__ float norm(const float3 &v) { return sqrt(dot(v, v)); } __kf_device__ float norm_sqr(const float3 &v) { return dot(v, v); } __kf_device__ float3 normalized(const float3 &v) { return v * rsqrt(dot(v, v)); } __kf_hdevice__ float3 cross(const float3 &v1, const float3 &v2) { return make_float3(v1.y * v2.z - v1.z * v2.y, v1.z * v2.x - v1.x * v2.z, v1.x * v2.y - v1.y * v2.x); } __kf_device__ void computeRoots2(const float &b, const float &c, float3 &roots) { roots.x = 0.f; float d = b * b - 4.f * c; if (d < 0.f) // no real roots!!!! THIS SHOULD NOT HAPPEN! d = 0.f; float sd = sqrtf(d); roots.z = 0.5f * (b + sd); roots.y = 0.5f * (b - sd); } __kf_device__ void computeRoots3(float c0, float c1, float c2, float3 &roots) { if (fabsf(c0) < numeric_limits::epsilon()) // one root is 0 -> quadratic equation { computeRoots2(c2, c1, roots); } else { const float s_inv3 = 1.f / 3.f; const float s_sqrt3 = sqrtf(3.f); // Construct the parameters used in classifying the roots of the equation // and in solving the equation for the roots in closed form. float c2_over_3 = c2 * s_inv3; float a_over_3 = (c1 - c2 * c2_over_3) * s_inv3; if (a_over_3 > 0.f) a_over_3 = 0.f; float half_b = 0.5f * (c0 + c2_over_3 * (2.f * c2_over_3 * c2_over_3 - c1)); float q = half_b * half_b + a_over_3 * a_over_3 * a_over_3; if (q > 0.f) q = 0.f; // Compute the eigenvalues by solving for the roots of the polynomial. float rho = sqrtf(-a_over_3); float theta = atan2f(sqrtf(-q), half_b) * s_inv3; float cos_theta = __cosf(theta); float sin_theta = __sinf(theta); roots.x = c2_over_3 + 2.f * rho * cos_theta; roots.y = c2_over_3 - rho * (cos_theta + s_sqrt3 * sin_theta); roots.z = c2_over_3 - rho * (cos_theta - s_sqrt3 * sin_theta); // Sort in increasing order. if (roots.x >= roots.y) swap(roots.x, roots.y); if (roots.y >= roots.z) { swap(roots.y, roots.z); if (roots.x >= roots.y) swap(roots.x, roots.y); } if (roots.x <= 0) // eigenval for symetric positive semi-definite matrix can // not be negative! Set it to 0 computeRoots2(c2, c1, roots); } } struct Eigen33 { public: template struct MiniMat { float3 data[Rows]; __kf_hdevice__ float3 &operator[](int i) { return data[i]; } __kf_hdevice__ const float3 &operator[](int i) const { return data[i]; } }; typedef MiniMat<3> Mat33; typedef MiniMat<4> Mat43; static __kf_device__ float3 unitOrthogonal(const float3 &src) { float3 perp; /* Let us compute the crossed product of *this with a vector * that is not too close to being colinear to *this. 
*/ /* unless the x and y coords are both close to zero, we can * simply take ( -y, x, 0 ) and normalize it. */ if (!isMuchSmallerThan(src.x, src.z) || !isMuchSmallerThan(src.y, src.z)) { float invnm = rsqrtf(src.x * src.x + src.y * src.y); perp.x = -src.y * invnm; perp.y = src.x * invnm; perp.z = 0.0f; } /* if both x and y are close to zero, then the vector is close * to the z-axis, so it's far from colinear to the x-axis for instance. * So we take the crossed product with (1,0,0) and normalize it. */ else { float invnm = rsqrtf(src.z * src.z + src.y * src.y); perp.x = 0.0f; perp.y = -src.z * invnm; perp.z = src.y * invnm; } return perp; } __kf_device__ Eigen33(volatile float *mat_pkg_arg) : mat_pkg(mat_pkg_arg) {} __kf_device__ void compute(Mat33 &tmp, Mat33 &vec_tmp, Mat33 &evecs, float3 &evals) { // Scale the matrix so its entries are in [-1,1]. The scaling is applied // only when at least one matrix entry has magnitude larger than 1. float max01 = fmaxf(fabsf(mat_pkg[0]), fabsf(mat_pkg[1])); float max23 = fmaxf(fabsf(mat_pkg[2]), fabsf(mat_pkg[3])); float max45 = fmaxf(fabsf(mat_pkg[4]), fabsf(mat_pkg[5])); float m0123 = fmaxf(max01, max23); float scale = fmaxf(max45, m0123); if (scale <= numeric_limits::min()) scale = 1.f; mat_pkg[0] /= scale; mat_pkg[1] /= scale; mat_pkg[2] /= scale; mat_pkg[3] /= scale; mat_pkg[4] /= scale; mat_pkg[5] /= scale; // The characteristic equation is x^3 - c2*x^2 + c1*x - c0 = 0. The // eigenvalues are the roots to this equation, all guaranteed to be // real-valued, because the matrix is symmetric. float c0 = m00() * m11() * m22() + 2.f * m01() * m02() * m12() - m00() * m12() * m12() - m11() * m02() * m02() - m22() * m01() * m01(); float c1 = m00() * m11() - m01() * m01() + m00() * m22() - m02() * m02() + m11() * m22() - m12() * m12(); float c2 = m00() + m11() + m22(); computeRoots3(c0, c1, c2, evals); if (evals.z - evals.x <= numeric_limits::epsilon()) { evecs[0] = make_float3(1.f, 0.f, 0.f); evecs[1] = make_float3(0.f, 1.f, 0.f); evecs[2] = make_float3(0.f, 0.f, 1.f); } else if (evals.y - evals.x <= numeric_limits::epsilon()) { // first and second equal tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2(); tmp[0].x -= evals.z; tmp[1].y -= evals.z; tmp[2].z -= evals.z; vec_tmp[0] = cross(tmp[0], tmp[1]); vec_tmp[1] = cross(tmp[0], tmp[2]); vec_tmp[2] = cross(tmp[1], tmp[2]); float len1 = dot(vec_tmp[0], vec_tmp[0]); float len2 = dot(vec_tmp[1], vec_tmp[1]); float len3 = dot(vec_tmp[2], vec_tmp[2]); if (len1 >= len2 && len1 >= len3) { evecs[2] = vec_tmp[0] * rsqrtf(len1); } else if (len2 >= len1 && len2 >= len3) { evecs[2] = vec_tmp[1] * rsqrtf(len2); } else { evecs[2] = vec_tmp[2] * rsqrtf(len3); } evecs[1] = unitOrthogonal(evecs[2]); evecs[0] = cross(evecs[1], evecs[2]); } else if (evals.z - evals.y <= numeric_limits::epsilon()) { // second and third equal tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2(); tmp[0].x -= evals.x; tmp[1].y -= evals.x; tmp[2].z -= evals.x; vec_tmp[0] = cross(tmp[0], tmp[1]); vec_tmp[1] = cross(tmp[0], tmp[2]); vec_tmp[2] = cross(tmp[1], tmp[2]); float len1 = dot(vec_tmp[0], vec_tmp[0]); float len2 = dot(vec_tmp[1], vec_tmp[1]); float len3 = dot(vec_tmp[2], vec_tmp[2]); if (len1 >= len2 && len1 >= len3) { evecs[0] = vec_tmp[0] * rsqrtf(len1); } else if (len2 >= len1 && len2 >= len3) { evecs[0] = vec_tmp[1] * rsqrtf(len2); } else { evecs[0] = vec_tmp[2] * rsqrtf(len3); } evecs[1] = unitOrthogonal(evecs[0]); evecs[2] = cross(evecs[0], evecs[1]); } else { tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2(); tmp[0].x -= evals.z; 
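/* form A - lambda*I for the distinct eigenvalue evals.z: the resulting matrix has rank 2, so the
 * cross product of any two of its rows lies in its null space and is therefore an eigenvector for
 * evals.z; the longest of the three candidates is kept below for numerical robustness */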
tmp[1].y -= evals.z; tmp[2].z -= evals.z; vec_tmp[0] = cross(tmp[0], tmp[1]); vec_tmp[1] = cross(tmp[0], tmp[2]); vec_tmp[2] = cross(tmp[1], tmp[2]); float len1 = dot(vec_tmp[0], vec_tmp[0]); float len2 = dot(vec_tmp[1], vec_tmp[1]); float len3 = dot(vec_tmp[2], vec_tmp[2]); float mmax[3]; unsigned int min_el = 2; unsigned int max_el = 2; if (len1 >= len2 && len1 >= len3) { mmax[2] = len1; evecs[2] = vec_tmp[0] * rsqrtf(len1); } else if (len2 >= len1 && len2 >= len3) { mmax[2] = len2; evecs[2] = vec_tmp[1] * rsqrtf(len2); } else { mmax[2] = len3; evecs[2] = vec_tmp[2] * rsqrtf(len3); } tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2(); tmp[0].x -= evals.y; tmp[1].y -= evals.y; tmp[2].z -= evals.y; vec_tmp[0] = cross(tmp[0], tmp[1]); vec_tmp[1] = cross(tmp[0], tmp[2]); vec_tmp[2] = cross(tmp[1], tmp[2]); len1 = dot(vec_tmp[0], vec_tmp[0]); len2 = dot(vec_tmp[1], vec_tmp[1]); len3 = dot(vec_tmp[2], vec_tmp[2]); if (len1 >= len2 && len1 >= len3) { mmax[1] = len1; evecs[1] = vec_tmp[0] * rsqrtf(len1); min_el = len1 <= mmax[min_el] ? 1 : min_el; max_el = len1 > mmax[max_el] ? 1 : max_el; } else if (len2 >= len1 && len2 >= len3) { mmax[1] = len2; evecs[1] = vec_tmp[1] * rsqrtf(len2); min_el = len2 <= mmax[min_el] ? 1 : min_el; max_el = len2 > mmax[max_el] ? 1 : max_el; } else { mmax[1] = len3; evecs[1] = vec_tmp[2] * rsqrtf(len3); min_el = len3 <= mmax[min_el] ? 1 : min_el; max_el = len3 > mmax[max_el] ? 1 : max_el; } tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2(); tmp[0].x -= evals.x; tmp[1].y -= evals.x; tmp[2].z -= evals.x; vec_tmp[0] = cross(tmp[0], tmp[1]); vec_tmp[1] = cross(tmp[0], tmp[2]); vec_tmp[2] = cross(tmp[1], tmp[2]); len1 = dot(vec_tmp[0], vec_tmp[0]); len2 = dot(vec_tmp[1], vec_tmp[1]); len3 = dot(vec_tmp[2], vec_tmp[2]); if (len1 >= len2 && len1 >= len3) { mmax[0] = len1; evecs[0] = vec_tmp[0] * rsqrtf(len1); min_el = len3 <= mmax[min_el] ? 0 : min_el; max_el = len3 > mmax[max_el] ? 0 : max_el; } else if (len2 >= len1 && len2 >= len3) { mmax[0] = len2; evecs[0] = vec_tmp[1] * rsqrtf(len2); min_el = len3 <= mmax[min_el] ? 0 : min_el; max_el = len3 > mmax[max_el] ? 0 : max_el; } else { mmax[0] = len3; evecs[0] = vec_tmp[2] * rsqrtf(len3); min_el = len3 <= mmax[min_el] ? 0 : min_el; max_el = len3 > mmax[max_el] ? 0 : max_el; } unsigned mid_el = 3 - min_el - max_el; evecs[min_el] = normalized(cross(evecs[(min_el + 1) % 3], evecs[(min_el + 2) % 3])); evecs[mid_el] = normalized(cross(evecs[(mid_el + 1) % 3], evecs[(mid_el + 2) % 3])); } // Rescale back to the original size. 
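// (dividing the matrix entries by `scale` above divided every eigenvalue by the same factor,
// so multiplying by `scale` restores them; the eigenvectors are unaffected by the scaling)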
evals *= scale; } private: volatile float *mat_pkg; __kf_device__ float m00() const { return mat_pkg[0]; } __kf_device__ float m01() const { return mat_pkg[1]; } __kf_device__ float m02() const { return mat_pkg[2]; } __kf_device__ float m10() const { return mat_pkg[1]; } __kf_device__ float m11() const { return mat_pkg[3]; } __kf_device__ float m12() const { return mat_pkg[4]; } __kf_device__ float m20() const { return mat_pkg[2]; } __kf_device__ float m21() const { return mat_pkg[4]; } __kf_device__ float m22() const { return mat_pkg[5]; } __kf_device__ float3 row0() const { return make_float3(m00(), m01(), m02()); } __kf_device__ float3 row1() const { return make_float3(m10(), m11(), m12()); } __kf_device__ float3 row2() const { return make_float3(m20(), m21(), m22()); } __kf_device__ static bool isMuchSmallerThan(float x, float y) { // copied from /include/Eigen/src/Core/NumTraits.h const float prec_sqr = numeric_limits::epsilon() * numeric_limits::epsilon(); return x * x <= prec_sqr * y * y; } }; struct Warp { enum { LOG_WARP_SIZE = 5, WARP_SIZE = 1 << LOG_WARP_SIZE, STRIDE = WARP_SIZE }; /** \brief Returns the warp lane ID of the calling thread. */ static __kf_device__ unsigned int laneId() { unsigned int ret; asm("mov.u32 %0, %laneid;" : "=r"(ret)); return ret; } static __kf_device__ unsigned int id() { int tid = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; return tid >> LOG_WARP_SIZE; } static __kf_device__ int laneMaskLt() { #if (__CUDA_ARCH__ >= 200) unsigned int ret; asm("mov.u32 %0, %lanemask_lt;" : "=r"(ret)); return ret; #else return 0xFFFFFFFF >> (32 - laneId()); #endif } static __kf_device__ int binaryExclScan(int ballot_mask) { return __popc(Warp::laneMaskLt() & ballot_mask); } }; struct Block { static __kf_device__ unsigned int stride() { return blockDim.x * blockDim.y * blockDim.z; } static __kf_device__ int flattenedThreadId() { return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; } template static __kf_device__ void reduce(volatile T *buffer, BinOp op) { int tid = flattenedThreadId(); T val = buffer[tid]; if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); } if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); } if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); } if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); } if (tid < 32) { if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); } if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); } if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); } if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); } if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); } if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); } } } template static __kf_device__ T reduce(volatile T *buffer, T init, BinOp op) { int tid = flattenedThreadId(); T val = buffer[tid] = init; __syncthreads(); if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); } if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); } if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); } if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); 
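/* classic shared-memory tree reduction: each pass halves the number of active threads and
 * combines pairs of partial results, with a barrier after every pass; once only the first
 * warp remains active (tid < 32) the final steps run without further __syncthreads(),
 * relying on warp-synchronous execution over the volatile buffer */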
__syncthreads(); } if (tid < 32) { #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 && 0 if (CTA_SIZE >= 64) val = op(val, buffer[tid + 32]); if (CTA_SIZE >= 32) val = op(val, __shfl_xor(val, 16)); if (CTA_SIZE >= 16) val = op(val, __shfl_xor(val, 8)); if (CTA_SIZE >= 8) val = op(val, __shfl_xor(val, 4)); if (CTA_SIZE >= 4) val = op(val, __shfl_xor(val, 2)); if (CTA_SIZE >= 2) buffer[tid] = op(val, __shfl_xor(val, 1)); #else if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); } if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); } if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); } if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); } if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); } if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); } #endif } __syncthreads(); return buffer[0]; } }; struct Emulation { static __kf_device__ int warp_reduce(volatile int *ptr, const unsigned int tid) { const unsigned int lane = tid & 31; // index of thread in warp (0..31) if (lane < 16) { int partial = ptr[tid]; ptr[tid] = partial = partial + ptr[tid + 16]; ptr[tid] = partial = partial + ptr[tid + 8]; ptr[tid] = partial = partial + ptr[tid + 4]; ptr[tid] = partial = partial + ptr[tid + 2]; ptr[tid] = partial = partial + ptr[tid + 1]; } return ptr[tid - lane]; } static __kf_device__ int Ballot(int predicate, volatile int *cta_buffer) { #if __CUDA_ARCH__ >= 200 (void) cta_buffer; return __ballot_sync(FULL_MASK, predicate); #else int tid = Block::flattenedThreadId(); cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0; return warp_reduce(cta_buffer, tid); #endif } static __kf_device__ bool All(int predicate, volatile int *cta_buffer) { #if __CUDA_ARCH__ >= 200 (void) cta_buffer; return __all_sync(FULL_MASK, predicate); #else int tid = Block::flattenedThreadId(); cta_buffer[tid] = predicate ? 
1 : 0; return warp_reduce(cta_buffer, tid) == 32; #endif } }; } // namespace device } // namespace kfusion ================================================ FILE: include/kfusion/cuda/texture_binder.hpp ================================================ #pragma once #include #include namespace kfusion { namespace cuda { class TextureBinder { public: template TextureBinder(const DeviceArray2D &arr, const struct texture &tex) : texref(&tex) { cudaChannelFormatDesc desc = cudaCreateChannelDesc(); cudaSafeCall(cudaBindTexture2D(0, tex, arr.ptr(), desc, arr.cols(), arr.rows(), arr.step())); } template TextureBinder(const DeviceArray &arr, const struct texture &tex) : texref(&tex) { cudaChannelFormatDesc desc = cudaCreateChannelDesc(); cudaSafeCall(cudaBindTexture(0, tex, arr.ptr(), desc, arr.sizeBytes())); } template TextureBinder(const PtrStepSz &arr, const struct texture &tex) : texref(&tex) { cudaChannelFormatDesc desc = cudaCreateChannelDesc(); cudaSafeCall(cudaBindTexture2D(0, tex, arr.data, desc, arr.cols, arr.rows, arr.step)); } template TextureBinder(const A &arr, const struct texture &tex, const cudaChannelFormatDesc &desc) : texref(&tex) { cudaSafeCall(cudaBindTexture2D(0, tex, arr.data, desc, arr.cols, arr.rows, arr.step)); } template TextureBinder(const PtrSz &arr, const struct texture &tex) : texref(&tex) { cudaChannelFormatDesc desc = cudaCreateChannelDesc(); cudaSafeCall(cudaBindTexture(0, tex, arr.data, desc, arr.size * arr.elemSize())); } ~TextureBinder() { cudaSafeCall(cudaUnbindTexture(texref)); } private: const struct textureReference *texref; }; } // namespace cuda namespace device { using kfusion::cuda::TextureBinder; } } // namespace kfusion ================================================ FILE: include/kfusion/cuda/tsdf_volume.hpp ================================================ #pragma once /* kinfu includes */ #include /* pcl includes */ #include /* sobfu includes */ #include /* sys headers */ #include namespace kfusion { namespace cuda { class KF_EXPORTS TsdfVolume { public: TsdfVolume(const Params ¶ms); virtual ~TsdfVolume(); void create(const Vec3i &dims); Vec3i getDims() const; Vec3f getVoxelSize() const; const CudaData data() const; CudaData data(); Vec3f getSize() const; void setSize(const Vec3f &size); float getTruncDist() const; void setTruncDist(float &distance); float getEta() const; void setEta(float &eta); float getMaxWeight() const; void setMaxWeight(float &weight); Affine3f getPose() const; void setPose(const Affine3f &pose); float getRaycastStepFactor() const; void setRaycastStepFactor(float &factor); float getGradientDeltaFactor() const; void setGradientDeltaFactor(float &factor); Vec3i getGridOrigin() const; void setGridOrigin(const Vec3i &origin); virtual void clear(); void swap(CudaData &data); virtual void applyAffine(const Affine3f &affine); virtual void integrate(const TsdfVolume &phi_n_psi); virtual void integrate(const Dists &dists, const Affine3f &camera_pose, const Intr &intr); virtual void initBox(const float3 &b); virtual void initEllipsoid(const float3 &r); virtual void initPlane(const float &z); virtual void initSphere(const float3 ¢re, const float &radius); virtual void initTorus(const float2 &t); void print_sdf_values(); struct Entry { typedef unsigned short half; half tsdf; int weight; static float half2float(half value); static half float2half(float value); }; private: CudaData data_; float trunc_dist_; float eta_; float max_weight_; Vec3i dims_; Vec3f size_; Affine3f pose_; float gradient_delta_factor_; float raycast_step_factor_; }; } 
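/* A minimal usage sketch for the TsdfVolume interface declared above, assuming the Params
 * struct from sobfu/params.hpp and the typedefs from kfusion/types.hpp seen elsewhere in this
 * repository; the include paths follow the repository layout and the parameter values are
 * illustrative only, not the project's defaults. */
#include <kfusion/cuda/tsdf_volume.hpp>
#include <sobfu/params.hpp>

static void tsdf_usage_sketch(const kfusion::cuda::Dists &dists, const kfusion::Intr &intr) {
    Params params;
    params.volume_dims     = cv::Vec3i(256, 256, 256); /* voxels */
    params.volume_size     = cv::Vec3f(1.f, 1.f, 1.f); /* metres */
    params.tsdf_trunc_dist = 0.04f;                    /* metres */
    params.eta             = 0.1f;                     /* expected object thickness, metres */
    params.tsdf_max_weight = 64.f;                     /* frames */

    kfusion::cuda::TsdfVolume volume(params);
    volume.setPose(kfusion::Affine3f::Identity());

    /* fuse one pre-computed distance frame taken at the identity camera pose */
    volume.integrate(dists, kfusion::Affine3f::Identity(), intr);

    /* alternatively, initialise the volume analytically, e.g. with a sphere */
    volume.initSphere(make_float3(0.5f, 0.5f, 0.5f), 0.2f);
}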
// namespace cuda } // namespace kfusion ================================================ FILE: include/kfusion/exports.hpp ================================================ #pragma once #if (defined WIN32 || defined _WIN32 || defined WINCE) && defined KFUSION_API_EXPORTS #define KF_EXPORTS __declspec(dllexport) #else #define KF_EXPORTS #endif ================================================ FILE: include/kfusion/internal.hpp ================================================ #pragma once /* kinfu includes */ #include #include /* pcl includes */ #include //#define USE_DEPTH struct Mat4f { float4 data[4]; }; namespace kfusion { namespace device { typedef float4 Normal; typedef float4 Point; typedef unsigned short ushort; typedef unsigned char uchar; typedef PtrStepSz Dists; typedef DeviceArray2D Depth; typedef DeviceArray2D Normals; typedef DeviceArray2D Points; typedef DeviceArray2D Image; typedef float3 *Displacements; typedef DeviceArray Vertices; typedef Vertices Norms; /* * struct which holds a surface as triangles */ struct Surface { Vertices vertices; Norms normals; }; struct float8 { float x, y, z, w, c1, c2, c3, c4; }; typedef float4 PointType; typedef int3 Vec3i; typedef float3 Vec3f; struct Mat3f { float3 data[3]; }; struct Aff3f { Mat3f R; Vec3f t; }; struct TsdfVolume { public: float2 *const data; const int3 dims; const float3 voxel_size; const float trunc_dist; const float eta; const float max_weight; TsdfVolume(float2 *const data, int3 dims, float3 voxel_size, float trunc_dist, float eta, float max_weight); // TsdfVolume(const TsdfVolume &); TsdfVolume &operator=(const TsdfVolume &); __kf_device__ float2 *operator()(int x, int y, int z); __kf_device__ const float2 *operator()(int x, int y, int z) const; __kf_device__ float2 *beg(int x, int y) const; __kf_device__ float2 *zstep(float2 *const ptr) const; }; struct Projector { float2 f, c; Projector() {} Projector(float fx, float fy, float cx, float cy); __kf_device__ float2 operator()(const float3 &p) const; }; struct Reprojector { Reprojector() {} Reprojector(float fx, float fy, float cx, float cy); float2 finv, c; __kf_device__ float3 operator()(int x, int y, float z) const; }; struct CubeIndexEstimator { CubeIndexEstimator(const TsdfVolume &vol) : volume(vol) { isoValue = 0.f; } const TsdfVolume volume; float isoValue; __kf_device__ int computeCubeIndex(int x, int y, int z, float f[8]) const; __kf_device__ void readTsdf(int x, int y, int z, float &f, float &weight) const; }; struct OccupiedVoxels : public CubeIndexEstimator { OccupiedVoxels(const TsdfVolume &vol) : CubeIndexEstimator({vol}) {} enum { CTA_SIZE_X = 32, CTA_SIZE_Y = 8, CTA_SIZE = CTA_SIZE_X * CTA_SIZE_Y, LOG_WARP_SIZE = 5, WARP_SIZE = 1 << LOG_WARP_SIZE, WARPS_COUNT = CTA_SIZE / WARP_SIZE, }; mutable int *voxels_indices; mutable int *vertices_number; int max_size; __kf_device__ void operator()() const; }; struct TrianglesGenerator : public CubeIndexEstimator { TrianglesGenerator(const TsdfVolume &vol) : CubeIndexEstimator({vol}) {} enum { CTA_SIZE = 256, MAX_GRID_SIZE_X = 65536 }; const int *occupied_voxels; const int *vertex_ofssets; int voxels_count; float3 cell_size; Aff3f pose; mutable device::PointType *outputVertices; mutable device::PointType *outputNormals; __kf_device__ void operator()() const; __kf_device__ void store_point(float4 *ptr, int index, const float3 &vertex) const; __kf_device__ float3 get_node_coo(int x, int y, int z) const; __kf_device__ float3 vertex_interp(float3 p0, float3 p1, float f0, float f1) const; }; // namespace device struct 
ComputeIcpHelper { struct Policy; struct PageLockHelper { float *data; PageLockHelper(); ~PageLockHelper(); }; float min_cosine; float dist2_thres; Aff3f aff; float rows, cols; float2 f, c, finv; PtrStep dcurr; PtrStep ncurr; PtrStep vcurr; ComputeIcpHelper(float dist_thres, float angle_thres); void setLevelIntr(int level_index, float fx, float fy, float cx, float cy); void operator()(const Depth &dprev, const Normals &nprev, DeviceArray2D &buffer, float *data, cudaStream_t stream); void operator()(const Points &vprev, const Normals &nprev, DeviceArray2D &buffer, float *data, cudaStream_t stream); static void allocate_buffer(DeviceArray2D &buffer, int partials_count = -1); // private: __kf_device__ int find_coresp(int x, int y, float3 &n, float3 &d, float3 &s) const; __kf_device__ void partial_reduce(const float row[7], PtrStep &partial_buffer) const; __kf_device__ float2 proj(const float3 &p) const; __kf_device__ float3 reproj(float x, float y, float z) const; }; /* * TSDF FUNCTIONS */ void clear_volume(TsdfVolume &volume); void integrate(TsdfVolume &phi_global, TsdfVolume &phi_n_psi); void integrate(const Dists &depth, TsdfVolume &volume, const Aff3f &aff, const Projector &proj); void init_box(TsdfVolume &volume, const float3 &b); void init_ellipsoid(TsdfVolume &volume, const float3 &r); void init_plane(TsdfVolume &volume, const float &z); void init_sphere(TsdfVolume &volume, const float3 ¢re, const float &radius); void init_torus(TsdfVolume &volume, const float2 &t); /* * MARCHING CUBES FUNCTIONS */ /** \brief binds marching cubes tables to texture references */ void bindTextures(const int *edgeBuf, const int *triBuf, const int *numVertsBuf); /** \brief unbinds */ void unbindTextures(); /** \brief scans TSDF volume and retrieves occuped voxes * \param[in] volume TSDF volume * \param[out] occupied_voxels buffer for occupied voxels; the function fills the first row with voxel id's and second * row with the no. of vertices * \return number of voxels in the buffer */ int getOccupiedVoxels(const TsdfVolume &volume, DeviceArray2D &occupied_voxels); /** \brief computes total number of vertices for all voxels and offsets of vertices in final triangle array * \param[out] occupied_voxels buffer with occupied voxels; the function fills 3rd only with offsets * \return total number of vertexes */ int computeOffsetsAndTotalVertices(DeviceArray2D &occupied_voxels); /** \brief generates final triangle array * \param[in] volume TSDF volume * \param[in] occupied_voxels occupied voxel ids (1st row), number of vertices (2nd row), offsets (3rd row). 
* \param[in] volume_size volume size in meters * \param[out] output triangle array */ void generateTriangles(const TsdfVolume &volume, const DeviceArray2D &occupied_voxels, const float3 &volume_size, const Aff3f &pose, DeviceArray &outputVertices, DeviceArray &outputNormals); /* * IMAGE PROCESSING FUNCTIONS */ void compute_dists(const Depth &depth, Dists dists, float2 f, float2 c); void truncateDepth(Depth &depth, float max_dist /*meters*/); void bilateralFilter(const Depth &src, Depth &dst, int kernel_size, float sigma_spatial, float sigma_depth); void depthPyr(const Depth &source, Depth &pyramid, float sigma_depth); void resizeDepthNormals(const Depth &depth, const Normals &normals, Depth &depth_out, Normals &normals_out); void resizePointsNormals(const Points &points, const Normals &normals, Points &points_out, Normals &normals_out); void computeNormalsAndMaskDepth(const Reprojector &reproj, Depth &depth, Normals &normals); void computePointNormals(const Reprojector &reproj, const Depth &depth, Points &points, Normals &normals); /** \brief rasterises the surface extracted via marching cubes * \param[in] proj * \param[in] surface * \param[out] vertices_out * \paraim[out] normals_out */ void rasteriseSurface(const Projector &proj, const Aff3f &vol2cam, const Surface &surface, Points &vertices_out, Normals &normals_out); } // namespace device } // namespace kfusion ================================================ FILE: include/kfusion/kinfu.hpp ================================================ #pragma once /* boost includes */ #include /* kinfu includes */ #include #include #include #include /* pcl includes */ #include #include #include #include #include /* sys headers */ #include #include namespace kfusion { namespace cuda { KF_EXPORTS int getCudaEnabledDeviceCount(); KF_EXPORTS void setDevice(int device); KF_EXPORTS std::string getDeviceName(int device); KF_EXPORTS bool checkIfPreFermiGPU(int device); KF_EXPORTS void printCudaDeviceInfo(int device); KF_EXPORTS void printShortCudaDeviceInfo(int device); } // namespace cuda struct KF_EXPORTS KinFuParams { static KinFuParams default_params(); int cols; // pixels int rows; // pixels Intr intr; // Camera parameters Vec3i volume_dims; // number of voxels Vec3f volume_size; // meters Affine3f volume_pose; // meters, inital pose float bilateral_sigma_depth; // meters float bilateral_sigma_spatial; // pixels int bilateral_kernel_size; // pixels float icp_truncate_depth_dist; // meters float icp_dist_thres; // meters float icp_angle_thres; // radians std::vector icp_iter_num; // iterations for level index 0,1,..,3 float tsdf_min_camera_movement; // meters, integrate only if exceedes float tsdf_trunc_dist; // meters; float tsdf_max_weight; // frames float raycast_step_factor; // in voxel sizes float gradient_delta_factor; // in voxel sizes Vec3f light_pose; // meters }; class KF_EXPORTS KinFu { public: typedef cv::Ptr Ptr; KinFu(const KinFuParams ¶ms); const KinFuParams ¶ms() const; KinFuParams ¶ms(); const cuda::TsdfVolume &tsdf() const; cuda::TsdfVolume &tsdf(); const cuda::ProjectiveICP &icp() const; cuda::ProjectiveICP &icp(); const cuda::MarchingCubes &mc() const; cuda::MarchingCubes &mc(); void reset(); bool operator()(const cuda::Depth &depth, const cuda::Image &image = cuda::Image()); void renderImage(cuda::Image &image, int flag = 0); void renderImage(cuda::Image &image, const Affine3f &pose, int flag = 0); Affine3f getCameraPose(int time = -1) const; protected: void allocate_buffers(); int frame_counter_; KinFuParams params_; std::vector 
poses_; cuda::Dists dists_; cuda::Frame curr_, prev_; cuda::Cloud points_; cuda::Normals normals_; cuda::Depth depths_; cv::Ptr volume_; cv::Ptr icp_; cv::Ptr mc_; }; } // namespace kfusion ================================================ FILE: include/kfusion/precomp.hpp ================================================ #pragma once /* cuda includes */ #include /* kinfu includes */ #include #include #include #include #include #include #include /* sys headers */ #include namespace kfusion { template inline D device_cast(const S &source) { return *reinterpret_cast(source.val); } template <> inline device::Aff3f device_cast(const Affine3f &source) { device::Aff3f aff; Mat3f R = source.rotation(); Vec3f t = source.translation(); aff.R = device_cast(R); aff.t = device_cast(t); return aff; } } // namespace kfusion ================================================ FILE: include/kfusion/safe_call.hpp ================================================ #pragma once #include namespace kfusion { namespace cuda { void error(const char *error_string, const char *file, const int line, const char *func); } } // namespace kfusion #if defined(__GNUC__) #define cudaSafeCall(expr) kfusion::cuda::___cudaSafeCall(expr, __FILE__, __LINE__, __func__) #else /* defined(__CUDACC__) || defined(__MSVC__) */ #define cudaSafeCall(expr) kfusion::cuda::___cudaSafeCall(expr, __FILE__, __LINE__) #endif namespace kfusion { namespace cuda { static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") { if (cudaSuccess != err) error(cudaGetErrorString(err), file, line, func); } static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; } } // namespace cuda namespace device { using kfusion::cuda::divUp; } } // namespace kfusion ================================================ FILE: include/kfusion/types.hpp ================================================ #pragma once /* cuda includes */ #include /* kinfu includes */ #include /* pcl includes */ #include /* opencv includes */ #include #include #include /* sys headers */ #include struct CUevent_st; namespace kfusion { typedef cv::Matx33f Mat3f; typedef cv::Vec3f Vec3f; typedef cv::Vec3i Vec3i; typedef cv::Affine3f Affine3f; struct KF_EXPORTS Intr { float fx, fy, cx, cy; Intr(); Intr(float fx, float fy, float cx, float cy); Intr operator()(int level_index) const; }; KF_EXPORTS std::ostream &operator<<(std::ostream &os, const Intr &intr); struct Point { union { float data[4]; struct { float x, y, z; }; }; Point() = default; Point(float x, float y, float z) { this->x = x; this->y = y; this->z = z; } }; typedef Point Normal; struct RGB { union { struct { unsigned char b, g, r; }; int bgra; }; }; struct PixelRGB { unsigned char r, g, b; }; namespace cuda { typedef cuda::DeviceMemory CudaData; typedef cuda::CudaData Displacements; typedef cuda::DeviceArray2D Depth; typedef cuda::DeviceArray2D Dists; typedef cuda::DeviceArray2D Image; typedef cuda::DeviceArray2D Normals; typedef cuda::DeviceArray2D Cloud; typedef cuda::DeviceArray Vertices; typedef cuda::DeviceArray Norms; /* * struct used to hold the surface as triangles */ struct Surface { Vertices vertices; Norms normals; }; struct Frame { bool use_points; std::vector depth_pyr; std::vector points_pyr; std::vector normals_pyr; }; } // namespace cuda inline float deg2rad(float alpha) { return alpha * 0.017453293f; } struct KF_EXPORTS ScopeTime { const char *name; double start; ScopeTime(const char *name); ~ScopeTime(); }; struct KF_EXPORTS SampledScopeTime { 
public: enum { EACH = 34 }; SampledScopeTime(double &time_ms); ~SampledScopeTime(); private: double getTime(); SampledScopeTime(const SampledScopeTime &); SampledScopeTime &operator=(const SampledScopeTime &); double &time_ms_; double start; }; } // namespace kfusion ================================================ FILE: include/sobfu/cuda/utils.hpp ================================================ #pragma once /* cuda includes */ #include #include /* kinfu includes */ #include /* utility class used to avoid linker errors with extern unsized shared memory arrays with templated type */ template struct SharedMemory { __device__ inline operator T *() { extern __shared__ int __smem[]; return (T *) __smem; } __device__ inline operator const T *() const { extern __shared__ int __smem[]; return (T *) __smem; } }; /* get 1d index from 3d coordinates */ __device__ __forceinline__ int get_global_idx(int x, int y, int z, int dim_x, int dim_y) { return z * dim_y * dim_y + y * dim_x + x; } /* * LERP */ template __host__ __device__ __forceinline__ T lerp(T v0, T v1, T t) { return fma(t, v0, fma(-t, v1, v1)); } __host__ __device__ __forceinline__ float3 lerp3f(float3 v0, float3 v1, float t) { return make_float3(lerp(v0.x, v1.x, t), lerp(v0.y, v1.y, t), lerp(v0.z, v1.z, t)); } __host__ __device__ __forceinline__ float4 lerp4f(float4 v0, float4 v1, float t) { return make_float4(lerp(v0.x, v1.x, t), lerp(v0.y, v1.y, t), lerp(v0.z, v1.z, t), 0.f); } /* * INTERPOLATION */ template __device__ __forceinline__ float2 interpolate_tsdf(const Vol &volume, const float3 &p_voxels) { float3 cf = p_voxels; cf.x = fminf(fmaxf(0.f, cf.x), (float) volume.dims.x - 1); cf.y = fminf(fmaxf(0.f, cf.y), (float) volume.dims.y - 1); cf.z = fminf(fmaxf(0.f, cf.z), (float) volume.dims.z - 1); /* rounding to negative infinity */ int3 g = make_int3(__float2int_rd(cf.x), __float2int_rd(cf.y), __float2int_rd(cf.z)); int idx_x_1 = g.x + 1; if (cf.x == 0.f || cf.x == (float) volume.dims.x - 1) { idx_x_1--; } int idx_y_1 = g.y + 1; if (cf.y == 0.f || cf.y == (float) volume.dims.y - 1) { idx_y_1--; } int idx_z_1 = g.z + 1; if (cf.z == 0.f || cf.z == (float) volume.dims.z - 1) { idx_z_1--; } float a = cf.x - g.x; float b = cf.y - g.y; float c = cf.z - g.z; float tsdf = lerp(lerp(lerp((*volume(idx_x_1, idx_y_1, idx_z_1)).x, (*volume(idx_x_1, idx_y_1, g.z + 0)).x, c), lerp((*volume(idx_x_1, g.y + 0, idx_z_1)).x, (*volume(idx_x_1, g.y + 0, g.z + 0)).x, c), b), lerp(lerp((*volume(g.x + 0, idx_y_1, idx_z_1)).x, (*volume(g.x + 0, idx_y_1, g.z + 0)).x, c), lerp((*volume(g.x + 0, g.y + 0, idx_z_1)).x, (*volume(g.x + 0, g.y + 0, g.z + 0)).x, c), b), a); float weight = (*volume(g.x, g.y, g.z)).y; return make_float2(tsdf, weight); } template __device__ __forceinline__ float4 interpolate_field(const Field &field, const float3 &p_voxels) { float3 cf = p_voxels; cf.x = fminf(fmaxf(0.f, cf.x), (float) field.dims.x - 1); cf.y = fminf(fmaxf(0.f, cf.y), (float) field.dims.y - 1); cf.z = fminf(fmaxf(0.f, cf.z), (float) field.dims.z - 1); /* rounding to negative infinity */ int3 g = make_int3(__float2int_rd(cf.x), __float2int_rd(cf.y), __float2int_rd(cf.z)); int idx_x_1 = g.x + 1; if (cf.x == 0.f || cf.x == (float) field.dims.x - 1) { idx_x_1--; } int idx_y_1 = g.y + 1; if (cf.y == 0.f || cf.y == (float) field.dims.y - 1) { idx_y_1--; } int idx_z_1 = g.z + 1; if (cf.z == 0.f || cf.z == (float) field.dims.z - 1) { idx_z_1--; } float a = cf.x - g.x; float b = cf.y - g.y; float c = cf.z - g.z; float4 result = lerp4f(lerp4f(lerp4f(*field(idx_x_1, idx_y_1, idx_z_1), 
*field(idx_x_1, idx_y_1, g.z + 0), c), lerp4f(*field(idx_x_1, g.y + 0, idx_z_1), *field(idx_x_1, g.y + 0, g.z + 0), c), b), lerp4f(lerp4f(*field(g.x + 0, idx_y_1, idx_z_1), *field(g.x + 0, idx_y_1, g.z + 0), c), lerp4f(*field(g.x + 0, g.y + 0, idx_z_1), *field(g.x + 0, g.y + 0, g.z + 0), c), b), a); return result; } template __device__ __forceinline__ float4 interpolate_field_inv(const Field &field, const float3 &p_voxels) { float3 cf = p_voxels; cf.x = fminf(fmaxf(0.f, cf.x), (float) field.dims.x - 1); cf.y = fminf(fmaxf(0.f, cf.y), (float) field.dims.y - 1); cf.z = fminf(fmaxf(0.f, cf.z), (float) field.dims.z - 1); /* rounding to negative infinity */ int3 g = make_int3(__float2int_rd(cf.x), __float2int_rd(cf.y), __float2int_rd(cf.z)); int idx_x_1 = g.x + 1; if (cf.x == 0.f || cf.x == (float) field.dims.x - 1) { idx_x_1--; } int idx_y_1 = g.y + 1; if (cf.y == 0.f || cf.y == (float) field.dims.y - 1) { idx_y_1--; } int idx_z_1 = g.z + 1; if (cf.z == 0.f || cf.z == (float) field.dims.z - 1) { idx_z_1--; } float a = cf.x - g.x; float b = cf.y - g.y; float c = cf.z - g.z; float4 result = lerp4f(lerp4f(lerp4f(field.get_displacement(idx_x_1, idx_y_1, idx_z_1), field.get_displacement(idx_x_1, idx_y_1, g.z + 0), c), lerp4f(field.get_displacement(idx_x_1, g.y + 0, idx_z_1), field.get_displacement(idx_x_1, g.y + 0, g.z + 0), c), b), lerp4f(lerp4f(field.get_displacement(g.x + 0, idx_y_1, idx_z_1), field.get_displacement(g.x + 0, idx_y_1, g.z + 0), c), lerp4f(field.get_displacement(g.x + 0, g.y + 0, idx_z_1), field.get_displacement(g.x + 0, g.y + 0, g.z + 0), c), b), a); return result; } __device__ __forceinline__ float4 interpolate_field(const float4 *field, const float3 &p_voxels, int dim_x, int dim_y, int dim_z) { float3 cf = p_voxels; cf.x = fminf(fmaxf(0.f, cf.x), (float) dim_x - 1); cf.y = fminf(fmaxf(0.f, cf.y), (float) dim_y - 1); cf.z = fminf(fmaxf(0.f, cf.z), (float) dim_z - 1); /* rounding to negative infinity */ int3 g = make_int3(__float2int_rd(cf.x), __float2int_rd(cf.y), __float2int_rd(cf.z)); int idx_x_1 = g.x + 1; if (cf.x == 0.f || cf.x == (float) dim_x - 1) { idx_x_1--; } int idx_y_1 = g.y + 1; if (cf.y == 0.f || cf.y == (float) dim_y - 1) { idx_y_1--; } int idx_z_1 = g.z + 1; if (cf.z == 0.f || cf.z == (float) dim_z - 1) { idx_z_1--; } float a = cf.x - g.x; float b = cf.y - g.y; float c = cf.z - g.z; float4 result = lerp4f(lerp4f(lerp4f(field[get_global_idx(idx_x_1, idx_y_1, idx_z_1, dim_x, dim_y)], field[get_global_idx(idx_x_1, idx_y_1, g.z + 0, dim_x, dim_y)], c), lerp4f(field[get_global_idx(idx_x_1, g.y + 0, idx_z_1, dim_x, dim_y)], field[get_global_idx(idx_x_1, g.y + 0, g.z + 0, dim_x, dim_y)], c), b), lerp4f(lerp4f(field[get_global_idx(g.x + 0, idx_y_1, idx_z_1, dim_x, dim_y)], field[get_global_idx(g.x + 0, idx_y_1, g.z + 0, dim_x, dim_y)], c), lerp4f(field[get_global_idx(g.x + 0, g.y + 0, idx_z_1, dim_x, dim_y)], field[get_global_idx(g.x + 0, g.y + 0, g.z + 0, dim_x, dim_y)], c), b), a); return result; } /* * FLOAT2 */ __device__ __forceinline__ float norm(float2 v) { return __fsqrt_rn(__fadd_rn(__fmul_rn(v.x, v.x), __fmul_rn(v.y, v.y))); } /* * FLOAT3 */ __host__ __device__ __forceinline__ float3 &operator-=(float3 &v1, const float3 &v2) { v1.x -= v2.x; v1.y -= v2.y; v1.z -= v2.z; return v1; } __device__ __forceinline__ float3 operator/(const float3 &v, const float &d) { return make_float3(__fdividef(v.x, d), __fdividef(v.y, d), __fdividef(v.z, d)); } __device__ __forceinline__ float n(const float3 &v) { return __fsqrt_rd(__fmul_rn(v.x, v.x) + __fmul_rn(v.y, v.y) + 
__fmul_rn(v.z, v.z)); } __device__ __forceinline__ float3 sq(const float3 &v) { return make_float3(__fmul_rn(v.x, v.x), __fmul_rn(v.y, v.y), __fmul_rn(v.z, v.z)); } __device__ __forceinline__ float rms_epsilon(const float3 &v) { return __fsqrt_rn(v.x + v.y + v.z + 1e-5f); } /* * FLOAT4 */ __device__ __forceinline__ float4 operator+(const float4 &v1, const float4 &v2) { return make_float4(__fadd_rn(v1.x, v2.x), __fadd_rn(v1.y, v2.y), __fadd_rn(v1.z, v2.z), 0.f); } __device__ __forceinline__ float4 operator-(const float4 &v1, const float4 &v2) { return make_float4(__fadd_rn(v1.x, -v2.x), __fadd_rn(v1.y, -v2.y), __fadd_rn(v1.z, -v2.z), 0.f); } __host__ __device__ __forceinline__ float4 &operator+=(float4 &v1, const float4 &v2) { v1.x += v2.x; v1.y += v2.y; v1.z += v2.z; return v1; } __host__ __device__ __forceinline__ float4 &operator-=(float4 &v1, const float4 &v2) { v1.x -= v2.x; v1.y -= v2.y; v1.z -= v2.z; return v1; } __device__ __forceinline__ float4 operator*(const float4 &v, const float &m) { return make_float4(__fmul_rn(v.x, m), __fmul_rn(v.y, m), __fmul_rn(v.z, m), 0.f); } __device__ __forceinline__ float4 operator*(const float &m, const float4 &v) { return v * m; } __device__ __forceinline__ float4 operator/(const float4 &v, const float &d) { return make_float4(__fdividef(v.x, d), __fdividef(v.y, d), __fdividef(v.z, d), 0.f); } __device__ __forceinline__ float4 operator/(const float &d, const float4 &v) { return v / d; } __device__ __forceinline__ float norm(const float4 &v) { return __fsqrt_rd(__fmul_rn(v.x, v.x) + __fmul_rn(v.y, v.y) + __fmul_rn(v.z, v.z)); } __device__ __forceinline__ float norm_sq(const float4 &v) { return __fmul_rn(v.x, v.x) + __fmul_rn(v.y, v.y) + __fmul_rn(v.z, v.z); } __device__ __forceinline__ float4 normalised(const float4 &v) { float n = norm(v); return (n > 1e-5f) ? 
v / n : v; } __host__ __device__ __forceinline__ float3 trunc(const float4 &f) { return make_float3(f.x, f.y, f.z); } __device__ __forceinline__ float4 sq(const float4 &v) { return make_float4(__fmul_rn(v.x, v.x), __fmul_rn(v.y, v.y), __fmul_rn(v.z, v.z), 0.f); } __device__ __forceinline__ float rms_epsilon(const float4 &v) { return __fsqrt_rd(__fadd_rn(__fadd_rn(v.x, __fadd_rn(v.y, v.z)), 1e-5f)); } /* * MAT3F */ __device__ __forceinline__ kfusion::device::Mat3f operator+(kfusion::device::Mat3f M1, kfusion::device::Mat3f M2) { kfusion::device::Mat3f N; N.data[0].x = __fadd_rn(M1.data[0].x, M2.data[0].x); N.data[0].y = __fadd_rn(M1.data[0].y, M2.data[0].y); N.data[0].z = __fadd_rn(M1.data[0].z, M2.data[0].z); N.data[1].x = __fadd_rn(M1.data[1].x, M2.data[1].x); N.data[1].y = __fadd_rn(M1.data[1].y, M2.data[1].y); N.data[1].z = __fadd_rn(M1.data[1].z, M2.data[1].z); N.data[2].x = __fadd_rn(M1.data[2].x, M2.data[2].x); N.data[2].y = __fadd_rn(M1.data[2].y, M2.data[2].y); N.data[2].z = __fadd_rn(M1.data[2].z, M2.data[2].z); return N; } __device__ __forceinline__ kfusion::device::Mat3f operator-(kfusion::device::Mat3f M1, kfusion::device::Mat3f M2) { kfusion::device::Mat3f N; N.data[0].x = __fsub_rn(M1.data[0].x, M2.data[0].x); N.data[0].y = __fsub_rn(M1.data[0].y, M2.data[0].y); N.data[0].z = __fsub_rn(M1.data[0].z, M2.data[0].z); N.data[1].x = __fsub_rn(M1.data[1].x, M2.data[1].x); N.data[1].y = __fsub_rn(M1.data[1].y, M2.data[1].y); N.data[1].z = __fsub_rn(M1.data[1].z, M2.data[1].z); N.data[2].x = __fsub_rn(M1.data[2].x, M2.data[2].x); N.data[2].y = __fsub_rn(M1.data[2].y, M2.data[2].y); N.data[2].z = __fsub_rn(M1.data[2].z, M2.data[2].z); return N; } __device__ __forceinline__ kfusion::device::Mat3f operator*(kfusion::device::Mat3f M, float m) { kfusion::device::Mat3f N; N.data[0].x = __fmul_rn(M.data[0].x, m); N.data[0].y = __fmul_rn(M.data[0].y, m); N.data[0].z = __fmul_rn(M.data[0].z, m); N.data[1].x = __fmul_rn(M.data[1].x, m); N.data[1].y = __fmul_rn(M.data[1].y, m); N.data[1].z = __fmul_rn(M.data[1].z, m); N.data[2].x = __fmul_rn(M.data[2].x, m); N.data[2].y = __fmul_rn(M.data[2].y, m); N.data[2].z = __fmul_rn(M.data[2].z, m); return N; } __device__ __forceinline__ kfusion::device::Mat3f operator/(kfusion::device::Mat3f M, float d) { return M * 1.f / d; } /* * MAT4F */ __device__ __forceinline__ float4 operator*(Mat4f M, float4 v) { return make_float4( __fadd_rn(__fadd_rn(__fmul_rn(M.data[0].x, v.x), __fmul_rn(M.data[0].y, v.y)), __fmul_rn(M.data[0].z, v.z)), __fadd_rn(__fadd_rn(__fmul_rn(M.data[1].x, v.x), __fmul_rn(M.data[1].y, v.y)), __fmul_rn(M.data[1].z, v.z)), __fadd_rn(__fadd_rn(__fmul_rn(M.data[2].x, v.x), __fmul_rn(M.data[2].y, v.y)), __fmul_rn(M.data[2].z, v.z)), 0.f); } __device__ __forceinline__ float det(Mat4f M) { return __fsub_rn(__fadd_rn(__fadd_rn(__fmul_rn(__fmul_rn(M.data[0].x, M.data[1].y), M.data[2].z), __fmul_rn(__fmul_rn(M.data[1].x, M.data[2].y), M.data[0].z)), __fmul_rn(__fmul_rn(M.data[2].x, M.data[0].y), M.data[1].z)), __fadd_rn(__fmul_rn(__fadd_rn(__fmul_rn(M.data[0].z, M.data[1].y), M.data[2].x), __fmul_rn(__fmul_rn(M.data[1].z, M.data[2].y), M.data[0].x)), __fmul_rn(__fmul_rn(M.data[2].z, M.data[0].y), M.data[1].x))); } /* * OTHER */ __host__ __device__ __forceinline__ float sign(float a) { if (a > 0.f) { return 1.f; } else if (a < 0.f) { return -1.f; } else { return 0.f; } } __host__ __device__ __forceinline__ bool is_truncated(float tsdf) { if (fabs(tsdf) >= 1.f) { return true; } return false; } __host__ __device__ __forceinline__ float 
heaviside_smooth(float phi, float epsilon) { return 1.f / CUDART_PI_F * (epsilon / (epsilon * epsilon + phi * phi)); } __host__ __device__ __forceinline__ void vec(kfusion::device::Mat3f J, float *v) { v[0] = J.data[0].x; v[1] = J.data[1].x; v[2] = J.data[2].x; v[3] = J.data[0].y; v[4] = J.data[1].y; v[5] = J.data[2].y; v[6] = J.data[0].z; v[7] = J.data[1].z; v[8] = J.data[2].z; } __host__ __device__ __forceinline__ kfusion::device::Mat3f transpose(kfusion::device::Mat3f J) { kfusion::device::Mat3f J_T; float3 row_1 = J.data[0]; J_T.data[0].x = row_1.x; J_T.data[1].x = row_1.y; J_T.data[2].x = row_1.z; float3 row_2 = J.data[1]; J_T.data[0].y = row_2.x; J_T.data[1].y = row_2.y; J_T.data[2].y = row_2.z; float3 row_3 = J.data[2]; J_T.data[0].z = row_3.x; J_T.data[1].z = row_3.y; J_T.data[2].z = row_3.z; return J_T; } ================================================ FILE: include/sobfu/params.hpp ================================================ #pragma once /* kinfu includes */ #include /* sobfu parameters */ struct Params { int cols = 640; int rows = 480; /* no. of rows and columns in the frames */ cv::Vec3i volume_dims; /* volume dimensions in voxels */ cv::Vec3f volume_size; /* volume size in metres */ cv::Affine3f volume_pose; kfusion::Intr intr; /* camera intrinsics */ float icp_truncate_depth_dist; /* depth truncation distance */ float bilateral_sigma_depth, bilateral_sigma_spatial; int bilateral_kernel_size; float tsdf_trunc_dist, eta; /* tsdf truncation distance and expected object thickness */ float tsdf_max_weight; /* tsdf max. weight */ float gradient_delta_factor; int start_frame = 0; /* frame when to start registration */ int verbosity = 0; /* solver verbosity */ int s /* filter size */, max_iter /* max. no of iterations of the solver */; float max_update_norm /* max. 
update norm */, lambda /* filter parameter */, alpha /* gradient descent step size */, w_reg /* weight of the regularisation term */; cv::Vec3f voxel_sizes() { return cv::Vec3f(volume_size[0] / volume_dims[0], volume_size[1] / volume_dims[1], volume_size[2] / volume_dims[2]); } }; ================================================ FILE: include/sobfu/precomp.hpp ================================================ #pragma once /* cuda includes */ #include /* kinfu includes */ #include /* checks if x is a power of 2 */ bool isPow2(unsigned int x); /* computes the nearest power of 2 larger than x */ int nextPow2(int x); /* compute the number of threads and blocks to use for the given reduction kernel; we set threads * block to the minimum of maxThreads and n/2; we observe the maximum specified number of blocks, because each thread * in the kernel can process a variable number of elements */ void get_num_blocks_and_threads(int n, int maxBlocks, int maxThreads, int &blocks, int &threads); ================================================ FILE: include/sobfu/reductor.hpp ================================================ #pragma once /* kinfu includes */ #include /* sobfu incldues */ #include #include /* thrust includes */ #include #include #include #include #include namespace sobfu { namespace device { /* * REDUCTOR */ struct Reductor { /* constructor */ Reductor(int3 dims_, float vsz_, float trunc_dist_); /* destructor */ ~Reductor(); /* energy functional */ float data_energy(float2 *phi_global_data, float2 *phi_n_data); float reg_energy_sobolev(Mat4f *J_data); float2 max_update_norm(); float2 voxel_max_energy(float2 *phi_global_data, float2 *phi_n_data, Mat4f *J_data, float w_reg); int3 dims; float vsz, trunc_dist; int no_voxels; int blocks, threads; /* no. of blocks and threads for the reductions used to calculate value of the energy functional */ float4 *updates; float *h_data_out, *d_data_out; float *h_reg_out, *d_reg_out; float2 *h_max_out, *d_max_out; }; /* kernels */ template __global__ void reduce_data_kernel(float2 *g_idata_global, float2 *g_idata_n, float *g_odata, unsigned int n); template __global__ void reduce_reg_sobolev_kernel(Mat4f *g_idata, float *g_odata, unsigned int n); template __global__ void reduce_voxel_max_energy_kernel(float2 *d_idata_global, float2 *d_idata_n, Mat4f *d_idata_reg, float2 *d_o_data, float w_reg, unsigned int n); template __global__ void reduce_max_kernel(float4 *updates, float2 *g_o_max_data, unsigned int n); /* host methods */ void reduce_data(int size, int threads, int blocks, float2 *d_idata_global, float2 *d_idata_n, float *d_odata); void reduce_reg_sobolev(int size, int threads, int blocks, Mat4f *d_idata, float *d_odata); void reduce_voxel_max_energy(int size, int threads, int blocks, float2 *d_idata_global, float2 *d_idata_n, Mat4f *d_idata_reg, float w_reg, float2 *d_odata); void reduce_max(int size, int threads, int blocks, float4 *updates, float2 *d_o_max_data); /* final reduce on the cpu */ float final_reduce(float *h_odata, float *d_odata, int numBlocks); float2 final_reduce_max(float2 *h_o_max_data, float2 *d_o_max_data, int numBlocks, int3 dims); } // namespace device } // namespace sobfu ================================================ FILE: include/sobfu/scalar_fields.hpp ================================================ #pragma once /* sobfu includes */ #include #include /* kinfu includes */ #include #include #include namespace sobfu { namespace cuda { /* * SCALAR FIELD */ class ScalarField { public: /* constructor */ ScalarField(cv::Vec3i 
dims_); /* destructor */ ~ScalarField(); /* get field data*/ kfusion::cuda::CudaData get_data(); const kfusion::cuda::CudaData get_data() const; /* get dims */ int3 get_dims(); /* clear field */ void clear(); /* sum values in the field */ float sum(); /* print field */ void print(); private: kfusion::cuda::CudaData data; /* field data */ int3 dims; /* field dimensions */ }; } // namespace cuda namespace device { /* * SCALAR FIELD */ struct ScalarField { /* constructor */ ScalarField(float *const data_, int3 dims_) : data(data_), dims(dims_) {} __device__ __forceinline__ float *beg(int x, int y) const; __device__ __forceinline__ float *zstep(float *const ptr) const; __device__ __forceinline__ float *operator()(int idx) const; __device__ __forceinline__ float *operator()(int x, int y, int z) const; float *const data; /* field data */ const int3 dims; }; /* clear */ __global__ void clear_kernel(sobfu::device::ScalarField field); void clear(ScalarField &field); /* sum */ float reduce_sum(sobfu::device::ScalarField &field); template __global__ void reduce_sum_kernel(float *g_idata, float *g_odata, unsigned int n); float final_reduce_sum(float *d_odata, int numBlocks); } // namespace device } // namespace sobfu ================================================ FILE: include/sobfu/sob_fusion.hpp ================================================ #pragma once /* sobfu includes */ #include #include /* kinfu includes */ #include #include #include /* pcl includes */ #include #include /* sys headers */ #include #include /* */ class SobFusion { public: /* default constructor */ SobFusion(const Params ¶ms); /* default destructor */ ~SobFusion(); /* get sobfu params */ Params &getParams(); /* get the canonical model vertices stored as pcl triangle mesh */ pcl::PolygonMesh::Ptr get_phi_global_mesh(); /* get the canonical model mesh warped to live */ pcl::PolygonMesh::Ptr get_phi_global_psi_inv_mesh(); /* get the live model mesh */ pcl::PolygonMesh::Ptr get_phi_n_mesh(); /* get the live model mesh warped with psi */ pcl::PolygonMesh::Ptr get_phi_n_psi_mesh(); /* get the deformation field */ std::shared_ptr getDeformationField(); /* run algorithm on all frames */ bool operator()(const kfusion::cuda::Depth &depth, const kfusion::cuda::Image &image = kfusion::cuda::Image()); private: std::vector poses_; kfusion::cuda::Dists dists_; kfusion::cuda::Frame curr_, prev_; kfusion::cuda::Cloud points_; kfusion::cuda::Normals normals_; kfusion::cuda::Depth depths_; int frame_counter_; /* system parameters */ Params params; /* tsdf's */ cv::Ptr phi_global, phi_global_psi_inv, phi_n, phi_n_psi; /* deformation field warps phi_n to phi_global */ std::shared_ptr psi, psi_inv; /* solver */ std::shared_ptr solver; /* marching cubes */ cv::Ptr mc; /* run marching cubes on vol */ pcl::PolygonMesh::Ptr get_mesh(cv::Ptr vol); /* convert the canonical model to pcl polygon mesh */ static pcl::PolygonMesh::Ptr convert_to_mesh(const kfusion::cuda::DeviceArray &triangles); }; ================================================ FILE: include/sobfu/solver.hpp ================================================ #pragma once /* kinfu incldues */ #include #include /* sobfu includes */ #include #include #include /* * SOLVER PARAMETERS */ struct SolverParams { int verbosity, max_iter, s; float max_update_norm, lambda, alpha, w_reg; }; /* * SDF'S USED IN THE SOLVER */ struct SDFs { SDFs(kfusion::device::TsdfVolume &phi_global_, kfusion::device::TsdfVolume &phi_global_psi_inv_, kfusion::device::TsdfVolume &phi_n_, kfusion::device::TsdfVolume 
&phi_n_psi_) : phi_global(phi_global_), phi_global_psi_inv(phi_global_psi_inv_), phi_n(phi_n_), phi_n_psi(phi_n_psi_) {} kfusion::device::TsdfVolume phi_global, phi_global_psi_inv, phi_n, phi_n_psi; }; /* * SPATIAL DIFFERENTIATORS */ struct Differentiators { Differentiators(sobfu::device::TsdfDifferentiator &tsdf_diff_, sobfu::device::Differentiator &diff_, sobfu::device::Differentiator &diff_inv_, sobfu::device::SecondOrderDifferentiator &second_order_diff_) : tsdf_diff(tsdf_diff_), diff(diff_), diff_inv(diff_inv_), second_order_diff(second_order_diff_) {} sobfu::device::TsdfDifferentiator tsdf_diff; sobfu::device::Differentiator diff, diff_inv; sobfu::device::SecondOrderDifferentiator second_order_diff; }; namespace sobfu { namespace cuda { /* * SOLVER */ class Solver { public: /* constructor */ Solver(Params ¶ms); /* destructor */ ~Solver(); void estimate_psi(const cv::Ptr phi_global, cv::Ptr phi_global_psi_inv, const cv::Ptr phi_n, cv::Ptr phi_n_psi, std::shared_ptr psi, std::shared_ptr psi_inv); private: /* volume params */ int3 dims; float3 voxel_sizes; int no_voxels; float trunc_dist, eta, max_weight; /* solver params */ SolverParams solver_params; /* gradients */ sobfu::cuda::SpatialGradients *spatial_grads; sobfu::device::SpatialGradients *spatial_grads_device; sobfu::device::TsdfGradient *nabla_phi_n, *nabla_phi_n_o_psi; sobfu::device::Jacobian *J, *J_inv; sobfu::device::Laplacian *L, *L_o_psi_inv; sobfu::device::PotentialGradient *nabla_U, *nabla_U_S; /* used to calculate value of the energy functional */ sobfu::device::Reductor *r; /* sobolev filter */ float *h_S_i, *d_S_i; }; /* get 3d sobolev filter */ static void get_3d_sobolev_filter(SolverParams ¶ms, float *h_S_i); /* calculate 1d filters from a separable 3d filter */ static void decompose_sobolev_filter(SolverParams ¶ms, float *h_S_i); } // namespace cuda namespace device { /* potential gradient */ __global__ void calculate_potential_gradient_kernel(float2 *phi_n_psi, float2 *phi_global, float4 *nabla_phi_n_o_psi, float4 *L, float4 *nabla_U, float w_reg, int dim_x, int dim_y, int dim_z); void calculate_potential_gradient(kfusion::device::TsdfVolume &phi_n_psi, kfusion::device::TsdfVolume &phi_global, sobfu::device::TsdfGradient &nabla_phi_n_o_psi, sobfu::device::Laplacian &L, sobfu::device::PotentialGradient &nabla_U, float w_reg); /* estimate psi */ void estimate_psi(SDFs &sdfs, sobfu::device::DeformationField &psi, sobfu::device::DeformationField &psi_inv, sobfu::device::SpatialGradients *spatial_grads, Differentiators &diffs, float *d_S_i, sobfu::device::Reductor *r, SolverParams ¶ms); /* DEFORMATION FIELD UPDATES */ __global__ void update_psi_kernel(float4 *psi, float4 *nabla_U_S, float4 *updates, float alpha, int dim_x, int dim_y, int dim_z); void update_psi(sobfu::device::DeformationField &psi, sobfu::device::PotentialGradient &nabla_U_S, float4 *updates, float alpha); /* * CONVOLUTIONS */ void set_convolution_kernel(float *d_kernel); __global__ void convolution_rows_kernel(float4 *d_dst, float4 *d_src, int image_w, int image_h, int image_d); __global__ void convolution_columns_kernel(float4 *d_dst, float4 *d_src, int image_w, int image_h, int image_d); __global__ void convolution_depth_kernel(float4 *d_dst, float4 *d_src, int image_w, int image_h, int image_d); void convolution_rows(float4 *d_dst, float4 *updates, int image_w, int image_h, int image_d); void convolution_columns(float4 *d_dst, float4 *updates, int image_w, int image_h, int image_d); void convolution_depth(float4 *d_dst, float4 *updates, int image_w, 
int image_h, int image_d); } // namespace device } // namespace sobfu ================================================ FILE: include/sobfu/vector_fields.hpp ================================================ #pragma once /* sobfu includes */ #include #include /* kinfu includes */ #include #include #include #include namespace sobfu { namespace cuda { /* * VECTOR FIELD */ class VectorField { public: /* constructor */ VectorField(cv::Vec3i dims_); /* destructor */ ~VectorField(); /* get dimensions of the field */ cv::Vec3i get_dims() const; /* get the data in the field */ kfusion::cuda::CudaData get_data(); const kfusion::cuda::CudaData get_data() const; /* set the data in the field */ void set_data(kfusion::cuda::CudaData &data); /* clear field */ void clear(); /* print field */ void print(); /* get no. of nan's in the field */ int get_no_nans(); protected: kfusion::cuda::CudaData data; /* field data */ cv::Vec3i dims; /* field dimensions */ }; /* * DEFORMATION FIELD */ class DeformationField : public VectorField { public: /* constructor */ DeformationField(cv::Vec3i dims_); /* destructor */ ~DeformationField(); /* init field to identity */ void clear(); /* apply the field to an sdf */ void apply(const cv::Ptr phi, cv::Ptr phi_psi); /* approximate the inverse of the field */ void get_inverse(sobfu::cuda::DeformationField &psi_inv); }; /* * TSDF GRADIENT, LAPLACIAN, POTENTIAL GRADIENT */ typedef VectorField TsdfGradient; typedef VectorField Laplacian; typedef VectorField PotentialGradient; /* * JACOBIAN */ class Jacobian { public: /* constructor */ Jacobian(cv::Vec3i dims_); /* destructor */ ~Jacobian(); kfusion::cuda::CudaData get_data(); const kfusion::cuda::CudaData get_data() const; /* clear jacobian */ void clear(); private: kfusion::cuda::CudaData data; cv::Vec3i dims; }; /* * SPATIAL GRADIENTS */ struct SpatialGradients { /* constructor */ SpatialGradients(cv::Vec3i dims_); /* destructor */ ~SpatialGradients(); TsdfGradient *nabla_phi_n, *nabla_phi_n_o_psi; Jacobian *J, *J_inv; Laplacian *L, *L_o_psi_inv; PotentialGradient *nabla_U, *nabla_U_S; }; } // namespace cuda } // namespace sobfu namespace sobfu { namespace device { /* * VECTOR FIELD */ struct VectorField { /* constructor */ VectorField(float4 *const data_, const int3 dims_) : data(data_), dims(dims_) {} __device__ __forceinline__ float4 *beg(int x, int y) const; __device__ __forceinline__ float4 *zstep(float4 *const ptr) const; __device__ __forceinline__ float4 *operator()(int x, int y, int z) const; __device__ __forceinline__ float4 get_displacement(int x, int y, int z) const; float4 *const data; /* vector field data */ const int3 dims; /* vector field dimensions */ }; /* clear field */ __global__ void clear_kernel(VectorField field); void clear(VectorField &field); /* * DEFORMATION FIELD */ typedef VectorField DeformationField; /* set deformation field to identity */ __global__ void init_identity_kernel(DeformationField field); void init_identity(DeformationField &field); /* apply psi to an sdf */ __global__ void apply_kernel(const kfusion::device::TsdfVolume phi, kfusion::device::TsdfVolume phi_warped, const DeformationField psi); void apply(const kfusion::device::TsdfVolume &phi, kfusion::device::TsdfVolume &phi_warped, const sobfu::device::DeformationField &psi); /* approximate the inverse of a deformation field */ __global__ void estimate_inverse_kernel(sobfu::device::DeformationField psi, sobfu::device::DeformationField psi_inv); void estimate_inverse(sobfu::device::DeformationField &psi, sobfu::device::DeformationField 
&psi_inv); /* * TSDF GRADIENT */ typedef VectorField TsdfGradient; struct TsdfDifferentiator { /* constructor */ TsdfDifferentiator(kfusion::device::TsdfVolume &vol_) : vol(vol_) {} /* calculate the gradient */ __device__ void operator()(sobfu::device::TsdfGradient &grad) const; void calculate(sobfu::device::TsdfGradient &grad); kfusion::device::TsdfVolume vol; }; /* estimate tsdf gradient */ __global__ void estimate_gradient_kernel(const sobfu::device::TsdfDifferentiator diff, sobfu::device::TsdfGradient grad); /* interpolate tsdf gradient */ __global__ void interpolate_gradient_kernel(sobfu::device::TsdfGradient nabla_phi_n_psi, sobfu::device::TsdfGradient nabla_phi_n_psi_t, sobfu::device::DeformationField psi); void interpolate_gradient(sobfu::device::TsdfGradient &nabla_phi_n_psi, sobfu::device::TsdfGradient &nabla_phi_n_psi_t, sobfu::device::DeformationField &psi); /* * LAPLACIAN */ typedef VectorField Laplacian; struct SecondOrderDifferentiator { /* constructor */ SecondOrderDifferentiator(sobfu::device::DeformationField &psi_) : psi(psi_) {} __device__ void laplacian(sobfu::device::Laplacian &L) const; void calculate(sobfu::device::Laplacian &L); sobfu::device::DeformationField psi; }; /* estimate laplacian */ __global__ void estimate_laplacian_kernel(const sobfu::device::SecondOrderDifferentiator diff, sobfu::device::Laplacian L); /* interpolate laplacian */ __global__ void interpolate_laplacian_kernel(sobfu::device::Laplacian L, sobfu::device::Laplacian L_o_psi, sobfu::device::DeformationField psi); void interpolate_laplacian(sobfu::device::Laplacian &L, sobfu::device::Laplacian &L_o_psi, sobfu::device::DeformationField &psi); /* * POTENTIAL GRADIENT */ typedef VectorField PotentialGradient; /* * JACOBIAN */ struct Jacobian { /* constructor */ Jacobian(Mat4f *const data_, int3 dims_) : data(data_), dims(dims_) {} __device__ __forceinline__ Mat4f *beg(int x, int y) const; __device__ __forceinline__ Mat4f *zstep(Mat4f *const ptr) const; __device__ __forceinline__ Mat4f *operator()(int x, int y, int z) const; Mat4f *const data; /* jacobian data */ const int3 dims; }; struct Differentiator { /* constructor */ Differentiator(sobfu::device::DeformationField &psi_) : psi(psi_) {} /* calculate jacobian */ __device__ void operator()(sobfu::device::Jacobian &J, int mode) const; void calculate(sobfu::device::Jacobian &J); void calculate_deformation_jacobian(sobfu::device::Jacobian &J); sobfu::device::DeformationField psi; }; /* clear jacobian */ __global__ void clear_jacobian_kernel(Jacobian J); void clear(Jacobian &J); /* estimate jacobian */ __global__ void estimate_jacobian_kernel(const sobfu::device::Differentiator diff, sobfu::device::Jacobian J); __global__ void estimate_deformation_jacobian_kernel(const sobfu::device::Differentiator diff, sobfu::device::Jacobian J); /* * SPATIAL GRADIENTS */ struct SpatialGradients { SpatialGradients(sobfu::device::TsdfGradient *nabla_phi_n_, sobfu::device::TsdfGradient *nabla_phi_n_o_psi_, sobfu::device::Jacobian *J_, sobfu::device::Jacobian *J_inv_, sobfu::device::Laplacian *L_, sobfu::device::Laplacian *L_o_psi_inv_, sobfu::device::PotentialGradient *nabla_U_, sobfu::device::PotentialGradient *nabla_U_S_) : nabla_phi_n(nabla_phi_n_), nabla_phi_n_o_psi(nabla_phi_n_o_psi_), J(J_), J_inv(J_inv_), L(L_), L_o_psi_inv(L_o_psi_inv_), nabla_U(nabla_U_), nabla_U_S(nabla_U_S_) {} ~SpatialGradients(); sobfu::device::TsdfGradient *nabla_phi_n, *nabla_phi_n_o_psi; sobfu::device::Jacobian *J, *J_inv; sobfu::device::Laplacian *L, *L_o_psi_inv; 
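/* nabla_U below holds the gradient of the energy functional w.r.t. the deformation field; nabla_U_S presumably holds the same gradient after convolution with the separable Sobolev kernel (see the convolution_* routines and update_psi declared in solver.hpp) */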
sobfu::device::PotentialGradient *nabla_U, *nabla_U_S; }; } // namespace device } // namespace sobfu ================================================ FILE: params/params_advent.ini ================================================ # TSDF VOL_DIMS_X=64 VOL_DIMS_Y=64 VOL_DIMS_Z=64 VOL_SIZE_X=0.5 VOL_SIZE_Y=0.5 VOL_SIZE_Z=0.5 TSDF_TRUNC_DIST=5 ETA=2 TSDF_MAX_WEIGHT=128 GRADIENT_DELTA_FACTOR=0.5 # CAMERA INTR_FX=570.342 INTR_FY=570.342 INTR_CX=320.0 INTR_CY=240.0 TRUNC_DEPTH=1.5 VOL_POSE_T_Z=0.5 # BILATERAL FILTER BILATERAL_SIGMA_DEPTH=0.005 BILATERAL_SIGMA_SPATIAL=4.5 BILATERAL_KERNEL_SIZE=7 # SOLVER MAX_ITER=8192 MAX_UPDATE_NORM=5e-4 S=7 LAMBDA=0.1 ALPHA=0.1 W_REG=0.2 ================================================ FILE: params/params_boxing.ini ================================================ # TSDF VOL_DIMS_X=128 VOL_DIMS_Y=128 VOL_DIMS_Z=128 VOL_SIZE_X=0.75 VOL_SIZE_Y=0.75 VOL_SIZE_Z=0.75 TSDF_TRUNC_DIST=48 ETA=3 TSDF_MAX_WEIGHT=128 GRADIENT_DELTA_FACTOR=0.5 # CAMERA INTR_FX=570.342 INTR_FY=570.342 INTR_CX=320.0 INTR_CY=240.0 TRUNC_DEPTH=1.0 VOL_POSE_T_Z=0.1 # BILATERAL FILTER BILATERAL_SIGMA_DEPTH=0.005 BILATERAL_SIGMA_SPATIAL=4.5 BILATERAL_KERNEL_SIZE=7 # SOLVER START_FRAME=1 MAX_ITER=4096 MAX_UPDATE_NORM=1e-10 S=7 LAMBDA=0.1 RHO_0=1.0 ALPHA=0.001 W_REG=0.6 ================================================ FILE: params/params_hat.ini ================================================ # TSDF VOL_DIMS_X=128 VOL_DIMS_Y=128 VOL_DIMS_Z=128 VOL_SIZE_X=1.20 VOL_SIZE_Y=1.20 VOL_SIZE_Z=1.20 TSDF_TRUNC_DIST=10 ETA=2 TSDF_MAX_WEIGHT=128 GRADIENT_DELTA_FACTOR=0.5 # CAMERA INTR_FX=517.0 INTR_FY=517.0 INTR_CX=320.0 INTR_CY=240.0 TRUNC_DEPTH=2.0 VOL_POSE_T_Z=0.75 # BILATERAL FILTER BILATERAL_SIGMA_DEPTH=0.02 BILATERAL_SIGMA_SPATIAL=4.5 BILATERAL_KERNEL_SIZE=7 # SOLVER MAX_ITER=2048 MAX_UPDATE_NORM=1e-3 S=7 LAMBDA=0.1 ALPHA=0.05 W_REG=0.1 ================================================ FILE: params/params_ours.ini ================================================ # intrinsic parameters for our intel realsense sr300 # ---rgb--- # kinfuParams.intr = kfusion::Intr(622.011f, 622.012f, 320.331f, 235.664f); # ---depth--- # kinfuParams.intr = kfusion::Intr(474.567f, 474.567f, 310.63f, 246.018f); # # ----- DOU ----- # --- depth --- # kinfuParams.intr = kfusion::Intr(520.f, 520.f, 320.f, 240.f); ================================================ FILE: params/params_snoopy.ini ================================================ # TSDF VOL_DIMS_X=128 VOL_DIMS_Y=128 VOL_DIMS_Z=128 VOL_SIZE_X=0.9 VOL_SIZE_Y=0.9 VOL_SIZE_Z=0.9 TSDF_TRUNC_DIST=10 ETA=5 TSDF_MAX_WEIGHT=128 GRADIENT_DELTA_FACTOR=0.5 # CAMERA INTR_FX=517.0 INTR_FY=517.0 INTR_CX=320.0 INTR_CY=240.0 TRUNC_DEPTH=3.0 VOL_POSE_T_Z=0.05 # BILATERAL FILTER BILATERAL_SIGMA_DEPTH=0.01 BILATERAL_SIGMA_SPATIAL=4.5 BILATERAL_KERNEL_SIZE=7 # SOLVER START_FRAME=4 MAX_ITER=2048 MAX_UPDATE_NORM=1e-3 S=7 LAMBDA=0.1 ALPHA=0.1 W_REG=0.2 ================================================ FILE: params/params_umbrella.ini ================================================ # TSDF VOL_DIMS_X=128 VOL_DIMS_Y=128 VOL_DIMS_Z=128 VOL_SIZE_X=1.0 VOL_SIZE_Y=1.0 VOL_SIZE_Z=1.0 TSDF_TRUNC_DIST=8 ETA=3 TSDF_MAX_WEIGHT=128 GRADIENT_DELTA_FACTOR=0.5 # CAMERA INTR_FX=570.342 INTR_FY=570.342 INTR_CX=320.0 INTR_CY=240.0 TRUNC_DEPTH=1.5 VOL_POSE_T_Z=0.3 # BILATERAL FILTER BILATERAL_SIGMA_DEPTH=0.04 BILATERAL_SIGMA_SPATIAL=4.5 BILATERAL_KERNEL_SIZE=7 # SOLVER START_FRAME=1 MAX_ITER=2048 MAX_UPDATE_NORM=1e-10 S=7 LAMBDA=0.1 ALPHA=0.001 W_REG=0.2 ================================================ FILE: 
setup.sh ================================================ #!/bin/sh RUN_CMAKE=false RUN_MAKE=false RUN_MAKE_TESTS=false usage() { echo "USAGE: source setup.sh [options]" echo "OPTIONS:" echo "\t--help -h: Display help" echo "\t--cmake -c: Run CMake" echo "\t--make -m: Run make" echo "\t--make-tests -t: Make tests" echo "\t--all -a: Run CMake and make" } # Parse through the arguments and check if any relevant flag exists while [ "$1" != "" ]; do PARAM=`echo $1 | awk -F= '{print $1}'` case $PARAM in -h | --help) usage exit ;; -c | --cmake) RUN_CMAKE=true ;; -m | --make) RUN_MAKE=true ;; -t | --make-tests) RUN_MAKE_TESTS=true ;; -a | --all) RUN_CMAKE=true RUN_MAKE=true ;; *) echo "ERROR: unknown parameter \"$PARAM\"" usage return 1 ;; esac shift done # If build does not exist create one mkdir -p build cd build if $RUN_CMAKE then if $RUN_MAKE_TESTS then echo "running cmake..." cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_TESTS=ON .. || (cd ../ && return 1) else echo "running cmake..." cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_TESTS=OFF .. || (cd ../ && return 1) fi fi if $RUN_MAKE then echo "running make..." make -j4 || (cd ../ && return 1) echo "make complete!" fi cd .. ================================================ FILE: src/CMakeLists.txt ================================================ # ---------------------------------------------------------------------------- # # BUILD ALL SUBPROJECTS # ---------------------------------------------------------------------------- # add_subdirectory(kfusion) add_subdirectory(sobfu) add_subdirectory(apps) ================================================ FILE: src/apps/CMakeLists.txt ================================================ # Link source files to executables add_application(app) # Link to all libraries target_link_libraries(app sobfu kfusion png Boost::program_options ${Boost_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_CUDART_LIBRARY} ${OpenCV_LIBS} ${PCL_LIBRARIES} ${VTK_LIBRARIES} ) ================================================ FILE: src/apps/demo.cpp ================================================ /* sobfu includes */ #include /* boost includes */ #include /* opencv includes */ #include #include /* pcl includes */ #include #include #include /* sys headers */ #include /* vtk includes */ #include #include #include #include #include #include #include #include #include struct SobFuApp { SobFuApp(std::string file_path, std::string params_path, bool logger, bool visualizer, bool visualizer_detailed, bool verbose, bool vverbose) : exit_(false), file_path_(file_path), params_path_(params_path), logger_(logger), visualizer_(visualizer), visualizer_detailed_(visualizer_detailed), verbose_(verbose), vverbose_(vverbose) { /* * initialise parameters */ Params params; if (verbose_) { params.verbosity = 1; } else if (vverbose_) { params.verbosity = 2; } /* * declare parameters to read from .ini */ boost::program_options::options_description desc("parameters"); declare_parameters(desc, params); /* * read parameters from .ini */ boost::program_options::variables_map vm; read_parameters(desc, vm); /* * parse parameters stored in units of voxels */ params.tsdf_trunc_dist = vm["TSDF_TRUNC_DIST"].as() * params.voxel_sizes()[0]; params.eta = vm["ETA"].as() * params.voxel_sizes()[0]; params.volume_pose = cv::Affine3f().translate( cv::Vec3f(-params.volume_size[0] / 2.f, -params.volume_size[1] / 2.f, vm["VOL_POSE_T_Z"].as())); /* * initialise sobolevfusion */ sobfu = std::make_shared(params); } /* * declare sobfu parameters */ void
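/* note: TSDF_TRUNC_DIST, ETA and VOL_POSE_T_Z are declared below without a bound variable; the constructor above reads them back from the variables map, converting the first two from voxel units to metres and using the third to build the volume pose */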
declare_parameters(boost::program_options::options_description &desc, Params ¶ms) { /* * tsdf */ desc.add_options()("VOL_DIMS_X", boost::program_options::value(¶ms.volume_dims[0]), "no. of voxels along x axis"); desc.add_options()("VOL_DIMS_Y", boost::program_options::value(¶ms.volume_dims[1]), "no. of voxels along y axis"); desc.add_options()("VOL_DIMS_Z", boost::program_options::value(¶ms.volume_dims[2]), "no. of voxels along z axis"); desc.add_options()("VOL_SIZE_X", boost::program_options::value(¶ms.volume_size[0]), "vol. size along x axis (metres)"); desc.add_options()("VOL_SIZE_Y", boost::program_options::value(¶ms.volume_size[1]), "vol. size along y axis (metres)"); desc.add_options()("VOL_SIZE_Z", boost::program_options::value(¶ms.volume_size[2]), "vol. size along z axis (metres)"); desc.add_options()("TSDF_TRUNC_DIST", boost::program_options::value(), "truncation distance (voxels)"); desc.add_options()("ETA", boost::program_options::value(), "expected object thickness (voxels)"); desc.add_options()("TSDF_MAX_WEIGHT", boost::program_options::value(¶ms.tsdf_max_weight), "max. tsdf weight"); desc.add_options()("GRADIENT_DELTA_FACTOR", boost::program_options::value(¶ms.gradient_delta_factor), "delta factor when calculating tsdf gradient (voxels)"); /* * camera */ desc.add_options()("INTR_FX", boost::program_options::value(¶ms.intr.fx), "focal length x"); desc.add_options()("INTR_FY", boost::program_options::value(¶ms.intr.fy), "focal length y"); desc.add_options()("INTR_CX", boost::program_options::value(¶ms.intr.cx), "principal point x"); desc.add_options()("INTR_CY", boost::program_options::value(¶ms.intr.cy), "principal point y"); desc.add_options()("TRUNC_DEPTH", boost::program_options::value(¶ms.icp_truncate_depth_dist), "depth map truncation distance (metres)"); desc.add_options()("VOL_POSE_T_Z", boost::program_options::value(), "camera to volume translation along z axis"); /* * bilateral filter */ desc.add_options()("BILATERAL_SIGMA_DEPTH", boost::program_options::value(¶ms.bilateral_sigma_depth), "bilateral filter sigma z"); desc.add_options()("BILATERAL_SIGMA_SPATIAL", boost::program_options::value(¶ms.bilateral_sigma_spatial), "bilateral filter sigma x-y"); desc.add_options()("BILATERAL_KERNEL_SIZE", boost::program_options::value(¶ms.bilateral_kernel_size), "bilateral filter kernel size"); /* * solver */ desc.add_options()("START_FRAME", boost::program_options::value(¶ms.start_frame), "frame when to start registration"); desc.add_options()("MAX_ITER", boost::program_options::value(¶ms.max_iter), "max. no. of iterations of the solver"); desc.add_options()("MAX_UPDATE_NORM", boost::program_options::value(¶ms.max_update_norm), "max. 
update norm when running the solver"); /* SOBOLEV */ desc.add_options()("S", boost::program_options::value(&params.s), "Sobolev kernel size (currently only supports s=7)"); desc.add_options()("LAMBDA", boost::program_options::value(&params.lambda), "Sobolev filter parameter (currently only supports 0.05, 0.1, 0.2, and 0.4)"); /* FASTFUSION */ desc.add_options()("ALPHA", boost::program_options::value(&params.alpha), "gradient descent step size"); desc.add_options()("W_REG", boost::program_options::value(&params.w_reg), "regularisation weight"); } /* * read parameters from params.ini */ void read_parameters(boost::program_options::options_description &desc, boost::program_options::variables_map &vm) { std::ifstream settings_file(params_path_); boost::program_options::store(boost::program_options::parse_config_file(settings_file, desc), vm); boost::program_options::notify(vm); } /* * load colour and depth images */ void load_files(std::vector *depths, std::vector *images, std::vector *masks) { if (!boost::filesystem::exists(file_path_)) { std::cerr << "error: directory '" << file_path_ << "' does not exist. exiting" << std::endl; exit(EXIT_FAILURE); } if (!boost::filesystem::exists(file_path_ + "/depth") || !boost::filesystem::exists(file_path_ + "/color")) { std::cerr << "error: source directory should contain 'color' and 'depth' folders. exiting..." << std::endl; exit(EXIT_FAILURE); } cv::glob(file_path_ + "/depth", *depths); cv::glob(file_path_ + "/color", *images); std::sort((*depths).begin(), (*depths).end()); std::sort((*images).begin(), (*images).end()); if (boost::filesystem::exists(file_path_ + "/omask")) { cv::glob(file_path_ + "/omask", *masks); std::sort((*masks).begin(), (*masks).end()); } } /* * create output directories inside the input folder if not already present */ void create_output_directory() { out_path_ = file_path_ + "/meshes"; boost::filesystem::path dir_meshes(out_path_); if (boost::filesystem::create_directory(dir_meshes)) { std::cout << "created output directory for meshes" << std::endl; } if (visualizer_ || visualizer_detailed_) { out_screenshot_path_ = file_path_ + "/screenshots"; boost::filesystem::path dir_screenshots(out_screenshot_path_); if (boost::filesystem::create_directory(dir_screenshots)) { std::cout << "created output directory for screenshots" << std::endl; } } } /* * get no. of vertices in a mesh */ int get_no_vertices(pcl::PolygonMesh::Ptr mesh) { pcl::PointCloud pc; pcl::fromPCLPointCloud2(mesh->cloud, pc); return pc.size(); } /* * save output meshes in .vtk format */ void save_mesh(pcl::PolygonMesh::Ptr mesh, int i, std::string name) { /* pad frame no. 
with 0's */ std::stringstream ss; ss << std::setw(6) << std::setfill('0') << i; std::string frameNum = ss.str(); /* save to vtk */ pcl::io::saveVTKFile(out_path_ + "/" + name + "_" + frameNum + ".vtk", *mesh); std::cout << "saved " + name + "_" + frameNum + ".vtk" << std::endl; } /* * save 3d deformation field in .vtk format */ void save_field(std::shared_ptr sobfu, int i) { /* get parameters */ Params params = sobfu->getParams(); /* create vtk image */ vtkSmartPointer image = vtkSmartPointer::New(); /* specify size of image */ image->SetDimensions(params.volume_dims[0], params.volume_dims[1], params.volume_dims[2]); /* specify size of image data */ image->AllocateScalars(VTK_FLOAT, 4); int *dims = image->GetDimensions(); /* copy vector field data */ std::shared_ptr psi = sobfu->getDeformationField(); kfusion::cuda::CudaData displacement_data = psi->get_data(); displacement_data.download(image->GetScalarPointer()); /* pad frame no. with 0's */ std::stringstream ss; ss << std::setw(6) << std::setfill('0') << i; std::string frameNum = ss.str(); std::string fileName = out_path_ + "/field_" + frameNum + ".vti"; /* save file */ vtkSmartPointer writer = vtkSmartPointer::New(); writer->SetFileName(fileName.c_str()); writer->SetInputData(image); writer->Write(); std::cout << "saved the vector field to .vti" << std::endl; } bool execute() { /* sobfu app */ sobfu = sobfu; /* images */ cv::Mat depth, image, mask; std::vector depths, images, masks; load_files(&depths, &images, &masks); /* output */ create_output_directory(); /* pipeline */ double time_ms = 0; bool has_image = false; bool has_masks = masks.size() > 0; int v1(0); int v2(0); int v3(0); int v4(0); int v5(0); for (size_t i = 0; i < depths.size(); ++i) { depth = cv::imread(depths[i], CV_LOAD_IMAGE_ANYDEPTH); image = cv::imread(images[i], CV_LOAD_IMAGE_COLOR); cv::Mat depth_masked = cv::Mat::zeros(depth.size(), depth.type()); if (has_masks) { mask = cv::imread(masks[i], CV_8U); depth.copyTo(depth_masked, mask); } if (!image.data || !depth.data) { std::cerr << "error: image could not be read; check for improper" << " permissions or invalid formats. exiting..." << std::endl; exit(EXIT_FAILURE); } if (has_masks) { depth_device_.upload(depth_masked.data, depth_masked.step, depth_masked.rows, depth_masked.cols); } else { depth_device_.upload(depth.data, depth.step, depth.rows, depth.cols); } { kfusion::SampledScopeTime fps(time_ms); has_image = (*(sobfu))(depth_device_); } /* get meshes */ pcl::PolygonMesh::Ptr mesh_global, mesh_global_psi_inv; pcl::PolygonMesh::Ptr mesh_n, mesh_n_psi; if (visualizer_ || visualizer_detailed_ || logger_) { mesh_global = sobfu->get_phi_global_mesh(); int no_vertices_canonical = get_no_vertices(mesh_global); std::cout << "no. of point-normal pairs in the canonical model: " << no_vertices_canonical << std::endl; if (i >= 1) { mesh_global_psi_inv = sobfu->get_phi_global_psi_inv_mesh(); int no_vertices_canonical_warped_to_live = get_no_vertices(mesh_global_psi_inv); std::cout << "no. 
of point-normal pairs in the canonical model warped to live: " << no_vertices_canonical_warped_to_live << std::endl; } if (visualizer_detailed_) { if (i == 1) { mesh_n = sobfu->get_phi_n_mesh(); mesh_n_psi = sobfu->get_phi_n_psi_mesh(); } if (i >= 2) { mesh_n = sobfu->get_phi_n_mesh(); mesh_n_psi = sobfu->get_phi_n_psi_mesh(); } } } if (logger_) { save_mesh(mesh_global, i, "canonical_mesh"); if (i >= 1) { save_mesh(mesh_global_psi_inv, i, "canonical_warped_to_live_mesh"); } // save_field(sobfu, i); } if (visualizer_ || visualizer_detailed_) { if (i == 0) { viewer = std::make_shared("meshes"); } /* * BASIC VISUALISER * */ if (visualizer_) { if (i == 0) { /* create view ports */ viewer->createViewPort(0.0, 0.0, 0.5, 1.0, v1); viewer->createViewPort(0.5, 0.0, 1.0, 1.0, v2); /* add labels */ viewer->addText("frame no. " + std::to_string(i), 20, 512, 15, 1.0, 1.0, 1.0, "frame_num", v1); viewer->addText("canonical model", 15, 15, 15, 1.0, 1.0, 1.0, "v1 text", v1); viewer->addText("canonical model warped to live", 15, 15, 15, 1.0, 1.0, 1.0, "v2 text", v2); /* add bounding box */ cv::Vec3f vol_size = sobfu->getParams().volume_size; cv::Affine3f vol_pose = sobfu->getParams().volume_pose; cv::Vec3f min = vol_pose * cv::Vec3f(0.f, 0.f, 0.f); cv::Vec3f max = vol_pose * vol_size; viewer->setCameraPosition(0.0, 0.0, max[2] + 3.0, 0.0, 0.0, 0.0); /* display meshes */ viewer->addPolygonMesh(*mesh_global, "mesh canonical", v1); viewer->spinOnce(5000); /* save screenshot */ std::stringstream ss; ss << std::setw(6) << std::setfill('0') << i; std::string frame_num = ss.str(); viewer->saveScreenshot(out_screenshot_path_ + "/" + frame_num + ".png"); } else if (i >= 1) { /* update label */ viewer->updateText("frame no. " + std::to_string(i), 20, 512, 15, 1.0, 1.0, 1.0, "frame_num"); /* display updated meshes */ viewer->updatePolygonMesh(*mesh_global, "mesh canonical"); if (i == 1) { viewer->addPolygonMesh(*mesh_global_psi_inv, "mesh canonical warped to live", v2); } else { viewer->updatePolygonMesh(*mesh_global_psi_inv, "mesh canonical warped to live"); } viewer->spinOnce(50); /* save screenshot */ std::stringstream ss; ss << std::setw(6) << std::setfill('0') << i; std::string frame_num = ss.str(); viewer->saveScreenshot(out_screenshot_path_ + "/" + frame_num + ".png"); } } /* * DETAILED VISUALISER * */ else if (visualizer_detailed_) { if (i == 0) { /* create view ports */ viewer->createViewPort(0.0, 0.5, 0.5, 1.0, v1); viewer->createViewPort(0.5, 0.5, 1.0, 1.0, v2); viewer->createViewPort(0.0, 0.0, 0.5, 0.5, v3); viewer->createViewPort(0.5, 0.0, 1.0, 0.5, v4); /* add labels */ viewer->addText("frame no. 
" + std::to_string(i), 20, 244, 15, 1.0, 1.0, 1.0, "frame_num", v1); viewer->addText("phi_n", 15, 15, 15, 1.0, 1.0, 1.0, "v1 text", v1); viewer->addText("phi_global(psi_inv)", 15, 15, 15, 1.0, 1.0, 1.0, "v2 text", v2); viewer->addText("phi_global", 15, 15, 15, 1.0, 1.0, 1.0, "v3 text", v3); viewer->addText("phi_n(psi)", 15, 15, 15, 1.0, 1.0, 1.0, "v4 text", v4); /* add bounding box */ cv::Vec3f vol_size = sobfu->getParams().volume_size; cv::Affine3f vol_pose = sobfu->getParams().volume_pose; cv::Vec3f min = vol_pose * cv::Vec3f(0.f, 0.f, 0.f); cv::Vec3f max = vol_pose * vol_size; viewer->setCameraPosition(0.0, 0.0, max[2] + 3.0, 0.0, 0.0, 0.0); /* display meshes */ viewer->addPolygonMesh(*mesh_global, "mesh canonical", v3); viewer->spinOnce(10000); /* save screenshot */ std::stringstream ss; ss << std::setw(6) << std::setfill('0') << i; std::string frame_num = ss.str(); viewer->saveScreenshot(out_screenshot_path_ + "/" + frame_num + ".png"); } if (i >= 1) { /* update labels */ viewer->updateText("frame no. " + std::to_string(i), 20, 244, 15, 1.0, 1.0, 1.0, "frame_num"); /* display meshes */ viewer->updatePolygonMesh(*mesh_global, "mesh canonical"); if (i == 1) { viewer->addPolygonMesh(*mesh_n, "mesh n", v1); viewer->addPolygonMesh(*mesh_global_psi_inv, "mesh canonical warped to live", v2); viewer->addPolygonMesh(*mesh_n_psi, "mesh n psi", v4); } if (i >= 2) { viewer->updatePolygonMesh(*mesh_n, "mesh n"); viewer->updatePolygonMesh(*mesh_global_psi_inv, "mesh canonical warped to live"); viewer->updatePolygonMesh(*mesh_n_psi, "mesh n psi"); } viewer->spinOnce(50); /* save screenshot */ std::stringstream ss; ss << std::setw(6) << std::setfill('0') << i; std::string frame_num = ss.str(); viewer->saveScreenshot(out_screenshot_path_ + "/" + frame_num + ".png"); } } } } return true; } kfusion::cuda::Depth depth_device_; std::shared_ptr sobfu; std::shared_ptr viewer; bool exit_, logger_, visualizer_, visualizer_detailed_, off_screen_, verbose_, vverbose_; std::string file_path_, params_path_, out_path_, out_screenshot_path_; }; /* * parse the input flag and determine the file path and whether or not to enable visualizer * all flags will be matched and the last argument which does not match the flag will be * treated as filepath */ void parse_flags(std::vector args, std::string *file_path, std::string *params_path, bool *logger, bool *visualizer, bool *visualizer_detailed, bool *verbose, bool *vverbose) { std::vector flags = {"-h", "--help", "--enable-viz", "--enable-viz-detailed", "--enable-log", "--verbose", "--vverbose"}; int idx = 0; for (auto arg : args) { if (std::find(std::begin(flags), std::end(flags), arg) != std::end(flags)) { if (arg == "-h" || arg == "--help") { std::cout << "USAGE: sobfu [OPTIONS] " << std::endl; std::cout << "\t--help -h: display help" << std::endl; std::cout << "\t--enable-viz: enable visualizer" << std::endl; std::cout << "\t--enable-viz-detailed: enable visualizer with additional meshes for debugging" << std::endl; std::cout << "\t--enable-log: log output meshes" << std::endl; std::cout << "\t--verbose: low verbosity" << std::endl; std::cout << "\t--vverbose: high verbosity" << std::endl; std::exit(EXIT_SUCCESS); } if (arg == "--enable-log") { *logger = true; } if (arg == "--enable-viz") { *visualizer = true; } if (arg == "--enable-viz-detailed") { *visualizer_detailed = true; } if (arg == "--verbose") { *verbose = true; } if (arg == "--vverbose") { *vverbose = true; } } else if (idx == 0) { *file_path = arg; idx++; } else if (idx == 1) { *params_path = arg; } } } 
int main(int argc, char *argv[]) { int device = 0; kfusion::cuda::setDevice(device); kfusion::cuda::printShortCudaDeviceInfo(device); if (kfusion::cuda::checkIfPreFermiGPU(device)) { std::cout << std::endl << "kinfu does not support pre-fermi gpu architectures, and is not built for them by " "default; exiting..." << std::endl; return 1; } /* program requires at least one argument--the path to the directory where the source files are */ if (argc < 3) { return std::cerr << "error: incorrect number of arguments; please supply path to source data and .ini file; " "exiting..." << std::endl, -1; } std::vector args(argv + 1, argv + argc); std::string file_path, params_path; bool logger = false; bool visualizer = false; bool visualizer_detailed = false; bool off_screen = false; bool verbose = false; bool vverbose = false; parse_flags(args, &file_path, ¶ms_path, &logger, &visualizer, &visualizer_detailed, &verbose, &vverbose); /* disable the visualiser when running over SSH */ if (((visualizer || visualizer_detailed) && !off_screen) && getenv("SSH_CLIENT")) { return std::cerr << "error: cannot run visualiser while running over ssh; please run locally or disable the" "visualiser. exiting..." << std::endl, -1; } SobFuApp app(file_path, params_path, logger, visualizer, visualizer_detailed, verbose, vverbose); /* execute */ app.execute(); return 0; } ================================================ FILE: src/kfusion/CMakeLists.txt ================================================ # Build all kfusion source files into library kfusion add_module_library(kfusion) # Link to kfusion library target_link_libraries(kfusion ${CUDA_CUDART_LIBRARY} ${PCL_LIBRARIES} ) ================================================ FILE: src/kfusion/core.cpp ================================================ #include #include #include #include #include int kf::cuda::getCudaEnabledDeviceCount() { int count; cudaError_t error = cudaGetDeviceCount(&count); if (error == cudaErrorInsufficientDriver) return -1; if (error == cudaErrorNoDevice) return 0; cudaSafeCall(error); return count; } void kf::cuda::setDevice(int device) { cudaSafeCall(cudaSetDevice(device)); } std::string kf::cuda::getDeviceName(int device) { cudaDeviceProp prop; cudaSafeCall(cudaGetDeviceProperties(&prop, device)); return prop.name; } bool kf::cuda::checkIfPreFermiGPU(int device) { if (device < 0) cudaSafeCall(cudaGetDevice(&device)); cudaDeviceProp prop; cudaSafeCall(cudaGetDeviceProperties(&prop, device)); return prop.major < 2; // CC == 1.x } namespace { template inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device) { *attribute = T(); CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device); if (CUDA_SUCCESS == error) return; printf("Driver API error = %04d\n", error); kfusion::cuda::error("driver API error", __FILE__, __LINE__); } inline int convertSMVer2Cores(int major, int minor) { // Defines for GPU Architecture types (using the SM version to determine the # // of cores per SM typedef struct { int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM // minor version int Cores; } SMtoCores; SMtoCores gpuArchCoresPerSM[] = {{0x10, 8}, {0x11, 8}, {0x12, 8}, {0x13, 8}, {0x20, 32}, {0x21, 48}, {0x30, 192}, {0x35, 192}, {0x37, 192}, {0x50, 128}, {0x52, 128}, {0x60, 64}, {0x61, 128}, {0x70, 64}, {0x75, 64}, {-1, -1}}; int index = 0; while (gpuArchCoresPerSM[index].SM != -1) { if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor)) return gpuArchCoresPerSM[index].Cores; index++; } 
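// SM version not found in the lookup table above; warn and fall back to 0 cores per multiprocessor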
printf("\nCan't determine number of cores. Unknown SM version %d.%d!\n", major, minor); return 0; } } // namespace void kf::cuda::printCudaDeviceInfo(int device) { int count = getCudaEnabledDeviceCount(); bool valid = (device >= 0) && (device < count); int beg = valid ? device : 0; int end = valid ? device + 1 : count; printf( "*** CUDA Device Query (Runtime API) version (CUDART static linking) " "*** \n\n"); printf("Device count: %d\n", count); int driverVersion = 0, runtimeVersion = 0; cudaSafeCall(cudaDriverGetVersion(&driverVersion)); cudaSafeCall(cudaRuntimeGetVersion(&runtimeVersion)); const char *computeMode[] = { "Default (multiple host threads can use ::cudaSetDevice() with device " "simultaneously)", "Exclusive (only one host thread in one process is able to use " "::cudaSetDevice() with this device)", "Prohibited (no host thread can use ::cudaSetDevice() with this device)", "Exclusive Process (many threads in one process is able to use " "::cudaSetDevice() with this device)", "Unknown", NULL}; for (int dev = beg; dev < end; ++dev) { cudaDeviceProp prop; cudaSafeCall(cudaGetDeviceProperties(&prop, dev)); int sm_cores = convertSMVer2Cores(prop.major, prop.minor); printf("\nDevice %d: \"%s\"\n", dev, prop.name); printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion / 1000, driverVersion % 100, runtimeVersion / 1000, runtimeVersion % 100); printf(" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor); printf( " Total amount of global memory: %.0f MBytes (%llu " "bytes)\n", (float) prop.totalGlobalMem / 1048576.0f, (unsigned long long) prop.totalGlobalMem); printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, sm_cores, sm_cores * prop.multiProcessorCount); printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f); #if (CUDART_VERSION >= 4000) // This is not available in the CUDA Runtime API, so we make the necessary // calls the driver API to support this for output int memoryClock, memBusWidth, L2CacheSize; getCudaAttribute(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev); getCudaAttribute(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev); getCudaAttribute(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev); printf(" Memory Clock rate: %.2f Mhz\n", memoryClock * 1e-3f); printf(" Memory Bus Width: %d-bit\n", memBusWidth); if (L2CacheSize) printf(" L2 Cache Size: %d bytes\n", L2CacheSize); printf( " Max Texture Dimension Size (x,y,z) 1D=(%d), " "2D=(%d,%d), 3D=(%d,%d,%d)\n", prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1], prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]); printf( " Max Layered Texture Size (dim) x layers 1D=(%d) x %d, " "2D=(%d,%d) x %d\n", prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1], prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]); #endif printf(" Total amount of constant memory: %u bytes\n", (int) prop.totalConstMem); printf(" Total amount of shared memory per block: %u bytes\n", (int) prop.sharedMemPerBlock); printf(" Total number of registers available per block: %d\n", prop.regsPerBlock); printf(" Warp size: %d\n", prop.warpSize); printf(" Maximum number of threads per block: %d\n", prop.maxThreadsPerBlock); printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]); printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], 
prop.maxGridSize[2]); printf(" Maximum memory pitch: %u bytes\n", (int) prop.memPitch); printf(" Texture alignment: %u bytes\n", (int) prop.textureAlignment); #if CUDART_VERSION >= 4000 printf( " Concurrent copy and execution: %s with %d copy " "engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount); #else printf(" Concurrent copy and execution: %s\n", prop.deviceOverlap ? "Yes" : "No"); #endif printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No"); printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No"); printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No"); printf(" Concurrent kernel execution: %s\n", prop.concurrentKernels ? "Yes" : "No"); printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No"); printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No"); printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No"); #if CUDART_VERSION >= 4000 printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No"); printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID); #endif printf(" Compute Mode:\n"); printf(" %s \n", computeMode[prop.computeMode]); } printf("\n"); printf("deviceQuery, CUDA Driver = CUDART"); printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100); printf(", CUDA Runtime Version = %d.%d", runtimeVersion / 1000, runtimeVersion % 100); printf(", NumDevs = %d\n\n", count); fflush(stdout); } void kf::cuda::printShortCudaDeviceInfo(int device) { int count = getCudaEnabledDeviceCount(); bool valid = (device >= 0) && (device < count); int beg = valid ? device : 0; int end = valid ? device + 1 : count; int driverVersion = 0, runtimeVersion = 0; cudaSafeCall(cudaDriverGetVersion(&driverVersion)); cudaSafeCall(cudaRuntimeGetVersion(&runtimeVersion)); for (int dev = beg; dev < end; ++dev) { cudaDeviceProp prop; cudaSafeCall(cudaGetDeviceProperties(&prop, dev)); const char *arch_str = prop.major < 2 ? " (pre-Fermi)" : ""; printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float) prop.totalGlobalMem / 1048576.0f); printf(", sm_%d%d%s, %d cores", prop.major, prop.minor, arch_str, convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount); printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion / 1000, driverVersion % 100, runtimeVersion / 1000, runtimeVersion % 100); } fflush(stdout); } kf::SampledScopeTime::SampledScopeTime(double &time_ms) : time_ms_(time_ms) { start = (double) cv::getTickCount(); } kf::SampledScopeTime::~SampledScopeTime() { static int i_ = 0; time_ms_ += getTime(); if (i_ % EACH == 0 && i_) { std::cout << "avg. 
frame time = " << time_ms_ / EACH << "ms (" << 1000.f * EACH / time_ms_ << "fps)" << std::endl; time_ms_ = 0.0; } ++i_; } double kf::SampledScopeTime::getTime() { return ((double) cv::getTickCount() - start) * 1000.0 / cv::getTickFrequency(); } kf::ScopeTime::ScopeTime(const char *name_) : name(name_) { start = (double) cv::getTickCount(); } kf::ScopeTime::~ScopeTime() { double time_ms = ((double) cv::getTickCount() - start) * 1000.0 / cv::getTickFrequency(); std::cout << "Time(" << name << ") = " << time_ms << "ms" << std::endl; } ================================================ FILE: src/kfusion/cuda/imgproc.cu ================================================ #include //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Depth bilateral filter namespace kfusion { namespace device { __global__ void bilateral_kernel(const PtrStepSz src, PtrStep dst, const int ksz, const float sigma_spatial2_inv_half, const float sigma_depth2_inv_half) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x >= src.cols || y >= src.rows) return; int value = src(y, x); int tx = min(x - ksz / 2 + ksz, src.cols - 1); int ty = min(y - ksz / 2 + ksz, src.rows - 1); float sum1 = 0; float sum2 = 0; for (int cy = max(y - ksz / 2, 0); cy < ty; ++cy) { for (int cx = max(x - ksz / 2, 0); cx < tx; ++cx) { int depth = src(cy, cx); float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy); float color2 = (value - depth) * (value - depth); float weight = __expf(-(space2 * sigma_spatial2_inv_half + color2 * sigma_depth2_inv_half)); sum1 += depth * weight; sum2 += weight; } } dst(y, x) = __float2int_rn(sum1 / sum2); } } // namespace device } // namespace kfusion void kfusion::device::bilateralFilter(const Depth& src, Depth& dst, int kernel_size, float sigma_spatial, float sigma_depth) { sigma_depth *= 1000; // meters -> mm dim3 block(64, 16); dim3 grid(divUp(src.cols(), block.x), divUp(src.rows(), block.y)); cudaSafeCall(cudaFuncSetCacheConfig(bilateral_kernel, cudaFuncCachePreferL1)); bilateral_kernel<<>>(src, dst, kernel_size, 0.5f / (sigma_spatial * sigma_spatial), 0.5f / (sigma_depth * sigma_depth)); cudaSafeCall(cudaGetLastError()); }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Depth truncation namespace kfusion { namespace device { __global__ void truncate_depth_kernel(PtrStepSz depth, ushort max_dist /*mm*/) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (x < depth.cols && y < depth.rows) if (depth(y, x) > max_dist) depth(y, x) = 0; } } // namespace device } // namespace kfusion void kfusion::device::truncateDepth(Depth& depth, float max_dist /*meters*/) { dim3 block(64, 16); dim3 grid(divUp(depth.cols(), block.x), divUp(depth.rows(), block.y)); truncate_depth_kernel<<>>(depth, static_cast(max_dist * 1000.f)); cudaSafeCall(cudaGetLastError()); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Build depth pyramid namespace kfusion { namespace device { __global__ void pyramid_kernel(const PtrStepSz src, PtrStepSz dst, float sigma_depth_mult3) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (x >= dst.cols || y >= dst.rows) return; const int D = 5; int center = src(2 * y, 2 * x); int tx = min(2 * x - D / 2 + D, src.cols - 1); int ty = min(2 * y - D / 2 + D, src.rows - 1); 
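/* average the depth samples in the 5x5 support of the finer level, keeping only those within 3 * sigma_depth (mm) of the centre value; if no sample qualifies the output pixel is set to 0 */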
int cy = max(0, 2 * y - D / 2); int sum = 0; int count = 0; for (; cy < ty; ++cy) for (int cx = max(0, 2 * x - D / 2); cx < tx; ++cx) { int val = src(cy, cx); if (abs(val - center) < sigma_depth_mult3) { sum += val; ++count; } } dst(y, x) = (count == 0) ? 0 : sum / count; } } // namespace device } // namespace kfusion void kfusion::device::depthPyr(const Depth& source, Depth& pyramid, float sigma_depth) { sigma_depth *= 1000; // meters -> mm dim3 block(64, 16); dim3 grid(divUp(pyramid.cols(), block.x), divUp(pyramid.rows(), block.y)); pyramid_kernel<<>>(source, pyramid, sigma_depth * 3); cudaSafeCall(cudaGetLastError()); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Compute normals namespace kfusion { namespace device { __global__ void compute_normals_kernel(const PtrStepSz depth, const Reprojector reproj, PtrStep normals) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x >= depth.cols || y >= depth.rows) return; const float qnan = numeric_limits::quiet_NaN(); Normal n_out = make_float4(qnan, qnan, qnan, 0.f); if (x < depth.cols - 1 && y < depth.rows - 1) { // mm -> meters float z00 = depth(y, x) * 0.001f; float z01 = depth(y, x + 1) * 0.001f; float z10 = depth(y + 1, x) * 0.001f; if (z00 * z01 * z10 != 0) { float3 v00 = reproj(x, y, z00); float3 v01 = reproj(x + 1, y, z01); float3 v10 = reproj(x, y + 1, z10); float3 n = normalized(cross(v01 - v00, v10 - v00)); n_out = make_float4(-n.x, -n.y, -n.z, 0.f); } } normals(y, x) = n_out; } __global__ void mask_depth_kernel(const PtrStep normals, PtrStepSz depth) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x < depth.cols || y < depth.rows) { float4 n = normals(y, x); if (isnan(n.x)) depth(y, x) = 0; } } } // namespace device } // namespace kfusion void kfusion::device::computeNormalsAndMaskDepth(const Reprojector& reproj, Depth& depth, Normals& normals) { dim3 block(64, 16); dim3 grid(divUp(depth.cols(), block.x), divUp(depth.rows(), block.y)); compute_normals_kernel<<>>(depth, reproj, normals); cudaSafeCall(cudaGetLastError()); mask_depth_kernel<<>>(normals, depth); cudaSafeCall(cudaGetLastError()); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Compute computePointNormals namespace kfusion { namespace device { __global__ void points_normals_kernel(const Reprojector reproj, const PtrStepSz depth, PtrStep points, PtrStep normals) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x >= depth.cols || y >= depth.rows) return; const float qnan = numeric_limits::quiet_NaN(); points(y, x) = normals(y, x) = make_float4(qnan, qnan, qnan, qnan); if (x >= depth.cols - 1 || y >= depth.rows - 1) return; // mm -> meters float z00 = depth(y, x) * 0.001f; float z01 = depth(y, x + 1) * 0.001f; float z10 = depth(y + 1, x) * 0.001f; if (z00 * z01 * z10 != 0) { float3 v00 = reproj(x, y, z00); float3 v01 = reproj(x + 1, y, z01); float3 v10 = reproj(x, y + 1, z10); float3 n = normalized(cross(v01 - v00, v10 - v00)); normals(y, x) = make_float4(-n.x, -n.y, -n.z, 0.f); points(y, x) = make_float4(v00.x, v00.y, v00.z, 0.f); } } } // namespace device } // namespace kfusion void kfusion::device::computePointNormals(const Reprojector& reproj, const Depth& depth, Points& points, Normals& normals) { dim3 block(64, 16); dim3 grid(divUp(depth.cols(), block.x), divUp(depth.rows(), 
block.y)); points_normals_kernel<<>>(reproj, depth, points, normals); cudaSafeCall(cudaGetLastError()); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Compute dists namespace kfusion { namespace device { __global__ void compute_dists_kernel(const PtrStepSz depth, Dists dists, float2 finv, float2 c) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x < depth.cols || y < depth.rows) { float xl = (x - c.x) * finv.x; float yl = (y - c.y) * finv.y; float lambda = sqrtf(xl * xl + yl * yl + 1); dists(y, x) = depth(y, x) * lambda * 0.001f; // meters } } } // namespace device } // namespace kfusion void kfusion::device::compute_dists(const Depth& depth, Dists dists, float2 f, float2 c) { dim3 block(64, 16); dim3 grid(divUp(depth.cols(), block.x), divUp(depth.rows(), block.y)); compute_dists_kernel<<>>(depth, dists, make_float2(1.f / f.x, 1.f / f.y), c); cudaSafeCall(cudaGetLastError()); } namespace kfusion { namespace device { __global__ void resize_depth_normals_kernel(const PtrStep dsrc, const PtrStep nsrc, PtrStepSz ddst, PtrStep ndst) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x >= ddst.cols || y >= ddst.rows) return; const float qnan = numeric_limits::quiet_NaN(); ushort d = 0; float4 n = make_float4(qnan, qnan, qnan, qnan); int xs = x * 2; int ys = y * 2; int d00 = dsrc(ys + 0, xs + 0); int d01 = dsrc(ys + 0, xs + 1); int d10 = dsrc(ys + 1, xs + 0); int d11 = dsrc(ys + 1, xs + 1); if (d00 * d01 != 0 && d10 * d11 != 0) { d = (d00 + d01 + d10 + d11) / 4; float4 n00 = nsrc(ys + 0, xs + 0); float4 n01 = nsrc(ys + 0, xs + 1); float4 n10 = nsrc(ys + 1, xs + 0); float4 n11 = nsrc(ys + 1, xs + 1); n.x = (n00.x + n01.x + n10.x + n11.x) * 0.25; n.y = (n00.y + n01.y + n10.y + n11.y) * 0.25; n.z = (n00.z + n01.z + n10.z + n11.z) * 0.25; } ddst(y, x) = d; ndst(y, x) = n; } } // namespace device } // namespace kfusion void kfusion::device::resizeDepthNormals(const Depth& depth, const Normals& normals, Depth& depth_out, Normals& normals_out) { int in_cols = depth.cols(); int in_rows = depth.rows(); int out_cols = in_cols / 2; int out_rows = in_rows / 2; dim3 block(64, 16); dim3 grid(divUp(out_cols, block.x), divUp(out_rows, block.y)); resize_depth_normals_kernel<<>>(depth, normals, depth_out, normals_out); cudaSafeCall(cudaGetLastError()); } namespace kfusion { namespace device { __global__ void resize_points_normals_kernel(const PtrStep vsrc, const PtrStep nsrc, PtrStepSz vdst, PtrStep ndst) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x >= vdst.cols || y >= vdst.rows) return; const float qnan = numeric_limits::quiet_NaN(); vdst(y, x) = ndst(y, x) = make_float4(qnan, qnan, qnan, 0.f); int xs = x * 2; int ys = y * 2; float3 d00 = tr(vsrc(ys + 0, xs + 0)); float3 d01 = tr(vsrc(ys + 0, xs + 1)); float3 d10 = tr(vsrc(ys + 1, xs + 0)); float3 d11 = tr(vsrc(ys + 1, xs + 1)); if (!isnan(d00.x * d01.x * d10.x * d11.x)) { float3 d = (d00 + d01 + d10 + d11) * 0.25f; vdst(y, x) = make_float4(d.x, d.y, d.z, 0.f); float3 n00 = tr(nsrc(ys + 0, xs + 0)); float3 n01 = tr(nsrc(ys + 0, xs + 1)); float3 n10 = tr(nsrc(ys + 1, xs + 0)); float3 n11 = tr(nsrc(ys + 1, xs + 1)); float3 n = (n00 + n01 + n10 + n11) * 0.25f; ndst(y, x) = make_float4(n.x, n.y, n.z, 0.f); } } } // namespace device } // namespace kfusion void kfusion::device::resizePointsNormals(const Points& points, const Normals& normals, 
Points& points_out, Normals& normals_out) { int out_cols = points.cols() / 2; int out_rows = points.rows() / 2; dim3 block(64, 16); dim3 grid(divUp(out_cols, block.x), divUp(out_rows, block.y)); resize_points_normals_kernel<<>>(points, normals, points_out, normals_out); cudaSafeCall(cudaGetLastError()); } namespace kfusion { namespace device { /* calculate for the projected triangle the bounding box in the image domain */ __host__ __device__ void get_bounding_box(float2 v1, float2 v2, float2 v3, int2& min, int2& max) { min.x = static_cast(fmin(v1.x, fmin(v2.x, v3.x))); min.y = static_cast(fmin(v1.y, fmin(v2.y, v3.y))); max.x = static_cast(fmax(v1.x, fmax(v2.x, v3.x))); max.y = static_cast(fmax(v1.y, fmax(v2.y, v3.y))); } __host__ __device__ __forceinline__ float edge_function(const float2& a, const float2& b, const float2& c) { return (c.x - a.x) * (b.y - a.y) - (c.y - a.y) * (b.x - a.x); } /* rasterise surface triangles */ __global__ void rasterise_surface_kernel(const Projector proj, const Aff3f vol2cam, const PtrSz vsrc, const PtrSz nsrc, PtrStepSz points_out, PtrStep normals_out) { int x = (threadIdx.x + blockIdx.x * blockDim.x) * 3; if ((x + 2) >= vsrc.size) return; /* get vertices and normals */ float3 v1 = vol2cam * tr(vsrc[x]); float3 v2 = vol2cam * tr(vsrc[x + 1]); float3 v3 = vol2cam * tr(vsrc[x + 2]); /* project vertices onto the image plane */ float2 coos1 = proj(v1); float2 coos2 = proj(v2); float2 coos3 = proj(v3); /* get 2-d triangle bounding box */ int2 min; int2 max; get_bounding_box(coos1, coos2, coos3, min, max); /* check for validity of coordinates */ if (min.x < 0 || min.y < 0 || max.x >= points_out.cols || max.y >= points_out.rows) return; /* used for smooth interpoation */ float area = edge_function(coos1, coos2, coos3); /* shade pixels */ for (int i = min.x; i < max.x; i++) { for (int j = min.y; j < max.y; j++) { /* coordinates of the centre of the pixel */ float2 p = make_float2(i + 0.5f, j + 0.5f); float w0 = edge_function(coos2, coos3, p) / area; float w1 = edge_function(coos3, coos1, p) / area; float w2 = edge_function(coos1, coos2, p) / area; float z = w0 * v1.z + w1 * v2.z + w2 * v3.z; if (z < points_out(j, i).z || fabs(points_out(j, i).z) < 1e-7f) { points_out(j, i) = make_float4(w0 * v1.x + w1 * v2.x + w2 * v3.x, w0 * v1.y + w1 * v2.y + w2 * v3.y, z, 0.f); } } } for (int i = min.x; i < max.x; i++) { for (int j = min.y; j < max.y; j++) { float3 v0 = tr(points_out(j, i)); float3 v1 = tr(points_out(j + 1, i)); float3 v2 = tr(points_out(j, i + 1)); float3 n = normalized(cross(v1 - v0, v2 - v0)); normals_out(j, i) = make_float4(n.x, n.y, n.z, 1.f); } } } } // namespace device } // namespace kfusion void kfusion::device::rasteriseSurface(const Projector& proj, const Aff3f& vol2cam, const Surface& surface, Points& points_out, Normals& normals_out) { dim3 block(256); dim3 grid(divUp(surface.vertices.size() / 3, block.x)); rasterise_surface_kernel<<>>(proj, vol2cam, surface.vertices, surface.normals, points_out, normals_out); cudaSafeCall(cudaGetLastError()); } ================================================ FILE: src/kfusion/cuda/marching_cubes.cu ================================================ #include #include #include #define FULL_MASK 0xffffffff namespace kfusion { namespace device { // texture edgeTex; texture triTex; texture numVertsTex; } // namespace device } // namespace kfusion void kfusion::device::bindTextures(const int* /*edgeBuf*/, const int* triBuf, const int* numVertsBuf) { cudaChannelFormatDesc desc = cudaCreateChannelDesc(); // 
cudaSafeCall(cudaBindTexture(0, edgeTex, edgeBuf, desc) ); cudaSafeCall(cudaBindTexture(0, triTex, triBuf, desc)); cudaSafeCall(cudaBindTexture(0, numVertsTex, numVertsBuf, desc)); } void kfusion::device::unbindTextures() { // cudaSafeCall( cudaUnbindTexture(edgeTex) ); cudaSafeCall(cudaUnbindTexture(numVertsTex)); cudaSafeCall(cudaUnbindTexture(triTex)); } namespace kfusion { namespace device { __device__ int global_count = 0; __device__ int output_count; __device__ unsigned int blocks_done = 0; __kf_device__ void kfusion::device::CubeIndexEstimator::readTsdf(int x, int y, int z, float& f, float& weight) const { float2 aux = *volume(x, y, z); f = aux.x; weight = aux.y; } __kf_device__ int kfusion::device::CubeIndexEstimator::computeCubeIndex(int x, int y, int z, float f[8]) const { float weight; readTsdf(x, y, z, f[0], weight); if (weight == 0.f) return 0; readTsdf(x + 1, y, z, f[1], weight); if (weight == 0.f) return 0; readTsdf(x + 1, y + 1, z, f[2], weight); if (weight == 0.f) return 0; readTsdf(x, y + 1, z, f[3], weight); if (weight == 0.f) return 0; readTsdf(x, y, z + 1, f[4], weight); if (weight == 0.f) return 0; readTsdf(x + 1, y, z + 1, f[5], weight); if (weight == 0.f) return 0; readTsdf(x + 1, y + 1, z + 1, f[6], weight); if (weight == 0.f) return 0; readTsdf(x, y + 1, z + 1, f[7], weight); if (weight == 0.f) return 0; // calculate flag indicating if each vertex is inside or outside isosurface int cubeindex = 0; cubeindex = int(f[0] < isoValue); cubeindex += int(f[1] < isoValue) * 2; cubeindex += int(f[2] < isoValue) * 4; cubeindex += int(f[3] < isoValue) * 8; cubeindex += int(f[4] < isoValue) * 16; cubeindex += int(f[5] < isoValue) * 32; cubeindex += int(f[6] < isoValue) * 64; cubeindex += int(f[7] < isoValue) * 128; return cubeindex; } __kf_device__ void kfusion::device::OccupiedVoxels::operator()() const { int x = threadIdx.x + blockIdx.x * CTA_SIZE_X; int y = threadIdx.y + blockIdx.y * CTA_SIZE_Y; if (__all_sync(FULL_MASK, x >= volume.dims.x) || __all_sync(FULL_MASK, y >= volume.dims.y)) { return; } int ftid = Block::flattenedThreadId(); int warp_id = Warp::id(); int lane_id = Warp::laneId(); volatile __shared__ int warps_buffer[WARPS_COUNT]; for (int z = 0; z < volume.dims.z - 1; z++) { int numVerts = 0; ; if (x + 1 < volume.dims.x && y + 1 < volume.dims.y) { float field[8]; int cubeindex = computeCubeIndex(x, y, z, field); // read number of vertices from texture numVerts = (cubeindex == 0 || cubeindex == 255) ? 
0 : tex1Dfetch(numVertsTex, cubeindex); } int total = __popc(__ballot_sync(FULL_MASK, numVerts > 0)); if (total == 0) continue; if (lane_id == 0) { int old = atomicAdd(&global_count, total); warps_buffer[warp_id] = old; } int old_global_voxels_count = warps_buffer[warp_id]; int offs = Warp::binaryExclScan(__ballot_sync(FULL_MASK, numVerts > 0)); if (old_global_voxels_count + offs < max_size && numVerts > 0) { voxels_indices[old_global_voxels_count + offs] = volume.dims.y * volume.dims.x * z + volume.dims.x * y + x; vertices_number[old_global_voxels_count + offs] = numVerts; } bool full = old_global_voxels_count + total >= max_size; if (full) break; } /* for(int z = 0; z < 128 - 1; z++) */ ///////////////////////// // prepare for future scans if (ftid == 0) { unsigned int total_blocks = gridDim.x * gridDim.y * gridDim.z; unsigned int value = atomicInc(&blocks_done, total_blocks); // last block if (value == total_blocks - 1) { output_count = min(max_size, global_count); blocks_done = 0; global_count = 0; } } } /* operator () */ __global__ void getOccupiedVoxelsKernel(const OccupiedVoxels ov) { ov(); } int getOccupiedVoxels(const TsdfVolume& volume, DeviceArray2D& occupied_voxels) { OccupiedVoxels ov(volume); ov.voxels_indices = occupied_voxels.ptr(0); ov.vertices_number = occupied_voxels.ptr(1); ov.max_size = occupied_voxels.cols(); dim3 block(OccupiedVoxels::CTA_SIZE_X, OccupiedVoxels::CTA_SIZE_Y); dim3 grid(divUp(volume.dims.x, block.x), divUp(volume.dims.y, block.y)); getOccupiedVoxelsKernel<<>>(ov); cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaDeviceSynchronize()); int size; cudaSafeCall(cudaMemcpyFromSymbol(&size, output_count, sizeof(size))); return size; } int computeOffsetsAndTotalVertices(DeviceArray2D& occupied_voxels) { thrust::device_ptr beg(occupied_voxels.ptr(1)); thrust::device_ptr end = beg + occupied_voxels.cols(); thrust::device_ptr out(occupied_voxels.ptr(2)); thrust::exclusive_scan(beg, end, out); int lastElement, lastScanElement; DeviceArray last_elem(occupied_voxels.ptr(1) + occupied_voxels.cols() - 1, 1); DeviceArray last_scan(occupied_voxels.ptr(2) + occupied_voxels.cols() - 1, 1); last_elem.download(&lastElement); last_scan.download(&lastScanElement); return lastElement + lastScanElement; } __kf_device__ float3 kfusion::device::TrianglesGenerator::get_node_coo(int x, int y, int z) const { float3 coo = make_float3(x, y, z); coo += 0.5f; // shift to volume cell center; coo.x *= cell_size.x; coo.y *= cell_size.y; coo.z *= cell_size.z; return coo; } __kf_device__ float3 kfusion::device::TrianglesGenerator::vertex_interp(float3 p0, float3 p1, float f0, float f1) const { float t = (isoValue - f0) / (f1 - f0 + 1e-15f); float x = p0.x + t * (p1.x - p0.x); float y = p0.y + t * (p1.y - p0.y); float z = p0.z + t * (p1.z - p0.z); return make_float3(x, y, z); } __kf_device__ void kfusion::device::TrianglesGenerator::operator()() const { int tid = threadIdx.x; int idx = (blockIdx.y * MAX_GRID_SIZE_X + blockIdx.x) * CTA_SIZE + tid; if (idx >= voxels_count) return; int voxel = occupied_voxels[idx]; int z = voxel / (volume.dims.x * volume.dims.y); int y = (voxel - z * volume.dims.x * volume.dims.y) / volume.dims.x; int x = (voxel - z * volume.dims.x * volume.dims.y) - y * volume.dims.x; float f[8]; int cubeindex = computeCubeIndex(x, y, z, f); /* calculate cell vertex positions */ float3 v[8]; v[0] = get_node_coo(x, y, z); v[1] = get_node_coo(x + 1, y, z); v[2] = get_node_coo(x + 1, y + 1, z); v[3] = get_node_coo(x, y + 1, z); v[4] = get_node_coo(x, y, z + 1); v[5] = 
get_node_coo(x + 1, y, z + 1); v[6] = get_node_coo(x + 1, y + 1, z + 1); v[7] = get_node_coo(x, y + 1, z + 1); /* find vertices where surface intersects the cube; use shared memory to avoid using local */ __shared__ float3 vertlist[12][CTA_SIZE]; vertlist[0][tid] = vertex_interp(v[0], v[1], f[0], f[1]); vertlist[1][tid] = vertex_interp(v[1], v[2], f[1], f[2]); vertlist[2][tid] = vertex_interp(v[2], v[3], f[2], f[3]); vertlist[3][tid] = vertex_interp(v[3], v[0], f[3], f[0]); vertlist[4][tid] = vertex_interp(v[4], v[5], f[4], f[5]); vertlist[5][tid] = vertex_interp(v[5], v[6], f[5], f[6]); vertlist[6][tid] = vertex_interp(v[6], v[7], f[6], f[7]); vertlist[7][tid] = vertex_interp(v[7], v[4], f[7], f[4]); vertlist[8][tid] = vertex_interp(v[0], v[4], f[0], f[4]); vertlist[9][tid] = vertex_interp(v[1], v[5], f[1], f[5]); vertlist[10][tid] = vertex_interp(v[2], v[6], f[2], f[6]); vertlist[11][tid] = vertex_interp(v[3], v[7], f[3], f[7]); __syncthreads(); /* output triangle vertices and normals */ int numVerts = tex1Dfetch(numVertsTex, cubeindex); for (int i = 0; i < numVerts; i += 3) { int index = vertex_ofssets[idx] + i; int v1 = tex1Dfetch(triTex, (cubeindex * 16) + i + 0); int v2 = tex1Dfetch(triTex, (cubeindex * 16) + i + 1); int v3 = tex1Dfetch(triTex, (cubeindex * 16) + i + 2); /* NOTE (dig15): the surface could be smoother if the normal weren't the same for each vertex of the triangle */ float3 n = normalized(cross(vertlist[v3][tid] - vertlist[v1][tid], vertlist[v2][tid] - vertlist[v1][tid])); store_point(outputVertices, index + 0, pose * vertlist[v1][tid]); store_point(outputNormals, index + 0, n); store_point(outputVertices, index + 1, pose * vertlist[v2][tid]); store_point(outputNormals, index + 1, n); store_point(outputVertices, index + 2, pose * vertlist[v3][tid]); store_point(outputNormals, index + 2, n); } } __kf_device__ void kfusion::device::TrianglesGenerator::store_point(float4* ptr, int index, const float3& vertex) const { ptr[index] = make_float4(vertex.x, -vertex.y, -vertex.z, 1.f); } __global__ void trianglesGeneratorKernel(const TrianglesGenerator tg) { tg(); } void generateTriangles(const TsdfVolume& volume, const DeviceArray2D& occupied_voxels, const float3& volume_size, const Aff3f& pose, DeviceArray& outputVertices, DeviceArray& outputNormals) { int device; cudaSafeCall(cudaGetDevice(&device)); cudaDeviceProp prop; cudaSafeCall(cudaGetDeviceProperties(&prop, device)); typedef TrianglesGenerator Tg; Tg tg(volume); tg.occupied_voxels = occupied_voxels.ptr(0); tg.vertex_ofssets = occupied_voxels.ptr(2); tg.voxels_count = occupied_voxels.cols(); tg.cell_size.x = volume_size.x / volume.dims.x; tg.cell_size.y = volume_size.y / volume.dims.y; tg.cell_size.z = volume_size.z / volume.dims.z; tg.outputVertices = outputVertices; tg.outputNormals = outputNormals; tg.pose = pose; int block_size = 256; int blocks_num = divUp(tg.voxels_count, block_size); dim3 block(block_size); dim3 grid(min(blocks_num, Tg::MAX_GRID_SIZE_X), divUp(blocks_num, Tg::MAX_GRID_SIZE_X)); trianglesGeneratorKernel<<>>(tg); cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaDeviceSynchronize()); } } // namespace device } // namespace kfusion ================================================ FILE: src/kfusion/cuda/proj_icp.cu ================================================ #include #include namespace kfusion { namespace device { texture dprev_tex; texture nprev_tex; texture vprev_tex; struct ComputeIcpHelper::Policy { enum { CTA_SIZE_X = 32, CTA_SIZE_Y = 8, CTA_SIZE = CTA_SIZE_X * CTA_SIZE_Y, B = 6, COLS = 6, 
ROWS = 6, DIAG = 6, UPPER_DIAG_MAT = (COLS * ROWS - DIAG) / 2 + DIAG, TOTAL = UPPER_DIAG_MAT + B, FINAL_REDUCE_CTA_SIZE = 256, FINAL_REDUCE_STRIDE = FINAL_REDUCE_CTA_SIZE }; }; __kf_device__ float2 ComputeIcpHelper::proj(const float3& p) const { float2 coo; coo.x = __fmaf_rn(f.x, __fdividef(p.x, p.z), c.x); coo.y = __fmaf_rn(f.y, __fdividef(p.y, p.z), c.y); return coo; } __kf_device__ float3 ComputeIcpHelper::reproj(float u, float v, float z) const { float x = z * (u - c.x) * finv.x; float y = z * (v - c.y) * finv.y; return make_float3(x, y, z); } #if defined USE_DEPTH __kf_device__ int ComputeIcpHelper::find_coresp(int x, int y, float3& nd, float3& d, float3& s) const { int src_z = dcurr(y, x); if (src_z == 0) return 40; s = aff * reproj(x, y, src_z * 0.001f); float2 coo = proj(s); if (s.z <= 0 || coo.x < 0 || coo.y < 0 || coo.x >= cols || coo.y >= rows) return 80; int dst_z = tex2D(dprev_tex, coo.x, coo.y); if (dst_z == 0) return 120; d = reproj(coo.x, coo.y, dst_z * 0.001f); float dist2 = norm_sqr(s - d); if (dist2 > dist2_thres) return 160; float3 ns = aff.R * tr(ncurr(y, x)); nd = tr(tex2D(nprev_tex, coo.x, coo.y)); float cosine = fabs(dot(ns, nd)); if (cosine < min_cosine) return 200; return 0; } #else __kf_device__ int ComputeIcpHelper::find_coresp(int x, int y, float3& nd, float3& d, float3& s) const { s = tr(vcurr(y, x)); if (isnan(s.x)) return 40; s = aff * s; float2 coo = proj(s); if (s.z <= 0 || coo.x < 0 || coo.y < 0 || coo.x >= cols || coo.y >= rows) return 80; d = tr(tex2D(vprev_tex, coo.x, coo.y)); if (isnan(d.x)) return 120; float dist2 = norm_sqr(s - d); if (dist2 > dist2_thres) return 160; float3 ns = aff.R * tr(ncurr(y, x)); nd = tr(tex2D(nprev_tex, coo.x, coo.y)); float cosine = fabs(dot(ns, nd)); if (cosine < min_cosine) return 200; return 0; } #endif __kf_device__ void ComputeIcpHelper::partial_reduce(const float row[7], PtrStep& partial_buf) const { volatile __shared__ float smem[Policy::CTA_SIZE]; int tid = Block::flattenedThreadId(); float* pos = partial_buf.data + blockIdx.x + gridDim.x * blockIdx.y; size_t step = partial_buf.step / sizeof(float); #define STOR \ if (tid == 0) { \ *pos = smem[0]; \ pos += step; \ } __syncthreads(); smem[tid] = row[0] * row[0]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[0] * row[1]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[0] * row[2]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[0] * row[3]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[0] * row[4]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[0] * row[5]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[0] * row[6]; __syncthreads(); Block::reduce(smem, plus()); STOR //////////////////////////////// __syncthreads(); smem[tid] = row[1] * row[1]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[1] * row[2]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[1] * row[3]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[1] * row[4]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[1] * row[5]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[1] * row[6]; __syncthreads(); Block::reduce(smem, plus()); STOR //////////////////////////////// __syncthreads(); smem[tid] = row[2] * row[2]; 
__syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[2] * row[3]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[2] * row[4]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[2] * row[5]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[2] * row[6]; __syncthreads(); Block::reduce(smem, plus()); STOR //////////////////////////////// __syncthreads(); smem[tid] = row[3] * row[3]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[3] * row[4]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[3] * row[5]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[3] * row[6]; __syncthreads(); Block::reduce(smem, plus()); STOR /////////////////////////////////////////////////// __syncthreads(); smem[tid] = row[4] * row[4]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[4] * row[5]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[4] * row[6]; __syncthreads(); Block::reduce(smem, plus()); STOR /////////////////////////////////////////////////// __syncthreads(); smem[tid] = row[5] * row[5]; __syncthreads(); Block::reduce(smem, plus()); STOR __syncthreads(); smem[tid] = row[5] * row[6]; __syncthreads(); Block::reduce(smem, plus()); STOR } __global__ void icp_helper_kernel(const ComputeIcpHelper helper, PtrStep partial_buf) { int x = threadIdx.x + blockIdx.x * ComputeIcpHelper::Policy::CTA_SIZE_X; int y = threadIdx.y + blockIdx.y * ComputeIcpHelper::Policy::CTA_SIZE_Y; float3 n, d, s; int filtered = (x < helper.cols && y < helper.rows) ? helper.find_coresp(x, y, n, d, s) : 1; // if (x < helper.cols && y < helper.rows) mask(y, x) = filtered; float row[7]; if (!filtered) { *(float3*) &row[0] = cross(s, n); *(float3*) &row[3] = n; row[6] = dot(n, d - s); } else row[0] = row[1] = row[2] = row[3] = row[4] = row[5] = row[6] = 0.f; helper.partial_reduce(row, partial_buf); } __global__ void icp_final_reduce_kernel(const PtrStep partial_buf, const int length, float* final_buf) { const float* beg = partial_buf.ptr(blockIdx.x); const float* end = beg + length; int tid = threadIdx.x; float sum = 0.f; for (const float* t = beg + tid; t < end; t += ComputeIcpHelper::Policy::FINAL_REDUCE_STRIDE) sum += *t; __shared__ float smem[ComputeIcpHelper::Policy::FINAL_REDUCE_CTA_SIZE]; smem[tid] = sum; __syncthreads(); Block::reduce(smem, plus()); if (tid == 0) final_buf[blockIdx.x] = smem[0]; } } // namespace device } // namespace kfusion void kfusion::device::ComputeIcpHelper::operator()(const Depth& dprev, const Normals& nprev, DeviceArray2D& buffer, float* data, cudaStream_t s) { dprev_tex.filterMode = cudaFilterModePoint; nprev_tex.filterMode = cudaFilterModePoint; TextureBinder dprev_binder(dprev, dprev_tex); TextureBinder nprev_binder(nprev, nprev_tex); dim3 block(Policy::CTA_SIZE_X, Policy::CTA_SIZE_Y); dim3 grid(divUp((int) cols, block.x), divUp((int) rows, block.y)); int partials_count = (int) (grid.x * grid.y); allocate_buffer(buffer, partials_count); icp_helper_kernel<<>>(*this, buffer); cudaSafeCall(cudaGetLastError()); int b = Policy::FINAL_REDUCE_CTA_SIZE; int g = Policy::TOTAL; icp_final_reduce_kernel<<>>(buffer, partials_count, buffer.ptr(Policy::TOTAL)); cudaSafeCall(cudaGetLastError()); cudaSafeCall( cudaMemcpyAsync(data, buffer.ptr(Policy::TOTAL), Policy::TOTAL * sizeof(float), cudaMemcpyDeviceToHost, s)); 
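/* NOTE: the reduction buffer holds Policy::TOTAL = 27 rows of per-block partial sums: the 21 unique entries of the
 * symmetric 6x6 matrix A of the point-to-plane normal equations, followed by the 6 entries of the right-hand side b.
 * icp_final_reduce_kernel collapses each row into a single float stored in buffer row Policy::TOTAL, which is then
 * copied asynchronously into page-locked host memory and later unpacked by ProjectiveICP::StreamHelper::get(). */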
cudaSafeCall(cudaGetLastError()); } void kfusion::device::ComputeIcpHelper::operator()(const Points& vprev, const Normals& nprev, DeviceArray2D& buffer, float* data, cudaStream_t s) { dprev_tex.filterMode = cudaFilterModePoint; nprev_tex.filterMode = cudaFilterModePoint; TextureBinder vprev_binder(vprev, vprev_tex); TextureBinder nprev_binder(nprev, nprev_tex); dim3 block(Policy::CTA_SIZE_X, Policy::CTA_SIZE_Y); dim3 grid(divUp((int) cols, block.x), divUp((int) rows, block.y)); int partials_count = (int) (grid.x * grid.y); allocate_buffer(buffer, partials_count); icp_helper_kernel<<>>(*this, buffer); cudaSafeCall(cudaGetLastError()); int b = Policy::FINAL_REDUCE_CTA_SIZE; int g = Policy::TOTAL; icp_final_reduce_kernel<<>>(buffer, partials_count, buffer.ptr(Policy::TOTAL)); cudaSafeCall(cudaGetLastError()); cudaSafeCall( cudaMemcpyAsync(data, buffer.ptr(Policy::TOTAL), Policy::TOTAL * sizeof(float), cudaMemcpyDeviceToHost, s)); cudaSafeCall(cudaGetLastError()); } void kfusion::device::ComputeIcpHelper::allocate_buffer(DeviceArray2D& buffer, int partials_count) { if (partials_count < 0) { const int input_cols = 640; const int input_rows = 480; int gx = divUp(input_cols, Policy::CTA_SIZE_X); int gy = divUp(input_rows, Policy::CTA_SIZE_Y); partials_count = gx * gy; } int min_rows = Policy::TOTAL + 1; int min_cols = max(partials_count, Policy::TOTAL); if (buffer.rows() < min_rows || buffer.cols() < min_cols) buffer.create(min_rows, min_cols); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// ComputeIcpHelper::PageLockHelper kfusion::device::ComputeIcpHelper::PageLockHelper::PageLockHelper() : data(0) { cudaSafeCall(cudaMallocHost((void**) &data, Policy::TOTAL * sizeof(float))); } kfusion::device::ComputeIcpHelper::PageLockHelper::~PageLockHelper() { cudaSafeCall(cudaFreeHost(data)); data = 0; } ================================================ FILE: src/kfusion/cuda/tsdf_volume.cu ================================================ /* kfusion includes */ #include #include /* sobfu includes */ #include /* cuda includes */ #include #include /* thrust includes */ #include #include using namespace kfusion::device; /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Volume initialization namespace kfusion { namespace device { __global__ void clear_volume_kernel(TsdfVolume tsdf) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x < tsdf.dims.x && y < tsdf.dims.y) { float2* beg = tsdf.beg(x, y); float2* end = beg + tsdf.dims.x * tsdf.dims.y * tsdf.dims.z; for (float2* pos = beg; pos != end; pos = tsdf.zstep(pos)) *pos = make_float2(0.f, 0.f); } } } // namespace device } // namespace kfusion void kfusion::device::clear_volume(TsdfVolume& volume) { dim3 block(64, 16); dim3 grid(1, 1, 1); grid.x = divUp(volume.dims.x, block.x); grid.y = divUp(volume.dims.y, block.y); clear_volume_kernel<<>>(volume); cudaSafeCall(cudaGetLastError()); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Volume integration namespace kfusion { namespace device { texture dists_tex(0, cudaFilterModePoint, cudaAddressModeBorder, cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat)); struct TsdfIntegrator { Projector proj; Aff3f vol2cam; int2 dists_size; __kf_device__ void operator()(TsdfVolume& volume) const { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * 
blockDim.y + threadIdx.y; if (x >= volume.dims.x || y >= volume.dims.y) { return; } float3 vc = make_float3(x * volume.voxel_size.x + volume.voxel_size.x / 2.f, y * volume.voxel_size.y + volume.voxel_size.y / 2.f, volume.voxel_size.z / 2.f); float3 vc_cam = vol2cam * vc; /* transfrom from volume coos to camera coos */ float3 zstep = make_float3(0.f, 0.f, volume.voxel_size.z); float2* vptr = volume.beg(x, y); for (int i = 0; i <= volume.dims.z - 1; ++i, vc_cam += zstep, vptr = volume.zstep(vptr)) { /* project the voxel centre onto the depth map */ float2 coo = proj(vc_cam); if (coo.x < 0 || coo.y < 0 || coo.x >= dists_size.x || coo.y >= dists_size.y) { continue; } float Dp = tex2D(dists_tex, coo.x, coo.y); if (Dp <= 0.f || vc_cam.z <= 0) { continue; } /* get the psdf value */ float psdf = Dp - vc_cam.z; /* get the weight */ float weight = (psdf > -volume.eta) ? 1.f : 0.f; if (psdf >= volume.trunc_dist) { *vptr = make_float2(1.f, weight); } else if (psdf <= -volume.trunc_dist) { *vptr = make_float2(-1.f, weight); } else { *vptr = make_float2(__fdividef(psdf, volume.trunc_dist), weight); } } } __kf_device__ void operator()(TsdfVolume& phi_global, TsdfVolume& phi_n_psi) const { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (x >= phi_global.dims.x || y >= phi_global.dims.y) { return; } float2* pos_global = phi_global.beg(x, y); float2* pos_n_psi = phi_n_psi.beg(x, y); for (int i = 0; i <= phi_global.dims.z - 1; ++i, pos_global = phi_global.zstep(pos_global), pos_n_psi = phi_n_psi.zstep(pos_n_psi)) { float2 tsdf = *pos_n_psi; if (tsdf.y == 0.f || (tsdf.y == 1.f && (tsdf.x == 0.f || tsdf.x == -1.f))) { continue; } float2 tsdf_prev = *pos_global; float tsdf_new = __fdividef(__fmaf_rn(tsdf_prev.y, tsdf_prev.x, tsdf.x), tsdf_prev.y + 1.f); float weight_new = fminf(tsdf_prev.y + 1.f, (float) phi_global.max_weight); /* pack and write */ *pos_global = make_float2(tsdf_new, weight_new); } } }; // namespace device __global__ void integrate_kernel(const TsdfIntegrator integrator, TsdfVolume volume) { integrator(volume); } __global__ void integrate_kernel(const TsdfIntegrator integrator, TsdfVolume phi_global, TsdfVolume phi_n_psi) { integrator(phi_global, phi_n_psi); }; } // namespace device } // namespace kfusion void kfusion::device::integrate(const PtrStepSz& dists, TsdfVolume& volume, const Aff3f& aff, const Projector& proj) { /* init tsdf */ TsdfIntegrator ti; ti.dists_size = make_int2(dists.cols, dists.rows); ti.vol2cam = aff; ti.proj = proj; dists_tex.filterMode = cudaFilterModePoint; dists_tex.addressMode[0] = cudaAddressModeBorder; dists_tex.addressMode[1] = cudaAddressModeBorder; dists_tex.addressMode[2] = cudaAddressModeBorder; TextureBinder binder(dists, dists_tex, cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat)); (void) binder; dim3 block(64, 16); dim3 grid(divUp(volume.dims.x, block.x), divUp(volume.dims.y, block.y)); integrate_kernel<<>>(ti, volume); cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaDeviceSynchronize()); } void kfusion::device::integrate(TsdfVolume& phi_global, TsdfVolume& phi_n_psi) { TsdfIntegrator ti; dim3 block(64, 16); dim3 grid(divUp(phi_global.dims.x, block.x), divUp(phi_global.dims.y, block.y)); integrate_kernel<<>>(ti, phi_global, phi_n_psi); cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaDeviceSynchronize()); } namespace kfusion { namespace device { //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// signed distance fields for 
various primitives __global__ void init_box_kernel(TsdfVolume volume, const float3 b) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (x >= volume.dims.x || y >= volume.dims.y) return; /* centering */ float3 c = make_float3(volume.dims.x / 2.f * volume.voxel_size.x, volume.dims.y / 2.f * volume.voxel_size.y, volume.dims.z / 2.f * volume.voxel_size.z); float3 vc = make_float3(x * volume.voxel_size.x + volume.voxel_size.x / 2.f, y * volume.voxel_size.y + volume.voxel_size.y / 2.f, volume.voxel_size.z / 2.f) - c; float3 zstep = make_float3(0.f, 0.f, volume.voxel_size.z); float2* vptr = volume.beg(x, y); for (int i = 0; i < volume.dims.z; vc += zstep, vptr = volume.zstep(vptr), ++i) { float3 d = make_float3(fabs(vc.x), fabs(vc.y), fabs(vc.z)) - b; float sdf = fmin(fmax(d.x, fmax(d.y, d.z)), 0.f) + norm(make_float3(fmax(d.x, 0.f), fmax(d.y, 0.f), fmax(d.z, 0.f))); float weight = 1.f; if (sdf >= volume.trunc_dist) { *vptr = make_float2(1.f, weight); } else if (sdf <= -volume.trunc_dist) { *vptr = make_float2(-1.f, weight); } else { *vptr = make_float2(__fdividef(sdf, volume.trunc_dist), weight); } } } __global__ void init_ellipsoid_kernel(TsdfVolume volume, const float3 r) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (x >= volume.dims.x || y >= volume.dims.y) return; /* centering */ float3 c = make_float3(volume.dims.x / 2.f * volume.voxel_size.x, volume.dims.y / 2.f * volume.voxel_size.y, volume.dims.z / 2.f * volume.voxel_size.z); float3 vc = make_float3(x * volume.voxel_size.x + volume.voxel_size.x / 2.f, y * volume.voxel_size.y + volume.voxel_size.y / 2.f, volume.voxel_size.z / 2.f) - c; float3 zstep = make_float3(0.f, 0.f, volume.voxel_size.z); float2* vptr = volume.beg(x, y); for (int i = 0; i < volume.dims.z; vc += zstep, vptr = volume.zstep(vptr), ++i) { float k0 = norm(make_float3(vc.x / r.x, vc.y / r.y, vc.z / r.z)); float k1 = norm(make_float3(vc.x / (r.x * r.x), vc.y / (r.y * r.y), vc.z / (r.z * r.z))); float sdf = k0 * (k0 - 1.f) / k1; float weight = 1.f; if (sdf >= volume.trunc_dist) { *vptr = make_float2(1.f, weight); } else if (sdf <= -volume.trunc_dist) { *vptr = make_float2(-1.f, weight); } else { *vptr = make_float2(__fdividef(sdf, volume.trunc_dist), weight); } } } __global__ void init_sphere_kernel(TsdfVolume volume, float3 centre, float radius) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (x >= volume.dims.x || y >= volume.dims.y) return; float3 vc = make_float3(x * volume.voxel_size.x + volume.voxel_size.x / 2.f, y * volume.voxel_size.y + volume.voxel_size.y / 2.f, volume.voxel_size.z / 2.f); float3 zstep = make_float3(0.f, 0.f, volume.voxel_size.z); float2* vptr = volume.beg(x, y); for (int i = 0; i < volume.dims.z; vc += zstep, vptr = volume.zstep(vptr), ++i) { float d = sqrtf(powf(vc.x - centre.x, 2) + powf(vc.y - centre.y, 2) + powf(vc.z - centre.z, 2)); float sdf = d - radius; float weight = (sdf > -volume.eta) ? 
1.f : 0.f; if (sdf >= volume.trunc_dist) { *vptr = make_float2(1.f, weight); } else if (sdf <= -volume.trunc_dist) { *vptr = make_float2(-1.f, weight); } else { *vptr = make_float2(__fdividef(sdf, volume.trunc_dist), weight); } } } __global__ void init_plane_kernel(TsdfVolume volume, float z) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (x >= volume.dims.x || y >= volume.dims.y) return; float3 vc = make_float3(x * volume.voxel_size.x + volume.voxel_size.x / 2.f, y * volume.voxel_size.y + volume.voxel_size.y / 2.f, volume.voxel_size.z / 2.f); float3 zstep = make_float3(0.f, 0.f, volume.voxel_size.z); float2* vptr = volume.beg(x, y); for (int i = 0; i < volume.dims.z; vc += zstep, vptr = volume.zstep(vptr), ++i) { float sdf = vc.z - z; float weight = 1.f; if (sdf >= volume.trunc_dist) { *vptr = make_float2(1.f, weight); } else if (sdf <= -volume.trunc_dist) { *vptr = make_float2(-1.f, weight); } else { *vptr = make_float2(__fdividef(sdf, volume.trunc_dist), weight); } } } __global__ void init_torus_kernel(TsdfVolume volume, const float2 t) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; if (x >= volume.dims.x || y >= volume.dims.y) return; /* centering */ float3 c = make_float3(volume.dims.x / 2.f * volume.voxel_size.x, volume.dims.y / 2.f * volume.voxel_size.y, volume.dims.z / 2.f * volume.voxel_size.z); float3 vc = make_float3(x * volume.voxel_size.x + volume.voxel_size.x / 2.f, y * volume.voxel_size.y + volume.voxel_size.y / 2.f, volume.voxel_size.z / 2.f) - c; float3 zstep = make_float3(0.f, 0.f, volume.voxel_size.z); float2* vptr = volume.beg(x, y); for (int i = 0; i < volume.dims.z; vc += zstep, vptr = volume.zstep(vptr), ++i) { float2 q = make_float2(norm(make_float2(vc.x, vc.z)) - t.x, vc.y); float sdf = norm(q) - t.y; float weight = 1.f; if (sdf >= volume.trunc_dist) { *vptr = make_float2(1.f, weight); } else if (sdf <= -volume.trunc_dist) { *vptr = make_float2(-1.f, weight); } else { *vptr = make_float2(__fdividef(sdf, volume.trunc_dist), weight); } } } } // namespace device } // namespace kfusion void kfusion::device::init_box(TsdfVolume& volume, const float3& b) { dim3 block(64, 16); dim3 grid(divUp(volume.dims.x, block.x), divUp(volume.dims.y, block.y)); init_box_kernel<<>>(volume, b); cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaDeviceSynchronize()); } void kfusion::device::init_ellipsoid(TsdfVolume& volume, const float3& r) { dim3 block(64, 16); dim3 grid(divUp(volume.dims.x, block.x), divUp(volume.dims.y, block.y)); init_ellipsoid_kernel<<>>(volume, r); cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaDeviceSynchronize()); } void kfusion::device::init_plane(TsdfVolume& volume, const float& z) { dim3 block(64, 16); dim3 grid(divUp(volume.dims.x, block.x), divUp(volume.dims.y, block.y)); init_plane_kernel<<>>(volume, z); cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaDeviceSynchronize()); } void kfusion::device::init_sphere(TsdfVolume& volume, const float3& centre, const float& radius) { dim3 block(64, 16); dim3 grid(divUp(volume.dims.x, block.x), divUp(volume.dims.y, block.y)); init_sphere_kernel<<>>(volume, centre, radius); cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaDeviceSynchronize()); } void kfusion::device::init_torus(TsdfVolume& volume, const float2& t) { dim3 block(64, 16); dim3 grid(divUp(volume.dims.x, block.x), divUp(volume.dims.y, block.y)); init_torus_kernel<<>>(volume, t); cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaDeviceSynchronize()); 
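/* NOTE: like the box, ellipsoid, sphere and plane initialisers above, the torus kernel writes SDF values normalised
 * by trunc_dist and clamped to [-1, 1] together with a per-voxel weight; these analytic volumes are presumably
 * intended as synthetic inputs for the deformation-field and solver tests. */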
} ================================================ FILE: src/kfusion/device_memory.cpp ================================================ #include #include #include #include #include void kfusion::cuda::error(const char *error_string, const char *file, const int line, const char *func) { std::cout << "error: " << error_string << "\t" << file << ":" << line << std::endl; exit(0); } ////////////////////////// XADD /////////////////////////////// #ifdef __GNUC__ #if __GNUC__ * 10 + __GNUC_MINOR__ >= 42 #if !defined WIN32 && (defined __i486__ || defined __i586__ || defined __i686__ || defined __MMX__ || \ defined __SSE__ || defined __ppc__) #define CV_XADD __sync_fetch_and_add #else #include #define CV_XADD __gnu_cxx::__exchange_and_add #endif #else #include #if __GNUC__ * 10 + __GNUC_MINOR__ >= 34 #define CV_XADD __gnu_cxx::__exchange_and_add #else #define CV_XADD __exchange_and_add #endif #endif #elif defined WIN32 || defined _WIN32 #include #define CV_XADD(addr, delta) _InterlockedExchangeAdd((long volatile *) (addr), (delta)) #else template static inline _Tp CV_XADD(_Tp *addr, _Tp delta) { int tmp = *addr; *addr += delta; return tmp; } #endif //////////////////////// DeviceArray ///////////////////////////// kfusion::cuda::DeviceMemory::DeviceMemory() : data_(0), sizeBytes_(0), refcount_(0) {} kfusion::cuda::DeviceMemory::DeviceMemory(void *ptr_arg, size_t sizeBytes_arg) : data_(ptr_arg), sizeBytes_(sizeBytes_arg), refcount_(0) {} kfusion::cuda::DeviceMemory::DeviceMemory(size_t sizeBtes_arg) : data_(0), sizeBytes_(0), refcount_(0) { create(sizeBtes_arg); } kfusion::cuda::DeviceMemory::~DeviceMemory() { release(); } kfusion::cuda::DeviceMemory::DeviceMemory(const DeviceMemory &other_arg) : data_(other_arg.data_), sizeBytes_(other_arg.sizeBytes_), refcount_(other_arg.refcount_) { if (refcount_) CV_XADD(refcount_, 1); } kfusion::cuda::DeviceMemory &kfusion::cuda::DeviceMemory::operator=(const kfusion::cuda::DeviceMemory &other_arg) { if (this != &other_arg) { if (other_arg.refcount_) CV_XADD(other_arg.refcount_, 1); release(); data_ = other_arg.data_; sizeBytes_ = other_arg.sizeBytes_; refcount_ = other_arg.refcount_; } return *this; } void kfusion::cuda::DeviceMemory::create(size_t sizeBytes_arg) { if (sizeBytes_arg == sizeBytes_) return; if (sizeBytes_arg > 0) { if (data_) release(); sizeBytes_ = sizeBytes_arg; cudaSafeCall(cudaMalloc(&data_, sizeBytes_)); // refcount_ = (int*)cv::fastMalloc(sizeof(*refcount_)); refcount_ = new int; *refcount_ = 1; } } void kfusion::cuda::DeviceMemory::copyTo(DeviceMemory &other) const { if (empty()) other.release(); else { other.create(sizeBytes_); cudaSafeCall(cudaMemcpy(other.data_, data_, sizeBytes_, cudaMemcpyDeviceToDevice)); } } void kfusion::cuda::DeviceMemory::release() { if (refcount_ && CV_XADD(refcount_, -1) == 1) { // cv::fastFree(refcount); delete refcount_; cudaSafeCall(cudaFree(data_)); } data_ = 0; sizeBytes_ = 0; refcount_ = 0; } void kfusion::cuda::DeviceMemory::upload(const void *host_ptr_arg, size_t sizeBytes_arg) { create(sizeBytes_arg); cudaSafeCall(cudaMemcpy(data_, host_ptr_arg, sizeBytes_, cudaMemcpyHostToDevice)); } void kfusion::cuda::DeviceMemory::download(void *host_ptr_arg) const { cudaSafeCall(cudaMemcpy(host_ptr_arg, data_, sizeBytes_, cudaMemcpyDeviceToHost)); } void kfusion::cuda::DeviceMemory::swap(DeviceMemory &other_arg) { std::swap(data_, other_arg.data_); std::swap(sizeBytes_, other_arg.sizeBytes_); std::swap(refcount_, other_arg.refcount_); } bool kfusion::cuda::DeviceMemory::empty() const { return !data_; } size_t 
kfusion::cuda::DeviceMemory::sizeBytes() const { return sizeBytes_; } //////////////////////// DeviceArray2D ///////////////////////////// kfusion::cuda::DeviceMemory2D::DeviceMemory2D() : data_(0), step_(0), colsBytes_(0), rows_(0), refcount_(0) {} kfusion::cuda::DeviceMemory2D::DeviceMemory2D(int rows_arg, int colsBytes_arg) : data_(0), step_(0), colsBytes_(0), rows_(0), refcount_(0) { create(rows_arg, colsBytes_arg); } kfusion::cuda::DeviceMemory2D::DeviceMemory2D(int rows_arg, int colsBytes_arg, void *data_arg, size_t step_arg) : data_(data_arg), step_(step_arg), colsBytes_(colsBytes_arg), rows_(rows_arg), refcount_(0) {} kfusion::cuda::DeviceMemory2D::~DeviceMemory2D() { release(); } kfusion::cuda::DeviceMemory2D::DeviceMemory2D(const DeviceMemory2D &other_arg) : data_(other_arg.data_), step_(other_arg.step_), colsBytes_(other_arg.colsBytes_), rows_(other_arg.rows_), refcount_(other_arg.refcount_) { if (refcount_) CV_XADD(refcount_, 1); } kfusion::cuda::DeviceMemory2D &kfusion::cuda::DeviceMemory2D::operator=( const kfusion::cuda::DeviceMemory2D &other_arg) { if (this != &other_arg) { if (other_arg.refcount_) CV_XADD(other_arg.refcount_, 1); release(); colsBytes_ = other_arg.colsBytes_; rows_ = other_arg.rows_; data_ = other_arg.data_; step_ = other_arg.step_; refcount_ = other_arg.refcount_; } return *this; } void kfusion::cuda::DeviceMemory2D::create(int rows_arg, int colsBytes_arg) { if (colsBytes_ == colsBytes_arg && rows_ == rows_arg) return; if (rows_arg > 0 && colsBytes_arg > 0) { if (data_) release(); colsBytes_ = colsBytes_arg; rows_ = rows_arg; cudaSafeCall(cudaMallocPitch((void **) &data_, &step_, colsBytes_, rows_)); // refcount = (int*)cv::fastMalloc(sizeof(*refcount)); refcount_ = new int; *refcount_ = 1; } } void kfusion::cuda::DeviceMemory2D::release() { if (refcount_ && CV_XADD(refcount_, -1) == 1) { // cv::fastFree(refcount); delete refcount_; cudaSafeCall(cudaFree(data_)); } colsBytes_ = 0; rows_ = 0; data_ = 0; step_ = 0; refcount_ = 0; } void kfusion::cuda::DeviceMemory2D::copyTo(DeviceMemory2D &other) const { if (empty()) other.release(); else { other.create(rows_, colsBytes_); cudaSafeCall(cudaMemcpy2D(other.data_, other.step_, data_, step_, colsBytes_, rows_, cudaMemcpyDeviceToDevice)); } } void kfusion::cuda::DeviceMemory2D::upload(const void *host_ptr_arg, size_t host_step_arg, int rows_arg, int colsBytes_arg) { create(rows_arg, colsBytes_arg); cudaSafeCall(cudaMemcpy2D(data_, step_, host_ptr_arg, host_step_arg, colsBytes_, rows_, cudaMemcpyHostToDevice)); } void kfusion::cuda::DeviceMemory2D::download(void *host_ptr_arg, size_t host_step_arg) const { cudaSafeCall(cudaMemcpy2D(host_ptr_arg, host_step_arg, data_, step_, colsBytes_, rows_, cudaMemcpyDeviceToHost)); } void kfusion::cuda::DeviceMemory2D::swap(DeviceMemory2D &other_arg) { std::swap(data_, other_arg.data_); std::swap(step_, other_arg.step_); std::swap(colsBytes_, other_arg.colsBytes_); std::swap(rows_, other_arg.rows_); std::swap(refcount_, other_arg.refcount_); } bool kfusion::cuda::DeviceMemory2D::empty() const { return !data_; } int kfusion::cuda::DeviceMemory2D::colsBytes() const { return colsBytes_; } int kfusion::cuda::DeviceMemory2D::rows() const { return rows_; } size_t kfusion::cuda::DeviceMemory2D::step() const { return step_; } ================================================ FILE: src/kfusion/imgproc.cpp ================================================ #include void kfusion::cuda::depthBilateralFilter(const Depth &in, Depth &out, int kernel_size, float sigma_spatial, float sigma_depth) 
{ out.create(in.rows(), in.cols()); device::bilateralFilter(in, out, kernel_size, sigma_spatial, sigma_depth); } void kfusion::cuda::depthTruncation(Depth &depth, float threshold) { device::truncateDepth(depth, threshold); } void kfusion::cuda::depthBuildPyramid(const Depth &depth, Depth &pyramid, float sigma_depth) { pyramid.create(depth.rows() / 2, depth.cols() / 2); device::depthPyr(depth, pyramid, sigma_depth); } void kfusion::cuda::waitAllDefaultStream() { cudaSafeCall(cudaDeviceSynchronize()); } void kfusion::cuda::computeNormalsAndMaskDepth(const Intr &intr, Depth &depth, Normals &normals) { normals.create(depth.rows(), depth.cols()); device::Reprojector reproj(intr.fx, intr.fy, intr.cx, intr.cy); device::Normals &n = (device::Normals &) normals; device::computeNormalsAndMaskDepth(reproj, depth, n); } void kfusion::cuda::computePointNormals(const Intr &intr, const Depth &depth, Cloud &points, Normals &normals) { points.create(depth.rows(), depth.cols()); normals.create(depth.rows(), depth.cols()); device::Reprojector reproj(intr.fx, intr.fy, intr.cx, intr.cy); device::Points &p = (device::Points &) points; device::Normals &n = (device::Normals &) normals; device::computePointNormals(reproj, depth, p, n); } void kfusion::cuda::computeDists(const Depth &depth, Dists &dists, const Intr &intr) { dists.create(depth.rows(), depth.cols()); device::compute_dists(depth, dists, make_float2(intr.fx, intr.fy), make_float2(intr.cx, intr.cy)); } void kfusion::cuda::resizeDepthNormals(const Depth &depth, const Normals &normals, Depth &depth_out, Normals &normals_out) { depth_out.create(depth.rows() / 2, depth.cols() / 2); normals_out.create(normals.rows() / 2, normals.cols() / 2); device::Normals &nsrc = (device::Normals &) normals; device::Normals &ndst = (device::Normals &) normals_out; device::resizeDepthNormals(depth, nsrc, depth_out, ndst); } void kfusion::cuda::resizePointsNormals(const Cloud &points, const Normals &normals, Cloud &points_out, Normals &normals_out) { points_out.create(points.rows() / 2, points.cols() / 2); normals_out.create(normals.rows() / 2, normals.cols() / 2); device::Points &pi = (device::Points &) points; device::Normals &ni = (device::Normals &) normals; device::Points &po = (device::Points &) points_out; device::Normals &no = (device::Normals &) normals_out; device::resizePointsNormals(pi, ni, po, no); } void kfusion::cuda::rasteriseSurface(const Intr &intr, const Affine3f &vol2cam, const Surface &s, Cloud &vertices_out, Normals &normals_out) { device::Projector proj(intr.fx, intr.fy, intr.cx, intr.cy); device::Aff3f dev_vol2cam = device_cast(vol2cam); /* convert warped triangle vertices & normals to point clouds */ pcl::PointCloud vertices_warped; s.vertices.download(vertices_warped.points); pcl::PointCloud normals_warped; s.normals.download(normals_warped.points); /* lay out the warped triangle vertices and normals in memory */ std::vector triangle_vertices; /* the index of each entry in the vector, modulo 3, stores the 1st, 2nd, and 3rd vertex of a triangle */ std::vector triangle_normals; for (size_t i = 0; i < vertices_warped.size() / 3; i++) { triangle_vertices.emplace_back(vertices_warped[i * 3 + 0]); triangle_vertices.emplace_back(vertices_warped[i * 3 + 1]); triangle_vertices.emplace_back(vertices_warped[i * 3 + 2]); triangle_normals.emplace_back(normals_warped[i * 3 + 0]); triangle_normals.emplace_back(normals_warped[i * 3 + 1]); triangle_normals.emplace_back(normals_warped[i * 3 + 2]); } kfusion::cuda::Vertices vertices; 
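/* NOTE: the re-packed per-triangle vertices and normals are uploaded to device buffers below and handed to
 * device::rasteriseSurface, which presumably projects each warped triangle through the Projector with the vol2cam
 * pose to fill the output vertex and normal maps. */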
vertices.upload(triangle_vertices); kfusion::cuda::Norms normals; normals.upload(triangle_normals); kfusion::cuda::Surface surface; surface.vertices = vertices; surface.normals = normals; /* convert to classes that can be used in cuda */ const device::Surface &dev_surface = (const device::Surface &) surface; device::Points &vo = (device::Points &) vertices_out; device::Normals &no = (device::Normals &) normals_out; device::rasteriseSurface(proj, dev_vol2cam, dev_surface, vo, no); waitAllDefaultStream(); } ================================================ FILE: src/kfusion/kinfu.cpp ================================================ #include #include using namespace std; using namespace kfusion; using namespace kfusion::cuda; static inline float deg2rad(float alpha) { return alpha * 0.017453293f; } kfusion::KinFuParams kfusion::KinFuParams::default_params() { const int iters[] = {10, 5, 4, 0}; const int levels = sizeof(iters) / sizeof(iters[0]); KinFuParams p; p.cols = 640; // pixels p.rows = 480; // pixels p.intr = Intr(525.f, 525.f, p.cols / 2 - 0.5f, p.rows / 2 - 0.5f); p.volume_dims = Vec3i::all(512); // number of voxels p.volume_size = Vec3f::all(3.f); // meters p.volume_pose = Affine3f().translate(Vec3f(-p.volume_size[0] / 2, -p.volume_size[1] / 2, 0.5f)); p.bilateral_sigma_depth = 0.04f; // meter p.bilateral_sigma_spatial = 4.5; // pixels p.bilateral_kernel_size = 7; // pixels p.icp_truncate_depth_dist = 0.f; // meters, disabled p.icp_dist_thres = 0.1f; // meters p.icp_angle_thres = deg2rad(30.f); // radians p.icp_iter_num.assign(iters, iters + levels); p.tsdf_min_camera_movement = 0.f; // meters, disabled p.tsdf_trunc_dist = 0.04f; // meters; p.tsdf_max_weight = 64; // frames p.raycast_step_factor = 0.75f; // in voxel sizes p.gradient_delta_factor = 0.5f; // in voxel sizes // p.light_pose = p.volume_pose.translation()/4; //meters p.light_pose = Vec3f::all(0.f); // meters return p; } const kfusion::KinFuParams &kfusion::KinFu::params() const { return params_; } kfusion::KinFuParams &kfusion::KinFu::params() { return params_; } const kfusion::cuda::TsdfVolume &kfusion::KinFu::tsdf() const { return *volume_; } kfusion::cuda::TsdfVolume &kfusion::KinFu::tsdf() { return *volume_; } const kfusion::cuda::ProjectiveICP &kfusion::KinFu::icp() const { return *icp_; } kfusion::cuda::ProjectiveICP &kfusion::KinFu::icp() { return *icp_; } const kfusion::cuda::MarchingCubes &kfusion::KinFu::mc() const { return *mc_; } kfusion::cuda::MarchingCubes &kfusion::KinFu::mc() { return *mc_; } void kfusion::KinFu::allocate_buffers() { const int LEVELS = cuda::ProjectiveICP::MAX_PYRAMID_LEVELS; int cols = params_.cols; int rows = params_.rows; dists_.create(rows, cols); curr_.depth_pyr.resize(LEVELS); curr_.normals_pyr.resize(LEVELS); prev_.depth_pyr.resize(LEVELS); prev_.normals_pyr.resize(LEVELS); curr_.points_pyr.resize(LEVELS); prev_.points_pyr.resize(LEVELS); for (int i = 0; i < LEVELS; ++i) { curr_.depth_pyr[i].create(rows, cols); curr_.normals_pyr[i].create(rows, cols); prev_.depth_pyr[i].create(rows, cols); prev_.normals_pyr[i].create(rows, cols); curr_.points_pyr[i].create(rows, cols); prev_.points_pyr[i].create(rows, cols); cols /= 2; rows /= 2; } depths_.create(params_.rows, params_.cols); normals_.create(params_.rows, params_.cols); points_.create(params_.rows, params_.cols); } void kfusion::KinFu::reset() { if (frame_counter_) cout << "Reset" << endl; frame_counter_ = 0; poses_.clear(); poses_.reserve(30000); poses_.push_back(Affine3f::Identity()); volume_->clear(); } kfusion::Affine3f 
kfusion::KinFu::getCameraPose(int time) const { if ((time > static_cast(poses_.size())) || (time < 0)) { time = static_cast(poses_.size()) - 1; } return poses_[time]; } ================================================ FILE: src/kfusion/marching_cubes.cpp ================================================ /* kinfu includes */ #include #include #include #include /* sys headers */ #include extern const int edgeTable[256]; extern const int triTable[256][16]; extern const int numVertsTable[256]; kfusion::cuda::MarchingCubes::MarchingCubes() { edgeTable_.upload(edgeTable, 256); numVertsTable_.upload(numVertsTable, 256); triTable_.upload(&triTable[0][0], 256 * 16); } kfusion::cuda::MarchingCubes::~MarchingCubes() = default; void kfusion::cuda::MarchingCubes::setPose(const cv::Affine3f& pose_) { this->pose = pose_; } kfusion::cuda::Surface kfusion::cuda::MarchingCubes::run(const kfusion::cuda::TsdfVolume& volume, kfusion::cuda::DeviceArray& vertices_buffer, kfusion::cuda::DeviceArray& normals_buffer) { if (vertices_buffer.empty()) { vertices_buffer.create(DEFAULT_TRIANGLES_BUFFER_SIZE); } if (normals_buffer.empty()) { normals_buffer.create(DEFAULT_TRIANGLES_BUFFER_SIZE); } occupied_voxels_buffer_.create(3, static_cast(vertices_buffer.size() / 3)); kfusion::device::bindTextures(edgeTable_, triTable_, numVertsTable_); int3 dims = device_cast(volume.getDims()); float3 voxel_size = device_cast(volume.getVoxelSize()); float trunc_dist = volume.getTruncDist(); float eta = volume.getEta(); float max_weight = volume.getMaxWeight(); kfusion::device::TsdfVolume vol(volume.data().ptr(), dims, voxel_size, trunc_dist, eta, max_weight); int active_voxels = kfusion::device::getOccupiedVoxels(vol, occupied_voxels_buffer_); std::cout << "no. of active voxels: " << active_voxels << std::endl; if (!active_voxels) { kfusion::device::unbindTextures(); return kfusion::cuda::Surface(); } kfusion::cuda::DeviceArray2D occupied_voxels(3, active_voxels, occupied_voxels_buffer_.ptr(), occupied_voxels_buffer_.step()); int total_vertices = kfusion::device::computeOffsetsAndTotalVertices(occupied_voxels); float3 volume_size = device_cast(volume.getSize()); const device::Aff3f pose_device = device_cast(pose); kfusion::device::generateTriangles(vol, occupied_voxels, volume_size, pose_device, (kfusion::cuda::DeviceArray&) vertices_buffer, (kfusion::cuda::DeviceArray&) normals_buffer); kfusion::device::unbindTextures(); kfusion::cuda::DeviceArray v(vertices_buffer.ptr(), total_vertices); kfusion::cuda::DeviceArray n(normals_buffer.ptr(), total_vertices); kfusion::cuda::Surface surface; surface.vertices = v; surface.normals = n; return surface; } /* edge table maps 8-bit flag representing which cube vertices are inside the isosurface to 12-bit number indicating * which edges are intersected */ const int edgeTable[256] = { 0x0, 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c, 0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00, 0x190, 0x99, 0x393, 0x29a, 0x596, 0x49f, 0x795, 0x69c, 0x99c, 0x895, 0xb9f, 0xa96, 0xd9a, 0xc93, 0xf99, 0xe90, 0x230, 0x339, 0x33, 0x13a, 0x636, 0x73f, 0x435, 0x53c, 0xa3c, 0xb35, 0x83f, 0x936, 0xe3a, 0xf33, 0xc39, 0xd30, 0x3a0, 0x2a9, 0x1a3, 0xaa, 0x7a6, 0x6af, 0x5a5, 0x4ac, 0xbac, 0xaa5, 0x9af, 0x8a6, 0xfaa, 0xea3, 0xda9, 0xca0, 0x460, 0x569, 0x663, 0x76a, 0x66, 0x16f, 0x265, 0x36c, 0xc6c, 0xd65, 0xe6f, 0xf66, 0x86a, 0x963, 0xa69, 0xb60, 0x5f0, 0x4f9, 0x7f3, 0x6fa, 0x1f6, 0xff, 0x3f5, 0x2fc, 0xdfc, 0xcf5, 0xfff, 0xef6, 0x9fa, 0x8f3, 0xbf9, 0xaf0, 0x650, 0x759, 0x453, 0x55a, 0x256, 0x35f, 0x55, 0x15c, 0xe5c, 
0xf55, 0xc5f, 0xd56, 0xa5a, 0xb53, 0x859, 0x950, 0x7c0, 0x6c9, 0x5c3, 0x4ca, 0x3c6, 0x2cf, 0x1c5, 0xcc, 0xfcc, 0xec5, 0xdcf, 0xcc6, 0xbca, 0xac3, 0x9c9, 0x8c0, 0x8c0, 0x9c9, 0xac3, 0xbca, 0xcc6, 0xdcf, 0xec5, 0xfcc, 0xcc, 0x1c5, 0x2cf, 0x3c6, 0x4ca, 0x5c3, 0x6c9, 0x7c0, 0x950, 0x859, 0xb53, 0xa5a, 0xd56, 0xc5f, 0xf55, 0xe5c, 0x15c, 0x55, 0x35f, 0x256, 0x55a, 0x453, 0x759, 0x650, 0xaf0, 0xbf9, 0x8f3, 0x9fa, 0xef6, 0xfff, 0xcf5, 0xdfc, 0x2fc, 0x3f5, 0xff, 0x1f6, 0x6fa, 0x7f3, 0x4f9, 0x5f0, 0xb60, 0xa69, 0x963, 0x86a, 0xf66, 0xe6f, 0xd65, 0xc6c, 0x36c, 0x265, 0x16f, 0x66, 0x76a, 0x663, 0x569, 0x460, 0xca0, 0xda9, 0xea3, 0xfaa, 0x8a6, 0x9af, 0xaa5, 0xbac, 0x4ac, 0x5a5, 0x6af, 0x7a6, 0xaa, 0x1a3, 0x2a9, 0x3a0, 0xd30, 0xc39, 0xf33, 0xe3a, 0x936, 0x83f, 0xb35, 0xa3c, 0x53c, 0x435, 0x73f, 0x636, 0x13a, 0x33, 0x339, 0x230, 0xe90, 0xf99, 0xc93, 0xd9a, 0xa96, 0xb9f, 0x895, 0x99c, 0x69c, 0x795, 0x49f, 0x596, 0x29a, 0x393, 0x99, 0x190, 0xf00, 0xe09, 0xd03, 0xc0a, 0xb06, 0xa0f, 0x905, 0x80c, 0x70c, 0x605, 0x50f, 0x406, 0x30a, 0x203, 0x109, 0x0}; /* triangle table maps same cube vertex index to a list of up to 5 triangles which are built from the interpolated edge * vertices */ const int triTable[256][16] = {{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1}, {3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1}, {3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1}, {3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1}, {9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1}, {1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1}, {9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1}, {8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1}, {9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1}, {3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1}, {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1}, {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1}, {4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1}, {9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1}, {1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, {5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, 
-1, -1, -1}, {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1}, {9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, {0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1}, {10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1}, {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1}, {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1}, {5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1}, {9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1}, {0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1}, {1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1}, {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1}, {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1}, {2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1}, {7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1}, {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1}, {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1}, {11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1}, {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1}, {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1}, {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1}, {11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, {1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1}, {9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1}, {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1}, {2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, {0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1}, {6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1}, {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1}, {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1}, {6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1}, {5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1}, {1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1}, {6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1}, {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1}, {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1}, {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1}, {3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1}, {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1}, {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1}, {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1}, {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, -1}, {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1}, {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1}, {10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1}, {10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1}, {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1}, {1, 4, 9, 1, 2, 4, 2, 6, 4, -1, 
-1, -1, -1, -1, -1, -1}, {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1}, {0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1}, {10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1}, {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1}, {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1}, {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1}, {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1}, {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1}, {3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1}, {6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1}, {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1}, {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1}, {10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1}, {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, -1, -1, -1, -1}, {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1}, {7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1}, {7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1}, {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1}, {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1}, {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1}, {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1}, {0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1}, {7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, {10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, {2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1}, {7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1}, {2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1}, {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1}, {10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1}, {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1}, {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1}, {7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1}, {6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1}, {8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1}, {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1}, {6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1}, {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1}, {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1}, {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1}, {8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1}, {0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1}, {1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1}, {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1}, {10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1}, {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1}, {10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, {5, 0, 1, 5, 4, 0, 
7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1}, {9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1}, {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1}, {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1}, {7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1}, {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1}, {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1}, {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1}, {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1}, {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1}, {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1}, {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1}, {6, 9, 5, 6, 11, 9, 11, 8, 9, -1, -1, -1, -1, -1, -1, -1}, {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1}, {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1}, {6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1}, {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1}, {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1}, {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1}, {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1}, {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1}, {9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1}, {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1}, {1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1}, {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1}, {0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1}, {5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1}, {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1}, {11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1}, {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1}, {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1}, {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1}, {2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1}, {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1}, {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1}, {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1}, {1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1}, {9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1}, {9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1}, {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1}, {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1}, {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1}, {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1}, {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1}, {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1}, {9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1}, {5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1}, {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1}, {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1}, {8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1}, {0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1}, {9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1}, {0, 
8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1}, {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1}, {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1}, {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1}, {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1}, {11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1}, {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1}, {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1}, {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1}, {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1}, {1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, -1}, {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1}, {4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1}, {0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1}, {3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1}, {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1}, {0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1}, {9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1}, {1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}; /* number of vertices for each case above */ const int numVertsTable[256] = { 0, 3, 3, 6, 3, 6, 6, 9, 3, 6, 6, 9, 6, 9, 9, 6, 3, 6, 6, 9, 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 3, 6, 6, 9, 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 6, 9, 9, 6, 9, 12, 12, 9, 9, 12, 12, 9, 12, 15, 15, 6, 3, 6, 6, 9, 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 6, 9, 9, 12, 9, 12, 12, 15, 9, 12, 12, 15, 12, 15, 15, 12, 6, 9, 9, 12, 9, 12, 6, 9, 9, 12, 12, 15, 12, 15, 9, 6, 9, 12, 12, 9, 12, 15, 9, 6, 12, 15, 15, 12, 15, 6, 12, 3, 3, 6, 6, 9, 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 6, 9, 9, 12, 9, 12, 12, 15, 9, 6, 12, 9, 12, 9, 15, 6, 6, 9, 9, 12, 9, 12, 12, 15, 9, 12, 12, 15, 12, 15, 15, 12, 9, 12, 12, 9, 12, 15, 15, 12, 12, 9, 15, 6, 15, 12, 6, 3, 6, 9, 9, 12, 9, 12, 12, 15, 9, 12, 12, 15, 6, 9, 9, 6, 9, 12, 12, 15, 12, 15, 15, 6, 12, 9, 15, 12, 9, 6, 12, 3, 9, 12, 12, 15, 12, 15, 9, 12, 12, 15, 15, 6, 9, 12, 6, 3, 6, 9, 9, 6, 9, 12, 6, 3, 9, 6, 12, 3, 6, 3, 3, 0, }; ================================================ FILE: src/kfusion/precomp.cpp ================================================ #include #include //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Kinfu/types implementation kfusion::Intr::Intr() {} kfusion::Intr::Intr(float fx_, float fy_, float cx_, float cy_) : fx(fx_), fy(fy_), cx(cx_), cy(cy_) {} kfusion::Intr kfusion::Intr::operator()(int level_index) const { int div = 1 << level_index; return (Intr(fx / div, fy / div, cx / div, cy / div)); } std::ostream &operator<<(std::ostream &os, const kfusion::Intr &intr) { return os << "([f = " << intr.fx << ", " << intr.fy << "] [cp = " << intr.cx << ", " << intr.cy << "])"; } 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// TsdfVolume host implementation kfusion::device::TsdfVolume::TsdfVolume(float2 *data, int3 dims, float3 voxel_size, float trunc_dist, float eta, float max_weight) : data(data), dims(dims), voxel_size(voxel_size), trunc_dist(trunc_dist), eta(eta), max_weight(max_weight) {} // kfusion::device::TsdfVolume::float2* // kfusionl::device::TsdfVolume::operator()(int x, int y, int z) { return data + // x + y*dims.x + z*dims.y*dims.x; } // // const kfusion::device::TsdfVolume::float2* // kfusionl::device::TsdfVolume::operator() (int x, int y, int z) const { return // data + x + y*dims.x + z*dims.y*dims.x; } // // kfusion::device::TsdfVolume::float2* kfusionl::device::TsdfVolume::beg(int // x, int y) const { return data + x + dims.x * y; } // // kfusion::device::TsdfVolume::float2* // kfusionl::device::TsdfVolume::zstep(float2 *const ptr) const { return data //+ dims.x * dims.y; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Projector host implementation kfusion::device::Projector::Projector(float fx, float fy, float cx, float cy) : f(make_float2(fx, fy)), c(make_float2(cx, cy)) {} // float2 kfusion::device::Projector::operator()(const float3& p) const //{ // float2 coo; // coo.x = p.x * f.x / p.z + c.x; // coo.y = p.y * f.y / p.z + c.y; // return coo; //} //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Reprojector host implementation kfusion::device::Reprojector::Reprojector(float fx, float fy, float cx, float cy) : finv(make_float2(1.f / fx, 1.f / fy)), c(make_float2(cx, cy)) {} // float3 kfusion::device::Reprojector::operator()(int u, int v, float z) const //{ // float x = z * (u - c.x) * finv.x; // float y = z * (v - c.y) * finv.y; // return make_float3(x, y, z); //} //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Host implementation of packing/unpacking tsdf volume element // float2 kfusion::device::pack_tsdf(float tsdf, int weight) { throw "Not // implemented"; return float2(); } float kfusion::device::unpack_tsdf(float2 // value, int& weight) { throw "Not implemented"; return 0; } float // kfusion::device::unpack_tsdf(float2 value) { throw "Not implemented"; return // 0; } ================================================ FILE: src/kfusion/projective_icp.cpp ================================================ #include using namespace kfusion; using namespace kfusion::cuda; using namespace std; //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// ComputeIcpHelper kfusion::device::ComputeIcpHelper::ComputeIcpHelper(float dist_thres, float angle_thres) { min_cosine = cos(angle_thres); dist2_thres = dist_thres * dist_thres; } void kfusion::device::ComputeIcpHelper::setLevelIntr(int level_index, float fx, float fy, float cx, float cy) { int div = 1 << level_index; f = make_float2(fx / div, fy / div); c = make_float2(cx / div, cy / div); finv = make_float2(1.f / f.x, 1.f / f.y); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// ProjectiveICP::StreamHelper struct kfusion::cuda::ProjectiveICP::StreamHelper { typedef device::ComputeIcpHelper::PageLockHelper PageLockHelper; typedef cv::Matx66f Mat6f; typedef cv::Vec6f Vec6f; cudaStream_t stream; 
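/* locked_buffer (declared below) receives the 27 floats accumulated on the GPU for one linearised system
 * A x = b: for each row i of the symmetric 6x6 matrix A, the upper-triangular entries A(i,i)..A(i,5) are
 * stored first, followed by b(i), i.e. 7 + 6 + 5 + 4 + 3 + 2 = 27 values in total. get() below mirrors the
 * off-diagonal entries to rebuild the full matrix before the system is solved with cv::solve() in
 * estimateTransform(). */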
PageLockHelper locked_buffer; StreamHelper() { cudaSafeCall(cudaStreamCreate(&stream)); } ~StreamHelper() { cudaSafeCall(cudaStreamDestroy(stream)); } operator float *() { return locked_buffer.data; } operator cudaStream_t() { return stream; } Mat6f get(Vec6f &b) { cudaSafeCall(cudaStreamSynchronize(stream)); Mat6f A; float *data_A = A.val; float *data_b = b.val; int shift = 0; for (int i = 0; i < 6; ++i) // rows for (int j = i; j < 7; ++j) // cols + b { float value = locked_buffer.data[shift++]; if (j == 6) // vector b data_b[i] = value; else data_A[j * 6 + i] = data_A[i * 6 + j] = value; } return A; } }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// ProjectiveICP kfusion::cuda::ProjectiveICP::ProjectiveICP() : angle_thres_(deg2rad(20.f)), dist_thres_(0.1f) { const int iters[] = {10, 5, 4, 0}; std::vector vector_iters(iters, iters + 4); setIterationsNum(vector_iters); device::ComputeIcpHelper::allocate_buffer(buffer_); shelp_ = cv::Ptr(new StreamHelper()); } kfusion::cuda::ProjectiveICP::~ProjectiveICP() {} float kfusion::cuda::ProjectiveICP::getDistThreshold() const { return dist_thres_; } void kfusion::cuda::ProjectiveICP::setDistThreshold(float distance) { dist_thres_ = distance; } float kfusion::cuda::ProjectiveICP::getAngleThreshold() const { return angle_thres_; } void kfusion::cuda::ProjectiveICP::setAngleThreshold(float angle) { angle_thres_ = angle; } void kfusion::cuda::ProjectiveICP::setIterationsNum(const std::vector &iters) { if (iters.size() >= MAX_PYRAMID_LEVELS) iters_.assign(iters.begin(), iters.begin() + MAX_PYRAMID_LEVELS); else { iters_ = vector(MAX_PYRAMID_LEVELS, 0); copy(iters.begin(), iters.end(), iters_.begin()); } } int kfusion::cuda::ProjectiveICP::getUsedLevelsNum() const { int i = MAX_PYRAMID_LEVELS - 1; for (; i >= 0 && !iters_[i]; --i) ; return i + 1; } bool kfusion::cuda::ProjectiveICP::estimateTransform(Affine3f & /*affine*/, const Intr & /*intr*/, const Frame & /*curr*/, const Frame & /*prev*/) { // bool has_depth = !curr.depth_pyr.empty() && !prev.depth_pyr.empty(); // bool has_points = !curr.points_pyr.empty() && !prev.points_pyr.empty(); // if (has_depth) // return estimateTransform(affine, intr, curr.depth_pyr, // curr.normals_pyr, prev.depth_pyr, prev.normals_pyr); // else if(has_points) // return estimateTransform(affine, intr, curr.points_pyr, // curr.normals_pyr, prev.points_pyr, prev.normals_pyr); // else // CV_Assert(!"Wrong parameters passed to estimateTransform"); CV_Assert(!"Not implemented"); return false; } bool kfusion::cuda::ProjectiveICP::estimateTransform(Affine3f &affine, const Intr &intr, const DepthPyr &dcurr, const NormalsPyr ncurr, const DepthPyr dprev, const NormalsPyr nprev) { const int LEVELS = getUsedLevelsNum(); StreamHelper &sh = *shelp_; device::ComputeIcpHelper helper(dist_thres_, angle_thres_); affine = Affine3f::Identity(); for (int level_index = LEVELS - 1; level_index >= 0; --level_index) { const device::Normals &n = (const device::Normals &) nprev[level_index]; helper.rows = (float) n.rows(); helper.cols = (float) n.cols(); helper.setLevelIntr(level_index, intr.fx, intr.fy, intr.cx, intr.cy); helper.dcurr = dcurr[level_index]; helper.ncurr = ncurr[level_index]; for (int iter = 0; iter < iters_[level_index]; ++iter) { helper.aff = device_cast(affine); helper(dprev[level_index], n, buffer_, sh, sh); StreamHelper::Vec6f b; StreamHelper::Mat6f A = sh.get(b); // checking nullspace double det = cv::determinant(A); if (fabs(det) < 1e-15 || 
cv::viz::isNan(det)) { if (cv::viz::isNan(det)) cout << "qnan" << endl; return false; } StreamHelper::Vec6f r; cv::solve(A, b, r, cv::DECOMP_SVD); Affine3f Tinc(Vec3f(r.val), Vec3f(r.val + 3)); affine = Tinc * affine; } } return true; } bool kfusion::cuda::ProjectiveICP::estimateTransform(Affine3f &affine, const Intr &intr, const PointsPyr &vcurr, const NormalsPyr ncurr, const PointsPyr vprev, const NormalsPyr nprev) { const int LEVELS = getUsedLevelsNum(); StreamHelper &sh = *shelp_; device::ComputeIcpHelper helper(dist_thres_, angle_thres_); affine = Affine3f::Identity(); for (int level_index = LEVELS - 1; level_index >= 0; --level_index) { const device::Normals &n = (const device::Normals &) nprev[level_index]; const device::Points &v = (const device::Points &) vprev[level_index]; helper.rows = (float) n.rows(); helper.cols = (float) n.cols(); helper.setLevelIntr(level_index, intr.fx, intr.fy, intr.cx, intr.cy); helper.vcurr = vcurr[level_index]; helper.ncurr = ncurr[level_index]; for (int iter = 0; iter < iters_[level_index]; ++iter) { helper.aff = device_cast(affine); helper(v, n, buffer_, sh, sh); StreamHelper::Vec6f b; StreamHelper::Mat6f A = sh.get(b); // checking nullspace double det = cv::determinant(A); if (fabs(det) < 1e-15 || cv::viz::isNan(det)) { if (cv::viz::isNan(det)) cout << "qnan" << endl; return false; } StreamHelper::Vec6f r; cv::solve(A, b, r, cv::DECOMP_SVD); Affine3f Tinc(Vec3f(r.val), Vec3f(r.val + 3)); affine = Tinc * affine; } } return true; } ================================================ FILE: src/kfusion/tsdf_volume.cpp ================================================ #include using namespace kfusion; using namespace kfusion::cuda; //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// TsdfVolume::Entry float kfusion::cuda::TsdfVolume::Entry::half2float(half) { throw "Not implemented"; } kfusion::cuda::TsdfVolume::Entry::half kfusion::cuda::TsdfVolume::Entry::float2half(float value) { throw "Not implemented"; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// TsdfVolume kfusion::cuda::TsdfVolume::TsdfVolume(const Params ¶ms) { dims_ = params.volume_dims; size_ = params.volume_size; pose_ = params.volume_pose; trunc_dist_ = params.tsdf_trunc_dist; eta_ = params.eta; max_weight_ = params.tsdf_max_weight; gradient_delta_factor_ = params.gradient_delta_factor; create(dims_); } kfusion::cuda::TsdfVolume::~TsdfVolume() {} void kfusion::cuda::TsdfVolume::create(const Vec3i &dims) { dims_ = dims; int no_voxels = dims_[0] * dims_[1] * dims_[2]; data_.create(no_voxels * 2 * sizeof(float)); clear(); } Vec3i kfusion::cuda::TsdfVolume::getDims() const { return dims_; } Vec3f kfusion::cuda::TsdfVolume::getVoxelSize() const { return Vec3f(size_[0] / dims_[0], size_[1] / dims_[1], size_[2] / dims_[2]); } const CudaData kfusion::cuda::TsdfVolume::data() const { return data_; } CudaData kfusion::cuda::TsdfVolume::data() { return data_; } Vec3f kfusion::cuda::TsdfVolume::getSize() const { return size_; } void kfusion::cuda::TsdfVolume::setSize(const Vec3f &size) { size_ = size; } float kfusion::cuda::TsdfVolume::getTruncDist() const { return trunc_dist_; } void kfusion::cuda::TsdfVolume::setTruncDist(float &distance) { trunc_dist_ = distance; } float kfusion::cuda::TsdfVolume::getEta() const { return eta_; } void kfusion::cuda::TsdfVolume::setEta(float &eta) { eta_ = eta; } float kfusion::cuda::TsdfVolume::getMaxWeight() const { return max_weight_; 
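/* Worked example (illustrative; the volume parameters are hypothetical): with volume_dims = 512^3 and
 * volume_size = 3 m per side, getVoxelSize() yields roughly 5.86 mm per voxel and create() allocates
 * 512^3 * 2 * sizeof(float) = 1 GiB of device memory, i.e. two floats (tsdf value and weight) per voxel. */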
} void kfusion::cuda::TsdfVolume::setMaxWeight(float &weight) { max_weight_ = weight; } Affine3f kfusion::cuda::TsdfVolume::getPose() const { return pose_; } void kfusion::cuda::TsdfVolume::setPose(const Affine3f &pose) { pose_ = pose; } float kfusion::cuda::TsdfVolume::getRaycastStepFactor() const { return raycast_step_factor_; } void kfusion::cuda::TsdfVolume::setRaycastStepFactor(float &factor) { raycast_step_factor_ = factor; } float kfusion::cuda::TsdfVolume::getGradientDeltaFactor() const { return gradient_delta_factor_; } void kfusion::cuda::TsdfVolume::setGradientDeltaFactor(float &factor) { gradient_delta_factor_ = factor; } void kfusion::cuda::TsdfVolume::clear() { device::Vec3i dims = device_cast(dims_); device::Vec3f vsz = device_cast(getVoxelSize()); device::TsdfVolume volume(data_.ptr(), dims, vsz, trunc_dist_, eta_, max_weight_); device::clear_volume(volume); } void kfusion::cuda::TsdfVolume::swap(CudaData &data) { data_.swap(data); } void kfusion::cuda::TsdfVolume::applyAffine(const Affine3f &affine) { pose_ = affine * pose_; } void kfusion::cuda::TsdfVolume::integrate(const TsdfVolume &phi_n_psi) { device::Vec3i dims = device_cast(dims_); device::Vec3f vsz = device_cast(getVoxelSize()); device::TsdfVolume phi_global_device(data_.ptr(), dims, vsz, trunc_dist_, eta_, max_weight_); device::TsdfVolume phi_n_psi_device(phi_n_psi.data().ptr(), dims, vsz, trunc_dist_, eta_, max_weight_); device::integrate(phi_global_device, phi_n_psi_device); } void kfusion::cuda::TsdfVolume::integrate(const Dists &dists, const Affine3f &camera_pose, const Intr &intr) { Affine3f vol2cam = camera_pose.inv() * pose_; device::Aff3f aff = device_cast(vol2cam); device::Projector proj(intr.fx, intr.fy, intr.cx, intr.cy); device::Vec3i dims = device_cast(dims_); device::Vec3f vsz = device_cast(getVoxelSize()); device::TsdfVolume volume(data_.ptr(), dims, vsz, trunc_dist_, eta_, max_weight_); device::integrate(dists, volume, aff, proj); } void kfusion::cuda::TsdfVolume::initBox(const float3 &b) { device::Vec3i dims = device_cast(dims_); device::Vec3f vsz = device_cast(getVoxelSize()); device::TsdfVolume volume(data_.ptr(), dims, vsz, trunc_dist_, eta_, max_weight_); device::init_box(volume, b); } void kfusion::cuda::TsdfVolume::initEllipsoid(const float3 &r) { device::Vec3i dims = device_cast(dims_); device::Vec3f vsz = device_cast(getVoxelSize()); device::TsdfVolume volume(data_.ptr(), dims, vsz, trunc_dist_, eta_, max_weight_); device::init_ellipsoid(volume, r); } void kfusion::cuda::TsdfVolume::initPlane(const float &z) { device::Vec3i dims = device_cast(dims_); device::Vec3f vsz = device_cast(getVoxelSize()); device::TsdfVolume volume(data_.ptr(), dims, vsz, trunc_dist_, eta_, max_weight_); device::init_plane(volume, z); } void kfusion::cuda::TsdfVolume::initSphere(const float3 ¢re, const float &radius) { device::Vec3i dims = device_cast(dims_); device::Vec3f vsz = device_cast(getVoxelSize()); device::TsdfVolume volume(data_.ptr(), dims, vsz, trunc_dist_, eta_, max_weight_); device::init_sphere(volume, centre, radius); } void kfusion::cuda::TsdfVolume::initTorus(const float2 &t) { device::Vec3i dims = device_cast(dims_); device::Vec3f vsz = device_cast(getVoxelSize()); device::TsdfVolume volume(data_.ptr(), dims, vsz, trunc_dist_, eta_, max_weight_); device::init_torus(volume, t); } void kfusion::cuda::TsdfVolume::print_sdf_values() { cv::Vec3i dims = getDims(); float2 *phi_data_ptr = new float2[dims[0] * dims[1] * dims[2]]; data().download(phi_data_ptr); for (int i = 0; i < dims[0]; i++) { for (int 
j = 0; j < dims[1]; j++) { for (int k = 0; k < dims[2]; k++) { float2 val = phi_data_ptr[k * dims[1] * dims[0] + j * dims[0] + i]; if (val.x != 0.f) { std::cout << val.x << std::endl; } } } } } ================================================ FILE: src/sobfu/CMakeLists.txt ================================================ # Build all sobfu source files into library dynfy add_module_library(sobfu) # Link to sobfu library target_link_libraries(sobfu kfusion gcov ${Boost_LIBRARIES} ${CUDA_CUDART_LIBRARY} ${PCL_LIBRARIES} ) ================================================ FILE: src/sobfu/cuda/reductor.cu ================================================ /* sobfu includes */ #include #include #define FULL_MASK 0xffffffff /* * OWN KERNELS */ template __global__ void sobfu::device::reduce_data_kernel(float2 *g_idata_global, float2 *g_idata_n, float *g_odata, unsigned int n) { float *sdata = SharedMemory(); /* perform first level of reduction, reading from global memory, writing to shared memory */ unsigned int tid = threadIdx.x; unsigned int i = blockIdx.x * blockSize * 2 + threadIdx.x; unsigned int gridSize = blockSize * 2 * gridDim.x; float mySum = 0.f; /* we reduce multiple elements per thread; the number is determined by the umber of active thread blocks (via * gridDim); more blocks will result in a larger gridSize and therefore fewer elements per thread */ while (i < n) { mySum += (g_idata_global[i].x - g_idata_n[i].x) * (g_idata_global[i].x - g_idata_n[i].x); /* ensure we don't read out of bounds -- this is optimized away for powerOf2 sized arrays */ if (nIsPow2 || i + blockSize < n) { mySum += (g_idata_global[i + blockSize].x - g_idata_n[i + blockSize].x) * (g_idata_global[i + blockSize].x - g_idata_n[i + blockSize].x); } i += gridSize; } /* each thread puts its local sum into shared memory */ sdata[tid] = mySum; __syncthreads(); /* do reduction in shared mem */ if ((blockSize >= 512) && (tid < 256)) { sdata[tid] = mySum = mySum + sdata[tid + 256]; } __syncthreads(); if ((blockSize >= 256) && (tid < 128)) { sdata[tid] = mySum = mySum + sdata[tid + 128]; } __syncthreads(); if ((blockSize >= 128) && (tid < 64)) { sdata[tid] = mySum = mySum + sdata[tid + 64]; } __syncthreads(); #if (__CUDA_ARCH__ >= 300) if (tid < 32) { /* fetch final intermediate sum from 2nd warp */ if (blockSize >= 64) mySum += sdata[tid + 32]; /* reduce final warp using shuffle */ for (int offset = warpSize / 2; offset > 0; offset /= 2) { mySum += __shfl_down_sync(FULL_MASK, mySum, offset); } } #else /* fully unroll reduction within a single warp */ if ((blockSize >= 64) && (tid < 32)) { sdata[tid] = mySum = mySum + sdata[tid + 32]; } __syncthreads(); if ((blockSize >= 32) && (tid < 16)) { sdata[tid] = mySum = mySum + sdata[tid + 16]; } __syncthreads(); if ((blockSize >= 16) && (tid < 8)) { sdata[tid] = mySum = mySum + sdata[tid + 8]; } __syncthreads(); if ((blockSize >= 8) && (tid < 4)) { sdata[tid] = mySum = mySum + sdata[tid + 4]; } __syncthreads(); if ((blockSize >= 4) && (tid < 2)) { sdata[tid] = mySum = mySum + sdata[tid + 2]; } __syncthreads(); if ((blockSize >= 2) && (tid < 1)) { sdata[tid] = mySum = mySum + sdata[tid + 1]; } __syncthreads(); #endif /* write result for this block to global mem */ if (tid == 0) g_odata[blockIdx.x] = mySum; } template __global__ void sobfu::device::reduce_reg_sobolev_kernel(Mat4f *g_idata, float *g_odata, unsigned int n) { float *sdata = SharedMemory(); /* perform first level of reduction, reading from global memory, writing to shared memory */ unsigned int tid = threadIdx.x; unsigned 
int i = blockIdx.x * blockSize * 2 + threadIdx.x; unsigned int gridSize = blockSize * 2 * gridDim.x; float mySum = 0.f; /* we reduce multiple elements per thread; the number is determined by the umber of active thread blocks (via * gridDim); more blocks will result in a larger gridSize and therefore fewer elements per thread */ while (i < n) { mySum += norm_sq(g_idata[i].data[0]) + norm_sq(g_idata[i].data[1]) + norm_sq(g_idata[i].data[2]); /* ensure we don't read out of bounds -- this is optimized away for powerOf2 sized arrays */ if (nIsPow2 || i + blockSize < n) { mySum += norm_sq(g_idata[i + blockSize].data[0]) + norm_sq(g_idata[i + blockSize].data[1]) + norm_sq(g_idata[i + blockSize].data[2]); } i += gridSize; } /* each thread puts its local sum into shared memory */ sdata[tid] = mySum; __syncthreads(); /* do reduction in shared mem */ if ((blockSize >= 512) && (tid < 256)) { sdata[tid] = mySum = mySum + sdata[tid + 256]; } __syncthreads(); if ((blockSize >= 256) && (tid < 128)) { sdata[tid] = mySum = mySum + sdata[tid + 128]; } __syncthreads(); if ((blockSize >= 128) && (tid < 64)) { sdata[tid] = mySum = mySum + sdata[tid + 64]; } __syncthreads(); #if (__CUDA_ARCH__ >= 300) if (tid < 32) { /* fetch final intermediate sum from 2nd warp */ if (blockSize >= 64) mySum += sdata[tid + 32]; /* reduce final warp using shuffle */ for (int offset = warpSize / 2; offset > 0; offset /= 2) { mySum += __shfl_down_sync(FULL_MASK, mySum, offset); } } #else /* fully unroll reduction within a single warp */ if ((blockSize >= 64) && (tid < 32)) { sdata[tid] = mySum = mySum + sdata[tid + 32]; } __syncthreads(); if ((blockSize >= 32) && (tid < 16)) { sdata[tid] = mySum = mySum + sdata[tid + 16]; } __syncthreads(); if ((blockSize >= 16) && (tid < 8)) { sdata[tid] = mySum = mySum + sdata[tid + 8]; } __syncthreads(); if ((blockSize >= 8) && (tid < 4)) { sdata[tid] = mySum = mySum + sdata[tid + 4]; } __syncthreads(); if ((blockSize >= 4) && (tid < 2)) { sdata[tid] = mySum = mySum + sdata[tid + 2]; } __syncthreads(); if ((blockSize >= 2) && (tid < 1)) { sdata[tid] = mySum = mySum + sdata[tid + 1]; } __syncthreads(); #endif /* write result for this block to global mem */ if (tid == 0) g_odata[blockIdx.x] = mySum; } template __global__ void sobfu::device::reduce_voxel_max_energy_kernel(float2 *d_idata_global, float2 *d_idata_n, Mat4f *d_idata_reg, float2 *d_o_data, float w_reg, unsigned int n) { float2 *sdata = SharedMemory(); /* perform first level of reduction, reading from global memory, writing to shared memory */ unsigned int tid = threadIdx.x; unsigned int i = blockIdx.x * blockSize * 2 + threadIdx.x; unsigned int gridSize = blockSize * 2 * gridDim.x; float2 local_max; local_max.x = 0.f; local_max.y = 0.f; /* we reduce multiple elements per thread; the number is determined by the umber of active thread blocks (via * gridDim); more blocks will result in a larger gridSize and therefore fewer elements per thread */ while (i < n) { float temp = (d_idata_global[i].x - d_idata_n[i].x) * (d_idata_global[i].x - d_idata_n[i].x) + w_reg * (norm_sq(d_idata_reg[i].data[0]) + norm_sq(d_idata_reg[i].data[1]) + norm_sq(d_idata_reg[i].data[2])); if (temp > local_max.x) { local_max.x = temp; local_max.y = (float) i; } /* ensure we don't read out of bounds -- this is optimized away for powerOf2 sized arrays */ if (nIsPow2 || i + blockSize < n) { temp = (d_idata_global[i + blockSize].x - d_idata_n[i + blockSize].x) * (d_idata_global[i + blockSize].x - d_idata_n[i + blockSize].x) + w_reg * (norm_sq(d_idata_reg[i + 
blockSize].data[0]) + norm_sq(d_idata_reg[i + blockSize].data[1]) + norm_sq(d_idata_reg[i + blockSize].data[2])); if (temp > local_max.x) { local_max.x = temp; local_max.y = (float) i + blockSize; } } i += gridSize; } /* each thread puts its local sum into shared memory */ sdata[tid] = local_max; __syncthreads(); /* do reduction in shared mem */ if ((blockSize >= 512) && (tid < 256)) { if (sdata[tid + 256].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 256]; } } __syncthreads(); if ((blockSize >= 256) && (tid < 128)) { if (sdata[tid + 128].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 128]; } } __syncthreads(); if ((blockSize >= 128) && (tid < 64)) { if (sdata[tid + 64].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 64]; } } __syncthreads(); if ((blockSize >= 64) && (tid < 32)) { if (sdata[tid + 32].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 32]; } } __syncthreads(); if ((blockSize >= 32) && (tid < 16)) { if (sdata[tid + 16].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 16]; } } __syncthreads(); if ((blockSize >= 16) && (tid < 8)) { if (sdata[tid + 8].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 8]; } } __syncthreads(); if ((blockSize >= 8) && (tid < 4)) { if (sdata[tid + 4].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 4]; } } __syncthreads(); if ((blockSize >= 4) && (tid < 2)) { if (sdata[tid + 2].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 2]; } } __syncthreads(); if ((blockSize >= 2) && (tid < 1)) { if (sdata[tid + 1].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 1]; } } __syncthreads(); /* write result for this block to global mem */ if (tid == 0) { d_o_data[blockIdx.x] = local_max; } } template __global__ void sobfu::device::reduce_max_kernel(float4 *updates, float2 *g_o_max_data, unsigned int n) { float2 *sdata = SharedMemory(); /* perform first level of reduction, reading from global memory, writing to shared memory */ unsigned int tid = threadIdx.x; unsigned int i = blockIdx.x * blockSize * 2 + threadIdx.x; unsigned int gridSize = blockSize * 2 * gridDim.x; float2 local_max; local_max.x = 0.f; local_max.y = 0.f; /* we reduce multiple elements per thread; the number is determined by the umber of active thread blocks (via * gridDim); more blocks will result in a larger gridSize and therefore fewer elements per thread */ while (i < n) { if (norm(updates[i]) > local_max.x) { local_max.x = norm(updates[i]); local_max.y = (float) i; } /* ensure we don't read out of bounds -- this is optimized away for powerOf2 sized arrays */ if (nIsPow2 || i + blockSize < n) { if (norm(updates[i + blockSize]) > local_max.x) { local_max.x = norm(updates[i + blockSize]); local_max.y = (float) i + blockSize; } } i += gridSize; } /* each thread puts its local sum into shared memory */ sdata[tid] = local_max; __syncthreads(); /* do reduction in shared mem */ if ((blockSize >= 512) && (tid < 256)) { if (sdata[tid + 256].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 256]; } } __syncthreads(); if ((blockSize >= 256) && (tid < 128)) { if (sdata[tid + 128].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 128]; } } __syncthreads(); if ((blockSize >= 128) && (tid < 64)) { if (sdata[tid + 64].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 64]; } } __syncthreads(); if ((blockSize >= 64) && (tid < 32)) { if (sdata[tid + 32].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 32]; } } __syncthreads(); if ((blockSize >= 32) && (tid < 16)) { if (sdata[tid + 16].x > local_max.x) { sdata[tid] = 
local_max = sdata[tid + 16]; } } __syncthreads(); if ((blockSize >= 16) && (tid < 8)) { if (sdata[tid + 8].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 8]; } } __syncthreads(); if ((blockSize >= 8) && (tid < 4)) { if (sdata[tid + 4].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 4]; } } __syncthreads(); if ((blockSize >= 4) && (tid < 2)) { if (sdata[tid + 2].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 2]; } } __syncthreads(); if ((blockSize >= 2) && (tid < 1)) { if (sdata[tid + 1].x > local_max.x) { sdata[tid] = local_max = sdata[tid + 1]; } } __syncthreads(); /* write result for this block to global mem */ if (tid == 0) { g_o_max_data[blockIdx.x] = local_max; } } /* wrapper function for kernel launch */ void sobfu::device::reduce_data(int size, int threads, int blocks, float2 *d_idata_global, float2 *d_idata_n, float *d_odata) { dim3 dimBlock(threads, 1, 1); dim3 dimGrid(blocks, 1, 1); /* when there is only one warp per block, we need to allocate two warps worth of shared memory so that we don't * index shared memory out of bounds */ int smemSize = (threads <= 32) ? 2 * threads * sizeof(float) : threads * sizeof(float); if (isPow2(size)) { switch (threads) { case 512: reduce_data_kernel<512, true> <<>>(d_idata_global, d_idata_n, d_odata, size); break; case 256: reduce_data_kernel<256, true> <<>>(d_idata_global, d_idata_n, d_odata, size); break; case 128: reduce_data_kernel<128, true> <<>>(d_idata_global, d_idata_n, d_odata, size); break; case 64: reduce_data_kernel<64, true><<>>(d_idata_global, d_idata_n, d_odata, size); break; case 32: reduce_data_kernel<32, true><<>>(d_idata_global, d_idata_n, d_odata, size); break; case 16: reduce_data_kernel<16, true><<>>(d_idata_global, d_idata_n, d_odata, size); break; case 8: reduce_data_kernel<8, true><<>>(d_idata_global, d_idata_n, d_odata, size); break; case 4: reduce_data_kernel<4, true><<>>(d_idata_global, d_idata_n, d_odata, size); break; case 2: reduce_data_kernel<2, true><<>>(d_idata_global, d_idata_n, d_odata, size); break; case 1: reduce_data_kernel<1, true><<>>(d_idata_global, d_idata_n, d_odata, size); break; } } else { switch (threads) { case 512: reduce_data_kernel<512, false> <<>>(d_idata_global, d_idata_n, d_odata, size); break; case 256: reduce_data_kernel<256, false> <<>>(d_idata_global, d_idata_n, d_odata, size); break; case 128: reduce_data_kernel<128, false> <<>>(d_idata_global, d_idata_n, d_odata, size); break; case 64: reduce_data_kernel<64, false> <<>>(d_idata_global, d_idata_n, d_odata, size); break; case 32: reduce_data_kernel<32, false> <<>>(d_idata_global, d_idata_n, d_odata, size); break; case 16: reduce_data_kernel<16, false> <<>>(d_idata_global, d_idata_n, d_odata, size); break; case 8: reduce_data_kernel<8, false><<>>(d_idata_global, d_idata_n, d_odata, size); break; case 4: reduce_data_kernel<4, false><<>>(d_idata_global, d_idata_n, d_odata, size); break; case 2: reduce_data_kernel<2, false><<>>(d_idata_global, d_idata_n, d_odata, size); break; case 1: reduce_data_kernel<1, false><<>>(d_idata_global, d_idata_n, d_odata, size); break; } } } void sobfu::device::reduce_reg_sobolev(int size, int threads, int blocks, Mat4f *d_idata, float *d_odata) { dim3 dimBlock(threads, 1, 1); dim3 dimGrid(blocks, 1, 1); /* when there is only one warp per block, we need to allocate two warps worth of shared memory so that we don't * index shared memory out of bounds */ int smemSize = (threads <= 32) ? 
2 * threads * sizeof(float) : threads * sizeof(float); if (isPow2(size)) { switch (threads) { case 512: reduce_reg_sobolev_kernel<512, true><<>>(d_idata, d_odata, size); break; case 256: reduce_reg_sobolev_kernel<256, true><<>>(d_idata, d_odata, size); break; case 128: reduce_reg_sobolev_kernel<128, true><<>>(d_idata, d_odata, size); break; case 64: reduce_reg_sobolev_kernel<64, true><<>>(d_idata, d_odata, size); break; case 32: reduce_reg_sobolev_kernel<32, true><<>>(d_idata, d_odata, size); break; case 16: reduce_reg_sobolev_kernel<16, true><<>>(d_idata, d_odata, size); break; case 8: reduce_reg_sobolev_kernel<8, true><<>>(d_idata, d_odata, size); break; case 4: reduce_reg_sobolev_kernel<4, true><<>>(d_idata, d_odata, size); break; case 2: reduce_reg_sobolev_kernel<2, true><<>>(d_idata, d_odata, size); break; case 1: reduce_reg_sobolev_kernel<1, true><<>>(d_idata, d_odata, size); break; } } else { switch (threads) { case 512: reduce_reg_sobolev_kernel<512, false><<>>(d_idata, d_odata, size); break; case 256: reduce_reg_sobolev_kernel<256, false><<>>(d_idata, d_odata, size); break; case 128: reduce_reg_sobolev_kernel<128, false><<>>(d_idata, d_odata, size); break; case 64: reduce_reg_sobolev_kernel<64, false><<>>(d_idata, d_odata, size); break; case 32: reduce_reg_sobolev_kernel<32, false><<>>(d_idata, d_odata, size); break; case 16: reduce_reg_sobolev_kernel<16, false><<>>(d_idata, d_odata, size); break; case 8: reduce_reg_sobolev_kernel<8, false><<>>(d_idata, d_odata, size); break; case 4: reduce_reg_sobolev_kernel<4, false><<>>(d_idata, d_odata, size); break; case 2: reduce_reg_sobolev_kernel<2, false><<>>(d_idata, d_odata, size); break; case 1: reduce_reg_sobolev_kernel<1, false><<>>(d_idata, d_odata, size); break; } } } void sobfu::device::reduce_voxel_max_energy(int size, int threads, int blocks, float2 *d_idata_global, float2 *d_idata_n, Mat4f *d_idata_reg, float w_reg, float2 *d_odata) { dim3 dimBlock(threads, 1, 1); dim3 dimGrid(blocks, 1, 1); /* when there is only one warp per block, we need to allocate two warps worth of shared memory so that we don't * index shared memory out of bounds */ int smemSize = (threads <= 32) ? 
2 * threads * sizeof(float2) : threads * sizeof(float2); if (isPow2(size)) { switch (threads) { case 512: reduce_voxel_max_energy_kernel<512, true> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 256: reduce_voxel_max_energy_kernel<256, true> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 128: reduce_voxel_max_energy_kernel<128, true> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 64: reduce_voxel_max_energy_kernel<64, true> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 32: reduce_voxel_max_energy_kernel<32, true> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 16: reduce_voxel_max_energy_kernel<16, true> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 8: reduce_voxel_max_energy_kernel<8, true> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 4: reduce_voxel_max_energy_kernel<4, true> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 2: reduce_voxel_max_energy_kernel<2, true> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 1: reduce_voxel_max_energy_kernel<1, true> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; } } else { switch (threads) { case 512: reduce_voxel_max_energy_kernel<512, false> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 256: reduce_voxel_max_energy_kernel<256, false> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 128: reduce_voxel_max_energy_kernel<128, false> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 64: reduce_voxel_max_energy_kernel<64, false> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 32: reduce_voxel_max_energy_kernel<32, false> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 16: reduce_voxel_max_energy_kernel<16, false> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 8: reduce_voxel_max_energy_kernel<8, false> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 4: reduce_voxel_max_energy_kernel<4, false> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 2: reduce_voxel_max_energy_kernel<2, false> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; case 1: reduce_voxel_max_energy_kernel<1, false> <<>>(d_idata_global, d_idata_n, d_idata_reg, d_odata, w_reg, size); break; } } } void sobfu::device::reduce_max(int size, int threads, int blocks, float4 *updates, float2 *d_o_max_data) { dim3 dimBlock(threads, 1, 1); dim3 dimGrid(blocks, 1, 1); /* when there is only one warp per block, we need to allocate two warps worth of shared memory so that we don't * index shared memory out of bounds */ int smemSize = (threads <= 32) ? 
2 * threads * sizeof(float2) : threads * sizeof(float2); if (isPow2(size)) { switch (threads) { case 512: reduce_max_kernel<512, true><<>>(updates, d_o_max_data, size); break; case 256: reduce_max_kernel<256, true><<>>(updates, d_o_max_data, size); break; case 128: reduce_max_kernel<128, true><<>>(updates, d_o_max_data, size); break; case 64: reduce_max_kernel<64, true><<>>(updates, d_o_max_data, size); break; case 32: reduce_max_kernel<32, true><<>>(updates, d_o_max_data, size); break; case 16: reduce_max_kernel<16, true><<>>(updates, d_o_max_data, size); break; case 8: reduce_max_kernel<8, true><<>>(updates, d_o_max_data, size); break; case 4: reduce_max_kernel<4, true><<>>(updates, d_o_max_data, size); break; case 2: reduce_max_kernel<2, true><<>>(updates, d_o_max_data, size); break; case 1: reduce_max_kernel<1, true><<>>(updates, d_o_max_data, size); break; } } else { switch (threads) { case 512: reduce_max_kernel<512, false><<>>(updates, d_o_max_data, size); break; case 256: reduce_max_kernel<256, false><<>>(updates, d_o_max_data, size); break; case 128: reduce_max_kernel<128, false><<>>(updates, d_o_max_data, size); break; case 64: reduce_max_kernel<64, false><<>>(updates, d_o_max_data, size); break; case 32: reduce_max_kernel<32, false><<>>(updates, d_o_max_data, size); break; case 16: reduce_max_kernel<16, false><<>>(updates, d_o_max_data, size); break; case 8: reduce_max_kernel<8, false><<>>(updates, d_o_max_data, size); break; case 4: reduce_max_kernel<4, false><<>>(updates, d_o_max_data, size); break; case 2: reduce_max_kernel<2, false><<>>(updates, d_o_max_data, size); break; case 1: reduce_max_kernel<1, false><<>>(updates, d_o_max_data, size); break; } } } ================================================ FILE: src/sobfu/cuda/scalar_fields.cu ================================================ /* sobfu includes */ #include #include #define FULL_MASK 0xffffffff /* * SCALAR FIELD */ __device__ __forceinline__ float* sobfu::device::ScalarField::beg(int x, int y) const { return data + y * dims.x + x; } __device__ __forceinline__ float* sobfu::device::ScalarField::zstep(float* const ptr) const { return ptr + dims.x * dims.y; } __device__ __forceinline__ float* sobfu::device::ScalarField::operator()(int idx) const { return data + idx; } __device__ __forceinline__ float* sobfu::device::ScalarField::operator()(int x, int y, int z) const { return data + z * dims.y * dims.x + y * dims.x + x; } __global__ void sobfu::device::clear_kernel(sobfu::device::ScalarField field) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > field.dims.x - 1 || y > field.dims.y - 1) { return; } float* pos = field.beg(x, y); for (int i = 0; i <= field.dims.z - 1; pos = field.zstep(pos), ++i) { *pos = 0.f; } } void sobfu::device::clear(sobfu::device::ScalarField& field) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(field.dims.x, block.x), kfusion::device::divUp(field.dims.y, block.y)); clear_kernel<<>>(field); cudaSafeCall(cudaGetLastError()); } template __global__ void sobfu::device::reduce_sum_kernel(float* g_idata, float* g_odata, unsigned int n) { float* sdata = SharedMemory(); /* perform first level of reduction, reading from global memory, writing to shared memory */ unsigned int tid = threadIdx.x; unsigned int i = blockIdx.x * blockSize * 2 + threadIdx.x; unsigned int gridSize = blockSize * 2 * gridDim.x; float mySum = 0.f; /* we reduce multiple elements per thread; the number is determined by the umber of active thread blocks (via * gridDim); 
more blocks will result in a larger gridSize and therefore fewer elements per thread */ while (i < n) { mySum += g_idata[i]; /* ensure we don't read out of bounds -- this is optimized away for powerOf2 sized arrays */ if (nIsPow2 || i + blockSize < n) { mySum += g_idata[i + blockSize]; } i += gridSize; } /* each thread puts its local sum into shared memory */ sdata[tid] = mySum; __syncthreads(); /* do reduction in shared mem */ if ((blockSize >= 512) && (tid < 256)) { sdata[tid] = mySum = mySum + sdata[tid + 256]; } __syncthreads(); if ((blockSize >= 256) && (tid < 128)) { sdata[tid] = mySum = mySum + sdata[tid + 128]; } __syncthreads(); if ((blockSize >= 128) && (tid < 64)) { sdata[tid] = mySum = mySum + sdata[tid + 64]; } __syncthreads(); #if (__CUDA_ARCH__ >= 300) if (tid < 32) { /* fetch final intermediate sum from 2nd warp */ if (blockSize >= 64) mySum += sdata[tid + 32]; /* reduce final warp using shuffle */ for (int offset = warpSize / 2; offset > 0; offset /= 2) { mySum += __shfl_down_sync(FULL_MASK, mySum, offset); } } #else /* fully unroll reduction within a single warp */ if ((blockSize >= 64) && (tid < 32)) { sdata[tid] = mySum = mySum + sdata[tid + 32]; } __syncthreads(); if ((blockSize >= 32) && (tid < 16)) { sdata[tid] = mySum = mySum + sdata[tid + 16]; } __syncthreads(); if ((blockSize >= 16) && (tid < 8)) { sdata[tid] = mySum = mySum + sdata[tid + 8]; } __syncthreads(); if ((blockSize >= 8) && (tid < 4)) { sdata[tid] = mySum = mySum + sdata[tid + 4]; } __syncthreads(); if ((blockSize >= 4) && (tid < 2)) { sdata[tid] = mySum = mySum + sdata[tid + 2]; } __syncthreads(); if ((blockSize >= 2) && (tid < 1)) { sdata[tid] = mySum = mySum + sdata[tid + 1]; } __syncthreads(); #endif /* write result for this block to global mem */ if (tid == 0) g_odata[blockIdx.x] = mySum; } float sobfu::device::final_reduce_sum(float* d_odata, int numBlocks) { /* copy result from device to host */ float* h_odata = new float[numBlocks]; cudaSafeCall(cudaMemcpy(h_odata, d_odata, numBlocks * sizeof(float), cudaMemcpyDeviceToHost)); float result = 0.f; for (int i = 0; i < numBlocks; i++) { result += h_odata[i]; } delete h_odata; return result; } float sobfu::device::reduce_sum(sobfu::device::ScalarField& field) { int no_voxels = field.dims.x * field.dims.y * field.dims.z; int blocks, threads; get_num_blocks_and_threads(no_voxels, 65536, 512, blocks, threads); dim3 dimBlock(threads, 1, 1); dim3 dimGrid(blocks, 1, 1); /* when there is only one warp per block, we need to allocate two warps worth of shared memory so that we don't * index shared memory out of bounds */ int smemSize = (threads <= 32) ? 
2 * threads * sizeof(float) : threads * sizeof(float); float* d_odata; cudaSafeCall(cudaMalloc((void**) &d_odata, blocks * sizeof(float))); if (isPow2(no_voxels)) { switch (threads) { case 512: reduce_sum_kernel<512, true><<>>(field.data, d_odata, no_voxels); break; case 256: reduce_sum_kernel<256, true><<>>(field.data, d_odata, no_voxels); break; case 128: reduce_sum_kernel<128, true><<>>(field.data, d_odata, no_voxels); break; case 64: reduce_sum_kernel<64, true><<>>(field.data, d_odata, no_voxels); break; case 32: reduce_sum_kernel<32, true><<>>(field.data, d_odata, no_voxels); break; case 16: reduce_sum_kernel<16, true><<>>(field.data, d_odata, no_voxels); break; case 8: reduce_sum_kernel<8, true><<>>(field.data, d_odata, no_voxels); break; case 4: reduce_sum_kernel<4, true><<>>(field.data, d_odata, no_voxels); break; case 2: reduce_sum_kernel<2, true><<>>(field.data, d_odata, no_voxels); break; case 1: reduce_sum_kernel<1, true><<>>(field.data, d_odata, no_voxels); break; } } else { switch (threads) { case 512: reduce_sum_kernel<512, false><<>>(field.data, d_odata, no_voxels); break; case 256: reduce_sum_kernel<256, false><<>>(field.data, d_odata, no_voxels); break; case 128: reduce_sum_kernel<128, false><<>>(field.data, d_odata, no_voxels); break; case 64: reduce_sum_kernel<64, false><<>>(field.data, d_odata, no_voxels); break; case 32: reduce_sum_kernel<32, false><<>>(field.data, d_odata, no_voxels); break; case 16: reduce_sum_kernel<16, false><<>>(field.data, d_odata, no_voxels); break; case 8: reduce_sum_kernel<8, false><<>>(field.data, d_odata, no_voxels); break; case 4: reduce_sum_kernel<4, false><<>>(field.data, d_odata, no_voxels); break; case 2: reduce_sum_kernel<2, false><<>>(field.data, d_odata, no_voxels); break; case 1: reduce_sum_kernel<1, false><<>>(field.data, d_odata, no_voxels); break; } } float result = final_reduce_sum(d_odata, blocks); cudaSafeCall(cudaFree(d_odata)); return result; } ================================================ FILE: src/sobfu/cuda/solver.cu ================================================ /* sobfu includes */ #include #include /* kinfu includes */ #include using namespace kfusion; using namespace kfusion::device; /* * POTENTIAL GRADIENT */ __global__ void sobfu::device::calculate_potential_gradient_kernel(float2* phi_n_psi, float2* phi_global, float4* nabla_phi_n_o_psi, float4* L, float4* nabla_U, float w_reg, int dim_x, int dim_y, int dim_z) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > dim_x - 1 || y > dim_y - 1) { return; } int idx = y * dim_x + x; for (int i = 0; i <= dim_z - 1; idx += dim_y * dim_x, ++i) { float tsdf_n_psi = phi_n_psi[idx].x; float tsdf_global = phi_global[idx].x; nabla_U[idx] = (tsdf_n_psi - tsdf_global) * nabla_phi_n_o_psi[idx] + w_reg * L[idx]; } } void sobfu::device::calculate_potential_gradient(kfusion::device::TsdfVolume& phi_n_psi, kfusion::device::TsdfVolume& phi_global, sobfu::device::TsdfGradient& nabla_phi_n_o_psi, sobfu::device::Laplacian& L, sobfu::device::PotentialGradient& nabla_U, float w_reg) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(phi_n_psi.dims.x, block.x), kfusion::device::divUp(phi_n_psi.dims.y, block.y)); calculate_potential_gradient_kernel<<>>(phi_n_psi.data, phi_global.data, nabla_phi_n_o_psi.data, L.data, nabla_U.data, w_reg, phi_n_psi.dims.x, phi_n_psi.dims.y, phi_n_psi.dims.z); cudaSafeCall(cudaGetLastError()); } /* * DEFORMATION FIELD */ __global__ void sobfu::device::update_psi_kernel(float4* psi, float4* nabla_U_S, 
float4* updates, float alpha, int dim_x, int dim_y, int dim_z) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > dim_x - 1 || y > dim_y - 1) { return; } int global_idx = y * dim_x + x; for (int i = 0; i <= dim_z - 1; global_idx += dim_y * dim_x, ++i) { float4 update = alpha * nabla_U_S[global_idx]; updates[global_idx] = update; psi[global_idx] -= update; } } void sobfu::device::update_psi(sobfu::device::DeformationField& psi, sobfu::device::PotentialGradient& nabla_U_S, float4* updates, float alpha) { /* integrate in time */ dim3 block(64, 16); dim3 grid(kfusion::device::divUp(psi.dims.x, block.x), kfusion::device::divUp(psi.dims.y, block.y)); update_psi_kernel<<<grid, block>>>(psi.data, nabla_U_S.data, updates, alpha, psi.dims.x, psi.dims.y, psi.dims.z); cudaSafeCall(cudaGetLastError()); } /* * PIPELINE */ void sobfu::device::estimate_psi(SDFs& sdfs, sobfu::device::DeformationField& psi, sobfu::device::DeformationField& psi_inv, sobfu::device::SpatialGradients* spatial_grads, Differentiators& differentiators, float* d_S_i, sobfu::device::Reductor* r, SolverParams& params) { /* copy sobolev filter to constant memory */ set_convolution_kernel(d_S_i); /* create cuda streams */ int no_streams = 3; cudaStream_t streams[no_streams]; for (int i = 0; i < no_streams; i++) { cudaSafeCall(cudaStreamCreate(&streams[i])); } /* calculate no. of blocks and no. of threads per block */ int3 dims = psi.dims; dim3 block(64, 16); dim3 grid(kfusion::device::divUp(dims.x, block.x), kfusion::device::divUp(dims.y, block.y)); /* apply psi to phi_n */ apply_kernel<<<grid, block>>>(sdfs.phi_n, sdfs.phi_n_psi, psi); cudaSafeCall(cudaGetLastError()); /* run gradient descent */ float2 curr_max_update_norm; float e_curr = std::numeric_limits<float>::infinity(); int iter = 1; while (iter <= params.max_iter) { if (iter == 1 || iter % 50 == 0) { std::cout << "iter. no. 
" << iter << std::endl; } /* calculate the gradient of phi_n */ estimate_gradient_kernel<<>>(differentiators.tsdf_diff, *(spatial_grads->nabla_phi_n_o_psi)); cudaSafeCall(cudaGetLastError()); /* calculate the jacobian of psi */ estimate_deformation_jacobian_kernel<<>>(differentiators.diff, *(spatial_grads->J)); cudaSafeCall(cudaGetLastError()); /* calculate the laplacian of psi */ estimate_laplacian_kernel<<>>(differentiators.second_order_diff, *(spatial_grads->L)); cudaSafeCall(cudaGetLastError()); /* calculate current value of the energy functional */ if ((params.verbosity == 1 && (iter == 1 || iter % 50 == 0 || iter == params.max_iter) || params.verbosity == 2)) { /* data term */ float e_data = r->data_energy(sdfs.phi_global.data, sdfs.phi_n_psi.data); /* regularisation term */ float e_reg = r->reg_energy_sobolev(spatial_grads->J->data); e_curr = e_data + params.w_reg * e_reg; std::cout << "data energy + w_reg * reg energy = " << e_data << " + " << params.w_reg << " * " << e_reg << " = " << e_curr << std::endl; } /* * PDE'S */ /* calculate gradient of the potential */ sobfu::device::calculate_potential_gradient(sdfs.phi_n_psi, sdfs.phi_global, *(spatial_grads->nabla_phi_n_o_psi), *(spatial_grads->L), *(spatial_grads->nabla_U), params.w_reg); cudaSafeCall(cudaGetLastError()); /* convolve gradient of the potential with a sobolev kernel */ sobfu::device::convolution_rows((*(spatial_grads->nabla_U_S)).data, (*(spatial_grads->nabla_U)).data, dims.x, dims.y, dims.z); sobfu::device::convolution_columns((*(spatial_grads->nabla_U_S)).data, (*(spatial_grads->nabla_U)).data, dims.x, dims.y, dims.z); sobfu::device::convolution_depth((*(spatial_grads->nabla_U_S)).data, (*(spatial_grads->nabla_U)).data, dims.x, dims.y, dims.z); /* update psi */ update_psi_kernel<<>>(psi.data, (*(spatial_grads->nabla_U_S)).data, r->updates, params.alpha, dims.x, dims.y, dims.z); cudaSafeCall(cudaGetLastError()); /* apply psi to phi_n */ apply_kernel<<>>(sdfs.phi_n, sdfs.phi_n_psi, psi); cudaSafeCall(cudaGetLastError()); /* get value of the max. update norm at the current iteration of the solver */ curr_max_update_norm = r->max_update_norm(); if ((params.verbosity == 1 && (iter == 1 || iter % 50 == 0 || iter == params.max_iter) || params.verbosity == 2)) { int idx_x = curr_max_update_norm.y / (psi.dims.x * psi.dims.y); int idx_y = (curr_max_update_norm.y - idx_x * psi.dims.x * psi.dims.y) / psi.dims.x; int idx_z = curr_max_update_norm.y - psi.dims.x * (idx_y + psi.dims.y * idx_x); std::cout << "max. update norm " << curr_max_update_norm.x << " at voxel (" << idx_z << ", " << idx_y << ", " << idx_x << ")" << std::endl; } if (curr_max_update_norm.x <= params.max_update_norm) { std::cout << "SOLVER CONVERGED AFTER " << iter << " ITERATIONS" << std::endl; break; } if (iter == params.max_iter) { std::cout << "SOLVER REACHED MAX. NO. 
OF ITERATIONS WITHOUT CONVERGING" << std::endl; } iter++; } /* iteratively estimate the inverse deformation field */ sobfu::device::init_identity(psi_inv); sobfu::device::estimate_inverse(psi, psi_inv); /* apply psi_inv to phi_global */ apply_kernel<<>>(sdfs.phi_global, sdfs.phi_global_psi_inv, psi_inv); cudaSafeCall(cudaGetLastError()); for (int i = 0; i < no_streams; i++) { cudaSafeCall(cudaStreamDestroy(streams[i])); } } /* * CONVOLUTIONS */ #define KERNEL_RADIUS 3 #define KERNEL_LENGTH (2 * KERNEL_RADIUS + 1) #define ROWS_BLOCKDIM_X 4 #define ROWS_BLOCKDIM_Y 64 #define ROWS_RESULT_STEPS 4 #define ROWS_HALO_STEPS 1 #define COLUMNS_BLOCKDIM_X 64 #define COLUMNS_BLOCKDIM_Y 4 #define COLUMNS_RESULT_STEPS 4 #define COLUMNS_HALO_STEPS 1 #define DEPTH_BLOCKDIM_X 64 #define DEPTH_BLOCKDIM_Z 4 #define DEPTH_RESULT_STEPS 4 #define DEPTH_HALO_STEPS 1 __constant__ float S[KERNEL_LENGTH]; void sobfu::device::set_convolution_kernel(float* d_kernel) { cudaSafeCall(cudaMemcpyToSymbol(S, d_kernel, KERNEL_LENGTH * sizeof(float), 0, cudaMemcpyDeviceToDevice)); cudaSafeCall(cudaGetLastError()); } /*** ROW CONVOLUTION ***/ __global__ void sobfu::device::convolution_rows_kernel(float4* d_dst, float4* d_src, int image_w, int image_h, int image_d) { __shared__ float4 s_data[ROWS_BLOCKDIM_Y][(ROWS_RESULT_STEPS + 2 * ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X]; /* offset to the left halo edge */ const int base_x = (blockIdx.x * ROWS_RESULT_STEPS - ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X + threadIdx.x; const int base_y = blockIdx.y * ROWS_BLOCKDIM_Y + threadIdx.y; const int base_z = blockIdx.z; const int first_pixel_in_line = ROWS_BLOCKDIM_X * ROWS_HALO_STEPS - threadIdx.x; const int last_pixel_in_line = image_w - base_x - 1; d_dst += base_z * image_h * image_w + base_y * image_w + base_x; d_src += base_z * image_h * image_w + base_y * image_w + base_x; /* load main data */ #pragma unroll for (int i = ROWS_HALO_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i++) { s_data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = (image_w - base_x > i * ROWS_BLOCKDIM_X) ? d_src[i * ROWS_BLOCKDIM_X] : d_src[last_pixel_in_line]; } /* load left halo */ #pragma unroll for (int i = 0; i < ROWS_HALO_STEPS; i++) { s_data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = (base_x >= -i * ROWS_BLOCKDIM_X) ? d_src[i * ROWS_BLOCKDIM_X] : d_src[first_pixel_in_line]; } /* load right halo */ #pragma unroll for (int i = ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS + ROWS_HALO_STEPS; i++) { s_data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = (image_w - base_x > i * ROWS_BLOCKDIM_X) ? 
d_src[i * ROWS_BLOCKDIM_X] : d_src[last_pixel_in_line]; } /* compute and store results */ __syncthreads(); /* this pixel is not part of the iamge and doesn't need to be convolved */ if (base_y >= image_h) { return; } #pragma unroll for (int i = ROWS_HALO_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i++) { if (image_w - base_x > i * ROWS_BLOCKDIM_X) { float4 sum = make_float4(0.f, 0.f, 0.f, 0.f); #pragma unroll for (int j = -KERNEL_RADIUS; j <= KERNEL_RADIUS; j++) { sum += S[KERNEL_RADIUS - j] * s_data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X + j]; } d_dst[i * ROWS_BLOCKDIM_X] = sum; } } } void sobfu::device::convolution_rows(float4* d_dst, float4* d_src, int image_w, int image_h, int image_d) { int blocks_x = image_w / (ROWS_RESULT_STEPS * ROWS_BLOCKDIM_X) + min(1, image_w % (ROWS_RESULT_STEPS * ROWS_BLOCKDIM_X)); int blocks_y = image_h / ROWS_BLOCKDIM_Y + min(1, image_h % ROWS_BLOCKDIM_Y); int blocks_z = image_d; dim3 blocks(blocks_x, blocks_y, blocks_z); dim3 threads(ROWS_BLOCKDIM_X, ROWS_BLOCKDIM_Y, 1); convolution_rows_kernel<<>>(d_dst, d_src, image_w, image_h, image_d); cudaSafeCall(cudaGetLastError()); } /*** COLUMMN CONVOLUTION ***/ __global__ void sobfu::device::convolution_columns_kernel(float4* d_dst, float4* d_src, int image_w, int image_h, int image_d) { __shared__ float4 s_data[COLUMNS_BLOCKDIM_X][(COLUMNS_RESULT_STEPS + 2 * COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + 1]; /* offset to the upper halo edge */ const int base_x = blockIdx.x * COLUMNS_BLOCKDIM_X + threadIdx.x; const int base_y = (blockIdx.y * COLUMNS_RESULT_STEPS - COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + threadIdx.y; const int base_z = blockIdx.z; const int first_pixel_in_line = (COLUMNS_BLOCKDIM_Y * COLUMNS_HALO_STEPS - threadIdx.y) * image_w; const int last_pixel_in_line = (image_h - base_y - 1) * image_w; d_dst += base_z * image_h * image_w + base_y * image_w + base_x; d_src += base_z * image_h * image_w + base_y * image_w + base_x; /* main data */ #pragma unroll for (int i = COLUMNS_HALO_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i++) { s_data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] = (image_h - base_y > i * COLUMNS_BLOCKDIM_Y) ? d_src[i * COLUMNS_BLOCKDIM_Y * image_w] : d_src[last_pixel_in_line]; } /* upper halo */ #pragma unroll for (int i = 0; i < COLUMNS_HALO_STEPS; i++) { s_data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] = (base_y >= -i * COLUMNS_BLOCKDIM_Y) ? d_src[i * COLUMNS_BLOCKDIM_Y * image_w] : d_src[first_pixel_in_line]; } /* lower halo */ #pragma unroll for (int i = COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS + COLUMNS_HALO_STEPS; i++) { s_data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] = (image_h - base_y > i * COLUMNS_BLOCKDIM_Y) ? 
d_src[i * COLUMNS_BLOCKDIM_Y * image_w] : d_src[last_pixel_in_line]; } /* compute and store results */ __syncthreads(); /* this pixel isn't part of hte image and doesn't need to be convolved */ if (base_x >= image_w) { return; } #pragma unroll for (int i = COLUMNS_HALO_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i++) { if (image_h - base_y > i * COLUMNS_BLOCKDIM_Y) { float4 sum = make_float4(0.f, 0.f, 0.f, 0.f); #pragma unroll for (int j = -KERNEL_RADIUS; j <= KERNEL_RADIUS; j++) { sum += S[KERNEL_RADIUS - j] * s_data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y + j]; } d_dst[i * COLUMNS_BLOCKDIM_Y * image_w] += sum; } } } void sobfu::device::convolution_columns(float4* d_dst, float4* d_src, int image_w, int image_h, int image_d) { int blocks_x = image_w / COLUMNS_BLOCKDIM_X + min(1, image_w % COLUMNS_BLOCKDIM_X); int blocks_y = image_h / (COLUMNS_RESULT_STEPS * COLUMNS_BLOCKDIM_Y) + min(1, image_h % (COLUMNS_RESULT_STEPS * COLUMNS_BLOCKDIM_Y)); int blocks_z = image_d; dim3 blocks(blocks_x, blocks_y, blocks_z); dim3 threads(COLUMNS_BLOCKDIM_X, COLUMNS_BLOCKDIM_Y, 1); convolution_columns_kernel<<>>(d_dst, d_src, image_w, image_h, image_d); cudaSafeCall(cudaGetLastError()); } /*** DEPTH CONVOLUTION ***/ __global__ void sobfu::device::convolution_depth_kernel(float4* d_dst, float4* d_src, int image_w, int image_h, int image_d) { /* here it is [x][z] as we leave out y bc it has a size of 1 */ __shared__ float4 s_data[DEPTH_BLOCKDIM_X][(DEPTH_RESULT_STEPS + 2 * DEPTH_HALO_STEPS) * DEPTH_BLOCKDIM_Z + 1]; /* offset to the upper halo edge */ const int base_x = blockIdx.x * DEPTH_BLOCKDIM_X + threadIdx.x; const int base_y = blockIdx.y; const int base_z = (blockIdx.z * DEPTH_RESULT_STEPS - DEPTH_HALO_STEPS) * DEPTH_BLOCKDIM_Z + threadIdx.z; const int first_pixel_in_line = (DEPTH_BLOCKDIM_Z * DEPTH_HALO_STEPS - threadIdx.z) * image_w * image_h; const int last_pixel_in_line = (image_d - base_z - 1) * image_w * image_h; d_dst += base_z * image_h * image_w + base_y * image_w + base_x; d_src += base_z * image_h * image_w + base_y * image_w + base_x; /* main data */ #pragma unroll for (int i = DEPTH_HALO_STEPS; i < DEPTH_HALO_STEPS + DEPTH_RESULT_STEPS; i++) { s_data[threadIdx.x][threadIdx.z + i * DEPTH_BLOCKDIM_Z] = (image_d - base_z > i * DEPTH_BLOCKDIM_Z) ? d_src[i * DEPTH_BLOCKDIM_Z * image_w * image_h] : d_src[last_pixel_in_line]; } /* upper halo */ #pragma unroll for (int i = 0; i < DEPTH_HALO_STEPS; i++) { s_data[threadIdx.x][threadIdx.z + i * DEPTH_BLOCKDIM_Z] = (base_z >= -i * DEPTH_BLOCKDIM_Z) ? d_src[i * DEPTH_BLOCKDIM_Z * image_w * image_h] : d_src[first_pixel_in_line]; } /* lower halo */ #pragma unroll for (int i = DEPTH_HALO_STEPS + DEPTH_RESULT_STEPS; i < DEPTH_HALO_STEPS + DEPTH_RESULT_STEPS + DEPTH_HALO_STEPS; i++) { s_data[threadIdx.x][threadIdx.z + i * DEPTH_BLOCKDIM_Z] = (image_d - base_z > i * DEPTH_BLOCKDIM_Z) ? 
d_src[i * DEPTH_BLOCKDIM_Z * image_w * image_h] : d_src[last_pixel_in_line]; } /* compute and store results */ __syncthreads(); /* this pixel is not part of the image and doesn't need to be convolved */ if (base_x >= image_w) { return; } #pragma unroll for (int i = DEPTH_HALO_STEPS; i < DEPTH_HALO_STEPS + DEPTH_RESULT_STEPS; i++) { if (image_d - base_z > i * DEPTH_BLOCKDIM_Z) { float4 sum = make_float4(0.f, 0.f, 0.f, 0.f); #pragma unroll for (int j = -KERNEL_RADIUS; j <= KERNEL_RADIUS; j++) { sum += S[KERNEL_RADIUS - j] * s_data[threadIdx.x][threadIdx.z + i * DEPTH_BLOCKDIM_Z + j]; } d_dst[i * DEPTH_BLOCKDIM_Z * image_w * image_h] += sum; } } } void sobfu::device::convolution_depth(float4* d_dst, float4* d_src, int image_w, int image_h, int image_d) { int blocks_x = image_w / DEPTH_BLOCKDIM_X + min(1, image_w % DEPTH_BLOCKDIM_X); int blocks_y = image_h; int blocks_z = image_d / (DEPTH_RESULT_STEPS * DEPTH_BLOCKDIM_Z) + min(1, image_d % (DEPTH_RESULT_STEPS * DEPTH_BLOCKDIM_Z)); dim3 blocks(blocks_x, blocks_y, blocks_z); dim3 threads(DEPTH_BLOCKDIM_X, 1, DEPTH_BLOCKDIM_Z); convolution_depth_kernel<<>>(d_dst, d_src, image_w, image_h, image_d); cudaSafeCall(cudaGetLastError()); } ================================================ FILE: src/sobfu/cuda/vector_fields.cu ================================================ /* sobfu includes */ #include #include /* kinfu includes */ #include using namespace kfusion::device; /* * VECTOR FIELD */ __device__ __forceinline__ float4* sobfu::device::VectorField::beg(int x, int y) const { return data + x + dims.x * y; } __device__ __forceinline__ float4* sobfu::device::VectorField::zstep(float4* const ptr) const { return ptr + dims.x * dims.y; } __device__ __forceinline__ float4* sobfu::device::VectorField::operator()(int x, int y, int z) const { return data + x + y * dims.x + z * dims.y * dims.x; } __device__ __forceinline__ float4 sobfu::device::VectorField::get_displacement(int x, int y, int z) const { return *(data + z * dims.y * dims.x + y * dims.x + x) - make_float4((float) x, (float) y, (float) z, 0.f); } void sobfu::device::clear(VectorField& field) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(field.dims.x, block.x), kfusion::device::divUp(field.dims.y, block.y)); clear_kernel<<>>(field); cudaSafeCall(cudaGetLastError()); } __global__ void sobfu::device::clear_kernel(sobfu::device::VectorField field) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > field.dims.x - 1 || y > field.dims.y - 1) { return; } float4* beg = field.beg(x, y); float4* end = beg + field.dims.x * field.dims.y * field.dims.z; for (float4* pos = beg; pos != end; pos = field.zstep(pos)) { *pos = make_float4(0.f, 0.f, 0.f, 0.f); } } /* * DEFORMATION FIELD */ void sobfu::device::init_identity(sobfu::device::DeformationField& psi) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(psi.dims.x, block.x), kfusion::device::divUp(psi.dims.y, block.y)); init_identity_kernel<<>>(psi); cudaSafeCall(cudaGetLastError()); } __global__ void sobfu::device::init_identity_kernel(sobfu::device::DeformationField psi) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > psi.dims.x - 1 || y > psi.dims.y - 1) { return; } float4 idx = make_float4((float) x, (float) y, 0.f, 0.f); float4 zstep = make_float4(0.f, 0.f, 1.f, 0.f); float4* pos = psi.beg(x, y); for (int i = 0; i <= psi.dims.z - 1; idx += zstep, pos = psi.zstep(pos), ++i) { *pos = idx; } } __global__ void 
sobfu::device::apply_kernel(const kfusion::device::TsdfVolume phi, kfusion::device::TsdfVolume phi_warped, const sobfu::device::DeformationField psi) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > phi_warped.dims.x - 1 || y > phi_warped.dims.y - 1) { return; } float4* psi_ptr = psi.beg(x, y); float2* phi_warped_ptr = phi_warped.beg(x, y); for (int i = 0; i <= phi_warped.dims.z - 1; psi_ptr = psi.zstep(psi_ptr), phi_warped_ptr = phi_warped.zstep(phi_warped_ptr), ++i) { float4 psi_val = *psi_ptr; float2 tsdf_deformed = interpolate_tsdf(phi, trunc(psi_val)); *phi_warped_ptr = tsdf_deformed; } } void sobfu::device::apply(const kfusion::device::TsdfVolume& phi, kfusion::device::TsdfVolume& phi_warped, const sobfu::device::DeformationField& psi) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(phi.dims.x, block.x), kfusion::device::divUp(phi.dims.y, block.y)); apply_kernel<<>>(phi, phi_warped, psi); cudaSafeCall(cudaGetLastError()); } __global__ void sobfu::device::estimate_inverse_kernel(sobfu::device::DeformationField psi, sobfu::device::DeformationField psi_inv) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > psi_inv.dims.x - 1 || y > psi_inv.dims.y - 1) { return; } float4* psi_inv_ptr = psi_inv.beg(x, y); for (int i = 0; i <= psi_inv.dims.z - 1; psi_inv_ptr = psi_inv.zstep(psi_inv_ptr), ++i) { float4 psi_inv_val = *psi_inv_ptr; *psi_inv_ptr = make_float4((float) x, (float) y, (float) i, 0.f) - 1.f * interpolate_field_inv(psi, trunc(psi_inv_val)); } } void sobfu::device::estimate_inverse(sobfu::device::DeformationField& psi, sobfu::device::DeformationField& psi_inverse) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(psi_inverse.dims.x, block.x), kfusion::device::divUp(psi_inverse.dims.y, block.y)); /* estimate inverse */ for (int iter = 0; iter < 48; ++iter) { estimate_inverse_kernel<<>>(psi, psi_inverse); cudaSafeCall(cudaGetLastError()); } } /* * TSDF DIFFERENTIATOR METHODS */ __global__ void sobfu::device::estimate_gradient_kernel(const sobfu::device::TsdfDifferentiator diff, sobfu::device::TsdfGradient grad) { diff(grad); } void sobfu::device::TsdfDifferentiator::calculate(sobfu::device::TsdfGradient& grad) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(grad.dims.x, block.x), kfusion::device::divUp(grad.dims.y, block.y)); estimate_gradient_kernel<<>>(*this, grad); cudaSafeCall(cudaGetLastError()); } __device__ __forceinline__ void sobfu::device::TsdfDifferentiator::operator()(sobfu::device::TsdfGradient& grad) const { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > vol.dims.x - 1 || y > vol.dims.y - 1) { return; } int idx_x_1 = x + 1; int idx_x_2 = x - 1; if (x == 0) { idx_x_2 = x + 1; } else if (x == vol.dims.x - 1) { idx_x_1 = x - 1; } int idx_y_1 = y + 1; int idx_y_2 = y - 1; if (y == 0) { idx_y_2 = y + 1; } else if (y == vol.dims.y - 1) { idx_y_1 = y - 1; } float4* grad_ptr = grad.beg(x, y); #pragma unroll for (int i = 0; i <= vol.dims.z - 1; grad_ptr = grad.zstep(grad_ptr), ++i) { int idx_z_1 = i + 1; int idx_z_2 = i - 1; if (i == 0) { idx_z_2 = i + 1; } else if (i == vol.dims.z - 1) { idx_z_1 = i - 1; } float Fx1 = (*vol(idx_x_1, y, i)).x; float Fx2 = (*vol(idx_x_2, y, i)).x; float n_x = __fdividef(Fx1 - Fx2, 2.f); float Fy1 = (*vol(x, idx_y_1, i)).x; float Fy2 = (*vol(x, idx_y_2, i)).x; float n_y = __fdividef(Fy1 - Fy2, 2.f); float Fz1 = (*vol(x, y, idx_z_1)).x; float Fz2 = (*vol(x, y, 
idx_z_2)).x; float n_z = __fdividef(Fz1 - Fz2, 2.f); float4 n = make_float4(n_x, n_y, n_z, 0.f); *grad_ptr = n; } } __global__ void sobfu::device::interpolate_gradient_kernel(sobfu::device::TsdfGradient nabla_phi_n_psi, sobfu::device::TsdfGradient nabla_phi_n_psi_t, sobfu::device::DeformationField psi) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > psi.dims.x - 1 || y > psi.dims.y - 1) { return; } float3 idx = make_float3(x, y, 0.f); float3 zstep = make_float3(0.f, 0.f, 1.f); int global_idx = y * nabla_phi_n_psi.dims.x + x; float4* nabla_phi_n_psi_t_ptr = nabla_phi_n_psi_t.beg(x, y); for (int i = 0; i <= psi.dims.z - 1; nabla_phi_n_psi_t_ptr = nabla_phi_n_psi_t.zstep(nabla_phi_n_psi_t_ptr), global_idx += nabla_phi_n_psi.dims.x * nabla_phi_n_psi.dims.y, idx += zstep, ++i) { float4 psi_val = psi.data[global_idx]; *nabla_phi_n_psi_t_ptr = interpolate_field(nabla_phi_n_psi, trunc(psi_val)); } } void sobfu::device::interpolate_gradient(sobfu::device::TsdfGradient& nabla_phi_n_psi, sobfu::device::TsdfGradient& nabla_phi_n_psi_t, sobfu::device::DeformationField& psi) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(psi.dims.x, block.x), kfusion::device::divUp(psi.dims.y, block.y)); interpolate_gradient_kernel<<>>(nabla_phi_n_psi, nabla_phi_n_psi_t, psi); cudaSafeCall(cudaGetLastError()); } /* * LAPLACIAN */ __global__ void sobfu::device::interpolate_laplacian_kernel(sobfu::device::Laplacian L, sobfu::device::Laplacian L_o_psi, sobfu::device::DeformationField psi) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > psi.dims.x - 1 || y > psi.dims.y - 1) { return; } float4* psi_ptr = psi.beg(x, y); float4* L_o_psi_ptr = L_o_psi.beg(x, y); for (int i = 0; i <= psi.dims.z - 1; psi_ptr = psi.zstep(psi_ptr), L_o_psi_ptr = L_o_psi.zstep(L_o_psi_ptr), ++i) { float4 psi_val = *psi_ptr; *L_o_psi_ptr = interpolate_field(L, trunc(psi_val)); } } void sobfu::device::interpolate_laplacian(sobfu::device::Laplacian& L, sobfu::device::Laplacian& L_o_psi, sobfu::device::DeformationField& psi) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(psi.dims.x, block.x), kfusion::device::divUp(psi.dims.y, block.y)); interpolate_laplacian_kernel<<>>(L, L_o_psi, psi); cudaSafeCall(cudaGetLastError()); } /* * SECOND ORDER DIFFERENTIATOR METHODS */ void sobfu::device::SecondOrderDifferentiator::calculate(sobfu::device::Laplacian& L) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(L.dims.x, block.x), kfusion::device::divUp(L.dims.y, block.y)); estimate_laplacian_kernel<<>>(*this, L); cudaSafeCall(cudaGetLastError()); } __global__ void sobfu::device::estimate_laplacian_kernel(const sobfu::device::SecondOrderDifferentiator diff, sobfu::device::Laplacian L) { diff.laplacian(L); } __device__ __forceinline__ void sobfu::device::SecondOrderDifferentiator::laplacian(sobfu::device::Laplacian& L) const { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > L.dims.x - 1 || y > L.dims.y - 1) { return; } int idx_x_1 = x + 1; int idx_x_2 = x - 1; if (x == 0) { idx_x_1 = x; idx_x_2 = x; } else if (x == L.dims.x - 1) { idx_x_1 = x; idx_x_2 = x; } int idx_y_1 = y + 1; int idx_y_2 = y - 1; if (y == 0) { idx_y_1 = y; idx_y_2 = y; } else if (y == L.dims.y - 1) { idx_y_1 = y; idx_y_2 = y; } float4* L_ptr = L.beg(x, y); #pragma unroll for (int i = 0; i <= L.dims.z - 1; L_ptr = L.zstep(L_ptr), ++i) { int idx_z_1 = i + 1; int idx_z_2 = i - 1; if (i == 0) { idx_z_1 = i; idx_z_2 = 
i; } else if (i == L.dims.z - 1) { idx_z_1 = i; idx_z_2 = i; } float4 L_val = -6.f * *psi(x, y, i) + *psi(idx_x_1, y, i) + *psi(idx_x_2, y, i) + *psi(x, idx_y_1, i) + *psi(x, idx_y_2, i) + *psi(x, y, idx_z_1) + *psi(x, y, idx_z_2); *L_ptr = -1.f * L_val; } } /* * JACOBIAN */ __device__ __forceinline__ Mat4f* sobfu::device::Jacobian::beg(int x, int y) const { return data + x + dims.x * y; } __device__ __forceinline__ Mat4f* sobfu::device::Jacobian::zstep(Mat4f* const ptr) const { return ptr + dims.x * dims.y; } __device__ __forceinline__ Mat4f* sobfu::device::Jacobian::operator()(int x, int y, int z) const { return data + x + y * dims.x + z * dims.y * dims.x; } void sobfu::device::clear(Jacobian& J) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(J.dims.x, block.x), kfusion::device::divUp(J.dims.y, block.y)); clear_jacobian_kernel<<>>(J); cudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaDeviceSynchronize()); } __global__ void sobfu::device::clear_jacobian_kernel(sobfu::device::Jacobian J) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > J.dims.x - 1 || y > J.dims.y - 1) { return; } Mat4f* beg = J.beg(x, y); Mat4f* end = beg + J.dims.x * J.dims.y * J.dims.z; for (Mat4f* pos = beg; pos != end; pos = J.zstep(pos)) { float4 g = make_float4(0.f, 0.f, 0.f, 0.f); Mat4f val; val.data[0] = g; val.data[1] = g; val.data[2] = g; *pos = val; } } /* * DIFFERENTIATOR METHODS */ void sobfu::device::Differentiator::calculate(sobfu::device::Jacobian& J) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(J.dims.x, block.x), kfusion::device::divUp(J.dims.y, block.y)); estimate_jacobian_kernel<<>>(*this, J); cudaSafeCall(cudaGetLastError()); } void sobfu::device::Differentiator::calculate_deformation_jacobian(sobfu::device::Jacobian& J) { dim3 block(64, 16); dim3 grid(kfusion::device::divUp(J.dims.x, block.x), kfusion::device::divUp(J.dims.y, block.y)); estimate_deformation_jacobian_kernel<<>>(*this, J); cudaSafeCall(cudaGetLastError()); } __global__ void sobfu::device::estimate_jacobian_kernel(const sobfu::device::Differentiator diff, sobfu::device::Jacobian J) { diff(J, 0); } __global__ void sobfu::device::estimate_deformation_jacobian_kernel(const sobfu::device::Differentiator diff, sobfu::device::Jacobian J) { diff(J, 1); } __device__ __forceinline__ void sobfu::device::Differentiator::operator()(sobfu::device::Jacobian& J, int mode) const { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; if (x > psi.dims.x - 1 || y > psi.dims.y - 1) { return; } int idx_x_1 = x + 1; int idx_x_2 = x - 1; if (x == 0) { idx_x_2 = x + 1; } else if (x == psi.dims.x - 1) { idx_x_1 = x - 1; } int idx_y_1 = y + 1; int idx_y_2 = y - 1; if (y == 0) { idx_y_2 = y + 1; } else if (y == psi.dims.y - 1) { idx_y_1 = y - 1; } Mat4f* J_ptr = J.beg(x, y); #pragma unroll for (int i = 0; i <= psi.dims.z - 1; J_ptr = J.zstep(J_ptr), ++i) { int idx_z_1 = i + 1; int idx_z_2 = i - 1; if (i == 0) { idx_z_2 = i + 1; } else if (i == psi.dims.z - 1) { idx_z_1 = i - 1; } float4 J_x; float4 J_y; float4 J_z; if (mode == 0) { J_x = (*psi(idx_x_1, y, i) - *psi(idx_x_2, y, i)) / 2.f; J_y = (*psi(x, idx_y_1, i) - *psi(x, idx_y_2, i)) / 2.f; J_z = (*psi(x, y, idx_z_1) - *psi(x, y, idx_z_2)) / 2.f; } else if (mode == 1) { J_x = (psi.get_displacement(idx_x_1, y, i) - psi.get_displacement(idx_x_2, y, i)) / 2.f; J_y = (psi.get_displacement(x, idx_y_1, i) - psi.get_displacement(x, idx_y_2, i)) / 2.f; J_z = (psi.get_displacement(x, y, idx_z_1) - 
psi.get_displacement(x, y, idx_z_2)) / 2.f; } Mat4f val; val.data[0] = make_float4(J_x.x, J_y.x, J_z.x, 0.f); val.data[1] = make_float4(J_x.y, J_y.y, J_z.y, 0.f); val.data[2] = make_float4(J_x.z, J_y.z, J_z.z, 0.f); *J(x, y, i) = val; } } ================================================ FILE: src/sobfu/precomp.cpp ================================================ /* sobfu includes */ #include /* checks if x is a power of 2 */ bool isPow2(unsigned int x) { return ((x & (x - 1)) == 0); } /* computes the smallest power of 2 greater than or equal to x */ int nextPow2(int x) { if (x < 0) return 0; --x; x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; return x + 1; } void get_num_blocks_and_threads(int n, int maxBlocks, int maxThreads, int &blocks, int &threads) { /* get device capability, to avoid the block/grid size exceeding the upper bound */ cudaDeviceProp prop; int device; cudaSafeCall(cudaGetDevice(&device)); cudaSafeCall(cudaGetDeviceProperties(&prop, device)); threads = (n < maxThreads * 2) ? nextPow2((n + 1) / 2) : maxThreads; blocks = (n + (threads * 2 - 1)) / (threads * 2); if ((float) threads * blocks > (float) prop.maxGridSize[0] * prop.maxThreadsPerBlock) { printf("n is too large, please choose a smaller number!\n"); } if (blocks > prop.maxGridSize[0]) { printf("Grid size <%d> exceeds the device capability <%d>, set block size as %d (original %d)\n", blocks, prop.maxGridSize[0], threads * 2, threads); blocks /= 2; threads *= 2; } blocks = std::min(maxBlocks, blocks); } ================================================ FILE: src/sobfu/reductor.cpp ================================================ /* sobfu includes */ #include /* * REDUCTOR */ sobfu::device::Reductor::Reductor(int3 dims_, float vsz_, float trunc_dist_) { dims = dims_; vsz = vsz_; trunc_dist = trunc_dist_; no_voxels = dims.x * dims.y * dims.z; /* own kernels */ get_num_blocks_and_threads(no_voxels, 65536, 512, blocks, threads); h_data_out = new float[blocks]; h_reg_out = new float[blocks]; h_max_out = new float2[blocks]; cudaSafeCall(cudaMalloc((void **) &d_data_out, blocks * sizeof(float))); cudaSafeCall(cudaMalloc((void **) &d_reg_out, blocks * sizeof(float))); cudaSafeCall(cudaMalloc((void **) &d_max_out, blocks * sizeof(float2))); cudaSafeCall(cudaMalloc((void **) &updates, no_voxels * sizeof(float4))); } sobfu::device::Reductor::~Reductor() { delete[] h_data_out; delete[] h_reg_out; delete[] h_max_out; cudaSafeCall(cudaFree(d_data_out)); cudaSafeCall(cudaFree(d_reg_out)); cudaSafeCall(cudaFree(d_max_out)); cudaSafeCall(cudaFree(updates)); } float sobfu::device::Reductor::data_energy(float2 *phi_global_data, float2 *phi_n_data) { reduce_data(no_voxels, threads, blocks, phi_global_data, phi_n_data, d_data_out); cudaSafeCall(cudaGetLastError()); return 0.5f * final_reduce(h_data_out, d_data_out, blocks); } float sobfu::device::Reductor::reg_energy_sobolev(Mat4f *J_data) { reduce_reg_sobolev(no_voxels, threads, blocks, J_data, d_reg_out); cudaSafeCall(cudaGetLastError()); return 0.5f * final_reduce(h_reg_out, d_reg_out, blocks); } float2 sobfu::device::Reductor::max_update_norm() { reduce_max(no_voxels, threads, blocks, updates, d_max_out); cudaSafeCall(cudaGetLastError()); return final_reduce_max(h_max_out, d_max_out, blocks, dims); } float2 sobfu::device::Reductor::voxel_max_energy(float2 *phi_global_data, float2 *phi_n_data, Mat4f *J_data, float w_reg) { reduce_voxel_max_energy(no_voxels, threads, blocks, phi_global_data, phi_n_data, J_data, w_reg, d_max_out); cudaSafeCall(cudaGetLastError()); return final_reduce_max(h_max_out, d_max_out,
blocks, dims); } /* sum partial sums from each block on cpu */ float sobfu::device::final_reduce(float *h_odata, float *d_odata, int numBlocks) { float result = 0.f; /* copy result from device to host */ cudaSafeCall(cudaMemcpy(h_odata, d_odata, numBlocks * sizeof(float), cudaMemcpyDeviceToHost)); for (int i = 0; i < numBlocks; i++) { result += h_odata[i]; } return result; } float2 sobfu::device::final_reduce_max(float2 *h_o_max_data, float2 *d_o_max_data, int numBlocks, int3 dims) { float2 result = make_float2(0.f, 0.f); /* copy result from device to host */ cudaSafeCall(cudaMemcpy(h_o_max_data, d_o_max_data, numBlocks * sizeof(float2), cudaMemcpyDeviceToHost)); for (int i = 0; i < numBlocks; i++) { if (h_o_max_data[i].x > result.x) { result = h_o_max_data[i]; } } return result; } ================================================ FILE: src/sobfu/scalar_fields.cpp ================================================ #include /* * SCALAR FIELD */ sobfu::cuda::ScalarField::ScalarField(cv::Vec3i dims_) { dims = make_int3(dims_[0], dims_[1], dims_[2]); int no_voxels = dims.x * dims.y * dims.z; data.create(no_voxels * sizeof(float)); clear(); } sobfu::cuda::ScalarField::~ScalarField() = default; kfusion::cuda::CudaData sobfu::cuda::ScalarField::get_data() { return data; } const kfusion::cuda::CudaData sobfu::cuda::ScalarField::get_data() const { return data; } int3 sobfu::cuda::ScalarField::get_dims() { return dims; } void sobfu::cuda::ScalarField::clear() { sobfu::device::ScalarField field(data.ptr(), dims); sobfu::device::clear(field); } float sobfu::cuda::ScalarField::sum() { sobfu::device::ScalarField field(data.ptr(), dims); float result = sobfu::device::reduce_sum(field); return result; } void sobfu::cuda::ScalarField::print() { int sizes[3] = {dims.x, dims.y, dims.z}; cv::Mat* mat = new cv::Mat(3, sizes, CV_32FC1); data.download(mat->ptr()); std::cout << "--- FIELD ---" << std::endl; for (int i = 0; i < dims.x; i++) { for (int j = 0; j < dims.y; j++) { for (int k = 0; k < dims.z; k++) { float val = mat->at(k, j, i); std::cout << "(x,y,z)=(" << i << ", " << j << ", " << k << "), val=(" << val << std::endl; } } } delete mat; } ================================================ FILE: src/sobfu/sob_fusion.cpp ================================================ #include SobFusion::SobFusion(const Params ¶ms) : frame_counter_(0), params(params) { int cols = params.cols; int rows = params.rows; dists_.create(rows, cols); curr_.depth_pyr.resize(1); curr_.normals_pyr.resize(1); prev_.depth_pyr.resize(1); prev_.normals_pyr.resize(1); curr_.points_pyr.resize(1); prev_.points_pyr.resize(1); curr_.depth_pyr[0].create(rows, cols); curr_.normals_pyr[0].create(rows, cols); prev_.depth_pyr[0].create(rows, cols); prev_.normals_pyr[0].create(rows, cols); curr_.points_pyr[0].create(rows, cols); prev_.points_pyr[0].create(rows, cols); depths_.create(rows, cols); normals_.create(rows, cols); points_.create(rows, cols); poses_.clear(); poses_.reserve(4096); poses_.push_back(cv::Affine3f::Identity()); mc = cv::Ptr(new kfusion::cuda::MarchingCubes()); mc->setPose(params.volume_pose); } SobFusion::~SobFusion() = default; Params &SobFusion::getParams() { return params; } pcl::PolygonMesh::Ptr SobFusion::get_phi_global_mesh() { return get_mesh(phi_global); } pcl::PolygonMesh::Ptr SobFusion::get_phi_global_psi_inv_mesh() { return get_mesh(phi_global_psi_inv); } pcl::PolygonMesh::Ptr SobFusion::get_phi_n_mesh() { return get_mesh(phi_n); } pcl::PolygonMesh::Ptr SobFusion::get_phi_n_psi_mesh() { return get_mesh(phi_n_psi); } 
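
For orientation, Reductor::data_energy above evaluates the data term of the energy, E_data = 0.5 * sum_v (phi_n(v) - phi_global(v))^2: reduce_data writes one partial sum of squared TSDF residuals per CUDA block into d_data_out, and final_reduce copies those partial sums to the host and adds them up, which is the behaviour DataTermTest in test/reductions_test.cpp relies on. A minimal CPU sketch of the same quantity, assuming both volumes have been downloaded into float2 arrays whose .x component stores the TSDF value, might look like this (illustrative only, not part of the repository):

/* hypothetical CPU reference for Reductor::data_energy */
float data_energy_cpu(const float2* phi_global, const float2* phi_n, int no_voxels) {
    float sum = 0.f;
    for (int i = 0; i < no_voxels; ++i) {
        /* .x holds the TSDF value, .y its weight */
        float diff = phi_n[i].x - phi_global[i].x;
        sum += diff * diff;
    }
    return 0.5f * sum; /* matches the 0.5f * final_reduce(...) on the GPU path */
}
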
std::shared_ptr SobFusion::getDeformationField() { return this->psi; } /* PIPELINE * * --- frame 0 --- * * 1. bilateral filter * 2. depth truncation * 3. initailisation of phi_global and phi_n * * --- frames n + 1 --- * 1. bilateral filter * 2. depth truncation * 3. initialisation of phi_n * 4. estimation of psi * 5. fusion of phi_n(psi) * 6. warp of phi_global with psi^-1 * */ bool SobFusion::operator()(const kfusion::cuda::Depth &depth, const kfusion::cuda::Image & /*image*/) { std::cout << "--- FRAME NO. " << frame_counter_ << " ---" << std::endl; /* * bilateral filter */ kfusion::cuda::depthBilateralFilter(depth, curr_.depth_pyr[0], params.bilateral_kernel_size, params.bilateral_sigma_spatial, params.bilateral_sigma_depth); /* * depth truncation */ kfusion::cuda::depthTruncation(curr_.depth_pyr[0], params.icp_truncate_depth_dist); /* * compute distances using depth map */ kfusion::cuda::computeDists(curr_.depth_pyr[0], dists_, params.intr); if (frame_counter_ == 0) { /* * INITIALISATION OF PHI_GLOBAL */ phi_global = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); phi_global->integrate(dists_, poses_.back(), params.intr); /* * INITIALISATION OF PHI_GLOBAL(PSI_INV), PHI_N, AND PHI_N(PSI) */ phi_global_psi_inv = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); phi_n = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); phi_n_psi = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); /* * INITIALISATION OF PSI AND PSI_INV */ this->psi = std::make_shared(params.volume_dims); this->psi_inv = std::make_shared(params.volume_dims); /* * INITIALISATION OF THE SOLVER */ this->solver = std::make_shared(params); return ++frame_counter_, true; } /* * UPDATE OF PHI_N */ phi_n->clear(); this->phi_n->integrate(dists_, poses_.back(), params.intr); /* * ESTIMATION OF DEFORMATION FIELD AND SURFACE FUSION */ if (frame_counter_ < params.start_frame) { this->phi_global->integrate(*phi_n); return ++frame_counter_, true; } solver->estimate_psi(phi_global, phi_global_psi_inv, phi_n, phi_n_psi, psi, psi_inv); this->phi_global->integrate(*phi_n_psi); return ++frame_counter_, true; } pcl::PolygonMesh::Ptr SobFusion::get_mesh(cv::Ptr vol) { kfusion::device::DeviceArray vertices_buffer_device; kfusion::device::DeviceArray normals_buffer_device; /* run marching cubes */ std::shared_ptr model = std::make_shared(mc->run(*vol, vertices_buffer_device, normals_buffer_device)); kfusion::cuda::waitAllDefaultStream(); pcl::PolygonMesh::Ptr mesh = convert_to_mesh(model->vertices); return mesh; } pcl::PolygonMesh::Ptr SobFusion::convert_to_mesh(const kfusion::cuda::DeviceArray &triangles) { if (triangles.empty()) { return pcl::PolygonMesh::Ptr(new pcl::PolygonMesh()); } pcl::PointCloud cloud; cloud.width = static_cast(triangles.size()); cloud.height = 1; triangles.download(cloud.points); pcl::PolygonMesh::Ptr mesh(new pcl::PolygonMesh()); pcl::toPCLPointCloud2(cloud, mesh->cloud); mesh->polygons.resize(triangles.size() / 3); for (size_t i = 0; i < mesh->polygons.size(); ++i) { pcl::Vertices v; v.vertices.push_back(i * 3 + 0); v.vertices.push_back(i * 3 + 1); v.vertices.push_back(i * 3 + 2); mesh->polygons[i] = v; } return mesh; } ================================================ FILE: src/sobfu/solver.cpp ================================================ #include /* * SOLVER */ sobfu::cuda::Solver::Solver(Params& params) { /* * PARAMETERS */ /* volume */ cv::Vec3i d = params.volume_dims; dims = make_int3(d[0], d[1], d[2]); no_voxels = dims.x * dims.y * dims.z; cv::Vec3f vsz = params.voxel_sizes(); voxel_sizes = make_float3(vsz[0], vsz[1], 
vsz[2]); trunc_dist = params.tsdf_trunc_dist; eta = params.eta; max_weight = params.tsdf_max_weight; /* solver */ solver_params.verbosity = params.verbosity; solver_params.max_iter = params.max_iter; solver_params.max_update_norm = params.max_update_norm; solver_params.lambda = params.lambda; solver_params.s = params.s; solver_params.alpha = params.alpha; solver_params.w_reg = params.w_reg; /* * SOLVER HELPER CLASSES */ /* gradients */ spatial_grads = new sobfu::cuda::SpatialGradients(d); nabla_phi_n = new sobfu::device::TsdfGradient(spatial_grads->nabla_phi_n->get_data().ptr(), dims); nabla_phi_n_o_psi = new sobfu::device::TsdfGradient(spatial_grads->nabla_phi_n_o_psi->get_data().ptr(), dims); J = new sobfu::device::Jacobian(spatial_grads->J->get_data().ptr(), dims); J_inv = new sobfu::device::Jacobian(spatial_grads->J_inv->get_data().ptr(), dims); L = new sobfu::device::Laplacian(spatial_grads->L->get_data().ptr(), dims); L_o_psi_inv = new sobfu::device::Laplacian(spatial_grads->L_o_psi_inv->get_data().ptr(), dims); nabla_U = new sobfu::device::PotentialGradient(spatial_grads->nabla_U->get_data().ptr(), dims); nabla_U_S = new sobfu::device::PotentialGradient(spatial_grads->nabla_U_S->get_data().ptr(), dims); spatial_grads_device = new sobfu::device::SpatialGradients(nabla_phi_n, nabla_phi_n_o_psi, J, J_inv, L, L_o_psi_inv, nabla_U, nabla_U_S); /* reductor */ r = new sobfu::device::Reductor(dims, voxel_sizes.x, trunc_dist); /* sobolev filter */ h_S_i = new float[solver_params.s]; cudaSafeCall(cudaMalloc((void**) &d_S_i, solver_params.s * sizeof(float))); decompose_sobolev_filter(solver_params, h_S_i); cudaSafeCall(cudaMemcpy(d_S_i, h_S_i, solver_params.s * sizeof(float), cudaMemcpyHostToDevice)); } sobfu::cuda::Solver::~Solver() = default; void sobfu::cuda::Solver::estimate_psi(const cv::Ptr phi_global, cv::Ptr phi_global_psi_inv, const cv::Ptr phi_n, cv::Ptr phi_n_psi, std::shared_ptr psi, std::shared_ptr psi_inv) { /* DEVICE CLASSES */ sobfu::device::DeformationField psi_device(psi->get_data().ptr(), dims); sobfu::device::DeformationField psi_inv_device(psi_inv->get_data().ptr(), dims); kfusion::device::TsdfVolume phi_global_device(phi_global->data().ptr(), dims, voxel_sizes, trunc_dist, eta, max_weight); kfusion::device::TsdfVolume phi_global_psi_inv_device(phi_global_psi_inv->data().ptr(), dims, voxel_sizes, trunc_dist, eta, max_weight); kfusion::device::TsdfVolume phi_n_device(phi_n->data().ptr(), dims, voxel_sizes, trunc_dist, eta, max_weight); kfusion::device::TsdfVolume phi_n_psi_device(phi_n_psi->data().ptr(), dims, voxel_sizes, trunc_dist, eta, max_weight); SDFs sdfs(phi_global_device, phi_global_psi_inv_device, phi_n_device, phi_n_psi_device); sobfu::device::TsdfDifferentiator tsdf_diff(phi_n_psi_device); sobfu::device::Differentiator diff(psi_device); sobfu::device::Differentiator diff_inv(psi_inv_device); sobfu::device::SecondOrderDifferentiator second_order_diff(psi_device); Differentiators differentiators(tsdf_diff, diff, diff_inv, second_order_diff); /* run the solver */ sobfu::device::estimate_psi(sdfs, psi_device, psi_inv_device, spatial_grads_device, differentiators, d_S_i, r, solver_params); } /* * SOBOLEV FILTER */ static void sobfu::cuda::get_3d_sobolev_filter(SolverParams& params, float* h_S_i) { int s3 = params.s * params.s * params.s; /* init identity and laplacian matrices */ cv::Mat Id = cv::Mat::eye(s3, s3, CV_32FC1); cv::Mat L_mat = -6.f * cv::Mat::eye(s3, s3, CV_32FC1); /* calculate laplacian matrix */ for (int i = 0; i <= static_cast(pow(params.s, 3)) - 1; 
++i) { int idx_z = i / (params.s * params.s); int idx_y = (i - idx_z * params.s * params.s) / params.s; int idx_x = i - params.s * (idx_y + params.s * idx_z); if (idx_x + 1 < params.s) { int pos = (idx_x + 1) + idx_y * params.s + idx_z * params.s * params.s; L_mat.at(i, pos) = 1.f; } if (idx_x - 1 >= 0) { int pos = (idx_x - 1) + idx_y * params.s + idx_z * params.s * params.s; L_mat.at(i, pos) = 1.f; } if (idx_y + 1 < params.s) { int pos = idx_x + (idx_y + 1) * params.s + idx_z * params.s * params.s; L_mat.at(i, pos) = 1.f; } if (idx_y - 1 >= 0) { int pos = idx_x + (idx_y - 1) * params.s + idx_z * params.s * params.s; L_mat.at(i, pos) = 1.f; } if (idx_z + 1 < params.s) { int pos = idx_x + idx_y * params.s + (idx_z + 1) * params.s * params.s; L_mat.at(i, pos) = 1.f; } if (idx_z - 1 >= 0) { int pos = idx_x + idx_y * params.s + (idx_z - 1) * params.s * params.s; L_mat.at(i, pos) = 1.f; } } /* init one-hot vector v */ cv::Mat v = cv::Mat::zeros(s3, 1, CV_32FC1); v.at(floor(s3 / 2.f), 0) = 1.f; /* init sobolev filter S */ cv::Mat S = cv::Mat::zeros(s3, 1, CV_32FC1); /* solve for S */ cv::solve((Id - params.lambda * L_mat), v, S, cv::DECOMP_SVD); std::cout << S << std::endl; } static void sobfu::cuda::decompose_sobolev_filter(SolverParams& params, float* h_S_i) { if (params.s == 3) { if (params.lambda == 0.1f) { h_S_i[0] = 0.06537f; h_S_i[1] = 0.99572f; h_S_i[2] = h_S_i[0]; } } if (params.s == 7) { if (params.lambda == 0.05f) { h_S_i[0] = 0.00006f; h_S_i[1] = 0.00015f; h_S_i[2] = 0.03917f; h_S_i[3] = 0.99846f; h_S_i[4] = h_S_i[2]; h_S_i[5] = h_S_i[1]; h_S_i[6] = h_S_i[0]; } if (params.lambda == 0.1f) { h_S_i[0] = 0.00030f; h_S_i[1] = 0.00441f; h_S_i[2] = 0.06571f; h_S_i[3] = 0.99565f; h_S_i[4] = h_S_i[2]; h_S_i[5] = h_S_i[1]; h_S_i[6] = h_S_i[0]; } if (params.lambda == 0.2f) { h_S_i[0] = 0.00120f; h_S_i[1] = 0.01094f; h_S_i[2] = 0.10204f; h_S_i[3] = 0.98941f; h_S_i[4] = h_S_i[2]; h_S_i[5] = h_S_i[1]; h_S_i[6] = h_S_i[0]; } if (params.lambda == 0.4f) { h_S_i[0] = 0.00169f; h_S_i[1] = 0.01312f; h_S_i[2] = 0.10927f; h_S_i[3] = 0.98781f; h_S_i[4] = h_S_i[2]; h_S_i[5] = h_S_i[1]; h_S_i[6] = h_S_i[0]; } } if (params.s == 9) { if (params.lambda == 0.05f) { h_S_i[0] = 0.000003f; h_S_i[1] = 0.00006f; h_S_i[2] = 0.00155f; h_S_i[3] = 0.03917f; h_S_i[4] = 0.99846f; h_S_i[5] = 0.03917f; h_S_i[6] = 0.00155f; h_S_i[7] = 0.00006f; h_S_i[8] = 0.000003f; } if (params.lambda == 0.1f) { h_S_i[0] = 0.00002f; h_S_i[1] = 0.00030f; h_S_i[2] = 0.00441f; h_S_i[3] = 0.06571f; h_S_i[4] = 0.99565f; h_S_i[5] = h_S_i[3]; h_S_i[6] = h_S_i[2]; h_S_i[7] = h_S_i[1]; h_S_i[8] = h_S_i[0]; } } if (params.s == 11) { if (params.lambda == 0.1f) { h_S_i[0] = 0.0000015f; h_S_i[1] = 0.00002f; h_S_i[2] = 0.00030f; h_S_i[3] = 0.00441f; h_S_i[4] = 0.06571f; h_S_i[5] = 0.99565f; h_S_i[6] = h_S_i[4]; h_S_i[7] = h_S_i[3]; h_S_i[8] = h_S_i[2]; h_S_i[9] = h_S_i[1]; h_S_i[10] = h_S_i[0]; } } /* normalise filter to unit sum */ float sum = 0.f; for (int i = 0; i < params.s; ++i) { sum += h_S_i[i]; } for (int i = 0; i < params.s; ++i) { h_S_i[i] /= sum; } } ================================================ FILE: src/sobfu/vector_fields.cpp ================================================ /* sobfu includes */ #include /* * VECTOR FIELD */ sobfu::cuda::VectorField::VectorField(cv::Vec3i dims_) : dims(dims_) { int no_voxels = dims[0] * dims[1] * dims[2]; data.create(no_voxels * sizeof(float4)); clear(); } sobfu::cuda::VectorField::~VectorField() = default; cv::Vec3i sobfu::cuda::VectorField::get_dims() const { return dims; } kfusion::cuda::CudaData 
sobfu::cuda::VectorField::get_data() { return data; } const kfusion::cuda::CudaData sobfu::cuda::VectorField::get_data() const { return data; } void sobfu::cuda::VectorField::set_data(kfusion::cuda::CudaData& data) { this->data = data; } void sobfu::cuda::VectorField::clear() { int3 d = make_int3(dims[0], dims[1], dims[2]); sobfu::device::VectorField field(data.ptr(), d); sobfu::device::clear(field); } void sobfu::cuda::VectorField::print() { int sizes[3] = {dims[0], dims[1], dims[2]}; cv::Mat* mat = new cv::Mat(3, sizes, CV_32FC4); get_data().download(mat->ptr()); std::cout << "--- FIELD ---" << std::endl; for (int i = 0; i < dims[0]; i++) { for (int j = 0; j < dims[1]; j++) { for (int k = 0; k < dims[2]; k++) { float u = mat->at(k, j, i).x; float v = mat->at(k, j, i).y; float w = mat->at(k, j, i).z; if (fabs(u) > 1e-5f || fabs(v) > 1e-5f || fabs(w) > 1e-5f) { std::cout << "(x,y,z)=(" << i << ", " << j << ", " << k << "), (u,v,w)=(" << u << ", " << v << ", " << w << ")" << std::endl; } } } } delete mat; } int sobfu::cuda::VectorField::get_no_nans() { int sizes[3] = {dims[0], dims[1], dims[2]}; cv::Mat* mat = new cv::Mat(3, sizes, CV_32FC4); get_data().download(mat->ptr()); int no_nan = 0; for (int i = 0; i < dims[0]; i++) { for (int j = 0; j < dims[1]; j++) { for (int k = 0; k < dims[2]; k++) { float u = mat->at(k, j, i).x; float v = mat->at(k, j, i).y; float w = mat->at(k, j, i).z; if (std::isnan(u) || std::isnan(v) || std::isnan(w)) { no_nan++; } } } } delete mat; return no_nan; } /* * DEFORMATION FIELD */ sobfu::cuda::DeformationField::DeformationField(cv::Vec3i dims_) : VectorField(dims_) { clear(); } sobfu::cuda::DeformationField::~DeformationField() = default; void sobfu::cuda::DeformationField::clear() { cv::Vec3i dims = get_dims(); int3 d = make_int3(dims[0], dims[1], dims[2]); sobfu::device::DeformationField psi(get_data().ptr(), d); sobfu::device::init_identity(psi); } void sobfu::cuda::DeformationField::get_inverse(sobfu::cuda::DeformationField& psi_inv) { kfusion::device::Vec3i d = kfusion::device_cast(get_dims()); sobfu::device::DeformationField psi_device(get_data().ptr(), d); sobfu::device::DeformationField psi_inverse_device(psi_inv.get_data().ptr(), d); sobfu::device::estimate_inverse(psi_device, psi_inverse_device); } void sobfu::cuda::DeformationField::apply(const cv::Ptr phi_n, cv::Ptr phi_n_psi) { kfusion::device::Vec3i d = kfusion::device_cast(phi_n->getDims()); kfusion::device::Vec3f vsz = kfusion::device_cast(phi_n->getVoxelSize()); float3 voxel_sizes = kfusion::device_cast(phi_n->getVoxelSize()); float trunc_dist = phi_n->getTruncDist(); float eta = phi_n->getEta(); float max_weight = phi_n->getMaxWeight(); kfusion::device::TsdfVolume phi_device(phi_n->data().ptr(), d, vsz, trunc_dist, eta, max_weight); kfusion::device::TsdfVolume phi_warped_device(phi_n_psi->data().ptr(), d, vsz, trunc_dist, eta, max_weight); sobfu::device::DeformationField psi_device(get_data().ptr(), d); sobfu::device::apply(phi_device, phi_warped_device, psi_device); kfusion::cuda::waitAllDefaultStream(); } /* * JACOBIAN */ sobfu::cuda::Jacobian::Jacobian(cv::Vec3i dims_) : dims(dims_) { int no_voxels = dims[0] * dims[1] * dims[2]; data.create(no_voxels * sizeof(Mat4f)); clear(); } sobfu::cuda::Jacobian::~Jacobian() = default; kfusion::cuda::CudaData sobfu::cuda::Jacobian::get_data() { return data; } const kfusion::cuda::CudaData sobfu::cuda::Jacobian::get_data() const { return data; } void sobfu::cuda::Jacobian::clear() { int3 d = make_int3(dims[0], dims[1], dims[2]); sobfu::device::Jacobian J(data.ptr(), d); sobfu::device::clear(J); }
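
Taken together, the wrappers above form a small host-side API around the device code in vector_fields.cu: a DeformationField is allocated at the volume resolution and initialised to the identity mapping, apply() warps a live TSDF through the field with trilinear interpolation, and get_inverse() drives the fixed-point iteration in estimate_inverse. A minimal usage sketch follows; it assumes a populated Params object and cv::Ptr<kfusion::cuda::TsdfVolume> as the smart-pointer type (the dump strips template arguments, so the exact types are an assumption):

/* illustrative only -- mirrors how sob_fusion.cpp and the tests use these classes */
sobfu::cuda::DeformationField psi(params.volume_dims);     /* identity field, psi(v) = v */
sobfu::cuda::DeformationField psi_inv(params.volume_dims);

cv::Ptr<kfusion::cuda::TsdfVolume> phi_n(new kfusion::cuda::TsdfVolume(params));
cv::Ptr<kfusion::cuda::TsdfVolume> phi_n_psi(new kfusion::cuda::TsdfVolume(params));

psi.apply(phi_n, phi_n_psi); /* phi_n_psi(v) = phi_n(psi(v)), trilinearly interpolated */
psi.get_inverse(psi_inv);    /* iterative estimate of psi^-1, used to warp phi_global back */
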
/* * SPATIAL GRADIENTS */ sobfu::cuda::SpatialGradients::SpatialGradients(cv::Vec3i dims_) { nabla_phi_n = new sobfu::cuda::TsdfGradient(dims_); nabla_phi_n_o_psi = new sobfu::cuda::TsdfGradient(dims_); J = new sobfu::cuda::Jacobian(dims_); J_inv = new sobfu::cuda::Jacobian(dims_); L = new sobfu::cuda::Laplacian(dims_); L_o_psi_inv = new sobfu::cuda::Laplacian(dims_); nabla_U = new sobfu::cuda::PotentialGradient(dims_); nabla_U_S = new sobfu::cuda::PotentialGradient(dims_); } sobfu::cuda::SpatialGradients::~SpatialGradients() { delete nabla_phi_n; delete nabla_phi_n_o_psi; delete J; delete J_inv; delete L; delete L_o_psi_inv; delete nabla_U; delete nabla_U_S; } ================================================ FILE: test/CMakeLists.txt ================================================ # ---------------------------------------------------------------------------- # # GOOGLE TEST DEPENDENCIES # ---------------------------------------------------------------------------- # # Implementation heavily based upon: # https://github.com/kaizouman/gtest-cmake-example # Enable ExternalProject CMake module include(ExternalProject) # Download and install GoogleTest ExternalProject_Add( gtest URL https://github.com/google/googletest/archive/release-1.8.1.zip PREFIX ${CMAKE_CURRENT_BINARY_DIR}/gtest # Disable install step INSTALL_COMMAND "" ) # Get GTest source and binary directories from CMake project ExternalProject_Get_Property(gtest source_dir binary_dir) # Create a libgtest target to be used as a dependency by test programs add_library(libgtest IMPORTED STATIC GLOBAL) add_dependencies(libgtest gtest) # Set libgtest properties set_target_properties(libgtest PROPERTIES "IMPORTED_LOCATION" "${binary_dir}/googlemock/gtest/libgtest.a" "IMPORTED_LINK_INTERFACE_LIBRARIES" "${CMAKE_THREAD_LIBS_INIT}" ) # Create a libgmock target to be used as a dependency by test programs add_library(libgmock IMPORTED STATIC GLOBAL) add_dependencies(libgmock gtest) # Set libgmock properties set_target_properties(libgmock PROPERTIES "IMPORTED_LOCATION" "${binary_dir}/googlemock/libgmock.a" "IMPORTED_LINK_INTERFACE_LIBRARIES" "${CMAKE_THREAD_LIBS_INIT}" ) # ---------------------------------------------------------------------------- # # TEST INCLUDE DIRECTORIES # ---------------------------------------------------------------------------- # include_directories("${source_dir}/googletest/include" "${source_dir}/googlemock/include") # ---------------------------------------------------------------------------- # # CREATE TESTS # ---------------------------------------------------------------------------- # # Force test build set(gtest_build_samples ON) # Add the sobfu test CREATE_TEST(sobfu_test) # Link all libraries for testing target_link_libraries(sobfu_test sobfu ${CUDA_CUDART_LIBRARY} ${OpenCV_LIBS} ${PCL_LIBRARIES} ) ================================================ FILE: test/deformation_field_test.cpp ================================================ /* gtest includes */ #include /* sobfu includes */ #include #include /* kinfu includes */ #include #include #include /* sys headers */ #include #include #include #include class DeformationFieldTest : public ::testing::Test { protected: /* You can remove any or all of the following functions if its body * is empty. */ /* You can do set-up work for each test here. */ DeformationFieldTest() = default; /* You can do clean-up work that doesn't throw exceptions here.
*/ ~DeformationFieldTest() override = default; /* If the constructor and destructor are not enough for setting up * and cleaning up each test, you can define the following methods: */ /* Code here will be called immediately after the constructor (right * before each test). */ void SetUp() override { /* volume params */ params.volume_dims = cv::Vec3i::all(64); params.volume_size = cv::Vec3f::all(0.25f); params.tsdf_trunc_dist = 10.f * params.volume_size[0] / static_cast(params.volume_dims[0]); params.gradient_delta_factor = 0.1f; params.eta = 2.f * params.volume_size[0] / static_cast(params.volume_dims[0]); /* camera params */ params.intr = kfusion::Intr(1.f, 1.f, 0.f, 0.f); dims = kfusion::device_cast(params.volume_dims); voxel_sizes = kfusion::device_cast(params.voxel_sizes()); no_voxels = dims.x * dims.y * dims.z; /* init tsdf's */ phi_global = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); phi_global_psi_inv = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); phi_n = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); phi_n_psi = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); /* init psi */ psi = std::make_shared(params.volume_dims); psi_inv = std::make_shared(params.volume_dims); } /* Code here will be called immediately after each test (right * before the destructor). */ void TearDown() override {} /* Objects declared here can be used by all tests in the test case for Solver. */ /* sobfu params */ Params params; int3 dims; float3 voxel_sizes; int no_voxels; /* global and live tsdf's */ cv::Ptr phi_global, phi_global_psi_inv, phi_n, phi_n_psi; /* deformation field */ std::shared_ptr psi, psi_inv; /* solver */ std::shared_ptr solver; /* max. error */ float max_error = 1e-1f; /* floating point precision */ const float epsilon = 1e-5f; }; /* test that the vector field is correctly initialised to 0 */ TEST_F(DeformationFieldTest, ClearTest) { kfusion::cuda::CudaData data = psi->get_data(); int sizes[3] = {dims.x, dims.y, dims.z}; cv::Mat *matrix = new cv::Mat(3, sizes, CV_32FC4); data.download(matrix->ptr()); for (int i = 0; i < sizes[0]; i++) { for (int j = 0; j < sizes[1]; j++) { for (int k = 0; k < sizes[2]; k++) { ASSERT_NEAR(matrix->at(k, j, i).x, (float) i, epsilon); ASSERT_NEAR(matrix->at(k, j, i).y, (float) j, epsilon); ASSERT_NEAR(matrix->at(k, j, i).z, (float) k, epsilon); } } } } /* test that the norm of the gradient of an sdf is 1 */ TEST_F(DeformationFieldTest, TsdfGradientTest) { /* init tsdf */ float3 c = make_float3(0.16f, 0.16f, 0.16f); float r = 0.01f; phi_global->initSphere(c, r); kfusion::device::TsdfVolume phi_global_device(phi_global->data().ptr(), dims, voxel_sizes, params.tsdf_trunc_dist, params.eta, params.tsdf_max_weight); /* init gradient */ kfusion::cuda::CudaData grad_data; grad_data.create(no_voxels * sizeof(float4)); sobfu::device::TsdfGradient gradient_device(grad_data.ptr(), dims); sobfu::device::TsdfDifferentiator diff(phi_global_device); diff.calculate(gradient_device); /* test that the gradient magnitude is 0 in the truncated regions and close to 1 elsewhere */ float2 *tsdf_ptr = new float2[no_voxels]; phi_global->data().download(tsdf_ptr); float4 *grad_ptr = new float4[no_voxels]; grad_data.download(grad_ptr); int sizes[3] = {dims.x, dims.y, dims.z}; for (int i = 1; i < sizes[0] - 1; i++) { for (int j = 1; j < sizes[1] - 1; j++) { for (int k = 1; k < sizes[2] - 1; k++) { float tsdf_val = (*(tsdf_ptr + i + j * sizes[0] + k * sizes[1] * sizes[0])).x; float4 grad_val = *(grad_ptr + i + j * sizes[0] + k * sizes[1] * sizes[0]); float norm = sqrtf(grad_val.x * 
grad_val.x + grad_val.y * grad_val.y + grad_val.z * grad_val.z); if (fabs(tsdf_val) < 1.f) { /* only check for non-truncated voxels */ ASSERT_NEAR(norm, voxel_sizes.x / params.tsdf_trunc_dist, 0.15f); } } } } } /* test that the jacobian of a uniform vector field is null */ TEST_F(DeformationFieldTest, UniformFieldJacobianTest) { /* init psi to a uniform field */ int sizes[3] = {dims.x, dims.y, dims.z}; cv::Mat *matrix = new cv::Mat(3, sizes, CV_32FC4); for (int i = 0; i < sizes[0]; i++) { for (int j = 0; j < sizes[1]; j++) { for (int k = 0; k < sizes[2]; k++) { matrix->at(k, j, i) = make_float4(1.f, 1.f, 1.f, 0.f); } } } kfusion::cuda::CudaData psi_data; psi_data.create(no_voxels * sizeof(float4)); psi_data.upload(matrix->ptr(), no_voxels * sizeof(float4)); sobfu::device::DeformationField psi_device(psi_data.ptr(), dims); /* init & clear the jacobian */ kfusion::cuda::CudaData jacobian_data; jacobian_data.create(no_voxels * sizeof(Mat4f)); sobfu::device::Jacobian J(jacobian_data.ptr(), dims); /* calculate the jacobian */ sobfu::device::Differentiator diff(psi_device); diff.calculate(J); /* test that the jacobian is null */ Mat4f *ptr = new Mat4f[no_voxels]; jacobian_data.download(ptr); for (int i = 0; i < sizes[0]; i++) { for (int j = 0; j < sizes[1]; j++) { for (int k = 0; k < sizes[2]; k++) { Mat4f J_val = *(ptr + i + j * sizes[0] + k * sizes[1] * sizes[0]); for (int r = 0; r < 3; r++) { ASSERT_NEAR(J_val.data[r].x, 0.f, epsilon); ASSERT_NEAR(J_val.data[r].y, 0.f, epsilon); ASSERT_NEAR(J_val.data[r].z, 0.f, epsilon); } } } } } /* test the calculation of the jacobian of a simple vector field */ TEST_F(DeformationFieldTest, JacobianTestSimple) { /* init psi */ int sizes[3] = {dims.x, dims.y, dims.z}; cv::Mat *matrix = new cv::Mat(3, sizes, CV_32FC4); for (int i = 0; i < sizes[0]; i++) { for (int j = 0; j < sizes[1]; j++) { for (int k = 0; k < sizes[2]; k++) { matrix->at(k, j, i) = make_float4(i, j, k, 0.f); } } } kfusion::cuda::CudaData psi_data; psi_data.create(no_voxels * sizeof(float4)); psi_data.upload(matrix->ptr(), no_voxels * sizeof(float4)); sobfu::device::DeformationField psi_device(psi_data.ptr(), dims); /* init the jacobian */ kfusion::cuda::CudaData jacobian_data; jacobian_data.create(no_voxels * sizeof(Mat4f)); sobfu::device::Jacobian J(jacobian_data.ptr(), dims); /* calculate the jacobian */ sobfu::device::Differentiator diff(psi_device); diff.calculate(J); /* test that the jacobian values are correct */ Mat4f *ptr = new Mat4f[no_voxels]; jacobian_data.download(ptr); for (int i = 1; i < sizes[0] - 1; i++) { for (int j = 1; j < sizes[1] - 1; j++) { for (int k = 1; k < sizes[2] - 1; k++) { Mat4f J_val = *(ptr + i + j * sizes[0] + k * sizes[1] * sizes[0]); ASSERT_NEAR(J_val.data[0].x, 1.f, epsilon); ASSERT_NEAR(J_val.data[0].y, 0.f, epsilon); ASSERT_NEAR(J_val.data[0].z, 0.f, epsilon); ASSERT_NEAR(J_val.data[1].x, 0.f, epsilon); ASSERT_NEAR(J_val.data[1].y, 1.f, epsilon); ASSERT_NEAR(J_val.data[1].z, 0.f, epsilon); ASSERT_NEAR(J_val.data[2].x, 0.f, epsilon); ASSERT_NEAR(J_val.data[2].y, 0.f, epsilon); ASSERT_NEAR(J_val.data[2].z, 1.f, epsilon); } } } } /* test the computation of the jacobian and laplacian of a vector field */ TEST_F(DeformationFieldTest, JacobianLaplacianTestComplicated) { /* init psi */ int sizes[3] = {dims.x, dims.y, dims.z}; cv::Mat *matrix = new cv::Mat(3, sizes, CV_32FC4); for (int i = 0; i < sizes[0]; i++) { for (int j = 0; j < sizes[1]; j++) { for (int k = 0; k < sizes[2]; k++) { matrix->at(k, j, i) = make_float4(i * (1.f - j), exp(-k) + j, k, 0.f); } } } 
kfusion::cuda::CudaData psi_data; psi_data.create(no_voxels * sizeof(float4)); psi_data.upload(matrix->ptr(), no_voxels * sizeof(float4)); sobfu::device::DeformationField psi_device(psi_data.ptr(), dims); /* init the jacobian */ kfusion::cuda::CudaData J_data; J_data.create(no_voxels * sizeof(Mat4f)); sobfu::device::Jacobian J(J_data.ptr(), dims); /* calculate the jacobian */ sobfu::device::Differentiator diff(psi_device); diff.calculate(J); /* test that the jacobian values are correct */ Mat4f *ptr = new Mat4f[no_voxels]; J_data.download(ptr); /* (1-y -x 0 ) * J_psi = ( 0 1 -exp(-z) ) * ( 0 0 1 ) */ for (int i = 1; i < sizes[0] - 1; i++) { for (int j = 1; j < sizes[1] - 1; j++) { for (int k = 1; k < sizes[2] - 1; k++) { Mat4f J_val = *(ptr + i + j * sizes[0] + k * sizes[1] * sizes[0]); ASSERT_NEAR(J_val.data[0].x, 1.f - j, max_error); ASSERT_NEAR(J_val.data[0].y, -i, max_error); ASSERT_NEAR(J_val.data[0].z, 0.f, max_error); ASSERT_NEAR(J_val.data[1].x, 0, max_error); ASSERT_NEAR(J_val.data[1].y, 1.f, max_error); ASSERT_NEAR(J_val.data[1].z, -exp(-k), max_error); ASSERT_NEAR(J_val.data[2].x, 0.f, max_error); ASSERT_NEAR(J_val.data[2].y, 0.f, max_error); ASSERT_NEAR(J_val.data[2].z, 1.f, max_error); } } } /* init the laplacian */ kfusion::cuda::CudaData laplacian_data; laplacian_data.create(no_voxels * sizeof(float4)); sobfu::device::Laplacian L(laplacian_data.ptr(), dims); /* calculate the laplacian */ sobfu::device::SecondOrderDifferentiator secondOrderDiff(psi_device); secondOrderDiff.calculate(L); /* test that the laplacian values are correct */ float4 *L_ptr = new float4[no_voxels]; laplacian_data.download(L_ptr); /* * expecting -(0 exp(-z) 0)--we calculate the negative laplacian for simplicity */ for (int i = 1; i < sizes[0] - 1; i++) { for (int j = 1; j < sizes[1] - 1; j++) { for (int k = 1; k < sizes[2] - 1; k++) { float4 L_val = *(L_ptr + i + j * sizes[0] + k * sizes[1] * sizes[0]); ASSERT_NEAR(L_val.x, 0.f, max_error); ASSERT_NEAR(L_val.y, -exp(-k), max_error); ASSERT_NEAR(L_val.z, 0.f, max_error); } } } } ================================================ FILE: test/main.cpp ================================================ #include "gtest/gtest.h" int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } ================================================ FILE: test/reductions_test.cpp ================================================ /* gtest includes */ #include /* sobfu includes */ #include #include /* kinfu includes */ #include #include #include /* sys headers */ #include #include #include #include class ReductionsTest : public ::testing::Test { protected: /* You can remove any or all of the following functions if its body * is empty. */ /* You can do set-up work for each test here. */ ReductionsTest() = default; /* You can do clean-up work that doesn't throw exceptions here. */ ~ReductionsTest() override = default; /* If the constructor and destructor are not enough for setting up * and cleaning up each test, you can define the following methods: */ /* Code here will be called immediately after the constructor (right * before each test). 
*/ void SetUp() override { /* verbosity */ params.verbosity = 2; /* volume params */ params.volume_dims = cv::Vec3i::all(64); params.volume_size = cv::Vec3f::all(0.25f); params.tsdf_trunc_dist = 5.f * params.volume_size[0] / static_cast(params.volume_dims[0]); params.eta = 2.f * params.volume_size[0] / static_cast(params.volume_dims[0]); dims = kfusion::device_cast(params.volume_dims); voxel_sizes = kfusion::device_cast(params.voxel_sizes()); no_voxels = dims.x * dims.y * dims.z; /* init tsdf's */ phi_global = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); phi_n = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); /* init reduction class */ reductor = new sobfu::device::Reductor(dims, voxel_sizes.x, params.tsdf_trunc_dist); } /* Code here will be called immediately after each test (right * before the destructor). */ void TearDown() override {} /* Objects declared here can be used by all tests in the test case for Solver. */ /* sobfu params */ Params params; int3 dims; float3 voxel_sizes; int no_voxels; /* global and live tsdf's */ cv::Ptr phi_global, phi_n; /* reduction class */ sobfu::device::Reductor* reductor; /* max. error */ float max_error = 1e-1f; /* floating point precision */ const float epsilon = 1e-5f; }; /* test calculation of the energy functional data term */ TEST_F(ReductionsTest, DataTermTest) { /* init tsdf's */ kfusion::device::TsdfVolume phi_n_device(phi_n->data().ptr(), dims, voxel_sizes, params.tsdf_trunc_dist, params.eta, params.tsdf_max_weight); kfusion::device::clear_volume(phi_n_device); /* will contain all 0's */ float3 c = make_float3(5.f, 5.f, 5.f); float r = 0.01f; phi_global->initSphere(c, r); /* will contain all 1's */ kfusion::device::TsdfVolume phi_global_device(phi_global->data().ptr(), dims, voxel_sizes, params.tsdf_trunc_dist, params.eta, params.tsdf_max_weight); float data_energy = reductor->data_energy(phi_global->data().ptr(), phi_n->data().ptr()); ASSERT_NEAR(data_energy, 0.5f * static_cast(no_voxels) * 1.f, max_error); } ================================================ FILE: test/solver_test.cpp ================================================ /* gtest includes */ #include /* sobfu includes */ #include #include /* kinfu includes */ #include #include #include /* sys headers */ #include #include #include #include class SolverTest : public ::testing::Test { protected: /* You can remove any or all of the following functions if its body * is empty. */ /* You can do set-up work for each test here. */ SolverTest() = default; /* You can do clean-up work that doesn't throw exceptions here. */ ~SolverTest() override = default; /* If the constructor and destructor are not enough for setting up * and cleaning up each test, you can define the following methods: */ /* Code here will be called immediately after the constructor (right * before each test). 
*/ void SetUp() override { /* verbosity */ params.verbosity = 1; /* volume params */ params.volume_dims = cv::Vec3i::all(64); params.volume_size = cv::Vec3f::all(0.25f); params.tsdf_trunc_dist = 10.f * params.volume_size[0] / static_cast(params.volume_dims[0]); params.gradient_delta_factor = 0.1f; params.eta = 2.f * params.volume_size[0] / static_cast(params.volume_dims[0]); /* camera params */ params.intr = kfusion::Intr(1.f, 1.f, 0.f, 0.f); /* solver params */ params.max_iter = 2048; params.max_update_norm = -1.f; params.s = 7; params.lambda = 0.1f; params.alpha = 0.001f; params.w_reg = 0.4f; dims = kfusion::device_cast(params.volume_dims); voxel_sizes = kfusion::device_cast(params.voxel_sizes()); no_voxels = dims.x * dims.y * dims.z; /* init tsdf's */ phi_global = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); phi_global_psi_inv = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); phi_n = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); phi_n_psi = cv::Ptr(new kfusion::cuda::TsdfVolume(params)); mc = cv::Ptr(new kfusion::cuda::MarchingCubes()); /* init psi */ psi = std::make_shared(params.volume_dims); psi_inv = std::make_shared(params.volume_dims); } /* Code here will be called immediately after each test (right * before the destructor). */ void TearDown() override {} /* Objects declared here can be used by all tests in the test case for Solver. */ /* sobfu params */ Params params; int3 dims; float3 voxel_sizes; int no_voxels; /* global and live tsdf's */ cv::Ptr phi_global, phi_global_psi_inv, phi_n, phi_n_psi; /* marching cubes--for visualisation purposes */ cv::Ptr mc; /* deformation field */ std::shared_ptr psi, psi_inv; /* solver */ std::shared_ptr solver; /* max. error */ float max_error = 1e-1f; /* floating point precision */ const float epsilon = 1e-5f; }; /* test the alignment of 2 spheres */ TEST_F(SolverTest, AlignmentTestSphereTranslation) { /* set parameters */ params.max_iter = 2048; params.alpha = 0.01f; params.w_reg = 0.4f; /* init solver */ solver = std::make_shared(params); /* init phi_global and phi_n */ int sizes[3] = {dims.x, dims.y, dims.z}; float3 c = make_float3(0.13f, 0.13f, 0.13f); float r = 0.012f; phi_global->initSphere(c, r); c = make_float3(0.125f, 0.13f, 0.13f); phi_n->initSphere(c, r); phi_n_psi->initSphere(c, r); /* calculate psi and apply it to phi_n */ solver->estimate_psi(phi_global, phi_global_psi_inv, phi_n, phi_n_psi, psi, psi_inv); } /* test the alignment of a sphere translating and expanding */ TEST_F(SolverTest, AlignmentTestSphereExpanding) { /* set parameters */ params.max_iter = 2048; params.alpha = 0.005f; params.w_reg = 0.4f; /* init solver */ solver = std::make_shared(params); /* init phi_global and phi_n */ int sizes[3] = {dims.x, dims.y, dims.z}; float3 c = make_float3(0.13f, 0.13f, 0.13f); float r = 0.012f; phi_global->initSphere(c, r); c = make_float3(0.125f, 0.13f, 0.13f); r = 0.0145f; phi_n->initSphere(c, r); phi_n_psi->initSphere(c, r); /* calculate psi and apply it to phi_n */ solver->estimate_psi(phi_global, phi_global_psi_inv, phi_n, phi_n_psi, psi, psi_inv); } /* test the alignment of 2 spheres over 3 frames */ TEST_F(SolverTest, SerialAlignmentTest) { /* set parameters */ params.max_iter = 2048; params.alpha = 0.005f; params.w_reg = 0.4f; /* init solver */ solver = std::make_shared(params); /* init phi_global and phi_n */ int sizes[3] = {dims.x, dims.y, dims.z}; float3 c = make_float3(0.13f, 0.13f, 0.13f); float r = 0.02f; phi_global->initSphere(c, r); c = make_float3(0.125f, 0.13f, 0.132f); phi_n->initSphere(c, r); 
phi_n_psi->initSphere(c, r); /* * FRAME 1 to FRAME 0 * */ std::cout << "\nFRAME 1\n" << std::endl; /* calculate psi and apply it to phi_n */ solver->estimate_psi(phi_global, phi_global_psi_inv, phi_n, phi_n_psi, psi, psi_inv); /* * FRAME 2 to FRAME 0 * */ std::cout << "\nFRAME 2\n" << std::endl; phi_n->clear(); phi_n_psi->clear(); c = make_float3(0.123f, 0.13f, 0.132f); phi_n->initSphere(c, r); psi->apply(phi_n, phi_n_psi); solver->estimate_psi(phi_global, phi_global_psi_inv, phi_n, phi_n_psi, psi, psi_inv); }
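
The three SolverTest cases above only run estimate_psi and rely on the solver's own energy logging; they contain no assertions on the aligned volumes. One possible post-condition, sketched here under the assumption that warping phi_n by the estimated psi should reduce the data term, reuses the Reductor from reductor.cpp (the construction mirrors ReductionsTest::SetUp, and ptr<float2>() is assumed to be the typed accessor whose template argument the dump strips):

/* hypothetical assertion for the sphere alignment tests -- not part of the repository */
sobfu::device::Reductor reductor(dims, voxel_sizes.x, params.tsdf_trunc_dist);
float energy_before = reductor.data_energy(phi_global->data().ptr<float2>(), phi_n->data().ptr<float2>());

solver->estimate_psi(phi_global, phi_global_psi_inv, phi_n, phi_n_psi, psi, psi_inv);

float energy_after = reductor.data_energy(phi_global->data().ptr<float2>(), phi_n_psi->data().ptr<float2>());
ASSERT_LT(energy_after, energy_before); /* the warped live volume should match phi_global more closely */
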